Index: head/sys/fs/nfs/nfs.h =================================================================== --- head/sys/fs/nfs/nfs.h (revision 335011) +++ head/sys/fs/nfs/nfs.h (revision 335012) @@ -1,750 +1,782 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFS_H_ #define _NFS_NFS_H_ /* * Tunable constants for nfs */ #define NFS_MAXIOVEC 34 #define NFS_TICKINTVL 500 /* Desired time for a tick (msec) */ #define NFS_HZ (hz / nfscl_ticks) /* Ticks/sec */ #define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ #define NFS_TCPTIMEO 300 /* TCP timeout */ #define NFS_MAXRCVTIMEO 60 /* 1 minute in seconds */ #define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFSV4_CALLBACKTIMEO (2 * NFS_HZ) /* Timeout in ticks */ #define NFSV4_CALLBACKRETRY 5 /* Number of retries before failure */ #define NFSV4_SLOTS 64 /* Number of slots, fore channel */ #define NFSV4_CBSLOTS 8 /* Number of slots, back channel */ #define NFSV4_CBRETRYCNT 4 /* # of CBRecall retries upon err */ #define NFSV4_UPCALLTIMEO (15 * NFS_HZ) /* Timeout in ticks for upcalls */ /* to gssd or nfsuserd */ #define NFSV4_UPCALLRETRY 4 /* Number of retries before failure */ #define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ #define NFS_RETRANS 10 /* Num of retrans for soft mounts */ #define NFS_RETRANS_TCP 2 /* Num of retrans for TCP soft mounts */ #define NFS_MAXGRPS 16 /* Max. size of groups list */ #define NFS_TRYLATERDEL 15 /* Maximum delay timeout (sec) */ #ifndef NFS_REMOVETIMEO #define NFS_REMOVETIMEO 15 /* # sec to wait for delegret in local syscall */ #endif #ifndef NFS_MINATTRTIMO #define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */ #endif #ifndef NFS_MAXATTRTIMO #define NFS_MAXATTRTIMO 60 #endif #define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ #define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ #define NFS_READDIRSIZE 8192 /* Def. readdir size */ #define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ #define NFS_MAXRAHEAD 16 /* Max. read ahead # blocks */ #define NFS_MAXASYNCDAEMON 64 /* Max. number async_daemons runnable */ #define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */ #ifndef NFSRV_LEASE #define NFSRV_LEASE 120 /* Lease time in seconds for V4 */ #endif /* assigned to nfsrv_lease */ #ifndef NFSRV_STALELEASE #define NFSRV_STALELEASE (5 * nfsrv_lease) #endif #ifndef NFSRV_MOULDYLEASE #define NFSRV_MOULDYLEASE 604800 /* One week (in sec) */ #endif #ifndef NFSCLIENTHASHSIZE #define NFSCLIENTHASHSIZE 20 /* Size of server client hash table */ #endif #ifndef NFSLOCKHASHSIZE #define NFSLOCKHASHSIZE 20 /* Size of server nfslock hash table */ #endif #ifndef NFSSESSIONHASHSIZE #define NFSSESSIONHASHSIZE 20 /* Size of server session hash table */ #endif #define NFSSTATEHASHSIZE 10 /* Size of server stateid hash table */ +#define NFSLAYOUTHIGHWATER 1000000 /* Upper limit for # of layouts */ #ifndef NFSCLDELEGHIGHWATER #define NFSCLDELEGHIGHWATER 10000 /* limit for client delegations */ #endif #ifndef NFSCLLAYOUTHIGHWATER #define NFSCLLAYOUTHIGHWATER 10000 /* limit for client pNFS layouts */ #endif #ifndef NFSNOOPEN /* Inactive open owner (sec) */ #define NFSNOOPEN 120 #endif #define NFSRV_LEASEDELTA 15 /* # of seconds to delay beyond lease */ #define NFS_IDMAXSIZE 4 /* max sizeof (in_addr_t) */ #ifndef NFSRVCACHE_UDPTIMEOUT #define NFSRVCACHE_UDPTIMEOUT 30 /* # of sec to hold cached rpcs(udp) */ #endif #ifndef NFSRVCACHE_UDPHIGHWATER #define NFSRVCACHE_UDPHIGHWATER 500 /* Max # of udp cache entries */ #endif #ifndef NFSRVCACHE_TCPTIMEOUT #define NFSRVCACHE_TCPTIMEOUT (3600*12) /*#of sec to hold cached rpcs(tcp) */ #endif #ifndef NFSRVCACHE_FLOODLEVEL #define NFSRVCACHE_FLOODLEVEL 16384 /* Very high water mark for cache */ #endif #ifndef NFSRV_CLIENTHIGHWATER #define NFSRV_CLIENTHIGHWATER 1000 #endif #ifndef NFSRV_MAXDUMPLIST #define NFSRV_MAXDUMPLIST 10000 #endif #ifndef NFS_ACCESSCACHESIZE #define NFS_ACCESSCACHESIZE 8 #endif #define NFSV4_CBPORT 7745 /* Callback port for testing */ /* * This macro defines the high water mark for issuing V4 delegations. * (It is currently set at a conservative 20% of nfsrv_v4statelimit. This * may want to increase when clients can make more effective use of * delegations.) */ #define NFSRV_V4DELEGLIMIT(c) (((c) * 5) > nfsrv_v4statelimit) #define NFS_READDIRBLKSIZ DIRBLKSIZ /* Minimal nm_readdirsize */ /* * Oddballs */ #define NFS_CMPFH(n, f, s) \ ((n)->n_fhp->nfh_len == (s) && !NFSBCMP((n)->n_fhp->nfh_fh, (caddr_t)(f), (s))) #define NFSRV_CMPFH(nf, ns, f, s) \ ((ns) == (s) && !NFSBCMP((caddr_t)(nf), (caddr_t)(f), (s))) #define NFS_CMPTIME(t1, t2) \ ((t1).tv_sec == (t2).tv_sec && (t1).tv_nsec == (t2).tv_nsec) #define NFS_SETTIME(t) do { \ (t).tv_sec = time.tv_sec; (t).tv_nsec = 1000 * time.tv_usec; } while (0) #define NFS_SRVMAXDATA(n) \ (((n)->nd_flag & (ND_NFSV3 | ND_NFSV4)) ? \ NFS_SRVMAXIO : NFS_V2MAXDATA) #define NFS64BITSSET 0xffffffffffffffffull #define NFS64BITSMINUS1 0xfffffffffffffffeull /* * Structures for the nfssvc(2) syscall. Not that anyone but nfsd, mount_nfs * and nfsloaduser should ever try and use it. */ struct nfsd_addsock_args { int sock; /* Socket to serve */ caddr_t name; /* Client addr for connection based sockets */ int namelen; /* Length of name */ }; /* * nfsd argument for new krpc. + * (New version supports pNFS, indicated by NFSSVC_NEWSTRUCT flag.) */ struct nfsd_nfsd_args { const char *principal; /* GSS-API service principal name */ int minthreads; /* minimum service thread count */ int maxthreads; /* maximum service thread count */ + int version; /* Allow multiple variants */ + char *addr; /* pNFS DS addresses */ + int addrlen; /* Length of addrs */ + char *dnshost; /* DNS names for DS addresses */ + int dnshostlen; /* Length of DNS names */ + char *dspath; /* DS Mount path on MDS */ + int dspathlen; /* Length of DS Mount path on MDS */ + int mirrorcnt; /* Number of mirrors to create on DSs */ }; /* * NFSDEV_MAXMIRRORS - Maximum level of mirroring for a DS. * (Most will only put files on two DSs, but this setting allows up to 4.) * NFSDEV_MAXVERS - maximum number of NFS versions supported by Flex File. */ #define NFSDEV_MAXMIRRORS 4 #define NFSDEV_MAXVERS 4 +struct nfsd_pnfsd_args { + int op; /* Which pNFSd op to perform. */ + char *mdspath; /* Path of MDS file. */ + char *dspath; /* Path of recovered DS mounted on dir. */ + char *curdspath; /* Path of current DS mounted on dir. */ +}; + +#define PNFSDOP_DELDSSERVER 1 +#define PNFSDOP_COPYMR 2 + +/* Old version. */ +struct nfsd_nfsd_oargs { + const char *principal; /* GSS-API service principal name */ + int minthreads; /* minimum service thread count */ + int maxthreads; /* maximum service thread count */ +}; + /* * Arguments for use by the callback daemon. */ struct nfsd_nfscbd_args { const char *principal; /* GSS-API service principal name */ }; struct nfscbd_args { int sock; /* Socket to serve */ caddr_t name; /* Client addr for connection based sockets */ int namelen; /* Length of name */ u_short port; /* Port# for callbacks */ }; struct nfsd_idargs { int nid_flag; /* Flags (see below) */ uid_t nid_uid; /* user/group id */ gid_t nid_gid; int nid_usermax; /* Upper bound on user name cache */ int nid_usertimeout;/* User name timeout (minutes) */ u_char *nid_name; /* Name */ int nid_namelen; /* and its length */ gid_t *nid_grps; /* and the list */ int nid_ngroup; /* Size of groups list */ }; struct nfsd_oidargs { int nid_flag; /* Flags (see below) */ uid_t nid_uid; /* user/group id */ gid_t nid_gid; int nid_usermax; /* Upper bound on user name cache */ int nid_usertimeout;/* User name timeout (minutes) */ u_char *nid_name; /* Name */ int nid_namelen; /* and its length */ }; struct nfsd_clid { int nclid_idlen; /* Length of client id */ u_char nclid_id[NFSV4_OPAQUELIMIT]; /* and name */ }; struct nfsd_dumplist { int ndl_size; /* Number of elements */ void *ndl_list; /* and the list of elements */ }; struct nfsd_dumpclients { u_int32_t ndcl_flags; /* LCL_xxx flags */ u_int32_t ndcl_nopenowners; /* Number of openowners */ u_int32_t ndcl_nopens; /* and opens */ u_int32_t ndcl_nlockowners; /* and of lockowners */ u_int32_t ndcl_nlocks; /* and of locks */ u_int32_t ndcl_ndelegs; /* and of delegations */ u_int32_t ndcl_nolddelegs; /* and old delegations */ sa_family_t ndcl_addrfam; /* Callback address */ union { struct in_addr sin_addr; struct in6_addr sin6_addr; } ndcl_cbaddr; struct nfsd_clid ndcl_clid; /* and client id */ }; struct nfsd_dumplocklist { char *ndllck_fname; /* File Name */ int ndllck_size; /* Number of elements */ void *ndllck_list; /* and the list of elements */ }; struct nfsd_dumplocks { u_int32_t ndlck_flags; /* state flags NFSLCK_xxx */ nfsv4stateid_t ndlck_stateid; /* stateid */ u_int64_t ndlck_first; /* lock byte range */ u_int64_t ndlck_end; struct nfsd_clid ndlck_owner; /* Owner of open/lock */ sa_family_t ndlck_addrfam; /* Callback address */ union { struct in_addr sin_addr; struct in6_addr sin6_addr; } ndlck_cbaddr; struct nfsd_clid ndlck_clid; /* and client id */ }; /* * Structure for referral information. */ struct nfsreferral { u_char *nfr_srvlist; /* List of servers */ int nfr_srvcnt; /* number of servers */ vnode_t nfr_vp; /* vnode for referral */ uint64_t nfr_dfileno; /* assigned dir inode# */ }; /* * Flags for lc_flags and opsflags for nfsrv_getclient(). */ #define LCL_NEEDSCONFIRM 0x00000001 #define LCL_DONTCLEAN 0x00000002 #define LCL_WAKEUPWANTED 0x00000004 #define LCL_TCPCALLBACK 0x00000008 #define LCL_CALLBACKSON 0x00000010 #define LCL_INDEXNOTOK 0x00000020 #define LCL_STAMPEDSTABLE 0x00000040 #define LCL_EXPIREIT 0x00000080 #define LCL_CBDOWN 0x00000100 #define LCL_KERBV 0x00000400 #define LCL_NAME 0x00000800 #define LCL_NEEDSCBNULL 0x00001000 #define LCL_GSSINTEGRITY 0x00002000 #define LCL_GSSPRIVACY 0x00004000 #define LCL_ADMINREVOKED 0x00008000 #define LCL_RECLAIMCOMPLETE 0x00010000 #define LCL_NFSV41 0x00020000 #define LCL_DONEBINDCONN 0x00040000 #define LCL_GSS LCL_KERBV /* Or of all mechs */ /* * Bits for flags in nfslock and nfsstate. * The access, deny, NFSLCK_READ and NFSLCK_WRITE bits must be defined as * below, in the correct order, so the shifts work for tests. */ #define NFSLCK_READACCESS 0x00000001 #define NFSLCK_WRITEACCESS 0x00000002 #define NFSLCK_ACCESSBITS (NFSLCK_READACCESS | NFSLCK_WRITEACCESS) #define NFSLCK_SHIFT 2 #define NFSLCK_READDENY 0x00000004 #define NFSLCK_WRITEDENY 0x00000008 #define NFSLCK_DENYBITS (NFSLCK_READDENY | NFSLCK_WRITEDENY) #define NFSLCK_SHAREBITS \ (NFSLCK_READACCESS|NFSLCK_WRITEACCESS|NFSLCK_READDENY|NFSLCK_WRITEDENY) #define NFSLCK_LOCKSHIFT 4 #define NFSLCK_READ 0x00000010 #define NFSLCK_WRITE 0x00000020 #define NFSLCK_BLOCKING 0x00000040 #define NFSLCK_RECLAIM 0x00000080 #define NFSLCK_OPENTOLOCK 0x00000100 #define NFSLCK_TEST 0x00000200 #define NFSLCK_LOCK 0x00000400 #define NFSLCK_UNLOCK 0x00000800 #define NFSLCK_OPEN 0x00001000 #define NFSLCK_CLOSE 0x00002000 #define NFSLCK_CHECK 0x00004000 #define NFSLCK_RELEASE 0x00008000 #define NFSLCK_NEEDSCONFIRM 0x00010000 #define NFSLCK_CONFIRM 0x00020000 #define NFSLCK_DOWNGRADE 0x00040000 #define NFSLCK_DELEGREAD 0x00080000 #define NFSLCK_DELEGWRITE 0x00100000 #define NFSLCK_DELEGCUR 0x00200000 #define NFSLCK_DELEGPREV 0x00400000 #define NFSLCK_OLDDELEG 0x00800000 #define NFSLCK_DELEGRECALL 0x01000000 #define NFSLCK_SETATTR 0x02000000 #define NFSLCK_DELEGPURGE 0x04000000 #define NFSLCK_DELEGRETURN 0x08000000 #define NFSLCK_WANTWDELEG 0x10000000 #define NFSLCK_WANTRDELEG 0x20000000 #define NFSLCK_WANTNODELEG 0x40000000 #define NFSLCK_WANTBITS \ (NFSLCK_WANTWDELEG | NFSLCK_WANTRDELEG | NFSLCK_WANTNODELEG) /* And bits for nid_flag */ #define NFSID_INITIALIZE 0x0001 #define NFSID_ADDUID 0x0002 #define NFSID_DELUID 0x0004 #define NFSID_ADDUSERNAME 0x0008 #define NFSID_DELUSERNAME 0x0010 #define NFSID_ADDGID 0x0020 #define NFSID_DELGID 0x0040 #define NFSID_ADDGROUPNAME 0x0080 #define NFSID_DELGROUPNAME 0x0100 /* * fs.nfs sysctl(3) identifiers */ #define NFS_NFSSTATS 1 /* struct: struct nfsstats */ /* * Here is the definition of the attribute bits array and macros that * manipulate it. * THE MACROS MUST BE MANUALLY MODIFIED IF NFSATTRBIT_MAXWORDS CHANGES!! * It is (NFSATTRBIT_MAX + 31) / 32. */ #define NFSATTRBIT_MAXWORDS 3 typedef struct { u_int32_t bits[NFSATTRBIT_MAXWORDS]; } nfsattrbit_t; #define NFSZERO_ATTRBIT(b) do { \ (b)->bits[0] = 0; \ (b)->bits[1] = 0; \ (b)->bits[2] = 0; \ } while (0) #define NFSSET_ATTRBIT(t, f) do { \ (t)->bits[0] = (f)->bits[0]; \ (t)->bits[1] = (f)->bits[1]; \ (t)->bits[2] = (f)->bits[2]; \ } while (0) #define NFSSETSUPP_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_SUPP0; \ (b)->bits[1] = (NFSATTRBIT_SUPP1 | NFSATTRBIT_SUPPSETONLY); \ (b)->bits[2] = NFSATTRBIT_SUPP2; \ } while (0) #define NFSISSET_ATTRBIT(b, p) ((b)->bits[(p) / 32] & (1 << ((p) % 32))) #define NFSSETBIT_ATTRBIT(b, p) ((b)->bits[(p) / 32] |= (1 << ((p) % 32))) #define NFSCLRBIT_ATTRBIT(b, p) ((b)->bits[(p) / 32] &= ~(1 << ((p) % 32))) #define NFSCLRALL_ATTRBIT(b, a) do { \ (b)->bits[0] &= ~((a)->bits[0]); \ (b)->bits[1] &= ~((a)->bits[1]); \ (b)->bits[2] &= ~((a)->bits[2]); \ } while (0) #define NFSCLRNOT_ATTRBIT(b, a) do { \ (b)->bits[0] &= ((a)->bits[0]); \ (b)->bits[1] &= ((a)->bits[1]); \ (b)->bits[2] &= ((a)->bits[2]); \ } while (0) #define NFSCLRNOTFILLABLE_ATTRBIT(b) do { \ (b)->bits[0] &= NFSATTRBIT_SUPP0; \ (b)->bits[1] &= NFSATTRBIT_SUPP1; \ (b)->bits[2] &= NFSATTRBIT_SUPP2; \ } while (0) #define NFSCLRNOTSETABLE_ATTRBIT(b) do { \ (b)->bits[0] &= NFSATTRBIT_SETABLE0; \ (b)->bits[1] &= NFSATTRBIT_SETABLE1; \ (b)->bits[2] &= NFSATTRBIT_SETABLE2; \ } while (0) #define NFSNONZERO_ATTRBIT(b) ((b)->bits[0] || (b)->bits[1] || (b)->bits[2]) #define NFSEQUAL_ATTRBIT(b, p) ((b)->bits[0] == (p)->bits[0] && \ (b)->bits[1] == (p)->bits[1] && (b)->bits[2] == (p)->bits[2]) #define NFSGETATTR_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_GETATTR0; \ (b)->bits[1] = NFSATTRBIT_GETATTR1; \ (b)->bits[2] = NFSATTRBIT_GETATTR2; \ } while (0) #define NFSWCCATTR_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_WCCATTR0; \ (b)->bits[1] = NFSATTRBIT_WCCATTR1; \ (b)->bits[2] = NFSATTRBIT_WCCATTR2; \ } while (0) #define NFSWRITEGETATTR_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_WRITEGETATTR0; \ (b)->bits[1] = NFSATTRBIT_WRITEGETATTR1; \ (b)->bits[2] = NFSATTRBIT_WRITEGETATTR2; \ } while (0) #define NFSCBGETATTR_ATTRBIT(b, c) do { \ (c)->bits[0] = ((b)->bits[0] & NFSATTRBIT_CBGETATTR0); \ (c)->bits[1] = ((b)->bits[1] & NFSATTRBIT_CBGETATTR1); \ (c)->bits[2] = ((b)->bits[2] & NFSATTRBIT_CBGETATTR2); \ } while (0) #define NFSPATHCONF_GETATTRBIT(b) do { \ (b)->bits[0] = NFSGETATTRBIT_PATHCONF0; \ (b)->bits[1] = NFSGETATTRBIT_PATHCONF1; \ (b)->bits[2] = NFSGETATTRBIT_PATHCONF2; \ } while (0) #define NFSSTATFS_GETATTRBIT(b) do { \ (b)->bits[0] = NFSGETATTRBIT_STATFS0; \ (b)->bits[1] = NFSGETATTRBIT_STATFS1; \ (b)->bits[2] = NFSGETATTRBIT_STATFS2; \ } while (0) #define NFSISSETSTATFS_ATTRBIT(b) \ (((b)->bits[0] & NFSATTRBIT_STATFS0) || \ ((b)->bits[1] & NFSATTRBIT_STATFS1) || \ ((b)->bits[2] & NFSATTRBIT_STATFS2)) #define NFSCLRSTATFS_ATTRBIT(b) do { \ (b)->bits[0] &= ~NFSATTRBIT_STATFS0; \ (b)->bits[1] &= ~NFSATTRBIT_STATFS1; \ (b)->bits[2] &= ~NFSATTRBIT_STATFS2; \ } while (0) #define NFSREADDIRPLUS_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_READDIRPLUS0; \ (b)->bits[1] = NFSATTRBIT_READDIRPLUS1; \ (b)->bits[2] = NFSATTRBIT_READDIRPLUS2; \ } while (0) #define NFSREFERRAL_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_REFERRAL0; \ (b)->bits[1] = NFSATTRBIT_REFERRAL1; \ (b)->bits[2] = NFSATTRBIT_REFERRAL2; \ } while (0) /* * Store uid, gid creds that were used when the stateid was acquired. * The RPC layer allows NFS_MAXGRPS + 1 groups to go out on the wire, * so that's how many gets stored here. */ struct nfscred { uid_t nfsc_uid; gid_t nfsc_groups[NFS_MAXGRPS + 1]; int nfsc_ngroups; }; /* * Constants that define the file handle for the V4 root directory. * (The FSID must never be used by other file systems that are exported.) */ #define NFSV4ROOT_FSID0 ((int32_t) -1) #define NFSV4ROOT_FSID1 ((int32_t) -1) #define NFSV4ROOT_REFERRAL ((int32_t) -2) #define NFSV4ROOT_INO 2 /* It's traditional */ #define NFSV4ROOT_GEN 1 /* * The set of signals the interrupt an I/O in progress for NFSMNT_INT mounts. * What should be in this set is open to debate, but I believe that since * I/O system calls on ufs are never interrupted by signals the set should * be minimal. My reasoning is that many current programs that use signals * such as SIGALRM will not expect file I/O system calls to be interrupted * by them and break. */ #if defined(_KERNEL) || defined(KERNEL) struct uio; struct buf; struct vattr; struct nameidata; /* XXX */ /* * Socket errors ignored for connectionless sockets? * For now, ignore them all */ #define NFSIGNORE_SOERROR(s, e) \ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ ((s) & PR_CONNREQUIRED) == 0) /* * This structure holds socket information for a connection. Used by the * client and the server for callbacks. */ struct nfssockreq { NFSSOCKADDR_T nr_nam; int nr_sotype; int nr_soproto; int nr_soflags; struct ucred *nr_cred; int nr_lock; NFSMUTEX_T nr_mtx; u_int32_t nr_prog; u_int32_t nr_vers; struct __rpc_client *nr_client; AUTH *nr_auth; }; /* * And associated nr_lock bits. */ #define NFSR_SNDLOCK 0x01 #define NFSR_WANTSND 0x02 #define NFSR_RCVLOCK 0x04 #define NFSR_WANTRCV 0x08 #define NFSR_RESERVEDPORT 0x10 #define NFSR_LOCALHOST 0x20 /* * Queue head for nfsreq's */ TAILQ_HEAD(nfsreqhead, nfsreq); /* This is the only nfsreq R_xxx flag still used. */ #define R_DONTRECOVER 0x00000100 /* don't initiate recovery when this rpc gets a stale state reply */ /* * Network address hash list element */ union nethostaddr { struct in_addr had_inet; struct in6_addr had_inet6; }; /* * Structure of list of mechanisms. */ struct nfsgss_mechlist { int len; const u_char *str; int totlen; }; #define KERBV_MECH 0 /* position in list */ /* * This structure is used by the server for describing each request. */ struct nfsrv_descript { mbuf_t nd_mrep; /* Request mbuf list */ mbuf_t nd_md; /* Current dissect mbuf */ mbuf_t nd_mreq; /* Reply mbuf list */ mbuf_t nd_mb; /* Current build mbuf */ NFSSOCKADDR_T nd_nam; /* and socket addr */ NFSSOCKADDR_T nd_nam2; /* return socket addr */ caddr_t nd_dpos; /* Current dissect pos */ caddr_t nd_bpos; /* Current build pos */ + u_int64_t nd_flag; /* nd_flag */ u_int16_t nd_procnum; /* RPC # */ - u_int32_t nd_flag; /* nd_flag */ u_int32_t nd_repstat; /* Reply status */ int *nd_errp; /* Pointer to ret status */ u_int32_t nd_retxid; /* Reply xid */ struct nfsrvcache *nd_rp; /* Assoc. cache entry */ fhandle_t nd_fh; /* File handle */ struct ucred *nd_cred; /* Credentials */ uid_t nd_saveduid; /* Saved uid */ u_int64_t nd_sockref; /* Rcv socket ref# */ u_int64_t nd_compref; /* Compound RPC ref# */ time_t nd_tcpconntime; /* Time TCP connection est. */ nfsquad_t nd_clientid; /* Implied clientid */ int nd_gssnamelen; /* principal name length */ char *nd_gssname; /* principal name */ uint32_t *nd_slotseq; /* ptr to slot seq# in req */ uint8_t nd_sessionid[NFSX_V4SESSIONID]; /* Session id */ uint32_t nd_slotid; /* Slotid for this RPC */ SVCXPRT *nd_xprt; /* Server RPC handle */ uint32_t *nd_sequence; /* Sequence Op. ptr */ + nfsv4stateid_t nd_curstateid; /* Current StateID */ + nfsv4stateid_t nd_savedcurstateid; /* Saved Current StateID */ }; #define nd_princlen nd_gssnamelen #define nd_principal nd_gssname /* Bits for "nd_flag" */ #define ND_DONTSAVEREPLY 0x00000001 #define ND_SAVEREPLY 0x00000002 #define ND_NFSV2 0x00000004 #define ND_NFSV3 0x00000008 #define ND_NFSV4 0x00000010 #define ND_KERBV 0x00000020 #define ND_GSSINTEGRITY 0x00000040 #define ND_GSSPRIVACY 0x00000080 #define ND_WINDOWVERF 0x00000100 #define ND_GSSINITREPLY 0x00000200 #define ND_STREAMSOCK 0x00000400 #define ND_PUBLOOKUP 0x00000800 #define ND_USEGSSNAME 0x00001000 #define ND_SAMETCPCONN 0x00002000 #define ND_IMPLIEDCLID 0x00004000 #define ND_NOMOREDATA 0x00008000 #define ND_V4WCCATTR 0x00010000 #define ND_NFSCB 0x00020000 #define ND_AUTHNONE 0x00040000 #define ND_EXAUTHSYS 0x00080000 #define ND_EXGSS 0x00100000 #define ND_EXGSSINTEGRITY 0x00200000 #define ND_EXGSSPRIVACY 0x00400000 #define ND_INCRSEQID 0x00800000 #define ND_NFSCL 0x01000000 #define ND_NFSV41 0x02000000 #define ND_HASSEQUENCE 0x04000000 #define ND_CACHETHIS 0x08000000 #define ND_LASTOP 0x10000000 #define ND_LOOPBADSESS 0x20000000 +#define ND_DSSERVER 0x40000000 +#define ND_CURSTATEID 0x80000000 +#define ND_SAVEDCURSTATEID 0x100000000 /* * ND_GSS should be the "or" of all GSS type authentications. */ #define ND_GSS (ND_KERBV) struct nfsv4_opflag { int retfh; int needscfh; int savereply; int modifyfs; int lktype; int needsseq; int loopbadsess; }; /* * Flags used to indicate what to do w.r.t. seqid checking. */ #define NFSRVSEQID_FIRST 0x01 #define NFSRVSEQID_LAST 0x02 #define NFSRVSEQID_OPEN 0x04 /* * assign a doubly linked list to a new head * and prepend one list into another. */ #define LIST_NEWHEAD(nhead, ohead, field) do { \ if (((nhead)->lh_first = (ohead)->lh_first) != NULL) \ (ohead)->lh_first->field.le_prev = &(nhead)->lh_first; \ (ohead)->lh_first = NULL; \ } while (0) #define LIST_PREPEND(head, phead, lelm, field) do { \ if ((head)->lh_first != NULL) { \ (lelm)->field.le_next = (head)->lh_first; \ (lelm)->field.le_next->field.le_prev = \ &(lelm)->field.le_next; \ } \ (head)->lh_first = (phead)->lh_first; \ (head)->lh_first->field.le_prev = &(head)->lh_first; \ } while (0) /* * File handle structure for client. Malloc'd to the correct length with * malloc type M_NFSFH. */ struct nfsfh { u_int16_t nfh_len; /* Length of file handle */ u_int8_t nfh_fh[1]; /* and the file handle */ }; /* * File handle structure for server. The NFSRV_MAXFH constant is * set in nfsdport.h. I use a 32bit length, so that alignment is * preserved. */ struct nfsrvfh { u_int32_t nfsrvfh_len; u_int8_t nfsrvfh_data[NFSRV_MAXFH]; }; /* * This structure is used for sleep locks on the NFSv4 nfsd threads and * NFSv4 client data structures. */ struct nfsv4lock { u_int32_t nfslock_usecnt; u_int8_t nfslock_lock; }; #define NFSV4LOCK_LOCK 0x01 #define NFSV4LOCK_LOCKWANTED 0x02 #define NFSV4LOCK_WANTED 0x04 /* * Values for the override argument for nfsvno_accchk(). */ #define NFSACCCHK_NOOVERRIDE 0 #define NFSACCCHK_ALLOWROOT 1 #define NFSACCCHK_ALLOWOWNER 2 /* * and values for the vpislocked argument for nfsvno_accchk(). */ #define NFSACCCHK_VPNOTLOCKED 0 #define NFSACCCHK_VPISLOCKED 1 /* * Slot for the NFSv4.1 Sequence Op. */ struct nfsslot { int nfssl_inprog; uint32_t nfssl_seq; struct mbuf *nfssl_reply; }; #endif /* _KERNEL */ #endif /* _NFS_NFS_H */ Index: head/sys/fs/nfs/nfs_commonacl.c =================================================================== --- head/sys/fs/nfs/nfs_commonacl.c (revision 335011) +++ head/sys/fs/nfs/nfs_commonacl.c (revision 335012) @@ -1,515 +1,485 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include extern int nfsrv_useacl; #endif static int nfsrv_acemasktoperm(u_int32_t acetype, u_int32_t mask, int owner, enum vtype type, acl_perm_t *permp); /* * Handle xdr for an ace. */ APPLESTATIC int nfsrv_dissectace(struct nfsrv_descript *nd, struct acl_entry *acep, int *aceerrp, int *acesizep, NFSPROC_T *p) { u_int32_t *tl; int len, gotid = 0, owner = 0, error = 0, aceerr = 0; u_char *name, namestr[NFSV4_SMALLSTR + 1]; u_int32_t flag, mask, acetype; gid_t gid; uid_t uid; *aceerrp = 0; acep->ae_flags = 0; NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); acetype = fxdr_unsigned(u_int32_t, *tl++); flag = fxdr_unsigned(u_int32_t, *tl++); mask = fxdr_unsigned(u_int32_t, *tl++); len = fxdr_unsigned(int, *tl); if (len < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (len == 0) { /* Netapp filers return a 0 length who for nil users */ acep->ae_tag = ACL_UNDEFINED_TAG; acep->ae_id = ACL_UNDEFINED_ID; acep->ae_perm = (acl_perm_t)0; acep->ae_entry_type = ACL_ENTRY_TYPE_DENY; if (acesizep) *acesizep = 4 * NFSX_UNSIGNED; error = 0; goto nfsmout; } if (len > NFSV4_SMALLSTR) name = malloc(len + 1, M_NFSSTRING, M_WAITOK); else name = namestr; error = nfsrv_mtostr(nd, name, len); if (error) { if (len > NFSV4_SMALLSTR) free(name, M_NFSSTRING); goto nfsmout; } if (len == 6) { if (!NFSBCMP(name, "OWNER@", 6)) { acep->ae_tag = ACL_USER_OBJ; acep->ae_id = ACL_UNDEFINED_ID; owner = 1; gotid = 1; } else if (!NFSBCMP(name, "GROUP@", 6)) { acep->ae_tag = ACL_GROUP_OBJ; acep->ae_id = ACL_UNDEFINED_ID; gotid = 1; } } else if (len == 9 && !NFSBCMP(name, "EVERYONE@", 9)) { acep->ae_tag = ACL_EVERYONE; acep->ae_id = ACL_UNDEFINED_ID; gotid = 1; } if (gotid == 0) { if (flag & NFSV4ACE_IDENTIFIERGROUP) { acep->ae_tag = ACL_GROUP; aceerr = nfsv4_strtogid(nd, name, len, &gid, p); if (aceerr == 0) acep->ae_id = (uid_t)gid; } else { acep->ae_tag = ACL_USER; aceerr = nfsv4_strtouid(nd, name, len, &uid, p); if (aceerr == 0) acep->ae_id = uid; } } if (len > NFSV4_SMALLSTR) free(name, M_NFSSTRING); if (aceerr == 0) { /* * Handle the flags. */ flag &= ~NFSV4ACE_IDENTIFIERGROUP; if (flag & NFSV4ACE_FILEINHERIT) { flag &= ~NFSV4ACE_FILEINHERIT; acep->ae_flags |= ACL_ENTRY_FILE_INHERIT; } if (flag & NFSV4ACE_DIRECTORYINHERIT) { flag &= ~NFSV4ACE_DIRECTORYINHERIT; acep->ae_flags |= ACL_ENTRY_DIRECTORY_INHERIT; } if (flag & NFSV4ACE_NOPROPAGATEINHERIT) { flag &= ~NFSV4ACE_NOPROPAGATEINHERIT; acep->ae_flags |= ACL_ENTRY_NO_PROPAGATE_INHERIT; } if (flag & NFSV4ACE_INHERITONLY) { flag &= ~NFSV4ACE_INHERITONLY; acep->ae_flags |= ACL_ENTRY_INHERIT_ONLY; } if (flag & NFSV4ACE_SUCCESSFULACCESS) { flag &= ~NFSV4ACE_SUCCESSFULACCESS; acep->ae_flags |= ACL_ENTRY_SUCCESSFUL_ACCESS; } if (flag & NFSV4ACE_FAILEDACCESS) { flag &= ~NFSV4ACE_FAILEDACCESS; acep->ae_flags |= ACL_ENTRY_FAILED_ACCESS; } /* * Set ae_entry_type. */ if (acetype == NFSV4ACE_ALLOWEDTYPE) acep->ae_entry_type = ACL_ENTRY_TYPE_ALLOW; else if (acetype == NFSV4ACE_DENIEDTYPE) acep->ae_entry_type = ACL_ENTRY_TYPE_DENY; else if (acetype == NFSV4ACE_AUDITTYPE) acep->ae_entry_type = ACL_ENTRY_TYPE_AUDIT; else if (acetype == NFSV4ACE_ALARMTYPE) acep->ae_entry_type = ACL_ENTRY_TYPE_ALARM; else aceerr = NFSERR_ATTRNOTSUPP; } /* * Now, check for unsupported flag bits. */ if (aceerr == 0 && flag != 0) aceerr = NFSERR_ATTRNOTSUPP; /* * And turn the mask into perm bits. */ if (aceerr == 0) aceerr = nfsrv_acemasktoperm(acetype, mask, owner, VREG, &acep->ae_perm); *aceerrp = aceerr; if (acesizep) *acesizep = NFSM_RNDUP(len) + (4 * NFSX_UNSIGNED); error = 0; nfsmout: NFSEXITCODE(error); return (error); } /* * Turn an NFSv4 ace mask into R/W/X flag bits. */ static int nfsrv_acemasktoperm(u_int32_t acetype, u_int32_t mask, int owner, enum vtype type, acl_perm_t *permp) { acl_perm_t perm = 0x0; int error = 0; if (mask & NFSV4ACE_READDATA) { mask &= ~NFSV4ACE_READDATA; perm |= ACL_READ_DATA; } if (mask & NFSV4ACE_LISTDIRECTORY) { mask &= ~NFSV4ACE_LISTDIRECTORY; perm |= ACL_LIST_DIRECTORY; } if (mask & NFSV4ACE_WRITEDATA) { mask &= ~NFSV4ACE_WRITEDATA; perm |= ACL_WRITE_DATA; } if (mask & NFSV4ACE_ADDFILE) { mask &= ~NFSV4ACE_ADDFILE; perm |= ACL_ADD_FILE; } if (mask & NFSV4ACE_APPENDDATA) { mask &= ~NFSV4ACE_APPENDDATA; perm |= ACL_APPEND_DATA; } if (mask & NFSV4ACE_ADDSUBDIRECTORY) { mask &= ~NFSV4ACE_ADDSUBDIRECTORY; perm |= ACL_ADD_SUBDIRECTORY; } if (mask & NFSV4ACE_READNAMEDATTR) { mask &= ~NFSV4ACE_READNAMEDATTR; perm |= ACL_READ_NAMED_ATTRS; } if (mask & NFSV4ACE_WRITENAMEDATTR) { mask &= ~NFSV4ACE_WRITENAMEDATTR; perm |= ACL_WRITE_NAMED_ATTRS; } if (mask & NFSV4ACE_EXECUTE) { mask &= ~NFSV4ACE_EXECUTE; perm |= ACL_EXECUTE; } if (mask & NFSV4ACE_SEARCH) { mask &= ~NFSV4ACE_SEARCH; perm |= ACL_EXECUTE; } if (mask & NFSV4ACE_DELETECHILD) { mask &= ~NFSV4ACE_DELETECHILD; perm |= ACL_DELETE_CHILD; } if (mask & NFSV4ACE_READATTRIBUTES) { mask &= ~NFSV4ACE_READATTRIBUTES; perm |= ACL_READ_ATTRIBUTES; } if (mask & NFSV4ACE_WRITEATTRIBUTES) { mask &= ~NFSV4ACE_WRITEATTRIBUTES; perm |= ACL_WRITE_ATTRIBUTES; } if (mask & NFSV4ACE_DELETE) { mask &= ~NFSV4ACE_DELETE; perm |= ACL_DELETE; } if (mask & NFSV4ACE_READACL) { mask &= ~NFSV4ACE_READACL; perm |= ACL_READ_ACL; } if (mask & NFSV4ACE_WRITEACL) { mask &= ~NFSV4ACE_WRITEACL; perm |= ACL_WRITE_ACL; } if (mask & NFSV4ACE_WRITEOWNER) { mask &= ~NFSV4ACE_WRITEOWNER; perm |= ACL_WRITE_OWNER; } if (mask & NFSV4ACE_SYNCHRONIZE) { mask &= ~NFSV4ACE_SYNCHRONIZE; perm |= ACL_SYNCHRONIZE; } if (mask != 0) { error = NFSERR_ATTRNOTSUPP; goto out; } *permp = perm; out: NFSEXITCODE(error); return (error); } /* local functions */ static int nfsrv_buildace(struct nfsrv_descript *, u_char *, int, enum vtype, int, int, struct acl_entry *); /* * This function builds an NFS ace. */ static int nfsrv_buildace(struct nfsrv_descript *nd, u_char *name, int namelen, enum vtype type, int group, int owner, struct acl_entry *ace) { u_int32_t *tl, aceflag = 0x0, acemask = 0x0, acetype; int full_len; full_len = NFSM_RNDUP(namelen); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED + full_len); /* * Fill in the ace type. */ if (ace->ae_entry_type & ACL_ENTRY_TYPE_ALLOW) acetype = NFSV4ACE_ALLOWEDTYPE; else if (ace->ae_entry_type & ACL_ENTRY_TYPE_DENY) acetype = NFSV4ACE_DENIEDTYPE; else if (ace->ae_entry_type & ACL_ENTRY_TYPE_AUDIT) acetype = NFSV4ACE_AUDITTYPE; else acetype = NFSV4ACE_ALARMTYPE; *tl++ = txdr_unsigned(acetype); /* * Set the flag bits from the ACL. */ if (ace->ae_flags & ACL_ENTRY_FILE_INHERIT) aceflag |= NFSV4ACE_FILEINHERIT; if (ace->ae_flags & ACL_ENTRY_DIRECTORY_INHERIT) aceflag |= NFSV4ACE_DIRECTORYINHERIT; if (ace->ae_flags & ACL_ENTRY_NO_PROPAGATE_INHERIT) aceflag |= NFSV4ACE_NOPROPAGATEINHERIT; if (ace->ae_flags & ACL_ENTRY_INHERIT_ONLY) aceflag |= NFSV4ACE_INHERITONLY; if (ace->ae_flags & ACL_ENTRY_SUCCESSFUL_ACCESS) aceflag |= NFSV4ACE_SUCCESSFULACCESS; if (ace->ae_flags & ACL_ENTRY_FAILED_ACCESS) aceflag |= NFSV4ACE_FAILEDACCESS; if (group) aceflag |= NFSV4ACE_IDENTIFIERGROUP; *tl++ = txdr_unsigned(aceflag); if (type == VDIR) { if (ace->ae_perm & ACL_LIST_DIRECTORY) acemask |= NFSV4ACE_LISTDIRECTORY; if (ace->ae_perm & ACL_ADD_FILE) acemask |= NFSV4ACE_ADDFILE; if (ace->ae_perm & ACL_ADD_SUBDIRECTORY) acemask |= NFSV4ACE_ADDSUBDIRECTORY; if (ace->ae_perm & ACL_READ_NAMED_ATTRS) acemask |= NFSV4ACE_READNAMEDATTR; if (ace->ae_perm & ACL_WRITE_NAMED_ATTRS) acemask |= NFSV4ACE_WRITENAMEDATTR; if (ace->ae_perm & ACL_EXECUTE) acemask |= NFSV4ACE_SEARCH; if (ace->ae_perm & ACL_DELETE_CHILD) acemask |= NFSV4ACE_DELETECHILD; if (ace->ae_perm & ACL_READ_ATTRIBUTES) acemask |= NFSV4ACE_READATTRIBUTES; if (ace->ae_perm & ACL_WRITE_ATTRIBUTES) acemask |= NFSV4ACE_WRITEATTRIBUTES; if (ace->ae_perm & ACL_DELETE) acemask |= NFSV4ACE_DELETE; if (ace->ae_perm & ACL_READ_ACL) acemask |= NFSV4ACE_READACL; if (ace->ae_perm & ACL_WRITE_ACL) acemask |= NFSV4ACE_WRITEACL; if (ace->ae_perm & ACL_WRITE_OWNER) acemask |= NFSV4ACE_WRITEOWNER; if (ace->ae_perm & ACL_SYNCHRONIZE) acemask |= NFSV4ACE_SYNCHRONIZE; } else { if (ace->ae_perm & ACL_READ_DATA) acemask |= NFSV4ACE_READDATA; if (ace->ae_perm & ACL_WRITE_DATA) acemask |= NFSV4ACE_WRITEDATA; if (ace->ae_perm & ACL_APPEND_DATA) acemask |= NFSV4ACE_APPENDDATA; if (ace->ae_perm & ACL_READ_NAMED_ATTRS) acemask |= NFSV4ACE_READNAMEDATTR; if (ace->ae_perm & ACL_WRITE_NAMED_ATTRS) acemask |= NFSV4ACE_WRITENAMEDATTR; if (ace->ae_perm & ACL_EXECUTE) acemask |= NFSV4ACE_EXECUTE; if (ace->ae_perm & ACL_READ_ATTRIBUTES) acemask |= NFSV4ACE_READATTRIBUTES; if (ace->ae_perm & ACL_WRITE_ATTRIBUTES) acemask |= NFSV4ACE_WRITEATTRIBUTES; if (ace->ae_perm & ACL_DELETE) acemask |= NFSV4ACE_DELETE; if (ace->ae_perm & ACL_READ_ACL) acemask |= NFSV4ACE_READACL; if (ace->ae_perm & ACL_WRITE_ACL) acemask |= NFSV4ACE_WRITEACL; if (ace->ae_perm & ACL_WRITE_OWNER) acemask |= NFSV4ACE_WRITEOWNER; if (ace->ae_perm & ACL_SYNCHRONIZE) acemask |= NFSV4ACE_SYNCHRONIZE; } *tl++ = txdr_unsigned(acemask); *tl++ = txdr_unsigned(namelen); if (full_len - namelen) *(tl + (namelen / NFSX_UNSIGNED)) = 0x0; NFSBCOPY(name, (caddr_t)tl, namelen); return (full_len + 4 * NFSX_UNSIGNED); } /* * Build an NFSv4 ACL. */ APPLESTATIC int nfsrv_buildacl(struct nfsrv_descript *nd, NFSACL_T *aclp, enum vtype type, NFSPROC_T *p) { int i, entrycnt = 0, retlen; u_int32_t *entrycntp; int isowner, isgroup, namelen, malloced; u_char *name, namestr[NFSV4_SMALLSTR]; NFSM_BUILD(entrycntp, u_int32_t *, NFSX_UNSIGNED); retlen = NFSX_UNSIGNED; /* * Loop through the acl entries, building each one. */ for (i = 0; i < aclp->acl_cnt; i++) { isowner = isgroup = malloced = 0; switch (aclp->acl_entry[i].ae_tag) { case ACL_USER_OBJ: isowner = 1; name = "OWNER@"; namelen = 6; break; case ACL_GROUP_OBJ: isgroup = 1; name = "GROUP@"; namelen = 6; break; case ACL_EVERYONE: name = "EVERYONE@"; namelen = 9; break; case ACL_USER: name = namestr; nfsv4_uidtostr(aclp->acl_entry[i].ae_id, &name, &namelen, p); if (name != namestr) malloced = 1; break; case ACL_GROUP: isgroup = 1; name = namestr; nfsv4_gidtostr((gid_t)aclp->acl_entry[i].ae_id, &name, &namelen, p); if (name != namestr) malloced = 1; break; default: continue; } retlen += nfsrv_buildace(nd, name, namelen, type, isgroup, isowner, &aclp->acl_entry[i]); entrycnt++; if (malloced) free(name, M_NFSSTRING); } *entrycntp = txdr_unsigned(entrycnt); return (retlen); } /* - * Set an NFSv4 acl. - */ -APPLESTATIC int -nfsrv_setacl(vnode_t vp, NFSACL_T *aclp, struct ucred *cred, - NFSPROC_T *p) -{ - int error; - - if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { - error = NFSERR_ATTRNOTSUPP; - goto out; - } - /* - * With NFSv4 ACLs, chmod(2) may need to add additional entries. - * Make sure it has enough room for that - splitting every entry - * into two and appending "canonical six" entries at the end. - * Cribbed out of kern/vfs_acl.c - Rick M. - */ - if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { - error = NFSERR_ATTRNOTSUPP; - goto out; - } - error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); - -out: - NFSEXITCODE(error); - return (error); -} - -/* * Compare two NFSv4 acls. * Return 0 if they are the same, 1 if not the same. */ APPLESTATIC int nfsrv_compareacl(NFSACL_T *aclp1, NFSACL_T *aclp2) { int i; struct acl_entry *acep1, *acep2; if (aclp1->acl_cnt != aclp2->acl_cnt) return (1); acep1 = aclp1->acl_entry; acep2 = aclp2->acl_entry; for (i = 0; i < aclp1->acl_cnt; i++) { if (acep1->ae_tag != acep2->ae_tag) return (1); switch (acep1->ae_tag) { case ACL_GROUP: case ACL_USER: if (acep1->ae_id != acep2->ae_id) return (1); /* fall through */ case ACL_USER_OBJ: case ACL_GROUP_OBJ: case ACL_OTHER: if (acep1->ae_perm != acep2->ae_perm) return (1); } acep1++; acep2++; } return (0); } Index: head/sys/fs/nfs/nfs_commonport.c =================================================================== --- head/sys/fs/nfs/nfs_commonport.c (revision 335011) +++ head/sys/fs/nfs/nfs_commonport.c (revision 335012) @@ -1,819 +1,825 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Functions that need to be different for different versions of BSD * kernel should be kept here, along with any global storage specific * to this BSD variant. */ #include #include #include #include #include #include #include #include #include #include #include #include #include extern int nfscl_ticks; extern int nfsrv_nfsuserd; extern struct nfssockreq nfsrv_nfsuserdsock; extern void (*nfsd_call_recall)(struct vnode *, int, struct ucred *, struct thread *); extern int nfsrv_useacl; struct mount nfsv4root_mnt; int newnfs_numnfsd = 0; struct nfsstatsv1 nfsstatsv1; int nfs_numnfscbd = 0; int nfscl_debuglevel = 0; char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; struct callout newnfsd_callout; int nfsrv_lughashsize = 100; +struct mtx nfsrv_dslock_mtx; +struct nfsdevicehead nfsrv_devidhead; +volatile int nfsrv_devidcnt = 0; void (*nfsd_call_servertimer)(void) = NULL; void (*ncl_call_invalcaches)(struct vnode *) = NULL; int nfs_pnfsio(task_fn_t *, void *); static int nfs_realign_test; static int nfs_realign_count; static struct ext_nfsstats oldnfsstats; SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, "Number of realign tests done"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, "Number of mbuf realignments done"); SYSCTL_STRING(_vfs_nfs, OID_AUTO, callback_addr, CTLFLAG_RW, nfsv4_callbackaddr, sizeof(nfsv4_callbackaddr), "NFSv4 callback addr for server to use"); SYSCTL_INT(_vfs_nfs, OID_AUTO, debuglevel, CTLFLAG_RW, &nfscl_debuglevel, 0, "Debug level for NFS client"); SYSCTL_INT(_vfs_nfs, OID_AUTO, userhashsize, CTLFLAG_RDTUN, &nfsrv_lughashsize, 0, "Size of hash tables for uid/name mapping"); int nfs_pnfsiothreads = -1; SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsiothreads, CTLFLAG_RW, &nfs_pnfsiothreads, 0, "Number of pNFS mirror I/O threads"); /* * Defines for malloc * (Here for FreeBSD, since they allocate storage.) */ MALLOC_DEFINE(M_NEWNFSRVCACHE, "NFSD srvcache", "NFSD Server Request Cache"); MALLOC_DEFINE(M_NEWNFSDCLIENT, "NFSD V4client", "NFSD V4 Client Id"); MALLOC_DEFINE(M_NEWNFSDSTATE, "NFSD V4state", "NFSD V4 State (Openowner, Open, Lockowner, Delegation"); MALLOC_DEFINE(M_NEWNFSDLOCK, "NFSD V4lock", "NFSD V4 byte range lock"); MALLOC_DEFINE(M_NEWNFSDLOCKFILE, "NFSD lckfile", "NFSD Open/Lock file"); MALLOC_DEFINE(M_NEWNFSSTRING, "NFSD string", "NFSD V4 long string"); MALLOC_DEFINE(M_NEWNFSUSERGROUP, "NFSD usrgroup", "NFSD V4 User/group map"); MALLOC_DEFINE(M_NEWNFSDREQ, "NFS req", "NFS request header"); MALLOC_DEFINE(M_NEWNFSFH, "NFS fh", "NFS file handle"); MALLOC_DEFINE(M_NEWNFSCLOWNER, "NFSCL owner", "NFSCL Open Owner"); MALLOC_DEFINE(M_NEWNFSCLOPEN, "NFSCL open", "NFSCL Open"); MALLOC_DEFINE(M_NEWNFSCLDELEG, "NFSCL deleg", "NFSCL Delegation"); MALLOC_DEFINE(M_NEWNFSCLCLIENT, "NFSCL client", "NFSCL Client"); MALLOC_DEFINE(M_NEWNFSCLLOCKOWNER, "NFSCL lckown", "NFSCL Lock Owner"); MALLOC_DEFINE(M_NEWNFSCLLOCK, "NFSCL lck", "NFSCL Lock"); MALLOC_DEFINE(M_NEWNFSV4NODE, "NEWNFSnode", "NFS vnode"); MALLOC_DEFINE(M_NEWNFSDIRECTIO, "NEWdirectio", "NFS Direct IO buffer"); MALLOC_DEFINE(M_NEWNFSDIROFF, "NFSCL diroffdiroff", "NFS directory offset data"); MALLOC_DEFINE(M_NEWNFSDROLLBACK, "NFSD rollback", "NFS local lock rollback"); MALLOC_DEFINE(M_NEWNFSLAYOUT, "NFSCL layout", "NFSv4.1 Layout"); MALLOC_DEFINE(M_NEWNFSFLAYOUT, "NFSCL flayout", "NFSv4.1 File Layout"); MALLOC_DEFINE(M_NEWNFSDEVINFO, "NFSCL devinfo", "NFSv4.1 Device Info"); MALLOC_DEFINE(M_NEWNFSSOCKREQ, "NFSCL sockreq", "NFS Sock Req"); MALLOC_DEFINE(M_NEWNFSCLDS, "NFSCL session", "NFSv4.1 Session"); MALLOC_DEFINE(M_NEWNFSLAYRECALL, "NFSCL layrecall", "NFSv4.1 Layout Recall"); MALLOC_DEFINE(M_NEWNFSDSESSION, "NFSD session", "NFSD Session for a client"); /* * Definition of mutex locks. * newnfsd_mtx is used in nfsrvd_nfsd() to protect the nfs socket list * and assorted other nfsd structures. */ struct mtx newnfsd_mtx; struct mtx nfs_sockl_mutex; struct mtx nfs_state_mutex; struct mtx nfs_nameid_mutex; struct mtx nfs_req_mutex; struct mtx nfs_slock_mutex; struct mtx nfs_clstate_mutex; /* local functions */ static int nfssvc_call(struct thread *, struct nfssvc_args *, struct ucred *); #ifdef __NO_STRICT_ALIGNMENT /* * These architectures don't need re-alignment, so just return. */ int newnfs_realign(struct mbuf **pm, int how) { return (0); } #else /* !__NO_STRICT_ALIGNMENT */ /* * newnfs_realign: * * Check for badly aligned mbuf data and realign by copying the unaligned * portion of the data into a new mbuf chain and freeing the portions * of the old chain that were replaced. * * We cannot simply realign the data within the existing mbuf chain * because the underlying buffers may contain other rpc commands and * we cannot afford to overwrite them. * * We would prefer to avoid this situation entirely. The situation does * not occur with NFS/UDP and is supposed to only occasionally occur * with TCP. Use vfs.nfs.realign_count and realign_test to check this. * */ int newnfs_realign(struct mbuf **pm, int how) { struct mbuf *m, *n; int off, space; ++nfs_realign_test; while ((m = *pm) != NULL) { if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { /* * NB: we can't depend on m_pkthdr.len to help us * decide what to do here. May not be worth doing * the m_length calculation as m_copyback will * expand the mbuf chain below as needed. */ space = m_length(m, NULL); if (space >= MINCLSIZE) { /* NB: m_copyback handles space > MCLBYTES */ n = m_getcl(how, MT_DATA, 0); } else n = m_get(how, MT_DATA); if (n == NULL) return (ENOMEM); /* * Align the remainder of the mbuf chain. */ n->m_len = 0; off = 0; while (m != NULL) { m_copyback(n, off, m->m_len, mtod(m, caddr_t)); off += m->m_len; m = m->m_next; } m_freem(*pm); *pm = n; ++nfs_realign_count; break; } pm = &m->m_next; } return (0); } #endif /* __NO_STRICT_ALIGNMENT */ #ifdef notdef static void nfsrv_object_create(struct vnode *vp, struct thread *td) { if (vp == NULL || vp->v_type != VREG) return; (void) vfs_object_create(vp, td, td->td_ucred); } #endif /* * Look up a file name. Basically just initialize stuff and call namei(). */ int nfsrv_lookupfilename(struct nameidata *ndp, char *fname, NFSPROC_T *p) { int error; NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fname, p); error = namei(ndp); if (!error) { NDFREE(ndp, NDF_ONLY_PNBUF); } return (error); } /* * Copy NFS uid, gids to the cred structure. */ void newnfs_copycred(struct nfscred *nfscr, struct ucred *cr) { KASSERT(nfscr->nfsc_ngroups >= 0, ("newnfs_copycred: negative nfsc_ngroups")); cr->cr_uid = nfscr->nfsc_uid; crsetgroups(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups); } /* * Map args from nfsmsleep() to msleep(). */ int nfsmsleep(void *chan, void *mutex, int prio, const char *wmesg, struct timespec *ts) { u_int64_t nsecval; int error, timeo; if (ts) { timeo = hz * ts->tv_sec; nsecval = (u_int64_t)ts->tv_nsec; nsecval = ((nsecval * ((u_int64_t)hz)) + 500000000) / 1000000000; timeo += (int)nsecval; } else { timeo = 0; } error = msleep(chan, (struct mtx *)mutex, prio, wmesg, timeo); return (error); } /* * Get the file system info for the server. For now, just assume FFS. */ void nfsvno_getfs(struct nfsfsinfo *sip, int isdgram) { int pref; /* * XXX * There should be file system VFS OP(s) to get this information. * For now, assume ufs. */ if (isdgram) pref = NFS_MAXDGRAMDATA; else pref = NFS_SRVMAXIO; sip->fs_rtmax = NFS_SRVMAXIO; sip->fs_rtpref = pref; sip->fs_rtmult = NFS_FABLKSIZE; sip->fs_wtmax = NFS_SRVMAXIO; sip->fs_wtpref = pref; sip->fs_wtmult = NFS_FABLKSIZE; sip->fs_dtpref = pref; sip->fs_maxfilesize = 0xffffffffffffffffull; sip->fs_timedelta.tv_sec = 0; sip->fs_timedelta.tv_nsec = 1; sip->fs_properties = (NFSV3FSINFO_LINK | NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); } /* * Do the pathconf vnode op. */ int nfsvno_pathconf(struct vnode *vp, int flag, long *retf, struct ucred *cred, struct thread *p) { int error; error = VOP_PATHCONF(vp, flag, retf); if (error == EOPNOTSUPP || error == EINVAL) { /* * Some file systems return EINVAL for name arguments not * supported and some return EOPNOTSUPP for this case. * So the NFSv3 Pathconf RPC doesn't fail for these cases, * just fake them. */ switch (flag) { case _PC_LINK_MAX: *retf = NFS_LINK_MAX; break; case _PC_NAME_MAX: *retf = NAME_MAX; break; case _PC_CHOWN_RESTRICTED: *retf = 1; break; case _PC_NO_TRUNC: *retf = 1; break; default: /* * Only happens if a _PC_xxx is added to the server, * but this isn't updated. */ *retf = 0; printf("nfsrvd pathconf flag=%d not supp\n", flag); } error = 0; } NFSEXITCODE(error); return (error); } /* Fake nfsrv_atroot. Just return 0 */ int nfsrv_atroot(struct vnode *vp, uint64_t *retp) { return (0); } /* * Set the credentials to refer to root. * If only the various BSDen could agree on whether cr_gid is a separate * field or cr_groups[0]... */ void newnfs_setroot(struct ucred *cred) { cred->cr_uid = 0; cred->cr_groups[0] = 0; cred->cr_ngroups = 1; } /* * Get the client credential. Used for Renew and recovery. */ struct ucred * newnfs_getcred(void) { struct ucred *cred; struct thread *td = curthread; cred = crdup(td->td_ucred); newnfs_setroot(cred); return (cred); } /* * Nfs timer routine * Call the nfsd's timer function once/sec. */ void newnfs_timer(void *arg) { static time_t lasttime = 0; /* * Call the server timer, if set up. * The argument indicates if it is the next second and therefore * leases should be checked. */ if (lasttime != NFSD_MONOSEC) { lasttime = NFSD_MONOSEC; if (nfsd_call_servertimer != NULL) (*nfsd_call_servertimer)(); } callout_reset(&newnfsd_callout, nfscl_ticks, newnfs_timer, NULL); } /* * Sleep for a short period of time unless errval == NFSERR_GRACE, where * the sleep should be for 5 seconds. * Since lbolt doesn't exist in FreeBSD-CURRENT, just use a timeout on * an event that never gets a wakeup. Only return EINTR or 0. */ int nfs_catnap(int prio, int errval, const char *wmesg) { static int non_event; int ret; if (errval == NFSERR_GRACE) ret = tsleep(&non_event, prio, wmesg, 5 * hz); else ret = tsleep(&non_event, prio, wmesg, 1); if (ret != EINTR) ret = 0; return (ret); } /* * Get referral. For now, just fail. */ struct nfsreferral * nfsv4root_getreferral(struct vnode *vp, struct vnode *dvp, u_int32_t fileno) { return (NULL); } static int nfssvc_nfscommon(struct thread *td, struct nfssvc_args *uap) { int error; error = nfssvc_call(td, uap, td->td_ucred); NFSEXITCODE(error); return (error); } static int nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { int error = EINVAL, i, j; struct nfsd_idargs nid; struct nfsd_oidargs onid; struct { int vers; /* Just the first field of nfsstats. */ } nfsstatver; if (uap->flag & NFSSVC_IDNAME) { if ((uap->flag & NFSSVC_NEWSTRUCT) != 0) error = copyin(uap->argp, &nid, sizeof(nid)); else { error = copyin(uap->argp, &onid, sizeof(onid)); if (error == 0) { nid.nid_flag = onid.nid_flag; nid.nid_uid = onid.nid_uid; nid.nid_gid = onid.nid_gid; nid.nid_usermax = onid.nid_usermax; nid.nid_usertimeout = onid.nid_usertimeout; nid.nid_name = onid.nid_name; nid.nid_namelen = onid.nid_namelen; nid.nid_ngroup = 0; nid.nid_grps = NULL; } } if (error) goto out; error = nfssvc_idname(&nid); goto out; } else if (uap->flag & NFSSVC_GETSTATS) { if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { /* Copy fields to the old ext_nfsstat structure. */ oldnfsstats.attrcache_hits = nfsstatsv1.attrcache_hits; oldnfsstats.attrcache_misses = nfsstatsv1.attrcache_misses; oldnfsstats.lookupcache_hits = nfsstatsv1.lookupcache_hits; oldnfsstats.lookupcache_misses = nfsstatsv1.lookupcache_misses; oldnfsstats.direofcache_hits = nfsstatsv1.direofcache_hits; oldnfsstats.direofcache_misses = nfsstatsv1.direofcache_misses; oldnfsstats.accesscache_hits = nfsstatsv1.accesscache_hits; oldnfsstats.accesscache_misses = nfsstatsv1.accesscache_misses; oldnfsstats.biocache_reads = nfsstatsv1.biocache_reads; oldnfsstats.read_bios = nfsstatsv1.read_bios; oldnfsstats.read_physios = nfsstatsv1.read_physios; oldnfsstats.biocache_writes = nfsstatsv1.biocache_writes; oldnfsstats.write_bios = nfsstatsv1.write_bios; oldnfsstats.write_physios = nfsstatsv1.write_physios; oldnfsstats.biocache_readlinks = nfsstatsv1.biocache_readlinks; oldnfsstats.readlink_bios = nfsstatsv1.readlink_bios; oldnfsstats.biocache_readdirs = nfsstatsv1.biocache_readdirs; oldnfsstats.readdir_bios = nfsstatsv1.readdir_bios; for (i = 0; i < NFSV4_NPROCS; i++) oldnfsstats.rpccnt[i] = nfsstatsv1.rpccnt[i]; oldnfsstats.rpcretries = nfsstatsv1.rpcretries; for (i = 0; i < NFSV4OP_NOPS; i++) oldnfsstats.srvrpccnt[i] = nfsstatsv1.srvrpccnt[i]; for (i = NFSV42_NOPS, j = NFSV4OP_NOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) oldnfsstats.srvrpccnt[j] = nfsstatsv1.srvrpccnt[i]; oldnfsstats.srvrpc_errs = nfsstatsv1.srvrpc_errs; oldnfsstats.srv_errs = nfsstatsv1.srv_errs; oldnfsstats.rpcrequests = nfsstatsv1.rpcrequests; oldnfsstats.rpctimeouts = nfsstatsv1.rpctimeouts; oldnfsstats.rpcunexpected = nfsstatsv1.rpcunexpected; oldnfsstats.rpcinvalid = nfsstatsv1.rpcinvalid; oldnfsstats.srvcache_inproghits = nfsstatsv1.srvcache_inproghits; oldnfsstats.srvcache_idemdonehits = nfsstatsv1.srvcache_idemdonehits; oldnfsstats.srvcache_nonidemdonehits = nfsstatsv1.srvcache_nonidemdonehits; oldnfsstats.srvcache_misses = nfsstatsv1.srvcache_misses; oldnfsstats.srvcache_tcppeak = nfsstatsv1.srvcache_tcppeak; oldnfsstats.srvcache_size = nfsstatsv1.srvcache_size; oldnfsstats.srvclients = nfsstatsv1.srvclients; oldnfsstats.srvopenowners = nfsstatsv1.srvopenowners; oldnfsstats.srvopens = nfsstatsv1.srvopens; oldnfsstats.srvlockowners = nfsstatsv1.srvlockowners; oldnfsstats.srvlocks = nfsstatsv1.srvlocks; oldnfsstats.srvdelegates = nfsstatsv1.srvdelegates; for (i = 0; i < NFSV4OP_CBNOPS; i++) oldnfsstats.cbrpccnt[i] = nfsstatsv1.cbrpccnt[i]; oldnfsstats.clopenowners = nfsstatsv1.clopenowners; oldnfsstats.clopens = nfsstatsv1.clopens; oldnfsstats.cllockowners = nfsstatsv1.cllockowners; oldnfsstats.cllocks = nfsstatsv1.cllocks; oldnfsstats.cldelegates = nfsstatsv1.cldelegates; oldnfsstats.cllocalopenowners = nfsstatsv1.cllocalopenowners; oldnfsstats.cllocalopens = nfsstatsv1.cllocalopens; oldnfsstats.cllocallockowners = nfsstatsv1.cllocallockowners; oldnfsstats.cllocallocks = nfsstatsv1.cllocallocks; error = copyout(&oldnfsstats, uap->argp, sizeof (oldnfsstats)); } else { error = copyin(uap->argp, &nfsstatver, sizeof(nfsstatver)); if (error == 0 && nfsstatver.vers != NFSSTATS_V1) error = EPERM; if (error == 0) error = copyout(&nfsstatsv1, uap->argp, sizeof (nfsstatsv1)); } if (error == 0) { if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) { nfsstatsv1.attrcache_hits = 0; nfsstatsv1.attrcache_misses = 0; nfsstatsv1.lookupcache_hits = 0; nfsstatsv1.lookupcache_misses = 0; nfsstatsv1.direofcache_hits = 0; nfsstatsv1.direofcache_misses = 0; nfsstatsv1.accesscache_hits = 0; nfsstatsv1.accesscache_misses = 0; nfsstatsv1.biocache_reads = 0; nfsstatsv1.read_bios = 0; nfsstatsv1.read_physios = 0; nfsstatsv1.biocache_writes = 0; nfsstatsv1.write_bios = 0; nfsstatsv1.write_physios = 0; nfsstatsv1.biocache_readlinks = 0; nfsstatsv1.readlink_bios = 0; nfsstatsv1.biocache_readdirs = 0; nfsstatsv1.readdir_bios = 0; nfsstatsv1.rpcretries = 0; nfsstatsv1.rpcrequests = 0; nfsstatsv1.rpctimeouts = 0; nfsstatsv1.rpcunexpected = 0; nfsstatsv1.rpcinvalid = 0; bzero(nfsstatsv1.rpccnt, sizeof(nfsstatsv1.rpccnt)); } if ((uap->flag & NFSSVC_ZEROSRVSTATS) != 0) { nfsstatsv1.srvrpc_errs = 0; nfsstatsv1.srv_errs = 0; nfsstatsv1.srvcache_inproghits = 0; nfsstatsv1.srvcache_idemdonehits = 0; nfsstatsv1.srvcache_nonidemdonehits = 0; nfsstatsv1.srvcache_misses = 0; nfsstatsv1.srvcache_tcppeak = 0; bzero(nfsstatsv1.srvrpccnt, sizeof(nfsstatsv1.srvrpccnt)); bzero(nfsstatsv1.cbrpccnt, sizeof(nfsstatsv1.cbrpccnt)); } } goto out; } else if (uap->flag & NFSSVC_NFSUSERDPORT) { u_short sockport; struct sockaddr *sad; struct sockaddr_un *sun; if ((uap->flag & NFSSVC_NEWSTRUCT) != 0) { /* New nfsuserd using an AF_LOCAL socket. */ sun = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK | M_ZERO); error = copyinstr(uap->argp, sun->sun_path, sizeof(sun->sun_path), NULL); if (error != 0) { free(sun, M_SONAME); return (error); } sun->sun_family = AF_LOCAL; sun->sun_len = SUN_LEN(sun); sockport = 0; sad = (struct sockaddr *)sun; } else { error = copyin(uap->argp, (caddr_t)&sockport, sizeof (u_short)); sad = NULL; } if (error == 0) error = nfsrv_nfsuserdport(sad, sockport, p); } else if (uap->flag & NFSSVC_NFSUSERDDELPORT) { nfsrv_nfsuserddelport(); error = 0; } out: NFSEXITCODE(error); return (error); } /* * called by all three modevent routines, so that it gets things * initialized soon enough. */ void newnfs_portinit(void) { static int inited = 0; if (inited) return; inited = 1; /* Initialize SMP locks used by both client and server. */ mtx_init(&newnfsd_mtx, "newnfsd_mtx", NULL, MTX_DEF); mtx_init(&nfs_state_mutex, "nfs_state_mutex", NULL, MTX_DEF); mtx_init(&nfs_clstate_mutex, "nfs_clstate_mutex", NULL, MTX_DEF); } /* * Determine if the file system supports NFSv4 ACLs. * Return 1 if it does, 0 otherwise. */ int nfs_supportsnfsv4acls(struct vnode *vp) { int error; long retval; ASSERT_VOP_LOCKED(vp, "nfs supports nfsv4acls"); if (nfsrv_useacl == 0) return (0); error = VOP_PATHCONF(vp, _PC_ACL_NFS4, &retval); if (error == 0 && retval != 0) return (1); return (0); } /* * These are the first fields of all the context structures passed into * nfs_pnfsio(). */ struct pnfsio { int done; int inprog; struct task tsk; }; /* * Do a mirror I/O on a pNFS thread. */ int nfs_pnfsio(task_fn_t *func, void *context) { struct pnfsio *pio; int ret; static struct taskqueue *pnfsioq = NULL; pio = (struct pnfsio *)context; if (pnfsioq == NULL) { if (nfs_pnfsiothreads == 0) return (EPERM); if (nfs_pnfsiothreads < 0) nfs_pnfsiothreads = mp_ncpus * 4; pnfsioq = taskqueue_create("pnfsioq", M_WAITOK, taskqueue_thread_enqueue, &pnfsioq); if (pnfsioq == NULL) return (ENOMEM); ret = taskqueue_start_threads(&pnfsioq, nfs_pnfsiothreads, 0, "pnfsiot"); if (ret != 0) { taskqueue_free(pnfsioq); pnfsioq = NULL; return (ret); } } pio->inprog = 1; TASK_INIT(&pio->tsk, 0, func, context); ret = taskqueue_enqueue(pnfsioq, &pio->tsk); if (ret != 0) pio->inprog = 0; return (ret); } extern int (*nfsd_call_nfscommon)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfscommon_modevent(module_t mod, int type, void *data) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) goto out; newnfs_portinit(); mtx_init(&nfs_nameid_mutex, "nfs_nameid_mutex", NULL, MTX_DEF); mtx_init(&nfs_sockl_mutex, "nfs_sockl_mutex", NULL, MTX_DEF); mtx_init(&nfs_slock_mutex, "nfs_slock_mutex", NULL, MTX_DEF); mtx_init(&nfs_req_mutex, "nfs_req_mutex", NULL, MTX_DEF); mtx_init(&nfsrv_nfsuserdsock.nr_mtx, "nfsuserd", NULL, MTX_DEF); + mtx_init(&nfsrv_dslock_mtx, "nfs4ds", NULL, MTX_DEF); + TAILQ_INIT(&nfsrv_devidhead); callout_init(&newnfsd_callout, 1); newnfs_init(); nfsd_call_nfscommon = nfssvc_nfscommon; loaded = 1; break; case MOD_UNLOAD: if (newnfs_numnfsd != 0 || nfsrv_nfsuserd != 0 || nfs_numnfscbd != 0) { error = EBUSY; break; } nfsd_call_nfscommon = NULL; callout_drain(&newnfsd_callout); /* Clean out the name<-->id cache. */ nfsrv_cleanusergroup(); /* and get rid of the mutexes */ mtx_destroy(&nfs_nameid_mutex); mtx_destroy(&newnfsd_mtx); mtx_destroy(&nfs_state_mutex); mtx_destroy(&nfs_clstate_mutex); mtx_destroy(&nfs_sockl_mutex); mtx_destroy(&nfs_slock_mutex); mtx_destroy(&nfs_req_mutex); mtx_destroy(&nfsrv_nfsuserdsock.nr_mtx); + mtx_destroy(&nfsrv_dslock_mtx); loaded = 0; break; default: error = EOPNOTSUPP; break; } out: NFSEXITCODE(error); return error; } static moduledata_t nfscommon_mod = { "nfscommon", nfscommon_modevent, NULL, }; DECLARE_MODULE(nfscommon, nfscommon_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfscommon, 1); MODULE_DEPEND(nfscommon, nfssvc, 1, 1, 1); MODULE_DEPEND(nfscommon, krpc, 1, 1, 1); Index: head/sys/fs/nfs/nfs_commonsubs.c =================================================================== --- head/sys/fs/nfs/nfs_commonsubs.c (revision 335011) +++ head/sys/fs/nfs/nfs_commonsubs.c (revision 335012) @@ -1,4242 +1,4369 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #ifndef APPLEKEXT #include "opt_inet6.h" #include #include /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; /* And other global data */ nfstype nfsv34_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; enum vtype newnv2tov_type[8] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv34tov_type[8]={ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; struct timeval nfsboottime; /* Copy boottime once, so it never changes */ int nfscl_ticks; int nfsrv_useacl = 1; struct nfssockreq nfsrv_nfsuserdsock; int nfsrv_nfsuserd = 0; struct nfsreqhead nfsd_reqq; uid_t nfsrv_defaultuid = UID_NOBODY; gid_t nfsrv_defaultgid = GID_NOGROUP; int nfsrv_lease = NFSRV_LEASE; int ncl_mbuf_mlen = MLEN; int nfsd_enable_stringtouid = 0; +int nfsrv_doflexfile = 0; static int nfs_enable_uidtostring = 0; NFSNAMEIDMUTEX; NFSSOCKMUTEX; extern int nfsrv_lughashsize; +extern struct mtx nfsrv_dslock_mtx; +extern volatile int nfsrv_devidcnt; +extern int nfscl_debuglevel; +extern struct nfsdevicehead nfsrv_devidhead; SYSCTL_DECL(_vfs_nfs); SYSCTL_INT(_vfs_nfs, OID_AUTO, enable_uidtostring, CTLFLAG_RW, &nfs_enable_uidtostring, 0, "Make nfs always send numeric owner_names"); +int nfsrv_maxpnfsmirror = 1; +SYSCTL_INT(_vfs_nfs, OID_AUTO, pnfsmirror, CTLFLAG_RD, + &nfsrv_maxpnfsmirror, 0, "Mirror level for pNFS service"); + /* * This array of structures indicates, for V4: * retfh - which of 3 types of calling args are used * 0 - doesn't change cfh or use a sfh * 1 - replaces cfh with a new one (unless it returns an error status) * 2 - uses cfh and sfh * needscfh - if the op wants a cfh and premtime * 0 - doesn't use a cfh * 1 - uses a cfh, but doesn't want pre-op attributes * 2 - uses a cfh and wants pre-op attributes * savereply - indicates a non-idempotent Op * 0 - not non-idempotent * 1 - non-idempotent * Ops that are ordered via seqid# are handled separately from these * non-idempotent Ops. * Define it here, since it is used by both the client and server. */ struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS] = { { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Access */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Close */ { 0, 2, 0, 1, LK_EXCLUSIVE, 1, 1 }, /* Commit */ { 1, 2, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Create */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Delegpurge */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Delegreturn */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Getattr */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* GetFH */ { 2, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Link */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Lock */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* LockT */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* LockU */ { 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Lookup */ { 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Lookupp */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* NVerify */ { 1, 1, 0, 1, LK_EXCLUSIVE, 1, 0 }, /* Open */ { 1, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenAttr */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenConfirm */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenDowngrade */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutFH */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutPubFH */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutRootFH */ { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* Read */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Readdir */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* ReadLink */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Remove */ { 2, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Rename */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Renew */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* RestoreFH */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SaveFH */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SecInfo */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Setattr */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SetClientID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SetClientIDConfirm */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Verify */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Write */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* ReleaseLockOwner */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Backchannel Ctrl */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Bind Conn to Sess */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Exchange ID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Create Session */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Destroy Session */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Free StateID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Dir Deleg */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device Info */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device List */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Layout Commit */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Layout Get */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Return */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Secinfo No name */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Sequence */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Set SSV */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Test StateID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Want Delegation */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Destroy ClientID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Reclaim Complete */ }; #endif /* !APPLEKEXT */ static int ncl_mbuf_mhlen = MHLEN; static int nfsrv_usercnt = 0; static int nfsrv_dnsnamelen; static u_char *nfsrv_dnsname = NULL; static int nfsrv_usermax = 999999999; struct nfsrv_lughash { struct mtx mtx; struct nfsuserhashhead lughead; }; static struct nfsrv_lughash *nfsuserhash; static struct nfsrv_lughash *nfsusernamehash; static struct nfsrv_lughash *nfsgrouphash; static struct nfsrv_lughash *nfsgroupnamehash; /* * This static array indicates whether or not the RPC generates a large * reply. This is used by nfs_reply() to decide whether or not an mbuf * cluster should be allocated. (If a cluster is required by an RPC * marked 0 in this array, the code will still work, just not quite as * efficiently.) */ int nfs_bigreply[NFSV41_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); static void nfsv4_wanted(struct nfsv4lock *lp); static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len); static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name, NFSPROC_T *p); static void nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser); static int nfsrv_getrefstr(struct nfsrv_descript *, u_char **, u_char **, int *, int *); static void nfsrv_refstrbigenough(int, u_char **, u_char **, int *); #ifndef APPLE /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbufuio(struct nfsrv_descript *nd, struct uio *uiop, int siz) { char *mbufcp, *uiocp; int xfer, left, len; mbuf_t mp; long uiosiz, rem; int error = 0; mp = nd->nd_md; mbufcp = nd->nd_dpos; len = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - mbufcp; rem = NFSM_RNDUP(siz) - siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) { error = EBADRPC; goto out; } left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mbuf_next(mp); if (mp == NULL) { error = EBADRPC; goto out; } mbufcp = NFSMTOD(mp, caddr_t); len = mbuf_len(mp); KASSERT(len >= 0, ("len %d, corrupted mbuf?", len)); } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) NFSBCOPY(mbufcp, uiocp, xfer); else copyout(mbufcp, CAST_USER_ADDR_T(uiocp), xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base = (void *) ((char *)uiop->uio_iov->iov_base + uiosiz); uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } nd->nd_dpos = mbufcp; nd->nd_md = mp; if (rem > 0) { if (len < rem) error = nfsm_advance(nd, rem, len); else nd->nd_dpos += rem; } out: NFSEXITCODE2(error, nd); return (error); } #endif /* !APPLE */ /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macro NFSM_DISSECT for tough * cases. */ APPLESTATIC void * nfsm_dissct(struct nfsrv_descript *nd, int siz, int how) { mbuf_t mp2; int siz2, xfer; caddr_t p; int left; caddr_t retp; retp = NULL; left = NFSMTOD(nd->nd_md, caddr_t) + mbuf_len(nd->nd_md) - nd->nd_dpos; while (left == 0) { nd->nd_md = mbuf_next(nd->nd_md); if (nd->nd_md == NULL) return (retp); left = mbuf_len(nd->nd_md); nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); } if (left >= siz) { retp = nd->nd_dpos; nd->nd_dpos += siz; } else if (mbuf_next(nd->nd_md) == NULL) { return (retp); } else if (siz > ncl_mbuf_mhlen) { panic("nfs S too big"); } else { MGET(mp2, MT_DATA, how); if (mp2 == NULL) return (NULL); mbuf_setnext(mp2, mbuf_next(nd->nd_md)); mbuf_setnext(nd->nd_md, mp2); mbuf_setlen(nd->nd_md, mbuf_len(nd->nd_md) - left); nd->nd_md = mp2; retp = p = NFSMTOD(mp2, caddr_t); NFSBCOPY(nd->nd_dpos, p, left); /* Copy what was left */ siz2 = siz - left; p += left; mp2 = mbuf_next(mp2); /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (NULL); xfer = (siz2 > mbuf_len(mp2)) ? mbuf_len(mp2) : siz2; if (xfer > 0) { NFSBCOPY(NFSMTOD(mp2, caddr_t), p, xfer); NFSM_DATAP(mp2, xfer); mbuf_setlen(mp2, mbuf_len(mp2) - xfer); p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mbuf_next(mp2); } mbuf_setlen(nd->nd_md, siz); nd->nd_md = mp2; nd->nd_dpos = NFSMTOD(mp2, caddr_t); } return (retp); } /* * Advance the position in the mbuf chain. * If offs == 0, this is a no-op, but it is simpler to just return from * here than check for offs > 0 for all calls to nfsm_advance. * If left == -1, it should be calculated here. */ APPLESTATIC int nfsm_advance(struct nfsrv_descript *nd, int offs, int left) { int error = 0; if (offs == 0) goto out; /* * A negative offs should be considered a serious problem. */ if (offs < 0) panic("nfsrv_advance"); /* * If left == -1, calculate it here. */ if (left == -1) left = NFSMTOD(nd->nd_md, caddr_t) + mbuf_len(nd->nd_md) - nd->nd_dpos; /* * Loop around, advancing over the mbuf data. */ while (offs > left) { offs -= left; nd->nd_md = mbuf_next(nd->nd_md); if (nd->nd_md == NULL) { error = EBADRPC; goto out; } left = mbuf_len(nd->nd_md); nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); } nd->nd_dpos += offs; out: NFSEXITCODE(error); return (error); } /* * Copy a string into mbuf(s). * Return the number of bytes output, including XDR overheads. */ APPLESTATIC int nfsm_strtom(struct nfsrv_descript *nd, const char *cp, int siz) { mbuf_t m2; int xfer, left; mbuf_t m1; int rem, bytesize; u_int32_t *tl; char *cp2; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(siz); rem = NFSM_RNDUP(siz) - siz; bytesize = NFSX_UNSIGNED + siz + rem; m2 = nd->nd_mb; cp2 = nd->nd_bpos; left = M_TRAILINGSPACE(m2); /* * Loop around copying the string to mbuf(s). */ while (siz > 0) { if (left == 0) { if (siz > ncl_mbuf_mlen) NFSMCLGET(m1, M_WAITOK); else NFSMGET(m1); mbuf_setlen(m1, 0); mbuf_setnext(m2, m1); m2 = m1; cp2 = NFSMTOD(m2, caddr_t); left = M_TRAILINGSPACE(m2); } if (left >= siz) xfer = siz; else xfer = left; NFSBCOPY(cp, cp2, xfer); cp += xfer; mbuf_setlen(m2, mbuf_len(m2) + xfer); siz -= xfer; left -= xfer; if (siz == 0 && rem) { if (left < rem) panic("nfsm_strtom"); NFSBZERO(cp2 + xfer, rem); mbuf_setlen(m2, mbuf_len(m2) + rem); } } nd->nd_mb = m2; nd->nd_bpos = NFSMTOD(m2, caddr_t) + mbuf_len(m2); return (bytesize); } /* * Called once to initialize data structures... */ APPLESTATIC void newnfs_init(void) { static int nfs_inited = 0; if (nfs_inited) return; nfs_inited = 1; newnfs_true = txdr_unsigned(TRUE); newnfs_false = txdr_unsigned(FALSE); newnfs_xdrneg1 = txdr_unsigned(-1); nfscl_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfscl_ticks < 1) nfscl_ticks = 1; NFSSETBOOTTIME(nfsboottime); /* * Initialize reply list and start timer */ TAILQ_INIT(&nfsd_reqq); NFS_TIMERINIT; } /* * Put a file handle in an mbuf list. * If the size argument == 0, just use the default size. * set_true == 1 if there should be an newnfs_true prepended on the file handle. * Return the number of bytes output, including XDR overhead. */ APPLESTATIC int nfsm_fhtom(struct nfsrv_descript *nd, u_int8_t *fhp, int size, int set_true) { u_int32_t *tl; u_int8_t *cp; - int fullsiz, bytesize = 0; + int fullsiz, rem, bytesize = 0; if (size == 0) size = NFSX_MYFH; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: if (size > NFSX_V2FH) panic("fh size > NFSX_V2FH for NFSv2"); NFSM_BUILD(cp, u_int8_t *, NFSX_V2FH); NFSBCOPY(fhp, cp, size); if (size < NFSX_V2FH) NFSBZERO(cp + size, NFSX_V2FH - size); bytesize = NFSX_V2FH; break; case ND_NFSV3: case ND_NFSV4: fullsiz = NFSM_RNDUP(size); + rem = fullsiz - size; if (set_true) { bytesize = 2 * NFSX_UNSIGNED + fullsiz; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; } else { bytesize = NFSX_UNSIGNED + fullsiz; } (void) nfsm_strtom(nd, fhp, size); break; } return (bytesize); } /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ APPLESTATIC int nfsaddr_match(int family, union nethostaddr *haddr, NFSSOCKADDR_T nam) { struct sockaddr_in *inetaddr; switch (family) { case AF_INET: inetaddr = NFSSOCKADDR(nam, struct sockaddr_in *); if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inet.s_addr) return (1); break; #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *inetaddr6; inetaddr6 = NFSSOCKADDR(nam, struct sockaddr_in6 *); /* XXX - should test sin6_scope_id ? */ if (inetaddr6->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&inetaddr6->sin6_addr, &haddr->had_inet6)) return (1); } break; #endif } return (0); } /* * Similar to the above, but takes to NFSSOCKADDR_T args. */ APPLESTATIC int nfsaddr2_match(NFSSOCKADDR_T nam1, NFSSOCKADDR_T nam2) { struct sockaddr_in *addr1, *addr2; struct sockaddr *inaddr; inaddr = NFSSOCKADDR(nam1, struct sockaddr *); switch (inaddr->sa_family) { case AF_INET: addr1 = NFSSOCKADDR(nam1, struct sockaddr_in *); addr2 = NFSSOCKADDR(nam2, struct sockaddr_in *); if (addr2->sin_family == AF_INET && addr1->sin_addr.s_addr == addr2->sin_addr.s_addr) return (1); break; #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *inet6addr1, *inet6addr2; inet6addr1 = NFSSOCKADDR(nam1, struct sockaddr_in6 *); inet6addr2 = NFSSOCKADDR(nam2, struct sockaddr_in6 *); /* XXX - should test sin6_scope_id ? */ if (inet6addr2->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&inet6addr1->sin6_addr, &inet6addr2->sin6_addr)) return (1); } break; #endif } return (0); } /* * Trim the stuff already dissected off the mbuf list. */ APPLESTATIC void newnfs_trimleading(nd) struct nfsrv_descript *nd; { mbuf_t m, n; int offs; /* * First, free up leading mbufs. */ if (nd->nd_mrep != nd->nd_md) { m = nd->nd_mrep; while (mbuf_next(m) != nd->nd_md) { if (mbuf_next(m) == NULL) panic("nfsm trim leading"); m = mbuf_next(m); } mbuf_setnext(m, NULL); mbuf_freem(nd->nd_mrep); } m = nd->nd_md; /* * Now, adjust this mbuf, based on nd_dpos. */ offs = nd->nd_dpos - NFSMTOD(m, caddr_t); if (offs == mbuf_len(m)) { n = m; m = mbuf_next(m); if (m == NULL) panic("nfsm trim leading2"); mbuf_setnext(n, NULL); mbuf_freem(n); } else if (offs > 0) { mbuf_setlen(m, mbuf_len(m) - offs); NFSM_DATAP(m, offs); } else if (offs < 0) panic("nfsm trimleading offs"); nd->nd_mrep = m; nd->nd_md = m; nd->nd_dpos = NFSMTOD(m, caddr_t); } /* * Trim trailing data off the mbuf list being built. */ APPLESTATIC void newnfs_trimtrailing(nd, mb, bpos) struct nfsrv_descript *nd; mbuf_t mb; caddr_t bpos; { if (mbuf_next(mb)) { mbuf_freem(mbuf_next(mb)); mbuf_setnext(mb, NULL); } mbuf_setlen(mb, bpos - NFSMTOD(mb, caddr_t)); nd->nd_mb = mb; nd->nd_bpos = bpos; } /* * Dissect a file handle on the client. */ APPLESTATIC int nfsm_getfh(struct nfsrv_descript *nd, struct nfsfh **nfhpp) { u_int32_t *tl; struct nfsfh *nfhp; int error, len; *nfhpp = NULL; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { error = EBADRPC; goto nfsmout; } } else len = NFSX_V2FH; nfhp = malloc(sizeof (struct nfsfh) + len, M_NFSFH, M_WAITOK); error = nfsrv_mtostr(nd, nfhp->nfh_fh, len); if (error) { free(nfhp, M_NFSFH); goto nfsmout; } nfhp->nfh_len = len; *nfhpp = nfhp; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Break down the nfsv4 acl. * If the aclp == NULL or won't fit in an acl, just discard the acl info. */ APPLESTATIC int nfsrv_dissectacl(struct nfsrv_descript *nd, NFSACL_T *aclp, int *aclerrp, int *aclsizep, __unused NFSPROC_T *p) { u_int32_t *tl; int i, aclsize; int acecnt, error = 0, aceerr = 0, acesize; *aclerrp = 0; if (aclp) aclp->acl_cnt = 0; /* * Parse out the ace entries and expect them to conform to * what can be supported by R/W/X bits. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); aclsize = NFSX_UNSIGNED; acecnt = fxdr_unsigned(int, *tl); if (acecnt > ACL_MAX_ENTRIES) aceerr = NFSERR_ATTRNOTSUPP; if (nfsrv_useacl == 0) aceerr = NFSERR_ATTRNOTSUPP; for (i = 0; i < acecnt; i++) { if (aclp && !aceerr) error = nfsrv_dissectace(nd, &aclp->acl_entry[i], &aceerr, &acesize, p); else error = nfsrv_skipace(nd, &acesize); if (error) goto nfsmout; aclsize += acesize; } if (aclp && !aceerr) aclp->acl_cnt = acecnt; if (aceerr) *aclerrp = aceerr; if (aclsizep) *aclsizep = aclsize; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Skip over an NFSv4 ace entry. Just dissect the xdr and discard it. */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep) { u_int32_t *tl; int error, len = 0; NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 3)); error = nfsm_advance(nd, NFSM_RNDUP(len), -1); nfsmout: *acesizep = NFSM_RNDUP(len) + (4 * NFSX_UNSIGNED); NFSEXITCODE2(error, nd); return (error); } /* * Get attribute bits from an mbuf list. * Returns EBADRPC for a parsing error, 0 otherwise. * If the clearinvalid flag is set, clear the bits not supported. */ APPLESTATIC int nfsrv_getattrbits(struct nfsrv_descript *nd, nfsattrbit_t *attrbitp, int *cntp, int *retnotsupp) { u_int32_t *tl; int cnt, i, outcnt; int error = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (cnt > NFSATTRBIT_MAXWORDS) outcnt = NFSATTRBIT_MAXWORDS; else outcnt = cnt; NFSZERO_ATTRBIT(attrbitp); if (outcnt > 0) { NFSM_DISSECT(tl, u_int32_t *, outcnt * NFSX_UNSIGNED); for (i = 0; i < outcnt; i++) attrbitp->bits[i] = fxdr_unsigned(u_int32_t, *tl++); } for (i = 0; i < (cnt - outcnt); i++) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (retnotsupp != NULL && *tl != 0) *retnotsupp = NFSERR_ATTRNOTSUPP; } if (cntp) *cntp = NFSX_UNSIGNED + (cnt * NFSX_UNSIGNED); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Get the attributes for V4. * If the compare flag is true, test for any attribute changes, * otherwise return the attribute values. * These attributes cover fields in "struct vattr", "struct statfs", * "struct nfsfsinfo", the file handle and the lease duration. * The value of retcmpp is set to 1 if all attributes are the same, * and 0 otherwise. * Returns EBADRPC if it can't be parsed, 0 otherwise. */ APPLESTATIC int nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nap, struct nfsfh **nfhpp, fhandle_t *fhp, int fhsize, struct nfsv3_pathconf *pc, struct statfs *sbp, struct nfsstatfs *sfp, struct nfsfsinfo *fsp, NFSACL_T *aclp, int compare, int *retcmpp, u_int32_t *leasep, u_int32_t *rderrp, NFSPROC_T *p, struct ucred *cred) { u_int32_t *tl; int i = 0, j, k, l = 0, m, bitpos, attrsum = 0; int error, tfhsize, aceerr, attrsize, cnt, retnotsup; u_char *cp, *cp2, namestr[NFSV4_SMALLSTR + 1]; nfsattrbit_t attrbits, retattrbits, checkattrbits; struct nfsfh *tnfhp; struct nfsreferral *refp; u_quad_t tquad; nfsquad_t tnfsquad; struct timespec temptime; uid_t uid; gid_t gid; u_int32_t freenum = 0, tuint; u_int64_t uquad = 0, thyp, thyp2; #ifdef QUOTA struct dqblk dqb; uid_t savuid; #endif CTASSERT(sizeof(ino_t) == sizeof(uint64_t)); if (compare) { retnotsup = 0; error = nfsrv_getattrbits(nd, &attrbits, NULL, &retnotsup); } else { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); } if (error) goto nfsmout; if (compare) { *retcmpp = retnotsup; } else { /* * Just set default values to some of the important ones. */ if (nap != NULL) { nap->na_type = VREG; nap->na_mode = 0; nap->na_rdev = (NFSDEV_T)0; nap->na_mtime.tv_sec = 0; nap->na_mtime.tv_nsec = 0; nap->na_gen = 0; nap->na_flags = 0; nap->na_blocksize = NFS_FABLKSIZE; } if (sbp != NULL) { sbp->f_bsize = NFS_FABLKSIZE; sbp->f_blocks = 0; sbp->f_bfree = 0; sbp->f_bavail = 0; sbp->f_files = 0; sbp->f_ffree = 0; } if (fsp != NULL) { fsp->fs_rtmax = 8192; fsp->fs_rtpref = 8192; fsp->fs_maxname = NFS_MAXNAMLEN; fsp->fs_wtmax = 8192; fsp->fs_wtpref = 8192; fsp->fs_wtmult = NFS_FABLKSIZE; fsp->fs_dtpref = 8192; fsp->fs_maxfilesize = 0xffffffffffffffffull; fsp->fs_timedelta.tv_sec = 0; fsp->fs_timedelta.tv_nsec = 1; fsp->fs_properties = (NFSV3_FSFLINK | NFSV3_FSFSYMLINK | NFSV3_FSFHOMOGENEOUS | NFSV3_FSFCANSETTIME); } if (pc != NULL) { pc->pc_linkmax = NFS_LINK_MAX; pc->pc_namemax = NAME_MAX; pc->pc_notrunc = 0; pc->pc_chownrestricted = 0; pc->pc_caseinsensitive = 0; pc->pc_casepreserving = 1; } if (sfp != NULL) { sfp->sf_ffiles = UINT64_MAX; sfp->sf_tfiles = UINT64_MAX; sfp->sf_afiles = UINT64_MAX; sfp->sf_fbytes = UINT64_MAX; sfp->sf_tbytes = UINT64_MAX; sfp->sf_abytes = UINT64_MAX; } } /* * Loop around getting the attributes. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsize = fxdr_unsigned(int, *tl); for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (attrsum > attrsize) { error = NFSERR_BADXDR; goto nfsmout; } if (NFSISSET_ATTRBIT(&attrbits, bitpos)) switch (bitpos) { case NFSATTRBIT_SUPPORTEDATTRS: retnotsup = 0; if (compare || nap == NULL) error = nfsrv_getattrbits(nd, &retattrbits, &cnt, &retnotsup); else error = nfsrv_getattrbits(nd, &nap->na_suppattr, &cnt, &retnotsup); if (error) goto nfsmout; if (compare && !(*retcmpp)) { NFSSETSUPP_ATTRBIT(&checkattrbits); /* Some filesystem do not support NFSv4ACL */ if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACL); NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACLSUPPORT); } if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits) || retnotsup) *retcmpp = NFSERR_NOTSAME; } attrsum += cnt; break; case NFSATTRBIT_TYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (nap->na_type != nfsv34tov_type(*tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_type = nfsv34tov_type(*tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FHEXPIRETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (fxdr_unsigned(int, *tl) != NFSV4FHTYPE_PERSISTENT) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHANGE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (nap->na_filerev != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_filerev = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (nap->na_size != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_size = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_LINKSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFLINK) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFLINK; else fsp->fs_properties &= ~NFSV3_FSFLINK; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_SYMLINKSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFSYMLINK) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFSYMLINK; else fsp->fs_properties &= ~NFSV3_FSFSYMLINK; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NAMEDATTR: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (*tl != newnfs_false) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); thyp = fxdr_hyper(tl); tl += 2; thyp2 = fxdr_hyper(tl); if (compare) { if (*retcmpp == 0) { if (thyp != (u_int64_t) vfs_statfs(vnode_mount(vp))->f_fsid.val[0] || thyp2 != (u_int64_t) vfs_statfs(vnode_mount(vp))->f_fsid.val[1]) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_filesid[0] = thyp; nap->na_filesid[1] = thyp2; } attrsum += (4 * NFSX_UNSIGNED); break; case NFSATTRBIT_UNIQUEHANDLES: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_LEASETIME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (fxdr_unsigned(int, *tl) != nfsrv_lease && !(*retcmpp)) *retcmpp = NFSERR_NOTSAME; } else if (leasep != NULL) { *leasep = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_RDATTRERROR: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) *retcmpp = NFSERR_INVAL; } else if (rderrp != NULL) { *rderrp = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_ACL: if (compare) { if (!(*retcmpp)) { if (nfsrv_useacl && nfs_supportsnfsv4acls(vp)) { NFSACL_T *naclp; naclp = acl_alloc(M_WAITOK); error = nfsrv_dissectacl(nd, naclp, &aceerr, &cnt, p); if (error) { acl_free(naclp); goto nfsmout; } if (aceerr || aclp == NULL || nfsrv_compareacl(aclp, naclp)) *retcmpp = NFSERR_NOTSAME; acl_free(naclp); } else { error = nfsrv_dissectacl(nd, NULL, &aceerr, &cnt, p); *retcmpp = NFSERR_ATTRNOTSUPP; } } } else { if (vp != NULL && aclp != NULL) error = nfsrv_dissectacl(nd, aclp, &aceerr, &cnt, p); else error = nfsrv_dissectacl(nd, NULL, &aceerr, &cnt, p); if (error) goto nfsmout; } attrsum += cnt; break; case NFSATTRBIT_ACLSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (nfsrv_useacl && nfs_supportsnfsv4acls(vp)) { if (fxdr_unsigned(u_int32_t, *tl) != NFSV4ACE_SUPTYPES) *retcmpp = NFSERR_NOTSAME; } else { *retcmpp = NFSERR_ATTRNOTSUPP; } } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_ARCHIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CANSETTIME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFCANSETTIME) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFCANSETTIME; else fsp->fs_properties &= ~NFSV3_FSFCANSETTIME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_false) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_caseinsensitive = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEPRESERVING: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHOWNRESTRICTED: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_chownrestricted = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FILEHANDLE: error = nfsm_getfh(nd, &tnfhp); if (error) goto nfsmout; tfhsize = tnfhp->nfh_len; if (compare) { if (!(*retcmpp) && !NFSRV_CMPFH(tnfhp->nfh_fh, tfhsize, fhp, fhsize)) *retcmpp = NFSERR_NOTSAME; free(tnfhp, M_NFSFH); } else if (nfhpp != NULL) { *nfhpp = tnfhp; } else { free(tnfhp, M_NFSFH); } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(tfhsize)); break; case NFSATTRBIT_FILEID: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if (nap->na_fileid != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) nap->na_fileid = thyp; attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESAVAIL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_afiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_afiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESFREE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_ffiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_ffiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESTOTAL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_tfiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_tfiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FSLOCATIONS: error = nfsrv_getrefstr(nd, &cp, &cp2, &l, &m); if (error) goto nfsmout; attrsum += l; if (compare && !(*retcmpp)) { refp = nfsv4root_getreferral(vp, NULL, 0); if (refp != NULL) { if (cp == NULL || cp2 == NULL || strcmp(cp, "/") || strcmp(cp2, refp->nfr_srvlist)) *retcmpp = NFSERR_NOTSAME; } else if (m == 0) { *retcmpp = NFSERR_NOTSAME; } } if (cp != NULL) free(cp, M_NFSSTRING); if (cp2 != NULL) free(cp2, M_NFSSTRING); break; case NFSATTRBIT_HIDDEN: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HOMOGENEOUS: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFHOMOGENEOUS) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFHOMOGENEOUS; else fsp->fs_properties &= ~NFSV3_FSFHOMOGENEOUS; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXFILESIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); tnfsquad.qval = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { tquad = NFSRV_MAXFILESIZE; if (tquad != tnfsquad.qval) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_maxfilesize = tnfsquad.qval; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MAXLINK: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fxdr_unsigned(int, *tl) != NFS_LINK_MAX) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXNAME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_maxname != fxdr_unsigned(u_int32_t, *tl)) *retcmpp = NFSERR_NOTSAME; } } else { tuint = fxdr_unsigned(u_int32_t, *tl); /* * Some Linux NFSv4 servers report this * as 0 or 4billion, so I'll set it to * NFS_MAXNAMLEN. If a server actually creates * a name longer than NFS_MAXNAMLEN, it will * get an error back. */ if (tuint == 0 || tuint > NFS_MAXNAMLEN) tuint = NFS_MAXNAMLEN; if (fsp != NULL) fsp->fs_maxname = tuint; if (pc != NULL) pc->pc_namemax = tuint; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXREAD: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (fsp->fs_rtmax != fxdr_unsigned(u_int32_t, *(tl + 1)) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *++tl); fsp->fs_rtpref = fsp->fs_rtmax; fsp->fs_dtpref = fsp->fs_rtpref; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MAXWRITE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (fsp->fs_wtmax != fxdr_unsigned(u_int32_t, *(tl + 1)) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_wtmax = fxdr_unsigned(int, *++tl); fsp->fs_wtpref = fsp->fs_wtmax; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MIMETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; break; case NFSATTRBIT_MODE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (nap->na_mode != nfstov_mode(*tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_mode = nfstov_mode(*tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NOTRUNC: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NUMLINKS: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); tuint = fxdr_unsigned(u_int32_t, *tl); if (compare) { if (!(*retcmpp)) { if ((u_int32_t)nap->na_nlink != tuint) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_nlink = tuint; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (compare) { if (!(*retcmpp)) { if (nfsv4_strtouid(nd, cp, j, &uid, p) || nap->na_uid != uid) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { if (nfsv4_strtouid(nd, cp, j, &uid, p)) nap->na_uid = nfsrv_defaultuid; else nap->na_uid = uid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); break; case NFSATTRBIT_OWNERGROUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (compare) { if (!(*retcmpp)) { if (nfsv4_strtogid(nd, cp, j, &gid, p) || nap->na_gid != gid) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { if (nfsv4_strtogid(nd, cp, j, &gid, p)) nap->na_gid = nfsrv_defaultgid; else nap->na_gid = gid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); break; case NFSATTRBIT_QUOTAHARD: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = sbp->f_bfree; else freenum = sbp->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bhardlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_QUOTASOFT: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = sbp->f_bfree; else freenum = sbp->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bsoftlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_QUOTAUSED: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { freenum = 0; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = dqb.dqb_curblocks; p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_RAWDEV: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4SPECDATA); j = fxdr_unsigned(int, *tl++); k = fxdr_unsigned(int, *tl); if (compare) { if (!(*retcmpp)) { if (nap->na_rdev != NFSMAKEDEV(j, k)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_rdev = NFSMAKEDEV(j, k); } attrsum += NFSX_V4SPECDATA; break; case NFSATTRBIT_SPACEAVAIL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_abytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_abytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACEFREE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_fbytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_fbytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACETOTAL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_tbytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_tbytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACEUSED: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if ((u_int64_t)nap->na_bytes != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_bytes = thyp; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SYSTEM: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESS: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_atime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_atime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEACCESSSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i == NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); attrsum += NFSX_V4TIME; } if (compare && !(*retcmpp)) *retcmpp = NFSERR_INVAL; break; case NFSATTRBIT_TIMEBACKUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEDELTA: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (fsp != NULL) { if (compare) { if (!(*retcmpp)) { if ((u_int32_t)fsp->fs_timedelta.tv_sec != fxdr_unsigned(u_int32_t, *(tl + 1)) || (u_int32_t)fsp->fs_timedelta.tv_nsec != (fxdr_unsigned(u_int32_t, *(tl + 2)) % 1000000000) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else { fxdr_nfsv4time(tl, &fsp->fs_timedelta); } } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMETADATA: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_ctime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_ctime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFY: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_mtime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_mtime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i == NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); attrsum += NFSX_V4TIME; } if (compare && !(*retcmpp)) *retcmpp = NFSERR_INVAL; break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if (!vp || !nfsrv_atroot(vp, &thyp2)) thyp2 = nap->na_fileid; if (thyp2 != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) nap->na_mntonfileno = thyp; attrsum += NFSX_HYPER; break; case NFSATTRBIT_SUPPATTREXCLCREAT: retnotsup = 0; error = nfsrv_getattrbits(nd, &retattrbits, &cnt, &retnotsup); if (error) goto nfsmout; if (compare && !(*retcmpp)) { NFSSETSUPP_ATTRBIT(&checkattrbits); NFSCLRNOTSETABLE_ATTRBIT(&checkattrbits); NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_TIMEACCESSSET); if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits) || retnotsup) *retcmpp = NFSERR_NOTSAME; } attrsum += cnt; break; + case NFSATTRBIT_FSLAYOUTTYPE: + case NFSATTRBIT_LAYOUTTYPE: + NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); + attrsum += NFSX_UNSIGNED; + i = fxdr_unsigned(int, *tl); + if (i > 0) { + NFSM_DISSECT(tl, u_int32_t *, i * + NFSX_UNSIGNED); + attrsum += i * NFSX_UNSIGNED; + j = fxdr_unsigned(int, *tl); + if (i == 1 && compare && !(*retcmpp) && + (((nfsrv_doflexfile != 0 || + nfsrv_maxpnfsmirror > 1) && + j != NFSLAYOUT_FLEXFILE) || + (nfsrv_doflexfile == 0 && + j != NFSLAYOUT_NFSV4_1_FILES))) + *retcmpp = NFSERR_NOTSAME; + } + if (nfsrv_devidcnt == 0) { + if (compare && !(*retcmpp) && i > 0) + *retcmpp = NFSERR_NOTSAME; + } else { + if (compare && !(*retcmpp) && i != 1) + *retcmpp = NFSERR_NOTSAME; + } + break; + case NFSATTRBIT_LAYOUTALIGNMENT: + case NFSATTRBIT_LAYOUTBLKSIZE: + NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); + attrsum += NFSX_UNSIGNED; + i = fxdr_unsigned(int, *tl); + if (compare && !(*retcmpp) && i != NFS_SRVMAXIO) + *retcmpp = NFSERR_NOTSAME; + break; default: printf("EEK! nfsv4_loadattr unknown attr=%d\n", bitpos); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; /* * and get out of the loop, since we can't parse * the unknown attrbute data. */ bitpos = NFSATTRBIT_MAX; break; } } /* * some clients pad the attrlist, so we need to skip over the * padding. */ if (attrsum > attrsize) { error = NFSERR_BADXDR; } else { attrsize = NFSM_RNDUP(attrsize); if (attrsum < attrsize) error = nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Implement sleep locks for newnfs. The nfslock_usecnt allows for a * shared lock and the NFSXXX_LOCK flag permits an exclusive lock. * The first argument is a pointer to an nfsv4lock structure. * The second argument is 1 iff a blocking lock is wanted. * If this argument is 0, the call waits until no thread either wants nor * holds an exclusive lock. * It returns 1 if the lock was acquired, 0 otherwise. * If several processes call this function concurrently wanting the exclusive * lock, one will get the lock and the rest will return without getting the * lock. (If the caller must have the lock, it simply calls this function in a * loop until the function returns 1 to indicate the lock was acquired.) * Any usecnt must be decremented by calling nfsv4_relref() before * calling nfsv4_lock(). It was done this way, so nfsv4_lock() could * be called in a loop. * The isleptp argument is set to indicate if the call slept, iff not NULL * and the mp argument indicates to check for a forced dismount, iff not * NULL. */ APPLESTATIC int nfsv4_lock(struct nfsv4lock *lp, int iwantlock, int *isleptp, void *mutex, struct mount *mp) { if (isleptp) *isleptp = 0; /* * If a lock is wanted, loop around until the lock is acquired by * someone and then released. If I want the lock, try to acquire it. * For a lock to be issued, no lock must be in force and the usecnt * must be zero. */ if (iwantlock) { if (!(lp->nfslock_lock & NFSV4LOCK_LOCK) && lp->nfslock_usecnt == 0) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; lp->nfslock_lock |= NFSV4LOCK_LOCK; return (1); } lp->nfslock_lock |= NFSV4LOCK_LOCKWANTED; } while (lp->nfslock_lock & (NFSV4LOCK_LOCK | NFSV4LOCK_LOCKWANTED)) { if (mp != NULL && NFSCL_FORCEDISM(mp)) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; return (0); } lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; (void) nfsmsleep(&lp->nfslock_lock, mutex, PZERO - 1, "nfsv4lck", NULL); if (iwantlock && !(lp->nfslock_lock & NFSV4LOCK_LOCK) && lp->nfslock_usecnt == 0) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; lp->nfslock_lock |= NFSV4LOCK_LOCK; return (1); } } return (0); } /* * Release the lock acquired by nfsv4_lock(). * The second argument is set to 1 to indicate the nfslock_usecnt should be * incremented, as well. */ APPLESTATIC void nfsv4_unlock(struct nfsv4lock *lp, int incref) { lp->nfslock_lock &= ~NFSV4LOCK_LOCK; if (incref) lp->nfslock_usecnt++; nfsv4_wanted(lp); } /* * Release a reference cnt. */ APPLESTATIC void nfsv4_relref(struct nfsv4lock *lp) { if (lp->nfslock_usecnt <= 0) panic("nfsv4root ref cnt"); lp->nfslock_usecnt--; if (lp->nfslock_usecnt == 0) nfsv4_wanted(lp); } /* * Get a reference cnt. * This function will wait for any exclusive lock to be released, but will * not wait for threads that want the exclusive lock. If priority needs * to be given to threads that need the exclusive lock, a call to nfsv4_lock() * with the 2nd argument == 0 should be done before calling nfsv4_getref(). * If the mp argument is not NULL, check for NFSCL_FORCEDISM() being set and * return without getting a refcnt for that case. */ APPLESTATIC void nfsv4_getref(struct nfsv4lock *lp, int *isleptp, void *mutex, struct mount *mp) { if (isleptp) *isleptp = 0; /* * Wait for a lock held. */ while (lp->nfslock_lock & NFSV4LOCK_LOCK) { if (mp != NULL && NFSCL_FORCEDISM(mp)) return; lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; (void) nfsmsleep(&lp->nfslock_lock, mutex, PZERO - 1, "nfsv4gr", NULL); } if (mp != NULL && NFSCL_FORCEDISM(mp)) return; lp->nfslock_usecnt++; } /* * Get a reference as above, but return failure instead of sleeping if * an exclusive lock is held. */ APPLESTATIC int nfsv4_getref_nonblock(struct nfsv4lock *lp) { if ((lp->nfslock_lock & NFSV4LOCK_LOCK) != 0) return (0); lp->nfslock_usecnt++; return (1); } /* * Test for a lock. Return 1 if locked, 0 otherwise. */ APPLESTATIC int nfsv4_testlock(struct nfsv4lock *lp) { if ((lp->nfslock_lock & NFSV4LOCK_LOCK) == 0 && lp->nfslock_usecnt == 0) return (0); return (1); } /* * Wake up anyone sleeping, waiting for this lock. */ static void nfsv4_wanted(struct nfsv4lock *lp) { if (lp->nfslock_lock & NFSV4LOCK_WANTED) { lp->nfslock_lock &= ~NFSV4LOCK_WANTED; wakeup((caddr_t)&lp->nfslock_lock); } } /* * Copy a string from an mbuf list into a character array. * Return EBADRPC if there is an mbuf error, * 0 otherwise. */ APPLESTATIC int nfsrv_mtostr(struct nfsrv_descript *nd, char *str, int siz) { char *cp; int xfer, len; mbuf_t mp; int rem, error = 0; mp = nd->nd_md; cp = nd->nd_dpos; len = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - cp; rem = NFSM_RNDUP(siz) - siz; while (siz > 0) { if (len > siz) xfer = siz; else xfer = len; NFSBCOPY(cp, str, xfer); str += xfer; siz -= xfer; if (siz > 0) { mp = mbuf_next(mp); if (mp == NULL) { error = EBADRPC; goto out; } cp = NFSMTOD(mp, caddr_t); len = mbuf_len(mp); } else { cp += xfer; len -= xfer; } } *str = '\0'; nd->nd_dpos = cp; nd->nd_md = mp; if (rem > 0) { if (len < rem) error = nfsm_advance(nd, rem, len); else nd->nd_dpos += rem; } out: NFSEXITCODE2(error, nd); return (error); } /* * Fill in the attributes as marked by the bitmap (V4). */ APPLESTATIC int nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, NFSACL_T *saclp, struct vattr *vap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, NFSPROC_T *p, int isdgram, - int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) + int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno, + struct statfs *pnfssf) { int bitpos, retnum = 0; u_int32_t *tl; int siz, prefixnum, error; u_char *cp, namestr[NFSV4_SMALLSTR]; nfsattrbit_t attrbits, retbits; nfsattrbit_t *retbitp = &retbits; u_int32_t freenum, *retnump; u_int64_t uquad; struct statfs *fs; struct nfsfsinfo fsinf; struct timespec temptime; NFSACL_T *aclp, *naclp = NULL; #ifdef QUOTA struct dqblk dqb; uid_t savuid; #endif /* * First, set the bits that can be filled and get fsinfo. */ NFSSET_ATTRBIT(retbitp, attrbitp); /* * If both p and cred are NULL, it is a client side setattr call. * If both p and cred are not NULL, it is a server side reply call. * If p is not NULL and cred is NULL, it is a client side callback * reply call. */ if (p == NULL && cred == NULL) { NFSCLRNOTSETABLE_ATTRBIT(retbitp); aclp = saclp; } else { NFSCLRNOTFILLABLE_ATTRBIT(retbitp); naclp = acl_alloc(M_WAITOK); aclp = naclp; } nfsvno_getfs(&fsinf, isdgram); #ifndef APPLE /* * Get the VFS_STATFS(), since some attributes need them. */ fs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); if (NFSISSETSTATFS_ATTRBIT(retbitp)) { error = VFS_STATFS(mp, fs); if (error != 0) { if (reterr) { nd->nd_repstat = NFSERR_ACCES; free(fs, M_STATFS); return (0); } NFSCLRSTATFS_ATTRBIT(retbitp); } } #endif /* * And the NFSv4 ACL... */ if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_ACLSUPPORT) && (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0))) { NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACLSUPPORT); } if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_ACL)) { if (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0)) { NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACL); } else if (naclp != NULL) { if (NFSVOPLOCK(vp, LK_SHARED) == 0) { error = VOP_ACCESSX(vp, VREAD_ACL, cred, p); if (error == 0) error = VOP_GETACL(vp, ACL_TYPE_NFS4, naclp, cred, p); NFSVOPUNLOCK(vp, 0); } else error = NFSERR_PERM; if (error != 0) { if (reterr) { nd->nd_repstat = NFSERR_ACCES; free(fs, M_STATFS); return (0); } NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACL); } } } /* * Put out the attribute bitmap for the ones being filled in * and get the field for the number of attributes returned. */ prefixnum = nfsrv_putattrbit(nd, retbitp); NFSM_BUILD(retnump, u_int32_t *, NFSX_UNSIGNED); prefixnum += NFSX_UNSIGNED; /* * Now, loop around filling in the attributes for each bit set. */ for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (NFSISSET_ATTRBIT(retbitp, bitpos)) { switch (bitpos) { case NFSATTRBIT_SUPPORTEDATTRS: NFSSETSUPP_ATTRBIT(&attrbits); if (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0)) { NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACLSUPPORT); NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACL); } retnum += nfsrv_putattrbit(nd, &attrbits); break; case NFSATTRBIT_TYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vap->va_type); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FHEXPIRETYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4FHTYPE_PERSISTENT); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHANGE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_filerev, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_size, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_LINKSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_LINK) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_SYMLINKSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_SYMLINK) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NAMEDATTR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_BUILD(tl, u_int32_t *, NFSX_V4FSID); *tl++ = 0; *tl++ = txdr_unsigned(mp->mnt_stat.f_fsid.val[0]); *tl++ = 0; *tl = txdr_unsigned(mp->mnt_stat.f_fsid.val[1]); retnum += NFSX_V4FSID; break; case NFSATTRBIT_UNIQUEHANDLES: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_LEASETIME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsrv_lease); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_RDATTRERROR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(rderror); retnum += NFSX_UNSIGNED; break; /* * Recommended Attributes. (Only the supported ones.) */ case NFSATTRBIT_ACL: retnum += nfsrv_buildacl(nd, aclp, vnode_vtype(vp), p); break; case NFSATTRBIT_ACLSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4ACE_SUPTYPES); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CANSETTIME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_CANSETTIME) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEPRESERVING: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHOWNRESTRICTED: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FILEHANDLE: retnum += nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); break; case NFSATTRBIT_FILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = vap->va_fileid; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESAVAIL: /* * Check quota and use min(quota, f_ffree). */ freenum = fs->f_ffree; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_isoftlimit-dqb.dqb_curinodes, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(freenum); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESFREE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fs->f_ffree); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESTOTAL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fs->f_files); retnum += NFSX_HYPER; break; case NFSATTRBIT_FSLOCATIONS: NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = 0; retnum += 2 * NFSX_UNSIGNED; break; case NFSATTRBIT_HOMOGENEOUS: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_HOMOGENEOUS) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXFILESIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = NFSRV_MAXFILESIZE; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_MAXLINK: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_LINK_MAX); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXNAME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_MAXNAMLEN); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXREAD: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fsinf.fs_rtmax); retnum += NFSX_HYPER; break; case NFSATTRBIT_MAXWRITE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fsinf.fs_wtmax); retnum += NFSX_HYPER; break; case NFSATTRBIT_MODE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_mode(vap->va_mode); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NOTRUNC: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NUMLINKS: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(vap->va_nlink); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: cp = namestr; nfsv4_uidtostr(vap->va_uid, &cp, &siz, p); retnum += nfsm_strtom(nd, cp, siz); if (cp != namestr) free(cp, M_NFSSTRING); break; case NFSATTRBIT_OWNERGROUP: cp = namestr; nfsv4_gidtostr(vap->va_gid, &cp, &siz, p); retnum += nfsm_strtom(nd, cp, siz); if (cp != namestr) free(cp, M_NFSSTRING); break; case NFSATTRBIT_QUOTAHARD: if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = fs->f_bfree; else freenum = fs->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bhardlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_QUOTASOFT: if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = fs->f_bfree; else freenum = fs->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bsoftlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_QUOTAUSED: freenum = 0; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = dqb.dqb_curblocks; p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_RAWDEV: NFSM_BUILD(tl, u_int32_t *, NFSX_V4SPECDATA); *tl++ = txdr_unsigned(NFSMAJOR(vap->va_rdev)); *tl = txdr_unsigned(NFSMINOR(vap->va_rdev)); retnum += NFSX_V4SPECDATA; break; case NFSATTRBIT_SPACEAVAIL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); - if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0)) - uquad = (u_int64_t)fs->f_bfree; + if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0)) { + if (pnfssf != NULL) + uquad = (u_int64_t)pnfssf->f_bfree; + else + uquad = (u_int64_t)fs->f_bfree; + } else { + if (pnfssf != NULL) + uquad = (u_int64_t)pnfssf->f_bavail; + else + uquad = (u_int64_t)fs->f_bavail; + } + if (pnfssf != NULL) + uquad *= pnfssf->f_bsize; else - uquad = (u_int64_t)fs->f_bavail; - uquad *= fs->f_bsize; + uquad *= fs->f_bsize; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACEFREE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); - uquad = (u_int64_t)fs->f_bfree; - uquad *= fs->f_bsize; + if (pnfssf != NULL) { + uquad = (u_int64_t)pnfssf->f_bfree; + uquad *= pnfssf->f_bsize; + } else { + uquad = (u_int64_t)fs->f_bfree; + uquad *= fs->f_bsize; + } txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACETOTAL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); - uquad = (u_int64_t)fs->f_blocks; - uquad *= fs->f_bsize; + if (pnfssf != NULL) { + uquad = (u_int64_t)pnfssf->f_blocks; + uquad *= pnfssf->f_bsize; + } else { + uquad = (u_int64_t)fs->f_blocks; + uquad *= fs->f_bsize; + } txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACEUSED: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_bytes, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_TIMEACCESS: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_atime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEACCESSSET: if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_V4SETTIME); *tl++ = txdr_unsigned(NFSV4SATTRTIME_TOCLIENT); txdr_nfsv4time(&vap->va_atime, tl); retnum += NFSX_V4SETTIME; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4SATTRTIME_TOSERVER); retnum += NFSX_UNSIGNED; } break; case NFSATTRBIT_TIMEDELTA: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); temptime.tv_sec = 0; temptime.tv_nsec = 1000000000 / hz; txdr_nfsv4time(&temptime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMETADATA: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_ctime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFY: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_mtime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_V4SETTIME); *tl++ = txdr_unsigned(NFSV4SATTRTIME_TOCLIENT); txdr_nfsv4time(&vap->va_mtime, tl); retnum += NFSX_V4SETTIME; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4SATTRTIME_TOSERVER); retnum += NFSX_UNSIGNED; } break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (at_root != 0) uquad = mounted_on_fileno; else uquad = vap->va_fileid; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SUPPATTREXCLCREAT: NFSSETSUPP_ATTRBIT(&attrbits); NFSCLRNOTSETABLE_ATTRBIT(&attrbits); NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); retnum += nfsrv_putattrbit(nd, &attrbits); break; + case NFSATTRBIT_FSLAYOUTTYPE: + case NFSATTRBIT_LAYOUTTYPE: + if (nfsrv_devidcnt == 0) + siz = 1; + else + siz = 2; + if (siz == 2) { + NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(1); /* One entry. */ + if (nfsrv_doflexfile != 0 || + nfsrv_maxpnfsmirror > 1) + *tl = txdr_unsigned(NFSLAYOUT_FLEXFILE); + else + *tl = txdr_unsigned( + NFSLAYOUT_NFSV4_1_FILES); + } else { + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = 0; + } + retnum += siz * NFSX_UNSIGNED; + break; + case NFSATTRBIT_LAYOUTALIGNMENT: + case NFSATTRBIT_LAYOUTBLKSIZE: + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFS_SRVMAXIO); + retnum += NFSX_UNSIGNED; + break; default: printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos); } } } if (naclp != NULL) acl_free(naclp); free(fs, M_STATFS); *retnump = txdr_unsigned(retnum); return (retnum + prefixnum); } /* * Put the attribute bits onto an mbuf list. * Return the number of bytes of output generated. */ APPLESTATIC int nfsrv_putattrbit(struct nfsrv_descript *nd, nfsattrbit_t *attrbitp) { u_int32_t *tl; int cnt, i, bytesize; for (cnt = NFSATTRBIT_MAXWORDS; cnt > 0; cnt--) if (attrbitp->bits[cnt - 1]) break; bytesize = (cnt + 1) * NFSX_UNSIGNED; NFSM_BUILD(tl, u_int32_t *, bytesize); *tl++ = txdr_unsigned(cnt); for (i = 0; i < cnt; i++) *tl++ = txdr_unsigned(attrbitp->bits[i]); return (bytesize); } /* * Convert a uid to a string. * If the lookup fails, just output the digits. * uid - the user id * cpp - points to a buffer of size NFSV4_SMALLSTR * (malloc a larger one, as required) * retlenp - pointer to length to be returned */ APPLESTATIC void nfsv4_uidtostr(uid_t uid, u_char **cpp, int *retlenp, NFSPROC_T *p) { int i; struct nfsusrgrp *usrp; u_char *cp = *cpp; uid_t tmp; int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; struct nfsrv_lughash *hp; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0 && !nfs_enable_uidtostring) { /* * Always map nfsrv_defaultuid to "nobody". */ if (uid == nfsrv_defaultuid) { i = nfsrv_dnsnamelen + 7; if (i > len) { if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY("nobody@", cp, 7); cp += 7; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); return; } hasampersand = 0; hp = NFSUSERHASH(uid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_uid == uid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; /* * If the name doesn't already have an '@' * in it, append @domainname to it. */ for (i = 0; i < usrp->lug_namelen; i++) { if (usrp->lug_name[i] == '@') { hasampersand = 1; break; } } if (hasampersand) i = usrp->lug_namelen; else i = usrp->lug_namelen + nfsrv_dnsnamelen + 1; if (i > len) { mtx_unlock(&hp->mtx); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY(usrp->lug_name, cp, usrp->lug_namelen); if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); } TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUID, uid, (gid_t)0, NULL, p); if (ret == 0 && cnt < 2) goto tryagain; } /* * No match, just return a string of digits. */ tmp = uid; i = 0; while (tmp || i == 0) { tmp /= 10; i++; } len = (i > len) ? len : i; *retlenp = len; cp += (len - 1); tmp = uid; for (i = 0; i < len; i++) { *cp-- = '0' + (tmp % 10); tmp /= 10; } return; } /* * Get a credential for the uid with the server's group list. * If none is found, just return the credential passed in after * logging a warning message. */ struct ucred * nfsrv_getgrpscred(struct ucred *oldcred) { struct nfsusrgrp *usrp; struct ucred *newcred; int cnt, ret; uid_t uid; struct nfsrv_lughash *hp; cnt = 0; uid = oldcred->cr_uid; tryagain: if (nfsrv_dnsnamelen > 0) { hp = NFSUSERHASH(uid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_uid == uid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; if (usrp->lug_cred != NULL) { newcred = crhold(usrp->lug_cred); crfree(oldcred); } else newcred = oldcred; TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return (newcred); } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUID, uid, (gid_t)0, NULL, curthread); if (ret == 0 && cnt < 2) goto tryagain; } return (oldcred); } /* * Convert a string to a uid. * If no conversion is possible return NFSERR_BADOWNER, otherwise * return 0. * If this is called from a client side mount using AUTH_SYS and the * string is made up entirely of digits, just convert the string to * a number. */ APPLESTATIC int nfsv4_strtouid(struct nfsrv_descript *nd, u_char *str, int len, uid_t *uidp, NFSPROC_T *p) { int i; char *cp, *endstr, *str0; struct nfsusrgrp *usrp; int cnt, ret; int error = 0; uid_t tuid; struct nfsrv_lughash *hp, *hp2; if (len == 0) { error = NFSERR_BADOWNER; goto out; } /* If a string of digits and an AUTH_SYS mount, just convert it. */ str0 = str; tuid = (uid_t)strtoul(str0, &endstr, 10); if ((endstr - str0) == len) { /* A numeric string. */ if ((nd->nd_flag & ND_KERBV) == 0 && ((nd->nd_flag & ND_NFSCL) != 0 || nfsd_enable_stringtouid != 0)) *uidp = tuid; else error = NFSERR_BADOWNER; goto out; } /* * Look for an '@'. */ cp = strchr(str0, '@'); if (cp != NULL) i = (int)(cp++ - str0); else i = len; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0) { /* * If an '@' is found and the domain name matches, search for * the name with dns stripped off. * Mixed case alpahbetics will match for the domain name, but * all upper case will not. */ if (cnt == 0 && i < len && i > 0 && (len - 1 - i) == nfsrv_dnsnamelen && !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) { len -= (nfsrv_dnsnamelen + 1); *(cp - 1) = '\0'; } /* * Check for the special case of "nobody". */ if (len == 6 && !NFSBCMP(str, "nobody", 6)) { *uidp = nfsrv_defaultuid; error = 0; goto out; } hp = NFSUSERNAMEHASH(str, len); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_namehash) { if (usrp->lug_namelen == len && !NFSBCMP(usrp->lug_name, str, len)) { if (usrp->lug_expiry < NFSD_MONOSEC) break; hp2 = NFSUSERHASH(usrp->lug_uid); mtx_lock(&hp2->mtx); TAILQ_REMOVE(&hp2->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp2->lughead, usrp, lug_numhash); *uidp = usrp->lug_uid; mtx_unlock(&hp2->mtx); mtx_unlock(&hp->mtx); error = 0; goto out; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUSER, (uid_t)0, (gid_t)0, str, p); if (ret == 0 && cnt < 2) goto tryagain; } error = NFSERR_BADOWNER; out: NFSEXITCODE(error); return (error); } /* * Convert a gid to a string. * gid - the group id * cpp - points to a buffer of size NFSV4_SMALLSTR * (malloc a larger one, as required) * retlenp - pointer to length to be returned */ APPLESTATIC void nfsv4_gidtostr(gid_t gid, u_char **cpp, int *retlenp, NFSPROC_T *p) { int i; struct nfsusrgrp *usrp; u_char *cp = *cpp; gid_t tmp; int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; struct nfsrv_lughash *hp; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0 && !nfs_enable_uidtostring) { /* * Always map nfsrv_defaultgid to "nogroup". */ if (gid == nfsrv_defaultgid) { i = nfsrv_dnsnamelen + 8; if (i > len) { if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY("nogroup@", cp, 8); cp += 8; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); return; } hasampersand = 0; hp = NFSGROUPHASH(gid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_gid == gid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; /* * If the name doesn't already have an '@' * in it, append @domainname to it. */ for (i = 0; i < usrp->lug_namelen; i++) { if (usrp->lug_name[i] == '@') { hasampersand = 1; break; } } if (hasampersand) i = usrp->lug_namelen; else i = usrp->lug_namelen + nfsrv_dnsnamelen + 1; if (i > len) { mtx_unlock(&hp->mtx); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY(usrp->lug_name, cp, usrp->lug_namelen); if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); } TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETGID, (uid_t)0, gid, NULL, p); if (ret == 0 && cnt < 2) goto tryagain; } /* * No match, just return a string of digits. */ tmp = gid; i = 0; while (tmp || i == 0) { tmp /= 10; i++; } len = (i > len) ? len : i; *retlenp = len; cp += (len - 1); tmp = gid; for (i = 0; i < len; i++) { *cp-- = '0' + (tmp % 10); tmp /= 10; } return; } /* * Convert a string to a gid. * If no conversion is possible return NFSERR_BADOWNER, otherwise * return 0. * If this is called from a client side mount using AUTH_SYS and the * string is made up entirely of digits, just convert the string to * a number. */ APPLESTATIC int nfsv4_strtogid(struct nfsrv_descript *nd, u_char *str, int len, gid_t *gidp, NFSPROC_T *p) { int i; char *cp, *endstr, *str0; struct nfsusrgrp *usrp; int cnt, ret; int error = 0; gid_t tgid; struct nfsrv_lughash *hp, *hp2; if (len == 0) { error = NFSERR_BADOWNER; goto out; } /* If a string of digits and an AUTH_SYS mount, just convert it. */ str0 = str; tgid = (gid_t)strtoul(str0, &endstr, 10); if ((endstr - str0) == len) { /* A numeric string. */ if ((nd->nd_flag & ND_KERBV) == 0 && ((nd->nd_flag & ND_NFSCL) != 0 || nfsd_enable_stringtouid != 0)) *gidp = tgid; else error = NFSERR_BADOWNER; goto out; } /* * Look for an '@'. */ cp = strchr(str0, '@'); if (cp != NULL) i = (int)(cp++ - str0); else i = len; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0) { /* * If an '@' is found and the dns name matches, search for the * name with the dns stripped off. */ if (cnt == 0 && i < len && i > 0 && (len - 1 - i) == nfsrv_dnsnamelen && !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) { len -= (nfsrv_dnsnamelen + 1); *(cp - 1) = '\0'; } /* * Check for the special case of "nogroup". */ if (len == 7 && !NFSBCMP(str, "nogroup", 7)) { *gidp = nfsrv_defaultgid; error = 0; goto out; } hp = NFSGROUPNAMEHASH(str, len); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_namehash) { if (usrp->lug_namelen == len && !NFSBCMP(usrp->lug_name, str, len)) { if (usrp->lug_expiry < NFSD_MONOSEC) break; hp2 = NFSGROUPHASH(usrp->lug_gid); mtx_lock(&hp2->mtx); TAILQ_REMOVE(&hp2->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp2->lughead, usrp, lug_numhash); *gidp = usrp->lug_gid; mtx_unlock(&hp2->mtx); mtx_unlock(&hp->mtx); error = 0; goto out; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETGROUP, (uid_t)0, (gid_t)0, str, p); if (ret == 0 && cnt < 2) goto tryagain; } error = NFSERR_BADOWNER; out: NFSEXITCODE(error); return (error); } /* * Cmp len chars, allowing mixed case in the first argument to match lower * case in the second, but not if the first argument is all upper case. * Return 0 for a match, 1 otherwise. */ static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len) { int i; u_char tmp; int fndlower = 0; for (i = 0; i < len; i++) { if (*cp >= 'A' && *cp <= 'Z') { tmp = *cp++ + ('a' - 'A'); } else { tmp = *cp++; if (tmp >= 'a' && tmp <= 'z') fndlower = 1; } if (tmp != *cp2++) return (1); } if (fndlower) return (0); else return (1); } /* * Set the port for the nfsuserd. */ APPLESTATIC int nfsrv_nfsuserdport(struct sockaddr *sad, u_short port, NFSPROC_T *p) { struct nfssockreq *rp; struct sockaddr_in *ad; int error; NFSLOCKNAMEID(); if (nfsrv_nfsuserd) { NFSUNLOCKNAMEID(); error = EPERM; free(sad, M_SONAME); goto out; } nfsrv_nfsuserd = 1; NFSUNLOCKNAMEID(); /* * Set up the socket record and connect. */ rp = &nfsrv_nfsuserdsock; rp->nr_client = NULL; rp->nr_cred = NULL; rp->nr_lock = (NFSR_RESERVEDPORT | NFSR_LOCALHOST); if (sad != NULL) { /* Use the AF_LOCAL socket address passed in. */ rp->nr_sotype = SOCK_STREAM; rp->nr_soproto = 0; rp->nr_nam = sad; } else { /* Use the port# for a UDP socket (old nfsuserd). */ rp->nr_sotype = SOCK_DGRAM; rp->nr_soproto = IPPROTO_UDP; rp->nr_nam = malloc(sizeof(*rp->nr_nam), M_SONAME, M_WAITOK | M_ZERO); NFSSOCKADDRSIZE(rp->nr_nam, sizeof (struct sockaddr_in)); ad = NFSSOCKADDR(rp->nr_nam, struct sockaddr_in *); ad->sin_family = AF_INET; ad->sin_addr.s_addr = htonl((u_int32_t)0x7f000001); ad->sin_port = port; } rp->nr_prog = RPCPROG_NFSUSERD; rp->nr_vers = RPCNFSUSERD_VERS; error = newnfs_connect(NULL, rp, NFSPROCCRED(p), p, 0); if (error) { free(rp->nr_nam, M_SONAME); nfsrv_nfsuserd = 0; } out: NFSEXITCODE(error); return (error); } /* * Delete the nfsuserd port. */ APPLESTATIC void nfsrv_nfsuserddelport(void) { NFSLOCKNAMEID(); if (nfsrv_nfsuserd == 0) { NFSUNLOCKNAMEID(); return; } nfsrv_nfsuserd = 0; NFSUNLOCKNAMEID(); newnfs_disconnect(&nfsrv_nfsuserdsock); free(nfsrv_nfsuserdsock.nr_nam, M_SONAME); } /* * Do upcalls to the nfsuserd, for cache misses of the owner/ownergroup * name<-->id cache. * Returns 0 upon success, non-zero otherwise. */ static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript *nd; int len; struct nfsrv_descript nfsd; struct ucred *cred; int error; NFSLOCKNAMEID(); if (nfsrv_nfsuserd == 0) { NFSUNLOCKNAMEID(); error = EPERM; goto out; } NFSUNLOCKNAMEID(); nd = &nfsd; cred = newnfs_getcred(); nd->nd_flag = ND_GSSINITREPLY; nfsrvd_rephead(nd); nd->nd_procnum = procnum; if (procnum == RPCNFSUSERD_GETUID || procnum == RPCNFSUSERD_GETGID) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (procnum == RPCNFSUSERD_GETUID) *tl = txdr_unsigned(uid); else *tl = txdr_unsigned(gid); } else { len = strlen(name); (void) nfsm_strtom(nd, name, len); } error = newnfs_request(nd, NULL, NULL, &nfsrv_nfsuserdsock, NULL, NULL, cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL, NULL); NFSFREECRED(cred); if (!error) { mbuf_freem(nd->nd_mrep); error = nd->nd_repstat; } out: NFSEXITCODE(error); return (error); } /* * This function is called from the nfssvc(2) system call, to update the * kernel user/group name list(s) for the V4 owner and ownergroup attributes. */ APPLESTATIC int nfssvc_idname(struct nfsd_idargs *nidp) { struct nfsusrgrp *nusrp, *usrp, *newusrp; struct nfsrv_lughash *hp_name, *hp_idnum, *thp; int i, group_locked, groupname_locked, user_locked, username_locked; int error = 0; u_char *cp; gid_t *grps; struct ucred *cr; static int onethread = 0; static time_t lasttime = 0; if (nidp->nid_namelen <= 0 || nidp->nid_namelen > MAXHOSTNAMELEN) { error = EINVAL; goto out; } if (nidp->nid_flag & NFSID_INITIALIZE) { cp = malloc(nidp->nid_namelen + 1, M_NFSSTRING, M_WAITOK); error = copyin(CAST_USER_ADDR_T(nidp->nid_name), cp, nidp->nid_namelen); if (error != 0) { free(cp, M_NFSSTRING); goto out; } if (atomic_cmpset_acq_int(&nfsrv_dnsnamelen, 0, 0) == 0) { /* * Free up all the old stuff and reinitialize hash * lists. All mutexes for both lists must be locked, * with the user/group name ones before the uid/gid * ones, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsuserhash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_FOREACH_SAFE(usrp, &nfsuserhash[i].lughead, lug_numhash, nusrp) nfsrv_removeuser(usrp, 1); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsuserhash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsusernamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgrouphash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_FOREACH_SAFE(usrp, &nfsgrouphash[i].lughead, lug_numhash, nusrp) nfsrv_removeuser(usrp, 0); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgrouphash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgroupnamehash[i].mtx); free(nfsrv_dnsname, M_NFSSTRING); nfsrv_dnsname = NULL; } if (nfsuserhash == NULL) { /* Allocate the hash tables. */ nfsuserhash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsuserhash[i].mtx, "nfsuidhash", NULL, MTX_DEF | MTX_DUPOK); nfsusernamehash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsusernamehash[i].mtx, "nfsusrhash", NULL, MTX_DEF | MTX_DUPOK); nfsgrouphash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsgrouphash[i].mtx, "nfsgidhash", NULL, MTX_DEF | MTX_DUPOK); nfsgroupnamehash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsgroupnamehash[i].mtx, "nfsgrphash", NULL, MTX_DEF | MTX_DUPOK); } /* (Re)initialize the list heads. */ for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsuserhash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsusernamehash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsgrouphash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsgroupnamehash[i].lughead); /* * Put name in "DNS" string. */ nfsrv_dnsname = cp; nfsrv_defaultuid = nidp->nid_uid; nfsrv_defaultgid = nidp->nid_gid; nfsrv_usercnt = 0; nfsrv_usermax = nidp->nid_usermax; atomic_store_rel_int(&nfsrv_dnsnamelen, nidp->nid_namelen); goto out; } /* * malloc the new one now, so any potential sleep occurs before * manipulation of the lists. */ newusrp = malloc(sizeof(struct nfsusrgrp) + nidp->nid_namelen, M_NFSUSERGROUP, M_WAITOK | M_ZERO); error = copyin(CAST_USER_ADDR_T(nidp->nid_name), newusrp->lug_name, nidp->nid_namelen); if (error == 0 && nidp->nid_ngroup > 0 && (nidp->nid_flag & NFSID_ADDUID) != 0) { grps = malloc(sizeof(gid_t) * nidp->nid_ngroup, M_TEMP, M_WAITOK); error = copyin(CAST_USER_ADDR_T(nidp->nid_grps), grps, sizeof(gid_t) * nidp->nid_ngroup); if (error == 0) { /* * Create a credential just like svc_getcred(), * but using the group list provided. */ cr = crget(); cr->cr_uid = cr->cr_ruid = cr->cr_svuid = nidp->nid_uid; crsetgroups(cr, nidp->nid_ngroup, grps); cr->cr_rgid = cr->cr_svgid = cr->cr_groups[0]; cr->cr_prison = &prison0; prison_hold(cr->cr_prison); #ifdef MAC mac_cred_associate_nfsd(cr); #endif newusrp->lug_cred = cr; } free(grps, M_TEMP); } if (error) { free(newusrp, M_NFSUSERGROUP); goto out; } newusrp->lug_namelen = nidp->nid_namelen; /* * The lock order is username[0]->[nfsrv_lughashsize - 1] followed * by uid[0]->[nfsrv_lughashsize - 1], with the same for group. * The flags user_locked, username_locked, group_locked and * groupname_locked are set to indicate all of those hash lists are * locked. hp_name != NULL and hp_idnum != NULL indicates that * the respective one mutex is locked. */ user_locked = username_locked = group_locked = groupname_locked = 0; hp_name = hp_idnum = NULL; /* * Delete old entries, as required. */ if (nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID)) { /* Must lock all username hash lists first, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); username_locked = 1; hp_idnum = NFSUSERHASH(nidp->nid_uid); mtx_lock(&hp_idnum->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_idnum->lughead, lug_numhash, nusrp) { if (usrp->lug_uid == nidp->nid_uid) nfsrv_removeuser(usrp, 1); } } else if (nidp->nid_flag & (NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) { hp_name = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_lock(&hp_name->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_name->lughead, lug_namehash, nusrp) { if (usrp->lug_namelen == newusrp->lug_namelen && !NFSBCMP(usrp->lug_name, newusrp->lug_name, usrp->lug_namelen)) { thp = NFSUSERHASH(usrp->lug_uid); mtx_lock(&thp->mtx); nfsrv_removeuser(usrp, 1); mtx_unlock(&thp->mtx); } } hp_idnum = NFSUSERHASH(nidp->nid_uid); mtx_lock(&hp_idnum->mtx); } else if (nidp->nid_flag & (NFSID_DELGID | NFSID_ADDGID)) { /* Must lock all groupname hash lists first, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); groupname_locked = 1; hp_idnum = NFSGROUPHASH(nidp->nid_gid); mtx_lock(&hp_idnum->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_idnum->lughead, lug_numhash, nusrp) { if (usrp->lug_gid == nidp->nid_gid) nfsrv_removeuser(usrp, 0); } } else if (nidp->nid_flag & (NFSID_DELGROUPNAME | NFSID_ADDGROUPNAME)) { hp_name = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_lock(&hp_name->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_name->lughead, lug_namehash, nusrp) { if (usrp->lug_namelen == newusrp->lug_namelen && !NFSBCMP(usrp->lug_name, newusrp->lug_name, usrp->lug_namelen)) { thp = NFSGROUPHASH(usrp->lug_gid); mtx_lock(&thp->mtx); nfsrv_removeuser(usrp, 0); mtx_unlock(&thp->mtx); } } hp_idnum = NFSGROUPHASH(nidp->nid_gid); mtx_lock(&hp_idnum->mtx); } /* * Now, we can add the new one. */ if (nidp->nid_usertimeout) newusrp->lug_expiry = NFSD_MONOSEC + nidp->nid_usertimeout; else newusrp->lug_expiry = NFSD_MONOSEC + 5; if (nidp->nid_flag & (NFSID_ADDUID | NFSID_ADDUSERNAME)) { newusrp->lug_uid = nidp->nid_uid; thp = NFSUSERHASH(newusrp->lug_uid); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_numhash); thp = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash); atomic_add_int(&nfsrv_usercnt, 1); } else if (nidp->nid_flag & (NFSID_ADDGID | NFSID_ADDGROUPNAME)) { newusrp->lug_gid = nidp->nid_gid; thp = NFSGROUPHASH(newusrp->lug_gid); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_numhash); thp = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash); atomic_add_int(&nfsrv_usercnt, 1); } else { if (newusrp->lug_cred != NULL) crfree(newusrp->lug_cred); free(newusrp, M_NFSUSERGROUP); } /* * Once per second, allow one thread to trim the cache. */ if (lasttime < NFSD_MONOSEC && atomic_cmpset_acq_int(&onethread, 0, 1) != 0) { /* * First, unlock the single mutexes, so that all entries * can be locked and any LOR is avoided. */ if (hp_name != NULL) { mtx_unlock(&hp_name->mtx); hp_name = NULL; } if (hp_idnum != NULL) { mtx_unlock(&hp_idnum->mtx); hp_idnum = NULL; } if ((nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID | NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) != 0) { if (username_locked == 0) { for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); username_locked = 1; } KASSERT(user_locked == 0, ("nfssvc_idname: user_locked")); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsuserhash[i].mtx); user_locked = 1; for (i = 0; i < nfsrv_lughashsize; i++) { TAILQ_FOREACH_SAFE(usrp, &nfsuserhash[i].lughead, lug_numhash, nusrp) if (usrp->lug_expiry < NFSD_MONOSEC) nfsrv_removeuser(usrp, 1); } for (i = 0; i < nfsrv_lughashsize; i++) { /* * Trim the cache using an approximate LRU * algorithm. This code deletes the least * recently used entry on each hash list. */ if (nfsrv_usercnt <= nfsrv_usermax) break; usrp = TAILQ_FIRST(&nfsuserhash[i].lughead); if (usrp != NULL) nfsrv_removeuser(usrp, 1); } } else { if (groupname_locked == 0) { for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); groupname_locked = 1; } KASSERT(group_locked == 0, ("nfssvc_idname: group_locked")); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgrouphash[i].mtx); group_locked = 1; for (i = 0; i < nfsrv_lughashsize; i++) { TAILQ_FOREACH_SAFE(usrp, &nfsgrouphash[i].lughead, lug_numhash, nusrp) if (usrp->lug_expiry < NFSD_MONOSEC) nfsrv_removeuser(usrp, 0); } for (i = 0; i < nfsrv_lughashsize; i++) { /* * Trim the cache using an approximate LRU * algorithm. This code deletes the least * recently user entry on each hash list. */ if (nfsrv_usercnt <= nfsrv_usermax) break; usrp = TAILQ_FIRST(&nfsgrouphash[i].lughead); if (usrp != NULL) nfsrv_removeuser(usrp, 0); } } lasttime = NFSD_MONOSEC; atomic_store_rel_int(&onethread, 0); } /* Now, unlock all locked mutexes. */ if (hp_idnum != NULL) mtx_unlock(&hp_idnum->mtx); if (hp_name != NULL) mtx_unlock(&hp_name->mtx); if (user_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsuserhash[i].mtx); if (username_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsusernamehash[i].mtx); if (group_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgrouphash[i].mtx); if (groupname_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgroupnamehash[i].mtx); out: NFSEXITCODE(error); return (error); } /* * Remove a user/group name element. */ static void nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser) { struct nfsrv_lughash *hp; if (isuser != 0) { hp = NFSUSERHASH(usrp->lug_uid); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp = NFSUSERNAMEHASH(usrp->lug_name, usrp->lug_namelen); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_namehash); } else { hp = NFSGROUPHASH(usrp->lug_gid); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp = NFSGROUPNAMEHASH(usrp->lug_name, usrp->lug_namelen); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_namehash); } atomic_add_int(&nfsrv_usercnt, -1); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } /* * Free up all the allocations related to the name<-->id cache. * This function should only be called when the nfsuserd daemon isn't * running, since it doesn't do any locking. * This function is meant to be used when the nfscommon module is unloaded. */ APPLESTATIC void nfsrv_cleanusergroup(void) { struct nfsrv_lughash *hp, *hp2; struct nfsusrgrp *nusrp, *usrp; int i; if (nfsuserhash == NULL) return; for (i = 0; i < nfsrv_lughashsize; i++) { hp = &nfsuserhash[i]; TAILQ_FOREACH_SAFE(usrp, &hp->lughead, lug_numhash, nusrp) { TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp2 = NFSUSERNAMEHASH(usrp->lug_name, usrp->lug_namelen); TAILQ_REMOVE(&hp2->lughead, usrp, lug_namehash); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } hp = &nfsgrouphash[i]; TAILQ_FOREACH_SAFE(usrp, &hp->lughead, lug_numhash, nusrp) { TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp2 = NFSGROUPNAMEHASH(usrp->lug_name, usrp->lug_namelen); TAILQ_REMOVE(&hp2->lughead, usrp, lug_namehash); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } mtx_destroy(&nfsuserhash[i].mtx); mtx_destroy(&nfsusernamehash[i].mtx); mtx_destroy(&nfsgroupnamehash[i].mtx); mtx_destroy(&nfsgrouphash[i].mtx); } free(nfsuserhash, M_NFSUSERGROUP); free(nfsusernamehash, M_NFSUSERGROUP); free(nfsgrouphash, M_NFSUSERGROUP); free(nfsgroupnamehash, M_NFSUSERGROUP); free(nfsrv_dnsname, M_NFSSTRING); } /* * This function scans a byte string and checks for UTF-8 compliance. * It returns 0 if it conforms and NFSERR_INVAL if not. */ APPLESTATIC int nfsrv_checkutf8(u_int8_t *cp, int len) { u_int32_t val = 0x0; int cnt = 0, gotd = 0, shift = 0; u_int8_t byte; static int utf8_shift[5] = { 7, 11, 16, 21, 26 }; int error = 0; /* * Here are what the variables are used for: * val - the calculated value of a multibyte char, used to check * that it was coded with the correct range * cnt - the number of 10xxxxxx bytes to follow * gotd - set for a char of Dxxx, so D800<->DFFF can be checked for * shift - lower order bits of range (ie. "val >> shift" should * not be 0, in other words, dividing by the lower bound * of the range should get a non-zero value) * byte - used to calculate cnt */ while (len > 0) { if (cnt > 0) { /* This handles the 10xxxxxx bytes */ if ((*cp & 0xc0) != 0x80 || (gotd && (*cp & 0x20))) { error = NFSERR_INVAL; goto out; } gotd = 0; val <<= 6; val |= (*cp & 0x3f); cnt--; if (cnt == 0 && (val >> shift) == 0x0) { error = NFSERR_INVAL; goto out; } } else if (*cp & 0x80) { /* first byte of multi byte char */ byte = *cp; while ((byte & 0x40) && cnt < 6) { cnt++; byte <<= 1; } if (cnt == 0 || cnt == 6) { error = NFSERR_INVAL; goto out; } val = (*cp & (0x3f >> cnt)); shift = utf8_shift[cnt - 1]; if (cnt == 2 && val == 0xd) /* Check for the 0xd800-0xdfff case */ gotd = 1; } cp++; len--; } if (cnt > 0) error = NFSERR_INVAL; out: NFSEXITCODE(error); return (error); } /* * Parse the xdr for an NFSv4 FsLocations attribute. Return two malloc'd * strings, one with the root path in it and the other with the list of * locations. The list is in the same format as is found in nfr_refs. * It is a "," separated list of entries, where each of them is of the * form :. For example * "nfsv4-test:/sub2,nfsv4-test2:/user/mnt,nfsv4-test2:/user/mnt2" * The nilp argument is set to 1 for the special case of a null fs_root * and an empty server list. * It returns NFSERR_BADXDR, if the xdr can't be parsed and returns the * number of xdr bytes parsed in sump. */ static int nfsrv_getrefstr(struct nfsrv_descript *nd, u_char **fsrootp, u_char **srvp, int *sump, int *nilp) { u_int32_t *tl; u_char *cp = NULL, *cp2 = NULL, *cp3, *str; int i, j, len, stringlen, cnt, slen, siz, xdrsum, error = 0, nsrv; struct list { SLIST_ENTRY(list) next; int len; u_char host[1]; } *lsp, *nlsp; SLIST_HEAD(, list) head; *fsrootp = NULL; *srvp = NULL; *nilp = 0; /* * Get the fs_root path and check for the special case of null path * and 0 length server list. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len < 0 || len > 10240) { error = NFSERR_BADXDR; goto nfsmout; } if (len == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl != 0) { error = NFSERR_BADXDR; goto nfsmout; } *nilp = 1; *sump = 2 * NFSX_UNSIGNED; error = 0; goto nfsmout; } cp = malloc(len + 1, M_NFSSTRING, M_WAITOK); error = nfsrv_mtostr(nd, cp, len); if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt <= 0) error = NFSERR_BADXDR; } if (error) goto nfsmout; /* * Now, loop through the location list and make up the srvlist. */ xdrsum = (2 * NFSX_UNSIGNED) + NFSM_RNDUP(len); cp2 = cp3 = malloc(1024, M_NFSSTRING, M_WAITOK); slen = 1024; siz = 0; for (i = 0; i < cnt; i++) { SLIST_INIT(&head); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nsrv = fxdr_unsigned(int, *tl); if (nsrv <= 0) { error = NFSERR_BADXDR; goto nfsmout; } /* * Handle the first server by putting it in the srvstr. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } nfsrv_refstrbigenough(siz + len + 3, &cp2, &cp3, &slen); if (cp3 != cp2) { *cp3++ = ','; siz++; } error = nfsrv_mtostr(nd, cp3, len); if (error) goto nfsmout; cp3 += len; *cp3++ = ':'; siz += (len + 1); xdrsum += (2 * NFSX_UNSIGNED) + NFSM_RNDUP(len); for (j = 1; j < nsrv; j++) { /* * Yuck, put them in an slist and process them later. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } lsp = (struct list *)malloc(sizeof (struct list) + len, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, lsp->host, len); if (error) goto nfsmout; xdrsum += NFSX_UNSIGNED + NFSM_RNDUP(len); lsp->len = len; SLIST_INSERT_HEAD(&head, lsp, next); } /* * Finally, we can get the path. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } nfsrv_refstrbigenough(siz + len + 1, &cp2, &cp3, &slen); error = nfsrv_mtostr(nd, cp3, len); if (error) goto nfsmout; xdrsum += NFSX_UNSIGNED + NFSM_RNDUP(len); str = cp3; stringlen = len; cp3 += len; siz += len; SLIST_FOREACH_SAFE(lsp, &head, next, nlsp) { nfsrv_refstrbigenough(siz + lsp->len + stringlen + 3, &cp2, &cp3, &slen); *cp3++ = ','; NFSBCOPY(lsp->host, cp3, lsp->len); cp3 += lsp->len; *cp3++ = ':'; NFSBCOPY(str, cp3, stringlen); cp3 += stringlen; *cp3 = '\0'; siz += (lsp->len + stringlen + 2); free(lsp, M_TEMP); } } *fsrootp = cp; *srvp = cp2; *sump = xdrsum; NFSEXITCODE2(0, nd); return (0); nfsmout: if (cp != NULL) free(cp, M_NFSSTRING); if (cp2 != NULL) free(cp2, M_NFSSTRING); NFSEXITCODE2(error, nd); return (error); } /* * Make the malloc'd space large enough. This is a pain, but the xdr * doesn't set an upper bound on the side, so... */ static void nfsrv_refstrbigenough(int siz, u_char **cpp, u_char **cpp2, int *slenp) { u_char *cp; int i; if (siz <= *slenp) return; cp = malloc(siz + 1024, M_NFSSTRING, M_WAITOK); NFSBCOPY(*cpp, cp, *slenp); free(*cpp, M_NFSSTRING); i = *cpp2 - *cpp; *cpp = cp; *cpp2 = cp + i; *slenp = siz + 1024; } /* * Initialize the reply header data structures. */ APPLESTATIC void nfsrvd_rephead(struct nfsrv_descript *nd) { mbuf_t mreq; /* * If this is a big reply, use a cluster. */ if ((nd->nd_flag & ND_GSSINITREPLY) == 0 && nfs_bigreply[nd->nd_procnum]) { NFSMCLGET(mreq, M_WAITOK); nd->nd_mreq = mreq; nd->nd_mb = mreq; } else { NFSMGET(mreq); nd->nd_mreq = mreq; nd->nd_mb = mreq; } nd->nd_bpos = NFSMTOD(mreq, caddr_t); mbuf_setlen(mreq, 0); if ((nd->nd_flag & ND_GSSINITREPLY) == 0) NFSM_BUILD(nd->nd_errp, int *, NFSX_UNSIGNED); } /* * Lock a socket against others. * Currently used to serialize connect/disconnect attempts. */ int newnfs_sndlock(int *flagp) { struct timespec ts; NFSLOCKSOCK(); while (*flagp & NFSR_SNDLOCK) { *flagp |= NFSR_WANTSND; ts.tv_sec = 0; ts.tv_nsec = 0; (void) nfsmsleep((caddr_t)flagp, NFSSOCKMUTEXPTR, PZERO - 1, "nfsndlck", &ts); } *flagp |= NFSR_SNDLOCK; NFSUNLOCKSOCK(); return (0); } /* * Unlock the stream socket for others. */ void newnfs_sndunlock(int *flagp) { NFSLOCKSOCK(); if ((*flagp & NFSR_SNDLOCK) == 0) panic("nfs sndunlock"); *flagp &= ~NFSR_SNDLOCK; if (*flagp & NFSR_WANTSND) { *flagp &= ~NFSR_WANTSND; wakeup((caddr_t)flagp); } NFSUNLOCKSOCK(); } APPLESTATIC int nfsv4_getipaddr(struct nfsrv_descript *nd, struct sockaddr_in *sin, struct sockaddr_in6 *sin6, sa_family_t *saf, int *isudp) { struct in_addr saddr; uint32_t portnum, *tl; int i, j, k; sa_family_t af = AF_UNSPEC; char addr[64], protocol[5], *cp; int cantparse = 0, error = 0; uint16_t portv; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i >= 3 && i <= 4) { error = nfsrv_mtostr(nd, protocol, i); if (error) goto nfsmout; if (strcmp(protocol, "tcp") == 0) { af = AF_INET; *isudp = 0; } else if (strcmp(protocol, "udp") == 0) { af = AF_INET; *isudp = 1; } else if (strcmp(protocol, "tcp6") == 0) { af = AF_INET6; *isudp = 0; } else if (strcmp(protocol, "udp6") == 0) { af = AF_INET6; *isudp = 1; } else cantparse = 1; } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (cantparse == 0 && i >= 11 && i < 64) { /* * The shortest address is 11chars and the longest is < 64. */ error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; /* Find the port# at the end and extract that. */ i = strlen(addr); k = 0; cp = &addr[i - 1]; /* Count back two '.'s from end to get port# field. */ for (j = 0; j < i; j++) { if (*cp == '.') { k++; if (k == 2) break; } cp--; } if (k == 2) { /* * The NFSv4 port# is appended as .N.N, where N is * a decimal # in the range 0-255, just like an inet4 * address. Cheat and use inet_aton(), which will * return a Class A address and then shift the high * order 8bits over to convert it to the port#. */ *cp++ = '\0'; if (inet_aton(cp, &saddr) == 1) { portnum = ntohl(saddr.s_addr); portv = (uint16_t)((portnum >> 16) | (portnum & 0xff)); } else cantparse = 1; } else cantparse = 1; if (cantparse == 0) { if (af == AF_INET) { if (inet_pton(af, addr, &sin->sin_addr) == 1) { sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = htons(portv); *saf = af; return (0); } } else { if (inet_pton(af, addr, &sin6->sin6_addr) == 1) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(portv); *saf = af; return (0); } } } } else { if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } error = EPERM; nfsmout: return (error); } /* * Handle an NFSv4.1 Sequence request for the session. * If reply != NULL, use it to return the cached reply, as required. * The client gets a cached reply via this call for callbacks, however the * server gets a cached reply via the nfsv4_seqsess_cachereply() call. */ int nfsv4_seqsession(uint32_t seqid, uint32_t slotid, uint32_t highslot, struct nfsslot *slots, struct mbuf **reply, uint16_t maxslot) { int error; error = 0; if (reply != NULL) *reply = NULL; if (slotid > maxslot) return (NFSERR_BADSLOT); if (seqid == slots[slotid].nfssl_seq) { /* A retry. */ if (slots[slotid].nfssl_inprog != 0) error = NFSERR_DELAY; else if (slots[slotid].nfssl_reply != NULL) { if (reply != NULL) { *reply = slots[slotid].nfssl_reply; slots[slotid].nfssl_reply = NULL; } slots[slotid].nfssl_inprog = 1; error = NFSERR_REPLYFROMCACHE; } else /* No reply cached, so just do it. */ slots[slotid].nfssl_inprog = 1; } else if ((slots[slotid].nfssl_seq + 1) == seqid) { if (slots[slotid].nfssl_reply != NULL) m_freem(slots[slotid].nfssl_reply); slots[slotid].nfssl_reply = NULL; slots[slotid].nfssl_inprog = 1; slots[slotid].nfssl_seq++; } else error = NFSERR_SEQMISORDERED; return (error); } /* * Cache this reply for the slot. * Use the "rep" argument to return the cached reply if repstat is set to * NFSERR_REPLYFROMCACHE. The client never sets repstat to this value. */ void nfsv4_seqsess_cacherep(uint32_t slotid, struct nfsslot *slots, int repstat, struct mbuf **rep) { if (repstat == NFSERR_REPLYFROMCACHE) { *rep = slots[slotid].nfssl_reply; slots[slotid].nfssl_reply = NULL; } else { if (slots[slotid].nfssl_reply != NULL) m_freem(slots[slotid].nfssl_reply); slots[slotid].nfssl_reply = *rep; } slots[slotid].nfssl_inprog = 0; } /* * Generate the xdr for an NFSv4.1 Sequence Operation. */ APPLESTATIC void nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, struct nfsclsession *sep, int dont_replycache) { uint32_t *tl, slotseq = 0; int error, maxslot, slotpos; uint8_t sessionid[NFSX_V4SESSIONID]; error = nfsv4_sequencelookup(nmp, sep, &slotpos, &maxslot, &slotseq, sessionid); /* Build the Sequence arguments. */ NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); nd->nd_sequence = tl; bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; nd->nd_slotseq = tl; if (error == 0) { *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl++ = txdr_unsigned(maxslot); if (dont_replycache == 0) *tl = newnfs_true; else *tl = newnfs_false; } else { /* * There are two errors and the rest of the session can * just be zeros. * NFSERR_BADSESSION: This bad session should just generate * the same error again when the RPC is retried. * ESTALE: A forced dismount is in progress and will cause the * RPC to fail later. */ *tl++ = 0; *tl++ = 0; *tl++ = 0; *tl = 0; } nd->nd_flag |= ND_HASSEQUENCE; } int nfsv4_sequencelookup(struct nfsmount *nmp, struct nfsclsession *sep, int *slotposp, int *maxslotp, uint32_t *slotseqp, uint8_t *sessionid) { int i, maxslot, slotpos; uint64_t bitval; /* Find an unused slot. */ slotpos = -1; maxslot = -1; mtx_lock(&sep->nfsess_mtx); do { if (nmp != NULL && sep->nfsess_defunct != 0) { /* Just return the bad session. */ bcopy(sep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID); mtx_unlock(&sep->nfsess_mtx); return (NFSERR_BADSESSION); } bitval = 1; for (i = 0; i < sep->nfsess_foreslots; i++) { if ((bitval & sep->nfsess_slots) == 0) { slotpos = i; sep->nfsess_slots |= bitval; sep->nfsess_slotseq[i]++; *slotseqp = sep->nfsess_slotseq[i]; break; } bitval <<= 1; } if (slotpos == -1) { /* * If a forced dismount is in progress, just return. * This RPC attempt will fail when it calls * newnfs_request(). */ if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) { mtx_unlock(&sep->nfsess_mtx); return (ESTALE); } /* Wake up once/sec, to check for a forced dismount. */ (void)mtx_sleep(&sep->nfsess_slots, &sep->nfsess_mtx, PZERO, "nfsclseq", hz); } } while (slotpos == -1); /* Now, find the highest slot in use. (nfsc_slots is 64bits) */ bitval = 1; for (i = 0; i < 64; i++) { if ((bitval & sep->nfsess_slots) != 0) maxslot = i; bitval <<= 1; } bcopy(sep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID); mtx_unlock(&sep->nfsess_mtx); *slotposp = slotpos; *maxslotp = maxslot; return (0); } /* * Free a session slot. */ APPLESTATIC void nfsv4_freeslot(struct nfsclsession *sep, int slot) { uint64_t bitval; bitval = 1; if (slot > 0) bitval <<= slot; mtx_lock(&sep->nfsess_mtx); if ((bitval & sep->nfsess_slots) == 0) printf("freeing free slot!!\n"); sep->nfsess_slots &= ~bitval; wakeup(&sep->nfsess_slots); mtx_unlock(&sep->nfsess_mtx); +} + +/* + * Search for a matching pnfsd mirror device structure, base on the nmp arg. + * Return one if found, NULL otherwise. + */ +struct nfsdevice * +nfsv4_findmirror(struct nfsmount *nmp) +{ + struct nfsdevice *ds, *fndds; + int fndmirror; + + mtx_assert(NFSDDSMUTEXPTR, MA_OWNED); + /* + * Search the DS server list for a match with nmp. + * Remove the DS entry if found and there is a mirror. + */ + fndds = NULL; + fndmirror = 0; + if (nfsrv_devidcnt == 0) + return (fndds); + TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { + if (ds->nfsdev_nmp == nmp) { + NFSCL_DEBUG(4, "fnd main ds\n"); + fndds = ds; + } else if (ds->nfsdev_nmp != NULL) + fndmirror = 1; + if (fndds != NULL && fndmirror != 0) + break; + } + if (fndmirror == 0) { + NFSCL_DEBUG(4, "no mirror for DS\n"); + return (NULL); + } + return (fndds); } Index: head/sys/fs/nfs/nfs_var.h =================================================================== --- head/sys/fs/nfs/nfs_var.h (revision 335011) +++ head/sys/fs/nfs/nfs_var.h (revision 335012) @@ -1,693 +1,739 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * XXX needs and because of typedefs */ struct uio; struct ucred; struct nfscred; NFSPROC_T; struct buf; struct sockaddr_in; struct nfs_dlmount; struct file; struct nfsmount; struct socket; struct nfsreq; struct nfssockreq; struct vattr; struct nameidata; struct nfsnode; struct nfsfh; struct sillyrename; struct componentname; struct nfsd_srvargs; struct nfsrv_descript; struct nfs_fattr; union nethostaddr; struct nfsstate; struct nfslock; struct nfsclient; +struct nfslayout; struct nfsdsession; struct nfslockconflict; struct nfsd_idargs; struct nfsd_clid; struct nfsusrgrp; struct nfsclowner; struct nfsclopen; struct nfsclopenhead; struct nfsclclient; struct nfsclsession; struct nfscllockowner; struct nfscllock; struct nfscldeleg; struct nfscllayout; struct nfscldevinfo; struct nfsv4lock; struct nfsvattr; struct nfs_vattr; struct NFSSVCARGS; +struct nfsdevice; +struct pnfsdsfile; +struct pnfsdsattr; #ifdef __FreeBSD__ NFS_ACCESS_ARGS; NFS_OPEN_ARGS; NFS_GETATTR_ARGS; NFS_LOOKUP_ARGS; NFS_READDIR_ARGS; #endif /* nfs_nfsdstate.c */ int nfsrv_setclient(struct nfsrv_descript *, struct nfsclient **, nfsquad_t *, nfsquad_t *, NFSPROC_T *); int nfsrv_getclient(nfsquad_t, int, struct nfsclient **, struct nfsdsession *, nfsquad_t, uint32_t, struct nfsrv_descript *, NFSPROC_T *); int nfsrv_destroyclient(nfsquad_t, NFSPROC_T *); int nfsrv_destroysession(struct nfsrv_descript *, uint8_t *); int nfsrv_bindconnsess(struct nfsrv_descript *, uint8_t *, int *); int nfsrv_freestateid(struct nfsrv_descript *, nfsv4stateid_t *, NFSPROC_T *); int nfsrv_teststateid(struct nfsrv_descript *, nfsv4stateid_t *, NFSPROC_T *); int nfsrv_adminrevoke(struct nfsd_clid *, NFSPROC_T *); void nfsrv_dumpclients(struct nfsd_dumpclients *, int); void nfsrv_dumplocks(vnode_t, struct nfsd_dumplocks *, int, NFSPROC_T *); int nfsrv_lockctrl(vnode_t, struct nfsstate **, struct nfslock **, struct nfslockconflict *, nfsquad_t, nfsv4stateid_t *, struct nfsexstuff *, struct nfsrv_descript *, NFSPROC_T *); int nfsrv_openctrl(struct nfsrv_descript *, vnode_t, struct nfsstate **, nfsquad_t, nfsv4stateid_t *, nfsv4stateid_t *, u_int32_t *, struct nfsexstuff *, NFSPROC_T *, u_quad_t); int nfsrv_opencheck(nfsquad_t, nfsv4stateid_t *, struct nfsstate *, vnode_t, struct nfsrv_descript *, NFSPROC_T *, int); int nfsrv_openupdate(vnode_t, struct nfsstate *, nfsquad_t, - nfsv4stateid_t *, struct nfsrv_descript *, NFSPROC_T *); + nfsv4stateid_t *, struct nfsrv_descript *, NFSPROC_T *, int *); int nfsrv_delegupdate(struct nfsrv_descript *, nfsquad_t, nfsv4stateid_t *, - vnode_t, int, struct ucred *, NFSPROC_T *); + vnode_t, int, struct ucred *, NFSPROC_T *, int *); int nfsrv_releaselckown(struct nfsstate *, nfsquad_t, NFSPROC_T *); void nfsrv_zapclient(struct nfsclient *, NFSPROC_T *); int nfssvc_idname(struct nfsd_idargs *); void nfsrv_servertimer(void); int nfsrv_getclientipaddr(struct nfsrv_descript *, struct nfsclient *); void nfsrv_setupstable(NFSPROC_T *); void nfsrv_updatestable(NFSPROC_T *); void nfsrv_writestable(u_char *, int, int, NFSPROC_T *); void nfsrv_throwawayopens(NFSPROC_T *); int nfsrv_checkremove(vnode_t, int, NFSPROC_T *); void nfsd_recalldelegation(vnode_t, NFSPROC_T *); void nfsd_disabledelegation(vnode_t, NFSPROC_T *); int nfsrv_checksetattr(vnode_t, struct nfsrv_descript *, nfsv4stateid_t *, struct nfsvattr *, nfsattrbit_t *, struct nfsexstuff *, NFSPROC_T *); int nfsrv_checkgetattr(struct nfsrv_descript *, vnode_t, - struct nfsvattr *, nfsattrbit_t *, struct ucred *, NFSPROC_T *); + struct nfsvattr *, nfsattrbit_t *, NFSPROC_T *); int nfsrv_nfsuserdport(struct sockaddr *, u_short, NFSPROC_T *); void nfsrv_nfsuserddelport(void); void nfsrv_throwawayallstate(NFSPROC_T *); int nfsrv_checksequence(struct nfsrv_descript *, uint32_t, uint32_t *, uint32_t *, int, uint32_t *, NFSPROC_T *); int nfsrv_checkreclaimcomplete(struct nfsrv_descript *); void nfsrv_cache_session(uint8_t *, uint32_t, int, struct mbuf **); void nfsrv_freeallbackchannel_xprts(void); +int nfsrv_layoutcommit(struct nfsrv_descript *, vnode_t, int, int, uint64_t, + uint64_t, uint64_t, int, struct timespec *, int, nfsv4stateid_t *, + int, char *, int *, uint64_t *, struct ucred *, NFSPROC_T *); +int nfsrv_layoutget(struct nfsrv_descript *, vnode_t, struct nfsexstuff *, + int, int *, uint64_t *, uint64_t *, uint64_t, nfsv4stateid_t *, int, int *, + int *, char *, struct ucred *, NFSPROC_T *); +void nfsrv_flexmirrordel(char *, NFSPROC_T *); +void nfsrv_recalloldlayout(NFSPROC_T *); +int nfsrv_layoutreturn(struct nfsrv_descript *, vnode_t, int, int, uint64_t, + uint64_t, int, int, nfsv4stateid_t *, int, uint32_t *, int *, + struct ucred *, NFSPROC_T *); +int nfsrv_getdevinfo(char *, int, uint32_t *, uint32_t *, int *, char **); +void nfsrv_freeonedevid(struct nfsdevice *); +void nfsrv_freealllayoutsanddevids(void); +void nfsrv_freefilelayouts(fhandle_t *); +int nfsrv_deldsserver(char *, NFSPROC_T *); +struct nfsdevice *nfsrv_deldsnmp(struct nfsmount *, NFSPROC_T *); +int nfsrv_createdevids(struct nfsd_nfsd_args *, NFSPROC_T *); +int nfsrv_checkdsattr(struct nfsrv_descript *, vnode_t, NFSPROC_T *); +int nfsrv_copymr(vnode_t, vnode_t, vnode_t, struct nfsdevice *, + struct pnfsdsfile *, struct pnfsdsfile *, int, struct ucred *, NFSPROC_T *); +int nfsrv_mdscopymr(char *, char *, char *, char *, int *, char *, NFSPROC_T *, + struct vnode **, struct vnode **, struct pnfsdsfile **, struct nfsdevice **, + struct nfsdevice **); /* nfs_nfsdserv.c */ int nfsrvd_access(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_getattr(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_setattr(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_lookup(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_readlink(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_read(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_write(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_create(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_mknod(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_remove(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_rename(struct nfsrv_descript *, int, vnode_t, vnode_t, NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *); int nfsrvd_link(struct nfsrv_descript *, int, vnode_t, vnode_t, NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *); int nfsrvd_symlink(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_mkdir(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_readdir(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_readdirplus(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_commit(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_statfs(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_fsinfo(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_close(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_delegpurge(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_delegreturn(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_getfh(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_lock(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_lockt(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_locku(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_openconfirm(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_opendowngrade(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_renew(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_secinfo(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_setclientid(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_setclientidcfrm(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_verify(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_open(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_openattr(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_releaselckown(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_pathconf(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_exchangeid(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_createsession(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_sequence(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_reclaimcomplete(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_destroyclientid(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_bindconnsess(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_destroysession(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_freestateid(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); +int nfsrvd_layoutget(struct nfsrv_descript *, int, + vnode_t, NFSPROC_T *, struct nfsexstuff *); +int nfsrvd_getdevinfo(struct nfsrv_descript *, int, + vnode_t, NFSPROC_T *, struct nfsexstuff *); +int nfsrvd_layoutcommit(struct nfsrv_descript *, int, + vnode_t, NFSPROC_T *, struct nfsexstuff *); +int nfsrvd_layoutreturn(struct nfsrv_descript *, int, + vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_teststateid(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_notsupp(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); /* nfs_nfsdsocket.c */ void nfsrvd_rephead(struct nfsrv_descript *); void nfsrvd_dorpc(struct nfsrv_descript *, int, u_char *, int, u_int32_t, NFSPROC_T *); /* nfs_nfsdcache.c */ void nfsrvd_initcache(void); int nfsrvd_getcache(struct nfsrv_descript *); struct nfsrvcache *nfsrvd_updatecache(struct nfsrv_descript *); void nfsrvd_sentcache(struct nfsrvcache *, int, uint32_t); void nfsrvd_cleancache(void); void nfsrvd_refcache(struct nfsrvcache *); void nfsrvd_derefcache(struct nfsrvcache *); void nfsrvd_delcache(struct nfsrvcache *); void nfsrc_trimcache(uint64_t, uint32_t, int); /* nfs_commonsubs.c */ void newnfs_init(void); int nfsaddr_match(int, union nethostaddr *, NFSSOCKADDR_T); int nfsaddr2_match(NFSSOCKADDR_T, NFSSOCKADDR_T); int nfsm_strtom(struct nfsrv_descript *, const char *, int); int nfsm_mbufuio(struct nfsrv_descript *, struct uio *, int); int nfsm_fhtom(struct nfsrv_descript *, u_int8_t *, int, int); int nfsm_advance(struct nfsrv_descript *, int, int); void *nfsm_dissct(struct nfsrv_descript *, int, int); void newnfs_trimleading(struct nfsrv_descript *); void newnfs_trimtrailing(struct nfsrv_descript *, mbuf_t, caddr_t); void newnfs_copycred(struct nfscred *, struct ucred *); void newnfs_copyincred(struct ucred *, struct nfscred *); int nfsrv_dissectacl(struct nfsrv_descript *, NFSACL_T *, int *, int *, NFSPROC_T *); int nfsrv_getattrbits(struct nfsrv_descript *, nfsattrbit_t *, int *, int *); int nfsv4_loadattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, struct nfsfh **, fhandle_t *, int, struct nfsv3_pathconf *, struct statfs *, struct nfsstatfs *, struct nfsfsinfo *, NFSACL_T *, int, int *, u_int32_t *, u_int32_t *, NFSPROC_T *, struct ucred *); int nfsv4_lock(struct nfsv4lock *, int, int *, void *, struct mount *); void nfsv4_unlock(struct nfsv4lock *, int); void nfsv4_relref(struct nfsv4lock *); void nfsv4_getref(struct nfsv4lock *, int *, void *, struct mount *); int nfsv4_getref_nonblock(struct nfsv4lock *); int nfsv4_testlock(struct nfsv4lock *); int nfsrv_mtostr(struct nfsrv_descript *, char *, int); void nfsrv_cleanusergroup(void); int nfsrv_checkutf8(u_int8_t *, int); int newnfs_sndlock(int *); void newnfs_sndunlock(int *); int nfsv4_getipaddr(struct nfsrv_descript *, struct sockaddr_in *, struct sockaddr_in6 *, sa_family_t *, int *); int nfsv4_seqsession(uint32_t, uint32_t, uint32_t, struct nfsslot *, struct mbuf **, uint16_t); void nfsv4_seqsess_cacherep(uint32_t, struct nfsslot *, int, struct mbuf **); void nfsv4_setsequence(struct nfsmount *, struct nfsrv_descript *, struct nfsclsession *, int); int nfsv4_sequencelookup(struct nfsmount *, struct nfsclsession *, int *, int *, uint32_t *, uint8_t *); void nfsv4_freeslot(struct nfsclsession *, int); struct ucred *nfsrv_getgrpscred(struct ucred *); +struct nfsdevice *nfsv4_findmirror(struct nfsmount *); /* nfs_clcomsubs.c */ void nfsm_uiombuf(struct nfsrv_descript *, struct uio *, int); struct mbuf *nfsm_uiombuflist(struct uio *, int, struct mbuf **, char **); void nfscl_reqstart(struct nfsrv_descript *, int, struct nfsmount *, u_int8_t *, int, u_int32_t **, struct nfsclsession *, int, int); nfsuint64 *nfscl_getcookie(struct nfsnode *, off_t off, int); void nfscl_fillsattr(struct nfsrv_descript *, struct vattr *, vnode_t, int, u_int32_t); u_int8_t *nfscl_getmyip(struct nfsmount *, struct in6_addr *, int *); int nfsm_getfh(struct nfsrv_descript *, struct nfsfh **); int nfscl_mtofh(struct nfsrv_descript *, struct nfsfh **, struct nfsvattr *, int *); int nfscl_postop_attr(struct nfsrv_descript *, struct nfsvattr *, int *, void *); int nfscl_wcc_data(struct nfsrv_descript *, vnode_t, struct nfsvattr *, int *, int *, void *); int nfsm_loadattr(struct nfsrv_descript *, struct nfsvattr *); int nfscl_request(struct nfsrv_descript *, vnode_t, NFSPROC_T *, struct ucred *, void *); void nfsm_stateidtom(struct nfsrv_descript *, nfsv4stateid_t *, int); /* nfs_nfsdsubs.c */ void nfsd_fhtovp(struct nfsrv_descript *, struct nfsrvfh *, int, vnode_t *, struct nfsexstuff *, mount_t *, int, NFSPROC_T *); int nfsd_excred(struct nfsrv_descript *, struct nfsexstuff *, struct ucred *); int nfsrv_mtofh(struct nfsrv_descript *, struct nfsrvfh *); int nfsrv_putattrbit(struct nfsrv_descript *, nfsattrbit_t *); void nfsrv_wcc(struct nfsrv_descript *, int, struct nfsvattr *, int, struct nfsvattr *); int nfsv4_fillattr(struct nfsrv_descript *, struct mount *, vnode_t, NFSACL_T *, struct vattr *, fhandle_t *, int, nfsattrbit_t *, - struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t); + struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t, struct statfs *); void nfsrv_fillattr(struct nfsrv_descript *, struct nfsvattr *); void nfsrv_adj(mbuf_t, int, int); void nfsrv_postopattr(struct nfsrv_descript *, int, struct nfsvattr *); int nfsd_errmap(struct nfsrv_descript *); void nfsv4_uidtostr(uid_t, u_char **, int *, NFSPROC_T *); int nfsv4_strtouid(struct nfsrv_descript *, u_char *, int, uid_t *, NFSPROC_T *); void nfsv4_gidtostr(gid_t, u_char **, int *, NFSPROC_T *); int nfsv4_strtogid(struct nfsrv_descript *, u_char *, int, gid_t *, NFSPROC_T *); int nfsrv_checkuidgid(struct nfsrv_descript *, struct nfsvattr *); void nfsrv_fixattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, NFSACL_T *, NFSPROC_T *, nfsattrbit_t *, struct nfsexstuff *); int nfsrv_errmoved(int); int nfsrv_putreferralattr(struct nfsrv_descript *, nfsattrbit_t *, struct nfsreferral *, int, int *); int nfsrv_parsename(struct nfsrv_descript *, char *, u_long *, NFSPATHLEN_T *); void nfsd_init(void); int nfsd_checkrootexp(struct nfsrv_descript *); void nfsd_getminorvers(struct nfsrv_descript *, u_char *, u_char **, int *, u_int32_t *); /* nfs_clvfsops.c */ void nfscl_retopts(struct nfsmount *, char *, size_t); /* nfs_commonport.c */ int nfsrv_lookupfilename(struct nameidata *, char *, NFSPROC_T *); void nfsrv_object_create(vnode_t, NFSPROC_T *); int nfsrv_mallocmget_limit(void); int nfsvno_v4rootexport(struct nfsrv_descript *); void newnfs_portinit(void); struct ucred *newnfs_getcred(void); void newnfs_setroot(struct ucred *); int nfs_catnap(int, int, const char *); struct nfsreferral *nfsv4root_getreferral(vnode_t, vnode_t, u_int32_t); int nfsvno_pathconf(vnode_t, int, long *, struct ucred *, NFSPROC_T *); int nfsrv_atroot(vnode_t, uint64_t *); void newnfs_timer(void *); int nfs_supportsnfsv4acls(vnode_t); /* nfs_commonacl.c */ int nfsrv_dissectace(struct nfsrv_descript *, struct acl_entry *, int *, int *, NFSPROC_T *); int nfsrv_buildacl(struct nfsrv_descript *, NFSACL_T *, enum vtype, NFSPROC_T *); -int nfsrv_setacl(vnode_t, NFSACL_T *, struct ucred *, - NFSPROC_T *); int nfsrv_compareacl(NFSACL_T *, NFSACL_T *); /* nfs_clrpcops.c */ int nfsrpc_null(vnode_t, struct ucred *, NFSPROC_T *); int nfsrpc_access(vnode_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *); int nfsrpc_accessrpc(vnode_t, u_int32_t, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, u_int32_t *, void *); int nfsrpc_open(vnode_t, int, struct ucred *, NFSPROC_T *); int nfsrpc_openrpc(struct nfsmount *, vnode_t, u_int8_t *, int, u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int, struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *, int, int); int nfsrpc_opendowngrade(vnode_t, u_int32_t, struct nfsclopen *, struct ucred *, NFSPROC_T *); int nfsrpc_close(vnode_t, int, NFSPROC_T *); int nfsrpc_closerpc(struct nfsrv_descript *, struct nfsmount *, struct nfsclopen *, struct ucred *, NFSPROC_T *, int); int nfsrpc_openconfirm(vnode_t, u_int8_t *, int, struct nfsclopen *, struct ucred *, NFSPROC_T *); int nfsrpc_setclient(struct nfsmount *, struct nfsclclient *, int, struct ucred *, NFSPROC_T *); int nfsrpc_getattr(vnode_t, struct ucred *, NFSPROC_T *, struct nfsvattr *, void *); int nfsrpc_getattrnovp(struct nfsmount *, u_int8_t *, int, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, u_int64_t *, uint32_t *); int nfsrpc_setattr(vnode_t, struct vattr *, NFSACL_T *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_lookup(vnode_t, char *, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); int nfsrpc_readlink(vnode_t, struct uio *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_read(vnode_t, struct uio *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_write(vnode_t, struct uio *, int *, int *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *, int); int nfsrpc_mknod(vnode_t, char *, int, struct vattr *, u_int32_t, enum vtype, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); int nfsrpc_create(vnode_t, char *, int, struct vattr *, nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); int nfsrpc_remove(vnode_t, char *, int, vnode_t, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_rename(vnode_t, vnode_t, char *, int, vnode_t, vnode_t, char *, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, int *, int *, void *, void *); int nfsrpc_link(vnode_t, vnode_t, char *, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, int *, int *, void *); int nfsrpc_symlink(vnode_t, char *, int, char *, struct vattr *, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); int nfsrpc_mkdir(vnode_t, char *, int, struct vattr *, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); int nfsrpc_rmdir(vnode_t, char *, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_readdir(vnode_t, struct uio *, nfsuint64 *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, int *, void *); int nfsrpc_readdirplus(vnode_t, struct uio *, nfsuint64 *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, int *, void *); int nfsrpc_commit(vnode_t, u_quad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_advlock(vnode_t, off_t, int, struct flock *, int, struct ucred *, NFSPROC_T *, void *, int); int nfsrpc_lockt(struct nfsrv_descript *, vnode_t, struct nfsclclient *, u_int64_t, u_int64_t, struct flock *, struct ucred *, NFSPROC_T *, void *, int); int nfsrpc_lock(struct nfsrv_descript *, struct nfsmount *, vnode_t, u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short, struct ucred *, NFSPROC_T *, int); int nfsrpc_statfs(vnode_t, struct nfsstatfs *, struct nfsfsinfo *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_fsinfo(vnode_t, struct nfsfsinfo *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_pathconf(vnode_t, struct nfsv3_pathconf *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_renew(struct nfsclclient *, struct nfsclds *, struct ucred *, NFSPROC_T *); int nfsrpc_rellockown(struct nfsmount *, struct nfscllockowner *, uint8_t *, int, struct ucred *, NFSPROC_T *); int nfsrpc_getdirpath(struct nfsmount *, u_char *, struct ucred *, NFSPROC_T *); int nfsrpc_delegreturn(struct nfscldeleg *, struct ucred *, struct nfsmount *, NFSPROC_T *, int); int nfsrpc_getacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *); int nfsrpc_setacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *); int nfsrpc_exchangeid(struct nfsmount *, struct nfsclclient *, struct nfssockreq *, uint32_t, struct nfsclds **, struct ucred *, NFSPROC_T *); int nfsrpc_createsession(struct nfsmount *, struct nfsclsession *, struct nfssockreq *, uint32_t, int, struct ucred *, NFSPROC_T *); int nfsrpc_destroysession(struct nfsmount *, struct nfsclclient *, struct ucred *, NFSPROC_T *); int nfsrpc_destroyclient(struct nfsmount *, struct nfsclclient *, struct ucred *, NFSPROC_T *); int nfsrpc_getdeviceinfo(struct nfsmount *, uint8_t *, int, uint32_t *, struct nfscldevinfo **, struct ucred *, NFSPROC_T *); int nfsrpc_layoutcommit(struct nfsmount *, uint8_t *, int, int, uint64_t, uint64_t, uint64_t, nfsv4stateid_t *, int, struct ucred *, NFSPROC_T *, void *); int nfsrpc_layoutreturn(struct nfsmount *, uint8_t *, int, int, int, uint32_t, int, uint64_t, uint64_t, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, void *); int nfsrpc_reclaimcomplete(struct nfsmount *, struct ucred *, NFSPROC_T *); int nfscl_doiods(vnode_t, struct uio *, int *, int *, uint32_t, int, struct ucred *, NFSPROC_T *); int nfscl_findlayoutforio(struct nfscllayout *, uint64_t, uint32_t, struct nfsclflayout **); void nfscl_freenfsclds(struct nfsclds *); /* nfs_clstate.c */ int nfscl_open(vnode_t, u_int8_t *, int, u_int32_t, int, struct ucred *, NFSPROC_T *, struct nfsclowner **, struct nfsclopen **, int *, int *, int); int nfscl_getstateid(vnode_t, u_int8_t *, int, u_int32_t, int, struct ucred *, NFSPROC_T *, nfsv4stateid_t *, void **); void nfscl_ownerrelease(struct nfsmount *, struct nfsclowner *, int, int, int); void nfscl_openrelease(struct nfsmount *, struct nfsclopen *, int, int); int nfscl_getcl(struct mount *, struct ucred *, NFSPROC_T *, int, struct nfsclclient **); struct nfsclclient *nfscl_findcl(struct nfsmount *); void nfscl_clientrelease(struct nfsclclient *); void nfscl_freelock(struct nfscllock *, int); void nfscl_freelockowner(struct nfscllockowner *, int); int nfscl_getbytelock(vnode_t, u_int64_t, u_int64_t, short, struct ucred *, NFSPROC_T *, struct nfsclclient *, int, void *, int, u_int8_t *, u_int8_t *, struct nfscllockowner **, int *, int *); int nfscl_relbytelock(vnode_t, u_int64_t, u_int64_t, struct ucred *, NFSPROC_T *, int, struct nfsclclient *, void *, int, struct nfscllockowner **, int *); int nfscl_checkwritelocked(vnode_t, struct flock *, struct ucred *, NFSPROC_T *, void *, int); void nfscl_lockrelease(struct nfscllockowner *, int, int); void nfscl_fillclid(u_int64_t, char *, u_int8_t *, u_int16_t); void nfscl_filllockowner(void *, u_int8_t *, int); void nfscl_freeopen(struct nfsclopen *, int); void nfscl_umount(struct nfsmount *, NFSPROC_T *); void nfscl_renewthread(struct nfsclclient *, NFSPROC_T *); void nfscl_initiate_recovery(struct nfsclclient *); int nfscl_hasexpired(struct nfsclclient *, u_int32_t, NFSPROC_T *); void nfscl_dumpstate(struct nfsmount *, int, int, int, int); void nfscl_dupopen(vnode_t, int); int nfscl_getclose(vnode_t, struct nfsclclient **); int nfscl_doclose(vnode_t, struct nfsclclient **, NFSPROC_T *); void nfsrpc_doclose(struct nfsmount *, struct nfsclopen *, NFSPROC_T *); int nfscl_deleg(mount_t, struct nfsclclient *, u_int8_t *, int, struct ucred *, NFSPROC_T *, struct nfscldeleg **); void nfscl_lockinit(struct nfsv4lock *); void nfscl_lockexcl(struct nfsv4lock *, void *); void nfscl_lockunlock(struct nfsv4lock *); void nfscl_lockderef(struct nfsv4lock *); void nfscl_docb(struct nfsrv_descript *, NFSPROC_T *); void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *, void *, int); int nfscl_lockt(vnode_t, struct nfsclclient *, u_int64_t, u_int64_t, struct flock *, NFSPROC_T *, void *, int); int nfscl_mustflush(vnode_t); int nfscl_nodeleg(vnode_t, int); int nfscl_removedeleg(vnode_t, NFSPROC_T *, nfsv4stateid_t *); int nfscl_getref(struct nfsmount *); void nfscl_relref(struct nfsmount *); int nfscl_renamedeleg(vnode_t, nfsv4stateid_t *, int *, vnode_t, nfsv4stateid_t *, int *, NFSPROC_T *); void nfscl_reclaimnode(vnode_t); void nfscl_newnode(vnode_t); void nfscl_delegmodtime(vnode_t); void nfscl_deleggetmodtime(vnode_t, struct timespec *); int nfscl_tryclose(struct nfsclopen *, struct ucred *, struct nfsmount *, NFSPROC_T *); void nfscl_cleanup(NFSPROC_T *); int nfscl_layout(struct nfsmount *, vnode_t, u_int8_t *, int, nfsv4stateid_t *, int, int, struct nfsclflayouthead *, struct nfscllayout **, struct ucred *, NFSPROC_T *); struct nfscllayout *nfscl_getlayout(struct nfsclclient *, uint8_t *, int, uint64_t, struct nfsclflayout **, int *); void nfscl_rellayout(struct nfscllayout *, int); struct nfscldevinfo *nfscl_getdevinfo(struct nfsclclient *, uint8_t *, struct nfscldevinfo *); void nfscl_reldevinfo(struct nfscldevinfo *); int nfscl_adddevinfo(struct nfsmount *, struct nfscldevinfo *, struct nfsclflayout *); void nfscl_freelayout(struct nfscllayout *); void nfscl_freeflayout(struct nfsclflayout *); void nfscl_freedevinfo(struct nfscldevinfo *); int nfscl_layoutcommit(vnode_t, NFSPROC_T *); /* nfs_clport.c */ int nfscl_nget(mount_t, vnode_t, struct nfsfh *, struct componentname *, NFSPROC_T *, struct nfsnode **, void *, int); NFSPROC_T *nfscl_getparent(NFSPROC_T *); void nfscl_start_renewthread(struct nfsclclient *); void nfscl_loadsbinfo(struct nfsmount *, struct nfsstatfs *, void *); void nfscl_loadfsinfo (struct nfsmount *, struct nfsfsinfo *); void nfscl_delegreturn(struct nfscldeleg *, int, struct nfsmount *, struct ucred *, NFSPROC_T *); void nfsrvd_cbinit(int); int nfscl_checksattr(struct vattr *, struct nfsvattr *); int nfscl_ngetreopen(mount_t, u_int8_t *, int, NFSPROC_T *, struct nfsnode **); int nfscl_procdoesntexist(u_int8_t *); int nfscl_maperr(NFSPROC_T *, int, uid_t, gid_t); /* nfs_clsubs.c */ void nfscl_init(void); /* nfs_clbio.c */ int ncl_flush(vnode_t, int, NFSPROC_T *, int, int); /* nfs_clnode.c */ void ncl_invalcaches(vnode_t); /* nfs_nfsdport.c */ -int nfsvno_getattr(vnode_t, struct nfsvattr *, struct ucred *, - NFSPROC_T *, int); +int nfsvno_getattr(vnode_t, struct nfsvattr *, struct nfsrv_descript *, + NFSPROC_T *, int, nfsattrbit_t *); int nfsvno_setattr(vnode_t, struct nfsvattr *, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_getfh(vnode_t, fhandle_t *, NFSPROC_T *); int nfsvno_accchk(vnode_t, accmode_t, struct ucred *, struct nfsexstuff *, NFSPROC_T *, int, int, u_int32_t *); int nfsvno_namei(struct nfsrv_descript *, struct nameidata *, vnode_t, int, struct nfsexstuff *, NFSPROC_T *, vnode_t *); void nfsvno_setpathbuf(struct nameidata *, char **, u_long **); void nfsvno_relpathbuf(struct nameidata *); int nfsvno_readlink(vnode_t, struct ucred *, NFSPROC_T *, mbuf_t *, mbuf_t *, int *); int nfsvno_read(vnode_t, off_t, int, struct ucred *, NFSPROC_T *, mbuf_t *, mbuf_t *); -int nfsvno_write(vnode_t, off_t, int, int, int, mbuf_t, +int nfsvno_write(vnode_t, off_t, int, int, int *, mbuf_t, char *, struct ucred *, NFSPROC_T *); int nfsvno_createsub(struct nfsrv_descript *, struct nameidata *, vnode_t *, struct nfsvattr *, int *, int32_t *, NFSDEV_T, NFSPROC_T *, struct nfsexstuff *); int nfsvno_mknod(struct nameidata *, struct nfsvattr *, struct ucred *, NFSPROC_T *); int nfsvno_mkdir(struct nameidata *, struct nfsvattr *, uid_t, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_symlink(struct nameidata *, struct nfsvattr *, char *, int, int, uid_t, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_getsymlink(struct nfsrv_descript *, struct nfsvattr *, NFSPROC_T *, char **, int *); int nfsvno_removesub(struct nameidata *, int, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_rmdirsub(struct nameidata *, int, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_rename(struct nameidata *, struct nameidata *, u_int32_t, u_int32_t, struct ucred *, NFSPROC_T *); int nfsvno_link(struct nameidata *, vnode_t, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_fsync(vnode_t, u_int64_t, int, struct ucred *, NFSPROC_T *); int nfsvno_statfs(vnode_t, struct statfs *); void nfsvno_getfs(struct nfsfsinfo *, int); void nfsvno_open(struct nfsrv_descript *, struct nameidata *, nfsquad_t, nfsv4stateid_t *, struct nfsstate *, int *, struct nfsvattr *, int32_t *, int, NFSACL_T *, nfsattrbit_t *, struct ucred *, NFSPROC_T *, struct nfsexstuff *, vnode_t *); -int nfsvno_updfilerev(vnode_t, struct nfsvattr *, struct ucred *, +int nfsvno_updfilerev(vnode_t, struct nfsvattr *, struct nfsrv_descript *, NFSPROC_T *); int nfsvno_fillattr(struct nfsrv_descript *, struct mount *, vnode_t, struct nfsvattr *, fhandle_t *, int, nfsattrbit_t *, struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t); int nfsrv_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *, NFSACL_T *, NFSPROC_T *); int nfsv4_sattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *, NFSACL_T *, NFSPROC_T *); int nfsvno_checkexp(mount_t, NFSSOCKADDR_T, struct nfsexstuff *, struct ucred **); int nfsvno_fhtovp(mount_t, fhandle_t *, NFSSOCKADDR_T, int, vnode_t *, struct nfsexstuff *, struct ucred **); vnode_t nfsvno_getvp(fhandle_t *); int nfsvno_advlock(vnode_t, int, u_int64_t, u_int64_t, NFSPROC_T *); int nfsrv_v4rootexport(void *, struct ucred *, NFSPROC_T *); int nfsvno_testexp(struct nfsrv_descript *, struct nfsexstuff *); uint32_t nfsrv_hashfh(fhandle_t *); uint32_t nfsrv_hashsessionid(uint8_t *); void nfsrv_backupstable(void); +int nfsrv_dsgetdevandfh(struct vnode *, NFSPROC_T *, int *, fhandle_t *, + char *); +int nfsrv_dsgetsockmnt(struct vnode *, int, char *, int *, int *, + NFSPROC_T *, struct vnode **, fhandle_t *, char *, char *, + struct vnode **, struct nfsmount **, struct nfsmount *, int *, int *); +int nfsrv_dscreate(struct vnode *, struct vattr *, struct vattr *, + fhandle_t *, struct pnfsdsfile *, struct pnfsdsattr *, char *, + struct ucred *, NFSPROC_T *, struct vnode **); +int nfsrv_updatemdsattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); +void nfsrv_killrpcs(struct nfsmount *); +int nfsrv_setacl(struct vnode *, NFSACL_T *, struct ucred *, NFSPROC_T *); /* nfs_commonkrpc.c */ int newnfs_nmcancelreqs(struct nfsmount *); void newnfs_set_sigmask(struct thread *, sigset_t *); void newnfs_restore_sigmask(struct thread *, sigset_t *); int newnfs_msleep(struct thread *, void *, struct mtx *, int, char *, int); int newnfs_request(struct nfsrv_descript *, struct nfsmount *, struct nfsclient *, struct nfssockreq *, vnode_t, NFSPROC_T *, struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *, struct nfsclsession *); int newnfs_connect(struct nfsmount *, struct nfssockreq *, struct ucred *, NFSPROC_T *, int); void newnfs_disconnect(struct nfssockreq *); int newnfs_sigintr(struct nfsmount *, NFSPROC_T *); /* nfs_nfsdkrpc.c */ int nfsrvd_addsock(struct file *); int nfsrvd_nfsd(NFSPROC_T *, struct nfsd_nfsd_args *); void nfsrvd_init(int); /* nfs_clkrpc.c */ int nfscbd_addsock(struct file *); int nfscbd_nfsd(NFSPROC_T *, struct nfsd_nfscbd_args *); Index: head/sys/fs/nfs/nfsport.h =================================================================== --- head/sys/fs/nfs/nfsport.h (revision 335011) +++ head/sys/fs/nfs/nfsport.h (revision 335012) @@ -1,1042 +1,1059 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSPORT_H_ #define _NFS_NFSPORT_H_ /* * In general, I'm not fond of #includes in .h files, but this seems * to be the cleanest way to handle #include files for the ports. */ #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * For Darwin, these functions should be "static" when built in a kext. * (This is always defined as nil otherwise.) */ #define APPLESTATIC #include #include #include #include #include #include #include #include #include #include #include "opt_nfs.h" #include "opt_ufs.h" /* * These types must be defined before the nfs includes. */ #define NFSSOCKADDR_T struct sockaddr * #define NFSPROC_T struct thread #define NFSDEV_T dev_t #define NFSSVCARGS nfssvc_args #define NFSACL_T struct acl /* * These should be defined as the types used for the corresponding VOP's * argument type. */ #define NFS_ACCESS_ARGS struct vop_access_args #define NFS_OPEN_ARGS struct vop_open_args #define NFS_GETATTR_ARGS struct vop_getattr_args #define NFS_LOOKUP_ARGS struct vop_lookup_args #define NFS_READDIR_ARGS struct vop_readdir_args /* * Allocate mbufs. Must succeed and never set the mbuf ptr to NULL. */ #define NFSMGET(m) do { \ MGET((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGET((m), M_WAITOK, MT_DATA); \ } \ } while (0) #define NFSMGETHDR(m) do { \ MGETHDR((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGETHDR((m), M_WAITOK, MT_DATA); \ } \ } while (0) #define NFSMCLGET(m, w) do { \ MGET((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGET((m), M_WAITOK, MT_DATA); \ } \ MCLGET((m), (w)); \ } while (0) #define NFSMCLGETHDR(m, w) do { \ MGETHDR((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGETHDR((m), M_WAITOK, MT_DATA); \ } \ } while (0) #define NFSMTOD mtod /* * Client side constant for size of a lockowner name. */ #define NFSV4CL_LOCKNAMELEN 12 /* * Type for a mutex lock. */ #define NFSMUTEX_T struct mtx #endif /* _KERNEL */ /* * NFSv4 Operation numbers. */ #define NFSV4OP_ACCESS 3 #define NFSV4OP_CLOSE 4 #define NFSV4OP_COMMIT 5 #define NFSV4OP_CREATE 6 #define NFSV4OP_DELEGPURGE 7 #define NFSV4OP_DELEGRETURN 8 #define NFSV4OP_GETATTR 9 #define NFSV4OP_GETFH 10 #define NFSV4OP_LINK 11 #define NFSV4OP_LOCK 12 #define NFSV4OP_LOCKT 13 #define NFSV4OP_LOCKU 14 #define NFSV4OP_LOOKUP 15 #define NFSV4OP_LOOKUPP 16 #define NFSV4OP_NVERIFY 17 #define NFSV4OP_OPEN 18 #define NFSV4OP_OPENATTR 19 #define NFSV4OP_OPENCONFIRM 20 #define NFSV4OP_OPENDOWNGRADE 21 #define NFSV4OP_PUTFH 22 #define NFSV4OP_PUTPUBFH 23 #define NFSV4OP_PUTROOTFH 24 #define NFSV4OP_READ 25 #define NFSV4OP_READDIR 26 #define NFSV4OP_READLINK 27 #define NFSV4OP_REMOVE 28 #define NFSV4OP_RENAME 29 #define NFSV4OP_RENEW 30 #define NFSV4OP_RESTOREFH 31 #define NFSV4OP_SAVEFH 32 #define NFSV4OP_SECINFO 33 #define NFSV4OP_SETATTR 34 #define NFSV4OP_SETCLIENTID 35 #define NFSV4OP_SETCLIENTIDCFRM 36 #define NFSV4OP_VERIFY 37 #define NFSV4OP_WRITE 38 #define NFSV4OP_RELEASELCKOWN 39 /* * Must be one greater than the last Operation#. */ #define NFSV4OP_NOPS 40 /* * Additional Ops for NFSv4.1. */ #define NFSV4OP_BACKCHANNELCTL 40 #define NFSV4OP_BINDCONNTOSESS 41 #define NFSV4OP_EXCHANGEID 42 #define NFSV4OP_CREATESESSION 43 #define NFSV4OP_DESTROYSESSION 44 #define NFSV4OP_FREESTATEID 45 #define NFSV4OP_GETDIRDELEG 46 #define NFSV4OP_GETDEVINFO 47 #define NFSV4OP_GETDEVLIST 48 #define NFSV4OP_LAYOUTCOMMIT 49 #define NFSV4OP_LAYOUTGET 50 #define NFSV4OP_LAYOUTRETURN 51 #define NFSV4OP_SECINFONONAME 52 #define NFSV4OP_SEQUENCE 53 #define NFSV4OP_SETSSV 54 #define NFSV4OP_TESTSTATEID 55 #define NFSV4OP_WANTDELEG 56 #define NFSV4OP_DESTROYCLIENTID 57 #define NFSV4OP_RECLAIMCOMPL 58 /* * Must be one more than last op#. * NFSv4.2 isn't implemented yet, but define the op# limit for it. */ #define NFSV41_NOPS 59 #define NFSV42_NOPS 72 /* Quirky case if the illegal op code */ #define NFSV4OP_OPILLEGAL 10044 /* * Fake NFSV4OP_xxx used for nfsstat. Start at NFSV42_NOPS. */ #define NFSV4OP_SYMLINK (NFSV42_NOPS) #define NFSV4OP_MKDIR (NFSV42_NOPS + 1) #define NFSV4OP_RMDIR (NFSV42_NOPS + 2) #define NFSV4OP_READDIRPLUS (NFSV42_NOPS + 3) #define NFSV4OP_MKNOD (NFSV42_NOPS + 4) #define NFSV4OP_FSSTAT (NFSV42_NOPS + 5) #define NFSV4OP_FSINFO (NFSV42_NOPS + 6) #define NFSV4OP_PATHCONF (NFSV42_NOPS + 7) #define NFSV4OP_V3CREATE (NFSV42_NOPS + 8) /* * This is the count of the fake operations listed above. */ #define NFSV4OP_FAKENOPS 9 /* * and the Callback OPs */ #define NFSV4OP_CBGETATTR 3 #define NFSV4OP_CBRECALL 4 /* * Must be one greater than the last Callback Operation# for NFSv4.0. */ #define NFSV4OP_CBNOPS 5 /* * Additional Callback Ops for NFSv4.1 only. */ #define NFSV4OP_CBLAYOUTRECALL 5 #define NFSV4OP_CBNOTIFY 6 #define NFSV4OP_CBPUSHDELEG 7 #define NFSV4OP_CBRECALLANY 8 #define NFSV4OP_CBRECALLOBJAVAIL 9 #define NFSV4OP_CBRECALLSLOT 10 #define NFSV4OP_CBSEQUENCE 11 #define NFSV4OP_CBWANTCANCELLED 12 #define NFSV4OP_CBNOTIFYLOCK 13 #define NFSV4OP_CBNOTIFYDEVID 14 #define NFSV41_CBNOPS 15 #define NFSV42_CBNOPS 16 /* * The lower numbers -> 21 are used by NFSv2 and v3. These define higher * numbers used by NFSv4. * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is * one greater than the last number. */ #ifndef NFS_V3NPROCS #define NFS_V3NPROCS 22 #define NFSPROC_LOOKUPP 22 #define NFSPROC_SETCLIENTID 23 #define NFSPROC_SETCLIENTIDCFRM 24 #define NFSPROC_LOCK 25 #define NFSPROC_LOCKU 26 #define NFSPROC_OPEN 27 #define NFSPROC_CLOSE 28 #define NFSPROC_OPENCONFIRM 29 #define NFSPROC_LOCKT 30 #define NFSPROC_OPENDOWNGRADE 31 #define NFSPROC_RENEW 32 #define NFSPROC_PUTROOTFH 33 #define NFSPROC_RELEASELCKOWN 34 #define NFSPROC_DELEGRETURN 35 #define NFSPROC_RETDELEGREMOVE 36 #define NFSPROC_RETDELEGRENAME1 37 #define NFSPROC_RETDELEGRENAME2 38 #define NFSPROC_GETACL 39 #define NFSPROC_SETACL 40 /* * Must be defined as one higher than the last Proc# above. */ #define NFSV4_NPROCS 41 /* Additional procedures for NFSv4.1. */ #define NFSPROC_EXCHANGEID 41 #define NFSPROC_CREATESESSION 42 #define NFSPROC_DESTROYSESSION 43 #define NFSPROC_DESTROYCLIENT 44 #define NFSPROC_FREESTATEID 45 #define NFSPROC_LAYOUTGET 46 #define NFSPROC_GETDEVICEINFO 47 #define NFSPROC_LAYOUTCOMMIT 48 #define NFSPROC_LAYOUTRETURN 49 #define NFSPROC_RECLAIMCOMPL 50 #define NFSPROC_WRITEDS 51 #define NFSPROC_READDS 52 #define NFSPROC_COMMITDS 53 #define NFSPROC_OPENLAYGET 54 #define NFSPROC_CREATELAYGET 55 /* * Must be defined as one higher than the last NFSv4.1 Proc# above. */ #define NFSV41_NPROCS 56 #endif /* NFS_V3NPROCS */ /* * New stats structure. * The vers field will be set to NFSSTATS_V1 by the caller. */ #define NFSSTATS_V1 1 struct nfsstatsv1 { int vers; /* Set to version requested by caller. */ uint64_t attrcache_hits; uint64_t attrcache_misses; uint64_t lookupcache_hits; uint64_t lookupcache_misses; uint64_t direofcache_hits; uint64_t direofcache_misses; uint64_t accesscache_hits; uint64_t accesscache_misses; uint64_t biocache_reads; uint64_t read_bios; uint64_t read_physios; uint64_t biocache_writes; uint64_t write_bios; uint64_t write_physios; uint64_t biocache_readlinks; uint64_t readlink_bios; uint64_t biocache_readdirs; uint64_t readdir_bios; uint64_t rpccnt[NFSV41_NPROCS + 13]; uint64_t rpcretries; uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS]; uint64_t srvrpc_errs; uint64_t srv_errs; uint64_t rpcrequests; uint64_t rpctimeouts; uint64_t rpcunexpected; uint64_t rpcinvalid; uint64_t srvcache_inproghits; uint64_t srvcache_idemdonehits; uint64_t srvcache_nonidemdonehits; uint64_t srvcache_misses; uint64_t srvcache_tcppeak; int srvcache_size; /* Updated by atomic_xx_int(). */ uint64_t srvclients; uint64_t srvopenowners; uint64_t srvopens; uint64_t srvlockowners; uint64_t srvlocks; uint64_t srvdelegates; uint64_t cbrpccnt[NFSV42_CBNOPS]; uint64_t clopenowners; uint64_t clopens; uint64_t cllockowners; uint64_t cllocks; uint64_t cldelegates; uint64_t cllocalopenowners; uint64_t cllocalopens; uint64_t cllocallockowners; uint64_t cllocallocks; uint64_t srvstartcnt; uint64_t srvdonecnt; uint64_t srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS]; uint64_t srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS]; struct bintime srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS]; struct bintime busyfrom; struct bintime busytime; }; /* * Old stats structure. */ struct ext_nfsstats { int attrcache_hits; int attrcache_misses; int lookupcache_hits; int lookupcache_misses; int direofcache_hits; int direofcache_misses; int accesscache_hits; int accesscache_misses; int biocache_reads; int read_bios; int read_physios; int biocache_writes; int write_bios; int write_physios; int biocache_readlinks; int readlink_bios; int biocache_readdirs; int readdir_bios; int rpccnt[NFSV4_NPROCS]; int rpcretries; int srvrpccnt[NFSV4OP_NOPS + NFSV4OP_FAKENOPS]; int srvrpc_errs; int srv_errs; int rpcrequests; int rpctimeouts; int rpcunexpected; int rpcinvalid; int srvcache_inproghits; int srvcache_idemdonehits; int srvcache_nonidemdonehits; int srvcache_misses; int srvcache_tcppeak; int srvcache_size; int srvclients; int srvopenowners; int srvopens; int srvlockowners; int srvlocks; int srvdelegates; int cbrpccnt[NFSV4OP_CBNOPS]; int clopenowners; int clopens; int cllockowners; int cllocks; int cldelegates; int cllocalopenowners; int cllocalopens; int cllocallockowners; int cllocallocks; }; #ifdef _KERNEL /* * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code. */ #ifndef NFS_NPROCS #define NFS_NPROCS NFSV4_NPROCS #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Just to keep nfs_var.h happy. */ struct nfs_vattr { int junk; }; struct nfsvattr { struct vattr na_vattr; nfsattrbit_t na_suppattr; u_int64_t na_mntonfileno; u_int64_t na_filesid[2]; }; #define na_type na_vattr.va_type #define na_mode na_vattr.va_mode #define na_nlink na_vattr.va_nlink #define na_uid na_vattr.va_uid #define na_gid na_vattr.va_gid #define na_fsid na_vattr.va_fsid #define na_fileid na_vattr.va_fileid #define na_size na_vattr.va_size #define na_blocksize na_vattr.va_blocksize #define na_atime na_vattr.va_atime #define na_mtime na_vattr.va_mtime #define na_ctime na_vattr.va_ctime #define na_gen na_vattr.va_gen #define na_flags na_vattr.va_flags #define na_rdev na_vattr.va_rdev #define na_bytes na_vattr.va_bytes #define na_filerev na_vattr.va_filerev #define na_vaflags na_vattr.va_vaflags #include /* * This is the header structure used for the lists, etc. (It has the * above record in it. */ struct nfsrv_stablefirst { LIST_HEAD(, nfsrv_stable) nsf_head; /* Head of nfsrv_stable list */ time_t nsf_eograce; /* Time grace period ends */ time_t *nsf_bootvals; /* Previous boottime values */ struct file *nsf_fp; /* File table pointer */ u_char nsf_flags; /* NFSNSF_ flags */ struct nfsf_rec nsf_rec; /* and above first record */ }; #define nsf_lease nsf_rec.lease #define nsf_numboots nsf_rec.numboots /* NFSNSF_xxx flags */ #define NFSNSF_UPDATEDONE 0x01 #define NFSNSF_GRACEOVER 0x02 #define NFSNSF_NEEDLOCK 0x04 #define NFSNSF_EXPIREDCLIENT 0x08 #define NFSNSF_NOOPENS 0x10 #define NFSNSF_OK 0x20 /* * Maximum number of boot times allowed in record. Although there is * really no need for a fixed upper bound, this serves as a sanity check * for a corrupted file. */ #define NFSNSF_MAXNUMBOOTS 10000 /* * This structure defines the other records in the file. The * nst_client array is actually the size of the client string name. */ struct nfst_rec { u_int16_t len; u_char flag; u_char client[1]; }; /* and the values for flag */ #define NFSNST_NEWSTATE 0x1 #define NFSNST_REVOKE 0x2 #define NFSNST_GOTSTATE 0x4 #define NFSNST_RECLAIMED 0x8 /* * This structure is linked onto nfsrv_stablefirst for the duration of * reclaim. */ struct nfsrv_stable { LIST_ENTRY(nfsrv_stable) nst_list; struct nfsclient *nst_clp; struct nfst_rec nst_rec; }; #define nst_timestamp nst_rec.timestamp #define nst_len nst_rec.len #define nst_flag nst_rec.flag #define nst_client nst_rec.client /* * At some point the server will run out of kernel storage for * state structures. For FreeBSD5.2, this results in a panic * kmem_map is full. It happens at well over 1000000 opens plus * locks on a PIII-800 with 256Mbytes, so that is where I've set * the limit. If your server panics due to too many opens/locks, * decrease the size of NFSRV_V4STATELIMIT. If you find the server * returning NFS4ERR_RESOURCE a lot and have lots of memory, try * increasing it. */ #define NFSRV_V4STATELIMIT 500000 /* Max # of Opens + Locks */ /* * The type required differs with BSDen (just the second arg). */ void nfsrvd_rcv(struct socket *, void *, int); /* * Macros for handling socket addresses. (Hopefully this makes the code * more portable, since I've noticed some 'BSD don't have sockaddrs in * mbufs any more.) */ #define NFSSOCKADDR(a, t) ((t)(a)) #define NFSSOCKADDRSIZE(a, s) ((a)->sa_len = (s)) /* * These should be defined as a process or thread structure, as required * for signal handling, etc. */ #define NFSNEWCRED(c) (crdup(c)) #define NFSPROCCRED(p) ((p)->td_ucred) #define NFSFREECRED(c) (crfree(c)) #define NFSUIOPROC(u, p) ((u)->uio_td = NULL) #define NFSPROCP(p) ((p)->td_proc) /* * Define these so that cn_hash and its length is ignored. */ #define NFSCNHASHZERO(c) #define NFSCNHASH(c, v) #define NCHNAMLEN 9999999 /* * These macros are defined to initialize and set the timer routine. */ #define NFS_TIMERINIT \ newnfs_timer(NULL) /* * Handle SMP stuff: */ #define NFSSTATESPINLOCK extern struct mtx nfs_state_mutex #define NFSLOCKSTATE() mtx_lock(&nfs_state_mutex) #define NFSUNLOCKSTATE() mtx_unlock(&nfs_state_mutex) #define NFSSTATEMUTEXPTR (&nfs_state_mutex) #define NFSREQSPINLOCK extern struct mtx nfs_req_mutex #define NFSLOCKREQ() mtx_lock(&nfs_req_mutex) #define NFSUNLOCKREQ() mtx_unlock(&nfs_req_mutex) #define NFSSOCKMUTEX extern struct mtx nfs_slock_mutex #define NFSSOCKMUTEXPTR (&nfs_slock_mutex) #define NFSLOCKSOCK() mtx_lock(&nfs_slock_mutex) #define NFSUNLOCKSOCK() mtx_unlock(&nfs_slock_mutex) #define NFSNAMEIDMUTEX extern struct mtx nfs_nameid_mutex #define NFSLOCKNAMEID() mtx_lock(&nfs_nameid_mutex) #define NFSUNLOCKNAMEID() mtx_unlock(&nfs_nameid_mutex) #define NFSNAMEIDREQUIRED() mtx_assert(&nfs_nameid_mutex, MA_OWNED) #define NFSCLSTATEMUTEX extern struct mtx nfs_clstate_mutex #define NFSCLSTATEMUTEXPTR (&nfs_clstate_mutex) #define NFSLOCKCLSTATE() mtx_lock(&nfs_clstate_mutex) #define NFSUNLOCKCLSTATE() mtx_unlock(&nfs_clstate_mutex) #define NFSDLOCKMUTEX extern struct mtx newnfsd_mtx #define NFSDLOCKMUTEXPTR (&newnfsd_mtx) #define NFSD_LOCK() mtx_lock(&newnfsd_mtx) #define NFSD_UNLOCK() mtx_unlock(&newnfsd_mtx) #define NFSD_LOCK_ASSERT() mtx_assert(&newnfsd_mtx, MA_OWNED) #define NFSD_UNLOCK_ASSERT() mtx_assert(&newnfsd_mtx, MA_NOTOWNED) #define NFSV4ROOTLOCKMUTEX extern struct mtx nfs_v4root_mutex #define NFSV4ROOTLOCKMUTEXPTR (&nfs_v4root_mutex) #define NFSLOCKV4ROOTMUTEX() mtx_lock(&nfs_v4root_mutex) #define NFSUNLOCKV4ROOTMUTEX() mtx_unlock(&nfs_v4root_mutex) #define NFSLOCKNODE(n) mtx_lock(&((n)->n_mtx)) #define NFSUNLOCKNODE(n) mtx_unlock(&((n)->n_mtx)) #define NFSLOCKMNT(m) mtx_lock(&((m)->nm_mtx)) #define NFSUNLOCKMNT(m) mtx_unlock(&((m)->nm_mtx)) #define NFSLOCKREQUEST(r) mtx_lock(&((r)->r_mtx)) #define NFSUNLOCKREQUEST(r) mtx_unlock(&((r)->r_mtx)) #define NFSPROCLISTLOCK() sx_slock(&allproc_lock) #define NFSPROCLISTUNLOCK() sx_sunlock(&allproc_lock) #define NFSLOCKSOCKREQ(r) mtx_lock(&((r)->nr_mtx)) #define NFSUNLOCKSOCKREQ(r) mtx_unlock(&((r)->nr_mtx)) #define NFSLOCKDS(d) mtx_lock(&((d)->nfsclds_mtx)) #define NFSUNLOCKDS(d) mtx_unlock(&((d)->nfsclds_mtx)) #define NFSSESSIONMUTEXPTR(s) (&((s)->mtx)) #define NFSLOCKSESSION(s) mtx_lock(&((s)->mtx)) #define NFSUNLOCKSESSION(s) mtx_unlock(&((s)->mtx)) +#define NFSLAYOUTMUTEXPTR(l) (&((l)->mtx)) #define NFSLOCKLAYOUT(l) mtx_lock(&((l)->mtx)) #define NFSUNLOCKLAYOUT(l) mtx_unlock(&((l)->mtx)) +#define NFSDDSMUTEXPTR (&nfsrv_dslock_mtx) #define NFSDDSLOCK() mtx_lock(&nfsrv_dslock_mtx) #define NFSDDSUNLOCK() mtx_unlock(&nfsrv_dslock_mtx) +#define NFSDDONTLISTMUTEXPTR (&nfsrv_dontlistlock_mtx) +#define NFSDDONTLISTLOCK() mtx_lock(&nfsrv_dontlistlock_mtx) +#define NFSDDONTLISTUNLOCK() mtx_unlock(&nfsrv_dontlistlock_mtx) +#define NFSDRECALLMUTEXPTR (&nfsrv_recalllock_mtx) +#define NFSDRECALLLOCK() mtx_lock(&nfsrv_recalllock_mtx) +#define NFSDRECALLUNLOCK() mtx_unlock(&nfsrv_recalllock_mtx) /* * Use these macros to initialize/free a mutex. */ #define NFSINITSOCKMUTEX(m) mtx_init((m), "nfssock", NULL, MTX_DEF) #define NFSFREEMUTEX(m) mtx_destroy((m)) int nfsmsleep(void *, void *, int, const char *, struct timespec *); /* * And weird vm stuff in the nfs server. */ #define PDIRUNLOCK 0x0 #define MAX_COMMIT_COUNT (1024 * 1024) /* * Define these to handle the type of va_rdev. */ #define NFSMAKEDEV(m, n) makedev((m), (n)) #define NFSMAJOR(d) major(d) #define NFSMINOR(d) minor(d) /* * The vnode tag for nfsv4root. */ #define VT_NFSV4ROOT "nfsv4root" /* * Define whatever it takes to do a vn_rdwr(). */ #define NFSD_RDWR(r, v, b, l, o, s, i, c, a, p) \ vn_rdwr((r), (v), (b), (l), (o), (s), (i), (c), NULL, (a), (p)) /* * Macros for handling memory for different BSDen. * NFSBCOPY(src, dst, len) - copies len bytes, non-overlapping * NFSOVBCOPY(src, dst, len) - ditto, but data areas might overlap * NFSBCMP(cp1, cp2, len) - compare len bytes, return 0 if same * NFSBZERO(cp, len) - set len bytes to 0x0 */ #define NFSBCOPY(s, d, l) bcopy((s), (d), (l)) #define NFSOVBCOPY(s, d, l) ovbcopy((s), (d), (l)) #define NFSBCMP(s, d, l) bcmp((s), (d), (l)) #define NFSBZERO(s, l) bzero((s), (l)) /* * Some queue.h files don't have these dfined in them. */ #define LIST_END(head) NULL #define SLIST_END(head) NULL #define TAILQ_END(head) NULL /* * This must be defined to be a global variable that increments once * per second, but never stops or goes backwards, even when a "date" * command changes the TOD clock. It is used for delta times for * leases, etc. */ #define NFSD_MONOSEC time_uptime /* * Declare the malloc types. */ MALLOC_DECLARE(M_NEWNFSRVCACHE); MALLOC_DECLARE(M_NEWNFSDCLIENT); MALLOC_DECLARE(M_NEWNFSDSTATE); MALLOC_DECLARE(M_NEWNFSDLOCK); MALLOC_DECLARE(M_NEWNFSDLOCKFILE); MALLOC_DECLARE(M_NEWNFSSTRING); MALLOC_DECLARE(M_NEWNFSUSERGROUP); MALLOC_DECLARE(M_NEWNFSDREQ); MALLOC_DECLARE(M_NEWNFSFH); MALLOC_DECLARE(M_NEWNFSCLOWNER); MALLOC_DECLARE(M_NEWNFSCLOPEN); MALLOC_DECLARE(M_NEWNFSCLDELEG); MALLOC_DECLARE(M_NEWNFSCLCLIENT); MALLOC_DECLARE(M_NEWNFSCLLOCKOWNER); MALLOC_DECLARE(M_NEWNFSCLLOCK); MALLOC_DECLARE(M_NEWNFSDIROFF); MALLOC_DECLARE(M_NEWNFSV4NODE); MALLOC_DECLARE(M_NEWNFSDIRECTIO); MALLOC_DECLARE(M_NEWNFSMNT); MALLOC_DECLARE(M_NEWNFSDROLLBACK); MALLOC_DECLARE(M_NEWNFSLAYOUT); MALLOC_DECLARE(M_NEWNFSFLAYOUT); MALLOC_DECLARE(M_NEWNFSDEVINFO); MALLOC_DECLARE(M_NEWNFSSOCKREQ); MALLOC_DECLARE(M_NEWNFSCLDS); MALLOC_DECLARE(M_NEWNFSLAYRECALL); MALLOC_DECLARE(M_NEWNFSDSESSION); #define M_NFSRVCACHE M_NEWNFSRVCACHE #define M_NFSDCLIENT M_NEWNFSDCLIENT #define M_NFSDSTATE M_NEWNFSDSTATE #define M_NFSDLOCK M_NEWNFSDLOCK #define M_NFSDLOCKFILE M_NEWNFSDLOCKFILE #define M_NFSSTRING M_NEWNFSSTRING #define M_NFSUSERGROUP M_NEWNFSUSERGROUP #define M_NFSDREQ M_NEWNFSDREQ #define M_NFSFH M_NEWNFSFH #define M_NFSCLOWNER M_NEWNFSCLOWNER #define M_NFSCLOPEN M_NEWNFSCLOPEN #define M_NFSCLDELEG M_NEWNFSCLDELEG #define M_NFSCLCLIENT M_NEWNFSCLCLIENT #define M_NFSCLLOCKOWNER M_NEWNFSCLLOCKOWNER #define M_NFSCLLOCK M_NEWNFSCLLOCK #define M_NFSDIROFF M_NEWNFSDIROFF #define M_NFSV4NODE M_NEWNFSV4NODE #define M_NFSDIRECTIO M_NEWNFSDIRECTIO #define M_NFSDROLLBACK M_NEWNFSDROLLBACK #define M_NFSLAYOUT M_NEWNFSLAYOUT #define M_NFSFLAYOUT M_NEWNFSFLAYOUT #define M_NFSDEVINFO M_NEWNFSDEVINFO #define M_NFSSOCKREQ M_NEWNFSSOCKREQ #define M_NFSCLDS M_NEWNFSCLDS #define M_NFSLAYRECALL M_NEWNFSLAYRECALL #define M_NFSDSESSION M_NEWNFSDSESSION #define NFSINT_SIGMASK(set) \ (SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) || \ SIGISMEMBER(set, SIGHUP) || SIGISMEMBER(set, SIGKILL) || \ SIGISMEMBER(set, SIGQUIT)) /* * Convert a quota block count to byte count. */ #define NFSQUOTABLKTOBYTE(q, b) (q) *= (b) /* * Define this as the largest file size supported. (It should probably * be available via a VFS_xxx Op, but it isn't. */ #define NFSRV_MAXFILESIZE ((u_int64_t)0x800000000000) /* * Set this macro to index() or strchr(), whichever is supported. */ #define STRCHR(s, c) strchr((s), (c)) /* * Set the n_time in the client write rpc, as required. */ #define NFSWRITERPC_SETTIME(w, n, a, v4) \ do { \ if (w) { \ mtx_lock(&((n)->n_mtx)); \ (n)->n_mtime = (a)->na_mtime; \ if (v4) \ (n)->n_change = (a)->na_filerev; \ mtx_unlock(&((n)->n_mtx)); \ } \ } while (0) /* * Fake value, just to make the client work. */ #define NFS_LATTR_NOSHRINK 1 /* * Prototypes for functions where the arguments vary for different ports. */ int nfscl_loadattrcache(struct vnode **, struct nfsvattr *, void *, void *, int, int); int newnfs_realign(struct mbuf **, int); /* * If the port runs on an SMP box that can enforce Atomic ops with low * overheads, define these as atomic increments/decrements. If not, * don't worry about it, since these are used for stats that can be * "out by one" without disastrous consequences. */ #define NFSINCRGLOBAL(a) ((a)++) /* * Assorted funky stuff to make things work under Darwin8. */ /* * These macros checks for a field in vattr being set. */ #define NFSATTRISSET(t, v, a) ((v)->a != (t)VNOVAL) #define NFSATTRISSETTIME(v, a) ((v)->a.tv_sec != VNOVAL) /* * Manipulate mount flags. */ #define NFSSTA_HASWRITEVERF 0x00040000 /* Has write verifier */ #define NFSSTA_GOTFSINFO 0x00100000 /* Got the fsinfo */ #define NFSSTA_OPENMODE 0x00200000 /* Must use correct open mode */ #define NFSSTA_FLEXFILE 0x00800000 /* Use Flex File Layout */ #define NFSSTA_NOLAYOUTCOMMIT 0x04000000 /* Don't do LayoutCommit */ #define NFSSTA_SESSPERSIST 0x08000000 /* Has a persistent session */ #define NFSSTA_TIMEO 0x10000000 /* Experiencing a timeout */ #define NFSSTA_LOCKTIMEO 0x20000000 /* Experiencing a lockd timeout */ #define NFSSTA_HASSETFSID 0x40000000 /* Has set the fsid */ #define NFSSTA_PNFS 0x80000000 /* pNFS is enabled */ #define NFSHASNFSV3(n) ((n)->nm_flag & NFSMNT_NFSV3) #define NFSHASNFSV4(n) ((n)->nm_flag & NFSMNT_NFSV4) #define NFSHASNFSV4N(n) ((n)->nm_minorvers > 0) #define NFSHASNFSV3OR4(n) ((n)->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) #define NFSHASGOTFSINFO(n) ((n)->nm_state & NFSSTA_GOTFSINFO) #define NFSHASHASSETFSID(n) ((n)->nm_state & NFSSTA_HASSETFSID) #define NFSHASSTRICT3530(n) ((n)->nm_flag & NFSMNT_STRICT3530) #define NFSHASWRITEVERF(n) ((n)->nm_state & NFSSTA_HASWRITEVERF) #define NFSHASINT(n) ((n)->nm_flag & NFSMNT_INT) #define NFSHASSOFT(n) ((n)->nm_flag & NFSMNT_SOFT) #define NFSHASINTORSOFT(n) ((n)->nm_flag & (NFSMNT_INT | NFSMNT_SOFT)) #define NFSHASDUMBTIMR(n) ((n)->nm_flag & NFSMNT_DUMBTIMR) #define NFSHASNOCONN(n) ((n)->nm_flag & NFSMNT_MNTD) #define NFSHASKERB(n) ((n)->nm_flag & NFSMNT_KERB) #define NFSHASALLGSSNAME(n) ((n)->nm_flag & NFSMNT_ALLGSSNAME) #define NFSHASINTEGRITY(n) ((n)->nm_flag & NFSMNT_INTEGRITY) #define NFSHASPRIVACY(n) ((n)->nm_flag & NFSMNT_PRIVACY) #define NFSSETWRITEVERF(n) ((n)->nm_state |= NFSSTA_HASWRITEVERF) #define NFSSETHASSETFSID(n) ((n)->nm_state |= NFSSTA_HASSETFSID) #define NFSHASPNFSOPT(n) ((n)->nm_flag & NFSMNT_PNFS) #define NFSHASNOLAYOUTCOMMIT(n) ((n)->nm_state & NFSSTA_NOLAYOUTCOMMIT) #define NFSHASSESSPERSIST(n) ((n)->nm_state & NFSSTA_SESSPERSIST) #define NFSHASPNFS(n) ((n)->nm_state & NFSSTA_PNFS) #define NFSHASFLEXFILE(n) ((n)->nm_state & NFSSTA_FLEXFILE) #define NFSHASOPENMODE(n) ((n)->nm_state & NFSSTA_OPENMODE) #define NFSHASONEOPENOWN(n) (((n)->nm_flag & NFSMNT_ONEOPENOWN) != 0 && \ (n)->nm_minorvers > 0) /* * Gets the stats field out of the mount structure. */ #define vfs_statfs(m) (&((m)->mnt_stat)) /* * Set boottime. */ #define NFSSETBOOTTIME(b) (getboottime(&b)) /* * The size of directory blocks in the buffer cache. * MUST BE in the range of PAGE_SIZE <= NFS_DIRBLKSIZ <= MAXBSIZE!! */ #define NFS_DIRBLKSIZ (16 * DIRBLKSIZ) /* Must be a multiple of DIRBLKSIZ */ /* * Define these macros to access mnt_flag fields. */ #define NFSMNT_RDONLY(m) ((m)->mnt_flag & MNT_RDONLY) #endif /* _KERNEL */ /* * Define a structure similar to ufs_args for use in exporting the V4 root. */ struct nfsex_args { char *fspec; struct export_args export; }; /* * These export flags should be defined, but there are no bits left. * Maybe a separate mnt_exflag field could be added or the mnt_flag * field increased to 64 bits? */ #ifndef MNT_EXSTRICTACCESS #define MNT_EXSTRICTACCESS 0x0 #endif #ifndef MNT_EXV4ONLY #define MNT_EXV4ONLY 0x0 #endif #ifdef _KERNEL /* * Define this to invalidate the attribute cache for the nfs node. */ #define NFSINVALATTRCACHE(n) ((n)->n_attrstamp = 0) /* Used for FreeBSD only */ void nfsd_mntinit(void); /* * Define these for vnode lock/unlock ops. * * These are good abstractions to macro out, so that they can be added to * later, for debugging or stats, etc. */ #define NFSVOPLOCK(v, f) vn_lock((v), (f)) #define NFSVOPUNLOCK(v, f) VOP_UNLOCK((v), (f)) #define NFSVOPISLOCKED(v) VOP_ISLOCKED((v)) /* * Define ncl_hash(). */ #define ncl_hash(f, l) (fnv_32_buf((f), (l), FNV1_32_INIT)) int newnfs_iosize(struct nfsmount *); int newnfs_vncmpf(struct vnode *, void *); #ifndef NFS_MINDIRATTRTIMO #define NFS_MINDIRATTRTIMO 3 /* VDIR attrib cache timeout in sec */ #endif #ifndef NFS_MAXDIRATTRTIMO #define NFS_MAXDIRATTRTIMO 60 #endif /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; u_int32_t r_flags; /* flags on request, see below */ struct nfsmount *r_nmp; /* Client mnt ptr */ struct mtx r_mtx; /* Mutex lock for this structure */ }; #ifndef NFS_MAXBSIZE #define NFS_MAXBSIZE (maxbcachebuf) #endif /* * This macro checks to see if issuing of delegations is allowed for this * vnode. */ #ifdef VV_DISABLEDELEG #define NFSVNO_DELEGOK(v) \ ((v) == NULL || ((v)->v_vflag & VV_DISABLEDELEG) == 0) #else #define NFSVNO_DELEGOK(v) (1) #endif /* * Name used by getnewvnode() to describe filesystem, "nfs". * For performance reasons it is useful to have the same string * used in both places that call getnewvnode(). */ extern const char nfs_vnode_tag[]; + +/* + * Check for the errors that indicate a DS should be disabled. + * ENXIO indicates that the krpc cannot do an RPC on the DS. + * EIO is returned by the RPC as an indication of I/O problems on the + * server. + * Are there other fatal errors? + */ +#define nfsds_failerr(e) ((e) == ENXIO || (e) == EIO) #endif /* _KERNEL */ #endif /* _NFS_NFSPORT_H */ Index: head/sys/fs/nfs/nfsproto.h =================================================================== --- head/sys/fs/nfs/nfsproto.h (revision 335011) +++ head/sys/fs/nfs/nfsproto.h (revision 335012) @@ -1,1382 +1,1412 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSPROTO_H_ #define _NFS_NFSPROTO_H_ /* * nfs definitions as per the Version 2, 3 and 4 specs */ /* * Constants as defined in the NFS Version 2, 3 and 4 specs. * "NFS: Network File System Protocol Specification" RFC1094 * and in the "NFS: Network File System Version 3 Protocol * Specification" */ #define NFS_PORT 2049 #define NFS_PROG 100003 #define NFS_CALLBCKPROG 0x40000000 /* V4 only */ #define NFS_VER2 2 #define NFS_VER3 3 #define NFS_VER4 4 #define NFS_V2MAXDATA 8192 #define NFS_MAXDGRAMDATA 16384 #define NFS_MAXPATHLEN 1024 #define NFS_MAXNAMLEN 255 /* * Calculating the maximum XDR overhead for an NFS RPC isn't easy. * NFS_MAXPKTHDR is antiquated and assumes AUTH_SYS over UDP. * NFS_MAXXDR should be sufficient for all NFS versions over TCP. * It includes: * - Maximum RPC message header. It can include 2 400byte authenticators plus * a machine name of unlimited length, although it is usually relatively * small. * - XDR overheads for the NFSv4 compound. This can include Owner and * Owner_group strings, which are usually fairly small, but are allowed * to be up to 1024 bytes each. * 4096 is overkill, but should always be sufficient. */ #define NFS_MAXPKTHDR 404 #define NFS_MAXXDR 4096 #define NFS_MAXPACKET (NFS_SRVMAXIO + NFS_MAXXDR) #define NFS_MINPACKET 20 #define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */ #define NFSV4_MINORVERSION 0 /* V4 Minor version */ #define NFSV41_MINORVERSION 1 /* V4 Minor version */ #define NFSV4_CBVERS 1 /* V4 CB Version */ #define NFSV41_CBVERS 4 /* V4.1 CB Version */ #define NFSV4_SMALLSTR 50 /* Strings small enough for stack */ /* * This value isn't a fixed value in the RFCs. * It is the maximum data size supported by NFSv3 or NFSv4 over TCP for * the server. It should be set to the I/O size preferred by ZFS or * MAXBSIZE, whichever is greater. * ZFS currently prefers 128K. * It used to be called NFS_MAXDATA, but has been renamed to clarify that * it refers to server side only and doesn't conflict with the NFS_MAXDATA * defined in rpcsvc/nfs_prot.h for userland. */ #define NFS_SRVMAXIO (128 * 1024) /* Stat numbers for rpc returns (version 2, 3 and 4) */ /* * These numbers are hard-wired in the RFCs, so they can't be changed. * The code currently assumes that the ones < 10000 are the same as * sys/errno.h and that sys/errno.h will never go as high as 10000. * If the value in sys/errno.h of any entry listed below is changed, * the NFS code must be modified to do the mapping between them. * (You can ignore NFSERR_WFLUSH, since it is never actually used.) */ #define NFSERR_OK 0 #define NFSERR_PERM 1 #define NFSERR_NOENT 2 #define NFSERR_IO 5 #define NFSERR_NXIO 6 #define NFSERR_ACCES 13 #define NFSERR_EXIST 17 #define NFSERR_XDEV 18 /* Version 3, 4 only */ #define NFSERR_NODEV 19 #define NFSERR_NOTDIR 20 #define NFSERR_ISDIR 21 #define NFSERR_INVAL 22 /* Version 3, 4 only */ #define NFSERR_FBIG 27 #define NFSERR_NOSPC 28 #define NFSERR_ROFS 30 #define NFSERR_MLINK 31 /* Version 3, 4 only */ #define NFSERR_NAMETOL 63 #define NFSERR_NOTEMPTY 66 #define NFSERR_DQUOT 69 #define NFSERR_STALE 70 #define NFSERR_REMOTE 71 /* Version 3 only */ #define NFSERR_WFLUSH 99 /* Version 2 only */ #define NFSERR_BADHANDLE 10001 /* These are Version 3, 4 only */ #define NFSERR_NOT_SYNC 10002 /* Version 3 Only */ #define NFSERR_BAD_COOKIE 10003 #define NFSERR_NOTSUPP 10004 #define NFSERR_TOOSMALL 10005 #define NFSERR_SERVERFAULT 10006 #define NFSERR_BADTYPE 10007 #define NFSERR_DELAY 10008 /* Called NFSERR_JUKEBOX for V3 */ #define NFSERR_SAME 10009 /* These are Version 4 only */ #define NFSERR_DENIED 10010 #define NFSERR_EXPIRED 10011 #define NFSERR_LOCKED 10012 #define NFSERR_GRACE 10013 #define NFSERR_FHEXPIRED 10014 #define NFSERR_SHAREDENIED 10015 #define NFSERR_WRONGSEC 10016 #define NFSERR_CLIDINUSE 10017 #define NFSERR_RESOURCE 10018 #define NFSERR_MOVED 10019 #define NFSERR_NOFILEHANDLE 10020 #define NFSERR_MINORVERMISMATCH 10021 #define NFSERR_STALECLIENTID 10022 #define NFSERR_STALESTATEID 10023 #define NFSERR_OLDSTATEID 10024 #define NFSERR_BADSTATEID 10025 #define NFSERR_BADSEQID 10026 #define NFSERR_NOTSAME 10027 #define NFSERR_LOCKRANGE 10028 #define NFSERR_SYMLINK 10029 #define NFSERR_RESTOREFH 10030 #define NFSERR_LEASEMOVED 10031 #define NFSERR_ATTRNOTSUPP 10032 #define NFSERR_NOGRACE 10033 #define NFSERR_RECLAIMBAD 10034 #define NFSERR_RECLAIMCONFLICT 10035 #define NFSERR_BADXDR 10036 #define NFSERR_LOCKSHELD 10037 #define NFSERR_OPENMODE 10038 #define NFSERR_BADOWNER 10039 #define NFSERR_BADCHAR 10040 #define NFSERR_BADNAME 10041 #define NFSERR_BADRANGE 10042 #define NFSERR_LOCKNOTSUPP 10043 #define NFSERR_OPILLEGAL 10044 #define NFSERR_DEADLOCK 10045 #define NFSERR_FILEOPEN 10046 #define NFSERR_ADMINREVOKED 10047 #define NFSERR_CBPATHDOWN 10048 /* NFSv4.1 specific errors. */ #define NFSERR_BADIOMODE 10049 #define NFSERR_BADLAYOUT 10050 #define NFSERR_BADSESSIONDIGEST 10051 #define NFSERR_BADSESSION 10052 #define NFSERR_BADSLOT 10053 #define NFSERR_COMPLETEALREADY 10054 #define NFSERR_NOTBNDTOSESS 10055 #define NFSERR_DELEGALREADYWANT 10056 #define NFSERR_BACKCHANBUSY 10057 #define NFSERR_LAYOUTTRYLATER 10058 #define NFSERR_LAYOUTUNAVAIL 10059 #define NFSERR_NOMATCHLAYOUT 10060 #define NFSERR_RECALLCONFLICT 10061 #define NFSERR_UNKNLAYOUTTYPE 10062 #define NFSERR_SEQMISORDERED 10063 #define NFSERR_SEQUENCEPOS 10064 #define NFSERR_REQTOOBIG 10065 #define NFSERR_REPTOOBIG 10066 #define NFSERR_REPTOOBIGTOCACHE 10067 #define NFSERR_RETRYUNCACHEDREP 10068 #define NFSERR_UNSAFECOMPOUND 10069 #define NFSERR_TOOMANYOPS 10070 #define NFSERR_OPNOTINSESS 10071 #define NFSERR_HASHALGUNSUPP 10072 #define NFSERR_CLIENTIDBUSY 10074 #define NFSERR_PNFSIOHOLE 10075 #define NFSERR_SEQFALSERETRY 10076 #define NFSERR_BADHIGHSLOT 10077 #define NFSERR_DEADSESSION 10078 #define NFSERR_ENCRALGUNSUPP 10079 #define NFSERR_PNFSNOLAYOUT 10080 #define NFSERR_NOTONLYOP 10081 #define NFSERR_WRONGCRED 10082 #define NFSERR_WRONGTYPE 10083 #define NFSERR_DIRDELEGUNAVAIL 10084 #define NFSERR_REJECTDELEG 10085 #define NFSERR_RETURNCONFLICT 10086 #define NFSERR_DELEGREVOKED 10087 #define NFSERR_STALEWRITEVERF 30001 /* Fake return for nfs_commit() */ #define NFSERR_DONTREPLY 30003 /* Don't process request */ #define NFSERR_RETVOID 30004 /* Return void, not error */ #define NFSERR_REPLYFROMCACHE 30005 /* Reply from recent request cache */ #define NFSERR_STALEDONTRECOVER 30006 /* Don't initiate recovery */ #define NFSERR_RPCERR 0x40000000 /* Mark an RPC layer error */ #define NFSERR_AUTHERR 0x80000000 /* Mark an authentication error */ #define NFSERR_RPCMISMATCH (NFSERR_RPCERR | RPC_MISMATCH) #define NFSERR_PROGUNAVAIL (NFSERR_RPCERR | RPC_PROGUNAVAIL) #define NFSERR_PROGMISMATCH (NFSERR_RPCERR | RPC_PROGMISMATCH) #define NFSERR_PROGNOTV4 (NFSERR_RPCERR | 0xffff) #define NFSERR_PROCUNAVAIL (NFSERR_RPCERR | RPC_PROCUNAVAIL) #define NFSERR_GARBAGE (NFSERR_RPCERR | RPC_GARBAGE) /* Sizes in bytes of various nfs rpc components */ #define NFSX_UNSIGNED 4 #define NFSX_HYPER (2 * NFSX_UNSIGNED) /* specific to NFS Version 2 */ #define NFSX_V2FH 32 #define NFSX_V2FATTR 68 #define NFSX_V2SATTR 32 #define NFSX_V2COOKIE 4 #define NFSX_V2STATFS 20 /* specific to NFS Version 3 */ #define NFSX_V3FHMAX 64 /* max. allowed by protocol */ #define NFSX_V3FATTR 84 #define NFSX_V3SATTR 60 /* max. all fields filled in */ #define NFSX_V3SRVSATTR (sizeof (struct nfsv3_sattr)) #define NFSX_V3POSTOPATTR (NFSX_V3FATTR + NFSX_UNSIGNED) #define NFSX_V3WCCDATA (NFSX_V3POSTOPATTR + 8 * NFSX_UNSIGNED) #define NFSX_V3STATFS 52 #define NFSX_V3FSINFO 48 #define NFSX_V3PATHCONF 24 /* specific to NFS Version 4 */ #define NFSX_V4FHMAX 128 #define NFSX_V4FSID (2 * NFSX_HYPER) #define NFSX_V4SPECDATA (2 * NFSX_UNSIGNED) #define NFSX_V4TIME (NFSX_HYPER + NFSX_UNSIGNED) #define NFSX_V4SETTIME (NFSX_UNSIGNED + NFSX_V4TIME) #define NFSX_V4SESSIONID 16 #define NFSX_V4DEVICEID 16 +#define NFSX_V4PNFSFH (sizeof(fhandle_t) + 1) +#define NFSX_V4FILELAYOUT (4 * NFSX_UNSIGNED + NFSX_V4DEVICEID + \ + NFSX_HYPER + NFSM_RNDUP(NFSX_V4PNFSFH)) +#define NFSX_V4FLEXLAYOUT(m) (NFSX_HYPER + 3 * NFSX_UNSIGNED + \ + ((m) * (NFSX_V4DEVICEID + NFSX_STATEID + NFSM_RNDUP(NFSX_V4PNFSFH) + \ + 8 * NFSX_UNSIGNED))) /* sizes common to multiple NFS versions */ #define NFSX_FHMAX (NFSX_V4FHMAX) #define NFSX_MYFH (sizeof (fhandle_t)) /* size this server uses */ #define NFSX_VERF 8 #define NFSX_STATEIDOTHER 12 #define NFSX_STATEID (NFSX_UNSIGNED + NFSX_STATEIDOTHER) #define NFSX_GSSH 12 /* variants for multiple versions */ #define NFSX_STATFS(v3) ((v3) ? NFSX_V3STATFS : NFSX_V2STATFS) +/* + * Beware. NFSPROC_NULL and friends are defined in + * as well and the numbers are different. + */ +#ifndef NFSPROC_NULL /* nfs rpc procedure numbers (before version mapping) */ #define NFSPROC_NULL 0 #define NFSPROC_GETATTR 1 #define NFSPROC_SETATTR 2 #define NFSPROC_LOOKUP 3 #define NFSPROC_ACCESS 4 #define NFSPROC_READLINK 5 #define NFSPROC_READ 6 #define NFSPROC_WRITE 7 #define NFSPROC_CREATE 8 #define NFSPROC_MKDIR 9 #define NFSPROC_SYMLINK 10 #define NFSPROC_MKNOD 11 #define NFSPROC_REMOVE 12 #define NFSPROC_RMDIR 13 #define NFSPROC_RENAME 14 #define NFSPROC_LINK 15 #define NFSPROC_READDIR 16 #define NFSPROC_READDIRPLUS 17 #define NFSPROC_FSSTAT 18 #define NFSPROC_FSINFO 19 #define NFSPROC_PATHCONF 20 #define NFSPROC_COMMIT 21 +#endif /* NFSPROC_NULL */ /* * The lower numbers -> 21 are used by NFSv2 and v3. These define higher * numbers used by NFSv4. * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is * one greater than the last number. */ #ifndef NFS_V3NPROCS #define NFS_V3NPROCS 22 #define NFSPROC_LOOKUPP 22 #define NFSPROC_SETCLIENTID 23 #define NFSPROC_SETCLIENTIDCFRM 24 #define NFSPROC_LOCK 25 #define NFSPROC_LOCKU 26 #define NFSPROC_OPEN 27 #define NFSPROC_CLOSE 28 #define NFSPROC_OPENCONFIRM 29 #define NFSPROC_LOCKT 30 #define NFSPROC_OPENDOWNGRADE 31 #define NFSPROC_RENEW 32 #define NFSPROC_PUTROOTFH 33 #define NFSPROC_RELEASELCKOWN 34 #define NFSPROC_DELEGRETURN 35 #define NFSPROC_RETDELEGREMOVE 36 #define NFSPROC_RETDELEGRENAME1 37 #define NFSPROC_RETDELEGRENAME2 38 #define NFSPROC_GETACL 39 #define NFSPROC_SETACL 40 /* * Must be defined as one higher than the last Proc# above. */ #define NFSV4_NPROCS 41 /* Additional procedures for NFSv4.1. */ #define NFSPROC_EXCHANGEID 41 #define NFSPROC_CREATESESSION 42 #define NFSPROC_DESTROYSESSION 43 #define NFSPROC_DESTROYCLIENT 44 #define NFSPROC_FREESTATEID 45 #define NFSPROC_LAYOUTGET 46 #define NFSPROC_GETDEVICEINFO 47 #define NFSPROC_LAYOUTCOMMIT 48 #define NFSPROC_LAYOUTRETURN 49 #define NFSPROC_RECLAIMCOMPL 50 #define NFSPROC_WRITEDS 51 #define NFSPROC_READDS 52 #define NFSPROC_COMMITDS 53 #define NFSPROC_OPENLAYGET 54 #define NFSPROC_CREATELAYGET 55 /* * Must be defined as one higher than the last NFSv4.1 Proc# above. */ #define NFSV41_NPROCS 56 #endif /* NFS_V3NPROCS */ /* * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code. */ #ifndef NFS_NPROCS #define NFS_NPROCS NFSV4_NPROCS #endif /* * NFSPROC_NOOP is a fake op# that can't be the same as any V2/3/4 Procedure * or Operation#. Since the NFS V4 Op #s go higher, use NFSV42_NOPS, which * is one greater than the highest Op#. */ #define NFSPROC_NOOP NFSV42_NOPS /* Actual Version 2 procedure numbers */ #define NFSV2PROC_NULL 0 #define NFSV2PROC_GETATTR 1 #define NFSV2PROC_SETATTR 2 #define NFSV2PROC_NOOP 3 #define NFSV2PROC_ROOT NFSV2PROC_NOOP /* Obsolete */ #define NFSV2PROC_LOOKUP 4 #define NFSV2PROC_READLINK 5 #define NFSV2PROC_READ 6 #define NFSV2PROC_WRITECACHE NFSV2PROC_NOOP /* Obsolete */ #define NFSV2PROC_WRITE 8 #define NFSV2PROC_CREATE 9 #define NFSV2PROC_REMOVE 10 #define NFSV2PROC_RENAME 11 #define NFSV2PROC_LINK 12 #define NFSV2PROC_SYMLINK 13 #define NFSV2PROC_MKDIR 14 #define NFSV2PROC_RMDIR 15 #define NFSV2PROC_READDIR 16 #define NFSV2PROC_STATFS 17 /* * V4 Procedure numbers */ #define NFSV4PROC_COMPOUND 1 #define NFSV4PROC_CBNULL 0 #define NFSV4PROC_CBCOMPOUND 1 /* * Constants used by the Version 3 and 4 protocols for various RPCs */ #define NFSV3SATTRTIME_DONTCHANGE 0 #define NFSV3SATTRTIME_TOSERVER 1 #define NFSV3SATTRTIME_TOCLIENT 2 #define NFSV4SATTRTIME_TOSERVER 0 #define NFSV4SATTRTIME_TOCLIENT 1 #define NFSV4LOCKT_READ 1 #define NFSV4LOCKT_WRITE 2 #define NFSV4LOCKT_READW 3 #define NFSV4LOCKT_WRITEW 4 #define NFSV4LOCKT_RELEASE 5 #define NFSV4OPEN_NOCREATE 0 #define NFSV4OPEN_CREATE 1 #define NFSV4OPEN_CLAIMNULL 0 #define NFSV4OPEN_CLAIMPREVIOUS 1 #define NFSV4OPEN_CLAIMDELEGATECUR 2 #define NFSV4OPEN_CLAIMDELEGATEPREV 3 #define NFSV4OPEN_CLAIMFH 4 #define NFSV4OPEN_CLAIMDELEGATECURFH 5 #define NFSV4OPEN_CLAIMDELEGATEPREVFH 6 #define NFSV4OPEN_DELEGATENONE 0 #define NFSV4OPEN_DELEGATEREAD 1 #define NFSV4OPEN_DELEGATEWRITE 2 #define NFSV4OPEN_DELEGATENONEEXT 3 #define NFSV4OPEN_LIMITSIZE 1 #define NFSV4OPEN_LIMITBLOCKS 2 /* * Nfs V4 ACE stuff */ #define NFSV4ACE_ALLOWEDTYPE 0x00000000 #define NFSV4ACE_DENIEDTYPE 0x00000001 #define NFSV4ACE_AUDITTYPE 0x00000002 #define NFSV4ACE_ALARMTYPE 0x00000003 #define NFSV4ACE_SUPALLOWED 0x00000001 #define NFSV4ACE_SUPDENIED 0x00000002 #define NFSV4ACE_SUPAUDIT 0x00000004 #define NFSV4ACE_SUPALARM 0x00000008 #define NFSV4ACE_SUPTYPES (NFSV4ACE_SUPALLOWED | NFSV4ACE_SUPDENIED) #define NFSV4ACE_FILEINHERIT 0x00000001 #define NFSV4ACE_DIRECTORYINHERIT 0x00000002 #define NFSV4ACE_NOPROPAGATEINHERIT 0x00000004 #define NFSV4ACE_INHERITONLY 0x00000008 #define NFSV4ACE_SUCCESSFULACCESS 0x00000010 #define NFSV4ACE_FAILEDACCESS 0x00000020 #define NFSV4ACE_IDENTIFIERGROUP 0x00000040 #define NFSV4ACE_READDATA 0x00000001 #define NFSV4ACE_LISTDIRECTORY 0x00000001 #define NFSV4ACE_WRITEDATA 0x00000002 #define NFSV4ACE_ADDFILE 0x00000002 #define NFSV4ACE_APPENDDATA 0x00000004 #define NFSV4ACE_ADDSUBDIRECTORY 0x00000004 #define NFSV4ACE_READNAMEDATTR 0x00000008 #define NFSV4ACE_WRITENAMEDATTR 0x00000010 #define NFSV4ACE_EXECUTE 0x00000020 #define NFSV4ACE_SEARCH 0x00000020 #define NFSV4ACE_DELETECHILD 0x00000040 #define NFSV4ACE_READATTRIBUTES 0x00000080 #define NFSV4ACE_WRITEATTRIBUTES 0x00000100 #define NFSV4ACE_DELETE 0x00010000 #define NFSV4ACE_READACL 0x00020000 #define NFSV4ACE_WRITEACL 0x00040000 #define NFSV4ACE_WRITEOWNER 0x00080000 #define NFSV4ACE_SYNCHRONIZE 0x00100000 /* * Here are the mappings between mode bits and acl mask bits for * directories and other files. * (Named attributes have not been included, since named attributes are * not yet supported.) * The mailing list seems to indicate that NFSV4ACE_EXECUTE refers to * searching a directory, although I can't find a statement of that in * the RFC. */ #define NFSV4ACE_ALLFILESMASK (NFSV4ACE_READATTRIBUTES | NFSV4ACE_READACL) #define NFSV4ACE_OWNERMASK (NFSV4ACE_WRITEATTRIBUTES | NFSV4ACE_WRITEACL) #define NFSV4ACE_DIRREADMASK NFSV4ACE_LISTDIRECTORY #define NFSV4ACE_DIREXECUTEMASK NFSV4ACE_EXECUTE #define NFSV4ACE_DIRWRITEMASK (NFSV4ACE_ADDFILE | \ NFSV4ACE_ADDSUBDIRECTORY | NFSV4ACE_DELETECHILD) #define NFSV4ACE_READMASK NFSV4ACE_READDATA #define NFSV4ACE_WRITEMASK (NFSV4ACE_WRITEDATA | NFSV4ACE_APPENDDATA) #define NFSV4ACE_EXECUTEMASK NFSV4ACE_EXECUTE #define NFSV4ACE_ALLFILEBITS (NFSV4ACE_READMASK | NFSV4ACE_WRITEMASK | \ NFSV4ACE_EXECUTEMASK | NFSV4ACE_SYNCHRONIZE) #define NFSV4ACE_ALLDIRBITS (NFSV4ACE_DIRREADMASK | \ NFSV4ACE_DIRWRITEMASK | NFSV4ACE_DIREXECUTEMASK) #define NFSV4ACE_AUDITMASK 0x0 /* * These GENERIC masks are not used and are no longer believed to be useful. */ #define NFSV4ACE_GENERICREAD 0x00120081 #define NFSV4ACE_GENERICWRITE 0x00160106 #define NFSV4ACE_GENERICEXECUTE 0x001200a0 #define NFSSTATEID_PUTALLZERO 0 #define NFSSTATEID_PUTALLONE 1 #define NFSSTATEID_PUTSTATEID 2 #define NFSSTATEID_PUTSEQIDZERO 3 /* * Bits for share access and deny. */ #define NFSV4OPEN_ACCESSREAD 0x00000001 #define NFSV4OPEN_ACCESSWRITE 0x00000002 #define NFSV4OPEN_ACCESSBOTH 0x00000003 #define NFSV4OPEN_WANTDELEGMASK 0x0000ff00 #define NFSV4OPEN_WANTREADDELEG 0x00000100 #define NFSV4OPEN_WANTWRITEDELEG 0x00000200 #define NFSV4OPEN_WANTANYDELEG 0x00000300 #define NFSV4OPEN_WANTNODELEG 0x00000400 #define NFSV4OPEN_WANTCANCEL 0x00000500 #define NFSV4OPEN_WANTSIGNALDELEG 0x00010000 #define NFSV4OPEN_WANTPUSHDELEG 0x00020000 #define NFSV4OPEN_DENYNONE 0x00000000 #define NFSV4OPEN_DENYREAD 0x00000001 #define NFSV4OPEN_DENYWRITE 0x00000002 #define NFSV4OPEN_DENYBOTH 0x00000003 /* * Delegate_none_ext reply values. */ #define NFSV4OPEN_NOTWANTED 0 #define NFSV4OPEN_CONTENTION 1 #define NFSV4OPEN_RESOURCE 2 #define NFSV4OPEN_NOTSUPPFTYPE 3 #define NFSV4OPEN_NOTSUPPWRITEFTYPE 4 #define NFSV4OPEN_NOTSUPPUPGRADE 5 #define NFSV4OPEN_NOTSUPPDOWNGRADE 6 #define NFSV4OPEN_CANCELLED 7 #define NFSV4OPEN_ISDIR 8 /* * Open result flags * (The first four are in the spec. The rest are used internally.) */ #define NFSV4OPEN_RESULTCONFIRM 0x00000002 #define NFSV4OPEN_LOCKTYPEPOSIX 0x00000004 #define NFSV4OPEN_PRESERVEUNLINKED 0x00000008 #define NFSV4OPEN_MAYNOTIFYLOCK 0x00000020 #define NFSV4OPEN_RFLAGS \ (NFSV4OPEN_RESULTCONFIRM | NFSV4OPEN_LOCKTYPEPOSIX | \ NFSV4OPEN_PRESERVEUNLINKED | NFSV4OPEN_MAYNOTIFYLOCK) #define NFSV4OPEN_RECALL 0x00010000 #define NFSV4OPEN_READDELEGATE 0x00020000 #define NFSV4OPEN_WRITEDELEGATE 0x00040000 #define NFSV4OPEN_WDRESOURCE 0x00080000 #define NFSV4OPEN_WDCONTENTION 0x00100000 #define NFSV4OPEN_WDNOTWANTED 0x00200000 /* * NFS V4 File Handle types */ #define NFSV4FHTYPE_PERSISTENT 0x0 #define NFSV4FHTYPE_NOEXPIREWITHOPEN 0x1 #define NFSV4FHTYPE_VOLATILEANY 0x2 #define NFSV4FHTYPE_VOLATILEMIGRATE 0x4 #define NFSV4FHTYPE_VOLATILERENAME 0x8 /* * Maximum size of V4 opaque strings. */ #define NFSV4_OPAQUELIMIT 1024 /* * These are the same for V3 and V4. */ #define NFSACCESS_READ 0x01 #define NFSACCESS_LOOKUP 0x02 #define NFSACCESS_MODIFY 0x04 #define NFSACCESS_EXTEND 0x08 #define NFSACCESS_DELETE 0x10 #define NFSACCESS_EXECUTE 0x20 #define NFSWRITE_UNSTABLE 0 #define NFSWRITE_DATASYNC 1 #define NFSWRITE_FILESYNC 2 #define NFSCREATE_UNCHECKED 0 #define NFSCREATE_GUARDED 1 #define NFSCREATE_EXCLUSIVE 2 #define NFSCREATE_EXCLUSIVE41 3 #define NFSV3FSINFO_LINK 0x01 #define NFSV3FSINFO_SYMLINK 0x02 #define NFSV3FSINFO_HOMOGENEOUS 0x08 #define NFSV3FSINFO_CANSETTIME 0x10 /* Flags for Exchange ID */ #define NFSV4EXCH_SUPPMOVEDREFER 0x00000001 #define NFSV4EXCH_SUPPMOVEDMIGR 0x00000002 #define NFSV4EXCH_BINDPRINCSTATEID 0x00000100 #define NFSV4EXCH_USENONPNFS 0x00010000 #define NFSV4EXCH_USEPNFSMDS 0x00020000 #define NFSV4EXCH_USEPNFSDS 0x00040000 #define NFSV4EXCH_MASKPNFS 0x00070000 #define NFSV4EXCH_UPDCONFIRMEDRECA 0x40000000 #define NFSV4EXCH_CONFIRMEDR 0x80000000 /* State Protects */ #define NFSV4EXCH_SP4NONE 0 #define NFSV4EXCH_SP4MACHCRED 1 #define NFSV4EXCH_SP4SSV 2 /* Flags for Create Session */ #define NFSV4CRSESS_PERSIST 0x00000001 #define NFSV4CRSESS_CONNBACKCHAN 0x00000002 #define NFSV4CRSESS_CONNRDMA 0x00000004 /* Flags for Sequence */ #define NFSV4SEQ_CBPATHDOWN 0x00000001 #define NFSV4SEQ_CBGSSCONTEXPIRING 0x00000002 #define NFSV4SEQ_CBGSSCONTEXPIRED 0x00000004 #define NFSV4SEQ_EXPIREDALLSTATEREVOKED 0x00000008 #define NFSV4SEQ_EXPIREDSOMESTATEREVOKED 0x00000010 #define NFSV4SEQ_ADMINSTATEREVOKED 0x00000020 #define NFSV4SEQ_RECALLABLESTATEREVOKED 0x00000040 #define NFSV4SEQ_LEASEMOVED 0x00000080 #define NFSV4SEQ_RESTARTRECLAIMNEEDED 0x00000100 #define NFSV4SEQ_CBPATHDOWNSESSION 0x00000200 #define NFSV4SEQ_BACKCHANNELFAULT 0x00000400 #define NFSV4SEQ_DEVIDCHANGED 0x00000800 #define NFSV4SEQ_DEVIDDELETED 0x00001000 /* Flags for Layout. */ #define NFSLAYOUTRETURN_FILE 1 #define NFSLAYOUTRETURN_FSID 2 #define NFSLAYOUTRETURN_ALL 3 #define NFSLAYOUT_NFSV4_1_FILES 0x1 #define NFSLAYOUT_OSD2_OBJECTS 0x2 #define NFSLAYOUT_BLOCK_VOLUME 0x3 #define NFSLAYOUT_FLEXFILE 0x4 #define NFSLAYOUTIOMODE_READ 1 #define NFSLAYOUTIOMODE_RW 2 #define NFSLAYOUTIOMODE_ANY 3 /* Flags for Get Device Info. */ #define NFSDEVICEIDNOTIFY_CHANGEBIT 0x1 #define NFSDEVICEIDNOTIFY_DELETEBIT 0x2 /* Flags for File Layout. */ #define NFSFLAYUTIL_DENSE 0x1 #define NFSFLAYUTIL_COMMIT_THRU_MDS 0x2 +#define NFSFLAYUTIL_STRIPE_MASK 0xffffffc0 /* Flags for Flex File Layout. */ #define NFSFLEXFLAG_NO_LAYOUTCOMMIT 0x00000001 #define NFSFLEXFLAG_NOIO_MDS 0x00000002 #define NFSFLEXFLAG_NO_READIO 0x00000004 #define NFSFLEXFLAG_WRITE_ONEMIRROR 0x00000008 /* Enum values for Bind Connection to Session. */ #define NFSCDFC4_FORE 0x1 #define NFSCDFC4_BACK 0x2 #define NFSCDFC4_FORE_OR_BOTH 0x3 #define NFSCDFC4_BACK_OR_BOTH 0x7 #define NFSCDFS4_FORE 0x1 #define NFSCDFS4_BACK 0x2 #define NFSCDFS4_BOTH 0x3 +#if defined(_KERNEL) || defined(KERNEL) /* Conversion macros */ #define vtonfsv2_mode(t,m) \ txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : \ MAKEIMODE((t), (m))) #define vtonfsv34_mode(m) txdr_unsigned((m) & 07777) #define nfstov_mode(a) (fxdr_unsigned(u_int16_t, (a))&07777) #define vtonfsv2_type(a) (((u_int32_t)(a)) >= 9 ? txdr_unsigned(NFNON) : \ txdr_unsigned(newnfsv2_type[((u_int32_t)(a))])) #define vtonfsv34_type(a) (((u_int32_t)(a)) >= 9 ? txdr_unsigned(NFNON) : \ txdr_unsigned(nfsv34_type[((u_int32_t)(a))])) #define nfsv2tov_type(a) newnv2tov_type[fxdr_unsigned(u_int32_t,(a))&0x7] #define nfsv34tov_type(a) nv34tov_type[fxdr_unsigned(u_int32_t,(a))&0x7] #define vtonfs_dtype(a) (((u_int32_t)(a)) >= 9 ? IFTODT(VTTOIF(VNON)) : \ IFTODT(VTTOIF(a))) /* File types */ typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5, NFSOCK=6, NFFIFO=7, NFATTRDIR=8, NFNAMEDATTR=9 } nfstype; /* Structs for common parts of the rpc's */ struct nfsv2_time { u_int32_t nfsv2_sec; u_int32_t nfsv2_usec; }; typedef struct nfsv2_time nfstime2; struct nfsv3_time { u_int32_t nfsv3_sec; u_int32_t nfsv3_nsec; }; typedef struct nfsv3_time nfstime3; struct nfsv4_time { u_int32_t nfsv4_highsec; u_int32_t nfsv4_sec; u_int32_t nfsv4_nsec; }; typedef struct nfsv4_time nfstime4; /* * Quads are defined as arrays of 2 longs to ensure dense packing for the * protocol and to facilitate xdr conversion. */ struct nfs_uquad { u_int32_t nfsuquad[2]; }; typedef struct nfs_uquad nfsuint64; /* * Used to convert between two u_longs and a u_quad_t. */ union nfs_quadconvert { u_int32_t lval[2]; u_quad_t qval; }; typedef union nfs_quadconvert nfsquad_t; /* * NFS Version 3 special file number. */ struct nfsv3_spec { u_int32_t specdata1; u_int32_t specdata2; }; typedef struct nfsv3_spec nfsv3spec; /* * File attributes and setable attributes. These structures cover both * NFS version 2 and the version 3 protocol. Note that the union is only * used so that one pointer can refer to both variants. These structures * go out on the wire and must be densely packed, so no quad data types * are used. (all fields are longs or u_longs or structures of same) * NB: You can't do sizeof(struct nfs_fattr), you must use the * NFSX_FATTR(v3) macro. */ struct nfs_fattr { u_int32_t fa_type; u_int32_t fa_mode; u_int32_t fa_nlink; u_int32_t fa_uid; u_int32_t fa_gid; union { struct { u_int32_t nfsv2fa_size; u_int32_t nfsv2fa_blocksize; u_int32_t nfsv2fa_rdev; u_int32_t nfsv2fa_blocks; u_int32_t nfsv2fa_fsid; u_int32_t nfsv2fa_fileid; nfstime2 nfsv2fa_atime; nfstime2 nfsv2fa_mtime; nfstime2 nfsv2fa_ctime; } fa_nfsv2; struct { nfsuint64 nfsv3fa_size; nfsuint64 nfsv3fa_used; nfsv3spec nfsv3fa_rdev; nfsuint64 nfsv3fa_fsid; nfsuint64 nfsv3fa_fileid; nfstime3 nfsv3fa_atime; nfstime3 nfsv3fa_mtime; nfstime3 nfsv3fa_ctime; } fa_nfsv3; } fa_un; }; /* and some ugly defines for accessing union components */ #define fa2_size fa_un.fa_nfsv2.nfsv2fa_size #define fa2_blocksize fa_un.fa_nfsv2.nfsv2fa_blocksize #define fa2_rdev fa_un.fa_nfsv2.nfsv2fa_rdev #define fa2_blocks fa_un.fa_nfsv2.nfsv2fa_blocks #define fa2_fsid fa_un.fa_nfsv2.nfsv2fa_fsid #define fa2_fileid fa_un.fa_nfsv2.nfsv2fa_fileid #define fa2_atime fa_un.fa_nfsv2.nfsv2fa_atime #define fa2_mtime fa_un.fa_nfsv2.nfsv2fa_mtime #define fa2_ctime fa_un.fa_nfsv2.nfsv2fa_ctime #define fa3_size fa_un.fa_nfsv3.nfsv3fa_size #define fa3_used fa_un.fa_nfsv3.nfsv3fa_used #define fa3_rdev fa_un.fa_nfsv3.nfsv3fa_rdev #define fa3_fsid fa_un.fa_nfsv3.nfsv3fa_fsid #define fa3_fileid fa_un.fa_nfsv3.nfsv3fa_fileid #define fa3_atime fa_un.fa_nfsv3.nfsv3fa_atime #define fa3_mtime fa_un.fa_nfsv3.nfsv3fa_mtime #define fa3_ctime fa_un.fa_nfsv3.nfsv3fa_ctime #define NFS_LINK_MAX UINT32_MAX struct nfsv2_sattr { u_int32_t sa_mode; u_int32_t sa_uid; u_int32_t sa_gid; u_int32_t sa_size; nfstime2 sa_atime; nfstime2 sa_mtime; }; /* * NFS Version 3 sattr structure for the new node creation case. */ struct nfsv3_sattr { u_int32_t sa_modetrue; u_int32_t sa_mode; u_int32_t sa_uidfalse; u_int32_t sa_gidfalse; u_int32_t sa_sizefalse; u_int32_t sa_atimetype; nfstime3 sa_atime; u_int32_t sa_mtimetype; nfstime3 sa_mtime; }; +#endif /* _KERNEL */ /* * The attribute bits used for V4. * NFSATTRBIT_xxx defines the attribute# (and its bit position) * NFSATTRBM_xxx is a 32bit mask with the correct bit set within the * appropriate 32bit word. * NFSATTRBIT_MAX is one greater than the largest NFSATTRBIT_xxx */ #define NFSATTRBIT_SUPPORTEDATTRS 0 #define NFSATTRBIT_TYPE 1 #define NFSATTRBIT_FHEXPIRETYPE 2 #define NFSATTRBIT_CHANGE 3 #define NFSATTRBIT_SIZE 4 #define NFSATTRBIT_LINKSUPPORT 5 #define NFSATTRBIT_SYMLINKSUPPORT 6 #define NFSATTRBIT_NAMEDATTR 7 #define NFSATTRBIT_FSID 8 #define NFSATTRBIT_UNIQUEHANDLES 9 #define NFSATTRBIT_LEASETIME 10 #define NFSATTRBIT_RDATTRERROR 11 #define NFSATTRBIT_ACL 12 #define NFSATTRBIT_ACLSUPPORT 13 #define NFSATTRBIT_ARCHIVE 14 #define NFSATTRBIT_CANSETTIME 15 #define NFSATTRBIT_CASEINSENSITIVE 16 #define NFSATTRBIT_CASEPRESERVING 17 #define NFSATTRBIT_CHOWNRESTRICTED 18 #define NFSATTRBIT_FILEHANDLE 19 #define NFSATTRBIT_FILEID 20 #define NFSATTRBIT_FILESAVAIL 21 #define NFSATTRBIT_FILESFREE 22 #define NFSATTRBIT_FILESTOTAL 23 #define NFSATTRBIT_FSLOCATIONS 24 #define NFSATTRBIT_HIDDEN 25 #define NFSATTRBIT_HOMOGENEOUS 26 #define NFSATTRBIT_MAXFILESIZE 27 #define NFSATTRBIT_MAXLINK 28 #define NFSATTRBIT_MAXNAME 29 #define NFSATTRBIT_MAXREAD 30 #define NFSATTRBIT_MAXWRITE 31 #define NFSATTRBIT_MIMETYPE 32 #define NFSATTRBIT_MODE 33 #define NFSATTRBIT_NOTRUNC 34 #define NFSATTRBIT_NUMLINKS 35 #define NFSATTRBIT_OWNER 36 #define NFSATTRBIT_OWNERGROUP 37 #define NFSATTRBIT_QUOTAHARD 38 #define NFSATTRBIT_QUOTASOFT 39 #define NFSATTRBIT_QUOTAUSED 40 #define NFSATTRBIT_RAWDEV 41 #define NFSATTRBIT_SPACEAVAIL 42 #define NFSATTRBIT_SPACEFREE 43 #define NFSATTRBIT_SPACETOTAL 44 #define NFSATTRBIT_SPACEUSED 45 #define NFSATTRBIT_SYSTEM 46 #define NFSATTRBIT_TIMEACCESS 47 #define NFSATTRBIT_TIMEACCESSSET 48 #define NFSATTRBIT_TIMEBACKUP 49 #define NFSATTRBIT_TIMECREATE 50 #define NFSATTRBIT_TIMEDELTA 51 #define NFSATTRBIT_TIMEMETADATA 52 #define NFSATTRBIT_TIMEMODIFY 53 #define NFSATTRBIT_TIMEMODIFYSET 54 #define NFSATTRBIT_MOUNTEDONFILEID 55 #define NFSATTRBIT_DIRNOTIFDELAY 56 #define NFSATTRBIT_DIRENTNOTIFDELAY 57 #define NFSATTRBIT_DACL 58 #define NFSATTRBIT_SACL 59 #define NFSATTRBIT_CHANGEPOLICY 60 #define NFSATTRBIT_FSSTATUS 61 #define NFSATTRBIT_FSLAYOUTTYPE 62 #define NFSATTRBIT_LAYOUTHINT 63 #define NFSATTRBIT_LAYOUTTYPE 64 #define NFSATTRBIT_LAYOUTBLKSIZE 65 #define NFSATTRBIT_LAYOUTALIGNMENT 66 #define NFSATTRBIT_FSLOCATIONSINFO 67 #define NFSATTRBIT_MDSTHRESHOLD 68 #define NFSATTRBIT_RETENTIONGET 69 #define NFSATTRBIT_RETENTIONSET 70 #define NFSATTRBIT_RETENTEVTGET 71 #define NFSATTRBIT_RETENTEVTSET 72 #define NFSATTRBIT_RETENTIONHOLD 73 #define NFSATTRBIT_MODESETMASKED 74 #define NFSATTRBIT_SUPPATTREXCLCREAT 75 #define NFSATTRBIT_FSCHARSETCAP 76 #define NFSATTRBM_SUPPORTEDATTRS 0x00000001 #define NFSATTRBM_TYPE 0x00000002 #define NFSATTRBM_FHEXPIRETYPE 0x00000004 #define NFSATTRBM_CHANGE 0x00000008 #define NFSATTRBM_SIZE 0x00000010 #define NFSATTRBM_LINKSUPPORT 0x00000020 #define NFSATTRBM_SYMLINKSUPPORT 0x00000040 #define NFSATTRBM_NAMEDATTR 0x00000080 #define NFSATTRBM_FSID 0x00000100 #define NFSATTRBM_UNIQUEHANDLES 0x00000200 #define NFSATTRBM_LEASETIME 0x00000400 #define NFSATTRBM_RDATTRERROR 0x00000800 #define NFSATTRBM_ACL 0x00001000 #define NFSATTRBM_ACLSUPPORT 0x00002000 #define NFSATTRBM_ARCHIVE 0x00004000 #define NFSATTRBM_CANSETTIME 0x00008000 #define NFSATTRBM_CASEINSENSITIVE 0x00010000 #define NFSATTRBM_CASEPRESERVING 0x00020000 #define NFSATTRBM_CHOWNRESTRICTED 0x00040000 #define NFSATTRBM_FILEHANDLE 0x00080000 #define NFSATTRBM_FILEID 0x00100000 #define NFSATTRBM_FILESAVAIL 0x00200000 #define NFSATTRBM_FILESFREE 0x00400000 #define NFSATTRBM_FILESTOTAL 0x00800000 #define NFSATTRBM_FSLOCATIONS 0x01000000 #define NFSATTRBM_HIDDEN 0x02000000 #define NFSATTRBM_HOMOGENEOUS 0x04000000 #define NFSATTRBM_MAXFILESIZE 0x08000000 #define NFSATTRBM_MAXLINK 0x10000000 #define NFSATTRBM_MAXNAME 0x20000000 #define NFSATTRBM_MAXREAD 0x40000000 #define NFSATTRBM_MAXWRITE 0x80000000 #define NFSATTRBM_MIMETYPE 0x00000001 #define NFSATTRBM_MODE 0x00000002 #define NFSATTRBM_NOTRUNC 0x00000004 #define NFSATTRBM_NUMLINKS 0x00000008 #define NFSATTRBM_OWNER 0x00000010 #define NFSATTRBM_OWNERGROUP 0x00000020 #define NFSATTRBM_QUOTAHARD 0x00000040 #define NFSATTRBM_QUOTASOFT 0x00000080 #define NFSATTRBM_QUOTAUSED 0x00000100 #define NFSATTRBM_RAWDEV 0x00000200 #define NFSATTRBM_SPACEAVAIL 0x00000400 #define NFSATTRBM_SPACEFREE 0x00000800 #define NFSATTRBM_SPACETOTAL 0x00001000 #define NFSATTRBM_SPACEUSED 0x00002000 #define NFSATTRBM_SYSTEM 0x00004000 #define NFSATTRBM_TIMEACCESS 0x00008000 #define NFSATTRBM_TIMEACCESSSET 0x00010000 #define NFSATTRBM_TIMEBACKUP 0x00020000 #define NFSATTRBM_TIMECREATE 0x00040000 #define NFSATTRBM_TIMEDELTA 0x00080000 #define NFSATTRBM_TIMEMETADATA 0x00100000 #define NFSATTRBM_TIMEMODIFY 0x00200000 #define NFSATTRBM_TIMEMODIFYSET 0x00400000 #define NFSATTRBM_MOUNTEDONFILEID 0x00800000 #define NFSATTRBM_DIRNOTIFDELAY 0x01000000 #define NFSATTRBM_DIRENTNOTIFDELAY 0x02000000 #define NFSATTRBM_DACL 0x04000000 #define NFSATTRBM_SACL 0x08000000 #define NFSATTRBM_CHANGEPOLICY 0x10000000 #define NFSATTRBM_FSSTATUS 0x20000000 #define NFSATTRBM_FSLAYOUTTYPE 0x40000000 #define NFSATTRBM_LAYOUTHINT 0x80000000 #define NFSATTRBM_LAYOUTTYPE 0x00000001 #define NFSATTRBM_LAYOUTBLKSIZE 0x00000002 #define NFSATTRBM_LAYOUTALIGNMENT 0x00000004 #define NFSATTRBM_FSLOCATIONSINFO 0x00000008 #define NFSATTRBM_MDSTHRESHOLD 0x00000010 #define NFSATTRBM_RETENTIONGET 0x00000020 #define NFSATTRBM_RETENTIONSET 0x00000040 #define NFSATTRBM_RETENTEVTGET 0x00000080 #define NFSATTRBM_RETENTEVTSET 0x00000100 #define NFSATTRBM_RETENTIONHOLD 0x00000200 #define NFSATTRBM_MODESETMASKED 0x00000400 #define NFSATTRBM_SUPPATTREXCLCREAT 0x00000800 #define NFSATTRBM_FSCHARSETCAP 0x00001000 #define NFSATTRBIT_MAX 77 /* * Sets of attributes that are supported, by words in the bitmap. */ /* * NFSATTRBIT_SUPPORTED - SUPP0 - bits 0<->31 * SUPP1 - bits 32<->63 * SUPP2 - bits 64<->95 */ #define NFSATTRBIT_SUPP0 \ (NFSATTRBM_SUPPORTEDATTRS | \ NFSATTRBM_TYPE | \ NFSATTRBM_FHEXPIRETYPE | \ NFSATTRBM_CHANGE | \ NFSATTRBM_SIZE | \ NFSATTRBM_LINKSUPPORT | \ NFSATTRBM_SYMLINKSUPPORT | \ NFSATTRBM_NAMEDATTR | \ NFSATTRBM_FSID | \ NFSATTRBM_UNIQUEHANDLES | \ NFSATTRBM_LEASETIME | \ NFSATTRBM_RDATTRERROR | \ NFSATTRBM_ACL | \ NFSATTRBM_ACLSUPPORT | \ NFSATTRBM_CANSETTIME | \ NFSATTRBM_CASEINSENSITIVE | \ NFSATTRBM_CASEPRESERVING | \ NFSATTRBM_CHOWNRESTRICTED | \ NFSATTRBM_FILEHANDLE | \ NFSATTRBM_FILEID | \ NFSATTRBM_FILESAVAIL | \ NFSATTRBM_FILESFREE | \ NFSATTRBM_FILESTOTAL | \ NFSATTRBM_FSLOCATIONS | \ NFSATTRBM_HOMOGENEOUS | \ NFSATTRBM_MAXFILESIZE | \ NFSATTRBM_MAXLINK | \ NFSATTRBM_MAXNAME | \ NFSATTRBM_MAXREAD | \ NFSATTRBM_MAXWRITE) /* * NFSATTRBIT_S1 - subset of SUPP1 - OR of the following bits: */ #define NFSATTRBIT_S1 \ (NFSATTRBM_MODE | \ NFSATTRBM_NOTRUNC | \ NFSATTRBM_NUMLINKS | \ NFSATTRBM_OWNER | \ NFSATTRBM_OWNERGROUP | \ NFSATTRBM_RAWDEV | \ NFSATTRBM_SPACEAVAIL | \ NFSATTRBM_SPACEFREE | \ NFSATTRBM_SPACETOTAL | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMEDELTA | \ NFSATTRBM_TIMEMETADATA | \ NFSATTRBM_TIMEMODIFY | \ NFSATTRBM_MOUNTEDONFILEID | \ NFSATTRBM_QUOTAHARD | \ NFSATTRBM_QUOTASOFT | \ - NFSATTRBM_QUOTAUSED) + NFSATTRBM_QUOTAUSED | \ + NFSATTRBM_FSLAYOUTTYPE) #ifdef QUOTA /* * If QUOTA OR in NFSATTRBIT_QUOTAHARD, NFSATTRBIT_QUOTASOFT and * NFSATTRBIT_QUOTAUSED. */ #define NFSATTRBIT_SUPP1 (NFSATTRBIT_S1 | \ NFSATTRBM_QUOTAHARD | \ NFSATTRBM_QUOTASOFT | \ NFSATTRBM_QUOTAUSED) #else #define NFSATTRBIT_SUPP1 NFSATTRBIT_S1 #endif -#define NFSATTRBIT_SUPP2 NFSATTRBM_SUPPATTREXCLCREAT +#define NFSATTRBIT_SUPP2 \ + (NFSATTRBM_LAYOUTTYPE | \ + NFSATTRBM_LAYOUTBLKSIZE | \ + NFSATTRBM_LAYOUTALIGNMENT | \ + NFSATTRBM_SUPPATTREXCLCREAT) /* * NFSATTRBIT_SUPPSETONLY is the OR of NFSATTRBIT_TIMEACCESSSET and * NFSATTRBIT_TIMEMODIFYSET. */ #define NFSATTRBIT_SUPPSETONLY (NFSATTRBM_TIMEACCESSSET | \ NFSATTRBM_TIMEMODIFYSET) /* * NFSATTRBIT_SETABLE - SETABLE0 - bits 0<->31 * SETABLE1 - bits 32<->63 * SETABLE2 - bits 64<->95 */ #define NFSATTRBIT_SETABLE0 \ (NFSATTRBM_SIZE | \ NFSATTRBM_ACL) #define NFSATTRBIT_SETABLE1 \ (NFSATTRBM_MODE | \ NFSATTRBM_OWNER | \ NFSATTRBM_OWNERGROUP | \ NFSATTRBM_TIMEACCESSSET | \ NFSATTRBM_TIMEMODIFYSET) #define NFSATTRBIT_SETABLE2 0 /* * Set of attributes that the getattr vnode op needs. * OR of the following bits. * NFSATTRBIT_GETATTR0 - bits 0<->31 */ #define NFSATTRBIT_GETATTR0 \ (NFSATTRBM_SUPPORTEDATTRS | \ NFSATTRBM_TYPE | \ NFSATTRBM_CHANGE | \ NFSATTRBM_SIZE | \ NFSATTRBM_FSID | \ NFSATTRBM_FILEID | \ NFSATTRBM_MAXREAD) /* * NFSATTRBIT_GETATTR1 - bits 32<->63 */ #define NFSATTRBIT_GETATTR1 \ (NFSATTRBM_MODE | \ NFSATTRBM_NUMLINKS | \ NFSATTRBM_OWNER | \ NFSATTRBM_OWNERGROUP | \ NFSATTRBM_RAWDEV | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMEMETADATA | \ NFSATTRBM_TIMEMODIFY) /* * NFSATTRBIT_GETATTR2 - bits 64<->95 */ #define NFSATTRBIT_GETATTR2 0 /* * Subset of the above that the Write RPC gets. * OR of the following bits. * NFSATTRBIT_WRITEGETATTR0 - bits 0<->31 */ #define NFSATTRBIT_WRITEGETATTR0 \ (NFSATTRBM_SUPPORTEDATTRS | \ NFSATTRBM_TYPE | \ NFSATTRBM_CHANGE | \ NFSATTRBM_SIZE | \ NFSATTRBM_FSID | \ NFSATTRBM_FILEID | \ NFSATTRBM_MAXREAD) /* * NFSATTRBIT_WRITEGETATTR1 - bits 32<->63 */ #define NFSATTRBIT_WRITEGETATTR1 \ (NFSATTRBM_MODE | \ NFSATTRBM_NUMLINKS | \ NFSATTRBM_RAWDEV | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMEMETADATA | \ NFSATTRBM_TIMEMODIFY) /* * NFSATTRBIT_WRITEGETATTR2 - bits 64<->95 */ #define NFSATTRBIT_WRITEGETATTR2 0 /* * Set of attributes that the wccattr operation op needs. * OR of the following bits. * NFSATTRBIT_WCCATTR0 - bits 0<->31 */ #define NFSATTRBIT_WCCATTR0 0 /* * NFSATTRBIT_WCCATTR1 - bits 32<->63 */ #define NFSATTRBIT_WCCATTR1 \ (NFSATTRBM_TIMEMODIFY) /* * NFSATTRBIT_WCCATTR2 - bits 64<->95 */ #define NFSATTRBIT_WCCATTR2 0 /* * NFSATTRBIT_CBGETATTR0 - bits 0<->31 */ #define NFSATTRBIT_CBGETATTR0 (NFSATTRBM_CHANGE | NFSATTRBM_SIZE) /* * NFSATTRBIT_CBGETATTR1 - bits 32<->63 */ #define NFSATTRBIT_CBGETATTR1 0x0 /* * NFSATTRBIT_CBGETATTR2 - bits 64<->95 */ #define NFSATTRBIT_CBGETATTR2 0x0 /* * Sets of attributes that require a VFS_STATFS() call to get the * values of. * NFSATTRBIT_STATFS0 - bits 0<->31 */ #define NFSATTRBIT_STATFS0 \ (NFSATTRBM_LINKSUPPORT | \ NFSATTRBM_SYMLINKSUPPORT | \ NFSATTRBM_CANSETTIME | \ NFSATTRBM_FILESAVAIL | \ NFSATTRBM_FILESFREE | \ NFSATTRBM_FILESTOTAL | \ NFSATTRBM_HOMOGENEOUS | \ NFSATTRBM_MAXFILESIZE | \ NFSATTRBM_MAXNAME | \ NFSATTRBM_MAXREAD | \ NFSATTRBM_MAXWRITE) /* * NFSATTRBIT_STATFS1 - bits 32<->63 */ #define NFSATTRBIT_STATFS1 \ (NFSATTRBM_QUOTAHARD | \ NFSATTRBM_QUOTASOFT | \ NFSATTRBM_QUOTAUSED | \ NFSATTRBM_SPACEAVAIL | \ NFSATTRBM_SPACEFREE | \ NFSATTRBM_SPACETOTAL | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEDELTA) /* * NFSATTRBIT_STATFS2 - bits 64<->95 */ #define NFSATTRBIT_STATFS2 0 /* * These are the bits that are needed by the nfs_statfs() call. * (The regular getattr bits are or'd in so the vnode gets the correct * type, etc.) * NFSGETATTRBIT_STATFS0 - bits 0<->31 */ #define NFSGETATTRBIT_STATFS0 (NFSATTRBIT_GETATTR0 | \ NFSATTRBM_LINKSUPPORT | \ NFSATTRBM_SYMLINKSUPPORT | \ NFSATTRBM_CANSETTIME | \ NFSATTRBM_FILESFREE | \ NFSATTRBM_FILESTOTAL | \ NFSATTRBM_HOMOGENEOUS | \ NFSATTRBM_MAXFILESIZE | \ NFSATTRBM_MAXNAME | \ NFSATTRBM_MAXREAD | \ NFSATTRBM_MAXWRITE) /* * NFSGETATTRBIT_STATFS1 - bits 32<->63 */ #define NFSGETATTRBIT_STATFS1 (NFSATTRBIT_GETATTR1 | \ NFSATTRBM_SPACEAVAIL | \ NFSATTRBM_SPACEFREE | \ NFSATTRBM_SPACETOTAL | \ NFSATTRBM_TIMEDELTA) /* * NFSGETATTRBIT_STATFS2 - bits 64<->95 */ #define NFSGETATTRBIT_STATFS2 0 /* * Set of attributes for the equivalent of an nfsv3 pathconf rpc. * NFSGETATTRBIT_PATHCONF0 - bits 0<->31 */ #define NFSGETATTRBIT_PATHCONF0 (NFSATTRBIT_GETATTR0 | \ NFSATTRBM_CASEINSENSITIVE | \ NFSATTRBM_CASEPRESERVING | \ NFSATTRBM_CHOWNRESTRICTED | \ NFSATTRBM_MAXLINK | \ NFSATTRBM_MAXNAME) /* * NFSGETATTRBIT_PATHCONF1 - bits 32<->63 */ #define NFSGETATTRBIT_PATHCONF1 (NFSATTRBIT_GETATTR1 | \ NFSATTRBM_NOTRUNC) /* * NFSGETATTRBIT_PATHCONF2 - bits 64<->95 */ #define NFSGETATTRBIT_PATHCONF2 0 /* * Sets of attributes required by readdir and readdirplus. * NFSATTRBIT_READDIRPLUS0 (NFSATTRBIT_GETATTR0 | NFSATTRBIT_FILEHANDLE | * NFSATTRBIT_RDATTRERROR) */ #define NFSATTRBIT_READDIRPLUS0 (NFSATTRBIT_GETATTR0 | NFSATTRBM_FILEHANDLE | \ NFSATTRBM_RDATTRERROR) #define NFSATTRBIT_READDIRPLUS1 NFSATTRBIT_GETATTR1 #define NFSATTRBIT_READDIRPLUS2 0 /* * Set of attributes supported by Referral vnodes. */ #define NFSATTRBIT_REFERRAL0 (NFSATTRBM_TYPE | NFSATTRBM_FSID | \ NFSATTRBM_RDATTRERROR | NFSATTRBM_FSLOCATIONS) #define NFSATTRBIT_REFERRAL1 NFSATTRBM_MOUNTEDONFILEID #define NFSATTRBIT_REFERRAL2 0 /* * Structure for data handled by the statfs rpc. Since some fields are * u_int64_t, this cannot be used for copying data on/off the wire, due * to alignment concerns. */ struct nfsstatfs { union { struct { u_int32_t nfsv2sf_tsize; u_int32_t nfsv2sf_bsize; u_int32_t nfsv2sf_blocks; u_int32_t nfsv2sf_bfree; u_int32_t nfsv2sf_bavail; } sf_nfsv2; struct { u_int64_t nfsv3sf_tbytes; u_int64_t nfsv3sf_fbytes; u_int64_t nfsv3sf_abytes; u_int64_t nfsv3sf_tfiles; u_int64_t nfsv3sf_ffiles; u_int64_t nfsv3sf_afiles; u_int32_t nfsv3sf_invarsec; } sf_nfsv3; } sf_un; }; #define sf_tsize sf_un.sf_nfsv2.nfsv2sf_tsize #define sf_bsize sf_un.sf_nfsv2.nfsv2sf_bsize #define sf_blocks sf_un.sf_nfsv2.nfsv2sf_blocks #define sf_bfree sf_un.sf_nfsv2.nfsv2sf_bfree #define sf_bavail sf_un.sf_nfsv2.nfsv2sf_bavail #define sf_tbytes sf_un.sf_nfsv3.nfsv3sf_tbytes #define sf_fbytes sf_un.sf_nfsv3.nfsv3sf_fbytes #define sf_abytes sf_un.sf_nfsv3.nfsv3sf_abytes #define sf_tfiles sf_un.sf_nfsv3.nfsv3sf_tfiles #define sf_ffiles sf_un.sf_nfsv3.nfsv3sf_ffiles #define sf_afiles sf_un.sf_nfsv3.nfsv3sf_afiles #define sf_invarsec sf_un.sf_nfsv3.nfsv3sf_invarsec /* * Now defined using u_int64_t for the 64 bit field(s). * (Cannot be used to move data on/off the wire, due to alignment concerns.) */ struct nfsfsinfo { u_int32_t fs_rtmax; u_int32_t fs_rtpref; u_int32_t fs_rtmult; u_int32_t fs_wtmax; u_int32_t fs_wtpref; u_int32_t fs_wtmult; u_int32_t fs_dtpref; u_int64_t fs_maxfilesize; struct timespec fs_timedelta; u_int32_t fs_properties; }; /* * Bits for fs_properties */ #define NFSV3_FSFLINK 0x1 #define NFSV3_FSFSYMLINK 0x2 #define NFSV3_FSFHOMOGENEOUS 0x4 #define NFSV3_FSFCANSETTIME 0x8 /* * Yikes, overload fs_rtmult as fs_maxname for V4. */ #define fs_maxname fs_rtmult struct nfsv3_pathconf { u_int32_t pc_linkmax; u_int32_t pc_namemax; u_int32_t pc_notrunc; u_int32_t pc_chownrestricted; u_int32_t pc_caseinsensitive; u_int32_t pc_casepreserving; }; /* * NFS V4 data structures. */ struct nfsv4stateid { u_int32_t seqid; u_int32_t other[NFSX_STATEIDOTHER / NFSX_UNSIGNED]; }; typedef struct nfsv4stateid nfsv4stateid_t; + +/* Notify bits and notify bitmap size. */ +#define NFSV4NOTIFY_CHANGE 1 +#define NFSV4NOTIFY_DELETE 2 +#define NFSV4_NOTIFYBITMAP 1 /* # of 32bit values needed for bits */ + +/* Layoutreturn kinds. */ +#define NFSV4LAYOUTRET_FILE 1 +#define NFSV4LAYOUTRET_FSID 2 +#define NFSV4LAYOUTRET_ALL 3 #endif /* _NFS_NFSPROTO_H_ */ Index: head/sys/fs/nfs/nfsrvstate.h =================================================================== --- head/sys/fs/nfs/nfsrvstate.h (revision 335011) +++ head/sys/fs/nfs/nfsrvstate.h (revision 335012) @@ -1,298 +1,398 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSRVSTATE_H_ #define _NFS_NFSRVSTATE_H_ +#if defined(_KERNEL) || defined(KERNEL) /* * Definitions for NFS V4 server state handling. */ /* * List heads for nfsclient, nfsstate and nfslockfile. * (Some systems seem to like to dynamically size these things, but I * don't see any point in doing so for these ones.) */ LIST_HEAD(nfsclienthashhead, nfsclient); LIST_HEAD(nfsstatehead, nfsstate); LIST_HEAD(nfslockhead, nfslock); LIST_HEAD(nfslockhashhead, nfslockfile); LIST_HEAD(nfssessionhead, nfsdsession); LIST_HEAD(nfssessionhashhead, nfsdsession); +TAILQ_HEAD(nfslayouthead, nfslayout); +SLIST_HEAD(nfsdsdirhead, nfsdsdir); +TAILQ_HEAD(nfsdevicehead, nfsdevice); +LIST_HEAD(nfsdontlisthead, nfsdontlist); /* * List head for nfsusrgrp. */ TAILQ_HEAD(nfsuserhashhead, nfsusrgrp); #define NFSCLIENTHASH(id) \ (&nfsclienthash[(id).lval[1] % nfsrv_clienthashsize]) #define NFSSTATEHASH(clp, id) \ (&((clp)->lc_stateid[(id).other[2] % nfsrv_statehashsize])) #define NFSUSERHASH(id) \ (&nfsuserhash[(id) % nfsrv_lughashsize]) #define NFSUSERNAMEHASH(p, l) \ (&nfsusernamehash[((l)>=4?(*(p)+*((p)+1)+*((p)+2)+*((p)+3)):*(p)) \ % nfsrv_lughashsize]) #define NFSGROUPHASH(id) \ (&nfsgrouphash[(id) % nfsrv_lughashsize]) #define NFSGROUPNAMEHASH(p, l) \ (&nfsgroupnamehash[((l)>=4?(*(p)+*((p)+1)+*((p)+2)+*((p)+3)):*(p)) \ % nfsrv_lughashsize]) struct nfssessionhash { struct mtx mtx; struct nfssessionhashhead list; }; #define NFSSESSIONHASH(f) \ (&nfssessionhash[nfsrv_hashsessionid(f) % nfsrv_sessionhashsize]) +struct nfslayouthash { + struct mtx mtx; + struct nfslayouthead list; +}; +#define NFSLAYOUTHASH(f) \ + (&nfslayouthash[nfsrv_hashfh(f) % nfsrv_layouthashsize]) + /* * Client server structure for V4. It is doubly linked into two lists. * The first is a hash table based on the clientid and the second is a * list of all clients maintained in LRU order. * The actual size malloc'd is large enough to accommodate the id string. */ struct nfsclient { LIST_ENTRY(nfsclient) lc_hash; /* Clientid hash list */ struct nfsstatehead *lc_stateid; /* Stateid hash */ struct nfsstatehead lc_open; /* Open owner list */ struct nfsstatehead lc_deleg; /* Delegations */ struct nfsstatehead lc_olddeleg; /* and old delegations */ struct nfssessionhead lc_session; /* List of NFSv4.1 sessions */ time_t lc_expiry; /* Expiry time (sec) */ time_t lc_delegtime; /* Old deleg expiry (sec) */ nfsquad_t lc_clientid; /* 64 bit clientid */ nfsquad_t lc_confirm; /* 64 bit confirm value */ u_int32_t lc_program; /* RPC Program # */ u_int32_t lc_callback; /* Callback id */ u_int32_t lc_stateindex; /* Current state index# */ u_int32_t lc_statemaxindex; /* Max state index# */ u_int32_t lc_cbref; /* Cnt of callbacks */ uid_t lc_uid; /* User credential */ gid_t lc_gid; u_int16_t lc_idlen; /* Client ID and len */ u_int16_t lc_namelen; /* plus GSS principal and len */ u_char *lc_name; struct nfssockreq lc_req; /* Callback info */ u_int32_t lc_flags; /* LCL_ flag bits */ u_char lc_verf[NFSX_VERF]; /* client verifier */ u_char lc_id[1]; /* Malloc'd correct size */ }; #define CLOPS_CONFIRM 0x0001 #define CLOPS_RENEW 0x0002 #define CLOPS_RENEWOP 0x0004 /* + * Structure for NFSv4.1 Layouts. + * Malloc'd to correct size for the lay_xdr. + */ +struct nfslayout { + TAILQ_ENTRY(nfslayout) lay_list; + nfsv4stateid_t lay_stateid; + nfsquad_t lay_clientid; + fhandle_t lay_fh; + fsid_t lay_fsid; + uint32_t lay_layoutlen; + uint16_t lay_mirrorcnt; + uint16_t lay_trycnt; + uint16_t lay_type; + uint16_t lay_flags; + uint32_t lay_xdr[0]; +}; + +/* Flags for lay_flags. */ +#define NFSLAY_READ 0x0001 +#define NFSLAY_RW 0x0002 +#define NFSLAY_RECALL 0x0004 +#define NFSLAY_RETURNED 0x0008 +#define NFSLAY_CALLB 0x0010 + +/* * Structure for an NFSv4.1 session. * Locking rules for this structure. * To add/delete one of these structures from the lists, you must lock * both: NFSLOCKSTATE() and NFSLOCKSESSION(session hashhead) in that order. * To traverse the lists looking for one of these, you must hold one * of these two locks. * The exception is if the thread holds the exclusive root sleep lock. * In this case, all other nfsd threads are blocked, so locking the * mutexes isn't required. * When manipulating sess_refcnt, NFSLOCKSTATE() must be locked. * When manipulating the fields withinsess_cbsess except nfsess_xprt, * sess_cbsess.nfsess_mtx must be locked. * When manipulating sess_slots and sess_cbsess.nfsess_xprt, * NFSLOCKSESSION(session hashhead) must be locked. */ struct nfsdsession { uint64_t sess_refcnt; /* Reference count. */ LIST_ENTRY(nfsdsession) sess_hash; /* Hash list of sessions. */ LIST_ENTRY(nfsdsession) sess_list; /* List of client sessions. */ struct nfsslot sess_slots[NFSV4_SLOTS]; struct nfsclient *sess_clp; /* Associated clientid. */ uint32_t sess_crflags; uint32_t sess_cbprogram; uint32_t sess_maxreq; uint32_t sess_maxresp; uint32_t sess_maxrespcached; uint32_t sess_maxops; uint32_t sess_maxslots; uint32_t sess_cbmaxreq; uint32_t sess_cbmaxresp; uint32_t sess_cbmaxrespcached; uint32_t sess_cbmaxops; uint8_t sess_sessionid[NFSX_V4SESSIONID]; struct nfsclsession sess_cbsess; /* Callback session. */ }; /* * Nfs state structure. I couldn't resist overloading this one, since * it makes cleanup, etc. simpler. These structures are used in four ways: * - open_owner structures chained off of nfsclient * - open file structures chained off an open_owner structure * - lock_owner structures chained off an open file structure * - delegated file structures chained off of nfsclient and nfslockfile * - the ls_list field is used for the chain it is in * - the ls_head structure is used to chain off the sibling structure * (it is a union between an nfsstate and nfslock structure head) * If it is a lockowner stateid, nfslock structures hang off it. * For the open file and lockowner cases, it is in the hash table in * nfsclient for stateid. */ struct nfsstate { LIST_ENTRY(nfsstate) ls_hash; /* Hash list entry */ LIST_ENTRY(nfsstate) ls_list; /* List of opens/delegs */ LIST_ENTRY(nfsstate) ls_file; /* Opens/Delegs for a file */ union { struct nfsstatehead open; /* Opens list */ struct nfslockhead lock; /* Locks list */ } ls_head; nfsv4stateid_t ls_stateid; /* The state id */ u_int32_t ls_seq; /* seq id */ uid_t ls_uid; /* uid of locker */ u_int32_t ls_flags; /* Type of lock, etc. */ union { struct nfsstate *openowner; /* Open only */ u_int32_t opentolockseq; /* Lock call only */ u_int32_t noopens; /* Openowner only */ struct { u_quad_t filerev; /* Delegations only */ time_t expiry; time_t limit; u_int64_t compref; } deleg; } ls_un; struct nfslockfile *ls_lfp; /* Back pointer */ struct nfsrvcache *ls_op; /* Op cache reference */ struct nfsclient *ls_clp; /* Back pointer */ u_short ls_ownerlen; /* Length of ls_owner */ u_char ls_owner[1]; /* malloc'd the correct size */ }; #define ls_lock ls_head.lock #define ls_open ls_head.open #define ls_opentolockseq ls_un.opentolockseq #define ls_openowner ls_un.openowner #define ls_openstp ls_un.openowner #define ls_noopens ls_un.noopens #define ls_filerev ls_un.deleg.filerev #define ls_delegtime ls_un.deleg.expiry #define ls_delegtimelimit ls_un.deleg.limit #define ls_compref ls_un.deleg.compref /* * Nfs lock structure. * This structure is chained off of the nfsstate (the lockowner) and * nfslockfile (the file) structures, for the file and owner it * refers to. It holds flags and a byte range. * It also has back pointers to the associated lock_owner and lockfile. */ struct nfslock { LIST_ENTRY(nfslock) lo_lckowner; LIST_ENTRY(nfslock) lo_lckfile; struct nfsstate *lo_stp; struct nfslockfile *lo_lfp; u_int64_t lo_first; u_int64_t lo_end; u_int32_t lo_flags; }; /* * Structure used to return a conflicting lock. (Must be large * enough for the largest lock owner we can have.) */ struct nfslockconflict { nfsquad_t cl_clientid; u_int64_t cl_first; u_int64_t cl_end; u_int32_t cl_flags; u_short cl_ownerlen; u_char cl_owner[NFSV4_OPAQUELIMIT]; }; /* * This structure is used to keep track of local locks that might need * to be rolled back. */ struct nfsrollback { LIST_ENTRY(nfsrollback) rlck_list; uint64_t rlck_first; uint64_t rlck_end; int rlck_type; }; /* * This structure refers to a file for which lock(s) and/or open(s) exist. * Searched via hash table on file handle or found via the back pointer from an * open or lock owner. */ struct nfslockfile { LIST_HEAD(, nfsstate) lf_open; /* Open list */ LIST_HEAD(, nfsstate) lf_deleg; /* Delegation list */ LIST_HEAD(, nfslock) lf_lock; /* Lock list */ LIST_HEAD(, nfslock) lf_locallock; /* Local lock list */ LIST_HEAD(, nfsrollback) lf_rollback; /* Local lock rollback list */ LIST_ENTRY(nfslockfile) lf_hash; /* Hash list entry */ fhandle_t lf_fh; /* The file handle */ struct nfsv4lock lf_locallock_lck; /* serialize local locking */ int lf_usecount; /* Ref count for locking */ }; /* * This structure is malloc'd an chained off hash lists for user/group * names. */ struct nfsusrgrp { TAILQ_ENTRY(nfsusrgrp) lug_numhash; /* Hash by id# */ TAILQ_ENTRY(nfsusrgrp) lug_namehash; /* and by name */ time_t lug_expiry; /* Expiry time in sec */ union { uid_t un_uid; /* id# */ gid_t un_gid; } lug_un; struct ucred *lug_cred; /* Cred. with groups list */ int lug_namelen; /* Name length */ u_char lug_name[1]; /* malloc'd correct length */ }; #define lug_uid lug_un.un_uid #define lug_gid lug_un.un_gid /* * These structures are used for the stable storage restart stuff. */ /* * Record at beginning of file. */ struct nfsf_rec { u_int32_t lease; /* Lease duration */ u_int32_t numboots; /* Number of boottimes */ }; -#if defined(_KERNEL) || defined(KERNEL) void nfsrv_cleanclient(struct nfsclient *, NFSPROC_T *); void nfsrv_freedeleglist(struct nfsstatehead *); -#endif + +/* + * This structure is used to create the list of device info entries for + * a GetDeviceInfo operation and stores the DS server info. + * The nfsdev_addrandhost field has the fully qualified host domain name + * followed by the network address in XDR. + * It is allocated with nfsrv_dsdirsize nfsdev_dsdir[] entries. + */ +struct nfsdevice { + TAILQ_ENTRY(nfsdevice) nfsdev_list; + vnode_t nfsdev_dvp; + struct nfsmount *nfsdev_nmp; + char nfsdev_deviceid[NFSX_V4DEVICEID]; + uint16_t nfsdev_hostnamelen; + uint16_t nfsdev_fileaddrlen; + uint16_t nfsdev_flexaddrlen; + char *nfsdev_fileaddr; + char *nfsdev_flexaddr; + char *nfsdev_host; + uint32_t nfsdev_nextdir; + vnode_t nfsdev_dsdir[0]; +}; + +/* + * This structure holds the va_size, va_filerev, va_atime and va_mtime for the + * DS file and is stored in the metadata file's extended attribute pnfsd.dsattr. + */ +struct pnfsdsattr { + uint64_t dsa_filerev; + uint64_t dsa_size; + struct timespec dsa_atime; + struct timespec dsa_mtime; +}; + +/* + * This structure is a list element for a list the pNFS server uses to + * mark that the recovery of a mirror file is in progress. + */ +struct nfsdontlist { + LIST_ENTRY(nfsdontlist) nfsmr_list; + uint32_t nfsmr_flags; + fhandle_t nfsmr_fh; +}; + +/* nfsmr_flags bits. */ +#define NFSMR_DONTLAYOUT 0x00000001 + +#endif /* defined(_KERNEL) || defined(KERNEL) */ + +/* + * This structure holds the information about the DS file and is stored + * in the metadata file's extended attribute called pnfsd.dsfile. + */ +#define PNFS_FILENAME_LEN (2 * sizeof(fhandle_t)) +struct pnfsdsfile { + fhandle_t dsf_fh; + uint32_t dsf_dir; + union { + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } dsf_nam; + char dsf_filename[PNFS_FILENAME_LEN + 1]; +}; +#define dsf_sin dsf_nam.sin +#define dsf_sin6 dsf_nam.sin6 #endif /* _NFS_NFSRVSTATE_H_ */ Index: head/sys/fs/nfsclient/nfs_clport.c =================================================================== --- head/sys/fs/nfsclient/nfs_clport.c (revision 335011) +++ head/sys/fs/nfsclient/nfs_clport.c (revision 335012) @@ -1,1495 +1,1504 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include /* * generally, I don't like #includes inside .h files, but it seems to * be the easiest way to handle the port. */ #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS dtrace_nfsclient_attrcache_flush_probe_func_t dtrace_nfscl_attrcache_flush_done_probe; uint32_t nfscl_attrcache_flush_done_id; dtrace_nfsclient_attrcache_get_hit_probe_func_t dtrace_nfscl_attrcache_get_hit_probe; uint32_t nfscl_attrcache_get_hit_id; dtrace_nfsclient_attrcache_get_miss_probe_func_t dtrace_nfscl_attrcache_get_miss_probe; uint32_t nfscl_attrcache_get_miss_id; dtrace_nfsclient_attrcache_load_probe_func_t dtrace_nfscl_attrcache_load_done_probe; uint32_t nfscl_attrcache_load_done_id; #endif /* !KDTRACE_HOOKS */ extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern struct vop_vector newnfs_vnodeops; extern struct vop_vector newnfs_fifoops; extern uma_zone_t newnfsnode_zone; extern struct buf_ops buf_ops_newnfs; extern int ncl_pbuf_freecnt; extern short nfsv4_cbport; extern int nfscl_enablecallb; extern int nfs_numnfscbd; extern int nfscl_inited; struct mtx ncl_iod_mutex; NFSDLOCKMUTEX; +extern struct mtx nfsrv_dslock_mtx; extern void (*ncl_call_invalcaches)(struct vnode *); SYSCTL_DECL(_vfs_nfs); static int ncl_fileid_maxwarnings = 10; SYSCTL_INT(_vfs_nfs, OID_AUTO, fileid_maxwarnings, CTLFLAG_RWTUN, &ncl_fileid_maxwarnings, 0, "Limit fileid corruption warnings; 0 is off; -1 is unlimited"); static volatile int ncl_fileid_nwarnings; static void nfscl_warn_fileid(struct nfsmount *, struct nfsvattr *, struct nfsvattr *); /* * Comparison function for vfs_hash functions. */ int newnfs_vncmpf(struct vnode *vp, void *arg) { struct nfsfh *nfhp = (struct nfsfh *)arg; struct nfsnode *np = VTONFS(vp); if (np->n_fhp->nfh_len != nfhp->nfh_len || NFSBCMP(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len)) return (1); return (0); } /* * Look up a vnode/nfsnode by file handle. * Callers must check for mount points!! * In all cases, a pointer to a * nfsnode structure is returned. * This variant takes a "struct nfsfh *" as second argument and uses * that structure up, either by hanging off the nfsnode or FREEing it. */ int nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp, struct componentname *cnp, struct thread *td, struct nfsnode **npp, void *stuff, int lkflags) { struct nfsnode *np, *dnp; struct vnode *vp, *nvp; struct nfsv4node *newd, *oldd; int error; u_int hash; struct nfsmount *nmp; nmp = VFSTONFS(mntp); dnp = VTONFS(dvp); *npp = NULL; hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT); error = vfs_hash_get(mntp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); if (error == 0 && nvp != NULL) { /* * I believe there is a slight chance that vgonel() could * get called on this vnode between when NFSVOPLOCK() drops * the VI_LOCK() and vget() acquires it again, so that it * hasn't yet had v_usecount incremented. If this were to * happen, the VI_DOOMED flag would be set, so check for * that here. Since we now have the v_usecount incremented, * we should be ok until we vrele() it, if the VI_DOOMED * flag isn't set now. */ VI_LOCK(nvp); if ((nvp->v_iflag & VI_DOOMED)) { VI_UNLOCK(nvp); vrele(nvp); error = ENOENT; } else { VI_UNLOCK(nvp); } } if (error) { free(nfhp, M_NFSFH); return (error); } if (nvp != NULL) { np = VTONFS(nvp); /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ oldd = newd = NULL; if ((nmp->nm_flag & NFSMNT_NFSV4) && np->n_v4 != NULL && nvp->v_type == VREG && (np->n_v4->n4_namelen != cnp->cn_namelen || NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { newd = malloc( sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + + cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); NFSLOCKNODE(np); if (newd != NULL && np->n_v4 != NULL && nvp->v_type == VREG && (np->n_v4->n4_namelen != cnp->cn_namelen || NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { oldd = np->n_v4; np->n_v4 = newd; newd = NULL; np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = cnp->cn_namelen; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen); } NFSUNLOCKNODE(np); } if (newd != NULL) free(newd, M_NFSV4NODE); if (oldd != NULL) free(oldd, M_NFSV4NODE); *npp = np; free(nfhp, M_NFSFH); return (0); } np = uma_zalloc(newnfsnode_zone, M_WAITOK | M_ZERO); error = getnewvnode(nfs_vnode_tag, mntp, &newnfs_vnodeops, &nvp); if (error) { uma_zfree(newnfsnode_zone, np); free(nfhp, M_NFSFH); return (error); } vp = nvp; KASSERT(vp->v_bufobj.bo_bsize != 0, ("nfscl_nget: bo_bsize == 0")); vp->v_bufobj.bo_ops = &buf_ops_newnfs; vp->v_data = np; np->n_vnode = vp; /* * Initialize the mutex even if the vnode is going to be a loser. * This simplifies the logic in reclaim, which can then unconditionally * destroy the mutex (in the case of the loser, or if hash_insert * happened to return an error no special casing is needed). */ mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK); lockinit(&np->n_excl, PVFS, "nfsupg", VLKTIMEOUT, LK_NOSHARE | LK_CANRECURSE); /* * Are we getting the root? If so, make sure the vnode flags * are correct */ if ((nfhp->nfh_len == nmp->nm_fhsize) && !bcmp(nfhp->nfh_fh, nmp->nm_fh, nfhp->nfh_len)) { if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; } np->n_fhp = nfhp; /* * For NFSv4, we have to attach the directory file handle and * file name, so that Open Ops can be done later. */ if (nmp->nm_flag & NFSMNT_NFSV4) { np->n_v4 = malloc(sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = cnp->cn_namelen; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen); } else { np->n_v4 = NULL; } /* * NFS supports recursive and shared locking. */ lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL); VN_LOCK_AREC(vp); VN_LOCK_ASHARE(vp); error = insmntque(vp, mntp); if (error != 0) { *npp = NULL; mtx_destroy(&np->n_mtx); lockdestroy(&np->n_excl); free(nfhp, M_NFSFH); if (np->n_v4 != NULL) free(np->n_v4, M_NFSV4NODE); uma_zfree(newnfsnode_zone, np); return (error); } error = vfs_hash_insert(vp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); /* vfs_hash_insert() vput()'s the losing vnode */ return (0); } *npp = np; return (0); } /* * Another variant of nfs_nget(). This one is only used by reopen. It * takes almost the same args as nfs_nget(), but only succeeds if an entry * exists in the cache. (Since files should already be "open" with a * vnode ref cnt on the node when reopen calls this, it should always * succeed.) * Also, don't get a vnode lock, since it may already be locked by some * other process that is handling it. This is ok, since all other threads * on the client are blocked by the nfsc_lock being exclusively held by the * caller of this function. */ int nfscl_ngetreopen(struct mount *mntp, u_int8_t *fhp, int fhsize, struct thread *td, struct nfsnode **npp) { struct vnode *nvp; u_int hash; struct nfsfh *nfhp; int error; *npp = NULL; /* For forced dismounts, just return error. */ if (NFSCL_FORCEDISM(mntp)) return (EINTR); nfhp = malloc(sizeof (struct nfsfh) + fhsize, M_NFSFH, M_WAITOK); bcopy(fhp, &nfhp->nfh_fh[0], fhsize); nfhp->nfh_len = fhsize; hash = fnv_32_buf(fhp, fhsize, FNV1_32_INIT); /* * First, try to get the vnode locked, but don't block for the lock. */ error = vfs_hash_get(mntp, hash, (LK_EXCLUSIVE | LK_NOWAIT), td, &nvp, newnfs_vncmpf, nfhp); if (error == 0 && nvp != NULL) { NFSVOPUNLOCK(nvp, 0); } else if (error == EBUSY) { /* * It is safe so long as a vflush() with * FORCECLOSE has not been done. Since the Renew thread is * stopped and the MNTK_UNMOUNTF flag is set before doing * a vflush() with FORCECLOSE, we should be ok here. */ if (NFSCL_FORCEDISM(mntp)) error = EINTR; else { vfs_hash_ref(mntp, hash, td, &nvp, newnfs_vncmpf, nfhp); if (nvp == NULL) { error = ENOENT; } else if ((nvp->v_iflag & VI_DOOMED) != 0) { error = ENOENT; vrele(nvp); } else { error = 0; } } } free(nfhp, M_NFSFH); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); return (0); } return (EINVAL); } static void nfscl_warn_fileid(struct nfsmount *nmp, struct nfsvattr *oldnap, struct nfsvattr *newnap) { int off; if (ncl_fileid_maxwarnings >= 0 && ncl_fileid_nwarnings >= ncl_fileid_maxwarnings) return; off = 0; if (ncl_fileid_maxwarnings >= 0) { if (++ncl_fileid_nwarnings >= ncl_fileid_maxwarnings) off = 1; } printf("newnfs: server '%s' error: fileid changed. " "fsid %jx:%jx: expected fileid %#jx, got %#jx. " "(BROKEN NFS SERVER OR MIDDLEWARE)\n", nmp->nm_com.nmcom_hostname, (uintmax_t)nmp->nm_fsid[0], (uintmax_t)nmp->nm_fsid[1], (uintmax_t)oldnap->na_fileid, (uintmax_t)newnap->na_fileid); if (off) printf("newnfs: Logged %d times about fileid corruption; " "going quiet to avoid spamming logs excessively. (Limit " "is: %d).\n", ncl_fileid_nwarnings, ncl_fileid_maxwarnings); } /* * Load the attribute cache (that lives in the nfsnode entry) with * the attributes of the second argument and * Iff vaper not NULL * copy the attributes to *vaper * Similar to nfs_loadattrcache(), except the attributes are passed in * instead of being parsed out of the mbuf list. */ int nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, void *stuff, int writeattr, int dontshrink) { struct vnode *vp = *vpp; struct vattr *vap, *nvap = &nap->na_vattr, *vaper = nvaper; struct nfsnode *np; struct nfsmount *nmp; struct timespec mtime_save; u_quad_t nsize; int setnsize, error, force_fid_err; error = 0; setnsize = 0; nsize = 0; /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); NFSLOCKNODE(np); if (vp->v_type != nvap->va_type) { vp->v_type = nvap->va_type; if (vp->v_type == VFIFO) vp->v_op = &newnfs_fifoops; np->n_mtime = nvap->va_mtime; } nmp = VFSTONFS(vp->v_mount); vap = &np->n_vattr.na_vattr; mtime_save = vap->va_mtime; if (writeattr) { np->n_vattr.na_filerev = nap->na_filerev; np->n_vattr.na_size = nap->na_size; np->n_vattr.na_mtime = nap->na_mtime; np->n_vattr.na_ctime = nap->na_ctime; np->n_vattr.na_fsid = nap->na_fsid; np->n_vattr.na_mode = nap->na_mode; } else { force_fid_err = 0; KFAIL_POINT_ERROR(DEBUG_FP, nfscl_force_fileid_warning, force_fid_err); /* * BROKEN NFS SERVER OR MIDDLEWARE * * Certain NFS servers (certain old proprietary filers ca. * 2006) or broken middleboxes (e.g. WAN accelerator products) * will respond to GETATTR requests with results for a * different fileid. * * The WAN accelerator we've observed not only serves stale * cache results for a given file, it also occasionally serves * results for wholly different files. This causes surprising * problems; for example the cached size attribute of a file * may truncate down and then back up, resulting in zero * regions in file contents read by applications. We observed * this reliably with Clang and .c files during parallel build. * A pcap revealed packet fragmentation and GETATTR RPC * responses with wholly wrong fileids. */ if ((np->n_vattr.na_fileid != 0 && np->n_vattr.na_fileid != nap->na_fileid) || force_fid_err) { nfscl_warn_fileid(nmp, &np->n_vattr, nap); error = EIDRM; goto out; } NFSBCOPY((caddr_t)nap, (caddr_t)&np->n_vattr, sizeof (struct nfsvattr)); } /* * For NFSv4, if the node's fsid is not equal to the mount point's * fsid, return the low order 32bits of the node's fsid. This * allows getcwd(3) to work. There is a chance that the fsid might * be the same as a local fs, but since this is in an NFS mount * point, I don't think that will cause any problems? */ if (NFSHASNFSV4(nmp) && NFSHASHASSETFSID(nmp) && (nmp->nm_fsid[0] != np->n_vattr.na_filesid[0] || nmp->nm_fsid[1] != np->n_vattr.na_filesid[1])) { /* * va_fsid needs to be set to some value derived from * np->n_vattr.na_filesid that is not equal * vp->v_mount->mnt_stat.f_fsid[0], so that it changes * from the value used for the top level server volume * in the mounted subtree. */ vn_fsid(vp, vap); if ((uint32_t)vap->va_fsid == np->n_vattr.na_filesid[0]) vap->va_fsid = hash32_buf( np->n_vattr.na_filesid, 2 * sizeof(uint64_t), 0); } else vn_fsid(vp, vap); np->n_attrstamp = time_second; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (dontshrink && vap->va_size < np->n_size) { /* * We've been told not to shrink the file; * zero np->n_attrstamp to indicate that * the attributes are stale. */ vap->va_size = np->n_size; np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); vnode_pager_setsize(vp, np->n_size); } else if (np->n_flag & NMODIFIED) { /* * We've modified the file: Use the larger * of our size, and the server's size. */ if (vap->va_size < np->n_size) { vap->va_size = np->n_size; } else { np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; } vnode_pager_setsize(vp, np->n_size); } else if (vap->va_size < np->n_size) { /* * When shrinking the size, the call to * vnode_pager_setsize() cannot be done * with the mutex held, so delay it until * after the mtx_unlock call. */ nsize = np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; setnsize = 1; } else { np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; vnode_pager_setsize(vp, np->n_size); } } else { np->n_size = vap->va_size; } } /* * The following checks are added to prevent a race between (say) * a READDIR+ and a WRITE. * READDIR+, WRITE requests sent out. * READDIR+ resp, WRITE resp received on client. * However, the WRITE resp was handled before the READDIR+ resp * causing the post op attrs from the write to be loaded first * and the attrs from the READDIR+ to be loaded later. If this * happens, we have stale attrs loaded into the attrcache. * We detect this by for the mtime moving back. We invalidate the * attrcache when this happens. */ if (timespeccmp(&mtime_save, &vap->va_mtime, >)) { /* Size changed or mtime went backwards */ np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } if (vaper != NULL) { NFSBCOPY((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } out: #ifdef KDTRACE_HOOKS if (np->n_attrstamp != 0) KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error); #endif NFSUNLOCKNODE(np); if (setnsize) vnode_pager_setsize(vp, nsize); return (error); } /* * Fill in the client id name. For these bytes: * 1 - they must be unique * 2 - they should be persistent across client reboots * 1 is more critical than 2 * Use the mount point's unique id plus either the uuid or, if that * isn't set, random junk. */ void nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen) { int uuidlen; /* * First, put in the 64bit mount point identifier. */ if (idlen >= sizeof (u_int64_t)) { NFSBCOPY((caddr_t)&clval, cp, sizeof (u_int64_t)); cp += sizeof (u_int64_t); idlen -= sizeof (u_int64_t); } /* * If uuid is non-zero length, use it. */ uuidlen = strlen(uuid); if (uuidlen > 0 && idlen >= uuidlen) { NFSBCOPY(uuid, cp, uuidlen); cp += uuidlen; idlen -= uuidlen; } /* * This only normally happens if the uuid isn't set. */ while (idlen > 0) { *cp++ = (u_int8_t)(arc4random() % 256); idlen--; } } /* * Fill in a lock owner name. For now, pid + the process's creation time. */ void nfscl_filllockowner(void *id, u_int8_t *cp, int flags) { union { u_int32_t lval; u_int8_t cval[4]; } tl; struct proc *p; if (id == NULL) { /* Return the single open_owner of all 0 bytes. */ bzero(cp, NFSV4CL_LOCKNAMELEN); return; } if ((flags & F_POSIX) != 0) { p = (struct proc *)id; tl.lval = p->p_pid; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp++ = tl.cval[3]; tl.lval = p->p_stats->p_start.tv_sec; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp++ = tl.cval[3]; tl.lval = p->p_stats->p_start.tv_usec; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp = tl.cval[3]; } else if ((flags & F_FLOCK) != 0) { bcopy(&id, cp, sizeof(id)); bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id)); } else { printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n"); bzero(cp, NFSV4CL_LOCKNAMELEN); } } /* * Find the parent process for the thread passed in as an argument. * If none exists, return NULL, otherwise return a thread for the parent. * (Can be any of the threads, since it is only used for td->td_proc.) */ NFSPROC_T * nfscl_getparent(struct thread *td) { struct proc *p; struct thread *ptd; if (td == NULL) return (NULL); p = td->td_proc; if (p->p_pid == 0) return (NULL); p = p->p_pptr; if (p == NULL) return (NULL); ptd = TAILQ_FIRST(&p->p_threads); return (ptd); } /* * Start up the renew kernel thread. */ static void start_nfscl(void *arg) { struct nfsclclient *clp; struct thread *td; clp = (struct nfsclclient *)arg; td = TAILQ_FIRST(&clp->nfsc_renewthread->p_threads); nfscl_renewthread(clp, td); kproc_exit(0); } void nfscl_start_renewthread(struct nfsclclient *clp) { kproc_create(start_nfscl, (void *)clp, &clp->nfsc_renewthread, 0, 0, "nfscl"); } /* * Handle wcc_data. * For NFSv4, it assumes that nfsv4_wccattr() was used to set up the getattr * as the first Op after PutFH. * (For NFSv4, the postop attributes are after the Op, so they can't be * parsed here. A separate call to nfscl_postop_attr() is required.) */ int nfscl_wcc_data(struct nfsrv_descript *nd, struct vnode *vp, struct nfsvattr *nap, int *flagp, int *wccflagp, void *stuff) { u_int32_t *tl; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; int error = 0; if (wccflagp != NULL) *wccflagp = 0; if (nd->nd_flag & ND_NFSV3) { *flagp = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); if (wccflagp != NULL) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) && np->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); mtx_unlock(&np->n_mtx); } } error = nfscl_postop_attr(nd, nap, flagp, stuff); if (wccflagp != NULL && *flagp == 0) *wccflagp = 0; } else if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); if (error) return (error); /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*++tl) nd->nd_flag |= ND_NOMOREDATA; if (wccflagp != NULL && nfsva.na_vattr.va_mtime.tv_sec != 0) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == nfsva.na_vattr.va_mtime.tv_sec && np->n_mtime.tv_nsec == nfsva.na_vattr.va_mtime.tv_sec); mtx_unlock(&np->n_mtx); } } nfsmout: return (error); } /* * Get postop attributes. */ int nfscl_postop_attr(struct nfsrv_descript *nd, struct nfsvattr *nap, int *retp, void *stuff) { u_int32_t *tl; int error = 0; *retp = 0; if (nd->nd_flag & ND_NOMOREDATA) return (error); if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); *retp = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV4) { /* * For NFSv4, the postop attr are at the end, so no point * in looking if nd_repstat != 0. */ if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) /* should never happen since nd_repstat != 0 */ nd->nd_flag |= ND_NOMOREDATA; else *retp = 1; } } else if (!nd->nd_repstat) { /* For NFSv2, the attributes are here iff nd_repstat == 0 */ *retp = 1; } if (*retp) { error = nfsm_loadattr(nd, nap); if (error) *retp = 0; } nfsmout: return (error); } /* * Fill in the setable attributes. The full argument indicates whether * to fill in them all or just mode and time. */ void nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, struct vnode *vp, int flags, u_int32_t rdev) { u_int32_t *tl; struct nfsv2_sattr *sp; nfsattrbit_t attrbits; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_BUILD(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (mode_t)VNOVAL) sp->sa_mode = newnfs_xdrneg1; else sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = newnfs_xdrneg1; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = newnfs_xdrneg1; else sp->sa_gid = txdr_unsigned(vap->va_gid); if (flags & NFSSATTR_SIZE0) sp->sa_size = 0; else if (flags & NFSSATTR_SIZENEG1) sp->sa_size = newnfs_xdrneg1; else if (flags & NFSSATTR_SIZERDEV) sp->sa_size = txdr_unsigned(rdev); else sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); break; case ND_NFSV3: if (vap->va_mode != (mode_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_mode); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_uid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_gid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(vap->va_size, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } break; case ND_NFSV4: NFSZERO_ATTRBIT(&attrbits); if (vap->va_mode != (mode_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE); if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP); if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); if (vap->va_atime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); if (vap->va_mtime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET); (void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0, - &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); + &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL); break; } } /* * nfscl_request() - mostly a wrapper for newnfs_request(). */ int nfscl_request(struct nfsrv_descript *nd, struct vnode *vp, NFSPROC_T *p, struct ucred *cred, void *stuff) { int ret, vers; struct nfsmount *nmp; nmp = VFSTONFS(vp->v_mount); if (nd->nd_flag & ND_NFSV4) vers = NFS_VER4; else if (nd->nd_flag & ND_NFSV3) vers = NFS_VER3; else vers = NFS_VER2; ret = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, NULL); return (ret); } /* * fill in this bsden's variant of statfs using nfsstatfs. */ void nfscl_loadsbinfo(struct nfsmount *nmp, struct nfsstatfs *sfp, void *statfs) { struct statfs *sbp = (struct statfs *)statfs; if (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) { sbp->f_bsize = NFS_FABLKSIZE; sbp->f_blocks = sfp->sf_tbytes / NFS_FABLKSIZE; sbp->f_bfree = sfp->sf_fbytes / NFS_FABLKSIZE; /* * Although sf_abytes is uint64_t and f_bavail is int64_t, * the value after dividing by NFS_FABLKSIZE is small * enough that it will fit in 63bits, so it is ok to * assign it to f_bavail without fear that it will become * negative. */ sbp->f_bavail = sfp->sf_abytes / NFS_FABLKSIZE; sbp->f_files = sfp->sf_tfiles; /* Since f_ffree is int64_t, clip it to 63bits. */ if (sfp->sf_ffiles > INT64_MAX) sbp->f_ffree = INT64_MAX; else sbp->f_ffree = sfp->sf_ffiles; } else if ((nmp->nm_flag & NFSMNT_NFSV4) == 0) { /* * The type casts to (int32_t) ensure that this code is * compatible with the old NFS client, in that it will * propagate bit31 to the high order bits. This may or may * not be correct for NFSv2, but since it is a legacy * environment, I'd rather retain backwards compatibility. */ sbp->f_bsize = (int32_t)sfp->sf_bsize; sbp->f_blocks = (int32_t)sfp->sf_blocks; sbp->f_bfree = (int32_t)sfp->sf_bfree; sbp->f_bavail = (int32_t)sfp->sf_bavail; sbp->f_files = 0; sbp->f_ffree = 0; } } /* * Use the fsinfo stuff to update the mount point. */ void nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp) { if ((nmp->nm_wsize == 0 || fsp->fs_wtpref < nmp->nm_wsize) && fsp->fs_wtpref >= NFS_FABLKSIZE) nmp->nm_wsize = (fsp->fs_wtpref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); if (fsp->fs_wtmax < nmp->nm_wsize && fsp->fs_wtmax > 0) { nmp->nm_wsize = fsp->fs_wtmax & ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize == 0) nmp->nm_wsize = fsp->fs_wtmax; } if (nmp->nm_wsize < NFS_FABLKSIZE) nmp->nm_wsize = NFS_FABLKSIZE; if ((nmp->nm_rsize == 0 || fsp->fs_rtpref < nmp->nm_rsize) && fsp->fs_rtpref >= NFS_FABLKSIZE) nmp->nm_rsize = (fsp->fs_rtpref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); if (fsp->fs_rtmax < nmp->nm_rsize && fsp->fs_rtmax > 0) { nmp->nm_rsize = fsp->fs_rtmax & ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize == 0) nmp->nm_rsize = fsp->fs_rtmax; } if (nmp->nm_rsize < NFS_FABLKSIZE) nmp->nm_rsize = NFS_FABLKSIZE; if ((nmp->nm_readdirsize == 0 || fsp->fs_dtpref < nmp->nm_readdirsize) && fsp->fs_dtpref >= NFS_DIRBLKSIZ) nmp->nm_readdirsize = (fsp->fs_dtpref + NFS_DIRBLKSIZ - 1) & ~(NFS_DIRBLKSIZ - 1); if (fsp->fs_rtmax < nmp->nm_readdirsize && fsp->fs_rtmax > 0) { nmp->nm_readdirsize = fsp->fs_rtmax & ~(NFS_DIRBLKSIZ - 1); if (nmp->nm_readdirsize == 0) nmp->nm_readdirsize = fsp->fs_rtmax; } if (nmp->nm_readdirsize < NFS_DIRBLKSIZ) nmp->nm_readdirsize = NFS_DIRBLKSIZ; if (fsp->fs_maxfilesize > 0 && fsp->fs_maxfilesize < nmp->nm_maxfilesize) nmp->nm_maxfilesize = fsp->fs_maxfilesize; nmp->nm_mountp->mnt_stat.f_iosize = newnfs_iosize(nmp); nmp->nm_state |= NFSSTA_GOTFSINFO; } /* * Lookups source address which should be used to communicate with * @nmp and stores it inside @pdst. * * Returns 0 on success. */ u_int8_t * nfscl_getmyip(struct nfsmount *nmp, struct in6_addr *paddr, int *isinet6p) { #if defined(INET6) || defined(INET) int error, fibnum; fibnum = curthread->td_proc->p_fibnum; #endif #ifdef INET if (nmp->nm_nam->sa_family == AF_INET) { struct sockaddr_in *sin; struct nhop4_extended nh_ext; sin = (struct sockaddr_in *)nmp->nm_nam; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); error = fib4_lookup_nh_ext(fibnum, sin->sin_addr, 0, 0, &nh_ext); CURVNET_RESTORE(); if (error != 0) return (NULL); if ((ntohl(nh_ext.nh_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { /* Ignore loopback addresses */ return (NULL); } *isinet6p = 0; *((struct in_addr *)paddr) = nh_ext.nh_src; return (u_int8_t *)paddr; } #endif #ifdef INET6 if (nmp->nm_nam->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)nmp->nm_nam; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); error = in6_selectsrc_addr(fibnum, &sin6->sin6_addr, sin6->sin6_scope_id, NULL, paddr, NULL); CURVNET_RESTORE(); if (error != 0) return (NULL); if (IN6_IS_ADDR_LOOPBACK(paddr)) return (NULL); /* Scope is embedded in */ *isinet6p = 1; return (u_int8_t *)paddr; } #endif return (NULL); } /* * Copy NFS uid, gids from the cred structure. */ void newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr) { int i; KASSERT(cr->cr_ngroups >= 0, ("newnfs_copyincred: negative cr_ngroups")); nfscr->nfsc_uid = cr->cr_uid; nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1); for (i = 0; i < nfscr->nfsc_ngroups; i++) nfscr->nfsc_groups[i] = cr->cr_groups[i]; } /* * Do any client specific initialization. */ void nfscl_init(void) { static int inited = 0; if (inited) return; inited = 1; nfscl_inited = 1; ncl_pbuf_freecnt = nswbuf / 2 + 1; } /* * Check each of the attributes to be set, to ensure they aren't already * the correct value. Disable setting ones already correct. */ int nfscl_checksattr(struct vattr *vap, struct nfsvattr *nvap) { if (vap->va_mode != (mode_t)VNOVAL) { if (vap->va_mode == nvap->na_mode) vap->va_mode = (mode_t)VNOVAL; } if (vap->va_uid != (uid_t)VNOVAL) { if (vap->va_uid == nvap->na_uid) vap->va_uid = (uid_t)VNOVAL; } if (vap->va_gid != (gid_t)VNOVAL) { if (vap->va_gid == nvap->na_gid) vap->va_gid = (gid_t)VNOVAL; } if (vap->va_size != VNOVAL) { if (vap->va_size == nvap->na_size) vap->va_size = VNOVAL; } /* * We are normally called with only a partially initialized * VAP. Since the NFSv3 spec says that server may use the * file attributes to store the verifier, the spec requires * us to do a SETATTR RPC. FreeBSD servers store the verifier * in atime, but we can't really assume that all servers will * so we ensure that our SETATTR sets both atime and mtime. * Set the VA_UTIMES_NULL flag for this case, so that * the server's time will be used. This is needed to * work around a bug in some Solaris servers, where * setting the time TOCLIENT causes the Setattr RPC * to return NFS_OK, but not set va_mode. */ if (vap->va_mtime.tv_sec == VNOVAL) { vfs_timestamp(&vap->va_mtime); vap->va_vaflags |= VA_UTIMES_NULL; } if (vap->va_atime.tv_sec == VNOVAL) vap->va_atime = vap->va_mtime; return (1); } /* * Map nfsv4 errors to errno.h errors. * The uid and gid arguments are only used for NFSERR_BADOWNER and that * error should only be returned for the Open, Create and Setattr Ops. * As such, most calls can just pass in 0 for those arguments. */ APPLESTATIC int nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid) { struct proc *p; if (error < 10000 || error >= NFSERR_STALEWRITEVERF) return (error); if (td != NULL) p = td->td_proc; else p = NULL; switch (error) { case NFSERR_BADOWNER: tprintf(p, LOG_INFO, "No name and/or group mapping for uid,gid:(%d,%d)\n", uid, gid); return (EPERM); case NFSERR_BADNAME: case NFSERR_BADCHAR: printf("nfsv4 char/name not handled by server\n"); return (ENOENT); case NFSERR_STALECLIENTID: case NFSERR_STALESTATEID: case NFSERR_EXPIRED: case NFSERR_BADSTATEID: case NFSERR_BADSESSION: printf("nfsv4 recover err returned %d\n", error); return (EIO); case NFSERR_BADHANDLE: case NFSERR_SERVERFAULT: case NFSERR_BADTYPE: case NFSERR_FHEXPIRED: case NFSERR_RESOURCE: case NFSERR_MOVED: case NFSERR_NOFILEHANDLE: case NFSERR_MINORVERMISMATCH: case NFSERR_OLDSTATEID: case NFSERR_BADSEQID: case NFSERR_LEASEMOVED: case NFSERR_RECLAIMBAD: case NFSERR_BADXDR: case NFSERR_OPILLEGAL: printf("nfsv4 client/server protocol prob err=%d\n", error); return (EIO); default: tprintf(p, LOG_INFO, "nfsv4 err=%d\n", error); return (EIO); }; } /* * Check to see if the process for this owner exists. Return 1 if it doesn't * and 0 otherwise. */ int nfscl_procdoesntexist(u_int8_t *own) { union { u_int32_t lval; u_int8_t cval[4]; } tl; struct proc *p; pid_t pid; int i, ret = 0; /* For the single open_owner of all 0 bytes, just return 0. */ for (i = 0; i < NFSV4CL_LOCKNAMELEN; i++) if (own[i] != 0) break; if (i == NFSV4CL_LOCKNAMELEN) return (0); tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; pid = tl.lval; p = pfind_locked(pid); if (p == NULL) return (1); if (p->p_stats == NULL) { PROC_UNLOCK(p); return (0); } tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; if (tl.lval != p->p_stats->p_start.tv_sec) { ret = 1; } else { tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own; if (tl.lval != p->p_stats->p_start.tv_usec) ret = 1; } PROC_UNLOCK(p); return (ret); } /* * - nfs pseudo system call for the client */ /* * MPSAFE */ static int nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfscbd_args nfscbdarg; struct nfsd_nfscbd_args nfscbdarg2; struct nameidata nd; struct nfscl_dumpmntopts dumpmntopts; cap_rights_t rights; char *buf; int error; struct mount *mp; struct nfsmount *nmp; if (uap->flag & NFSSVC_CBADDSOCK) { error = copyin(uap->argp, (caddr_t)&nfscbdarg, sizeof(nfscbdarg)); if (error) return (error); /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. */ error = fget(td, nfscbdarg.sock, cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); if (error) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); return (EPERM); } error = nfscbd_addsock(fp); fdrop(fp, td); if (!error && nfscl_enablecallb == 0) { nfsv4_cbport = nfscbdarg.port; nfscl_enablecallb = 1; } } else if (uap->flag & NFSSVC_NFSCBD) { if (uap->argp == NULL) return (EINVAL); error = copyin(uap->argp, (caddr_t)&nfscbdarg2, sizeof(nfscbdarg2)); if (error) return (error); error = nfscbd_nfsd(td, &nfscbdarg2); } else if (uap->flag & NFSSVC_DUMPMNTOPTS) { error = copyin(uap->argp, &dumpmntopts, sizeof(dumpmntopts)); if (error == 0 && (dumpmntopts.ndmnt_blen < 256 || dumpmntopts.ndmnt_blen > 1024)) error = EINVAL; if (error == 0) error = nfsrv_lookupfilename(&nd, dumpmntopts.ndmnt_fname, td); if (error == 0 && strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); error = EINVAL; } if (error == 0) { buf = malloc(dumpmntopts.ndmnt_blen, M_TEMP, M_WAITOK); nfscl_retopts(VFSTONFS(nd.ni_vp->v_mount), buf, dumpmntopts.ndmnt_blen); vput(nd.ni_vp); error = copyout(buf, dumpmntopts.ndmnt_buf, dumpmntopts.ndmnt_blen); free(buf, M_TEMP); } } else if (uap->flag & NFSSVC_FORCEDISM) { buf = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK); error = copyinstr(uap->argp, buf, MNAMELEN + 1, NULL); if (error == 0) { nmp = NULL; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (strcmp(mp->mnt_stat.f_mntonname, buf) == 0 && strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 && mp->mnt_data != NULL) { nmp = VFSTONFS(mp); + NFSDDSLOCK(); + if (nfsv4_findmirror(nmp) != NULL) { + NFSDDSUNLOCK(); + error = ENXIO; + nmp = NULL; + break; + } mtx_lock(&nmp->nm_mtx); if ((nmp->nm_privflag & NFSMNTP_FORCEDISM) == 0) { nmp->nm_privflag |= (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS); mtx_unlock(&nmp->nm_mtx); } else { mtx_unlock(&nmp->nm_mtx); nmp = NULL; } + NFSDDSUNLOCK(); break; } } mtx_unlock(&mountlist_mtx); if (nmp != NULL) { /* * Call newnfs_nmcancelreqs() to cause * any RPCs in progress on the mount point to * fail. * This will cause any process waiting for an * RPC to complete while holding a vnode lock * on the mounted-on vnode (such as "df" or * a non-forced "umount") to fail. * This will unlock the mounted-on vnode so * a forced dismount can succeed. * Then clear NFSMNTP_CANCELRPCS and wakeup(), * so that nfs_unmount() can complete. */ newnfs_nmcancelreqs(nmp); mtx_lock(&nmp->nm_mtx); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); mtx_unlock(&nmp->nm_mtx); - } else + } else if (error == 0) error = EINVAL; } free(buf, M_TEMP); } else { error = EINVAL; } return (error); } extern int (*nfsd_call_nfscl)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfscl_modevent(module_t mod, int type, void *data) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) return (0); newnfs_portinit(); mtx_init(&ncl_iod_mutex, "ncl_iod_mutex", NULL, MTX_DEF); nfscl_init(); NFSD_LOCK(); nfsrvd_cbinit(0); NFSD_UNLOCK(); ncl_call_invalcaches = ncl_invalcaches; nfsd_call_nfscl = nfssvc_nfscl; loaded = 1; break; case MOD_UNLOAD: if (nfs_numnfscbd != 0) { error = EBUSY; break; } /* * XXX: Unloading of nfscl module is unsupported. */ #if 0 ncl_call_invalcaches = NULL; nfsd_call_nfscl = NULL; /* and get rid of the mutexes */ mtx_destroy(&ncl_iod_mutex); loaded = 0; break; #else /* FALLTHROUGH */ #endif default: error = EOPNOTSUPP; break; } return error; } static moduledata_t nfscl_mod = { "nfscl", nfscl_modevent, NULL, }; DECLARE_MODULE(nfscl, nfscl_mod, SI_SUB_VFS, SI_ORDER_FIRST); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfscl, 1); MODULE_DEPEND(nfscl, nfscommon, 1, 1, 1); MODULE_DEPEND(nfscl, krpc, 1, 1, 1); MODULE_DEPEND(nfscl, nfssvc, 1, 1, 1); MODULE_DEPEND(nfscl, nfslock, 1, 1, 1); Index: head/sys/fs/nfsclient/nfs_clrpcops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clrpcops.c (revision 335011) +++ head/sys/fs/nfsclient/nfs_clrpcops.c (revision 335012) @@ -1,7594 +1,7594 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Rpc op calls, generally called from the vnode op calls or through the * buffer cache, for NFS v2, 3 and 4. * These do not normally make any changes to vnode arguments or use * structures that might change between the VFS variants. The returned * arguments are all at the end, after the NFSPROC_T *p one. */ #ifndef APPLEKEXT #include "opt_inet6.h" #include #include #include SYSCTL_DECL(_vfs_nfs); static int nfsignore_eexist = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW, &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink"); /* * Global variables */ extern int nfs_numnfscbd; extern struct timeval nfsboottime; extern u_int32_t newnfs_false, newnfs_true; extern nfstype nfsv34_type[9]; extern int nfsrv_useacl; extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; extern int nfscl_debuglevel; extern int nfs_pnfsiothreads; NFSCLSTATEMUTEX; int nfstest_outofseq = 0; int nfscl_assumeposixlocks = 1; int nfscl_enablecallb = 0; short nfsv4_cbport = NFSV4_CBPORT; int nfstest_openallsetattr = 0; #endif /* !APPLEKEXT */ #define DIRHDSIZ offsetof(struct dirent, d_name) /* * nfscl_getsameserver() can return one of three values: * NFSDSP_USETHISSESSION - Use this session for the DS. * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new * session. * NFSDSP_NOTFOUND - No matching server was found. */ enum nfsclds_state { NFSDSP_USETHISSESSION = 0, NFSDSP_SEQTHISSESSION = 1, NFSDSP_NOTFOUND = 2, }; /* * Do a write RPC on a DS data file, using this structure for the arguments, * so that this function can be executed by a separate kernel process. */ struct nfsclwritedsdorpc { int done; int inprog; struct task tsk; struct vnode *vp; int iomode; int must_commit; nfsv4stateid_t *stateidp; struct nfsclds *dsp; uint64_t off; int len; struct nfsfh *fhp; struct mbuf *m; int vers; int minorvers; struct ucred *cred; NFSPROC_T *p; int err; }; static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *, struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *, nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *, nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *, int *); static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *, struct nfscllockowner *, u_int64_t, u_int64_t, u_int32_t, struct ucred *, NFSPROC_T *, int); static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *, struct acl *, nfsv4stateid_t *, void *); static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **, struct ucred *, NFSPROC_T *); static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *, struct sockaddr_in6 *, sa_family_t, int, struct nfsclds **, NFSPROC_T *); static void nfscl_initsessionslots(struct nfsclsession *); static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *, nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *, struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *, NFSPROC_T *); static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *, nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *, struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *, struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *); static struct mbuf *nfsm_copym(struct mbuf *, int, int); static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *, struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int, struct ucred *, NFSPROC_T *); static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *, nfsv4stateid_t *, struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *); static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *, struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int, struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *); static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *, struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int, struct ucred *, NFSPROC_T *); static enum nfsclds_state nfscl_getsameserver(struct nfsmount *, struct nfsclds *, struct nfsclds **); static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *, struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *); static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *, struct nfsfh *, int, int, struct ucred *, NFSPROC_T *); static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t, uint64_t, uint64_t, nfsv4stateid_t *, int, int, int); static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *, NFSPROC_T *); static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *, int *, struct nfsclflayouthead *); static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *, int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int, struct nfscldeleg **, struct ucred *, NFSPROC_T *); static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *, nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *, int *); static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *, int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int, struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *, struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *); static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *, nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *, int, int, int, int *, struct nfsclflayouthead *, int *); static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t, uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *, struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *); static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *, int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **, struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *); int nfs_pnfsio(task_fn_t *, void *); /* * nfs null call from vfs. */ APPLESTATIC int nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p) { int error; struct nfsrv_descript nfsd, *nd = &nfsd; NFSCL_REQSTART(nd, NFSPROC_NULL, vp); error = nfscl_request(nd, vp, p, cred, NULL); if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs access rpc op. * For nfs version 3 and 4, use the access rpc to check accessibility. If file * modes are changed on the server, accesses might still fail later. */ APPLESTATIC int nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp) { int error; u_int32_t mode, rmode; if (acmode & VREAD) mode = NFSACCESS_READ; else mode = 0; if (vnode_vtype(vp) == VDIR) { if (acmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE); if (acmode & VEXEC) mode |= NFSACCESS_LOOKUP; } else { if (acmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (acmode & VEXEC) mode |= NFSACCESS_EXECUTE; } /* * Now, just call nfsrpc_accessrpc() to do the actual RPC. */ error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode, NULL); /* * The NFS V3 spec does not clarify whether or not * the returned access bits can be a superset of * the ones requested, so... */ if (!error && (rmode & mode) != mode) error = EACCES; return (error); } /* * The actual rpc, separated out for Darwin. */ APPLESTATIC int nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep, void *stuff) { u_int32_t *tl; u_int32_t supported, rmode; int error; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; *attrflagp = 0; supported = mode; NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(mode); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); supported = fxdr_unsigned(u_int32_t, *tl++); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); } rmode = fxdr_unsigned(u_int32_t, *tl); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); /* * It's not obvious what should be done about * unsupported access modes. For now, be paranoid * and clear the unsupported ones. */ rmode &= supported; *rmodep = rmode; } else error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs open rpc */ APPLESTATIC int nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p) { struct nfsclopen *op; struct nfscldeleg *dp; struct nfsfh *nfhp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); u_int32_t mode, clidrev; int ret, newone, error, expireret = 0, retrycnt; /* * For NFSv4, Open Ops are only done on Regular Files. */ if (vnode_vtype(vp) != VREG) return (0); mode = 0; if (amode & FREAD) mode |= NFSV4OPEN_ACCESSREAD; if (amode & FWRITE) mode |= NFSV4OPEN_ACCESSWRITE; nfhp = np->n_fhp; retrycnt = 0; #ifdef notdef { char name[100]; int namel; namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99; bcopy(NFS4NODENAME(np->n_v4), name, namel); name[namel] = '\0'; printf("rpcopen p=0x%x name=%s",p->p_pid,name); if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]); else printf(" fhl=0\n"); } #endif do { dp = NULL; error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1, cred, p, NULL, &op, &newone, &ret, 1); if (error) { return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; if (ret == NFSCLOPEN_DOOPEN) { if (np->n_v4 != NULL) { /* * For the first attempt, try and get a layout, if * pNFS is enabled for the mount. */ if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0) error = nfsrpc_openrpc(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &dp, 0, 0x0, cred, p, 0, 0); else error = nfsrpc_getopenlayout(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &dp, cred, p); if (dp != NULL) { #ifdef APPLE OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag); #else NFSLOCKNODE(np); np->n_flag &= ~NDELEGMOD; /* * Invalidate the attribute cache, so that * attributes that pre-date the issue of a * delegation are not cached, since the * cached attributes will remain valid while * the delegation is held. */ NFSINVALATTRCACHE(np); NFSUNLOCKNODE(np); #endif (void) nfscl_deleg(nmp->nm_mountp, op->nfso_own->nfsow_clp, nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp); } } else { error = EIO; } newnfs_copyincred(cred, &op->nfso_cred); } else if (ret == NFSCLOPEN_SETCRED) /* * This is a new local open on a delegation. It needs * to have credentials so that an open can be done * against the server during recovery. */ newnfs_copyincred(cred, &op->nfso_cred); /* * nfso_opencnt is the count of how many VOP_OPEN()s have * been done on this Open successfully and a VOP_CLOSE() * is expected for each of these. * If error is non-zero, don't increment it, since the Open * hasn't succeeded yet. */ if (!error) op->nfso_opencnt++; nfscl_openrelease(nmp, op, error, newone); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } /* * the actual open rpc */ APPLESTATIC int nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op, u_int8_t *name, int namelen, struct nfscldeleg **dpp, int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p, int syscred, int recursed) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscldeleg *dp, *ndp = NULL; struct nfsvattr nfsva; u_int32_t rflags, deleg; nfsattrbit_t attrbits; int error, ret, acesize, limitby; struct nfsclsession *tsep; dp = *dpp; *dpp = NULL; nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE); if (reclaim) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(delegtype); } else { if (dp != NULL) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; } else { *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); } (void) nfsm_strtom(nd, name, namelen); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); (void) nfsrv_putattrbit(nd, &attrbits); if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(u_int32_t, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) op->nfso_own->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); ndp = malloc( sizeof (struct nfscldeleg) + newfhlen, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&ndp->nfsdl_owner); LIST_INIT(&ndp->nfsdl_lock); ndp->nfsdl_clp = op->nfso_own->nfsow_clp; ndp->nfsdl_fhlen = newfhlen; NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen); newnfs_copyincred(cred, &ndp->nfsdl_cred); nfscl_lockinit(&ndp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); ndp->nfsdl_stateid.seqid = *tl++; ndp->nfsdl_stateid.other[0] = *tl++; ndp->nfsdl_stateid.other[1] = *tl++; ndp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { ndp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: ndp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: ndp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); ndp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; } } else { ndp->nfsdl_flags = NFSCLDL_READ; } if (ret) ndp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret, &acesize, p); if (error) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) goto nfsmout; if (ndp != NULL) { ndp->nfsdl_change = nfsva.na_filerev; ndp->nfsdl_modtime = nfsva.na_mtime; ndp->nfsdl_flags |= NFSCLDL_MODTIMESET; } if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) { do { ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op, cred, p); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_open"); } while (ret == NFSERR_DELAY); error = ret; } if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) || nfscl_assumeposixlocks) op->nfso_posixlock = 1; else op->nfso_posixlock = 0; /* * If the server is handing out delegations, but we didn't * get one because an OpenConfirm was required, try the * Open again, to get a delegation. This is a harmless no-op, * from a server's point of view. */ if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) && (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) && !error && dp == NULL && ndp == NULL && !recursed) { do { ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, &ndp, 0, 0x0, cred, p, syscred, 1); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_open2"); } while (ret == NFSERR_DELAY); if (ret) { if (ndp != NULL) { free(ndp, M_NFSCLDELEG); ndp = NULL; } if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) error = ret; } } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALECLIENTID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: if (!error) *dpp = ndp; else if (ndp != NULL) free(ndp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * open downgrade rpc */ APPLESTATIC int nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp)))) *tl++ = 0; else *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; } if (nd->nd_repstat && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * V4 Close operation. */ APPLESTATIC int nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p) { struct nfsclclient *clp; int error; if (vnode_vtype(vp) != VREG) return (0); if (doclose) error = nfscl_doclose(vp, &clp, p); else error = nfscl_getclose(vp, &clp); if (error) return (error); nfscl_clientrelease(clp); return (0); } /* * Close the open. */ APPLESTATIC void nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscllockowner *lp, *nlp; struct nfscllock *lop, *nlop; struct ucred *tcred; u_int64_t off = 0, len = 0; u_int32_t type = NFSV4LOCKT_READ; int error, do_unlock, trycnt; tcred = newnfs_getcred(); newnfs_copycred(&op->nfso_cred, tcred); /* * (Theoretically this could be done in the same * compound as the close, but having multiple * sequenced Ops in the same compound might be * too scary for some servers.) */ if (op->nfso_posixlock) { off = 0; len = NFS64BITSSET; type = NFSV4LOCKT_READ; } /* * Since this function is only called from VOP_INACTIVE(), no * other thread will be manipulating this Open. As such, the * lock lists are not being changed by other threads, so it should * be safe to do this without locking. */ LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { do_unlock = 1; LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) { if (op->nfso_posixlock == 0) { off = lop->nfslo_first; len = lop->nfslo_end - lop->nfslo_first; if (lop->nfslo_type == F_WRLCK) type = NFSV4LOCKT_WRITE; else type = NFSV4LOCKT_READ; } if (do_unlock) { trycnt = 0; do { error = nfsrpc_locku(nd, nmp, lp, off, len, type, tcred, p, 0); if ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfs_close"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0 && trycnt++ < 5); if (op->nfso_posixlock) do_unlock = 0; } nfscl_freelock(lop, 0); } /* * Do a ReleaseLockOwner. * The lock owner name nfsl_owner may be used by other opens for * other files but the lock_owner4 name that nfsrpc_rellockown() * puts on the wire has the file handle for this file appended * to it, so it can be done now. */ (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh, lp->nfsl_open->nfso_fhlen, tcred, p); } /* * There could be other Opens for different files on the same * OpenOwner, so locking is required. */ NFSLOCKCLSTATE(); nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR); NFSUNLOCKCLSTATE(); do { error = nfscl_tryclose(op, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfs_close"); } while (error == NFSERR_GRACE); NFSLOCKCLSTATE(); nfscl_lockunlock(&op->nfso_own->nfsow_rwlock); LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) nfscl_freelockowner(lp, 0); nfscl_freeopen(op, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(tcred); } /* * The actual Close RPC. */ APPLESTATIC int nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error; nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh, op->nfso_fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl = op->nfso_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (nd->nd_repstat == 0) NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * V4 Open Confirm RPC. */ APPLESTATIC int nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; int error; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4N(nmp)) return (0); /* No confirmation for NFSv4.1. */ nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; *tl = txdr_unsigned(op->nfso_own->nfsow_seqid); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; } error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs() * when a mount has just occurred and when the server replies NFSERR_EXPIRED. */ APPLESTATIC int nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; nfsattrbit_t attrbits; u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9]; u_short port; int error, isinet6 = 0, callblen; nfsquad_t confirm; u_int32_t lease; static u_int32_t rev = 0; struct nfsclds *dsp; struct in6_addr a6; struct nfsclsession *tsep; if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); clp->nfsc_rev = rev++; if (NFSHASNFSV4N(nmp)) { /* * Either there was no previous session or the * previous session has failed, so... * do an ExchangeID followed by the CreateSession. */ error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p); NFSCL_DEBUG(1, "aft exch=%d\n", error); if (error == 0) error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, &nmp->nm_sockreq, dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p); if (error == 0) { NFSLOCKMNT(nmp); /* * The old sessions cannot be safely free'd * here, since they may still be used by * in-progress RPCs. */ tsep = NULL; if (TAILQ_FIRST(&nmp->nm_sess) != NULL) tsep = NFSMNT_MDSSESSION(nmp); TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); /* * Wake up RPCs waiting for a slot on the * old session. These will then fail with * NFSERR_BADSESSION and be retried with the * new session by nfsv4_setsequence(). * Also wakeup() processes waiting for the * new session. */ if (tsep != NULL) wakeup(&tsep->nfsess_slots); wakeup(&nmp->nm_sess); NFSUNLOCKMNT(nmp); } else nfscl_freenfsclds(dsp); NFSCL_DEBUG(1, "aft createsess=%d\n", error); if (error == 0 && reclaim == 0) { error = nfsrpc_reclaimcomplete(nmp, cred, p); NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error); if (error == NFSERR_COMPLETEALREADY || error == NFSERR_NOTSUPP) /* Ignore this error. */ error = 0; } return (error); } /* * Allocate a single session structure for NFSv4.0, because some of * the fields are used by NFSv4.0 although it doesn't do a session. */ dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO); mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); NFSLOCKMNT(nmp); TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); tsep = NFSMNT_MDSSESSION(nmp); NFSUNLOCKMNT(nmp); nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); /* * set up the callback address */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_CALLBCKPROG); callblen = strlen(nfsv4_callbackaddr); if (callblen == 0) cp = nfscl_getmyip(nmp, &a6, &isinet6); if (nfscl_enablecallb && nfs_numnfscbd > 0 && (callblen > 0 || cp != NULL)) { port = htons(nfsv4_cbport); cp2 = (u_int8_t *)&port; #ifdef INET6 if ((callblen > 0 && strchr(nfsv4_callbackaddr, ':')) || isinet6) { char ip6buf[INET6_ADDRSTRLEN], *ip6add; (void) nfsm_strtom(nd, "tcp6", 4); if (callblen == 0) { ip6_sprintf(ip6buf, (struct in6_addr *)cp); ip6add = ip6buf; } else { ip6add = nfsv4_callbackaddr; } snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", ip6add, cp2[0], cp2[1]); } else #endif { (void) nfsm_strtom(nd, "tcp", 3); if (callblen == 0) snprintf(addr, INET6_ADDRSTRLEN + 9, "%d.%d.%d.%d.%d.%d", cp[0], cp[1], cp[2], cp[3], cp2[0], cp2[1]); else snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", nfsv4_callbackaddr, cp2[0], cp2[1]); } (void) nfsm_strtom(nd, addr, strlen(addr)); } else { (void) nfsm_strtom(nd, "tcp", 3); (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(clp->nfsc_cbident); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); tsep->nfsess_clientid.lval[0] = *tl++; tsep->nfsess_clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; /* * and confirm it. */ nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = tsep->nfsess_clientid.lval[0]; *tl++ = tsep->nfsess_clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; if (nd->nd_repstat == 0) { nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh, nmp->nm_fhsize, NULL, NULL, 0, 0); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred); if (error) goto nfsmout; clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; } } } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call. */ APPLESTATIC int nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsm_loadattr(nd, nap); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call with non-vnode arguemnts. */ APPLESTATIC int nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp, uint32_t *leasep) { struct nfsrv_descript nfsd, *nd = &nfsd; int error, vers = NFS_VER2; nfsattrbit_t attrbits; nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0); if (nd->nd_flag & ND_NFSV4) { vers = NFS_VER4; NFSGETATTR_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); } else if (nd->nd_flag & ND_NFSV3) { vers = NFS_VER3; } if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, vers, NULL, 1, xidp, NULL); if (error) return (error); if (nd->nd_repstat == 0) { if ((nd->nd_flag & ND_NFSV4) != 0) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL, NULL, NULL); else error = nfsm_loadattr(nd, nap); } else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do an nfs setattr operation. */ APPLESTATIC int nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp, void *stuff) { int error, expireret = 0, openerr, retrycnt; u_int32_t clidrev = 0, mode; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsfh *nfhp; nfsv4stateid_t stateid; void *lckp; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; retrycnt = 0; do { lckp = NULL; openerr = 1; if (NFSHASNFSV4(nmp)) { nfhp = VTONFS(vp)->n_fhp; error = nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp); if (error && vnode_vtype(vp) == VREG && (mode == NFSV4OPEN_ACCESSWRITE || nfstest_openallsetattr)) { /* * No Open stateid, so try and open the file * now. */ if (mode == NFSV4OPEN_ACCESSWRITE) openerr = nfsrpc_open(vp, FWRITE, cred, p); else openerr = nfsrpc_open(vp, FREAD, cred, p); if (!openerr) (void) nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp); } } if (vap != NULL) error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p, rnap, attrflagp, stuff); else error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid, stuff); if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (!openerr) (void) nfsrpc_close(vp, 0, p); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_setattr"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4) || (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } static int nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp); if (nd->nd_flag & ND_NFSV4) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); vap->va_type = vnode_vtype(vp); nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } else if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff); if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error) error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error) error = nfscl_postop_attr(nd, rnap, attrflagp, stuff); mbuf_freem(nd->nd_mrep); if (nd->nd_repstat && !error) error = nd->nd_repstat; return (error); } /* * nfs lookup rpc */ APPLESTATIC int nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; struct nfsnode *np; struct nfsfh *nfhp; nfsattrbit_t attrbits; int error = 0, lookupp = 0; *attrflagp = 0; *dattrflagp = 0; if (vnode_vtype(dvp) != VDIR) return (ENOTDIR); nmp = VFSTONFS(vnode_mount(dvp)); if (len > NFS_MAXNAMLEN) return (ENAMETOOLONG); if (NFSHASNFSV4(nmp) && len == 1 && name[0] == '.') { /* * Just return the current dir's fh. */ np = VTONFS(dvp); nfhp = malloc(sizeof (struct nfsfh) + np->n_fhp->nfh_len, M_NFSFH, M_WAITOK); nfhp->nfh_len = np->n_fhp->nfh_len; NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len); *nfhpp = nfhp; return (0); } if (NFSHASNFSV4(nmp) && len == 2 && name[0] == '.' && name[1] == '.') { lookupp = 1; NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp); } else { NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp); (void) nfsm_strtom(nd, name, len); } if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, dvp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat) { /* * When an NFSv4 Lookupp returns ENOENT, it means that * the lookup is at the root of an fs, so return this dir. */ if (nd->nd_repstat == NFSERR_NOENT && lookupp) { np = VTONFS(dvp); nfhp = malloc(sizeof (struct nfsfh) + np->n_fhp->nfh_len, M_NFSFH, M_WAITOK); nfhp->nfh_len = np->n_fhp->nfh_len; NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len); *nfhpp = nfhp; mbuf_freem(nd->nd_mrep); return (0); } if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff); else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error == 0) *dattrflagp = 1; } goto nfsmout; } if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error != 0) goto nfsmout; *dattrflagp = 1; /* Skip over the Lookup and GetFH operation status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); } error = nfsm_getfh(nd, nfhpp); if (error) goto nfsmout; error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if ((nd->nd_flag & ND_NFSV3) && !error) error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff); nfsmout: mbuf_freem(nd->nd_mrep); if (!error && nd->nd_repstat) error = nd->nd_repstat; return (error); } /* * Do a readlink rpc. */ APPLESTATIC int nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsnode *np = VTONFS(vp); nfsattrbit_t attrbits; int error, len, cangetattr = 1; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READLINK, vp); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (!nd->nd_repstat && !error) { NFSM_STRSIZ(len, NFS_MAXPATHLEN); /* * This seems weird to me, but must have been added to * FreeBSD for some reason. The only thing I can think of * is that there was/is some server that replies with * more link data than it should? */ if (len == NFS_MAXPATHLEN) { NFSLOCKNODE(np); if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) { len = np->n_size; cangetattr = 0; } NFSUNLOCKNODE(np); } error = nfsm_mbufuio(nd, uiop, len); if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Read operation. */ APPLESTATIC int nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { int error, expireret = 0, retrycnt; u_int32_t clidrev = 0; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); struct ucred *newcred; struct nfsfh *nfhp = NULL; nfsv4stateid_t stateid; void *lckp; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; newcred = cred; if (NFSHASNFSV4(nmp)) { nfhp = np->n_fhp; newcred = NFSNEWCRED(cred); } retrycnt = 0; do { lckp = NULL; if (NFSHASNFSV4(nmp)) (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid, &lckp); error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap, attrflagp, stuff); if (error == NFSERR_OPENMODE) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_read"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4) || (error == NFSERR_OPENMODE && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; if (NFSHASNFSV4(nmp)) NFSFREECRED(newcred); return (error); } /* * The actual read RPC. */ static int nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred, nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; int error = 0, len, retlen, tsiz, eof = 0; struct nfsrv_descript nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsrv_descript *nd = &nfsd; int rsize; off_t tmp_off; *attrflagp = 0; tsiz = uio_uio_resid(uiop); tmp_off = uiop->uio_offset + tsiz; NFSLOCKMNT(nmp); if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) { NFSUNLOCKMNT(nmp); return (EFBIG); } rsize = nmp->nm_rsize; NFSUNLOCKMNT(nmp); nd->nd_mrep = NULL; while (tsiz > 0) { *attrflagp = 0; len = (tsiz > rsize) ? rsize : tsiz; NFSCL_REQSTART(nd, NFSPROC_READ, vp); if (nd->nd_flag & ND_NFSV4) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3); if (nd->nd_flag & ND_NFSV2) { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } else { txdr_hyper(uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } /* * Since I can't do a Getattr for NFSv4 for Write, there * doesn't seem any point in doing one here, either. * (See the comment in nfsrpc_writerpc() for more info.) */ error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = 1; } if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); } NFSM_STRSIZ(retlen, len); error = nfsm_mbufuio(nd, uiop, retlen); if (error) goto nfsmout; mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; tsiz -= retlen; if (!(nd->nd_flag & ND_NFSV2)) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } return (0); nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } /* * nfs write operation * When called_from_strategy != 0, it should return EIO for an error that * indicates recovery is in progress, so that the buffer will be left * dirty and be written back to the server later. If it loops around, * the recovery thread could get stuck waiting for the buffer and recovery * will then deadlock. */ APPLESTATIC int nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff, int called_from_strategy) { int error, expireret = 0, retrycnt, nostateid; u_int32_t clidrev = 0; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); struct ucred *newcred; struct nfsfh *nfhp = NULL; nfsv4stateid_t stateid; void *lckp; *must_commit = 0; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; newcred = cred; if (NFSHASNFSV4(nmp)) { newcred = NFSNEWCRED(cred); nfhp = np->n_fhp; } retrycnt = 0; do { lckp = NULL; nostateid = 0; if (NFSHASNFSV4(nmp)) { (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid, &lckp); if (stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { nostateid = 1; NFSCL_DEBUG(1, "stateid0 in write\n"); } } /* * If there is no stateid for NFSv4, it means this is an * extraneous write after close. Basically a poorly * implemented buffer cache. Just don't do the write. */ if (nostateid) error = 0; else error = nfsrpc_writerpc(vp, uiop, iomode, must_commit, newcred, &stateid, p, nap, attrflagp, stuff); if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_write"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_DELAY || ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error != 0 && (retrycnt >= 4 || ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0))) error = EIO; if (NFSHASNFSV4(nmp)) NFSFREECRED(newcred); return (error); } /* * The actual write RPC. */ static int nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC; int wccflag = 0, wsize; int32_t backup; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; nfsattrbit_t attrbits; off_t tmp_off; KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); *attrflagp = 0; tsiz = uio_uio_resid(uiop); tmp_off = uiop->uio_offset + tsiz; NFSLOCKMNT(nmp); if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) { NFSUNLOCKMNT(nmp); return (EFBIG); } wsize = nmp->nm_wsize; NFSUNLOCKMNT(nmp); nd->nd_mrep = NULL; /* NFSv2 sometimes does a write with */ nd->nd_repstat = 0; /* uio_resid == 0, so the while is not done */ while (tsiz > 0) { *attrflagp = 0; len = (tsiz > wsize) ? wsize : tsiz; NFSCL_REQSTART(nd, NFSPROC_WRITE, vp); if (nd->nd_flag & ND_NFSV4) { nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else { u_int32_t x; NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* * Not sure why someone changed this, since the * RFC clearly states that "beginoffset" and * "totalcount" are ignored, but it wouldn't * surprise me if there's a busted server out there. */ /* Set both "begin" and "current" to non-garbage. */ x = txdr_unsigned((u_int32_t)uiop->uio_offset); *tl++ = x; /* "begin offset" */ *tl++ = x; /* "current offset" */ x = txdr_unsigned(len); *tl++ = x; /* total to this offset */ *tl = x; /* size of this write */ } nfsm_uiombuf(nd, uiop, len); /* * Although it is tempting to do a normal Getattr Op in the * NFSv4 compound, the result can be a nearly hung client * system if the Getattr asks for Owner and/or OwnerGroup. * It occurs when the client can't map either the Owner or * Owner_group name in the Getattr reply to a uid/gid. When * there is a cache miss, the kernel does an upcall to the * nfsuserd. Then, it can try and read the local /etc/passwd * or /etc/group file. It can then block in getnewbuf(), * waiting for dirty writes to be pushed to the NFS server. * The only reason this doesn't result in a complete * deadlock, is that the upcall times out and allows * the write to complete. However, progress is so slow * that it might just as well be deadlocked. * As such, we get the rest of the attributes, but not * Owner or Owner_group. * nb: nfscl_loadattrcache() needs to be told that these * partial attributes from a write rpc are being * passed in, via a argument flag. */ if (nd->nd_flag & ND_NFSV4) { NFSWRITEGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat) { /* * In case the rpc gets retried, roll * the uio fileds changed by nfsm_uiombuf() * back. */ uiop->uio_offset -= len; uio_uio_resid_add(uiop, len); uio_iov_base_add(uiop, -len); uio_iov_len_add(uiop, len); } if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { error = nfscl_wcc_data(nd, vp, nap, attrflagp, &wccflag, stuff); if (error) goto nfsmout; } if (!nd->nd_repstat) { if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { backup = len - rlen; uio_iov_base_add(uiop, -(backup)); uio_iov_len_add(uiop, backup); uiop->uio_offset -= backup; uio_uio_resid_add(uiop, backup); len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; NFSLOCKMNT(nmp); if (!NFSHASWRITEVERF(nmp)) { NFSBCOPY((caddr_t)tl, (caddr_t)&nmp->nm_verf[0], NFSX_VERF); NFSSETWRITEVERF(nmp); } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); } NFSUNLOCKMNT(nmp); } if (nd->nd_flag & ND_NFSV4) NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = NFS_LATTR_NOSHRINK; } } else { error = nd->nd_repstat; } if (error) goto nfsmout; NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4)); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; tsiz -= len; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat && !error) error = nd->nd_repstat; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ APPLESTATIC int nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap, u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; int error = 0; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp); if (nd->nd_flag & ND_NFSV4) { if (vtyp == VBLK || vtyp == VCHR) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = vtonfsv34_type(vtyp); *tl++ = txdr_unsigned(NFSMAJOR(rdev)); *tl = txdr_unsigned(NFSMINOR(rdev)); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vtyp); } } (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vtyp); } if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) nfscl_fillsattr(nd, vap, dvp, 0, 0); if ((nd->nd_flag & ND_NFSV3) && (vtyp == VCHR || vtyp == VBLK)) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSMAJOR(rdev)); *tl = txdr_unsigned(NFSMINOR(rdev)); } if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } if (nd->nd_flag & ND_NFSV2) nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; } error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV3) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!error && nd->nd_repstat) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs file create call * Mostly just call the approriate routine. (I separated out v4, so that * error recovery wouldn't be as difficult.) */ APPLESTATIC int nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { int error = 0, newone, expireret = 0, retrycnt, unlocked; struct nfsclowner *owp; struct nfscldeleg *dp; struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp)); u_int32_t clidrev; if (NFSHASNFSV4(nmp)) { retrycnt = 0; do { dp = NULL; error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone, NULL, 1); if (error) return (error); if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || retrycnt > 0) error = nfsrpc_createv4(dvp, name, namelen, vap, cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff, &unlocked); else error = nfsrpc_getcreatelayout(dvp, name, namelen, vap, cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff, &unlocked); /* * There is no need to invalidate cached attributes here, * since new post-delegation issue attributes are always * returned by nfsrpc_createv4() and these will update the * attribute cache. */ if (dp != NULL) (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp); nfscl_ownerrelease(nmp, owp, error, newone, unlocked); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; } else { error = nfsrpc_createv23(dvp, name, namelen, vap, cverf, fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff); } return (error); } /* * The create rpc for v2 and 3. */ static int nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; int error = 0; struct nfsrv_descript nfsd, *nd = &nfsd; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp); (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0); } error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV3) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } static int nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff, int *unlockedp) { u_int32_t *tl; int error = 0, deleg, newone, ret, acesize, limitby; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsclopen *op; struct nfscldeleg *dp = NULL; struct nfsnode *np; struct nfsfh *nfhp; nfsattrbit_t attrbits; nfsv4stateid_t stateid; u_int32_t rflags; struct nfsmount *nmp; struct nfsclsession *tsep; nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); *unlockedp = 0; *nfhpp = NULL; *dpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp); /* * For V4, this is actually an Open op. */ NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(owp->nfsow_seqid); *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD); *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CREATE); if (fmode & O_EXCL) { if (NFSHASNFSV4N(nmp)) { if (NFSHASSESSPERSIST(nmp)) { /* Use GUARDED for persistent sessions. */ *tl = txdr_unsigned(NFSCREATE_GUARDED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } else { /* Otherwise, use EXCLUSIVE4_1. */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { /* NFSv4.0 */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); (void) nfsm_strtom(nd, name, namelen); /* Get the new file's handle and attributes. */ NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); /* Get the directory's post-op attributes. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); NFSCL_INCRSEQID(owp->nfsow_seqid, nd); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); stateid.seqid = *tl++; stateid.other[0] = *tl++; stateid.other[1] = *tl++; stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); (void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) owp->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); dp = malloc( sizeof (struct nfscldeleg) + NFSX_V4FHMAX, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&dp->nfsdl_owner); LIST_INIT(&dp->nfsdl_lock); dp->nfsdl_clp = owp->nfsow_clp; newnfs_copyincred(cred, &dp->nfsdl_cred); nfscl_lockinit(&dp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); dp->nfsdl_stateid.seqid = *tl++; dp->nfsdl_stateid.other[0] = *tl++; dp->nfsdl_stateid.other[1] = *tl++; dp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { dp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: dp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: dp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); dp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; } } else { dp->nfsdl_flags = NFSCLDL_READ; } if (ret) dp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret, &acesize, p); if (error) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; /* Get rid of the PutFH and Getattr status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error) goto nfsmout; *dattrflagp = 1; if (dp != NULL && *attrflagp) { dp->nfsdl_change = nnap->na_filerev; dp->nfsdl_modtime = nnap->na_mtime; dp->nfsdl_flags |= NFSCLDL_MODTIMESET; } /* * We can now complete the Open state. */ nfhp = *nfhpp; if (dp != NULL) { dp->nfsdl_fhlen = nfhp->nfh_len; NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len); } /* * Get an Open structure that will be * attached to the OpenOwner, acquired already. */ error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, NULL, &op, &newone, NULL, 0); if (error) goto nfsmout; op->nfso_stateid = stateid; newnfs_copyincred(cred, &op->nfso_cred); if ((rflags & NFSV4OPEN_RESULTCONFIRM)) { do { ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh, nfhp->nfh_len, op, cred, p); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_create"); } while (ret == NFSERR_DELAY); error = ret; } /* * If the server is handing out delegations, but we didn't * get one because an OpenConfirm was required, try the * Open again, to get a delegation. This is a harmless no-op, * from a server's point of view. */ if ((rflags & NFSV4OPEN_RESULTCONFIRM) && (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) && !error && dp == NULL) { do { ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op, name, namelen, &dp, 0, 0x0, cred, p, 0, 1); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_crt2"); } while (ret == NFSERR_DELAY); if (ret) { if (dp != NULL) { free(dp, M_NFSCLDELEG); dp = NULL; } if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) error = ret; } } nfscl_openrelease(nmp, op, error, newone); *unlockedp = 1; } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALECLIENTID) nfscl_initiate_recovery(owp->nfsow_clp); nfsmout: if (!error) *dpp = dp; else if (dp != NULL) free(dp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * Nfs remove rpc */ APPLESTATIC int nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsnode *np; struct nfsmount *nmp; nfsv4stateid_t dstateid; int error, ret = 0, i; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); nmp = VFSTONFS(vnode_mount(dvp)); tryagain: if (NFSHASNFSV4(nmp) && ret == 0) { ret = nfscl_removedeleg(vp, p, &dstateid); if (ret == 1) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dstateid.seqid; *tl++ = dstateid.other[0]; *tl++ = dstateid.other[1]; *tl++ = dstateid.other[2]; *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(dvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_REMOVE); } } else { ret = 0; } if (ret == 0) NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp); (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { /* For NFSv4, parse out any Delereturn replies. */ if (ret > 0 && nd->nd_repstat != 0 && (nd->nd_flag & ND_NOMOREDATA)) { /* * If the Delegreturn failed, try again without * it. The server will Recall, as required. */ mbuf_freem(nd->nd_mrep); goto tryagain; } for (i = 0; i < (ret * 2); i++) { if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } } error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do an nfs rename rpc. */ APPLESTATIC int nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen, vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap, int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; struct nfsnode *np; nfsattrbit_t attrbits; nfsv4stateid_t fdstateid, tdstateid; int error = 0, ret = 0, gottd = 0, gotfd = 0, i; *fattrflagp = 0; *tattrflagp = 0; nmp = VFSTONFS(vnode_mount(fdvp)); if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); tryagain: if (NFSHASNFSV4(nmp) && ret == 0) { ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp, &tdstateid, &gottd, p); if (gotfd && gottd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp); } else if (gotfd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp); } else if (gottd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp); } if (gotfd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = fdstateid.seqid; *tl++ = fdstateid.other[0]; *tl++ = fdstateid.other[1]; *tl = fdstateid.other[2]; if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(tvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_DELEGRETURN); } } if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = tdstateid.seqid; *tl++ = tdstateid.other[0]; *tl++ = tdstateid.other[1]; *tl = tdstateid.other[2]; } if (ret > 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(fdvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_SAVEFH); } } else { ret = 0; } if (ret == 0) NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh, VTONFS(tdvp)->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_RENAME); } (void) nfsm_strtom(nd, fnameptr, fnamelen); if (!(nd->nd_flag & ND_NFSV4)) (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh, VTONFS(tdvp)->n_fhp->nfh_len, 0); (void) nfsm_strtom(nd, tnameptr, tnamelen); error = nfscl_request(nd, fdvp, p, cred, fstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { /* For NFSv4, parse out any Delereturn replies. */ if (ret > 0 && nd->nd_repstat != 0 && (nd->nd_flag & ND_NOMOREDATA)) { /* * If the Delegreturn failed, try again without * it. The server will Recall, as required. */ mbuf_freem(nd->nd_mrep); goto tryagain; } for (i = 0; i < (ret * 2); i++) { if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) { if (i == 0 && ret > 1) { /* * If the Delegreturn failed, try again * without it. The server will Recall, as * required. * If ret > 1, the first iteration of this * loop is the second DelegReturn result. */ mbuf_freem(nd->nd_mrep); goto tryagain; } else { nd->nd_flag |= ND_NOMOREDATA; } } } } /* Now, the first wcc attribute reply. */ if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, fstuff); /* and the second wcc attribute reply. */ if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } if (!error) error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp, NULL, tstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs hard link create rpc */ APPLESTATIC int nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error = 0; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_LINK, vp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); } (void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh, VTONFS(dvp)->n_fhp->nfh_len, 0); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LINK); } (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, vp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, dstuff); if (!error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* * First, parse out the PutFH and Getattr result. */ NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (!(*(tl + 1))) NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; /* * Get the pre-op attributes. */ error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs symbolic link create rpc */ APPLESTATIC int nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target, struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; int slen, error = 0; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; nmp = VFSTONFS(vnode_mount(dvp)); slen = strlen(target); if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFLNK); (void) nfsm_strtom(nd, target, slen); } (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) nfscl_fillsattr(nd, vap, dvp, 0, 0); if (!(nd->nd_flag & ND_NFSV4)) (void) nfsm_strtom(nd, target, slen); if (nd->nd_flag & ND_NFSV2) nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if ((nd->nd_flag & ND_NFSV3) && !error) { if (!nd->nd_repstat) error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (!error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. * Only do this if vfs.nfs.ignore_eexist is set. * Never do this for NFSv4.1 or later minor versions, since sessions * should guarantee "exactly once" RPC semantics. */ if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) || nmp->nm_minorvers == 0)) error = 0; return (error); } /* * nfs make dir rpc */ APPLESTATIC int nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error = 0; struct nfsfh *fhp; struct nfsmount *nmp; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; nmp = VFSTONFS(vnode_mount(dvp)); fhp = VTONFS(dvp)->n_fhp; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFDIR); } (void) nfsm_strtom(nd, name, namelen); nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!nd->nd_repstat && !error) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); } if (!error) error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) { /* Get rid of the PutFH and Getattr status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error == 0) *dattrflagp = 1; } } if ((nd->nd_flag & ND_NFSV3) && !error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. * Only do this if vfs.nfs.ignore_eexist is set. * Never do this for NFSv4.1 or later minor versions, since sessions * should guarantee "exactly once" RPC semantics. */ if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) || nmp->nm_minorvers == 0)) error = 0; return (error); } /* * nfs remove directory call */ APPLESTATIC int nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp); (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * Readdir rpc. * Always returns with either uio_resid unchanged, if you are at the * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks * filled in. * I felt this would allow caching of directory blocks more easily * than returning a pertially filled block. * Directory offset cookies: * Oh my, what to do with them... * I can think of three ways to deal with them: * 1 - have the layer above these RPCs maintain a map between logical * directory byte offsets and the NFS directory offset cookies * 2 - pass the opaque directory offset cookies up into userland * and let the libc functions deal with them, via the system call * 3 - return them to userland in the "struct dirent", so future versions * of libc can use them and do whatever is necessary to make things work * above these rpc calls, in the meantime * For now, I do #3 by "hiding" the directory offset cookies after the * d_name field in struct dirent. This is space inside d_reclen that * will be ignored by anything that doesn't know about them. * The directory offset cookies are filled in as the last 8 bytes of * each directory entry, after d_name. Someday, the userland libc * functions may be able to use these. In the meantime, it satisfies * OpenBSD's requirements for cookies being returned. * If expects the directory offset cookie for the read to be in uio_offset * and returns the one for the next entry after this directory block in * there, as well. */ APPLESTATIC int nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int *eofp, void *stuff) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; nfsquad_t cookie, ncookie; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *dnp = VTONFS(vp); struct nfsvattr nfsva; struct nfsrv_descript nfsd, *nd = &nfsd; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0; u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX; char *cp; nfsattrbit_t attrbits, dattrbits; u_int32_t rderr, *tl2 = NULL; size_t tresid; KASSERT(uiop->uio_iovcnt == 1 && (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0, ("nfs readdirrpc bad uio")); ncookie.lval[0] = ncookie.lval[1] = 0; /* * There is no point in reading a lot more than uio_resid, however * adding one additional DIRBLKSIZ makes sense. Since uio_resid * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this * will never make readsize > nm_readdirsize. */ readsize = nmp->nm_readdirsize; if (readsize > uio_uio_resid(uiop)) readsize = uio_uio_resid(uiop) + DIRBLKSIZ; *attrflagp = 0; if (eofp) *eofp = 0; tresid = uio_uio_resid(uiop); cookie.lval[0] = cookiep->nfsuquad[0]; cookie.lval[1] = cookiep->nfsuquad[1]; nd->nd_mrep = NULL; /* * For NFSv4, first create the "." and ".." entries. */ if (NFSHASNFSV4(nmp)) { reqsize = 6 * NFSX_UNSIGNED; NFSGETATTR_ATTRBIT(&dattrbits); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE); if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, NFSATTRBIT_MOUNTEDONFILEID)) { NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); gotmnton = 1; } else { /* * Must fake it. Use the fileno, except when the * fsid is != to that of the directory. For that * case, generate a fake fileno that is not the same. */ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID); gotmnton = 0; } /* * Joy, oh joy. For V4 we get to hand craft '.' and '..'. */ if (uiop->uio_offset == 0) { NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); dotfileid = 0; /* Fake out the compiler. */ if ((nd->nd_flag & ND_NOMOREDATA) == 0) { error = nfsm_loadattr(nd, &nfsva); if (error != 0) goto nfsmout; dotfileid = nfsva.na_fileid; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 4)); if (len > 0 && len <= NFSX_V4FHMAX) error = nfsm_advance(nd, NFSM_RNDUP(len), -1); else error = EPERM; if (!error) { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_mntonfileno = UINT64_MAX; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { if (nfsva.na_mntonfileno != UINT64_MAX) dotdotfileid = nfsva.na_mntonfileno; else dotdotfileid = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dotdotfileid = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dotdotfileid = fakefileno; } } } else if (nd->nd_repstat == NFSERR_NOENT) { /* * Lookupp returns NFSERR_NOENT when we are * at the root, so just use the current dir. */ nd->nd_repstat = 0; dotdotfileid = dotfileid; } else { error = nd->nd_repstat; } mbuf_freem(nd->nd_mrep); if (error) return (error); nd->nd_mrep = NULL; dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotfileid; dp->d_namlen = 1; *((uint64_t *)dp->d_name) = 0; /* Zero pad it. */ dp->d_name[0] = '.'; dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotdotfileid; dp->d_namlen = 2; *((uint64_t *)dp->d_name) = 0; dp->d_name[0] = '.'; dp->d_name[1] = '.'; dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); } NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR); } else { reqsize = 5 * NFSX_UNSIGNED; } /* * Loop around doing readdir rpc's of size readsize. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READDIR, vp); if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = cookie.lval[1]; *tl = txdr_unsigned(readsize); } else { NFSM_BUILD(tl, u_int32_t *, reqsize); *tl++ = cookie.lval[0]; *tl++ = cookie.lval[1]; if (cookie.qval == 0) { *tl++ = 0; *tl++ = 0; } else { NFSLOCKNODE(dnp); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; NFSUNLOCKNODE(dnp); } if (nd->nd_flag & ND_NFSV4) { *tl++ = txdr_unsigned(readsize); *tl = txdr_unsigned(readsize); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &dattrbits); } else { *tl = txdr_unsigned(readsize); } } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!(nd->nd_flag & ND_NFSV2)) { if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (!nd->nd_repstat && !error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); NFSLOCKNODE(dnp); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; NFSUNLOCKNODE(dnp); } } if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); if (!more_dirs) tryformoredirs = 0; /* loop through the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; len = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); nfsva.na_fileid = fxdr_hyper(tl); tl += 2; len = fxdr_unsigned(int, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_fileid = fxdr_unsigned(uint64_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; goto nfsmout; } tlen = roundup2(len, 8); if (tlen == len) tlen += 8; /* To ensure null termination. */ left = DIRBLKSIZ - blksiz; if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) { dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; blksiz = 0; } if (_GENERIC_DIRLEN(len) + NFSX_HYPER > uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_namlen = len; dp->d_reclen = _GENERIC_DIRLEN(len) + NFSX_HYPER; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uio_uio_resid_add(uiop, -(DIRHDSIZ)); uiop->uio_offset += DIRHDSIZ; uio_iov_base_add(uiop, DIRHDSIZ); uio_iov_len_add(uiop, -(DIRHDSIZ)); error = nfsm_mbufuio(nd, uiop, len); if (error) goto nfsmout; cp = uio_iov_base(uiop); tlen -= len; *cp = '\0'; /* null terminate */ cp += tlen; /* points to cookie storage */ tl2 = (u_int32_t *)cp; uio_iov_base_add(uiop, (tlen + NFSX_HYPER)); uio_iov_len_add(uiop, -(tlen + NFSX_HYPER)); uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER)); uiop->uio_offset += (tlen + NFSX_HYPER); } else { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV4) { rderr = 0; nfsva.na_mntonfileno = UINT64_MAX; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, &rderr, p, cred); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; } else { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); ncookie.lval[0] = 0; ncookie.lval[1] = *tl++; } if (bigenough) { if (nd->nd_flag & ND_NFSV4) { if (rderr) { dp->d_fileno = 0; } else { if (gotmnton) { if (nfsva.na_mntonfileno != UINT64_MAX) dp->d_fileno = nfsva.na_mntonfileno; else dp->d_fileno = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dp->d_fileno = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dp->d_fileno = fakefileno; } dp->d_type = vtonfs_dtype(nfsva.na_type); } } else { dp->d_fileno = nfsva.na_fileid; } *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] = ncookie.lval[0]; *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] = ncookie.lval[1]; } more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); if (tryformoredirs) more_dirs = !eof; if (nd->nd_flag & ND_NFSV4) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } } mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; } /* * If returning no data, assume end of file. * If not bigenough, return not end of file, since you aren't * returning all the data * Otherwise, return the eof flag from the server. */ if (eofp) { if (tresid == ((size_t)(uio_uio_resid(uiop)))) *eofp = 1; else if (!bigenough) *eofp = 0; else *eofp = eof; } /* * Add extra empty records to any remaining DIRBLKSIZ chunks. */ while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_UNKNOWN; dp->d_fileno = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; tl = (u_int32_t *)&dp->d_name[4]; *tl++ = cookie.lval[0]; *tl = cookie.lval[1]; dp->d_reclen = DIRBLKSIZ; uio_iov_base_add(uiop, DIRBLKSIZ); uio_iov_len_add(uiop, -(DIRBLKSIZ)); uio_uio_resid_add(uiop, -(DIRBLKSIZ)); uiop->uio_offset += DIRBLKSIZ; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } #ifndef APPLE /* * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir(). * (Also used for NFS V4 when mount flag set.) * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.) */ APPLESTATIC int nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int *eofp, void *stuff) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; vnode_t newvp = NULLVP; struct nfsrv_descript nfsd, *nd = &nfsd; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *dnp = VTONFS(vp), *np; struct nfsvattr nfsva; struct nfsfh *nfhp; nfsquad_t cookie, ncookie; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0; int isdotdot = 0, unlocknewvp = 0; u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX; u_int64_t fileno = 0; char *cp; nfsattrbit_t attrbits, dattrbits; size_t tresid; u_int32_t *tl2 = NULL, rderr; struct timespec dctime; KASSERT(uiop->uio_iovcnt == 1 && (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0, ("nfs readdirplusrpc bad uio")); ncookie.lval[0] = ncookie.lval[1] = 0; timespecclear(&dctime); *attrflagp = 0; if (eofp != NULL) *eofp = 0; ndp->ni_dvp = vp; nd->nd_mrep = NULL; cookie.lval[0] = cookiep->nfsuquad[0]; cookie.lval[1] = cookiep->nfsuquad[1]; tresid = uio_uio_resid(uiop); /* * For NFSv4, first create the "." and ".." entries. */ if (NFSHASNFSV4(nmp)) { NFSGETATTR_ATTRBIT(&dattrbits); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID); if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, NFSATTRBIT_MOUNTEDONFILEID)) { NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); gotmnton = 1; } else { /* * Must fake it. Use the fileno, except when the * fsid is != to that of the directory. For that * case, generate a fake fileno that is not the same. */ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID); gotmnton = 0; } /* * Joy, oh joy. For V4 we get to hand craft '.' and '..'. */ if (uiop->uio_offset == 0) { NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); dotfileid = 0; /* Fake out the compiler. */ if ((nd->nd_flag & ND_NOMOREDATA) == 0) { error = nfsm_loadattr(nd, &nfsva); if (error != 0) goto nfsmout; dctime = nfsva.na_ctime; dotfileid = nfsva.na_fileid; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 4)); if (len > 0 && len <= NFSX_V4FHMAX) error = nfsm_advance(nd, NFSM_RNDUP(len), -1); else error = EPERM; if (!error) { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_mntonfileno = UINT64_MAX; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { if (nfsva.na_mntonfileno != UINT64_MAX) dotdotfileid = nfsva.na_mntonfileno; else dotdotfileid = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dotdotfileid = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dotdotfileid = fakefileno; } } } else if (nd->nd_repstat == NFSERR_NOENT) { /* * Lookupp returns NFSERR_NOENT when we are * at the root, so just use the current dir. */ nd->nd_repstat = 0; dotdotfileid = dotfileid; } else { error = nd->nd_repstat; } mbuf_freem(nd->nd_mrep); if (error) return (error); nd->nd_mrep = NULL; dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotfileid; dp->d_namlen = 1; *((uint64_t *)dp->d_name) = 0; /* Zero pad it. */ dp->d_name[0] = '.'; dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotdotfileid; dp->d_namlen = 2; *((uint64_t *)dp->d_name) = 0; dp->d_name[0] = '.'; dp->d_name[1] = '.'; dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); } NFSREADDIRPLUS_ATTRBIT(&attrbits); if (gotmnton) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); } /* * Loop around doing readdir rpc's of size nm_readdirsize. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp); NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED); *tl++ = cookie.lval[0]; *tl++ = cookie.lval[1]; if (cookie.qval == 0) { *tl++ = 0; *tl++ = 0; } else { NFSLOCKNODE(dnp); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; NFSUNLOCKNODE(dnp); } *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_readdirsize); if (nd->nd_flag & ND_NFSV4) { (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &dattrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0) dctime = nap->na_ctime; NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); NFSLOCKNODE(dnp); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; NFSUNLOCKNODE(dnp); more_dirs = fxdr_unsigned(int, *tl); if (!more_dirs) tryformoredirs = 0; /* loop through the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); if (nd->nd_flag & ND_NFSV4) { ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; } else { fileno = fxdr_hyper(tl); tl += 2; } len = fxdr_unsigned(int, *tl); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; goto nfsmout; } tlen = roundup2(len, 8); if (tlen == len) tlen += 8; /* To ensure null termination. */ left = DIRBLKSIZ - blksiz; if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) { dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; blksiz = 0; } if (_GENERIC_DIRLEN(len) + NFSX_HYPER > uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_namlen = len; dp->d_reclen = _GENERIC_DIRLEN(len) + NFSX_HYPER; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uio_uio_resid_add(uiop, -(DIRHDSIZ)); uiop->uio_offset += DIRHDSIZ; uio_iov_base_add(uiop, DIRHDSIZ); uio_iov_len_add(uiop, -(DIRHDSIZ)); cnp->cn_nameptr = uio_iov_base(uiop); cnp->cn_namelen = len; NFSCNHASHZERO(cnp); error = nfsm_mbufuio(nd, uiop, len); if (error) goto nfsmout; cp = uio_iov_base(uiop); tlen -= len; *cp = '\0'; cp += tlen; /* points to cookie storage */ tl2 = (u_int32_t *)cp; if (len == 2 && cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.') isdotdot = 1; else isdotdot = 0; uio_iov_base_add(uiop, (tlen + NFSX_HYPER)); uio_iov_len_add(uiop, -(tlen + NFSX_HYPER)); uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER)); uiop->uio_offset += (tlen + NFSX_HYPER); } else { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; } nfhp = NULL; if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; attrflag = fxdr_unsigned(int, *tl); if (attrflag) { error = nfsm_loadattr(nd, &nfsva); if (error) goto nfsmout; } NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED); if (*tl) { error = nfsm_getfh(nd, &nfhp); if (error) goto nfsmout; } if (!attrflag && nfhp != NULL) { free(nfhp, M_NFSFH); nfhp = NULL; } } else { rderr = 0; nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, &rderr, p, cred); if (error) goto nfsmout; } if (bigenough) { if (nd->nd_flag & ND_NFSV4) { if (rderr) { dp->d_fileno = 0; } else if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dp->d_fileno = nfsva.na_mntonfileno; else dp->d_fileno = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dp->d_fileno = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dp->d_fileno = fakefileno; } } else { dp->d_fileno = fileno; } *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] = ncookie.lval[0]; *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] = ncookie.lval[1]; if (nfhp != NULL) { if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len, dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) { VREF(vp); newvp = vp; unlocknewvp = 0; free(nfhp, M_NFSFH); np = dnp; } else if (isdotdot != 0) { /* * Skip doing a nfscl_nget() call for "..". * There's a race between acquiring the nfs * node here and lookups that look for the * directory being read (in the parent). * It would try to get a lock on ".." here, * owning the lock on the directory being * read. Lookup will hold the lock on ".." * and try to acquire the lock on the * directory being read. * If the directory is unlocked/relocked, * then there is a LOR with the buflock * vp is relocked. */ free(nfhp, M_NFSFH); } else { error = nfscl_nget(vnode_mount(vp), vp, nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE); if (!error) { newvp = NFSTOV(np); unlocknewvp = 1; } } nfhp = NULL; if (newvp != NULLVP) { error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 0); if (error) { if (unlocknewvp) vput(newvp); else vrele(newvp); goto nfsmout; } dp->d_type = vtonfs_dtype(np->n_vattr.na_type); ndp->ni_vp = newvp; NFSCNHASH(cnp, HASHINIT); if (cnp->cn_namelen <= NCHNAMLEN && (newvp->v_type != VDIR || dctime.tv_sec != 0)) { cache_enter_time(ndp->ni_dvp, ndp->ni_vp, cnp, &nfsva.na_ctime, newvp->v_type != VDIR ? NULL : &dctime); } if (unlocknewvp) vput(newvp); else vrele(newvp); newvp = NULLVP; } } } else if (nfhp != NULL) { free(nfhp, M_NFSFH); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); if (tryformoredirs) more_dirs = !eof; if (nd->nd_flag & ND_NFSV4) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } } mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; } /* * If returning no data, assume end of file. * If not bigenough, return not end of file, since you aren't * returning all the data * Otherwise, return the eof flag from the server. */ if (eofp != NULL) { if (tresid == uio_uio_resid(uiop)) *eofp = 1; else if (!bigenough) *eofp = 0; else *eofp = eof; } /* * Add extra empty records to any remaining DIRBLKSIZ chunks. */ while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_UNKNOWN; dp->d_fileno = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; tl = (u_int32_t *)&dp->d_name[4]; *tl++ = cookie.lval[0]; *tl = cookie.lval[1]; dp->d_reclen = DIRBLKSIZ; uio_iov_base_add(uiop, DIRBLKSIZ); uio_iov_len_add(uiop, -(DIRBLKSIZ)); uio_uio_resid_add(uiop, -(DIRBLKSIZ)); uiop->uio_offset += DIRBLKSIZ; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } #endif /* !APPLE */ /* * Nfs commit rpc */ APPLESTATIC int nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp); NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff); if (!error && !nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); NFSLOCKMNT(nmp); if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) { NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); nd->nd_repstat = NFSERR_STALEWRITEVERF; } NFSUNLOCKMNT(nmp); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } nfsmout: if (!error && nd->nd_repstat) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * NFS byte range lock rpc. * (Mostly just calls one of the three lower level RPC routines.) */ APPLESTATIC int nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfscllockowner *lp; struct nfsclclient *clp; struct nfsfh *nfhp; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); u_int64_t off, len; off_t start, end; u_int32_t clidrev = 0; int error = 0, newone = 0, expireret = 0, retrycnt, donelocally; int callcnt, dorpc; /* * Convert the flock structure into a start and end and do POSIX * bounds checking. */ switch (fl->l_whence) { case SEEK_SET: case SEEK_CUR: /* * Caller is responsible for adding any necessary offset * when SEEK_CUR is used. */ start = fl->l_start; off = fl->l_start; break; case SEEK_END: start = size + fl->l_start; off = size + fl->l_start; break; default: return (EINVAL); } if (start < 0) return (EINVAL); if (fl->l_len != 0) { end = start + fl->l_len - 1; if (end < start) return (EINVAL); } len = fl->l_len; if (len == 0) len = NFS64BITSSET; retrycnt = 0; do { nd->nd_repstat = 0; if (op == F_GETLK) { error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags); if (!error) { clidrev = clp->nfsc_clientidrev; error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred, p, id, flags); } else if (error == -1) { error = 0; } nfscl_clientrelease(clp); } else if (op == F_UNLCK && fl->l_type == F_UNLCK) { /* * We must loop around for all lockowner cases. */ callcnt = 0; error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); do { error = nfscl_relbytelock(vp, off, len, cred, p, callcnt, clp, id, flags, &lp, &dorpc); /* * If it returns a NULL lp, we're done. */ if (lp == NULL) { if (callcnt == 0) nfscl_clientrelease(clp); else nfscl_releasealllocks(clp, vp, p, id, flags); return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; /* * If the server doesn't support Posix lock semantics, * only allow locks on the entire file, since it won't * handle overlapping byte ranges. * There might still be a problem when a lock * upgrade/downgrade (read<->write) occurs, since the * server "might" expect an unlock first? */ if (dorpc && (lp->nfsl_open->nfso_posixlock || (off == 0 && len == NFS64BITSSET))) { /* * Since the lock records will go away, we must * wait for grace and delay here. */ do { error = nfsrpc_locku(nd, nmp, lp, off, len, NFSV4LOCKT_READ, cred, p, 0); if ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfs_advlock"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0); } callcnt++; } while (error == 0 && nd->nd_repstat == 0); nfscl_releasealllocks(clp, vp, p, id, flags); } else if (op == F_SETLK) { error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p, NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally); if (error || donelocally) { return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; nfhp = VTONFS(vp)->n_fhp; if (!lp->nfsl_open->nfso_posixlock && (off != 0 || len != NFS64BITSSET)) { error = EINVAL; } else { error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, lp, newone, reclaim, off, len, fl->l_type, cred, p, 0); } if (!error) error = nd->nd_repstat; nfscl_lockrelease(lp, error, newone); } else { error = EINVAL; } if (!error) error = nd->nd_repstat; if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_advlock"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } /* * The lower level routine for the LockT case. */ APPLESTATIC int nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { u_int32_t *tl; int error, type, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsnode *np; struct nfsmount *nmp; struct nfsclsession *tsep; nmp = VFSTONFS(vp->v_mount); NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (fl->l_type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); else *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nfscl_filllockowner(id, own, flags); np = VTONFS(vp); NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN], np->n_fhp->nfh_len); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); if (nd->nd_repstat == 0) { fl->l_type = F_UNLCK; } else if (nd->nd_repstat == NFSERR_DENIED) { nd->nd_repstat = 0; fl->l_whence = SEEK_SET; NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); fl->l_start = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; if (len == NFS64BITSSET) fl->l_len = 0; else fl->l_len = len; type = fxdr_unsigned(int, *tl++); if (type == NFSV4LOCKT_WRITE) fl->l_type = F_WRLCK; else fl->l_type = F_RDLCK; /* * XXX For now, I have no idea what to do with the * conflicting lock_owner, so I'll just set the pid == 0 * and skip over the lock_owner. */ fl->l_pid = (pid_t)0; tl += 2; size = fxdr_unsigned(int, *tl); if (size < 0 || size > NFSV4_OPAQUELIMIT) error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); } else if (nd->nd_repstat == NFSERR_STALECLIENTID) nfscl_initiate_recovery(clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Lower level function that performs the LockU RPC. */ static int nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfscllockowner *lp, u_int64_t off, u_int64_t len, u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error; nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh, lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(type); *tl = txdr_unsigned(lp->nfsl_seqid); if (nfstest_outofseq && (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); tl++; if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); lp->nfsl_stateid.seqid = *tl++; lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; } else if (nd->nd_repstat == NFSERR_STALESTATEID) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * The actual Lock RPC. */ APPLESTATIC int nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone, int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); else *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); *tl++ = txdr_unsigned(reclaim); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; if (newone) { *tl = newnfs_true; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 2 * NFSX_UNSIGNED + NFSX_HYPER); *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_open->nfso_stateid.seqid; *tl++ = lp->nfsl_open->nfso_stateid.other[0]; *tl++ = lp->nfsl_open->nfso_stateid.other[1]; *tl++ = lp->nfsl_open->nfso_stateid.other[2]; *tl++ = txdr_unsigned(lp->nfsl_seqid); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); } else { *tl = newnfs_false; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; *tl = txdr_unsigned(lp->nfsl_seqid); if (nfstest_outofseq && (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); } if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (newone) NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); lp->nfsl_stateid.seqid = *tl++; lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; } else if (nd->nd_repstat == NFSERR_DENIED) { NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); size = fxdr_unsigned(int, *(tl + 7)); if (size < 0 || size > NFSV4_OPAQUELIMIT) error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); } else if (nd->nd_repstat == NFSERR_STALESTATEID) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs statfs rpc * (always called with the vp for the mount point) */ APPLESTATIC int nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl = NULL; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; nfsattrbit_t attrbits; int error; *attrflagp = 0; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4(nmp)) { /* * For V4, you actually do a getattr. */ NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); NFSSTATFS_GETATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p, cred); if (!error) { nmp->nm_fsid[0] = nap->na_filesid[0]; nmp->nm_fsid[1] = nap->na_filesid[1]; NFSSETHASSETFSID(nmp); *attrflagp = 1; } } else { error = nd->nd_repstat; } if (error) goto nfsmout; } else { NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } if (nd->nd_repstat) { error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATFS(nd->nd_flag & ND_NFSV3)); } if (NFSHASNFSV3(nmp)) { sbp->sf_tbytes = fxdr_hyper(tl); tl += 2; sbp->sf_fbytes = fxdr_hyper(tl); tl += 2; sbp->sf_abytes = fxdr_hyper(tl); tl += 2; sbp->sf_tfiles = fxdr_hyper(tl); tl += 2; sbp->sf_ffiles = fxdr_hyper(tl); tl += 2; sbp->sf_afiles = fxdr_hyper(tl); tl += 2; sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl); } else if (NFSHASNFSV4(nmp) == 0) { sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl); } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs pathconf rpc */ APPLESTATIC int nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; u_int32_t *tl; nfsattrbit_t attrbits; int error; *attrflagp = 0; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4(nmp)) { /* * For V4, you actually do a getattr. */ NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); NFSPATHCONF_GETATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (!error) *attrflagp = 1; } else { error = nd->nd_repstat; } } else { NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF); pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++); pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++); pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++); pc->pc_chownrestricted = fxdr_unsigned(u_int32_t, *tl++); pc->pc_caseinsensitive = fxdr_unsigned(u_int32_t, *tl++); pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl); } } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs version 3 fsinfo rpc call */ APPLESTATIC int nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO); fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_maxfilesize = fxdr_hyper(tl); tl += 2; fxdr_nfsv3time(tl, &fsp->fs_timedelta); tl += 2; fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl); } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Renew RPC. */ APPLESTATIC int nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsmount *nmp; int error; struct nfssockreq *nrp; struct nfsclsession *tsep; nmp = clp->nfsc_nmp; if (nmp == NULL) return (0); if (dsp == NULL) nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0, 0); else nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, &dsp->nfsclds_sess, 0, 0); if (!NFSHASNFSV4N(nmp)) { /* NFSv4.1 just uses a Sequence Op and not a Renew. */ NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; } nrp = NULL; if (dsp != NULL) nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; nd->nd_flag |= ND_USEGSSNAME; if (dsp == NULL) error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); else error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Releaselockowner RPC. */ APPLESTATIC int nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp, uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; u_int32_t *tl; int error; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsclsession *tsep; if (NFSHASNFSV4N(nmp)) { /* For NFSv4.1, do a FreeStateID. */ nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL, NULL, 0, 0); nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID); } else { nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Compound to get the mount pt FH. */ APPLESTATIC int nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; u_char *cp, *cp2; int error, cnt, len, setnil; u_int32_t *opcntp; nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0, 0); cp = dirpath; cnt = 0; do { setnil = 0; while (*cp == '/') cp++; cp2 = cp; while (*cp2 != '\0' && *cp2 != '/') cp2++; if (*cp2 == '/') { setnil = 1; *cp2 = '\0'; } if (cp2 != cp) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LOOKUP); nfsm_strtom(nd, cp, strlen(cp)); cnt++; } if (setnil) *cp2++ = '/'; cp = cp2; } while (*cp != '\0'); if (NFSHASNFSV4N(nmp)) /* Has a Sequence Op done by nfscl_reqstart(). */ *opcntp = txdr_unsigned(3 + cnt); else *opcntp = txdr_unsigned(2 + cnt); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETFH); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED); tl += (2 + 2 * cnt); if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { nd->nd_repstat = NFSERR_BADXDR; } else { nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len); if (nd->nd_repstat == 0) nmp->nm_fhsize = len; } } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Delegreturn RPC. */ APPLESTATIC int nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred, struct nfsmount *nmp, NFSPROC_T *p, int syscred) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh, dp->nfsdl_fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getacl call. */ APPLESTATIC int nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_GETACL, vp); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs setacl call. */ APPLESTATIC int nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { int error; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff); return (error); } /* * nfs setacl call. */ static int nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_SETACL, vp); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0, - &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); + &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); /* Don't care about the pre/postop attributes */ mbuf_freem(nd->nd_mrep); return (nd->nd_repstat); } /* * Do the NFSv4.1 Exchange ID. */ int nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl, v41flags; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsclds *dsp; struct timespec verstime; int error, len; *dspp = NULL; nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* Client owner */ *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(exchflags); *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); /* Set the implementation id4 */ *tl = txdr_unsigned(1); (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); (void) nfsm_strtom(nd, version, strlen(version)); NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ verstime.tv_nsec = 0; txdr_nfsv4time(&verstime, tl); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error, (int)nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER); len = fxdr_unsigned(int, *(tl + 7)); if (len < 0 || len > NFSV4_OPAQUELIMIT) { error = NFSERR_BADXDR; goto nfsmout; } dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS, M_WAITOK | M_ZERO); dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew; dsp->nfsclds_servownlen = len; dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++; dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++; dsp->nfsclds_sess.nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); v41flags = fxdr_unsigned(uint32_t, *tl); if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 && NFSHASPNFSOPT(nmp)) { NFSCL_DEBUG(1, "set PNFS\n"); NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_PNFS; NFSUNLOCKMNT(nmp); dsp->nfsclds_flags |= NFSCLDS_MDS; } if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0) dsp->nfsclds_flags |= NFSCLDS_DS; if (len > 0) nd->nd_repstat = nfsrv_mtostr(nd, dsp->nfsclds_serverown, len); if (nd->nd_repstat == 0) { mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); nfscl_initsessionslots(&dsp->nfsclds_sess); *dspp = dsp; } else free(dsp, M_NFSCLDS); } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Create Session. */ int nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, NFSPROC_T *p) { uint32_t crflags, maxval, *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error, irdcnt; /* Make sure nm_rsize, nm_wsize is set. */ if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0) nmp->nm_rsize = NFS_MAXBSIZE; if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0) nmp->nm_wsize = NFS_MAXBSIZE; nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); *tl++ = sep->nfsess_clientid.lval[0]; *tl++ = sep->nfsess_clientid.lval[1]; *tl++ = txdr_unsigned(sequenceid); crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST); if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0) crflags |= NFSV4CRSESS_CONNBACKCHAN; *tl = txdr_unsigned(crflags); /* Fill in fore channel attributes. */ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */ *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(20); /* Max operations */ *tl++ = txdr_unsigned(64); /* Max slots */ *tl = 0; /* No rdma ird */ /* Fill in back channel attributes. */ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ *tl++ = txdr_unsigned(10000); /* Max request size */ *tl++ = txdr_unsigned(10000); /* Max response size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(4); /* Max operations */ *tl++ = txdr_unsigned(NFSV4_CBSLOTS); /* Max slots */ *tl = 0; /* No rdma ird */ NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */ /* Allow AUTH_SYS callbacks as uid, gid == 0. */ *tl++ = txdr_unsigned(1); /* Auth_sys only */ *tl++ = txdr_unsigned(AUTH_SYS); /* AUTH_SYS type */ *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */ *tl++ = 0; /* Null machine name */ *tl++ = 0; /* Uid == 0 */ *tl++ = 0; /* Gid == 0 */ *tl = 0; /* No additional gids */ nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED); bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); crflags = fxdr_unsigned(uint32_t, *tl); if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_SESSPERSIST; NFSUNLOCKMNT(nmp); } /* Get the fore channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Skip the header pad size. */ /* Make sure nm_wsize is small enough. */ maxval = fxdr_unsigned(uint32_t, *tl++); while (maxval < nmp->nm_wsize + NFS_MAXXDR) { if (nmp->nm_wsize > 8096) nmp->nm_wsize /= 2; else break; } /* Make sure nm_rsize is small enough. */ maxval = fxdr_unsigned(uint32_t, *tl++); while (maxval < nmp->nm_rsize + NFS_MAXXDR) { if (nmp->nm_rsize > 8096) nmp->nm_rsize /= 2; else break; } sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); tl++; sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots); irdcnt = fxdr_unsigned(int, *tl); if (irdcnt > 0) NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED); /* and the back channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl += 5; sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl); NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots); } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Destroy Session. */ int nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); tsep = nfsmnt_mdssession(nmp); bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Destroy Client. */ int nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutGet. */ static int nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode, uint64_t offset, uint64_t len, uint64_t minlen, int layouttype, int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0, 0); nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp, layouttype, layoutlen, 0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); if (error == 0 && nd->nd_repstat != 0) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Get Device Info. */ int nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred, NFSPROC_T *p) { uint32_t cnt, *tl, vers, minorvers; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct sockaddr_in sin, ssin; struct sockaddr_in6 sin6, ssin6; struct nfsclds *dsp = NULL, **dspp, **gotdspp; struct nfscldevinfo *ndi; int addrcnt = 0, bitcnt, error, gotvers, i, isudp, j, stripecnt; uint8_t stripeindex; sa_family_t af, safilled; *ndip = NULL; ndi = NULL; gotdspp = NULL; nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED); NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(100000); if (notifybitsp != NULL && *notifybitsp != 0) { *tl = txdr_unsigned(1); /* One word of bits. */ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(*notifybitsp); } else *tl = txdr_unsigned(0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (layouttype != fxdr_unsigned(int, *tl)) printf("EEK! devinfo layout type not same!\n"); if (layouttype == NFSLAYOUT_NFSV4_1_FILES) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); stripecnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt); if (stripecnt < 1 || stripecnt > 4096) { printf("pNFS File layout devinfo stripecnt %d:" " out of range\n", stripecnt); error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED); addrcnt = fxdr_unsigned(int, *(tl + stripecnt)); NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt); if (addrcnt < 1 || addrcnt > 128) { printf("NFS devinfo addrcnt %d: out of range\n", addrcnt); error = NFSERR_BADXDR; goto nfsmout; } /* * Now we know how many stripe indices and addresses, so * we can allocate the structure the correct size. */ i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *) + 1; NFSCL_DEBUG(4, "stripeindices=%d\n", i); ndi = malloc(sizeof(*ndi) + (addrcnt + i) * sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO); NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID); ndi->nfsdi_refcnt = 0; ndi->nfsdi_flags = NFSDI_FILELAYOUT; ndi->nfsdi_stripecnt = stripecnt; ndi->nfsdi_addrcnt = addrcnt; /* Fill in the stripe indices. */ for (i = 0; i < stripecnt; i++) { stripeindex = fxdr_unsigned(uint8_t, *tl++); NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex); if (stripeindex >= addrcnt) { printf("pNFS File Layout devinfo" " stripeindex %d: too big\n", (int)stripeindex); error = NFSERR_BADXDR; goto nfsmout; } nfsfldi_setstripeindex(ndi, i, stripeindex); } } else if (layouttype == NFSLAYOUT_FLEXFILE) { /* For Flex File, we only get one address list. */ ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO); NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID); ndi->nfsdi_refcnt = 0; ndi->nfsdi_flags = NFSDI_FLEXFILE; addrcnt = ndi->nfsdi_addrcnt = 1; } /* Now, dissect the server address(es). */ safilled = AF_UNSPEC; for (i = 0; i < addrcnt; i++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(uint32_t, *tl); if (cnt == 0) { printf("NFS devinfo 0 len addrlist\n"); error = NFSERR_BADXDR; goto nfsmout; } dspp = nfsfldi_addr(ndi, i); safilled = AF_UNSPEC; for (j = 0; j < cnt; j++) { error = nfsv4_getipaddr(nd, &sin, &sin6, &af, &isudp); if (error != 0 && error != EPERM) { error = NFSERR_BADXDR; goto nfsmout; } if (error == 0 && isudp == 0) { /* * The priority is: * - Same address family. * Save the address and dspp, so that * the connection can be done after * parsing is complete. */ if (safilled == AF_UNSPEC || (af == nmp->nm_nam->sa_family && safilled != nmp->nm_nam->sa_family) ) { if (af == AF_INET) ssin = sin; else ssin6 = sin6; safilled = af; gotdspp = dspp; } } } } gotvers = NFS_VER4; /* Always NFSv4 for File Layout. */ /* For Flex File, we will take one of the versions to use. */ if (layouttype == NFSLAYOUT_FLEXFILE) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 1 || j > NFSDEV_MAXVERS) { printf("pNFS: too many versions\n"); error = NFSERR_BADXDR; goto nfsmout; } gotvers = 0; for (i = 0; i < j; i++) { NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED); vers = fxdr_unsigned(uint32_t, *tl++); minorvers = fxdr_unsigned(uint32_t, *tl++); if ((vers == NFS_VER4 && minorvers == NFSV41_MINORVERSION) || (vers == NFS_VER3 && gotvers == 0)) { gotvers = vers; /* We'll take this one. */ ndi->nfsdi_versindex = i; ndi->nfsdi_vers = vers; ndi->nfsdi_minorvers = minorvers; ndi->nfsdi_rsize = fxdr_unsigned( uint32_t, *tl++); ndi->nfsdi_wsize = fxdr_unsigned( uint32_t, *tl++); if (*tl == newnfs_true) ndi->nfsdi_flags |= NFSDI_TIGHTCOUPLED; else ndi->nfsdi_flags &= ~NFSDI_TIGHTCOUPLED; } } if (gotvers == 0) { printf("pNFS: no NFSv3 or NFSv4.1\n"); error = NFSERR_BADXDR; goto nfsmout; } } /* And the notify bits. */ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); bitcnt = fxdr_unsigned(int, *tl); if (bitcnt > 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (notifybitsp != NULL) *notifybitsp = fxdr_unsigned(uint32_t, *tl); } if (safilled != AF_UNSPEC) { KASSERT(ndi != NULL, ("ndi is NULL")); *ndip = ndi; } else error = EPERM; if (error == 0) { /* * Now we can do a TCP connection for the correct * NFS version and IP address. */ error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled, gotvers, &dsp, p); } if (error == 0) { KASSERT(gotdspp != NULL, ("gotdspp is NULL")); *gotdspp = dsp; } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: if (error != 0 && ndi != NULL) nfscl_freedevinfo(ndi); mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutCommit. */ int nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp, int layouttype, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; if (reclaim != 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl++ = txdr_unsigned(stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; *tl++ = newnfs_true; if (lastbyte < off) lastbyte = off; else if (lastbyte >= (off + len)) lastbyte = off + len - 1; txdr_hyper(lastbyte, tl); tl += 2; *tl++ = newnfs_false; *tl++ = txdr_unsigned(layouttype); /* All supported layouts are 0 length. */ *tl = txdr_unsigned(0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutReturn. */ int nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset, uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if (reclaim != 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(iomode); *tl = txdr_unsigned(layoutreturn); if (layoutreturn == NFSLAYOUTRETURN_FILE) { NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid); *tl++ = txdr_unsigned(stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; if (layouttype == NFSLAYOUT_NFSV4_1_FILES) *tl = txdr_unsigned(0); else if (layouttype == NFSLAYOUT_FLEXFILE) { *tl = txdr_unsigned(2 * NFSX_UNSIGNED); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); /* No ioerrs or stats yet. */ *tl++ = 0; *tl = 0; } } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (*tl != 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); stateidp->other[0] = *tl++; stateidp->other[1] = *tl++; stateidp->other[2] = *tl; } } else error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Acquire a layout and devinfo, if possible. The caller must have acquired * a reference count on the nfsclclient structure before calling this. * Return the layout in lypp with a reference count on it, if successful. */ static int nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp, int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p) { struct nfscllayout *lyp; struct nfsclflayout *flp; struct nfsclflayouthead flh; int error = 0, islocked, layoutlen, layouttype, recalled, retonclose; nfsv4stateid_t stateid; struct nfsclsession *tsep; *lypp = NULL; if (NFSHASFLEXFILE(nmp)) layouttype = NFSLAYOUT_FLEXFILE; else layouttype = NFSLAYOUT_NFSV4_1_FILES; /* * If lyp is returned non-NULL, there will be a refcnt (shared lock) * on it, iff flp != NULL or a lock (exclusive lock) on it iff * flp == NULL. */ lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len, off, &flp, &recalled); islocked = 0; if (lyp == NULL || flp == NULL) { if (recalled != 0) return (EIO); LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); if (lyp == NULL) { stateid.seqid = 0; stateid.other[0] = stateidp->other[0]; stateid.other[1] = stateidp->other[1]; stateid.other[2] = stateidp->other[2]; error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX, (uint64_t)0, layouttype, layoutlen, &stateid, &retonclose, &flh, cred, p, NULL); } else { islocked = 1; stateid.seqid = lyp->nfsly_stateid.seqid; stateid.other[0] = lyp->nfsly_stateid.other[0]; stateid.other[1] = lyp->nfsly_stateid.other[1]; stateid.other[2] = lyp->nfsly_stateid.other[2]; error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, nfhp->nfh_len, iomode, off, UINT64_MAX, (uint64_t)0, layouttype, layoutlen, &stateid, &retonclose, &flh, cred, p, NULL); } error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp, &flh, layouttype, error, NULL, cred, p); if (error == 0) *lypp = lyp; else if (islocked != 0) nfscl_rellayout(lyp, 1); } else *lypp = lyp; return (error); } /* * Do a TCP connection plus exchange id and create session. * If successful, a "struct nfsclds" is linked into the list for the * mount point and a pointer to it is returned. */ static int nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin, struct sockaddr_in6 *sin6, sa_family_t af, int vers, struct nfsclds **dspp, NFSPROC_T *p) { struct sockaddr_in *msad, *sad; struct sockaddr_in6 *msad6, *sad6; struct nfsclclient *clp; struct nfssockreq *nrp; struct nfsclds *dsp, *tdsp; int error; enum nfsclds_state retv; uint32_t sequenceid; KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("nfsrpc_fillsa: NULL nr_cred")); NFSLOCKCLSTATE(); clp = nmp->nm_clp; NFSUNLOCKCLSTATE(); if (clp == NULL) return (EPERM); if (af == AF_INET) { NFSLOCKMNT(nmp); /* * Check to see if we already have a session for this * address that is usable for a DS. * Note that the MDS's address is in a different place * than the sessions already acquired for DS's. */ msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam; tdsp = TAILQ_FIRST(&nmp->nm_sess); while (tdsp != NULL) { if (msad != NULL && msad->sin_family == AF_INET && sin->sin_addr.s_addr == msad->sin_addr.s_addr && sin->sin_port == msad->sin_port && (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 && tdsp->nfsclds_sess.nfsess_defunct == 0) { *dspp = tdsp; NFSUNLOCKMNT(nmp); NFSCL_DEBUG(4, "fnd same addr\n"); return (0); } tdsp = TAILQ_NEXT(tdsp, nfsclds_list); if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) msad = (struct sockaddr_in *) tdsp->nfsclds_sockp->nr_nam; else msad = NULL; } NFSUNLOCKMNT(nmp); /* No IP address match, so look for new/trunked one. */ sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO); sad->sin_len = sizeof(*sad); sad->sin_family = AF_INET; sad->sin_port = sin->sin_port; sad->sin_addr.s_addr = sin->sin_addr.s_addr; nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); nrp->nr_nam = (struct sockaddr *)sad; } else if (af == AF_INET6) { NFSLOCKMNT(nmp); /* * Check to see if we already have a session for this * address that is usable for a DS. * Note that the MDS's address is in a different place * than the sessions already acquired for DS's. */ msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam; tdsp = TAILQ_FIRST(&nmp->nm_sess); while (tdsp != NULL) { if (msad6 != NULL && msad6->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &msad6->sin6_addr) && sin6->sin6_port == msad6->sin6_port && (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 && tdsp->nfsclds_sess.nfsess_defunct == 0) { *dspp = tdsp; NFSUNLOCKMNT(nmp); return (0); } tdsp = TAILQ_NEXT(tdsp, nfsclds_list); if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) msad6 = (struct sockaddr_in6 *) tdsp->nfsclds_sockp->nr_nam; else msad6 = NULL; } NFSUNLOCKMNT(nmp); /* No IP address match, so look for new/trunked one. */ sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO); sad6->sin6_len = sizeof(*sad6); sad6->sin6_family = AF_INET6; sad6->sin6_port = sin6->sin6_port; NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr, sizeof(struct in6_addr)); nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); nrp->nr_nam = (struct sockaddr *)sad6; } else return (EPERM); nrp->nr_sotype = SOCK_STREAM; mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF); nrp->nr_prog = NFS_PROG; nrp->nr_vers = vers; /* * Use the credentials that were used for the mount, which are * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc. * Ref. counting the credentials with crhold() is probably not * necessary, since nm_sockreq.nr_cred won't be crfree()'d until * unmount, but I did it anyhow. */ nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred); error = newnfs_connect(nmp, nrp, NULL, p, 0); NFSCL_DEBUG(3, "DS connect=%d\n", error); dsp = NULL; /* Now, do the exchangeid and create session. */ if (error == 0) { if (vers == NFS_VER4) { error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS, &dsp, nrp->nr_cred, p); NFSCL_DEBUG(3, "DS exchangeid=%d\n", error); if (error != 0) newnfs_disconnect(nrp); } else { dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO); dsp->nfsclds_flags |= NFSCLDS_DS; dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */ mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); } } if (error == 0) { dsp->nfsclds_sockp = nrp; if (vers == NFS_VER4) { NFSLOCKMNT(nmp); retv = nfscl_getsameserver(nmp, dsp, &tdsp); NFSCL_DEBUG(3, "getsame ret=%d\n", retv); if (retv == NFSDSP_USETHISSESSION) { NFSUNLOCKMNT(nmp); /* * If there is already a session for this * server, use it. */ (void)newnfs_disconnect(nrp); nfscl_freenfsclds(dsp); *dspp = tdsp; return (0); } if (retv == NFSDSP_SEQTHISSESSION) sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid; else sequenceid = dsp->nfsclds_sess.nfsess_sequenceid; NFSUNLOCKMNT(nmp); error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, nrp, sequenceid, 0, nrp->nr_cred, p); NFSCL_DEBUG(3, "DS createsess=%d\n", error); } } else { NFSFREECRED(nrp->nr_cred); NFSFREEMUTEX(&nrp->nr_mtx); free(nrp->nr_nam, M_SONAME); free(nrp, M_NFSSOCKREQ); } if (error == 0) { NFSCL_DEBUG(3, "add DS session\n"); /* * Put it at the end of the list. That way the list * is ordered by when the entry was added. This matters * since the one done first is the one that should be * used for sequencid'ing any subsequent create sessions. */ NFSLOCKMNT(nmp); TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list); NFSUNLOCKMNT(nmp); *dspp = dsp; } else if (dsp != NULL) { newnfs_disconnect(nrp); nfscl_freenfsclds(dsp); } return (error); } /* * Do the NFSv4.1 Reclaim Complete. */ int nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = newnfs_false; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Initialize the slot tables for a session. */ static void nfscl_initsessionslots(struct nfsclsession *sep) { int i; for (i = 0; i < NFSV4_CBSLOTS; i++) { if (sep->nfsess_cbslots[i].nfssl_reply != NULL) m_freem(sep->nfsess_cbslots[i].nfssl_reply); NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot)); } for (i = 0; i < 64; i++) sep->nfsess_slotseq[i] = 0; sep->nfsess_slots = 0; } /* * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS). */ int nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p) { struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfscllayout *layp; struct nfscldevinfo *dip; struct nfsclflayout *rflp; struct mbuf *m; struct nfsclwritedsdorpc *drpc, *tdrpc; nfsv4stateid_t stateid; struct ucred *newcred; uint64_t lastbyte, len, off, oresid, xfer; int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo; void *lckp; uint8_t *dev; void *iovbase; size_t iovlen; off_t offs; ssize_t resid; if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || (np->n_flag & NNOLAYOUT) != 0) return (EIO); /* Now, get a reference cnt on the clientid for this mount. */ if (nfscl_getref(nmp) == 0) return (EIO); /* Find an appropriate stateid. */ newcred = NFSNEWCRED(cred); error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, rwaccess, 1, newcred, p, &stateid, &lckp); if (error != 0) { NFSFREECRED(newcred); nfscl_relref(nmp); return (error); } /* Search for a layout for this file. */ off = uiop->uio_offset; layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, off, &rflp, &recalled); if (layp == NULL || rflp == NULL) { if (recalled != 0) { NFSFREECRED(newcred); nfscl_relref(nmp); return (EIO); } if (layp != NULL) { nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0); layp = NULL; } /* Try and get a Layout, if it is supported. */ if (rwaccess == NFSV4OPEN_ACCESSWRITE || (np->n_flag & NWRITEOPENED) != 0) iolaymode = NFSLAYOUTIOMODE_RW; else iolaymode = NFSLAYOUTIOMODE_READ; error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode, NULL, &stateid, off, &layp, newcred, p); if (error != 0) { NFSLOCKNODE(np); np->n_flag |= NNOLAYOUT; NFSUNLOCKNODE(np); if (lckp != NULL) nfscl_lockderef(lckp); NFSFREECRED(newcred); if (layp != NULL) nfscl_rellayout(layp, 0); nfscl_relref(nmp); return (error); } } /* * Loop around finding a layout that works for the first part of * this I/O operation, and then call the function that actually * does the RPC. */ eof = 0; len = (uint64_t)uiop->uio_resid; while (len > 0 && error == 0 && eof == 0) { off = uiop->uio_offset; error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp); if (error == 0) { oresid = xfer = (uint64_t)uiop->uio_resid; if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off)) xfer = rflp->nfsfl_end - rflp->nfsfl_off; /* * For Flex File layout with mirrored DSs, select one * of them at random for reads. For writes and commits, * do all mirrors. */ m = NULL; drpc = NULL; firstmirror = 0; mirrorcnt = 1; if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 && (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) { if (rwaccess == NFSV4OPEN_ACCESSREAD) { firstmirror = arc4random() % mirrorcnt; mirrorcnt = firstmirror + 1; } else { if (docommit == 0) { /* * Save values, so uiop can be * rolled back upon a write * error. */ offs = uiop->uio_offset; resid = uiop->uio_resid; iovbase = uiop->uio_iov->iov_base; iovlen = uiop->uio_iov->iov_len; m = nfsm_uiombuflist(uiop, len, NULL, NULL); } tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK | M_ZERO); } } for (i = firstmirror; i < mirrorcnt && error == 0; i++){ if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) dev = rflp->nfsfl_ffm[i].dev; else dev = rflp->nfsfl_dev; dip = nfscl_getdevinfo(nmp->nm_clp, dev, rflp->nfsfl_devp); if (dip != NULL) { if ((rflp->nfsfl_flags & NFSFL_FLEXFILE) != 0) error = nfscl_dofflayoutio(vp, uiop, iomode, must_commit, &eof, &stateid, rwaccess, dip, layp, rflp, off, xfer, i, docommit, m, tdrpc, newcred, p); else error = nfscl_doflayoutio(vp, uiop, iomode, must_commit, &eof, &stateid, rwaccess, dip, layp, rflp, off, xfer, docommit, newcred, p); nfscl_reldevinfo(dip); } else error = EIO; tdrpc++; } if (m != NULL) m_freem(m); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = firstmirror; i < mirrorcnt - 1 && tdrpc != NULL; i++, tdrpc++) { /* * For the unused drpc entries, both inprog and * err == 0, so this loop won't break. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "clrpcio", timo); if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); if (error == 0) { if (mirrorcnt > 1 && rwaccess == NFSV4OPEN_ACCESSWRITE && docommit == 0) { NFSLOCKCLSTATE(); layp->nfsly_flags |= NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); } lastbyte = off + xfer - 1; NFSLOCKCLSTATE(); if (lastbyte > layp->nfsly_lastbyte) layp->nfsly_lastbyte = lastbyte; NFSUNLOCKCLSTATE(); } else if (error == NFSERR_OPENMODE && rwaccess == NFSV4OPEN_ACCESSREAD) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } else error = EIO; if (error == 0) len -= (oresid - (uint64_t)uiop->uio_resid); else if (mirrorcnt > 1 && rwaccess == NFSV4OPEN_ACCESSWRITE && docommit == 0) { /* * In case the rpc gets retried, roll the * uio fields changed by nfsm_uiombuflist() * back. */ uiop->uio_offset = offs; uiop->uio_resid = resid; uiop->uio_iov->iov_base = iovbase; uiop->uio_iov->iov_len = iovlen; } } } if (lckp != NULL) nfscl_lockderef(lckp); NFSFREECRED(newcred); nfscl_rellayout(layp, 0); nfscl_relref(nmp); return (error); } /* * Make a copy of the mbuf chain and add an mbuf for null padding, as required. */ static struct mbuf * nfsm_copym(struct mbuf *m, int off, int xfer) { struct mbuf *m2, *m3, *m4; uint32_t *tl; int rem; m2 = m_copym(m, off, xfer, M_WAITOK); rem = NFSM_RNDUP(xfer) - xfer; if (rem > 0) { /* * The zero padding to a multiple of 4 bytes is required by * the XDR. So that the mbufs copied by reference aren't * modified, add an mbuf with the zero'd bytes to the list. * rem will be a maximum of 3, so one zero'd uint32_t is * sufficient. */ m3 = m2; while (m3->m_next != NULL) m3 = m3->m_next; NFSMGET(m4); tl = NFSMTOD(m4, uint32_t *); *tl = 0; mbuf_setlen(m4, rem); mbuf_setnext(m3, m4); } return (m2); } /* * Find a file layout that will handle the first bytes of the requested * range and return the information from it needed to the I/O operation. */ int nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess, struct nfsclflayout **retflpp) { struct nfsclflayout *flp, *nflp, *rflp; uint32_t rw; rflp = NULL; rw = rwaccess; /* For reading, do the Read list first and then the Write list. */ do { if (rw == NFSV4OPEN_ACCESSREAD) flp = LIST_FIRST(&lyp->nfsly_flayread); else flp = LIST_FIRST(&lyp->nfsly_flayrw); while (flp != NULL) { nflp = LIST_NEXT(flp, nfsfl_list); if (flp->nfsfl_off > off) break; if (flp->nfsfl_end > off && (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end)) rflp = flp; flp = nflp; } if (rw == NFSV4OPEN_ACCESSREAD) rw = NFSV4OPEN_ACCESSWRITE; else rw = 0; } while (rw != 0); if (rflp != NULL) { /* This one covers the most bytes starting at off. */ *retflpp = rflp; return (0); } return (EIO); } /* * Do I/O using an NFSv4.1 file layout. */ static int nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp, struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off, uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p) { uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer; int commit_thru_mds, error, stripe_index, stripe_pos; struct nfsnode *np; struct nfsfh *fhp; struct nfsclds **dspp; np = VTONFS(vp); rel_off = off - flp->nfsfl_patoff; stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff; stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) % dp->nfsdi_stripecnt; transfer = stripe_unit_size - (rel_off % stripe_unit_size); error = 0; /* Loop around, doing I/O for each stripe unit. */ while (len > 0 && error == 0) { stripe_index = nfsfldi_stripeindex(dp, stripe_pos); dspp = nfsfldi_addr(dp, stripe_index); if (len > transfer && docommit == 0) xfer = transfer; else xfer = len; if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) { /* Dense layout. */ if (stripe_pos >= flp->nfsfl_fhcnt) return (EIO); fhp = flp->nfsfl_fh[stripe_pos]; io_off = (rel_off / (stripe_unit_size * dp->nfsdi_stripecnt)) * stripe_unit_size + rel_off % stripe_unit_size; } else { /* Sparse layout. */ if (flp->nfsfl_fhcnt > 1) { if (stripe_index >= flp->nfsfl_fhcnt) return (EIO); fhp = flp->nfsfl_fh[stripe_index]; } else if (flp->nfsfl_fhcnt == 1) fhp = flp->nfsfl_fh[0]; else fhp = np->n_fhp; io_off = off; } if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) { commit_thru_mds = 1; if (docommit != 0) error = EIO; } else { commit_thru_mds = 0; mtx_lock(&np->n_mtx); np->n_flag |= NDSCOMMIT; mtx_unlock(&np->n_mtx); } if (docommit != 0) { if (error == 0) error = nfsrpc_commitds(vp, io_off, xfer, *dspp, fhp, 0, 0, cred, p); if (error == 0) { /* * Set both eof and uio_resid = 0 to end any * loops. */ *eofp = 1; uiop->uio_resid = 0; } else { mtx_lock(&np->n_mtx); np->n_flag &= ~NDSCOMMIT; mtx_unlock(&np->n_mtx); } } else if (rwflag == NFSV4OPEN_ACCESSREAD) error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, io_off, xfer, fhp, 0, 0, 0, cred, p); else { error = nfsrpc_writeds(vp, uiop, iomode, must_commit, stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds, 0, 0, 0, cred, p); if (error == 0) { NFSLOCKCLSTATE(); lyp->nfsly_flags |= NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); } } if (error == 0) { transfer = stripe_unit_size; stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt; len -= xfer; off += xfer; } } return (error); } /* * Do I/O using an NFSv4.1 flex file layout. */ static int nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp, struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off, uint64_t len, int mirror, int docommit, struct mbuf *mp, struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p) { uint64_t transfer, xfer; int error, rel_off; struct nfsnode *np; struct nfsfh *fhp; struct nfsclds **dspp; struct ucred *tcred; struct mbuf *m; np = VTONFS(vp); error = 0; rel_off = 0; NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off, (uintmax_t)len); /* Loop around, doing I/O for each stripe unit. */ while (len > 0 && error == 0) { dspp = nfsfldi_addr(dp, 0); fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex]; stateidp = &flp->nfsfl_ffm[mirror].st; NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n", mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid); if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) { tcred = NFSNEWCRED(cred); tcred->cr_uid = flp->nfsfl_ffm[mirror].user; tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group; tcred->cr_ngroups = 1; } else tcred = cred; if (rwflag == NFSV4OPEN_ACCESSREAD) transfer = dp->nfsdi_rsize; else transfer = dp->nfsdi_wsize; mtx_lock(&np->n_mtx); np->n_flag |= NDSCOMMIT; mtx_unlock(&np->n_mtx); if (len > transfer && docommit == 0) xfer = transfer; else xfer = len; if (docommit != 0) { if (error == 0) { /* * Do last mirrored DS commit with this thread. */ if (mirror < flp->nfsfl_mirrorcnt - 1) error = nfsio_commitds(vp, off, xfer, *dspp, fhp, dp->nfsdi_vers, dp->nfsdi_minorvers, drpc, tcred, p); else error = nfsrpc_commitds(vp, off, xfer, *dspp, fhp, dp->nfsdi_vers, dp->nfsdi_minorvers, tcred, p); } NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error); if (error == 0) { /* * Set both eof and uio_resid = 0 to end any * loops. */ *eofp = 1; uiop->uio_resid = 0; } else { mtx_lock(&np->n_mtx); np->n_flag &= ~NDSCOMMIT; mtx_unlock(&np->n_mtx); } } else if (rwflag == NFSV4OPEN_ACCESSREAD) error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, off, xfer, fhp, 1, dp->nfsdi_vers, dp->nfsdi_minorvers, tcred, p); else { if (flp->nfsfl_mirrorcnt == 1) { error = nfsrpc_writeds(vp, uiop, iomode, must_commit, stateidp, *dspp, off, xfer, fhp, 0, 1, dp->nfsdi_vers, dp->nfsdi_minorvers, tcred, p); if (error == 0) { NFSLOCKCLSTATE(); lyp->nfsly_flags |= NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); } } else { m = nfsm_copym(mp, rel_off, xfer); NFSCL_DEBUG(4, "mcopy reloff=%d xfer=%jd\n", rel_off, (uintmax_t)xfer); /* * Do last write to a mirrored DS with this * thread. */ if (mirror < flp->nfsfl_mirrorcnt - 1) error = nfsio_writedsmir(vp, iomode, must_commit, stateidp, *dspp, off, xfer, fhp, m, dp->nfsdi_vers, dp->nfsdi_minorvers, drpc, tcred, p); else error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp, *dspp, off, xfer, fhp, m, dp->nfsdi_vers, dp->nfsdi_minorvers, tcred, p); NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error); } } NFSCL_DEBUG(4, "aft read/writeds=%d\n", error); if (error == 0) { len -= xfer; off += xfer; rel_off += xfer; } if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) NFSFREECRED(tcred); } NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error); return (error); } /* * The actual read RPC done to a DS. */ static int nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; int attrflag, error, retlen; struct nfsrv_descript nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; struct nfsvattr na; nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); vers = NFS_VER4; NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers); if (flex != 0) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); else nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); } else { nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n"); } NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); txdr_hyper(io_off, tl); *(tl + 2) = txdr_unsigned(len); nrp = dsp->nfsclds_sockp; NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp); if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat, error); if (error != 0) return (error); if (vers == NFS_VER3) { error = nfscl_postop_attr(nd, &na, &attrflag, NULL); NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error); if (error != 0) goto nfsmout; } if (nd->nd_repstat != 0) { error = nd->nd_repstat; goto nfsmout; } if (vers == NFS_VER3) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); *eofp = fxdr_unsigned(int, *(tl + 1)); } else { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); *eofp = fxdr_unsigned(int, *tl); } NFSM_STRSIZ(retlen, len); NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp); error = nfsm_mbufuio(nd, uiop, retlen); nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } /* * The actual write RPC done to a DS. */ static int nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC; int32_t backup; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; struct nfsvattr na; KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers); vers = NFS_VER4; if (flex != 0) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); else nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); } else { nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n"); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED); } txdr_hyper(io_off, tl); tl += 2; if (vers == NFS_VER3) *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); nfsm_uiombuf(nd, uiop, len); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat != 0) { /* * In case the rpc gets retried, roll * the uio fileds changed by nfsm_uiombuf() * back. */ uiop->uio_offset -= len; uio_uio_resid_add(uiop, len); uio_iov_base_add(uiop, -len); uio_iov_len_add(uiop, len); error = nd->nd_repstat; } else { if (vers == NFS_VER3) { error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL, NULL); NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error); if (error != 0) goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen); if (rlen == 0) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { backup = len - rlen; uio_iov_base_add(uiop, -(backup)); uio_iov_len_add(uiop, backup); uiop->uio_offset -= backup; uio_uio_resid_add(uiop, backup); len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; if (commit_thru_mds != 0) { NFSLOCKMNT(nmp); if (!NFSHASWRITEVERF(nmp)) { NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); NFSSETWRITEVERF(nmp); } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); } NFSUNLOCKMNT(nmp); } else { NFSLOCKDS(dsp); if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF; } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); } NFSUNLOCKDS(dsp); } } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; return (error); } /* * The actual write RPC done to a DS. * This variant is called from a separate kernel process for mirrors. * Any short write is considered an IO error. */ static int nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit, nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; struct nfsvattr na; nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); vers = NFS_VER4; NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n", minorvers); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); } else { nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n"); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED); } txdr_hyper(io_off, tl); tl += 2; if (vers == NFS_VER3) *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); if (len > 0) { /* Put data in mbuf chain. */ nd->nd_mb->m_next = m; /* Set nd_mb and nd_bpos to end of data. */ while (m->m_next != NULL) m = m->m_next; nd->nd_mb = m; nd->nd_bpos = mtod(m, char *) + m->m_len; NFSCL_DEBUG(4, "nfsrpc_writedsmir: lastmb len=%d\n", m->m_len); } nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat != 0) error = nd->nd_repstat; else { if (vers == NFS_VER3) { error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL, NULL); NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n", error); if (error != 0) goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len, rlen); if (rlen != len) { error = NFSERR_IO; NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len, rlen); goto nfsmout; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; NFSLOCKDS(dsp); if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF; } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); } NFSUNLOCKDS(dsp); } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; return (error); } /* * Start up the thread that will execute nfsrpc_writedsmir(). */ static void start_writedsmir(void *arg, int pending) { struct nfsclwritedsdorpc *drpc; drpc = (struct nfsclwritedsdorpc *)arg; drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode, &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len, drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred, drpc->p); drpc->done = 1; NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err); } /* * Set up the write DS mirror call for the pNFS I/O thread. */ static int nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit, nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len, struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers, struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p) { int error, ret; error = 0; drpc->done = 0; drpc->vp = vp; drpc->iomode = *iomode; drpc->must_commit = *must_commit; drpc->stateidp = stateidp; drpc->dsp = dsp; drpc->off = off; drpc->len = len; drpc->fhp = fhp; drpc->m = m; drpc->vers = vers; drpc->minorvers = minorvers; drpc->cred = cred; drpc->p = p; drpc->inprog = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_writedsmir, drpc); NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret); } if (ret != 0) error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp, dsp, off, len, fhp, m, vers, minorvers, cred, p); NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error); return (error); } /* * Free up the nfsclds structure. */ void nfscl_freenfsclds(struct nfsclds *dsp) { int i; if (dsp == NULL) return; if (dsp->nfsclds_sockp != NULL) { NFSFREECRED(dsp->nfsclds_sockp->nr_cred); NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx); free(dsp->nfsclds_sockp->nr_nam, M_SONAME); free(dsp->nfsclds_sockp, M_NFSSOCKREQ); } NFSFREEMUTEX(&dsp->nfsclds_mtx); NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx); for (i = 0; i < NFSV4_CBSLOTS; i++) { if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL) m_freem( dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply); } free(dsp, M_NFSCLDS); } static enum nfsclds_state nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp, struct nfsclds **retdspp) { struct nfsclds *dsp, *cur_dsp; /* * Search the list of nfsclds structures for one with the same * server. */ cur_dsp = NULL; TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen && dsp->nfsclds_servownlen != 0 && !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown, dsp->nfsclds_servownlen) && dsp->nfsclds_sess.nfsess_defunct == 0) { NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n", TAILQ_FIRST(&nmp->nm_sess), dsp, dsp->nfsclds_flags); /* Server major id matches. */ if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) { *retdspp = dsp; return (NFSDSP_USETHISSESSION); } /* * Note the first match, so it can be used for * sequence'ing new sessions. */ if (cur_dsp == NULL) cur_dsp = dsp; } } if (cur_dsp != NULL) { *retdspp = cur_dsp; return (NFSDSP_SEQTHISSESSION); } return (NFSDSP_NOTFOUND); } /* * NFS commit rpc to a NFSv4.1 DS. */ static int nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfssockreq *nrp; struct nfsvattr na; int attrflag, error; nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); vers = NFS_VER4; } else nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers, minorvers); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) { if (vers == NFS_VER3) { error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL, NULL); NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error); if (error != 0) goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); NFSLOCKDS(dsp); if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); error = NFSERR_STALEWRITEVERF; } NFSUNLOCKDS(dsp); } nfsmout: if (error == 0 && nd->nd_repstat != 0) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Start up the thread that will execute nfsrpc_commitds(). */ static void start_commitds(void *arg, int pending) { struct nfsclwritedsdorpc *drpc; drpc = (struct nfsclwritedsdorpc *)arg; drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred, drpc->p); drpc->done = 1; NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err); } /* * Set up the commit DS mirror call for the pNFS I/O thread. */ static int nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers, struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p) { int error, ret; error = 0; drpc->done = 0; drpc->vp = vp; drpc->off = offset; drpc->len = cnt; drpc->dsp = dsp; drpc->fhp = fhp; drpc->vers = vers; drpc->minorvers = minorvers; drpc->cred = cred; drpc->p = p; drpc->inprog = 0; ret = EIO; if (nfs_pnfsiothreads != 0) { ret = nfs_pnfsio(start_commitds, drpc); NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret); } if (ret != 0) error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers, minorvers, cred, p); NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error); return (error); } /* * Set up the XDR arguments for the LayoutGet operation. */ static void nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset, uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype, int layoutlen, int usecurstateid) { uint32_t *tl; NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); *tl++ = newnfs_false; /* Don't signal availability. */ *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(iomode); txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; txdr_hyper(minlen, tl); tl += 2; if (usecurstateid != 0) { /* Special stateid for Current stateid. */ *tl++ = txdr_unsigned(1); *tl++ = 0; *tl++ = 0; *tl++ = 0; } else { *tl++ = txdr_unsigned(stateidp->seqid); NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; } *tl = txdr_unsigned(layoutlen); } /* * Parse the reply for a successful LayoutGet operation. */ static int nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp) { uint32_t *tl; struct nfsclflayout *flp, *prevflp, *tflp; int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen; int m, mirrorcnt; uint64_t retlen, off; struct nfsfh *nfhp; uint8_t *cp; uid_t user; gid_t grp; NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n"); error = 0; flp = NULL; gotiomode = -1; NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID); if (*tl++ != 0) *retonclosep = 1; else *retonclosep = 0; stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep, (int)stateidp->seqid); stateidp->other[0] = *tl++; stateidp->other[1] = *tl++; stateidp->other[2] = *tl++; cnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "layg cnt=%d\n", cnt); if (cnt <= 0 || cnt > 10000) { /* Don't accept more than 10000 layouts in reply. */ error = NFSERR_BADXDR; goto nfsmout; } for (i = 0; i < cnt; i++) { /* Dissect to the layout type. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; retlen = fxdr_hyper(tl); tl += 2; iomode = fxdr_unsigned(int, *tl++); laytype = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype, (uintmax_t)off, (uintmax_t)retlen, iomode); /* Ignore length of layout body for now. */ if (laytype == NFSLAYOUT_NFSV4_1_FILES) { /* Parse the File layout up to fhcnt. */ NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER + NFSX_V4DEVICEID); fhcnt = fxdr_unsigned(int, *(tl + 4 + NFSX_V4DEVICEID / NFSX_UNSIGNED)); NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt); if (fhcnt < 0 || fhcnt > 100) { /* Don't accept more than 100 file handles. */ error = NFSERR_BADXDR; goto nfsmout; } if (fhcnt > 0) flp = malloc(sizeof(*flp) + fhcnt * sizeof(struct nfsfh *), M_NFSFLAYOUT, M_WAITOK); else flp = malloc(sizeof(*flp), M_NFSFLAYOUT, M_WAITOK); flp->nfsfl_flags = NFSFL_FILE; flp->nfsfl_fhcnt = 0; flp->nfsfl_devp = NULL; flp->nfsfl_off = off; if (flp->nfsfl_off + retlen < flp->nfsfl_off) flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off; else flp->nfsfl_end = flp->nfsfl_off + retlen; flp->nfsfl_iomode = iomode; if (gotiomode == -1) gotiomode = flp->nfsfl_iomode; /* Ignore layout body length for now. */ NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util); flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++); flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2; NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n", flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff); for (j = 0; j < fhcnt; j++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); nfhlen = fxdr_unsigned(int, *tl); if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) { error = NFSERR_BADXDR; goto nfsmout; } nfhp = malloc(sizeof(*nfhp) + nfhlen - 1, M_NFSFH, M_WAITOK); flp->nfsfl_fh[j] = nfhp; flp->nfsfl_fhcnt++; nfhp->nfh_len = nfhlen; NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen)); NFSBCOPY(cp, nfhp->nfh_fh, nfhlen); } } else if (laytype == NFSLAYOUT_FLEXFILE) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); mirrorcnt = fxdr_unsigned(int, *(tl + 2)); NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt); if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) { error = NFSERR_BADXDR; goto nfsmout; } flp = malloc(sizeof(*flp) + mirrorcnt * sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK); flp->nfsfl_flags = NFSFL_FLEXFILE; flp->nfsfl_mirrorcnt = mirrorcnt; flp->nfsfl_devp = NULL; flp->nfsfl_off = off; if (flp->nfsfl_off + retlen < flp->nfsfl_off) flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off; else flp->nfsfl_end = flp->nfsfl_off + retlen; flp->nfsfl_iomode = iomode; if (gotiomode == -1) gotiomode = flp->nfsfl_iomode; flp->nfsfl_stripeunit = fxdr_hyper(tl); NFSCL_DEBUG(4, "stripeunit=%ju\n", (uintmax_t)flp->nfsfl_stripeunit); for (j = 0; j < mirrorcnt; j++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); k = fxdr_unsigned(int, *tl); if (k < 1 || k > 128) { error = NFSERR_BADXDR; goto nfsmout; } NFSCL_DEBUG(4, "servercnt=%d\n", k); for (l = 0; l < k; l++) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4DEVICEID + NFSX_STATEID + 2 * NFSX_UNSIGNED); if (l == 0) { /* Just use the first server. */ NFSBCOPY(tl, flp->nfsfl_ffm[j].dev, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); tl++; flp->nfsfl_ffm[j].st.seqid = *tl++; flp->nfsfl_ffm[j].st.other[0] = *tl++; flp->nfsfl_ffm[j].st.other[1] = *tl++; flp->nfsfl_ffm[j].st.other[2] = *tl++; NFSCL_DEBUG(4, "st.seqid=%u " "st.o0=0x%x st.o1=0x%x " "st.o2=0x%x\n", flp->nfsfl_ffm[j].st.seqid, flp->nfsfl_ffm[j].st.other[0], flp->nfsfl_ffm[j].st.other[1], flp->nfsfl_ffm[j].st.other[2]); } else tl += ((NFSX_V4DEVICEID + NFSX_STATEID + NFSX_UNSIGNED) / NFSX_UNSIGNED); fhcnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt); if (fhcnt < 1 || fhcnt > NFSDEV_MAXVERS) { error = NFSERR_BADXDR; goto nfsmout; } for (m = 0; m < fhcnt; m++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); nfhlen = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "nfhlen=%d\n", nfhlen); if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) { error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen)); if (l == 0) { flp->nfsfl_ffm[j].fhcnt = fhcnt; nfhp = malloc( sizeof(*nfhp) + nfhlen - 1, M_NFSFH, M_WAITOK); flp->nfsfl_ffm[j].fh[m] = nfhp; nfhp->nfh_len = nfhlen; NFSBCOPY(cp, nfhp->nfh_fh, nfhlen); NFSCL_DEBUG(4, "got fh\n"); } } /* Now, get the ffsd_user/ffds_group. */ error = nfsrv_parseug(nd, 0, &user, &grp, curthread); NFSCL_DEBUG(4, "after parseu=%d\n", error); if (error == 0) error = nfsrv_parseug(nd, 1, &user, &grp, curthread); NFSCL_DEBUG(4, "aft parseg=%d\n", grp); if (error != 0) goto nfsmout; NFSCL_DEBUG(4, "user=%d group=%d\n", user, grp); if (l == 0) { flp->nfsfl_ffm[j].user = user; flp->nfsfl_ffm[j].group = grp; NFSCL_DEBUG(4, "usr=%d grp=%d\n", user, grp); } } } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++); flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl); NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n", flp->nfsfl_fflags, flp->nfsfl_statshint); } else { error = NFSERR_BADXDR; goto nfsmout; } if (flp->nfsfl_iomode == gotiomode) { /* Keep the list in increasing offset order. */ tflp = LIST_FIRST(flhp); prevflp = NULL; while (tflp != NULL && tflp->nfsfl_off < flp->nfsfl_off) { prevflp = tflp; tflp = LIST_NEXT(tflp, nfsfl_list); } if (prevflp == NULL) LIST_INSERT_HEAD(flhp, flp, nfsfl_list); else LIST_INSERT_AFTER(prevflp, flp, nfsfl_list); NFSCL_DEBUG(4, "flp inserted\n"); } else { printf("nfscl_layoutget(): got wrong iomode\n"); nfscl_freeflayout(flp); } flp = NULL; } nfsmout: NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error); if (error != 0 && flp != NULL) nfscl_freeflayout(flp); return (error); } /* * Parse a user/group digit string. */ static int nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp, NFSPROC_T *p) { uint32_t *tl; char *cp, *str, str0[NFSV4_SMALLSTR + 1]; uint32_t len = 0; int error = 0; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(uint32_t, *tl); str = NULL; if (len > NFSV4_OPAQUELIMIT) { error = NFSERR_BADXDR; goto nfsmout; } NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len); if (len == 0) { if (dogrp != 0) *gidp = GID_NOGROUP; else *uidp = UID_NOBODY; return (0); } if (len > NFSV4_SMALLSTR) str = malloc(len + 1, M_TEMP, M_WAITOK); else str = str0; NFSM_DISSECT(cp, char *, NFSM_RNDUP(len)); NFSBCOPY(cp, str, len); str[len] = '\0'; NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str); if (dogrp != 0) error = nfsv4_strtogid(nd, str, len, gidp, p); else error = nfsv4_strtouid(nd, str, len, uidp, p); nfsmout: if (len > NFSV4_SMALLSTR) free(str, M_TEMP); NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error); return (error); } /* * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(), * so that it does both an Open and a Layoutget. */ static int nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode, struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p) { struct nfscllayout *lyp; struct nfsclflayout *flp; struct nfsclflayouthead flh; int error, islocked, layoutlen, recalled, retonclose, usecurstateid; int layouttype, laystat; nfsv4stateid_t stateid; struct nfsclsession *tsep; error = 0; if (NFSHASFLEXFILE(nmp)) layouttype = NFSLAYOUT_FLEXFILE; else layouttype = NFSLAYOUT_NFSV4_1_FILES; /* * If lyp is returned non-NULL, there will be a refcnt (shared lock) * on it, iff flp != NULL or a lock (exclusive lock) on it iff * flp == NULL. */ lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp, &recalled); NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp); if (lyp == NULL) islocked = 0; else if (flp != NULL) islocked = 1; else islocked = 2; if ((lyp == NULL || flp == NULL) && recalled == 0) { LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); if (lyp == NULL) usecurstateid = 1; else { usecurstateid = 0; stateid.seqid = lyp->nfsly_stateid.seqid; stateid.other[0] = lyp->nfsly_stateid.other[0]; stateid.other[1] = lyp->nfsly_stateid.other[1]; stateid.other[2] = lyp->nfsly_stateid.other[2]; } error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, dpp, &stateid, usecurstateid, layouttype, layoutlen, &retonclose, &flh, &laystat, cred, p); NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n", laystat, error); laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen, &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat, &islocked, cred, p); } else error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0); if (islocked == 2) nfscl_rellayout(lyp, 1); else if (islocked == 1) nfscl_rellayout(lyp, 0); return (error); } /* * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS * enabled, only for the CLAIM_NULL case. All other NFSv4 Opens are * handled by nfsrpc_openrpc(). * For the case where op == NULL, dvp is the directory. When op != NULL, it * can be NULL. */ static int nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode, struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp, nfsv4stateid_t *stateidp, int usecurstateid, int layouttype, int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp, int *laystatp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscldeleg *ndp = NULL; struct nfsvattr nfsva; struct nfsclsession *tsep; uint32_t rflags, deleg; nfsattrbit_t attrbits; int error, ret, acesize, limitby, iomode; *dpp = NULL; *laystatp = ENXIO; nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); nfsm_strtom(nd, name, namelen); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LAYOUTGET); if ((mode & NFSV4OPEN_ACCESSWRITE) != 0) iomode = NFSLAYOUTIOMODE_RW; else iomode = NFSLAYOUTIOMODE_READ; nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp, layouttype, layoutlen, usecurstateid); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (nd->nd_repstat != 0) *laystatp = nd->nd_repstat; if ((nd->nd_flag & ND_NOMOREDATA) == 0) { /* ND_NOMOREDATA will be set if the Open operation failed. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error != 0) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(u_int32_t, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) op->nfso_own->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); ndp = malloc(sizeof(struct nfscldeleg) + newfhlen, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&ndp->nfsdl_owner); LIST_INIT(&ndp->nfsdl_lock); ndp->nfsdl_clp = op->nfso_own->nfsow_clp; ndp->nfsdl_fhlen = newfhlen; NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen); newnfs_copyincred(cred, &ndp->nfsdl_cred); nfscl_lockinit(&ndp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); ndp->nfsdl_stateid.seqid = *tl++; ndp->nfsdl_stateid.other[0] = *tl++; ndp->nfsdl_stateid.other[1] = *tl++; ndp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { ndp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: ndp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: ndp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); ndp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; }; } else ndp->nfsdl_flags = NFSCLDL_READ; if (ret != 0) ndp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret, &acesize, p); if (error != 0) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 || nfscl_assumeposixlocks) op->nfso_posixlock = 1; else op->nfso_posixlock = 0; NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); /* If the 2nd element == NFS_OK, the Getattr succeeded. */ if (*++tl == 0) { error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error != 0) goto nfsmout; if (ndp != NULL) { ndp->nfsdl_change = nfsva.na_filerev; ndp->nfsdl_modtime = nfsva.na_mtime; ndp->nfsdl_flags |= NFSCLDL_MODTIMESET; *dpp = ndp; ndp = NULL; } /* * At this point, the Open has succeeded, so set * nd_repstat = NFS_OK. If the Layoutget failed, * this function just won't return a layout. */ if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); *laystatp = fxdr_unsigned(int, *++tl); if (*laystatp == 0) { error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); if (error != 0) *laystatp = error; } } else nd->nd_repstat = 0; /* Return 0 for Open. */ } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: free(ndp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * Similar nfsrpc_createv4(), but also does the LayoutGet operation. * Used only for mounts with pNFS enabled. */ static int nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp, int usecurstateid, int layouttype, int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp, int *laystatp) { uint32_t *tl; int error = 0, deleg, newone, ret, acesize, limitby; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsclopen *op; struct nfscldeleg *dp = NULL; struct nfsnode *np; struct nfsfh *nfhp; struct nfsclsession *tsep; nfsattrbit_t attrbits; nfsv4stateid_t stateid; struct nfsmount *nmp; nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); *laystatp = ENXIO; *unlockedp = 0; *nfhpp = NULL; *dpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp); /* * For V4, this is actually an Open op. */ NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(owp->nfsow_seqid); *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD); *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CREATE); if ((fmode & O_EXCL) != 0) { if (NFSHASSESSPERSIST(nmp)) { /* Use GUARDED for persistent sessions. */ *tl = txdr_unsigned(NFSCREATE_GUARDED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } else { /* Otherwise, use EXCLUSIVE4_1. */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); nfsm_strtom(nd, name, namelen); /* Get the new file's handle and attributes, plus save the FH. */ NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_SAVEFH); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); nfsrv_putattrbit(nd, &attrbits); /* Get the directory's post-op attributes. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_RESTOREFH); *tl = txdr_unsigned(NFSV4OP_LAYOUTGET); nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp, layouttype, layoutlen, usecurstateid); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error != 0) return (error); NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat, error); if (nd->nd_repstat != 0) *laystatp = nd->nd_repstat; NFSCL_INCRSEQID(owp->nfsow_seqid, nd); if ((nd->nd_flag & ND_NOMOREDATA) == 0) { NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n"); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); stateid.seqid = *tl++; stateid.other[0] = *tl++; stateid.other[1] = *tl++; stateid.other[2] = *tl; nfsrv_getattrbits(nd, &attrbits, NULL, NULL); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) owp->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&dp->nfsdl_owner); LIST_INIT(&dp->nfsdl_lock); dp->nfsdl_clp = owp->nfsow_clp; newnfs_copyincred(cred, &dp->nfsdl_cred); nfscl_lockinit(&dp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); dp->nfsdl_stateid.seqid = *tl++; dp->nfsdl_stateid.other[0] = *tl++; dp->nfsdl_stateid.other[1] = *tl++; dp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { dp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: dp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: dp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); dp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; }; } else { dp->nfsdl_flags = NFSCLDL_READ; } if (ret != 0) dp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret, &acesize, p); if (error != 0) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } /* Now, we should have the status for the SaveFH. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl == 0) { NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n"); /* * Now, process the GetFH and Getattr for the newly * created file. nfscl_mtofh() will set * ND_NOMOREDATA if these weren't successful. */ error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error); if (error != 0) goto nfsmout; } else nd->nd_flag |= ND_NOMOREDATA; /* Now we have the PutFH and Getattr for the directory. */ if ((nd->nd_flag & ND_NOMOREDATA) == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; else { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; } } if ((nd->nd_flag & ND_NOMOREDATA) == 0) { /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error); if (error != 0) goto nfsmout; *dattrflagp = 1; if (dp != NULL && *attrflagp != 0) { dp->nfsdl_change = nnap->na_filerev; dp->nfsdl_modtime = nnap->na_mtime; dp->nfsdl_flags |= NFSCLDL_MODTIMESET; } /* * We can now complete the Open state. */ nfhp = *nfhpp; if (dp != NULL) { dp->nfsdl_fhlen = nfhp->nfh_len; NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len); } /* * Get an Open structure that will be * attached to the OpenOwner, acquired already. */ error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, NULL, &op, &newone, NULL, 0); if (error != 0) goto nfsmout; op->nfso_stateid = stateid; newnfs_copyincred(cred, &op->nfso_cred); nfscl_openrelease(nmp, op, error, newone); *unlockedp = 1; /* Now, handle the RestoreFH and LayoutGet. */ if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); *laystatp = fxdr_unsigned(int, *(tl + 3)); if (*laystatp == 0) { error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); if (error != 0) *laystatp = error; } NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n", error); } else nd->nd_repstat = 0; } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(owp->nfsow_clp); nfsmout: NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error); if (error == 0) *dpp = dp; else free(dp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * Similar to nfsrpc_getopenlayout(), except that it used for the Create case. */ static int nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff, int *unlockedp) { struct nfscllayout *lyp; struct nfsclflayouthead flh; struct nfsfh *nfhp; struct nfsclsession *tsep; struct nfsmount *nmp; nfsv4stateid_t stateid; int error, layoutlen, layouttype, retonclose, laystat; error = 0; nmp = VFSTONFS(dvp->v_mount); if (NFSHASFLEXFILE(nmp)) layouttype = NFSLAYOUT_FLEXFILE; else layouttype = NFSLAYOUT_NFSV4_1_FILES; LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode, owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff, unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose, &flh, &laystat); NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n", laystat, error); lyp = NULL; if (laystat == 0) { nfhp = *nfhpp; laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh, nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL, cred, p); } else laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL, cred, p); if (laystat == 0) nfscl_rellayout(lyp, 0); return (error); } /* * Process the results of a layoutget() operation. */ static int nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp, int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit, struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype, int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p) { struct nfsclflayout *tflp; struct nfscldevinfo *dip; uint8_t *dev; if (laystat == NFSERR_UNKNLAYOUTTYPE) { NFSLOCKMNT(nmp); if (!NFSHASFLEXFILE(nmp)) { /* Switch to using Flex File Layout. */ nmp->nm_state |= NFSSTA_FLEXFILE; } else if (layouttype == NFSLAYOUT_FLEXFILE) { /* Disable pNFS. */ NFSCL_DEBUG(1, "disable PNFS\n"); nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE); } NFSUNLOCKMNT(nmp); } if (laystat == 0) { NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n"); LIST_FOREACH(tflp, flhp, nfsfl_list) { laystat = nfscl_adddevinfo(nmp, NULL, tflp); NFSCL_DEBUG(4, "aft adddev=%d\n", laystat); if (laystat != 0) { if (layouttype == NFSLAYOUT_FLEXFILE) dev = tflp->nfsfl_ffm[0].dev; else dev = tflp->nfsfl_dev; laystat = nfsrpc_getdeviceinfo(nmp, dev, layouttype, notifybit, &dip, cred, p); NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n", laystat); if (laystat != 0) break; laystat = nfscl_adddevinfo(nmp, dip, tflp); if (laystat != 0) printf("getlayout: cannot add\n"); } } } if (laystat == 0) { /* * nfscl_layout() always returns with the nfsly_lock * set to a refcnt (shared lock). * Passing in dvp is sufficient, since it is only used to * get the fsid for the file system. */ laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp, layouttype, retonclose, flhp, lypp, cred, p); NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n", laystat); if (laystat == 0 && islockedp != NULL) *islockedp = 1; } return (laystat); } Index: head/sys/fs/nfsclient/nfs_clstate.c =================================================================== --- head/sys/fs/nfsclient/nfs_clstate.c (revision 335011) +++ head/sys/fs/nfsclient/nfs_clstate.c (revision 335012) @@ -1,5360 +1,5360 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions implement the client side state handling for NFSv4. * NFSv4 state handling: * - A lockowner is used to determine lock contention, so it * corresponds directly to a Posix pid. (1 to 1 mapping) * - The correct granularity of an OpenOwner is not nearly so * obvious. An OpenOwner does the following: * - provides a serial sequencing of Open/Close/Lock-with-new-lockowner * - is used to check for Open/Share contention (not applicable to * this client, since all Opens are Deny_None) * As such, I considered both extreme. * 1 OpenOwner per ClientID - Simple to manage, but fully serializes * all Open, Close and Lock (with a new lockowner) Ops. * 1 OpenOwner for each Open - This one results in an OpenConfirm for * every Open, for most servers. * So, I chose to use the same mapping as I did for LockOwnwers. * The main concern here is that you can end up with multiple Opens * for the same File Handle, but on different OpenOwners (opens * inherited from parents, grandparents...) and you do not know * which of these the vnodeop close applies to. This is handled by * delaying the Close Op(s) until all of the Opens have been closed. * (It is not yet obvious if this is the correct granularity.) * - How the code handles serialization: * - For the ClientId, it uses an exclusive lock while getting its * SetClientId and during recovery. Otherwise, it uses a shared * lock via a reference count. * - For the rest of the data structures, it uses an SMP mutex * (once the nfs client is SMP safe) and doesn't sleep while * manipulating the linked lists. * - The serialization of Open/Close/Lock/LockU falls out in the * "wash", since OpenOwners and LockOwners are both mapped from * Posix pid. In other words, there is only one Posix pid using * any given owner, so that owner is serialized. (If you change * the granularity of the OpenOwner, then code must be added to * serialize Ops on the OpenOwner.) * - When to get rid of OpenOwners and LockOwners. * - The function nfscl_cleanup_common() is executed after a process exits. * It goes through the client list looking for all Open and Lock Owners. * When one is found, it is marked "defunct" or in the case of * an OpenOwner without any Opens, freed. * The renew thread scans for defunct Owners and gets rid of them, * if it can. The LockOwners will also be deleted when the * associated Open is closed. * - If the LockU or Close Op(s) fail during close in a way * that could be recovered upon retry, they are relinked to the * ClientId's defunct open list and retried by the renew thread * until they succeed or an unmount/recovery occurs. * (Since we are done with them, they do not need to be recovered.) */ #ifndef APPLEKEXT #include /* * Global variables */ extern struct nfsstatsv1 nfsstatsv1; extern struct nfsreqhead nfsd_reqq; extern u_int32_t newnfs_false, newnfs_true; extern int nfscl_debuglevel; extern int nfscl_enablecallb; extern int nfs_numnfscbd; NFSREQSPINLOCK; NFSCLSTATEMUTEX; int nfscl_inited = 0; struct nfsclhead nfsclhead; /* Head of clientid list */ int nfscl_deleghighwater = NFSCLDELEGHIGHWATER; int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER; #endif /* !APPLEKEXT */ static int nfscl_delegcnt = 0; static int nfscl_layoutcnt = 0; static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t, struct nfscllockowner **, struct nfsclopen **); static void nfscl_clrelease(struct nfsclclient *); static void nfscl_cleanclient(struct nfsclclient *); static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *, struct ucred *, NFSPROC_T *); static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *, struct nfsmount *, struct ucred *, NFSPROC_T *); static void nfscl_recover(struct nfsclclient *, struct ucred *, NFSPROC_T *); static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *, struct nfscllock *, int); static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **, struct nfscllock **, int); static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *); static u_int32_t nfscl_nextcbident(void); static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **); static struct nfsclclient *nfscl_getclnt(u_int32_t); static struct nfsclclient *nfscl_getclntsess(uint8_t *); static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *, int); static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *, int, struct nfsclrecalllayout **); static void nfscl_reldevinfo_locked(struct nfscldevinfo *); static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *, int); static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *); static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *, u_int8_t *, struct nfscllock **); static void nfscl_freealllocks(struct nfscllockownerhead *, int); static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int, struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **); static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *, struct nfsclowner **, struct nfsclowner **, struct nfsclopen **, struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *); static int nfscl_moveopen(vnode_t , struct nfsclclient *, struct nfsmount *, struct nfsclopen *, struct nfsclowner *, struct nfscldeleg *, struct ucred *, NFSPROC_T *); static void nfscl_totalrecall(struct nfsclclient *); static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *, struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *); static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int, u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int, struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *); static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short, struct ucred *, NFSPROC_T *); static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t, struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *); static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *); static int nfscl_errmap(struct nfsrv_descript *, u_int32_t); static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *); static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *, struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int); static void nfscl_freeopenowner(struct nfsclowner *, int); static void nfscl_cleandeleg(struct nfscldeleg *); static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *, struct nfsmount *, NFSPROC_T *); static void nfscl_emptylockowner(struct nfscllockowner *, struct nfscllockownerfhhead *); static void nfscl_mergeflayouts(struct nfsclflayouthead *, struct nfsclflayouthead *); static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t, uint64_t, uint32_t, struct nfsclrecalllayout *); static int nfscl_seq(uint32_t, uint32_t); static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *, struct ucred *, NFSPROC_T *); static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *, struct ucred *, NFSPROC_T *); static short nfscberr_null[] = { 0, 0, }; static short nfscberr_getattr[] = { NFSERR_RESOURCE, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, 0, }; static short nfscberr_recall[] = { NFSERR_RESOURCE, NFSERR_BADHANDLE, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, 0, }; static short *nfscl_cberrmap[] = { nfscberr_null, nfscberr_null, nfscberr_null, nfscberr_getattr, nfscberr_recall }; #define NETFAMILY(clp) \ (((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) ? AF_INET6 : AF_INET) /* * Called for an open operation. * If the nfhp argument is NULL, just get an openowner. */ APPLESTATIC int nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg, struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp, struct nfsclopen **opp, int *newonep, int *retp, int lockit) { struct nfsclclient *clp; struct nfsclowner *owp, *nowp; struct nfsclopen *op = NULL, *nop = NULL; struct nfscldeleg *dp; struct nfsclownerhead *ohp; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int ret; if (newonep != NULL) *newonep = 0; if (opp != NULL) *opp = NULL; if (owpp != NULL) *owpp = NULL; /* * Might need one or both of these, so MALLOC them now, to * avoid a tsleep() in MALLOC later. */ nowp = malloc(sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); if (nfhp != NULL) nop = malloc(sizeof (struct nfsclopen) + fhlen - 1, M_NFSCLOPEN, M_WAITOK); ret = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (ret != 0) { free(nowp, M_NFSCLOWNER); if (nop != NULL) free(nop, M_NFSCLOPEN); return (ret); } /* * Get the Open iff it already exists. * If none found, add the new one or return error, depending upon * "create". */ NFSLOCKCLSTATE(); dp = NULL; /* First check the delegation list */ if (nfhp != NULL && usedeleg) { LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) { if (dp->nfsdl_fhlen == fhlen && !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) { if (!(amode & NFSV4OPEN_ACCESSWRITE) || (dp->nfsdl_flags & NFSCLDL_WRITE)) break; dp = NULL; break; } } } if (dp != NULL) { nfscl_filllockowner(p->td_proc, own, F_POSIX); ohp = &dp->nfsdl_owner; } else { /* For NFSv4.1 and this option, use a single open_owner. */ if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) nfscl_filllockowner(NULL, own, F_POSIX); else nfscl_filllockowner(p->td_proc, own, F_POSIX); ohp = &clp->nfsc_owner; } /* Now, search for an openowner */ LIST_FOREACH(owp, ohp, nfsow_list) { if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN)) break; } /* * Create a new open, as required. */ nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen, cred, newonep); /* * Now, check the mode on the open and return the appropriate * value. */ if (retp != NULL) { if (nfhp != NULL && dp != NULL && nop == NULL) /* new local open on delegation */ *retp = NFSCLOPEN_SETCRED; else *retp = NFSCLOPEN_OK; } if (op != NULL && (amode & ~(op->nfso_mode))) { op->nfso_mode |= amode; if (retp != NULL && dp == NULL) *retp = NFSCLOPEN_DOOPEN; } /* * Serialize modifications to the open owner for multiple threads * within the same process using a read/write sleep lock. * For NFSv4.1 and a single OpenOwner, allow concurrent open operations * by acquiring a shared lock. The close operations still use an * exclusive lock for this case. */ if (lockit != 0) { if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) { /* * Get a shared lock on the OpenOwner, but first * wait for any pending exclusive lock, so that the * exclusive locker gets priority. */ nfsv4_lock(&owp->nfsow_rwlock, 0, NULL, NFSCLSTATEMUTEXPTR, NULL); nfsv4_getref(&owp->nfsow_rwlock, NULL, NFSCLSTATEMUTEXPTR, NULL); } else nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR); } NFSUNLOCKCLSTATE(); if (nowp != NULL) free(nowp, M_NFSCLOWNER); if (nop != NULL) free(nop, M_NFSCLOPEN); if (owpp != NULL) *owpp = owp; if (opp != NULL) *opp = op; return (0); } /* * Create a new open, as required. */ static void nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp, struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp, struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen, struct ucred *cred, int *newonep) { struct nfsclowner *owp = *owpp, *nowp; struct nfsclopen *op, *nop; if (nowpp != NULL) nowp = *nowpp; else nowp = NULL; if (nopp != NULL) nop = *nopp; else nop = NULL; if (owp == NULL && nowp != NULL) { NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN); LIST_INIT(&nowp->nfsow_open); nowp->nfsow_clp = clp; nowp->nfsow_seqid = 0; nowp->nfsow_defunct = 0; nfscl_lockinit(&nowp->nfsow_rwlock); if (dp != NULL) { nfsstatsv1.cllocalopenowners++; LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list); } else { nfsstatsv1.clopenowners++; LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list); } owp = *owpp = nowp; *nowpp = NULL; if (newonep != NULL) *newonep = 1; } /* If an fhp has been specified, create an Open as well. */ if (fhp != NULL) { /* and look for the correct open, based upon FH */ LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, fhp, fhlen)) break; } if (op == NULL && nop != NULL) { nop->nfso_own = owp; nop->nfso_mode = 0; nop->nfso_opencnt = 0; nop->nfso_posixlock = 1; nop->nfso_fhlen = fhlen; NFSBCOPY(fhp, nop->nfso_fh, fhlen); LIST_INIT(&nop->nfso_lock); nop->nfso_stateid.seqid = 0; nop->nfso_stateid.other[0] = 0; nop->nfso_stateid.other[1] = 0; nop->nfso_stateid.other[2] = 0; KASSERT(cred != NULL, ("%s: cred NULL\n", __func__)); newnfs_copyincred(cred, &nop->nfso_cred); if (dp != NULL) { TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; nfsstatsv1.cllocalopens++; } else { nfsstatsv1.clopens++; } LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list); *opp = nop; *nopp = NULL; if (newonep != NULL) *newonep = 1; } else { *opp = op; } } } /* * Called to find/add a delegation to a client. */ APPLESTATIC int nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp, int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp) { struct nfscldeleg *dp = *dpp, *tdp; /* * First, if we have received a Read delegation for a file on a * read/write file system, just return it, because they aren't * useful, imho. */ if (mp != NULL && dp != NULL && !NFSMNT_RDONLY(mp) && (dp->nfsdl_flags & NFSCLDL_READ)) { (void) nfscl_trydelegreturn(dp, cred, VFSTONFS(mp), p); free(dp, M_NFSCLDELEG); *dpp = NULL; return (0); } /* Look for the correct deleg, based upon FH */ NFSLOCKCLSTATE(); tdp = nfscl_finddeleg(clp, nfhp, fhlen); if (tdp == NULL) { if (dp == NULL) { NFSUNLOCKCLSTATE(); return (NFSERR_BADSTATEID); } *dpp = NULL; TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp, nfsdl_hash); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; nfsstatsv1.cldelegates++; nfscl_delegcnt++; } else { /* * Delegation already exists, what do we do if a new one?? */ if (dp != NULL) { printf("Deleg already exists!\n"); free(dp, M_NFSCLDELEG); *dpp = NULL; } else { *dpp = tdp; } } NFSUNLOCKCLSTATE(); return (0); } /* * Find a delegation for this file handle. Return NULL upon failure. */ static struct nfscldeleg * nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen) { struct nfscldeleg *dp; LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) { if (dp->nfsdl_fhlen == fhlen && !NFSBCMP(dp->nfsdl_fh, fhp, fhlen)) break; } return (dp); } /* * Get a stateid for an I/O operation. First, look for an open and iff * found, return either a lockowner stateid or the open stateid. * If no Open is found, just return error and the special stateid of all zeros. */ APPLESTATIC int nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp, void **lckpp) { struct nfsclclient *clp; struct nfsclowner *owp; struct nfsclopen *op = NULL, *top; struct nfscllockowner *lp; struct nfscldeleg *dp; struct nfsnode *np; struct nfsmount *nmp; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int error, done; *lckpp = NULL; /* * Initially, just set the special stateid of all zeros. * (Don't do this for a DS, since the special stateid can't be used.) */ if (fords == 0) { stateidp->seqid = 0; stateidp->other[0] = 0; stateidp->other[1] = 0; stateidp->other[2] = 0; } if (vnode_vtype(vp) != VREG) return (EISDIR); np = VTONFS(vp); nmp = VFSTONFS(vnode_mount(vp)); NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (EACCES); } /* * Wait for recovery to complete. */ while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG)) (void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR, PZERO, "nfsrecvr", NULL); /* * First, look for a delegation. */ LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) { if (dp->nfsdl_fhlen == fhlen && !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) { if (!(mode & NFSV4OPEN_ACCESSWRITE) || (dp->nfsdl_flags & NFSCLDL_WRITE)) { stateidp->seqid = dp->nfsdl_stateid.seqid; stateidp->other[0] = dp->nfsdl_stateid.other[0]; stateidp->other[1] = dp->nfsdl_stateid.other[1]; stateidp->other[2] = dp->nfsdl_stateid.other[2]; if (!(np->n_flag & NDELEGRECALL)) { TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; dp->nfsdl_rwlock.nfslock_usecnt++; *lckpp = (void *)&dp->nfsdl_rwlock; } NFSUNLOCKCLSTATE(); return (0); } break; } } if (p != NULL) { /* * If p != NULL, we want to search the parentage tree * for a matching OpenOwner and use that. */ if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) nfscl_filllockowner(NULL, own, F_POSIX); else nfscl_filllockowner(p->td_proc, own, F_POSIX); lp = NULL; error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own, mode, &lp, &op); if (error == 0 && lp != NULL && fords == 0) { /* Don't return a lock stateid for a DS. */ stateidp->seqid = lp->nfsl_stateid.seqid; stateidp->other[0] = lp->nfsl_stateid.other[0]; stateidp->other[1] = lp->nfsl_stateid.other[1]; stateidp->other[2] = lp->nfsl_stateid.other[2]; NFSUNLOCKCLSTATE(); return (0); } } if (op == NULL) { /* If not found, just look for any OpenOwner that will work. */ top = NULL; done = 0; owp = LIST_FIRST(&clp->nfsc_owner); while (!done && owp != NULL) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, nfhp, fhlen)) { if (top == NULL && (op->nfso_mode & NFSV4OPEN_ACCESSWRITE) != 0 && (mode & NFSV4OPEN_ACCESSREAD) != 0) top = op; if ((mode & op->nfso_mode) == mode) { done = 1; break; } } } if (!done) owp = LIST_NEXT(owp, nfsow_list); } if (!done) { NFSCL_DEBUG(2, "openmode top=%p\n", top); if (top == NULL || NFSHASOPENMODE(nmp)) { NFSUNLOCKCLSTATE(); return (ENOENT); } else op = top; } /* * For read aheads or write behinds, use the open cred. * A read ahead or write behind is indicated by p == NULL. */ if (p == NULL) newnfs_copycred(&op->nfso_cred, cred); } /* * No lock stateid, so return the open stateid. */ stateidp->seqid = op->nfso_stateid.seqid; stateidp->other[0] = op->nfso_stateid.other[0]; stateidp->other[1] = op->nfso_stateid.other[1]; stateidp->other[2] = op->nfso_stateid.other[2]; NFSUNLOCKCLSTATE(); return (0); } /* * Search for a matching file, mode and, optionally, lockowner. */ static int nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen, u_int8_t *openown, u_int8_t *lockown, u_int32_t mode, struct nfscllockowner **lpp, struct nfsclopen **opp) { struct nfsclowner *owp; struct nfsclopen *op, *rop, *rop2; struct nfscllockowner *lp; int keep_looping; if (lpp != NULL) *lpp = NULL; /* * rop will be set to the open to be returned. There are three * variants of this, all for an open of the correct file: * 1 - A match of lockown. * 2 - A match of the openown, when no lockown match exists. * 3 - A match for any open, if no openown or lockown match exists. * Looking for #2 over #3 probably isn't necessary, but since * RFC3530 is vague w.r.t. the relationship between openowners and * lockowners, I think this is the safer way to go. */ rop = NULL; rop2 = NULL; keep_looping = 1; /* Search the client list */ owp = LIST_FIRST(ohp); while (owp != NULL && keep_looping != 0) { /* and look for the correct open */ op = LIST_FIRST(&owp->nfsow_open); while (op != NULL && keep_looping != 0) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, nfhp, fhlen) && (op->nfso_mode & mode) == mode) { if (lpp != NULL) { /* Now look for a matching lockowner. */ LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, lockown, NFSV4CL_LOCKNAMELEN)) { *lpp = lp; rop = op; keep_looping = 0; break; } } } if (rop == NULL && !NFSBCMP(owp->nfsow_owner, openown, NFSV4CL_LOCKNAMELEN)) { rop = op; if (lpp == NULL) keep_looping = 0; } if (rop2 == NULL) rop2 = op; } op = LIST_NEXT(op, nfso_list); } owp = LIST_NEXT(owp, nfsow_list); } if (rop == NULL) rop = rop2; if (rop == NULL) return (EBADF); *opp = rop; return (0); } /* * Release use of an open owner. Called when open operations are done * with the open owner. */ APPLESTATIC void nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp, __unused int error, __unused int candelete, int unlocked) { if (owp == NULL) return; NFSLOCKCLSTATE(); if (unlocked == 0) { if (NFSHASONEOPENOWN(nmp)) nfsv4_relref(&owp->nfsow_rwlock); else nfscl_lockunlock(&owp->nfsow_rwlock); } nfscl_clrelease(owp->nfsow_clp); NFSUNLOCKCLSTATE(); } /* * Release use of an open structure under an open owner. */ APPLESTATIC void nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error, int candelete) { struct nfsclclient *clp; struct nfsclowner *owp; if (op == NULL) return; NFSLOCKCLSTATE(); owp = op->nfso_own; if (NFSHASONEOPENOWN(nmp)) nfsv4_relref(&owp->nfsow_rwlock); else nfscl_lockunlock(&owp->nfsow_rwlock); clp = owp->nfsow_clp; if (error && candelete && op->nfso_opencnt == 0) nfscl_freeopen(op, 0); nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } /* * Called to get a clientid structure. It will optionally lock the * client data structures to do the SetClientId/SetClientId_confirm, * but will release that lock and return the clientid with a reference * count on it. * If the "cred" argument is NULL, a new clientid should not be created. * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot * be done. * The start_renewthread argument tells nfscl_getcl() to start a renew * thread if this creates a new clp. * It always clpp with a reference count on it, unless returning an error. */ APPLESTATIC int nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p, int start_renewthread, struct nfsclclient **clpp) { struct nfsclclient *clp; struct nfsclclient *newclp = NULL; struct nfsmount *nmp; char uuid[HOSTUUIDLEN]; int igotlock = 0, error, trystalecnt, clidinusedelay, i; u_int16_t idlen = 0; nmp = VFSTONFS(mp); if (cred != NULL) { getcredhostuuid(cred, uuid, sizeof uuid); idlen = strlen(uuid); if (idlen > 0) idlen += sizeof (u_int64_t); else idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */ newclp = malloc( sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT, M_WAITOK | M_ZERO); } NFSLOCKCLSTATE(); /* * If a forced dismount is already in progress, don't * allocate a new clientid and get out now. For the case where * clp != NULL, this is a harmless optimization. */ if (NFSCL_FORCEDISM(mp)) { NFSUNLOCKCLSTATE(); if (newclp != NULL) free(newclp, M_NFSCLCLIENT); return (EBADF); } clp = nmp->nm_clp; if (clp == NULL) { if (newclp == NULL) { NFSUNLOCKCLSTATE(); return (EACCES); } clp = newclp; clp->nfsc_idlen = idlen; LIST_INIT(&clp->nfsc_owner); TAILQ_INIT(&clp->nfsc_deleg); TAILQ_INIT(&clp->nfsc_layout); LIST_INIT(&clp->nfsc_devinfo); for (i = 0; i < NFSCLDELEGHASHSIZE; i++) LIST_INIT(&clp->nfsc_deleghash[i]); for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) LIST_INIT(&clp->nfsc_layouthash[i]); clp->nfsc_flags = NFSCLFLAGS_INITED; clp->nfsc_clientidrev = 1; clp->nfsc_cbident = nfscl_nextcbident(); nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id, clp->nfsc_idlen); LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list); nmp->nm_clp = clp; clp->nfsc_nmp = nmp; NFSUNLOCKCLSTATE(); if (start_renewthread != 0) nfscl_start_renewthread(clp); } else { NFSUNLOCKCLSTATE(); if (newclp != NULL) free(newclp, M_NFSCLCLIENT); } NFSLOCKCLSTATE(); while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock && !NFSCL_FORCEDISM(mp)) igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, mp); if (igotlock == 0) { /* * Call nfsv4_lock() with "iwantlock == 0" so that it will * wait for a pending exclusive lock request. This gives the * exclusive lock request priority over this shared lock * request. * An exclusive lock on nfsc_lock is used mainly for server * crash recoveries. */ nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp); nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp); } if (igotlock == 0 && NFSCL_FORCEDISM(mp)) { /* * Both nfsv4_lock() and nfsv4_getref() know to check * for NFSCL_FORCEDISM() and return without sleeping to * wait for the exclusive lock to be released, since it * might be held by nfscl_umount() and we need to get out * now for that case and not wait until nfscl_umount() * releases it. */ NFSUNLOCKCLSTATE(); return (EBADF); } NFSUNLOCKCLSTATE(); /* * If it needs a clientid, do the setclientid now. */ if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) { if (!igotlock) panic("nfscl_clget"); if (p == NULL || cred == NULL) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (EACCES); } /* * If RFC3530 Sec. 14.2.33 is taken literally, * NFSERR_CLIDINUSE will be returned persistently for the * case where a new mount of the same file system is using * a different principal. In practice, NFSERR_CLIDINUSE is * only returned when there is outstanding unexpired state * on the clientid. As such, try for twice the lease * interval, if we know what that is. Otherwise, make a * wild ass guess. * The case of returning NFSERR_STALECLIENTID is far less * likely, but might occur if there is a significant delay * between doing the SetClientID and SetClientIDConfirm Ops, * such that the server throws away the clientid before * receiving the SetClientIDConfirm. */ if (clp->nfsc_renew > 0) clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2; else clidinusedelay = 120; trystalecnt = 3; do { error = nfsrpc_setclient(nmp, clp, 0, cred, p); if (error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_BADSESSION || error == NFSERR_CLIDINUSE) { (void) nfs_catnap(PZERO, error, "nfs_setcl"); } } while (((error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) || (error == NFSERR_CLIDINUSE && --clidinusedelay > 0)); if (error) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (error); } clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; } if (igotlock) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 1); NFSUNLOCKCLSTATE(); } *clpp = clp; return (0); } /* * Get a reference to a clientid and return it, if valid. */ APPLESTATIC struct nfsclclient * nfscl_findcl(struct nfsmount *nmp) { struct nfsclclient *clp; clp = nmp->nm_clp; if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) return (NULL); return (clp); } /* * Release the clientid structure. It may be locked or reference counted. */ static void nfscl_clrelease(struct nfsclclient *clp) { if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK) nfsv4_unlock(&clp->nfsc_lock, 0); else nfsv4_relref(&clp->nfsc_lock); } /* * External call for nfscl_clrelease. */ APPLESTATIC void nfscl_clientrelease(struct nfsclclient *clp) { NFSLOCKCLSTATE(); if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK) nfsv4_unlock(&clp->nfsc_lock, 0); else nfsv4_relref(&clp->nfsc_lock); NFSUNLOCKCLSTATE(); } /* * Called when wanting to lock a byte region. */ APPLESTATIC int nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp, int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp, struct nfscllockowner **lpp, int *newonep, int *donelocallyp) { struct nfscllockowner *lp; struct nfsclopen *op; struct nfsclclient *clp; struct nfscllockowner *nlp; struct nfscllock *nlop, *otherlop; struct nfscldeleg *dp = NULL, *ldp = NULL; struct nfscllockownerhead *lhp = NULL; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN]; u_int8_t *openownp; int error = 0, ret, donelocally = 0; u_int32_t mode; /* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */ mode = 0; np = VTONFS(vp); *lpp = NULL; lp = NULL; *newonep = 0; *donelocallyp = 0; /* * Might need these, so MALLOC them now, to * avoid a tsleep() in MALLOC later. */ nlp = malloc( sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK); otherlop = malloc( sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); nlop = malloc( sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); nlop->nfslo_type = type; nlop->nfslo_first = off; if (len == NFS64BITSSET) { nlop->nfslo_end = NFS64BITSSET; } else { nlop->nfslo_end = off + len; if (nlop->nfslo_end <= nlop->nfslo_first) error = NFSERR_INVAL; } if (!error) { if (recovery) clp = rclp; else error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); } if (error) { free(nlp, M_NFSCLLOCKOWNER); free(otherlop, M_NFSCLLOCK); free(nlop, M_NFSCLLOCK); return (error); } op = NULL; if (recovery) { ownp = rownp; openownp = ropenownp; } else { nfscl_filllockowner(id, own, flags); ownp = own; if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) nfscl_filllockowner(NULL, openown, F_POSIX); else nfscl_filllockowner(p->td_proc, openown, F_POSIX); openownp = openown; } if (!recovery) { NFSLOCKCLSTATE(); /* * First, search for a delegation. If one exists for this file, * the lock can be done locally against it, so long as there * isn't a local lock conflict. */ ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); /* Just sanity check for correct type of delegation */ if (dp != NULL && ((dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 || (type == F_WRLCK && (dp->nfsdl_flags & NFSCLDL_WRITE) == 0))) dp = NULL; } if (dp != NULL) { /* Now, find an open and maybe a lockowner. */ ret = nfscl_getopen(&dp->nfsdl_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op); if (ret) ret = nfscl_getopen(&clp->nfsc_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op); if (!ret) { lhp = &dp->nfsdl_lock; TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; donelocally = 1; } else { dp = NULL; } } if (!donelocally) { /* * Get the related Open and maybe lockowner. */ error = nfscl_getopen(&clp->nfsc_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, &lp, &op); if (!error) lhp = &op->nfso_lock; } if (!error && !recovery) error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, nlop, ownp, ldp, NULL); if (error) { if (!recovery) { nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } free(nlp, M_NFSCLLOCKOWNER); free(otherlop, M_NFSCLLOCK); free(nlop, M_NFSCLLOCK); return (error); } /* * Ok, see if a lockowner exists and create one, as required. */ if (lp == NULL) LIST_FOREACH(lp, lhp, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN)) break; } if (lp == NULL) { NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN); if (recovery) NFSBCOPY(ropenownp, nlp->nfsl_openowner, NFSV4CL_LOCKNAMELEN); else NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner, NFSV4CL_LOCKNAMELEN); nlp->nfsl_seqid = 0; nlp->nfsl_lockflags = flags; nlp->nfsl_inprog = NULL; nfscl_lockinit(&nlp->nfsl_rwlock); LIST_INIT(&nlp->nfsl_lock); if (donelocally) { nlp->nfsl_open = NULL; nfsstatsv1.cllocallockowners++; } else { nlp->nfsl_open = op; nfsstatsv1.cllockowners++; } LIST_INSERT_HEAD(lhp, nlp, nfsl_list); lp = nlp; nlp = NULL; *newonep = 1; } /* * Now, update the byte ranges for locks. */ ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally); if (!ret) donelocally = 1; if (donelocally) { *donelocallyp = 1; if (!recovery) nfscl_clrelease(clp); } else { /* * Serial modifications on the lock owner for multiple threads * for the same process using a read/write lock. */ if (!recovery) nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR); } if (!recovery) NFSUNLOCKCLSTATE(); if (nlp) free(nlp, M_NFSCLLOCKOWNER); if (nlop) free(nlop, M_NFSCLLOCK); if (otherlop) free(otherlop, M_NFSCLLOCK); *lpp = lp; return (0); } /* * Called to unlock a byte range, for LockU. */ APPLESTATIC int nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len, __unused struct ucred *cred, NFSPROC_T *p, int callcnt, struct nfsclclient *clp, void *id, int flags, struct nfscllockowner **lpp, int *dorpcp) { struct nfscllockowner *lp; struct nfsclowner *owp; struct nfsclopen *op; struct nfscllock *nlop, *other_lop = NULL; struct nfscldeleg *dp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int ret = 0, fnd; np = VTONFS(vp); *lpp = NULL; *dorpcp = 0; /* * Might need these, so MALLOC them now, to * avoid a tsleep() in MALLOC later. */ nlop = malloc( sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); nlop->nfslo_type = F_UNLCK; nlop->nfslo_first = off; if (len == NFS64BITSSET) { nlop->nfslo_end = NFS64BITSSET; } else { nlop->nfslo_end = off + len; if (nlop->nfslo_end <= nlop->nfslo_first) { free(nlop, M_NFSCLLOCK); return (NFSERR_INVAL); } } if (callcnt == 0) { other_lop = malloc( sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); *other_lop = *nlop; } nfscl_filllockowner(id, own, flags); dp = NULL; NFSLOCKCLSTATE(); if (callcnt == 0) dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); /* * First, unlock any local regions on a delegation. */ if (dp != NULL) { /* Look for this lockowner. */ LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) break; } if (lp != NULL) /* Use other_lop, so nlop is still available */ (void)nfscl_updatelock(lp, &other_lop, NULL, 1); } /* * Now, find a matching open/lockowner that hasn't already been done, * as marked by nfsl_inprog. */ lp = NULL; fnd = 0; LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == np->n_fhp->nfh_len && !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) { LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lp->nfsl_inprog == NULL && !NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { fnd = 1; break; } } if (fnd) break; } } if (fnd) break; } if (lp != NULL) { ret = nfscl_updatelock(lp, &nlop, NULL, 0); if (ret) *dorpcp = 1; /* * Serial modifications on the lock owner for multiple * threads for the same process using a read/write lock. */ lp->nfsl_inprog = p; nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR); *lpp = lp; } NFSUNLOCKCLSTATE(); if (nlop) free(nlop, M_NFSCLLOCK); if (other_lop) free(other_lop, M_NFSCLLOCK); return (0); } /* * Release all lockowners marked in progess for this process and file. */ APPLESTATIC void nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p, void *id, int flags) { struct nfsclowner *owp; struct nfsclopen *op; struct nfscllockowner *lp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; np = VTONFS(vp); nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == np->n_fhp->nfh_len && !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) { LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lp->nfsl_inprog == p && !NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { lp->nfsl_inprog = NULL; nfscl_lockunlock(&lp->nfsl_rwlock); } } } } } nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } /* * Called to find out if any bytes within the byte range specified are * write locked by the calling process. Used to determine if flushing * is required before a LockU. * If in doubt, return 1, so the flush will occur. */ APPLESTATIC int nfscl_checkwritelocked(vnode_t vp, struct flock *fl, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfsclowner *owp; struct nfscllockowner *lp; struct nfsclopen *op; struct nfsclclient *clp; struct nfscllock *lop; struct nfscldeleg *dp; struct nfsnode *np; u_int64_t off, end; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int error = 0; np = VTONFS(vp); switch (fl->l_whence) { case SEEK_SET: case SEEK_CUR: /* * Caller is responsible for adding any necessary offset * when SEEK_CUR is used. */ off = fl->l_start; break; case SEEK_END: off = np->n_size + fl->l_start; break; default: return (1); } if (fl->l_len != 0) { end = off + fl->l_len; if (end < off) return (1); } else { end = NFS64BITSSET; } error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (1); nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); /* * First check the delegation locks. */ dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) break; } if (lp != NULL) { LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (lop->nfslo_first >= end) break; if (lop->nfslo_end <= off) continue; if (lop->nfslo_type == F_WRLCK) { nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); return (1); } } } } /* * Now, check state against the server. */ LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == np->n_fhp->nfh_len && !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) { LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) break; } if (lp != NULL) { LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (lop->nfslo_first >= end) break; if (lop->nfslo_end <= off) continue; if (lop->nfslo_type == F_WRLCK) { nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); return (1); } } } } } } nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); return (0); } /* * Release a byte range lock owner structure. */ APPLESTATIC void nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete) { struct nfsclclient *clp; if (lp == NULL) return; NFSLOCKCLSTATE(); clp = lp->nfsl_open->nfso_own->nfsow_clp; if (error != 0 && candelete && (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0) nfscl_freelockowner(lp, 0); else nfscl_lockunlock(&lp->nfsl_rwlock); nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } /* * Free up an open structure and any associated byte range lock structures. */ APPLESTATIC void nfscl_freeopen(struct nfsclopen *op, int local) { LIST_REMOVE(op, nfso_list); nfscl_freealllocks(&op->nfso_lock, local); free(op, M_NFSCLOPEN); if (local) nfsstatsv1.cllocalopens--; else nfsstatsv1.clopens--; } /* * Free up all lock owners and associated locks. */ static void nfscl_freealllocks(struct nfscllockownerhead *lhp, int local) { struct nfscllockowner *lp, *nlp; LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) { if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED)) panic("nfscllckw"); nfscl_freelockowner(lp, local); } } /* * Called for an Open when NFSERR_EXPIRED is received from the server. * If there are no byte range locks nor a Share Deny lost, try to do a * fresh Open. Otherwise, free the open. */ static int nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op, struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) { struct nfscllockowner *lp; struct nfscldeleg *dp; int mustdelete = 0, error; /* * Look for any byte range lock(s). */ LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { mustdelete = 1; break; } } /* * If no byte range lock(s) nor a Share deny, try to re-open. */ if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) { newnfs_copycred(&op->nfso_cred, cred); dp = NULL; error = nfsrpc_reopen(nmp, op->nfso_fh, op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p); if (error) { mustdelete = 1; if (dp != NULL) { free(dp, M_NFSCLDELEG); dp = NULL; } } if (dp != NULL) nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh, op->nfso_fhlen, cred, p, &dp); } /* * If a byte range lock or Share deny or couldn't re-open, free it. */ if (mustdelete) nfscl_freeopen(op, 0); return (mustdelete); } /* * Free up an open owner structure. */ static void nfscl_freeopenowner(struct nfsclowner *owp, int local) { LIST_REMOVE(owp, nfsow_list); free(owp, M_NFSCLOWNER); if (local) nfsstatsv1.cllocalopenowners--; else nfsstatsv1.clopenowners--; } /* * Free up a byte range lock owner structure. */ APPLESTATIC void nfscl_freelockowner(struct nfscllockowner *lp, int local) { struct nfscllock *lop, *nlop; LIST_REMOVE(lp, nfsl_list); LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) { nfscl_freelock(lop, local); } free(lp, M_NFSCLLOCKOWNER); if (local) nfsstatsv1.cllocallockowners--; else nfsstatsv1.cllockowners--; } /* * Free up a byte range lock structure. */ APPLESTATIC void nfscl_freelock(struct nfscllock *lop, int local) { LIST_REMOVE(lop, nfslo_list); free(lop, M_NFSCLLOCK); if (local) nfsstatsv1.cllocallocks--; else nfsstatsv1.cllocks--; } /* * Clean out the state related to a delegation. */ static void nfscl_cleandeleg(struct nfscldeleg *dp) { struct nfsclowner *owp, *nowp; struct nfsclopen *op; LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { if (LIST_NEXT(op, nfso_list) != NULL) panic("nfscleandel"); nfscl_freeopen(op, 1); } nfscl_freeopenowner(owp, 1); } nfscl_freealllocks(&dp->nfsdl_lock, 1); } /* * Free a delegation. */ static void nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp) { TAILQ_REMOVE(hdp, dp, nfsdl_list); LIST_REMOVE(dp, nfsdl_hash); free(dp, M_NFSCLDELEG); nfsstatsv1.cldelegates--; nfscl_delegcnt--; } /* * Free up all state related to this client structure. */ static void nfscl_cleanclient(struct nfsclclient *clp) { struct nfsclowner *owp, *nowp; struct nfsclopen *op, *nop; struct nfscllayout *lyp, *nlyp; struct nfscldevinfo *dip, *ndip; TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) nfscl_freelayout(lyp); LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) nfscl_freedevinfo(dip); /* Now, all the OpenOwners, etc. */ LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) { nfscl_freeopen(op, 0); } nfscl_freeopenowner(owp, 0); } } /* * Called when an NFSERR_EXPIRED is received from the server. */ static void nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) { struct nfsclowner *owp, *nowp, *towp; struct nfsclopen *op, *nop, *top; struct nfscldeleg *dp, *ndp; int ret, printed = 0; /* * First, merge locally issued Opens into the list for the server. */ dp = TAILQ_FIRST(&clp->nfsc_deleg); while (dp != NULL) { ndp = TAILQ_NEXT(dp, nfsdl_list); owp = LIST_FIRST(&dp->nfsdl_owner); while (owp != NULL) { nowp = LIST_NEXT(owp, nfsow_list); op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { if (LIST_NEXT(op, nfso_list) != NULL) panic("nfsclexp"); LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) { if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) break; } if (towp != NULL) { /* Merge opens in */ LIST_FOREACH(top, &towp->nfsow_open, nfso_list) { if (top->nfso_fhlen == op->nfso_fhlen && !NFSBCMP(top->nfso_fh, op->nfso_fh, op->nfso_fhlen)) { top->nfso_mode |= op->nfso_mode; top->nfso_opencnt += op->nfso_opencnt; break; } } if (top == NULL) { /* Just add the open to the owner list */ LIST_REMOVE(op, nfso_list); op->nfso_own = towp; LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list); nfsstatsv1.cllocalopens--; nfsstatsv1.clopens++; } } else { /* Just add the openowner to the client list */ LIST_REMOVE(owp, nfsow_list); owp->nfsow_clp = clp; LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list); nfsstatsv1.cllocalopenowners--; nfsstatsv1.clopenowners++; nfsstatsv1.cllocalopens--; nfsstatsv1.clopens++; } } owp = nowp; } if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) { printed = 1; printf("nfsv4 expired locks lost\n"); } nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); dp = ndp; } if (!TAILQ_EMPTY(&clp->nfsc_deleg)) panic("nfsclexp"); /* * Now, try and reopen against the server. */ LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { owp->nfsow_seqid = 0; LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) { ret = nfscl_expireopen(clp, op, nmp, cred, p); if (ret && !printed) { printed = 1; printf("nfsv4 expired locks lost\n"); } } if (LIST_EMPTY(&owp->nfsow_open)) nfscl_freeopenowner(owp, 0); } } /* * This function must be called after the process represented by "own" has * exited. Must be called with CLSTATE lock held. */ static void nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own) { struct nfsclowner *owp, *nowp; struct nfscllockowner *lp, *nlp; struct nfscldeleg *dp; /* First, get rid of local locks on delegations. */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED)) panic("nfscllckw"); nfscl_freelockowner(lp, 1); } } } owp = LIST_FIRST(&clp->nfsc_owner); while (owp != NULL) { nowp = LIST_NEXT(owp, nfsow_list); if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN)) { /* * If there are children that haven't closed the * file descriptors yet, the opens will still be * here. For that case, let the renew thread clear * out the OpenOwner later. */ if (LIST_EMPTY(&owp->nfsow_open)) nfscl_freeopenowner(owp, 0); else owp->nfsow_defunct = 1; } owp = nowp; } } /* * Find open/lock owners for processes that have exited. */ static void nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp) { struct nfsclowner *owp, *nowp; struct nfsclopen *op; struct nfscllockowner *lp, *nlp; struct nfscldeleg *dp; NFSPROCLISTLOCK(); NFSLOCKCLSTATE(); LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) { if (LIST_EMPTY(&lp->nfsl_lock)) nfscl_emptylockowner(lp, lhp); } } if (nfscl_procdoesntexist(owp->nfsow_owner)) nfscl_cleanup_common(clp, owp->nfsow_owner); } /* * For the single open_owner case, these lock owners need to be * checked to see if they still exist separately. * This is because nfscl_procdoesntexist() never returns true for * the single open_owner so that the above doesn't ever call * nfscl_cleanup_common(). */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) { if (nfscl_procdoesntexist(lp->nfsl_owner)) nfscl_cleanup_common(clp, lp->nfsl_owner); } } NFSUNLOCKCLSTATE(); NFSPROCLISTUNLOCK(); } /* * Take the empty lock owner and move it to the local lhp list if the * associated process no longer exists. */ static void nfscl_emptylockowner(struct nfscllockowner *lp, struct nfscllockownerfhhead *lhp) { struct nfscllockownerfh *lfhp, *mylfhp; struct nfscllockowner *nlp; int fnd_it; /* If not a Posix lock owner, just return. */ if ((lp->nfsl_lockflags & F_POSIX) == 0) return; fnd_it = 0; mylfhp = NULL; /* * First, search to see if this lock owner is already in the list. * If it is, then the associated process no longer exists. */ SLIST_FOREACH(lfhp, lhp, nfslfh_list) { if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen && !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh, lfhp->nfslfh_len)) mylfhp = lfhp; LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list) if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner, NFSV4CL_LOCKNAMELEN)) fnd_it = 1; } /* If not found, check if process still exists. */ if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0) return; /* Move the lock owner over to the local list. */ if (mylfhp == NULL) { mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP, M_NOWAIT); if (mylfhp == NULL) return; mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen; NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh, mylfhp->nfslfh_len); LIST_INIT(&mylfhp->nfslfh_lock); SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list); } LIST_REMOVE(lp, nfsl_list); LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list); } static int fake_global; /* Used to force visibility of MNTK_UNMOUNTF */ /* * Called from nfs umount to free up the clientid. */ APPLESTATIC void nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p) { struct nfsclclient *clp; struct ucred *cred; int igotlock; /* * For the case that matters, this is the thread that set * MNTK_UNMOUNTF, so it will see it set. The code that follows is * done to ensure that any thread executing nfscl_getcl() after * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the * mutex for NFSLOCKCLSTATE(), so it is "m" for the following * explanation, courtesy of Alan Cox. * What follows is a snippet from Alan Cox's email at: * http://docs.FreeBSD.org/cgi/ * mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw * * 1. Set MNTK_UNMOUNTF * 2. Acquire a standard FreeBSD mutex "m". * 3. Update some data structures. * 4. Release mutex "m". * * Then, other threads that acquire "m" after step 4 has occurred will * see MNTK_UNMOUNTF as set. But, other threads that beat thread X to * step 2 may or may not see MNTK_UNMOUNTF as set. */ NFSLOCKCLSTATE(); if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { fake_global++; NFSUNLOCKCLSTATE(); NFSLOCKCLSTATE(); } clp = nmp->nm_clp; if (clp != NULL) { if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0) panic("nfscl umount"); /* * First, handshake with the nfscl renew thread, to terminate * it. */ clp->nfsc_flags |= NFSCLFLAGS_UMOUNT; while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD) (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfsclumnt", hz); /* * Now, get the exclusive lock on the client state, so * that no uses of the state are still in progress. */ do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKCLSTATE(); /* * Free up all the state. It will expire on the server, but * maybe we should do a SetClientId/SetClientIdConfirm so * the server throws it away? */ LIST_REMOVE(clp, nfsc_list); nfscl_delegreturnall(clp, p); cred = newnfs_getcred(); if (NFSHASNFSV4N(nmp)) { (void)nfsrpc_destroysession(nmp, clp, cred, p); (void)nfsrpc_destroyclient(nmp, clp, cred, p); } else (void)nfsrpc_setclient(nmp, clp, 0, cred, p); nfscl_cleanclient(clp); nmp->nm_clp = NULL; NFSFREECRED(cred); free(clp, M_NFSCLCLIENT); } else NFSUNLOCKCLSTATE(); } /* * This function is called when a server replies with NFSERR_STALECLIENTID * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists, * doing Opens and Locks with reclaim. If these fail, it deletes the * corresponding state. */ static void nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { struct nfsclowner *owp, *nowp; struct nfsclopen *op, *nop; struct nfscllockowner *lp, *nlp; struct nfscllock *lop, *nlop; struct nfscldeleg *dp, *ndp, *tdp; struct nfsmount *nmp; struct ucred *tcred; struct nfsclopenhead extra_open; struct nfscldeleghead extra_deleg; struct nfsreq *rep; u_int64_t len; u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode; int i, igotlock = 0, error, trycnt, firstlock; struct nfscllayout *lyp, *nlyp; /* * First, lock the client structure, so everyone else will * block when trying to use state. */ NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG; do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKCLSTATE(); nmp = clp->nfsc_nmp; if (nmp == NULL) panic("nfscl recover"); /* * For now, just get rid of all layouts. There may be a need * to do LayoutCommit Ops with reclaim == true later. */ TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) nfscl_freelayout(lyp); TAILQ_INIT(&clp->nfsc_layout); for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) LIST_INIT(&clp->nfsc_layouthash[i]); trycnt = 5; do { error = nfsrpc_setclient(nmp, clp, 1, cred, p); } while ((error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { NFSLOCKCLSTATE(); clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER | NFSCLFLAGS_RECVRINPROG); wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return; } clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; /* * Mark requests already queued on the server, so that they don't * initiate another recovery cycle. Any requests already in the * queue that handle state information will have the old stale * clientid/stateid and will get a NFSERR_STALESTATEID, * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server. * This will be translated to NFSERR_STALEDONTRECOVER when * R_DONTRECOVER is set. */ NFSLOCKREQ(); TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) { if (rep->r_nmp == nmp) rep->r_flags |= R_DONTRECOVER; } NFSUNLOCKREQ(); /* * Now, mark all delegations "need reclaim". */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM; TAILQ_INIT(&extra_deleg); LIST_INIT(&extra_open); /* * Now traverse the state lists, doing Open and Lock Reclaims. */ tcred = newnfs_getcred(); owp = LIST_FIRST(&clp->nfsc_owner); while (owp != NULL) { nowp = LIST_NEXT(owp, nfsow_list); owp->nfsow_seqid = 0; op = LIST_FIRST(&owp->nfsow_open); while (op != NULL) { nop = LIST_NEXT(op, nfso_list); if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) { /* Search for a delegation to reclaim with the open */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) continue; if ((dp->nfsdl_flags & NFSCLDL_WRITE)) { mode = NFSV4OPEN_ACCESSWRITE; delegtype = NFSV4OPEN_DELEGATEWRITE; } else { mode = NFSV4OPEN_ACCESSREAD; delegtype = NFSV4OPEN_DELEGATEREAD; } if ((op->nfso_mode & mode) == mode && op->nfso_fhlen == dp->nfsdl_fhlen && !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen)) break; } ndp = dp; if (dp == NULL) delegtype = NFSV4OPEN_DELEGATENONE; newnfs_copycred(&op->nfso_cred, tcred); error = nfscl_tryopen(nmp, NULL, op->nfso_fh, op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen, op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype, tcred, p); if (!error) { /* Handle any replied delegation */ if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE) || NFSMNT_RDONLY(nmp->nm_mountp))) { if ((ndp->nfsdl_flags & NFSCLDL_WRITE)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) continue; if ((op->nfso_mode & mode) == mode && op->nfso_fhlen == dp->nfsdl_fhlen && !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen)) { dp->nfsdl_stateid = ndp->nfsdl_stateid; dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit; dp->nfsdl_ace = ndp->nfsdl_ace; dp->nfsdl_change = ndp->nfsdl_change; dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM; if ((ndp->nfsdl_flags & NFSCLDL_RECALL)) dp->nfsdl_flags |= NFSCLDL_RECALL; free(ndp, M_NFSCLDELEG); ndp = NULL; break; } } } if (ndp != NULL) TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list); /* and reclaim all byte range locks */ lp = LIST_FIRST(&op->nfso_lock); while (lp != NULL) { nlp = LIST_NEXT(lp, nfsl_list); lp->nfsl_seqid = 0; firstlock = 1; lop = LIST_FIRST(&lp->nfsl_lock); while (lop != NULL) { nlop = LIST_NEXT(lop, nfslo_list); if (lop->nfslo_end == NFS64BITSSET) len = NFS64BITSSET; else len = lop->nfslo_end - lop->nfslo_first; error = nfscl_trylock(nmp, NULL, op->nfso_fh, op->nfso_fhlen, lp, firstlock, 1, lop->nfslo_first, len, lop->nfslo_type, tcred, p); if (error != 0) nfscl_freelock(lop, 0); else firstlock = 0; lop = nlop; } /* If no locks, but a lockowner, just delete it. */ if (LIST_EMPTY(&lp->nfsl_lock)) nfscl_freelockowner(lp, 0); lp = nlp; } } } if (error != 0 && error != NFSERR_BADSESSION) nfscl_freeopen(op, 0); op = nop; } owp = nowp; } /* * Now, try and get any delegations not yet reclaimed by cobbling * to-gether an appropriate open. */ nowp = NULL; dp = TAILQ_FIRST(&clp->nfsc_deleg); while (dp != NULL) { ndp = TAILQ_NEXT(dp, nfsdl_list); if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) { if (nowp == NULL) { nowp = malloc( sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); /* * Name must be as long an largest possible * NFSV4CL_LOCKNAMELEN. 12 for now. */ NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN); LIST_INIT(&nowp->nfsow_open); nowp->nfsow_clp = clp; nowp->nfsow_seqid = 0; nowp->nfsow_defunct = 0; nfscl_lockinit(&nowp->nfsow_rwlock); } nop = NULL; if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) { nop = malloc(sizeof (struct nfsclopen) + dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK); nop->nfso_own = nowp; if ((dp->nfsdl_flags & NFSCLDL_WRITE)) { nop->nfso_mode = NFSV4OPEN_ACCESSWRITE; delegtype = NFSV4OPEN_DELEGATEWRITE; } else { nop->nfso_mode = NFSV4OPEN_ACCESSREAD; delegtype = NFSV4OPEN_DELEGATEREAD; } nop->nfso_opencnt = 0; nop->nfso_posixlock = 1; nop->nfso_fhlen = dp->nfsdl_fhlen; NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen); LIST_INIT(&nop->nfso_lock); nop->nfso_stateid.seqid = 0; nop->nfso_stateid.other[0] = 0; nop->nfso_stateid.other[1] = 0; nop->nfso_stateid.other[2] = 0; newnfs_copycred(&dp->nfsdl_cred, tcred); newnfs_copyincred(tcred, &nop->nfso_cred); tdp = NULL; error = nfscl_tryopen(nmp, NULL, nop->nfso_fh, nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen, nop->nfso_mode, nop, NULL, 0, &tdp, 1, delegtype, tcred, p); if (tdp != NULL) { if ((tdp->nfsdl_flags & NFSCLDL_WRITE)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; if ((nop->nfso_mode & mode) == mode && nop->nfso_fhlen == tdp->nfsdl_fhlen && !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh, nop->nfso_fhlen)) { dp->nfsdl_stateid = tdp->nfsdl_stateid; dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit; dp->nfsdl_ace = tdp->nfsdl_ace; dp->nfsdl_change = tdp->nfsdl_change; dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM; if ((tdp->nfsdl_flags & NFSCLDL_RECALL)) dp->nfsdl_flags |= NFSCLDL_RECALL; free(tdp, M_NFSCLDELEG); } else { TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list); } } } if (error) { if (nop != NULL) free(nop, M_NFSCLOPEN); /* * Couldn't reclaim it, so throw the state * away. Ouch!! */ nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } else { LIST_INSERT_HEAD(&extra_open, nop, nfso_list); } } dp = ndp; } /* * Now, get rid of extra Opens and Delegations. */ LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) { do { newnfs_copycred(&op->nfso_cred, tcred); error = nfscl_tryclose(op, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfsexcls"); } while (error == NFSERR_GRACE); LIST_REMOVE(op, nfso_list); free(op, M_NFSCLOPEN); } if (nowp != NULL) free(nowp, M_NFSCLOWNER); TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) { do { newnfs_copycred(&dp->nfsdl_cred, tcred); error = nfscl_trydelegreturn(dp, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfsexdlg"); } while (error == NFSERR_GRACE); TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list); free(dp, M_NFSCLDELEG); } /* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */ if (NFSHASNFSV4N(nmp)) (void)nfsrpc_reclaimcomplete(nmp, cred, p); NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG; wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(tcred); } /* * This function is called when a server replies with NFSERR_EXPIRED. * It deletes all state for the client and does a fresh SetClientId/confirm. * XXX Someday it should post a signal to the process(es) that hold the * state, so they know that lock state has been lost. */ APPLESTATIC int nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p) { struct nfsmount *nmp; struct ucred *cred; int igotlock = 0, error, trycnt; /* * If the clientid has gone away or a new SetClientid has already * been done, just return ok. */ if (clp == NULL || clidrev != clp->nfsc_clientidrev) return (0); /* * First, lock the client structure, so everyone else will * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so * that only one thread does the work. */ NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT; do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT)); if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) { if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (0); } clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG; NFSUNLOCKCLSTATE(); nmp = clp->nfsc_nmp; if (nmp == NULL) panic("nfscl expired"); cred = newnfs_getcred(); trycnt = 5; do { error = nfsrpc_setclient(nmp, clp, 0, cred, p); } while ((error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; } else { /* * Expire the state for the client. */ nfscl_expireclient(clp, nmp, cred, p); NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; } clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG); wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(cred); return (error); } /* * This function inserts a lock in the list after insert_lop. */ static void nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop, struct nfscllock *insert_lop, int local) { if ((struct nfscllockowner *)insert_lop == lp) LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list); else LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list); if (local) nfsstatsv1.cllocallocks++; else nfsstatsv1.cllocks++; } /* * This function updates the locking for a lock owner and given file. It * maintains a list of lock ranges ordered on increasing file offset that * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style). * It always adds new_lop to the list and sometimes uses the one pointed * at by other_lopp. * Returns 1 if the locks were modified, 0 otherwise. */ static int nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp, struct nfscllock **other_lopp, int local) { struct nfscllock *new_lop = *new_lopp; struct nfscllock *lop, *tlop, *ilop; struct nfscllock *other_lop; int unlock = 0, modified = 0; u_int64_t tmp; /* * Work down the list until the lock is merged. */ if (new_lop->nfslo_type == F_UNLCK) unlock = 1; ilop = (struct nfscllock *)lp; lop = LIST_FIRST(&lp->nfsl_lock); while (lop != NULL) { /* * Only check locks for this file that aren't before the start of * new lock's range. */ if (lop->nfslo_end >= new_lop->nfslo_first) { if (new_lop->nfslo_end < lop->nfslo_first) { /* * If the new lock ends before the start of the * current lock's range, no merge, just insert * the new lock. */ break; } if (new_lop->nfslo_type == lop->nfslo_type || (new_lop->nfslo_first <= lop->nfslo_first && new_lop->nfslo_end >= lop->nfslo_end)) { /* * This lock can be absorbed by the new lock/unlock. * This happens when it covers the entire range * of the old lock or is contiguous * with the old lock and is of the same type or an * unlock. */ if (new_lop->nfslo_type != lop->nfslo_type || new_lop->nfslo_first != lop->nfslo_first || new_lop->nfslo_end != lop->nfslo_end) modified = 1; if (lop->nfslo_first < new_lop->nfslo_first) new_lop->nfslo_first = lop->nfslo_first; if (lop->nfslo_end > new_lop->nfslo_end) new_lop->nfslo_end = lop->nfslo_end; tlop = lop; lop = LIST_NEXT(lop, nfslo_list); nfscl_freelock(tlop, local); continue; } /* * All these cases are for contiguous locks that are not the * same type, so they can't be merged. */ if (new_lop->nfslo_first <= lop->nfslo_first) { /* * This case is where the new lock overlaps with the * first part of the old lock. Move the start of the * old lock to just past the end of the new lock. The * new lock will be inserted in front of the old, since * ilop hasn't been updated. (We are done now.) */ if (lop->nfslo_first != new_lop->nfslo_end) { lop->nfslo_first = new_lop->nfslo_end; modified = 1; } break; } if (new_lop->nfslo_end >= lop->nfslo_end) { /* * This case is where the new lock overlaps with the * end of the old lock's range. Move the old lock's * end to just before the new lock's first and insert * the new lock after the old lock. * Might not be done yet, since the new lock could * overlap further locks with higher ranges. */ if (lop->nfslo_end != new_lop->nfslo_first) { lop->nfslo_end = new_lop->nfslo_first; modified = 1; } ilop = lop; lop = LIST_NEXT(lop, nfslo_list); continue; } /* * The final case is where the new lock's range is in the * middle of the current lock's and splits the current lock * up. Use *other_lopp to handle the second part of the * split old lock range. (We are done now.) * For unlock, we use new_lop as other_lop and tmp, since * other_lop and new_lop are the same for this case. * We noted the unlock case above, so we don't need * new_lop->nfslo_type any longer. */ tmp = new_lop->nfslo_first; if (unlock) { other_lop = new_lop; *new_lopp = NULL; } else { other_lop = *other_lopp; *other_lopp = NULL; } other_lop->nfslo_first = new_lop->nfslo_end; other_lop->nfslo_end = lop->nfslo_end; other_lop->nfslo_type = lop->nfslo_type; lop->nfslo_end = tmp; nfscl_insertlock(lp, other_lop, lop, local); ilop = lop; modified = 1; break; } ilop = lop; lop = LIST_NEXT(lop, nfslo_list); if (lop == NULL) break; } /* * Insert the new lock in the list at the appropriate place. */ if (!unlock) { nfscl_insertlock(lp, new_lop, ilop, local); *new_lopp = NULL; modified = 1; } return (modified); } /* * This function must be run as a kernel thread. * It does Renew Ops and recovery, when required. */ APPLESTATIC void nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) { struct nfsclowner *owp, *nowp; struct nfsclopen *op; struct nfscllockowner *lp, *nlp; struct nfscldeleghead dh; struct nfscldeleg *dp, *ndp; struct ucred *cred; u_int32_t clidrev; int error, cbpathdown, islept, igotlock, ret, clearok; uint32_t recover_done_time = 0; time_t mytime; static time_t prevsec = 0; struct nfscllockownerfh *lfhp, *nlfhp; struct nfscllockownerfhhead lfh; struct nfscllayout *lyp, *nlyp; struct nfscldevinfo *dip, *ndip; struct nfscllayouthead rlh; struct nfsclrecalllayout *recallp; struct nfsclds *dsp; cred = newnfs_getcred(); NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD; NFSUNLOCKCLSTATE(); for(;;) { newnfs_setroot(cred); cbpathdown = 0; if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) { /* * Only allow one recover within 1/2 of the lease * duration (nfsc_renew). */ if (recover_done_time < NFSD_MONOSEC) { recover_done_time = NFSD_MONOSEC + clp->nfsc_renew; NFSCL_DEBUG(1, "Doing recovery..\n"); nfscl_recover(clp, cred, p); } else { NFSCL_DEBUG(1, "Clear Recovery dt=%u ms=%jd\n", recover_done_time, (intmax_t)NFSD_MONOSEC); NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; NFSUNLOCKCLSTATE(); } } if (clp->nfsc_expire <= NFSD_MONOSEC && (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) { clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clidrev = clp->nfsc_clientidrev; error = nfsrpc_renew(clp, NULL, cred, p); if (error == NFSERR_CBPATHDOWN) cbpathdown = 1; else if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION) { NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECOVER; NFSUNLOCKCLSTATE(); } else if (error == NFSERR_EXPIRED) (void) nfscl_hasexpired(clp, clidrev, p); } checkdsrenew: if (NFSHASNFSV4N(clp->nfsc_nmp)) { /* Do renews for any DS sessions. */ NFSLOCKMNT(clp->nfsc_nmp); /* Skip first entry, since the MDS is handled above. */ dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess); if (dsp != NULL) dsp = TAILQ_NEXT(dsp, nfsclds_list); while (dsp != NULL) { if (dsp->nfsclds_expire <= NFSD_MONOSEC && dsp->nfsclds_sess.nfsess_defunct == 0) { dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew; NFSUNLOCKMNT(clp->nfsc_nmp); (void)nfsrpc_renew(clp, dsp, cred, p); goto checkdsrenew; } dsp = TAILQ_NEXT(dsp, nfsclds_list); } NFSUNLOCKMNT(clp->nfsc_nmp); } TAILQ_INIT(&dh); NFSLOCKCLSTATE(); if (cbpathdown) /* It's a Total Recall! */ nfscl_totalrecall(clp); /* * Now, handle defunct owners. */ LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { if (LIST_EMPTY(&owp->nfsow_open)) { if (owp->nfsow_defunct != 0) nfscl_freeopenowner(owp, 0); } } /* * Do the recall on any delegations. To avoid trouble, always * come back up here after having slept. */ igotlock = 0; tryagain: dp = TAILQ_FIRST(&clp->nfsc_deleg); while (dp != NULL) { ndp = TAILQ_NEXT(dp, nfsdl_list); if ((dp->nfsdl_flags & NFSCLDL_RECALL)) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); goto tryagain; } while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) goto tryagain; } NFSUNLOCKCLSTATE(); newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp, NULL, cred, p, 1); if (!ret) { nfscl_cleandeleg(dp); TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); LIST_REMOVE(dp, nfsdl_hash); TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list); nfscl_delegcnt--; nfsstatsv1.cldelegates--; } NFSLOCKCLSTATE(); } dp = ndp; } /* * Clear out old delegations, if we are above the high water * mark. Only clear out ones with no state related to them. * The tailq list is in LRU order. */ dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead); while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) { ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list); if (dp->nfsdl_rwlock.nfslock_usecnt == 0 && dp->nfsdl_rwlock.nfslock_lock == 0 && dp->nfsdl_timestamp < NFSD_MONOSEC && (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED | NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) { clearok = 1; LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { clearok = 0; break; } } if (clearok) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { clearok = 0; break; } } } if (clearok) { TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); LIST_REMOVE(dp, nfsdl_hash); TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list); nfscl_delegcnt--; nfsstatsv1.cldelegates--; } } dp = ndp; } if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); /* * Do the recall on any layouts. To avoid trouble, always * come back up here after having slept. */ TAILQ_INIT(&rlh); tryagain2: TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) { if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) { /* * Wait for outstanding I/O ops to be done. */ if (lyp->nfsly_lock.nfslock_usecnt > 0 || (lyp->nfsly_lock.nfslock_lock & NFSV4LOCK_LOCK) != 0) { lyp->nfsly_lock.nfslock_lock |= NFSV4LOCK_WANTED; nfsmsleep(&lyp->nfsly_lock.nfslock_lock, NFSCLSTATEMUTEXPTR, PZERO, "nfslyp", NULL); goto tryagain2; } /* Move the layout to the recall list. */ TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); LIST_REMOVE(lyp, nfsly_hash); TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list); /* Handle any layout commits. */ if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) && (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) { lyp->nfsly_flags &= ~NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); NFSCL_DEBUG(3, "do layoutcommit\n"); nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, cred, p); NFSLOCKCLSTATE(); goto tryagain2; } } } /* Now, look for stale layouts. */ lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead); while (lyp != NULL) { nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list); if (lyp->nfsly_timestamp < NFSD_MONOSEC && (lyp->nfsly_flags & NFSLY_RECALL) == 0 && lyp->nfsly_lock.nfslock_usecnt == 0 && lyp->nfsly_lock.nfslock_lock == 0) { NFSCL_DEBUG(4, "ret stale lay=%d\n", nfscl_layoutcnt); recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_NOWAIT); if (recallp == NULL) break; (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX, lyp->nfsly_stateid.seqid, recallp); } lyp = nlyp; } /* * Free up any unreferenced device info structures. */ LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) { if (dip->nfsdi_layoutrefs == 0 && dip->nfsdi_refcnt == 0) { NFSCL_DEBUG(4, "freeing devinfo\n"); LIST_REMOVE(dip, nfsdi_list); nfscl_freedevinfo(dip); } } NFSUNLOCKCLSTATE(); /* Do layout return(s), as required. */ TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) { TAILQ_REMOVE(&rlh, lyp, nfsly_list); NFSCL_DEBUG(4, "ret layout\n"); nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p); nfscl_freelayout(lyp); } /* * Delegreturn any delegations cleaned out or recalled. */ TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) { newnfs_copycred(&dp->nfsdl_cred, cred); (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p); TAILQ_REMOVE(&dh, dp, nfsdl_list); free(dp, M_NFSCLDELEG); } SLIST_INIT(&lfh); /* * Call nfscl_cleanupkext() once per second to check for * open/lock owners where the process has exited. */ mytime = NFSD_MONOSEC; if (prevsec != mytime) { prevsec = mytime; nfscl_cleanupkext(clp, &lfh); } /* * Do a ReleaseLockOwner for all lock owners where the * associated process no longer exists, as found by * nfscl_cleanupkext(). */ newnfs_setroot(cred); SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) { LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list, nlp) { (void)nfsrpc_rellockown(clp->nfsc_nmp, lp, lfhp->nfslfh_fh, lfhp->nfslfh_len, cred, p); nfscl_freelockowner(lp, 0); } free(lfhp, M_TEMP); } SLIST_INIT(&lfh); NFSLOCKCLSTATE(); if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0) (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl", hz); if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) { clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD; NFSUNLOCKCLSTATE(); NFSFREECRED(cred); wakeup((caddr_t)clp); return; } NFSUNLOCKCLSTATE(); } } /* * Initiate state recovery. Called when NFSERR_STALECLIENTID, * NFSERR_STALESTATEID or NFSERR_BADSESSION is received. */ APPLESTATIC void nfscl_initiate_recovery(struct nfsclclient *clp) { if (clp == NULL) return; NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECOVER; NFSUNLOCKCLSTATE(); wakeup((caddr_t)clp); } /* * Dump out the state stuff for debugging. */ APPLESTATIC void nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens, int lockowner, int locks) { struct nfsclclient *clp; struct nfsclowner *owp; struct nfsclopen *op; struct nfscllockowner *lp; struct nfscllock *lop; struct nfscldeleg *dp; clp = nmp->nm_clp; if (clp == NULL) { printf("nfscl dumpstate NULL clp\n"); return; } NFSLOCKCLSTATE(); TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (openowner && !LIST_EMPTY(&owp->nfsow_open)) printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n", owp->nfsow_owner[0], owp->nfsow_owner[1], owp->nfsow_owner[2], owp->nfsow_owner[3], owp->nfsow_seqid); LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (opens) printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n", op->nfso_stateid.other[0], op->nfso_stateid.other[1], op->nfso_stateid.other[2], op->nfso_opencnt, op->nfso_fh[12]); LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lockowner) printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n", lp->nfsl_owner[0], lp->nfsl_owner[1], lp->nfsl_owner[2], lp->nfsl_owner[3], lp->nfsl_seqid, lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1], lp->nfsl_stateid.other[2]); LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (locks) #ifdef __FreeBSD__ printf("lck typ=%d fst=%ju end=%ju\n", lop->nfslo_type, (intmax_t)lop->nfslo_first, (intmax_t)lop->nfslo_end); #else printf("lck typ=%d fst=%qd end=%qd\n", lop->nfslo_type, lop->nfslo_first, lop->nfslo_end); #endif } } } } } LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { if (openowner && !LIST_EMPTY(&owp->nfsow_open)) printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n", owp->nfsow_owner[0], owp->nfsow_owner[1], owp->nfsow_owner[2], owp->nfsow_owner[3], owp->nfsow_seqid); LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (opens) printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n", op->nfso_stateid.other[0], op->nfso_stateid.other[1], op->nfso_stateid.other[2], op->nfso_opencnt, op->nfso_fh[12]); LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lockowner) printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n", lp->nfsl_owner[0], lp->nfsl_owner[1], lp->nfsl_owner[2], lp->nfsl_owner[3], lp->nfsl_seqid, lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1], lp->nfsl_stateid.other[2]); LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (locks) #ifdef __FreeBSD__ printf("lck typ=%d fst=%ju end=%ju\n", lop->nfslo_type, (intmax_t)lop->nfslo_first, (intmax_t)lop->nfslo_end); #else printf("lck typ=%d fst=%qd end=%qd\n", lop->nfslo_type, lop->nfslo_first, lop->nfslo_end); #endif } } } } NFSUNLOCKCLSTATE(); } /* * Check for duplicate open owners and opens. * (Only used as a diagnostic aid.) */ APPLESTATIC void nfscl_dupopen(vnode_t vp, int dupopens) { struct nfsclclient *clp; struct nfsclowner *owp, *owp2; struct nfsclopen *op, *op2; struct nfsfh *nfhp; clp = VFSTONFS(vnode_mount(vp))->nm_clp; if (clp == NULL) { printf("nfscl dupopen NULL clp\n"); return; } nfhp = VTONFS(vp)->n_fhp; NFSLOCKCLSTATE(); /* * First, search for duplicate owners. * These should never happen! */ LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { if (owp != owp2 && !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner, NFSV4CL_LOCKNAMELEN)) { NFSUNLOCKCLSTATE(); printf("DUP OWNER\n"); nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0); return; } } } /* * Now, search for duplicate stateids. * These shouldn't happen, either. */ LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) { LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op != op2 && (op->nfso_stateid.other[0] != 0 || op->nfso_stateid.other[1] != 0 || op->nfso_stateid.other[2] != 0) && op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] && op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] && op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) { NFSUNLOCKCLSTATE(); printf("DUP STATEID\n"); nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0); return; } } } } } /* * Now search for duplicate opens. * Duplicate opens for the same owner * should never occur. Other duplicates are * possible and are checked for if "dupopens" * is true. */ LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) { if (nfhp->nfh_len == op2->nfso_fhlen && !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) { LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op != op2 && nfhp->nfh_len == op->nfso_fhlen && !NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) && (!NFSBCMP(op->nfso_own->nfsow_owner, op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) || dupopens)) { if (!NFSBCMP(op->nfso_own->nfsow_owner, op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) { NFSUNLOCKCLSTATE(); printf("BADDUP OPEN\n"); } else { NFSUNLOCKCLSTATE(); printf("DUP OPEN\n"); } nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0); return; } } } } } } NFSUNLOCKCLSTATE(); } /* * During close, find an open that needs to be dereferenced and * dereference it. If there are no more opens for this file, * log a message to that effect. * Opens aren't actually Close'd until VOP_INACTIVE() is performed * on the file's vnode. * This is the safe way, since it is difficult to identify * which open the close is for and I/O can be performed after the * close(2) system call when a file is mmap'd. * If it returns 0 for success, there will be a referenced * clp returned via clpp. */ APPLESTATIC int nfscl_getclose(vnode_t vp, struct nfsclclient **clpp) { struct nfsclclient *clp; struct nfsclowner *owp; struct nfsclopen *op; struct nfscldeleg *dp; struct nfsfh *nfhp; int error, notdecr; error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp); if (error) return (error); *clpp = clp; nfhp = VTONFS(vp)->n_fhp; notdecr = 1; NFSLOCKCLSTATE(); /* * First, look for one under a delegation that was locally issued * and just decrement the opencnt for it. Since all my Opens against * the server are DENY_NONE, I don't see a problem with hanging * onto them. (It is much easier to use one of the extant Opens * that I already have on the server when a Delegation is recalled * than to do fresh Opens.) Someday, I might need to rethink this, but. */ dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); if (dp != NULL) { LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { /* * Since a delegation is for a file, there * should never be more than one open for * each openowner. */ if (LIST_NEXT(op, nfso_list) != NULL) panic("nfscdeleg opens"); if (notdecr && op->nfso_opencnt > 0) { notdecr = 0; op->nfso_opencnt--; break; } } } } /* Now process the opens against the server. */ LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == nfhp->nfh_len && !NFSBCMP(op->nfso_fh, nfhp->nfh_fh, nfhp->nfh_len)) { /* Found an open, decrement cnt if possible */ if (notdecr && op->nfso_opencnt > 0) { notdecr = 0; op->nfso_opencnt--; } /* * There are more opens, so just return. */ if (op->nfso_opencnt > 0) { NFSUNLOCKCLSTATE(); return (0); } } } } NFSUNLOCKCLSTATE(); if (notdecr) printf("nfscl: never fnd open\n"); return (0); } APPLESTATIC int nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p) { struct nfsclclient *clp; struct nfsclowner *owp, *nowp; struct nfsclopen *op; struct nfscldeleg *dp; struct nfsfh *nfhp; struct nfsclrecalllayout *recallp; int error; error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp); if (error) return (error); *clpp = clp; nfhp = VTONFS(vp)->n_fhp; recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK); NFSLOCKCLSTATE(); /* * First get rid of the local Open structures, which should be no * longer in use. */ dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); if (dp != NULL) { LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { KASSERT((op->nfso_opencnt == 0), ("nfscl: bad open cnt on deleg")); nfscl_freeopen(op, 1); } nfscl_freeopenowner(owp, 1); } } /* Return any layouts marked return on close. */ nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp); /* Now process the opens against the server. */ lookformore: LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { op = LIST_FIRST(&owp->nfsow_open); while (op != NULL) { if (op->nfso_fhlen == nfhp->nfh_len && !NFSBCMP(op->nfso_fh, nfhp->nfh_fh, nfhp->nfh_len)) { /* Found an open, close it. */ KASSERT((op->nfso_opencnt == 0), ("nfscl: bad open cnt on server")); NFSUNLOCKCLSTATE(); nfsrpc_doclose(VFSTONFS(vnode_mount(vp)), op, p); NFSLOCKCLSTATE(); goto lookformore; } op = LIST_NEXT(op, nfso_list); } } NFSUNLOCKCLSTATE(); /* * recallp has been set NULL by nfscl_retoncloselayout() if it was * used by the function, but calling free() with a NULL pointer is ok. */ free(recallp, M_NFSLAYRECALL); return (0); } /* * Return all delegations on this client. * (Must be called with client sleep lock.) */ static void nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p) { struct nfscldeleg *dp, *ndp; struct ucred *cred; cred = newnfs_getcred(); TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) { nfscl_cleandeleg(dp); (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p); nfscl_freedeleg(&clp->nfsc_deleg, dp); } NFSFREECRED(cred); } /* * Do a callback RPC. */ APPLESTATIC void nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) { int clist, gotseq_ok, i, j, k, op, rcalls; u_int32_t *tl; struct nfsclclient *clp; struct nfscldeleg *dp = NULL; int numops, taglen = -1, error = 0, trunc; u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident; u_char tag[NFSV4_SMALLSTR + 1], *tagstr; vnode_t vp = NULL; struct nfsnode *np; struct vattr va; struct nfsfh *nfhp; mount_t mp; nfsattrbit_t attrbits, rattrbits; nfsv4stateid_t stateid; uint32_t seqid, slotid = 0, highslot, cachethis; uint8_t sessionid[NFSX_V4SESSIONID]; struct mbuf *rep; struct nfscllayout *lyp; uint64_t filesid[2], len, off; int changed, gotone, laytype, recalltype; uint32_t iomode; struct nfsclrecalllayout *recallp = NULL; struct nfsclsession *tsep; gotseq_ok = 0; nfsrvd_rephead(nd); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); taglen = fxdr_unsigned(int, *tl); if (taglen < 0) { error = EBADRPC; goto nfsmout; } if (taglen <= NFSV4_SMALLSTR) tagstr = tag; else tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, tagstr, taglen); if (error) { if (taglen > NFSV4_SMALLSTR) free(tagstr, M_TEMP); taglen = -1; goto nfsmout; } (void) nfsm_strtom(nd, tag, taglen); if (taglen > NFSV4_SMALLSTR) { free(tagstr, M_TEMP); } NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); minorvers = fxdr_unsigned(u_int32_t, *tl++); if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION) nd->nd_repstat = NFSERR_MINORVERMISMATCH; cbident = fxdr_unsigned(u_int32_t, *tl++); if (nd->nd_repstat) numops = 0; else numops = fxdr_unsigned(int, *tl); /* * Loop around doing the sub ops. */ for (i = 0; i < numops; i++) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED); *repp++ = *tl; op = fxdr_unsigned(int, *tl); if (op < NFSV4OP_CBGETATTR || (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) || (op > NFSV4OP_CBNOTIFYDEVID && minorvers == NFSV41_MINORVERSION)) { nd->nd_repstat = NFSERR_OPILLEGAL; *repp = nfscl_errmap(nd, minorvers); retops++; break; } nd->nd_procnum = op; if (op < NFSV41_CBNOPS) nfsstatsv1.cbrpccnt[nd->nd_procnum]++; switch (op) { case NFSV4OP_CBGETATTR: NFSCL_DEBUG(4, "cbgetattr\n"); mp = NULL; vp = NULL; error = nfsm_getfh(nd, &nfhp); if (!error) error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error == 0 && i == 0 && minorvers != NFSV4_MINORVERSION) error = NFSERR_OPNOTINSESS; if (!error) { mp = nfscl_getmnt(minorvers, sessionid, cbident, &clp); if (mp == NULL) error = NFSERR_SERVERFAULT; } if (!error) { error = nfscl_ngetreopen(mp, nfhp->nfh_fh, nfhp->nfh_len, p, &np); if (!error) vp = NFSTOV(np); } if (!error) { NFSZERO_ATTRBIT(&rattrbits); NFSLOCKCLSTATE(); dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); if (dp != NULL) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SIZE)) { if (vp != NULL) va.va_size = np->n_size; else va.va_size = dp->nfsdl_size; NFSSETBIT_ATTRBIT(&rattrbits, NFSATTRBIT_SIZE); } if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE)) { va.va_filerev = dp->nfsdl_change; if (vp == NULL || (np->n_flag & NDELEGMOD)) va.va_filerev++; NFSSETBIT_ATTRBIT(&rattrbits, NFSATTRBIT_CHANGE); } } else error = NFSERR_SERVERFAULT; NFSUNLOCKCLSTATE(); } if (vp != NULL) vrele(vp); if (mp != NULL) vfs_unbusy(mp); if (nfhp != NULL) free(nfhp, M_NFSFH); if (!error) (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va, NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0, - (uint64_t)0); + (uint64_t)0, NULL); break; case NFSV4OP_CBRECALL: NFSCL_DEBUG(4, "cbrecall\n"); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stateid.seqid = *tl++; NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); trunc = fxdr_unsigned(int, *tl); error = nfsm_getfh(nd, &nfhp); if (error == 0 && i == 0 && minorvers != NFSV4_MINORVERSION) error = NFSERR_OPNOTINSESS; if (!error) { NFSLOCKCLSTATE(); if (minorvers == NFSV4_MINORVERSION) clp = nfscl_getclnt(cbident); else clp = nfscl_getclntsess(sessionid); if (clp != NULL) { dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0) { dp->nfsdl_flags |= NFSCLDL_RECALL; wakeup((caddr_t)clp); } } else { error = NFSERR_SERVERFAULT; } NFSUNLOCKCLSTATE(); } if (nfhp != NULL) free(nfhp, M_NFSFH); break; case NFSV4OP_CBLAYOUTRECALL: NFSCL_DEBUG(4, "cblayrec\n"); nfhp = NULL; NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); laytype = fxdr_unsigned(int, *tl++); iomode = fxdr_unsigned(uint32_t, *tl++); if (newnfs_true == *tl++) changed = 1; else changed = 0; recalltype = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "layt=%d iom=%d ch=%d rectyp=%d\n", laytype, iomode, changed, recalltype); recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK); if (laytype != NFSLAYOUT_NFSV4_1_FILES && laytype != NFSLAYOUT_FLEXFILE) error = NFSERR_NOMATCHLAYOUT; else if (recalltype == NFSLAYOUTRETURN_FILE) { error = nfsm_getfh(nd, &nfhp); NFSCL_DEBUG(4, "retfile getfh=%d\n", error); if (error != 0) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID); off = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); if (minorvers == NFSV4_MINORVERSION) error = NFSERR_NOTSUPP; else if (i == 0) error = NFSERR_OPNOTINSESS; NFSCL_DEBUG(4, "off=%ju len=%ju sq=%u err=%d\n", (uintmax_t)off, (uintmax_t)len, stateid.seqid, error); if (error == 0) { NFSLOCKCLSTATE(); clp = nfscl_getclntsess(sessionid); NFSCL_DEBUG(4, "cbly clp=%p\n", clp); if (clp != NULL) { lyp = nfscl_findlayout(clp, nfhp->nfh_fh, nfhp->nfh_len); NFSCL_DEBUG(4, "cblyp=%p\n", lyp); if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_FILES | NFSLY_FLEXFILE)) != 0 && !NFSBCMP(stateid.other, lyp->nfsly_stateid.other, NFSX_STATEIDOTHER)) { error = nfscl_layoutrecall( recalltype, lyp, iomode, off, len, stateid.seqid, recallp); recallp = NULL; wakeup(clp); NFSCL_DEBUG(4, "aft layrcal=%d\n", error); } else error = NFSERR_NOMATCHLAYOUT; } else error = NFSERR_NOMATCHLAYOUT; NFSUNLOCKCLSTATE(); } free(nfhp, M_NFSFH); } else if (recalltype == NFSLAYOUTRETURN_FSID) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER); filesid[0] = fxdr_hyper(tl); tl += 2; filesid[1] = fxdr_hyper(tl); tl += 2; gotone = 0; NFSLOCKCLSTATE(); clp = nfscl_getclntsess(sessionid); if (clp != NULL) { TAILQ_FOREACH(lyp, &clp->nfsc_layout, nfsly_list) { if (lyp->nfsly_filesid[0] == filesid[0] && lyp->nfsly_filesid[1] == filesid[1]) { error = nfscl_layoutrecall( recalltype, lyp, iomode, 0, UINT64_MAX, lyp->nfsly_stateid.seqid, recallp); recallp = NULL; gotone = 1; } } if (gotone != 0) wakeup(clp); else error = NFSERR_NOMATCHLAYOUT; } else error = NFSERR_NOMATCHLAYOUT; NFSUNLOCKCLSTATE(); } else if (recalltype == NFSLAYOUTRETURN_ALL) { gotone = 0; NFSLOCKCLSTATE(); clp = nfscl_getclntsess(sessionid); if (clp != NULL) { TAILQ_FOREACH(lyp, &clp->nfsc_layout, nfsly_list) { error = nfscl_layoutrecall( recalltype, lyp, iomode, 0, UINT64_MAX, lyp->nfsly_stateid.seqid, recallp); recallp = NULL; gotone = 1; } if (gotone != 0) wakeup(clp); else error = NFSERR_NOMATCHLAYOUT; } else error = NFSERR_NOMATCHLAYOUT; NFSUNLOCKCLSTATE(); } else error = NFSERR_NOMATCHLAYOUT; if (recallp != NULL) { free(recallp, M_NFSLAYRECALL); recallp = NULL; } break; case NFSV4OP_CBSEQUENCE: NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED); bcopy(tl, sessionid, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; seqid = fxdr_unsigned(uint32_t, *tl++); slotid = fxdr_unsigned(uint32_t, *tl++); highslot = fxdr_unsigned(uint32_t, *tl++); cachethis = *tl++; /* Throw away the referring call stuff. */ clist = fxdr_unsigned(int, *tl); for (j = 0; j < clist; j++) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + NFSX_UNSIGNED); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; rcalls = fxdr_unsigned(int, *tl); for (k = 0; k < rcalls; k++) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); } } NFSLOCKCLSTATE(); if (i == 0) { clp = nfscl_getclntsess(sessionid); if (clp == NULL) error = NFSERR_SERVERFAULT; } else error = NFSERR_SEQUENCEPOS; if (error == 0) { tsep = nfsmnt_mdssession(clp->nfsc_nmp); error = nfsv4_seqsession(seqid, slotid, highslot, tsep->nfsess_cbslots, &rep, tsep->nfsess_backslots); } NFSUNLOCKCLSTATE(); if (error == 0 || error == NFSERR_REPLYFROMCACHE) { gotseq_ok = 1; if (rep != NULL) { /* * Handle a reply for a retried * callback. The reply will be * re-inserted in the session cache * by the nfsv4_seqsess_cacherep() call * after out: */ KASSERT(error == NFSERR_REPLYFROMCACHE, ("cbsequence: non-NULL rep")); NFSCL_DEBUG(4, "Got cbretry\n"); m_freem(nd->nd_mreq); nd->nd_mreq = rep; rep = NULL; goto out; } NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; *tl++ = txdr_unsigned(seqid); *tl++ = txdr_unsigned(slotid); *tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1); *tl = txdr_unsigned(NFSV4_CBSLOTS - 1); } break; default: if (i == 0 && minorvers == NFSV41_MINORVERSION) error = NFSERR_OPNOTINSESS; else { NFSCL_DEBUG(1, "unsupp callback %d\n", op); error = NFSERR_NOTSUPP; } break; } if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) { nd->nd_repstat = NFSERR_BADXDR; } else { nd->nd_repstat = error; } error = 0; } retops++; if (nd->nd_repstat) { *repp = nfscl_errmap(nd, minorvers); break; } else *repp = 0; /* NFS4_OK */ } nfsmout: if (recallp != NULL) free(recallp, M_NFSLAYRECALL); if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) nd->nd_repstat = NFSERR_BADXDR; else printf("nfsv4 comperr1=%d\n", error); } if (taglen == -1) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = 0; } else { *retopsp = txdr_unsigned(retops); } *nd->nd_errp = nfscl_errmap(nd, minorvers); out: if (gotseq_ok != 0) { rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK); NFSLOCKCLSTATE(); clp = nfscl_getclntsess(sessionid); if (clp != NULL) { tsep = nfsmnt_mdssession(clp->nfsc_nmp); nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots, NFSERR_OK, &rep); NFSUNLOCKCLSTATE(); } else { NFSUNLOCKCLSTATE(); m_freem(rep); } } } /* * Generate the next cbident value. Basically just increment a static value * and then check that it isn't already in the list, if it has wrapped around. */ static u_int32_t nfscl_nextcbident(void) { struct nfsclclient *clp; int matched; static u_int32_t nextcbident = 0; static int haswrapped = 0; nextcbident++; if (nextcbident == 0) haswrapped = 1; if (haswrapped) { /* * Search the clientid list for one already using this cbident. */ do { matched = 0; NFSLOCKCLSTATE(); LIST_FOREACH(clp, &nfsclhead, nfsc_list) { if (clp->nfsc_cbident == nextcbident) { matched = 1; break; } } NFSUNLOCKCLSTATE(); if (matched == 1) nextcbident++; } while (matched); } return (nextcbident); } /* * Get the mount point related to a given cbident or session and busy it. */ static mount_t nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident, struct nfsclclient **clpp) { struct nfsclclient *clp; mount_t mp; int error; struct nfsclsession *tsep; *clpp = NULL; NFSLOCKCLSTATE(); LIST_FOREACH(clp, &nfsclhead, nfsc_list) { tsep = nfsmnt_mdssession(clp->nfsc_nmp); if (minorvers == NFSV4_MINORVERSION) { if (clp->nfsc_cbident == cbident) break; } else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID)) break; } if (clp == NULL) { NFSUNLOCKCLSTATE(); return (NULL); } mp = clp->nfsc_nmp->nm_mountp; vfs_ref(mp); NFSUNLOCKCLSTATE(); error = vfs_busy(mp, 0); vfs_rel(mp); if (error != 0) return (NULL); *clpp = clp; return (mp); } /* * Get the clientid pointer related to a given cbident. */ static struct nfsclclient * nfscl_getclnt(u_int32_t cbident) { struct nfsclclient *clp; LIST_FOREACH(clp, &nfsclhead, nfsc_list) if (clp->nfsc_cbident == cbident) break; return (clp); } /* * Get the clientid pointer related to a given sessionid. */ static struct nfsclclient * nfscl_getclntsess(uint8_t *sessionid) { struct nfsclclient *clp; struct nfsclsession *tsep; LIST_FOREACH(clp, &nfsclhead, nfsc_list) { tsep = nfsmnt_mdssession(clp->nfsc_nmp); if (!NFSBCMP(tsep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID)) break; } return (clp); } /* * Search for a lock conflict locally on the client. A conflict occurs if * - not same owner and overlapping byte range and at least one of them is * a write lock or this is an unlock. */ static int nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen, struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp, struct nfscllock **lopp) { struct nfsclowner *owp; struct nfsclopen *op; int ret; if (dp != NULL) { ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp); if (ret) return (ret); } LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, fhp, fhlen)) { ret = nfscl_checkconflict(&op->nfso_lock, nlop, own, lopp); if (ret) return (ret); } } } return (0); } static int nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop, u_int8_t *own, struct nfscllock **lopp) { struct nfscllockowner *lp; struct nfscllock *lop; LIST_FOREACH(lp, lhp, nfsl_list) { if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (lop->nfslo_first >= nlop->nfslo_end) break; if (lop->nfslo_end <= nlop->nfslo_first) continue; if (lop->nfslo_type == F_WRLCK || nlop->nfslo_type == F_WRLCK || nlop->nfslo_type == F_UNLCK) { if (lopp != NULL) *lopp = lop; return (NFSERR_DENIED); } } } } return (0); } /* * Check for a local conflicting lock. */ APPLESTATIC int nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags) { struct nfscllock *lop, nlck; struct nfscldeleg *dp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int error; nlck.nfslo_type = fl->l_type; nlck.nfslo_first = off; if (len == NFS64BITSSET) { nlck.nfslo_end = NFS64BITSSET; } else { nlck.nfslo_end = off + len; if (nlck.nfslo_end <= nlck.nfslo_first) return (NFSERR_INVAL); } np = VTONFS(vp); nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, &nlck, own, dp, &lop); if (error != 0) { fl->l_whence = SEEK_SET; fl->l_start = lop->nfslo_first; if (lop->nfslo_end == NFS64BITSSET) fl->l_len = 0; else fl->l_len = lop->nfslo_end - lop->nfslo_first; fl->l_pid = (pid_t)0; fl->l_type = lop->nfslo_type; error = -1; /* no RPC required */ } else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) || fl->l_type == F_RDLCK)) { /* * The delegation ensures that there isn't a conflicting * lock on the server, so return -1 to indicate an RPC * isn't required. */ fl->l_type = F_UNLCK; error = -1; } NFSUNLOCKCLSTATE(); return (error); } /* * Handle Recall of a delegation. * The clp must be exclusive locked when this is called. */ static int nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p, int called_from_renewthread) { struct nfsclowner *owp, *lowp, *nowp; struct nfsclopen *op, *lop; struct nfscllockowner *lp; struct nfscllock *lckp; struct nfsnode *np; int error = 0, ret, gotvp = 0; if (vp == NULL) { /* * First, get a vnode for the file. This is needed to do RPCs. */ ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh, dp->nfsdl_fhlen, p, &np); if (ret) { /* * File isn't open, so nothing to move over to the * server. */ return (0); } vp = NFSTOV(np); gotvp = 1; } else { np = VTONFS(vp); } dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET; /* * Ok, if it's a write delegation, flush data to the server, so * that close/open consistency is retained. */ ret = 0; NFSLOCKNODE(np); if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) { np->n_flag |= NDELEGRECALL; NFSUNLOCKNODE(np); ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread); NFSLOCKNODE(np); np->n_flag &= ~NDELEGRECALL; } NFSINVALATTRCACHE(np); NFSUNLOCKNODE(np); if (ret == EIO && called_from_renewthread != 0) { /* * If the flush failed with EIO for the renew thread, * return now, so that the dirty buffer will be flushed * later. */ if (gotvp != 0) vrele(vp); return (ret); } /* * Now, for each openowner with opens issued locally, move them * over to state against the server. */ LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) { lop = LIST_FIRST(&lowp->nfsow_open); if (lop != NULL) { if (LIST_NEXT(lop, nfso_list) != NULL) panic("nfsdlg mult opens"); /* * Look for the same openowner against the server. */ LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { if (!NFSBCMP(lowp->nfsow_owner, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) { newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_moveopen(vp, clp, nmp, lop, owp, dp, cred, p); if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret) { nfscl_freeopen(lop, 1); if (!error) error = ret; } break; } } /* * If no openowner found, create one and get an open * for it. */ if (owp == NULL) { nowp = malloc( sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); nfscl_newopen(clp, NULL, &owp, &nowp, &op, NULL, lowp->nfsow_owner, dp->nfsdl_fh, dp->nfsdl_fhlen, NULL, NULL); newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_moveopen(vp, clp, nmp, lop, owp, dp, cred, p); if (ret) { nfscl_freeopenowner(owp, 0); if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret) { nfscl_freeopen(lop, 1); if (!error) error = ret; } } } } } /* * Now, get byte range locks for any locks done locally. */ LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) { newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p); if (ret == NFSERR_STALESTATEID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_STALECLIENTID || ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret && !error) error = ret; } } if (gotvp) vrele(vp); return (error); } /* * Move a locally issued open over to an owner on the state list. * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and * returns with it unlocked. */ static int nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp, struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp, struct ucred *cred, NFSPROC_T *p) { struct nfsclopen *op, *nop; struct nfscldeleg *ndp; struct nfsnode *np; int error = 0, newone; /* * First, look for an appropriate open, If found, just increment the * opencnt in it. */ LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode && op->nfso_fhlen == lop->nfso_fhlen && !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) { op->nfso_opencnt += lop->nfso_opencnt; nfscl_freeopen(lop, 1); return (0); } } /* No appropriate open, so we have to do one against the server. */ np = VTONFS(vp); nop = malloc(sizeof (struct nfsclopen) + lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK); newone = 0; nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner, lop->nfso_fh, lop->nfso_fhlen, cred, &newone); ndp = dp; error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p); if (error) { if (newone) nfscl_freeopen(op, 0); } else { op->nfso_mode |= lop->nfso_mode; op->nfso_opencnt += lop->nfso_opencnt; nfscl_freeopen(lop, 1); } if (nop != NULL) free(nop, M_NFSCLOPEN); if (ndp != NULL) { /* * What should I do with the returned delegation, since the * delegation is being recalled? For now, just printf and * through it away. */ printf("Moveopen returned deleg\n"); free(ndp, M_NFSCLDELEG); } return (error); } /* * Recall all delegations on this client. */ static void nfscl_totalrecall(struct nfsclclient *clp) { struct nfscldeleg *dp; TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0) dp->nfsdl_flags |= NFSCLDL_RECALL; } } /* * Relock byte ranges. Called for delegation recall and state expiry. */ static int nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp, struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred, NFSPROC_T *p) { struct nfscllockowner *nlp; struct nfsfh *nfhp; u_int64_t off, len; int error, newone, donelocally; off = lop->nfslo_first; len = lop->nfslo_end - lop->nfslo_first; error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p, clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone, &donelocally); if (error || donelocally) return (error); nfhp = VTONFS(vp)->n_fhp; error = nfscl_trylock(nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, nlp, newone, 0, off, len, lop->nfslo_type, cred, p); if (error) nfscl_freelockowner(nlp, 0); return (error); } /* * Called to re-open a file. Basically get a vnode for the file handle * and then call nfsrpc_openrpc() to do the rest. */ static int nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p) { struct nfsnode *np; vnode_t vp; int error; error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np); if (error) return (error); vp = NFSTOV(np); if (np->n_v4 != NULL) { error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, fhp, fhlen, mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0, cred, p); } else { error = EINVAL; } vrele(vp); return (error); } /* * Try an open against the server. Just call nfsrpc_openrpc(), retrying while * NFSERR_DELAY. Also, try system credentials, if the passed in credentials * fail. */ static int nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op, u_int8_t *name, int namelen, struct nfscldeleg **ndpp, int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p) { int error; do { error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p, 0, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstryop"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ newnfs_setroot(cred); do { error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p, 1, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstryop"); } while (error == NFSERR_DELAY); } return (error); } /* * Try a byte range lock. Just loop on nfsrpc_lock() while it returns * NFSERR_DELAY. Also, retry with system credentials, if the provided * cred don't work. */ static int nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, struct nfscllockowner *nlp, int newone, int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; do { error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone, reclaim, off, len, type, cred, p, 0); if (!error && nd->nd_repstat == NFSERR_DELAY) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfstrylck"); } while (!error && nd->nd_repstat == NFSERR_DELAY); if (!error) error = nd->nd_repstat; if (error == EAUTH || error == EACCES) { /* Try again using root credentials */ newnfs_setroot(cred); do { error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone, reclaim, off, len, type, cred, p, 1); if (!error && nd->nd_repstat == NFSERR_DELAY) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfstrylck"); } while (!error && nd->nd_repstat == NFSERR_DELAY); if (!error) error = nd->nd_repstat; } return (error); } /* * Try a delegreturn against the server. Just call nfsrpc_delegreturn(), * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in * credentials fail. */ static int nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred, struct nfsmount *nmp, NFSPROC_T *p) { int error; do { error = nfsrpc_delegreturn(dp, cred, nmp, p, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrydp"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ newnfs_setroot(cred); do { error = nfsrpc_delegreturn(dp, cred, nmp, p, 1); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrydp"); } while (error == NFSERR_DELAY); } return (error); } /* * Try a close against the server. Just call nfsrpc_closerpc(), * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in * credentials fail. */ APPLESTATIC int nfscl_tryclose(struct nfsclopen *op, struct ucred *cred, struct nfsmount *nmp, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; do { error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrycl"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ newnfs_setroot(cred); do { error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrycl"); } while (error == NFSERR_DELAY); } return (error); } /* * Decide if a delegation on a file permits close without flushing writes * to the server. This might be a big performance win in some environments. * (Not useful until the client does caching on local stable storage.) */ APPLESTATIC int nfscl_mustflush(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np; struct nfsmount *nmp; np = VTONFS(vp); nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (1); NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (1); } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == NFSCLDL_WRITE && (dp->nfsdl_sizelimit >= np->n_size || !NFSHASSTRICT3530(nmp))) { NFSUNLOCKCLSTATE(); return (0); } NFSUNLOCKCLSTATE(); return (1); } /* * See if a (write) delegation exists for this file. */ APPLESTATIC int nfscl_nodeleg(vnode_t vp, int writedeleg) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np; struct nfsmount *nmp; np = VTONFS(vp); nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (1); NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (1); } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 && (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) == NFSCLDL_WRITE)) { NFSUNLOCKCLSTATE(); return (0); } NFSUNLOCKCLSTATE(); return (1); } /* * Look for an associated delegation that should be DelegReturned. */ APPLESTATIC int nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsclowner *owp; struct nfscllockowner *lp; struct nfsmount *nmp; struct ucred *cred; struct nfsnode *np; int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept; nmp = VFSTONFS(vnode_mount(vp)); np = VTONFS(vp); NFSLOCKCLSTATE(); /* * Loop around waiting for: * - outstanding I/O operations on delegations to complete * - for a delegation on vp that has state, lock the client and * do a recall * - return delegation with no state */ while (1) { clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (retcnt); } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } needsrecall = 0; LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { needsrecall = 1; break; } } if (!needsrecall) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { needsrecall = 1; break; } } } if (needsrecall && !triedrecall) { dp->nfsdl_flags |= NFSCLDL_DELEGRET; islept = 0; while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) break; } if (islept) continue; NFSUNLOCKCLSTATE(); cred = newnfs_getcred(); newnfs_copycred(&dp->nfsdl_cred, cred); (void) nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0); NFSFREECRED(cred); triedrecall = 1; NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; continue; } *stp = dp->nfsdl_stateid; retcnt = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (retcnt); } } /* * Look for associated delegation(s) that should be DelegReturned. */ APPLESTATIC int nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp, nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsclowner *owp; struct nfscllockowner *lp; struct nfsmount *nmp; struct ucred *cred; struct nfsnode *np; int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept; nmp = VFSTONFS(vnode_mount(fvp)); *gotfdp = 0; *gottdp = 0; NFSLOCKCLSTATE(); /* * Loop around waiting for: * - outstanding I/O operations on delegations to complete * - for a delegation on fvp that has state, lock the client and * do a recall * - return delegation(s) with no state. */ while (1) { clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (retcnt); } np = VTONFS(fvp); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && *gotfdp == 0) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } needsrecall = 0; LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { needsrecall = 1; break; } } if (!needsrecall) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { needsrecall = 1; break; } } } if (needsrecall && !triedrecall) { dp->nfsdl_flags |= NFSCLDL_DELEGRET; islept = 0; while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) break; } if (islept) continue; NFSUNLOCKCLSTATE(); cred = newnfs_getcred(); newnfs_copycred(&dp->nfsdl_cred, cred); (void) nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0); NFSFREECRED(cred); triedrecall = 1; NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; continue; } *fstp = dp->nfsdl_stateid; retcnt++; *gotfdp = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } if (tvp != NULL) { np = VTONFS(tvp); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && *gottdp == 0) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { NFSUNLOCKCLSTATE(); return (retcnt); } } LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { NFSUNLOCKCLSTATE(); return (retcnt); } } *tstp = dp->nfsdl_stateid; retcnt++; *gottdp = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } } NFSUNLOCKCLSTATE(); return (retcnt); } } /* * Get a reference on the clientid associated with the mount point. * Return 1 if success, 0 otherwise. */ APPLESTATIC int nfscl_getref(struct nfsmount *nmp) { struct nfsclclient *clp; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (0); } nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL); NFSUNLOCKCLSTATE(); return (1); } /* * Release a reference on a clientid acquired with the above call. */ APPLESTATIC void nfscl_relref(struct nfsmount *nmp) { struct nfsclclient *clp; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } nfsv4_relref(&clp->nfsc_lock); NFSUNLOCKCLSTATE(); } /* * Save the size attribute in the delegation, since the nfsnode * is going away. */ APPLESTATIC void nfscl_reclaimnode(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) dp->nfsdl_size = np->n_size; NFSUNLOCKCLSTATE(); } /* * Get the saved size attribute in the delegation, since it is a * newly allocated nfsnode. */ APPLESTATIC void nfscl_newnode(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) np->n_size = dp->nfsdl_size; NFSUNLOCKCLSTATE(); } /* * If there is a valid write delegation for this file, set the modtime * to the local clock time. */ APPLESTATIC void nfscl_delegmodtime(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) { nanotime(&dp->nfsdl_modtime); dp->nfsdl_flags |= NFSCLDL_MODTIMESET; } NFSUNLOCKCLSTATE(); } /* * If there is a valid write delegation for this file with a modtime set, * put that modtime in mtime. */ APPLESTATIC void nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) == (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) *mtime = dp->nfsdl_modtime; NFSUNLOCKCLSTATE(); } static int nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers) { short *defaulterrp, *errp; if (!nd->nd_repstat) return (0); if (nd->nd_procnum == NFSPROC_NOOP) return (txdr_unsigned(nd->nd_repstat & 0xffff)); if (nd->nd_repstat == EBADRPC) return (txdr_unsigned(NFSERR_BADXDR)); if (nd->nd_repstat == NFSERR_MINORVERMISMATCH || nd->nd_repstat == NFSERR_OPILLEGAL) return (txdr_unsigned(nd->nd_repstat)); if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 && minorvers > NFSV4_MINORVERSION) { /* NFSv4.n error. */ return (txdr_unsigned(nd->nd_repstat)); } if (nd->nd_procnum < NFSV4OP_CBNOPS) errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum]; else return (txdr_unsigned(nd->nd_repstat)); while (*++errp) if (*errp == (short)nd->nd_repstat) return (txdr_unsigned(nd->nd_repstat)); return (txdr_unsigned(*defaulterrp)); } /* * Called to find/add a layout to a client. * This function returns the layout with a refcnt (shared lock) upon * success (returns 0) or with no lock/refcnt on the layout when an * error is returned. * If a layout is passed in via lypp, it is locked (exclusively locked). */ APPLESTATIC int nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, nfsv4stateid_t *stateidp, int layouttype, int retonclose, struct nfsclflayouthead *fhlp, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p) { struct nfsclclient *clp; struct nfscllayout *lyp, *tlyp; struct nfsclflayout *flp; struct nfsnode *np = VTONFS(vp); mount_t mp; int layout_passed_in; mp = nmp->nm_mountp; layout_passed_in = 1; tlyp = NULL; lyp = *lypp; if (lyp == NULL) { layout_passed_in = 0; tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT, M_WAITOK | M_ZERO); } NFSLOCKCLSTATE(); clp = nmp->nm_clp; if (clp == NULL) { if (layout_passed_in != 0) nfsv4_unlock(&lyp->nfsly_lock, 0); NFSUNLOCKCLSTATE(); if (tlyp != NULL) free(tlyp, M_NFSLAYOUT); return (EPERM); } if (lyp == NULL) { /* * Although no lyp was passed in, another thread might have * allocated one. If one is found, just increment it's ref * count and return it. */ lyp = nfscl_findlayout(clp, fhp, fhlen); if (lyp == NULL) { lyp = tlyp; tlyp = NULL; lyp->nfsly_stateid.seqid = stateidp->seqid; lyp->nfsly_stateid.other[0] = stateidp->other[0]; lyp->nfsly_stateid.other[1] = stateidp->other[1]; lyp->nfsly_stateid.other[2] = stateidp->other[2]; lyp->nfsly_lastbyte = 0; LIST_INIT(&lyp->nfsly_flayread); LIST_INIT(&lyp->nfsly_flayrw); LIST_INIT(&lyp->nfsly_recall); lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0]; lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1]; lyp->nfsly_clp = clp; if (layouttype == NFSLAYOUT_FLEXFILE) lyp->nfsly_flags = NFSLY_FLEXFILE; else lyp->nfsly_flags = NFSLY_FILES; if (retonclose != 0) lyp->nfsly_flags |= NFSLY_RETONCLOSE; lyp->nfsly_fhlen = fhlen; NFSBCOPY(fhp, lyp->nfsly_fh, fhlen); TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp, nfsly_hash); lyp->nfsly_timestamp = NFSD_MONOSEC + 120; nfscl_layoutcnt++; } else { if (retonclose != 0) lyp->nfsly_flags |= NFSLY_RETONCLOSE; TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); lyp->nfsly_timestamp = NFSD_MONOSEC + 120; } nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); if (NFSCL_FORCEDISM(mp)) { NFSUNLOCKCLSTATE(); if (tlyp != NULL) free(tlyp, M_NFSLAYOUT); return (EPERM); } *lypp = lyp; } else lyp->nfsly_stateid.seqid = stateidp->seqid; /* Merge the new list of File Layouts into the list. */ flp = LIST_FIRST(fhlp); if (flp != NULL) { if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ) nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp); else nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp); } if (layout_passed_in != 0) nfsv4_unlock(&lyp->nfsly_lock, 1); NFSUNLOCKCLSTATE(); if (tlyp != NULL) free(tlyp, M_NFSLAYOUT); return (0); } /* * Search for a layout by MDS file handle. * If one is found, it is returned with a refcnt (shared lock) iff * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is * returned NULL. */ struct nfscllayout * nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen, uint64_t off, struct nfsclflayout **retflpp, int *recalledp) { struct nfscllayout *lyp; mount_t mp; int error, igotlock; mp = clp->nfsc_nmp->nm_mountp; *recalledp = 0; *retflpp = NULL; NFSLOCKCLSTATE(); lyp = nfscl_findlayout(clp, fhp, fhlen); if (lyp != NULL) { if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) { TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); lyp->nfsly_timestamp = NFSD_MONOSEC + 120; error = nfscl_findlayoutforio(lyp, off, NFSV4OPEN_ACCESSREAD, retflpp); if (error == 0) nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); else { do { igotlock = nfsv4_lock(&lyp->nfsly_lock, 1, NULL, NFSCLSTATEMUTEXPTR, mp); } while (igotlock == 0 && !NFSCL_FORCEDISM(mp)); *retflpp = NULL; } if (NFSCL_FORCEDISM(mp)) { lyp = NULL; *recalledp = 1; } } else { lyp = NULL; *recalledp = 1; } } NFSUNLOCKCLSTATE(); return (lyp); } /* * Search for a layout by MDS file handle. If one is found, mark in to be * recalled, if it already marked "return on close". */ static void nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp, int fhlen, struct nfsclrecalllayout **recallpp) { struct nfscllayout *lyp; uint32_t iomode; if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vnode_mount(vp))) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || (VTONFS(vp)->n_flag & NNOLAYOUT) != 0) return; lyp = nfscl_findlayout(clp, fhp, fhlen); if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE | NFSLY_RECALL)) == NFSLY_RETONCLOSE) { iomode = 0; if (!LIST_EMPTY(&lyp->nfsly_flayread)) iomode |= NFSLAYOUTIOMODE_READ; if (!LIST_EMPTY(&lyp->nfsly_flayrw)) iomode |= NFSLAYOUTIOMODE_RW; (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode, 0, UINT64_MAX, lyp->nfsly_stateid.seqid, *recallpp); NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode); *recallpp = NULL; } } /* * Dereference a layout. */ void nfscl_rellayout(struct nfscllayout *lyp, int exclocked) { NFSLOCKCLSTATE(); if (exclocked != 0) nfsv4_unlock(&lyp->nfsly_lock, 0); else nfsv4_relref(&lyp->nfsly_lock); NFSUNLOCKCLSTATE(); } /* * Search for a devinfo by deviceid. If one is found, return it after * acquiring a reference count on it. */ struct nfscldevinfo * nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid, struct nfscldevinfo *dip) { NFSLOCKCLSTATE(); if (dip == NULL) dip = nfscl_finddevinfo(clp, deviceid); if (dip != NULL) dip->nfsdi_refcnt++; NFSUNLOCKCLSTATE(); return (dip); } /* * Dereference a devinfo structure. */ static void nfscl_reldevinfo_locked(struct nfscldevinfo *dip) { dip->nfsdi_refcnt--; if (dip->nfsdi_refcnt == 0) wakeup(&dip->nfsdi_refcnt); } /* * Dereference a devinfo structure. */ void nfscl_reldevinfo(struct nfscldevinfo *dip) { NFSLOCKCLSTATE(); nfscl_reldevinfo_locked(dip); NFSUNLOCKCLSTATE(); } /* * Find a layout for this file handle. Return NULL upon failure. */ static struct nfscllayout * nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen) { struct nfscllayout *lyp; LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash) if (lyp->nfsly_fhlen == fhlen && !NFSBCMP(lyp->nfsly_fh, fhp, fhlen)) break; return (lyp); } /* * Find a devinfo for this deviceid. Return NULL upon failure. */ static struct nfscldevinfo * nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid) { struct nfscldevinfo *dip; LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list) if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID) == 0) break; return (dip); } /* * Merge the new file layout list into the main one, maintaining it in * increasing offset order. */ static void nfscl_mergeflayouts(struct nfsclflayouthead *fhlp, struct nfsclflayouthead *newfhlp) { struct nfsclflayout *flp, *nflp, *prevflp, *tflp; flp = LIST_FIRST(fhlp); prevflp = NULL; LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) { while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) { prevflp = flp; flp = LIST_NEXT(flp, nfsfl_list); } if (prevflp == NULL) LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list); else LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list); prevflp = nflp; } } /* * Add this nfscldevinfo to the client, if it doesn't already exist. * This function consumes the structure pointed at by dip, if not NULL. */ APPLESTATIC int nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, struct nfsclflayout *flp) { struct nfsclclient *clp; struct nfscldevinfo *tdip; uint8_t *dev; NFSLOCKCLSTATE(); clp = nmp->nm_clp; if (clp == NULL) { NFSUNLOCKCLSTATE(); if (dip != NULL) free(dip, M_NFSDEVINFO); return (ENODEV); } if ((flp->nfsfl_flags & NFSFL_FILE) != 0) dev = flp->nfsfl_dev; else dev = flp->nfsfl_ffm[0].dev; tdip = nfscl_finddevinfo(clp, dev); if (tdip != NULL) { tdip->nfsdi_layoutrefs++; flp->nfsfl_devp = tdip; nfscl_reldevinfo_locked(tdip); NFSUNLOCKCLSTATE(); if (dip != NULL) free(dip, M_NFSDEVINFO); return (0); } if (dip != NULL) { LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list); dip->nfsdi_layoutrefs = 1; flp->nfsfl_devp = dip; } NFSUNLOCKCLSTATE(); if (dip == NULL) return (ENODEV); return (0); } /* * Free up a layout structure and associated file layout structure(s). */ APPLESTATIC void nfscl_freelayout(struct nfscllayout *layp) { struct nfsclflayout *flp, *nflp; struct nfsclrecalllayout *rp, *nrp; LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) { LIST_REMOVE(flp, nfsfl_list); nfscl_freeflayout(flp); } LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) { LIST_REMOVE(flp, nfsfl_list); nfscl_freeflayout(flp); } LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) { LIST_REMOVE(rp, nfsrecly_list); free(rp, M_NFSLAYRECALL); } nfscl_layoutcnt--; free(layp, M_NFSLAYOUT); } /* * Free up a file layout structure. */ APPLESTATIC void nfscl_freeflayout(struct nfsclflayout *flp) { int i, j; if ((flp->nfsfl_flags & NFSFL_FILE) != 0) for (i = 0; i < flp->nfsfl_fhcnt; i++) free(flp->nfsfl_fh[i], M_NFSFH); if ((flp->nfsfl_flags & NFSFL_FLEXFILE) != 0) for (i = 0; i < flp->nfsfl_mirrorcnt; i++) for (j = 0; j < flp->nfsfl_ffm[i].fhcnt; j++) free(flp->nfsfl_ffm[i].fh[j], M_NFSFH); if (flp->nfsfl_devp != NULL) flp->nfsfl_devp->nfsdi_layoutrefs--; free(flp, M_NFSFLAYOUT); } /* * Free up a file layout devinfo structure. */ APPLESTATIC void nfscl_freedevinfo(struct nfscldevinfo *dip) { free(dip, M_NFSDEVINFO); } /* * Mark any layouts that match as recalled. */ static int nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode, uint64_t off, uint64_t len, uint32_t stateseqid, struct nfsclrecalllayout *recallp) { struct nfsclrecalllayout *rp, *orp; recallp->nfsrecly_recalltype = recalltype; recallp->nfsrecly_iomode = iomode; recallp->nfsrecly_stateseqid = stateseqid; recallp->nfsrecly_off = off; recallp->nfsrecly_len = len; /* * Order the list as file returns first, followed by fsid and any * returns, both in increasing stateseqid order. * Note that the seqids wrap around, so 1 is after 0xffffffff. * (I'm not sure this is correct because I find RFC5661 confusing * on this, but hopefully it will work ok.) */ orp = NULL; LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) { orp = rp; if ((recalltype == NFSLAYOUTRETURN_FILE && (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE || nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) || (recalltype != NFSLAYOUTRETURN_FILE && rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE && nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) { LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list); break; } } if (rp == NULL) { if (orp == NULL) LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp, nfsrecly_list); else LIST_INSERT_AFTER(orp, recallp, nfsrecly_list); } lyp->nfsly_flags |= NFSLY_RECALL; return (0); } /* * Compare the two seqids for ordering. The trick is that the seqids can * wrap around from 0xffffffff->0, so check for the cases where one * has wrapped around. * Return 1 if seqid1 comes before seqid2, 0 otherwise. */ static int nfscl_seq(uint32_t seqid1, uint32_t seqid2) { if (seqid2 > seqid1 && (seqid2 - seqid1) >= 0x7fffffff) /* seqid2 has wrapped around. */ return (0); if (seqid1 > seqid2 && (seqid1 - seqid2) >= 0x7fffffff) /* seqid1 has wrapped around. */ return (1); if (seqid1 <= seqid2) return (1); return (0); } /* * Do a layout return for each of the recalls. */ static void nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp, struct ucred *cred, NFSPROC_T *p) { struct nfsclrecalllayout *rp; nfsv4stateid_t stateid; int layouttype; NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER); stateid.seqid = lyp->nfsly_stateid.seqid; if ((lyp->nfsly_flags & NFSLY_FILES) != 0) layouttype = NFSLAYOUT_NFSV4_1_FILES; else layouttype = NFSLAYOUT_FLEXFILE; LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) { (void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh, lyp->nfsly_fhlen, 0, layouttype, rp->nfsrecly_iomode, rp->nfsrecly_recalltype, rp->nfsrecly_off, rp->nfsrecly_len, &stateid, cred, p, NULL); } } /* * Do the layout commit for a file layout. */ static void nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp, struct ucred *cred, NFSPROC_T *p) { struct nfsclflayout *flp; uint64_t len; int error, layouttype; if ((lyp->nfsly_flags & NFSLY_FILES) != 0) layouttype = NFSLAYOUT_NFSV4_1_FILES; else layouttype = NFSLAYOUT_FLEXFILE; LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) { if (layouttype == NFSLAYOUT_FLEXFILE && (flp->nfsfl_fflags & NFSFLEXFLAG_NO_LAYOUTCOMMIT) != 0) { NFSCL_DEBUG(4, "Flex file: no layoutcommit\n"); /* If not supported, don't bother doing it. */ NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT; NFSUNLOCKMNT(nmp); break; } else if (flp->nfsfl_off <= lyp->nfsly_lastbyte) { len = flp->nfsfl_end - flp->nfsfl_off; error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh, lyp->nfsly_fhlen, 0, flp->nfsfl_off, len, lyp->nfsly_lastbyte, &lyp->nfsly_stateid, layouttype, cred, p, NULL); NFSCL_DEBUG(4, "layoutcommit err=%d\n", error); if (error == NFSERR_NOTSUPP) { /* If not supported, don't bother doing it. */ NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT; NFSUNLOCKMNT(nmp); break; } } } } /* * Commit all layouts for a file (vnode). */ int nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p) { struct nfsclclient *clp; struct nfscllayout *lyp; struct nfsnode *np = VTONFS(vp); mount_t mp; struct nfsmount *nmp; mp = vnode_mount(vp); nmp = VFSTONFS(mp); if (NFSHASNOLAYOUTCOMMIT(nmp)) return (0); NFSLOCKCLSTATE(); clp = nmp->nm_clp; if (clp == NULL) { NFSUNLOCKCLSTATE(); return (EPERM); } lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (lyp == NULL) { NFSUNLOCKCLSTATE(); return (EPERM); } nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); if (NFSCL_FORCEDISM(mp)) { NFSUNLOCKCLSTATE(); return (EPERM); } tryagain: if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) { lyp->nfsly_flags &= ~NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); NFSCL_DEBUG(4, "do layoutcommit2\n"); nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p); NFSLOCKCLSTATE(); goto tryagain; } nfsv4_relref(&lyp->nfsly_lock); NFSUNLOCKCLSTATE(); return (0); } Index: head/sys/fs/nfsclient/nfs_clvfsops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clvfsops.c (revision 335011) +++ head/sys/fs/nfsclient/nfs_clvfsops.c (revision 335012) @@ -1,2045 +1,2053 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(nfscl, "NFSv4 client"); extern int nfscl_ticks; extern struct timeval nfsboottime; extern int nfsrv_useacl; extern int nfscl_debuglevel; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; extern struct mtx ncl_iod_mutex; NFSCLSTATEMUTEX; +extern struct mtx nfsrv_dslock_mtx; MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header"); MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct"); SYSCTL_DECL(_vfs_nfs); static int nfs_ip_paranoia = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0, ""); static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY, downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, ""); /* how long between console messages "nfs server foo not responding" */ static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY, downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); #ifdef NFS_DEBUG int nfs_debug; SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "Toggle debug flag"); #endif static int nfs_mountroot(struct mount *); static void nfs_sec_name(char *, int *); static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *, struct ucred *, struct thread *); static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, u_char *, int, u_char *, int, u_char *, int, struct vnode **, struct ucred *, struct thread *, int, int, int); static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *, struct sockaddr_storage *, int *, off_t *, struct timeval *); static vfs_mount_t nfs_mount; static vfs_cmount_t nfs_cmount; static vfs_unmount_t nfs_unmount; static vfs_root_t nfs_root; static vfs_statfs_t nfs_statfs; static vfs_sync_t nfs_sync; static vfs_sysctl_t nfs_sysctl; static vfs_purge_t nfs_purge; /* * nfs vfs operations. */ static struct vfsops nfs_vfsops = { .vfs_init = ncl_init, .vfs_mount = nfs_mount, .vfs_cmount = nfs_cmount, .vfs_root = nfs_root, .vfs_statfs = nfs_statfs, .vfs_sync = nfs_sync, .vfs_uninit = ncl_uninit, .vfs_unmount = nfs_unmount, .vfs_sysctl = nfs_sysctl, .vfs_purge = nfs_purge, }; VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfs, 1); MODULE_DEPEND(nfs, nfscommon, 1, 1, 1); MODULE_DEPEND(nfs, krpc, 1, 1, 1); MODULE_DEPEND(nfs, nfssvc, 1, 1, 1); MODULE_DEPEND(nfs, nfslock, 1, 1, 1); /* * This structure is now defined in sys/nfs/nfs_diskless.c so that it * can be shared by both NFS clients. It is declared here so that it * will be defined for kernels built without NFS_ROOT, although it * isn't used in that case. */ #if !defined(NFS_ROOT) struct nfs_diskless nfs_diskless = { { { 0 } } }; struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; int nfs_diskless_valid = 0; #endif SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, &nfs_diskless_valid, 0, "Has the diskless struct been filled correctly"); SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, nfsv3_diskless.root_hostnam, 0, "Path to nfs root"); SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr), "%Ssockaddr_in", "Diskless root nfs address"); void newnfsargs_ntoh(struct nfs_args *); static int nfs_mountdiskless(char *, struct sockaddr_in *, struct nfs_args *, struct thread *, struct vnode **, struct mount *); static void nfs_convert_diskless(void); static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs); int newnfs_iosize(struct nfsmount *nmp) { int iosize, maxio; /* First, set the upper limit for iosize */ if (nmp->nm_flag & NFSMNT_NFSV4) { maxio = NFS_MAXBSIZE; } else if (nmp->nm_flag & NFSMNT_NFSV3) { if (nmp->nm_sotype == SOCK_DGRAM) maxio = NFS_MAXDGRAMDATA; else maxio = NFS_MAXBSIZE; } else { maxio = NFS_V2MAXDATA; } if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0) nmp->nm_rsize = maxio; if (nmp->nm_rsize > NFS_MAXBSIZE) nmp->nm_rsize = NFS_MAXBSIZE; if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0) nmp->nm_readdirsize = maxio; if (nmp->nm_readdirsize > nmp->nm_rsize) nmp->nm_readdirsize = nmp->nm_rsize; if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0) nmp->nm_wsize = maxio; if (nmp->nm_wsize > NFS_MAXBSIZE) nmp->nm_wsize = NFS_MAXBSIZE; /* * Calculate the size used for io buffers. Use the larger * of the two sizes to minimise nfs requests but make sure * that it is at least one VM page to avoid wasting buffer * space. It must also be at least NFS_DIRBLKSIZ, since * that is the buffer size used for directories. */ iosize = imax(nmp->nm_rsize, nmp->nm_wsize); iosize = imax(iosize, PAGE_SIZE); iosize = imax(iosize, NFS_DIRBLKSIZ); nmp->nm_mountp->mnt_stat.f_iosize = iosize; return (iosize); } static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) { args->version = NFS_ARGSVERSION; args->addr = oargs->addr; args->addrlen = oargs->addrlen; args->sotype = oargs->sotype; args->proto = oargs->proto; args->fh = oargs->fh; args->fhsize = oargs->fhsize; args->flags = oargs->flags; args->wsize = oargs->wsize; args->rsize = oargs->rsize; args->readdirsize = oargs->readdirsize; args->timeo = oargs->timeo; args->retrans = oargs->retrans; args->readahead = oargs->readahead; args->hostname = oargs->hostname; } static void nfs_convert_diskless(void) { bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, sizeof(struct ifaliasreq)); bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, sizeof(struct sockaddr_in)); nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args); if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) { nfsv3_diskless.root_fhsize = NFSX_MYFH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH); } else { nfsv3_diskless.root_fhsize = NFSX_V2FH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); } bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, sizeof(struct sockaddr_in)); bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN); nfsv3_diskless.root_time = nfs_diskless.root_time; bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam, MAXHOSTNAMELEN); nfs_diskless_valid = 3; } /* * nfs statfs call */ static int nfs_statfs(struct mount *mp, struct statfs *sbp) { struct vnode *vp; struct thread *td; struct nfsmount *nmp = VFSTONFS(mp); struct nfsvattr nfsva; struct nfsfsinfo fs; struct nfsstatfs sb; int error = 0, attrflag, gotfsinfo = 0, ret; struct nfsnode *np; td = curthread; error = vfs_busy(mp, MBF_NOWAIT); if (error) return (error); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) { vfs_unbusy(mp); return (error); } vp = NFSTOV(np); mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); if (!error) gotfsinfo = 1; } else mtx_unlock(&nmp->nm_mtx); if (!error) error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); if (error != 0) NFSCL_DEBUG(2, "statfs=%d\n", error); if (attrflag == 0) { ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, td->td_ucred, td, &nfsva, NULL, NULL); if (ret) { /* * Just set default values to get things going. */ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; } } (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { mtx_lock(&nmp->nm_mtx); if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4)) nfscl_loadfsinfo(nmp, &fs); nfscl_loadsbinfo(nmp, &sb, sbp); sbp->f_iosize = newnfs_iosize(nmp); mtx_unlock(&nmp->nm_mtx); if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } vput(vp); vfs_unbusy(mp); return (error); } /* * nfs version 3 fsinfo rpc call */ int ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, struct thread *td) { struct nfsfsinfo fs; struct nfsvattr nfsva; int error, attrflag; error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL); if (!error) { if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); mtx_lock(&nmp->nm_mtx); nfscl_loadfsinfo(nmp, &fs); mtx_unlock(&nmp->nm_mtx); } return (error); } /* * Mount a remote root fs via. nfs. This depends on the info in the * nfs_diskless structure that has been filled in properly by some primary * bootstrap. * It goes something like this: * - do enough of "ifconfig" by calling ifioctl() so that the system * can talk to the server * - If nfs_diskless.mygateway is filled in, use that address as * a default gateway. * - build the rootfs mount point and call mountnfs() to do the rest. * * It is assumed to be safe to read, modify, and write the nfsv3_diskless * structure, as well as other global NFS client variables here, as * nfs_mountroot() will be called once in the boot before any other NFS * client activity occurs. */ static int nfs_mountroot(struct mount *mp) { struct thread *td = curthread; struct nfsv3_diskless *nd = &nfsv3_diskless; struct socket *so; struct vnode *vp; struct ifreq ir; int error; u_long l; char buf[128]; char *cp; #if defined(BOOTP_NFSROOT) && defined(BOOTP) bootpc_init(); /* use bootp to get nfs_diskless filled in */ #elif defined(NFS_ROOT) nfs_setup_diskless(); #endif if (nfs_diskless_valid == 0) return (-1); if (nfs_diskless_valid == 1) nfs_convert_diskless(); /* * Do enough of ifconfig(8) so that the critical net interface can * talk to the server. */ error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0, td->td_ucred, td); if (error) panic("nfs_mountroot: socreate(%04x): %d", nd->myif.ifra_addr.sa_family, error); #if 0 /* XXX Bad idea */ /* * We might not have been told the right interface, so we pass * over the first ten interfaces of the same kind, until we get * one of them configured. */ for (i = strlen(nd->myif.ifra_name) - 1; nd->myif.ifra_name[i] >= '0' && nd->myif.ifra_name[i] <= '9'; nd->myif.ifra_name[i] ++) { error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if(!error) break; } #endif error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if (error) panic("nfs_mountroot: SIOCAIFADDR: %d", error); if ((cp = kern_getenv("boot.netif.mtu")) != NULL) { ir.ifr_mtu = strtol(cp, NULL, 10); bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ); freeenv(cp); error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td); if (error) printf("nfs_mountroot: SIOCSIFMTU: %d", error); } soclose(so); /* * If the gateway field is filled in, set it as the default route. * Note that pxeboot will set a default route of 0 if the route * is not set by the DHCP server. Check also for a value of 0 * to avoid panicking inappropriately in that situation. */ if (nd->mygateway.sin_len != 0 && nd->mygateway.sin_addr.s_addr != 0) { struct sockaddr_in mask, sin; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ CURVNET_SET(TD_TO_VNET(td)); error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } /* * Create the rootfs mount point. */ nd->root_args.fh = nd->root_fh; nd->root_args.fhsize = nd->root_fhsize; l = ntohl(nd->root_saddr.sin_addr.s_addr); snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", (l >> 24) & 0xff, (l >> 16) & 0xff, (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam); printf("NFS ROOT: %s\n", buf); nd->root_args.hostname = buf; if ((error = nfs_mountdiskless(buf, &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) { return (error); } /* * This is not really an nfs issue, but it is much easier to * set hostname here and then let the "/etc/rc.xxx" files * mount the right /var based upon its preset value. */ mtx_lock(&prison0.pr_mtx); strlcpy(prison0.pr_hostname, nd->my_hostnam, sizeof(prison0.pr_hostname)); mtx_unlock(&prison0.pr_mtx); inittodr(ntohl(nd->root_time)); return (0); } /* * Internal version of mount system call for diskless setup. */ static int nfs_mountdiskless(char *path, struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, struct vnode **vpp, struct mount *mp) { struct sockaddr *nam; int dirlen, error; char *dirpath; /* * Find the directory path in "path", which also has the server's * name/ip address in it. */ dirpath = strchr(path, ':'); if (dirpath != NULL) dirlen = strlen(++dirpath); else dirlen = 0; nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen, NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO, NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) { printf("nfs_mountroot: mount %s on /: %d\n", path, error); return (error); } return (0); } static void nfs_sec_name(char *sec, int *flagsp) { if (!strcmp(sec, "krb5")) *flagsp |= NFSMNT_KERB; else if (!strcmp(sec, "krb5i")) *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY); else if (!strcmp(sec, "krb5p")) *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY); } static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *hostname, struct ucred *cred, struct thread *td) { int adjsock; char *p; /* * Set read-only flag if requested; otherwise, clear it if this is * an update. If this is not an update, then either the read-only * flag is already clear, or this is a root mount and it was set * intentionally at some previous point. */ if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); } else if (mp->mnt_flag & MNT_UPDATE) { MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); } /* * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes * no sense in that context. Also, set up appropriate retransmit * and soft timeout behavior. */ if (argp->sotype == SOCK_STREAM) { nmp->nm_flag &= ~NFSMNT_NOCONN; nmp->nm_timeo = NFS_MAXTIMEO; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_retry = INT_MAX; else nmp->nm_retry = NFS_RETRANS_TCP; } /* Also clear RDIRPLUS if NFSv2, it crashes some servers */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { argp->flags &= ~NFSMNT_RDIRPLUS; nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } /* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */ if (nmp->nm_minorvers == 0) { argp->flags &= ~NFSMNT_ONEOPENOWN; nmp->nm_flag &= ~NFSMNT_ONEOPENOWN; } /* Re-bind if rsrvd port requested and wasn't on one */ adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) && (argp->flags & NFSMNT_RESVPORT); /* Also re-bind if we're switching to/from a connected UDP socket */ adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) != (argp->flags & NFSMNT_NOCONN)); /* Update flags atomically. Don't change the lock bits. */ nmp->nm_flag = argp->flags | nmp->nm_flag; if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; if (nmp->nm_timeo < NFS_MINTIMEO) nmp->nm_timeo = NFS_MINTIMEO; else if (nmp->nm_timeo > NFS_MAXTIMEO) nmp->nm_timeo = NFS_MAXTIMEO; } if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { nmp->nm_retry = argp->retrans; if (nmp->nm_retry > NFS_MAXREXMIT) nmp->nm_retry = NFS_MAXREXMIT; } if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { nmp->nm_wsize = argp->wsize; /* * Clip at the power of 2 below the size. There is an * issue (not isolated) that causes intermittent page * faults if this is not done. */ if (nmp->nm_wsize > NFS_FABLKSIZE) nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1); else nmp->nm_wsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { nmp->nm_rsize = argp->rsize; /* * Clip at the power of 2 below the size. There is an * issue (not isolated) that causes intermittent page * faults if this is not done. */ if (nmp->nm_rsize > NFS_FABLKSIZE) nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1); else nmp->nm_rsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { nmp->nm_readdirsize = argp->readdirsize; } if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) nmp->nm_acregmin = argp->acregmin; else nmp->nm_acregmin = NFS_MINATTRTIMO; if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) nmp->nm_acregmax = argp->acregmax; else nmp->nm_acregmax = NFS_MAXATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) nmp->nm_acdirmin = argp->acdirmin; else nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) nmp->nm_acdirmax = argp->acdirmax; else nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; if (nmp->nm_acdirmin > nmp->nm_acdirmax) nmp->nm_acdirmin = nmp->nm_acdirmax; if (nmp->nm_acregmin > nmp->nm_acregmax) nmp->nm_acregmin = nmp->nm_acregmax; if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { if (argp->readahead <= NFS_MAXRAHEAD) nmp->nm_readahead = argp->readahead; else nmp->nm_readahead = NFS_MAXRAHEAD; } if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { if (argp->wcommitsize < nmp->nm_wsize) nmp->nm_wcommitsize = nmp->nm_wsize; else nmp->nm_wcommitsize = argp->wcommitsize; } adjsock |= ((nmp->nm_sotype != argp->sotype) || (nmp->nm_soproto != argp->proto)); if (nmp->nm_client != NULL && adjsock) { int haslock = 0, error = 0; if (nmp->nm_sotype == SOCK_STREAM) { error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock); if (!error) haslock = 1; } if (!error) { newnfs_disconnect(&nmp->nm_sockreq); if (haslock) newnfs_sndunlock(&nmp->nm_sockreq.nr_lock); nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; if (nmp->nm_sotype == SOCK_DGRAM) while (newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)) { printf("newnfs_args: retrying connect\n"); (void) nfs_catnap(PSOCK, 0, "nfscon"); } } } else { nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; } if (hostname != NULL) { strlcpy(nmp->nm_hostname, hostname, sizeof(nmp->nm_hostname)); p = strchr(nmp->nm_hostname, ':'); if (p != NULL) *p = '\0'; } } static const char *nfs_opts[] = { "from", "nfs_args", "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union", "noclusterr", "noclusterw", "multilabel", "acls", "force", "update", "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize", "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh", "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath", "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr", "pnfs", "wcommitsize", "oneopenown", NULL }; /* * Parse the "from" mountarg, passed by the generic mount(8) program * or the mountroot code. This is used when rerooting into NFS. * * Note that the "hostname" is actually a "hostname:/share/path" string. */ static int nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep, struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp) { char *nam, *delimp, *hostp, *spec; int error, have_bracket = 0, offset, rv, speclen; struct sockaddr_in *sin; size_t len; error = vfs_getopt(opts, "from", (void **)&spec, &speclen); if (error != 0) return (error); nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK); /* * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs(). */ if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL && *(delimp + 1) == ':') { hostp = spec + 1; spec = delimp + 2; have_bracket = 1; } else if ((delimp = strrchr(spec, ':')) != NULL) { hostp = spec; spec = delimp + 1; } else if ((delimp = strrchr(spec, '@')) != NULL) { printf("%s: path@server syntax is deprecated, " "use server:path\n", __func__); hostp = delimp + 1; } else { printf("%s: no : nfs-name\n", __func__); free(nam, M_TEMP); return (EINVAL); } *delimp = '\0'; /* * If there has been a trailing slash at mounttime it seems * that some mountd implementations fail to remove the mount * entries from their mountlist while unmounting. */ for (speclen = strlen(spec); speclen > 1 && spec[speclen - 1] == '/'; speclen--) spec[speclen - 1] = '\0'; if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) { printf("%s: %s:%s: name too long", __func__, hostp, spec); free(nam, M_TEMP); return (EINVAL); } /* Make both '@' and ':' notations equal */ if (*hostp != '\0') { len = strlen(hostp); offset = 0; if (have_bracket) nam[offset++] = '['; memmove(nam + offset, hostp, len); if (have_bracket) nam[len + offset++] = ']'; nam[len + offset++] = ':'; memmove(nam + len + offset, spec, speclen); nam[len + speclen + offset] = '\0'; } else nam[0] = '\0'; /* * XXX: IPv6 */ sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK); rv = inet_pton(AF_INET, hostp, &sin->sin_addr); if (rv != 1) { printf("%s: cannot parse '%s', inet_pton() returned %d\n", __func__, hostp, rv); free(nam, M_TEMP); free(sin, M_SONAME); return (EINVAL); } sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; /* * XXX: hardcoded port number. */ sin->sin_port = htons(2049); *hostnamep = strdup(nam, M_NEWNFSMNT); *sinp = sin; strlcpy(dirpath, spec, dirpathsize); *dirlenp = strlen(dirpath); free(nam, M_TEMP); return (0); } /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the getsockaddr() call because getsockaddr() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_mount(struct mount *mp) { struct nfs_args args = { .version = NFS_ARGSVERSION, .addr = NULL, .addrlen = sizeof (struct sockaddr_in), .sotype = SOCK_STREAM, .proto = 0, .fh = NULL, .fhsize = 0, .flags = NFSMNT_RESVPORT, .wsize = NFS_WSIZE, .rsize = NFS_RSIZE, .readdirsize = NFS_READDIRSIZE, .timeo = 10, .retrans = NFS_RETRANS, .readahead = NFS_DEFRAHEAD, .wcommitsize = 0, /* was: NQ_DEFLEASE */ .hostname = NULL, .acregmin = NFS_MINATTRTIMO, .acregmax = NFS_MAXATTRTIMO, .acdirmin = NFS_MINDIRATTRTIMO, .acdirmax = NFS_MAXDIRATTRTIMO, }; int error = 0, ret, len; struct sockaddr *nam = NULL; struct vnode *vp; struct thread *td; char *hst; u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100]; char *cp, *opt, *name, *secname; int nametimeo = NFS_DEFAULT_NAMETIMEO; int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO; int minvers = 0; int dirlen, has_nfs_args_opt, has_nfs_from_opt, krbnamelen, srvkrbnamelen; size_t hstlen; has_nfs_args_opt = 0; has_nfs_from_opt = 0; hst = malloc(MNAMELEN, M_TEMP, M_WAITOK); if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { error = EINVAL; goto out; } td = curthread; if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS && nfs_diskless_valid != 0) { error = nfs_mountroot(mp); goto out; } nfscl_init(); /* * The old mount_nfs program passed the struct nfs_args * from userspace to kernel. The new mount_nfs program * passes string options via nmount() from userspace to kernel * and we populate the struct nfs_args in the kernel. */ if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) { error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof(args)); if (error != 0) goto out; if (args.version != NFS_ARGSVERSION) { error = EPROGMISMATCH; goto out; } has_nfs_args_opt = 1; } /* Handle the new style options. */ if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) { args.acdirmin = args.acdirmax = args.acregmin = args.acregmax = 0; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0) args.flags |= NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0) args.flags |= NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0) args.flags |= NFSMNT_INT; if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0) args.flags |= NFSMNT_RDIRPLUS; if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0) args.flags |= NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0) args.flags &= ~NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0) args.flags |= NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0) args.flags &= ~NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0) args.sotype = SOCK_STREAM; if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0) args.flags |= NFSMNT_NFSV3; if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) { args.flags |= NFSMNT_NFSV4; args.sotype = SOCK_STREAM; } if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0) args.flags |= NFSMNT_ALLGSSNAME; if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0) args.flags |= NFSMNT_NOCTO; if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0) args.flags |= NFSMNT_NONCONTIGWR; if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0) args.flags |= NFSMNT_PNFS; if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0) args.flags |= NFSMNT_ONEOPENOWN; if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readdirsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readdirsize); if (ret != 1 || args.readdirsize <= 0) { vfs_mount_error(mp, "illegal readdirsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READDIRSIZE; } if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readahead"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readahead); if (ret != 1 || args.readahead <= 0) { vfs_mount_error(mp, "illegal readahead: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READAHEAD; } if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal wsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.wsize); if (ret != 1 || args.wsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WSIZE; } if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal rsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.rsize); if (ret != 1 || args.rsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RSIZE; } if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal retrans"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.retrans); if (ret != 1 || args.retrans <= 0) { vfs_mount_error(mp, "illegal retrans: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RETRANS; } if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal actimeo: %s", opt); error = EINVAL; goto out; } args.acdirmin = args.acdirmax = args.acregmax = args.acregmin; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal acregmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMIN; } if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmax); if (ret != 1 || args.acregmax < 0) { vfs_mount_error(mp, "illegal acregmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmin); if (ret != 1 || args.acdirmin < 0) { vfs_mount_error(mp, "illegal acdirmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMIN; } if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmax); if (ret != 1 || args.acdirmax < 0) { vfs_mount_error(mp, "illegal acdirmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMAX; } if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.wcommitsize); if (ret != 1 || args.wcommitsize < 0) { vfs_mount_error(mp, "illegal wcommitsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WCOMMITSIZE; } if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeo: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeout: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &nametimeo); if (ret != 1 || nametimeo < 0) { vfs_mount_error(mp, "illegal nametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &negnametimeo); if (ret != 1 || negnametimeo < 0) { vfs_mount_error(mp, "illegal negnametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &minvers); if (ret != 1 || minvers < 0 || minvers > 1 || (args.flags & NFSMNT_NFSV4) == 0) { vfs_mount_error(mp, "illegal minorversion: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "sec", (void **) &secname, NULL) == 0) nfs_sec_name(secname, &args.flags); if (mp->mnt_flag & MNT_UPDATE) { struct nfsmount *nmp = VFSTONFS(mp); if (nmp == NULL) { error = EIO; goto out; } /* * If a change from TCP->UDP is done and there are thread(s) * that have I/O RPC(s) in progress with a transfer size * greater than NFS_MAXDGRAMDATA, those thread(s) will be * hung, retrying the RPC(s) forever. Usually these threads * will be seen doing an uninterruptible sleep on wait channel * "nfsreq". */ if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM) tprintf(td->td_proc, LOG_WARNING, "Warning: mount -u that changes TCP->UDP can result in hung threads\n"); /* * When doing an update, we can't change version, * security, switch lockd strategies, change cookie * translation or switch oneopenown. */ args.flags = (args.flags & ~(NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_ONEOPENOWN | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_ONEOPENOWN | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td); goto out; } /* * Make the nfs_ip_paranoia sysctl serve as the default connection * or no-connection mode for those protocols that support * no-connection mode (the flag will be cleared later for protocols * that do not support no-connection mode). This will allow a client * to receive replies from a different IP then the request was * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), * not 0. */ if (nfs_ip_paranoia == 0) args.flags |= NFSMNT_NOCONN; if (has_nfs_args_opt != 0) { /* * In the 'nfs_args' case, the pointers in the args * structure are in userland - we copy them in here. */ if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error != 0) goto out; error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen); if (error != 0) goto out; bzero(&hst[hstlen], MNAMELEN - hstlen); args.hostname = hst; /* getsockaddr() call must be after above copyin() calls */ error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); if (error != 0) goto out; } else if (nfs_mount_parse_from(mp->mnt_optnew, &args.hostname, (struct sockaddr_in **)&nam, dirpath, sizeof(dirpath), &dirlen) == 0) { has_nfs_from_opt = 1; bcopy(args.hostname, hst, MNAMELEN); hst[MNAMELEN - 1] = '\0'; /* * This only works with NFSv4 for now. */ args.fhsize = 0; args.flags |= NFSMNT_NFSV4; args.sotype = SOCK_STREAM; } else { if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh, &args.fhsize) == 0) { if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } bcopy(args.fh, nfh, args.fhsize); } else { args.fhsize = 0; } (void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname, &len); if (args.hostname == NULL) { vfs_mount_error(mp, "Invalid hostname"); error = EINVAL; goto out; } if (len >= MNAMELEN) { vfs_mount_error(mp, "Hostname too long"); error = EINVAL; goto out; } bcopy(args.hostname, hst, len); hst[len] = '\0'; } if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0) strlcpy(srvkrbname, name, sizeof (srvkrbname)); else { snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst); cp = strchr(srvkrbname, ':'); if (cp != NULL) *cp = '\0'; } srvkrbnamelen = strlen(srvkrbname); if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0) strlcpy(krbname, name, sizeof (krbname)); else krbname[0] = '\0'; krbnamelen = strlen(krbname); if (has_nfs_from_opt == 0) { if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0) strlcpy(dirpath, name, sizeof (dirpath)); else dirpath[0] = '\0'; dirlen = strlen(dirpath); } if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) { if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr, &args.addrlen) == 0) { if (args.addrlen > SOCK_MAXADDRLEN) { error = ENAMETOOLONG; goto out; } nam = malloc(args.addrlen, M_SONAME, M_WAITOK); bcopy(args.addr, nam, args.addrlen); nam->sa_len = args.addrlen; } else { vfs_mount_error(mp, "No server address"); error = EINVAL; goto out; } } args.fh = nfh; error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath, dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td, nametimeo, negnametimeo, minvers); out: if (!error) { MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF | MNTK_USES_BCACHE; if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0) mp->mnt_kern_flag |= MNTK_NULL_NOCACHE; MNT_IUNLOCK(mp); } free(hst, M_TEMP); return (error); } /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the getsockaddr() call because getsockaddr() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_cmount(struct mntarg *ma, void *data, uint64_t flags) { int error; struct nfs_args args; error = copyin(data, &args, sizeof (struct nfs_args)); if (error) return error; ma = mount_arg(ma, "nfs_args", &args, sizeof args); error = kernel_mount(ma, flags); return (error); } /* * Common code for mount and mountroot */ static int mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen, u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp, struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo, int minvers) { struct nfsmount *nmp; struct nfsnode *np; int error, trycnt, ret; struct nfsvattr nfsva; struct nfsclclient *clp; struct nfsclds *dsp, *tdsp; uint32_t lease; static u_int64_t clval = 0; NFSCL_DEBUG(3, "in mnt\n"); clp = NULL; if (mp->mnt_flag & MNT_UPDATE) { nmp = VFSTONFS(mp); printf("%s: MNT_UPDATE is no longer handled here\n", __func__); free(nam, M_SONAME); return (0); } else { nmp = malloc(sizeof (struct nfsmount) + krbnamelen + dirlen + srvkrbnamelen + 2, M_NEWNFSMNT, M_WAITOK | M_ZERO); TAILQ_INIT(&nmp->nm_bufq); TAILQ_INIT(&nmp->nm_sess); if (clval == 0) clval = (u_int64_t)nfsboottime.tv_sec; nmp->nm_clval = clval++; nmp->nm_krbnamelen = krbnamelen; nmp->nm_dirpathlen = dirlen; nmp->nm_srvkrbnamelen = srvkrbnamelen; if (td->td_ucred->cr_uid != (uid_t)0) { /* * nm_uid is used to get KerberosV credentials for * the nfsv4 state handling operations if there is * no host based principal set. Use the uid of * this user if not root, since they are doing the * mount. I don't think setting this for root will * work, since root normally does not have user * credentials in a credentials cache. */ nmp->nm_uid = td->td_ucred->cr_uid; } else { /* * Just set to -1, so it won't be used. */ nmp->nm_uid = (uid_t)-1; } /* Copy and null terminate all the names */ if (nmp->nm_krbnamelen > 0) { bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen); nmp->nm_name[nmp->nm_krbnamelen] = '\0'; } if (nmp->nm_dirpathlen > 0) { bcopy(dirpath, NFSMNT_DIRPATH(nmp), nmp->nm_dirpathlen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + 1] = '\0'; } if (nmp->nm_srvkrbnamelen > 0) { bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp), nmp->nm_srvkrbnamelen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + nmp->nm_srvkrbnamelen + 2] = '\0'; } nmp->nm_sockreq.nr_cred = crhold(cred); mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF); mp->mnt_data = nmp; nmp->nm_getinfo = nfs_getnlminfo; nmp->nm_vinvalbuf = ncl_vinvalbuf; } vfs_getnewfsid(mp); nmp->nm_mountp = mp; mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK); /* * Since nfs_decode_args() might optionally set them, these * need to be set to defaults before the call, so that the * optional settings aren't overwritten. */ nmp->nm_nametimeo = nametimeo; nmp->nm_negnametimeo = negnametimeo; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; nmp->nm_readahead = NFS_DEFRAHEAD; /* This is empirical approximation of sqrt(hibufspace) * 256. */ nmp->nm_wcommitsize = NFS_MAXBSIZE / 256; while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace) nmp->nm_wcommitsize *= 2; nmp->nm_wcommitsize *= 256; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_minorvers = minvers; else nmp->nm_minorvers = 0; nfs_decode_args(mp, nmp, argp, hst, cred, td); /* * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too * high, depending on whether we end up with negative offsets in * the client or server somewhere. 2GB-1 may be safer. * * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum * that we can handle until we find out otherwise. */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) nmp->nm_maxfilesize = 0xffffffffLL; else nmp->nm_maxfilesize = OFF_MAX; if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { nmp->nm_wsize = NFS_WSIZE; nmp->nm_rsize = NFS_RSIZE; nmp->nm_readdirsize = NFS_READDIRSIZE; } nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_tprintf_delay = nfs_tprintf_delay; if (nmp->nm_tprintf_delay < 0) nmp->nm_tprintf_delay = 0; nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; nmp->nm_fhsize = argp->fhsize; if (nmp->nm_fhsize > 0) bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); nmp->nm_nam = nam; /* Set up the sockets and per-host congestion */ nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; nmp->nm_sockreq.nr_prog = NFS_PROG; if ((argp->flags & NFSMNT_NFSV4)) nmp->nm_sockreq.nr_vers = NFS_VER4; else if ((argp->flags & NFSMNT_NFSV3)) nmp->nm_sockreq.nr_vers = NFS_VER3; else nmp->nm_sockreq.nr_vers = NFS_VER2; if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0))) goto bad; /* For NFSv4.1, get the clientid now. */ if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "at getcl\n"); error = nfscl_getcl(mp, cred, td, 0, &clp); NFSCL_DEBUG(3, "aft getcl=%d\n", error); if (error != 0) goto bad; } if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && nmp->nm_dirpathlen > 0) { NFSCL_DEBUG(3, "in dirp\n"); /* * If the fhsize on the mount point == 0 for V4, the mount * path needs to be looked up. */ trycnt = 3; do { error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, td); NFSCL_DEBUG(3, "aft dirp=%d\n", error); if (error) (void) nfs_catnap(PZERO, error, "nfsgetdirp"); } while (error && --trycnt > 0); if (error) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); goto bad; } } /* * A reference count is needed on the nfsnode representing the * remote root. If this object is not persistent, then backward * traversals of the mount point (i.e. "..") will not work if * the nfsnode gets flushed out of the cache. Ufs does not have * this problem, because one can identify root inodes by their * number == UFS_ROOTINO (2). */ if (nmp->nm_fhsize > 0) { /* * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set * non-zero for the root vnode. f_iosize will be set correctly * by nfs_statfs() before any I/O occurs. */ mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ; error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) goto bad; *vpp = NFSTOV(np); /* * Get file attributes and transfer parameters for the * mountpoint. This has the side effect of filling in * (*vpp)->v_type with the correct value. */ ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, cred, td, &nfsva, NULL, &lease); if (ret) { /* * Just set default values to get things going. */ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; lease = 60; } (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1); if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "lease=%d\n", (int)lease); NFSLOCKCLSTATE(); clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; NFSUNLOCKCLSTATE(); /* * Mount will succeed, so the renew thread can be * started now. */ nfscl_start_renewthread(clp); nfscl_clientrelease(clp); } if (argp->flags & NFSMNT_NFSV3) ncl_fsinfo(nmp, *vpp, cred, td); /* Mark if the mount point supports NFSv4 ACLs. */ if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 && ret == 0 && NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } /* * Lose the lock but keep the ref. */ NFSVOPUNLOCK(*vpp, 0); return (0); } error = EIO; bad: if (clp != NULL) nfscl_clientrelease(clp); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); if (nmp->nm_clp != NULL) { NFSLOCKCLSTATE(); LIST_REMOVE(nmp->nm_clp, nfsc_list); NFSUNLOCKCLSTATE(); free(nmp->nm_clp, M_NFSCLCLIENT); } TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) { if (dsp != TAILQ_FIRST(&nmp->nm_sess) && dsp->nfsclds_sockp != NULL) newnfs_disconnect(dsp->nfsclds_sockp); nfscl_freenfsclds(dsp); } free(nmp, M_NEWNFSMNT); free(nam, M_SONAME); return (error); } /* * unmount system call */ static int nfs_unmount(struct mount *mp, int mntflags) { struct thread *td; struct nfsmount *nmp; int error, flags = 0, i, trycnt = 0; struct nfsclds *dsp, *tdsp; td = curthread; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; nmp = VFSTONFS(mp); + error = 0; /* * Goes something like this.. * - Call vflush() to clear out vnodes for this filesystem * - Close the socket * - Free up the data structures */ /* In the forced case, cancel any outstanding requests. */ if (mntflags & MNT_FORCE) { + NFSDDSLOCK(); + if (nfsv4_findmirror(nmp) != NULL) + error = ENXIO; + NFSDDSUNLOCK(); + if (error) + goto out; error = newnfs_nmcancelreqs(nmp); if (error) goto out; /* For a forced close, get rid of the renew thread now */ nfscl_umount(nmp, td); } /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ do { error = vflush(mp, 1, flags, td); if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30) (void) nfs_catnap(PSOCK, error, "newndm"); } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30); if (error) goto out; /* * We are now committed to the unmount. */ if ((mntflags & MNT_FORCE) == 0) nfscl_umount(nmp, td); else { mtx_lock(&nmp->nm_mtx); nmp->nm_privflag |= NFSMNTP_FORCEDISM; mtx_unlock(&nmp->nm_mtx); } /* Make sure no nfsiods are assigned to this mount. */ mtx_lock(&ncl_iod_mutex); for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (ncl_iodmount[i] == nmp) { ncl_iodwant[i] = NFSIOD_AVAILABLE; ncl_iodmount[i] = NULL; } mtx_unlock(&ncl_iod_mutex); /* * We can now set mnt_data to NULL and wait for * nfssvc(NFSSVC_FORCEDISM) to complete. */ mtx_lock(&mountlist_mtx); mtx_lock(&nmp->nm_mtx); mp->mnt_data = NULL; mtx_unlock(&mountlist_mtx); while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0) msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0); mtx_unlock(&nmp->nm_mtx); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); free(nmp->nm_nam, M_SONAME); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) { if (dsp != TAILQ_FIRST(&nmp->nm_sess) && dsp->nfsclds_sockp != NULL) newnfs_disconnect(dsp->nfsclds_sockp); nfscl_freenfsclds(dsp); } free(nmp, M_NEWNFSMNT); out: return (error); } /* * Return root of a filesystem */ static int nfs_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; struct nfsmount *nmp; struct nfsnode *np; int error; nmp = VFSTONFS(mp); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags); if (error) return error; vp = NFSTOV(np); /* * Get transfer parameters and attributes for root vnode once. */ mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread); } else mtx_unlock(&nmp->nm_mtx); if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Flush out the buffer cache */ /* ARGSUSED */ static int nfs_sync(struct mount *mp, int waitfor) { struct vnode *vp, *mvp; struct thread *td; int error, allerror = 0; td = curthread; MNT_ILOCK(mp); /* * If a forced dismount is in progress, return from here so that * the umount(2) syscall doesn't get stuck in VFS_SYNC() before * calling VFS_UNMOUNT(). */ if (NFSCL_FORCEDISM(mp)) { MNT_IUNLOCK(mp); return (EBADF); } MNT_IUNLOCK(mp); /* * Force stale buffer cache information to be flushed. */ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* XXX Racy bv_cnt check. */ if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 || waitfor == MNT_LAZY) { VI_UNLOCK(vp); continue; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } error = VOP_FSYNC(vp, waitfor, td); if (error) allerror = error; NFSVOPUNLOCK(vp, 0); vrele(vp); } return (allerror); } static int nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req) { struct nfsmount *nmp = VFSTONFS(mp); struct vfsquery vq; int error; bzero(&vq, sizeof(vq)); switch (op) { #if 0 case VFS_CTL_NOLOCKS: val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0; if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &val, sizeof(val)); if (error) return (error); } if (req->newptr != NULL) { error = SYSCTL_IN(req, &val, sizeof(val)); if (error) return (error); if (val) nmp->nm_flag |= NFSMNT_NOLOCKS; else nmp->nm_flag &= ~NFSMNT_NOLOCKS; } break; #endif case VFS_CTL_QUERY: mtx_lock(&nmp->nm_mtx); if (nmp->nm_state & NFSSTA_TIMEO) vq.vq_flags |= VQ_NOTRESP; mtx_unlock(&nmp->nm_mtx); #if 0 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) vq.vq_flags |= VQ_NOTRESPLOCK; #endif error = SYSCTL_OUT(req, &vq, sizeof(vq)); break; case VFS_CTL_TIMEO: if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); } if (req->newptr != NULL) { error = vfs_suser(mp, req->td); if (error) return (error); error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; } break; default: return (ENOTSUP); } return (0); } /* * Purge any RPCs in progress, so that they will all return errors. * This allows dounmount() to continue as far as VFS_UNMOUNT() for a * forced dismount. */ static void nfs_purge(struct mount *mp) { struct nfsmount *nmp = VFSTONFS(mp); newnfs_nmcancelreqs(nmp); } /* * Extract the information needed by the nlm from the nfs vnode. */ static void nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp, struct sockaddr_storage *sp, int *is_v3p, off_t *sizep, struct timeval *timeop) { struct nfsmount *nmp; struct nfsnode *np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); if (fhlenp != NULL) *fhlenp = (size_t)np->n_fhp->nfh_len; if (fhp != NULL) bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len); if (sp != NULL) bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp))); if (is_v3p != NULL) *is_v3p = NFS_ISV3(vp); if (sizep != NULL) *sizep = np->n_size; if (timeop != NULL) { timeop->tv_sec = nmp->nm_timeo / NFS_HZ; timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ); } } /* * This function prints out an option name, based on the conditional * argument. */ static __inline void nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf, size_t *blen) { int len; if (testval != 0 && *blen > strlen(opt)) { len = snprintf(*buf, *blen, "%s", opt); if (len != strlen(opt)) printf("EEK!!\n"); *buf += len; *blen -= len; } } /* * This function printf out an options integer value. */ static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf, size_t *blen) { int len; if (*blen > strlen(opt) + 1) { /* Could result in truncated output string. */ len = snprintf(*buf, *blen, "%s=%d", opt, optval); if (len < *blen) { *buf += len; *blen -= len; } } } /* * Load the option flags and values into the buffer. */ void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen) { char *buf; size_t blen; buf = buffer; blen = buflen; nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf, &blen); if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) { nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 && nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen); } nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0, "nfsv2", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0, ",noncontigwr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == 0, ",lockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == NFSMNT_NOLOCKD, ",nolockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen); } Index: head/sys/fs/nfsserver/nfs_nfsdkrpc.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdkrpc.c (revision 335011) +++ head/sys/fs/nfsserver/nfs_nfsdkrpc.c (revision 335012) @@ -1,571 +1,579 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_kgssapi.h" #include #include #include #include #include #include NFSDLOCKMUTEX; NFSV4ROOTLOCKMUTEX; struct nfsv4lock nfsd_suspend_lock; /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. */ int newnfs_nfsv3_procid[NFS_V3NPROCS] = { NFSPROC_NULL, NFSPROC_GETATTR, NFSPROC_SETATTR, NFSPROC_NOOP, NFSPROC_LOOKUP, NFSPROC_READLINK, NFSPROC_READ, NFSPROC_NOOP, NFSPROC_WRITE, NFSPROC_CREATE, NFSPROC_REMOVE, NFSPROC_RENAME, NFSPROC_LINK, NFSPROC_SYMLINK, NFSPROC_MKDIR, NFSPROC_RMDIR, NFSPROC_READDIR, NFSPROC_FSSTAT, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, }; SYSCTL_DECL(_vfs_nfsd); SVCPOOL *nfsrvd_pool; static int nfs_privport = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, nfs_privport, CTLFLAG_RWTUN, &nfs_privport, 0, "Only allow clients using a privileged port for NFSv2 and 3"); static int nfs_minvers = NFS_VER2; SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_min_nfsvers, CTLFLAG_RWTUN, &nfs_minvers, 0, "The lowest version of NFS handled by the server"); static int nfs_maxvers = NFS_VER4; SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_max_nfsvers, CTLFLAG_RWTUN, &nfs_maxvers, 0, "The highest version of NFS handled by the server"); static int nfs_proc(struct nfsrv_descript *, u_int32_t, SVCXPRT *xprt, struct nfsrvcache **); extern u_long sb_max_adj; extern int newnfs_numnfsd; extern struct proc *nfsd_master_proc; +extern time_t nfsdev_time; /* * NFS server system calls */ static void nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt) { struct nfsrv_descript nd; struct nfsrvcache *rp = NULL; int cacherep, credflavor; memset(&nd, 0, sizeof(nd)); if (rqst->rq_vers == NFS_VER2) { if (rqst->rq_proc > NFSV2PROC_STATFS || newnfs_nfsv3_procid[rqst->rq_proc] == NFSPROC_NOOP) { svcerr_noproc(rqst); svc_freereq(rqst); goto out; } nd.nd_procnum = newnfs_nfsv3_procid[rqst->rq_proc]; nd.nd_flag = ND_NFSV2; } else if (rqst->rq_vers == NFS_VER3) { if (rqst->rq_proc >= NFS_V3NPROCS) { svcerr_noproc(rqst); svc_freereq(rqst); goto out; } nd.nd_procnum = rqst->rq_proc; nd.nd_flag = ND_NFSV3; } else { if (rqst->rq_proc != NFSPROC_NULL && rqst->rq_proc != NFSV4PROC_COMPOUND) { svcerr_noproc(rqst); svc_freereq(rqst); goto out; } nd.nd_procnum = rqst->rq_proc; nd.nd_flag = ND_NFSV4; } /* * Note: we want rq_addr, not svc_getrpccaller for nd_nam2 - * NFS_SRVMAXDATA uses a NULL value for nd_nam2 to detect TCP * mounts. */ nd.nd_mrep = rqst->rq_args; rqst->rq_args = NULL; newnfs_realign(&nd.nd_mrep, M_WAITOK); nd.nd_md = nd.nd_mrep; nd.nd_dpos = mtod(nd.nd_md, caddr_t); nd.nd_nam = svc_getrpccaller(rqst); nd.nd_nam2 = rqst->rq_addr; nd.nd_mreq = NULL; nd.nd_cred = NULL; if (nfs_privport && (nd.nd_flag & ND_NFSV4) == 0) { /* Check if source port is privileged */ u_short port; struct sockaddr *nam = nd.nd_nam; struct sockaddr_in *sin; sin = (struct sockaddr_in *)nam; /* * INET/INET6 - same code: * sin_port and sin6_port are at same offset */ port = ntohs(sin->sin_port); if (port >= IPPORT_RESERVED && nd.nd_procnum != NFSPROC_NULL) { #ifdef INET6 char buf[INET6_ADDRSTRLEN]; #else char buf[INET_ADDRSTRLEN]; #endif #ifdef INET6 #if defined(KLD_MODULE) /* Do not use ip6_sprintf: the nfs module should work without INET6. */ #define ip6_sprintf(buf, a) \ (sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x", \ (a)->s6_addr16[0], (a)->s6_addr16[1], \ (a)->s6_addr16[2], (a)->s6_addr16[3], \ (a)->s6_addr16[4], (a)->s6_addr16[5], \ (a)->s6_addr16[6], (a)->s6_addr16[7]), \ (buf)) #endif #endif printf("NFS request from unprivileged port (%s:%d)\n", #ifdef INET6 sin->sin_family == AF_INET6 ? ip6_sprintf(buf, &satosin6(sin)->sin6_addr) : #if defined(KLD_MODULE) #undef ip6_sprintf #endif #endif inet_ntoa_r(sin->sin_addr, buf), port); svcerr_weakauth(rqst); svc_freereq(rqst); m_freem(nd.nd_mrep); goto out; } } if (nd.nd_procnum != NFSPROC_NULL) { if (!svc_getcred(rqst, &nd.nd_cred, &credflavor)) { svcerr_weakauth(rqst); svc_freereq(rqst); m_freem(nd.nd_mrep); goto out; } /* Set the flag based on credflavor */ if (credflavor == RPCSEC_GSS_KRB5) { nd.nd_flag |= ND_GSS; } else if (credflavor == RPCSEC_GSS_KRB5I) { nd.nd_flag |= (ND_GSS | ND_GSSINTEGRITY); } else if (credflavor == RPCSEC_GSS_KRB5P) { nd.nd_flag |= (ND_GSS | ND_GSSPRIVACY); } else if (credflavor != AUTH_SYS) { svcerr_weakauth(rqst); svc_freereq(rqst); m_freem(nd.nd_mrep); goto out; } #ifdef MAC mac_cred_associate_nfsd(nd.nd_cred); #endif /* * Get a refcnt (shared lock) on nfsd_suspend_lock. * NFSSVC_SUSPENDNFSD will take an exclusive lock on * nfsd_suspend_lock to suspend these threads. * The call to nfsv4_lock() that precedes nfsv4_getref() * ensures that the acquisition of the exclusive lock * takes priority over acquisition of the shared lock by * waiting for any exclusive lock request to complete. * This must be done here, before the check of * nfsv4root exports by nfsvno_v4rootexport(). */ NFSLOCKV4ROOTMUTEX(); nfsv4_lock(&nfsd_suspend_lock, 0, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); nfsv4_getref(&nfsd_suspend_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); if ((nd.nd_flag & ND_NFSV4) != 0) { nd.nd_repstat = nfsvno_v4rootexport(&nd); if (nd.nd_repstat != 0) { NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsd_suspend_lock); NFSUNLOCKV4ROOTMUTEX(); svcerr_weakauth(rqst); svc_freereq(rqst); m_freem(nd.nd_mrep); goto out; } } cacherep = nfs_proc(&nd, rqst->rq_xid, xprt, &rp); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsd_suspend_lock); NFSUNLOCKV4ROOTMUTEX(); } else { NFSMGET(nd.nd_mreq); nd.nd_mreq->m_len = 0; cacherep = RC_REPLY; } if (nd.nd_mrep != NULL) m_freem(nd.nd_mrep); if (nd.nd_cred != NULL) crfree(nd.nd_cred); if (cacherep == RC_DROPIT) { if (nd.nd_mreq != NULL) m_freem(nd.nd_mreq); svc_freereq(rqst); goto out; } if (nd.nd_mreq == NULL) { svcerr_decode(rqst); svc_freereq(rqst); goto out; } if (nd.nd_repstat & NFSERR_AUTHERR) { svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR); if (nd.nd_mreq != NULL) m_freem(nd.nd_mreq); } else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq)) { svcerr_systemerr(rqst); } if (rp != NULL) { nfsrvd_sentcache(rp, (rqst->rq_reply_seq != 0 || SVC_ACK(xprt, NULL)), rqst->rq_reply_seq); } svc_freereq(rqst); out: td_softdep_cleanup(curthread); NFSEXITCODE(0); } /* * Check the cache and, optionally, do the RPC. * Return the appropriate cache response. */ static int nfs_proc(struct nfsrv_descript *nd, u_int32_t xid, SVCXPRT *xprt, struct nfsrvcache **rpp) { struct thread *td = curthread; int cacherep = RC_DOIT, isdgram, taglen = -1; struct mbuf *m; u_char tag[NFSV4_SMALLSTR + 1], *tagstr = NULL; u_int32_t minorvers = 0; uint32_t ack; *rpp = NULL; if (nd->nd_nam2 == NULL) { nd->nd_flag |= ND_STREAMSOCK; isdgram = 0; } else { isdgram = 1; } /* * Two cases: * 1 - For NFSv2 over UDP, if we are near our malloc/mget * limit, just drop the request. There is no * NFSERR_RESOURCE or NFSERR_DELAY for NFSv2 and the * client will timeout/retry over UDP in a little while. * 2 - nd_repstat == 0 && nd_mreq == NULL, which * means a normal nfs rpc, so check the cache */ if ((nd->nd_flag & ND_NFSV2) && nd->nd_nam2 != NULL && nfsrv_mallocmget_limit()) { cacherep = RC_DROPIT; } else { /* * For NFSv3, play it safe and assume that the client is * doing retries on the same TCP connection. */ if ((nd->nd_flag & (ND_NFSV4 | ND_STREAMSOCK)) == ND_STREAMSOCK) nd->nd_flag |= ND_SAMETCPCONN; nd->nd_retxid = xid; nd->nd_tcpconntime = NFSD_MONOSEC; nd->nd_sockref = xprt->xp_sockref; if ((nd->nd_flag & ND_NFSV4) != 0) nfsd_getminorvers(nd, tag, &tagstr, &taglen, &minorvers); if ((nd->nd_flag & ND_NFSV41) != 0) /* NFSv4.1 caches replies in the session slots. */ cacherep = RC_DOIT; else { cacherep = nfsrvd_getcache(nd); ack = 0; SVC_ACK(xprt, &ack); nfsrc_trimcache(xprt->xp_sockref, ack, 0); } } /* * Handle the request. There are three cases. * RC_DOIT - do the RPC * RC_REPLY - return the reply already created * RC_DROPIT - just throw the request away */ if (cacherep == RC_DOIT) { if ((nd->nd_flag & ND_NFSV41) != 0) nd->nd_xprt = xprt; nfsrvd_dorpc(nd, isdgram, tagstr, taglen, minorvers, td); if ((nd->nd_flag & ND_NFSV41) != 0) { if (nd->nd_repstat != NFSERR_REPLYFROMCACHE && (nd->nd_flag & ND_SAVEREPLY) != 0) { /* Cache a copy of the reply. */ m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK); } else m = NULL; if ((nd->nd_flag & ND_HASSEQUENCE) != 0) nfsrv_cache_session(nd->nd_sessionid, nd->nd_slotid, nd->nd_repstat, &m); if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) nd->nd_repstat = 0; cacherep = RC_REPLY; } else { if (nd->nd_repstat == NFSERR_DONTREPLY) cacherep = RC_DROPIT; else cacherep = RC_REPLY; *rpp = nfsrvd_updatecache(nd); } } if (tagstr != NULL && taglen > NFSV4_SMALLSTR) free(tagstr, M_TEMP); NFSEXITCODE2(0, nd); return (cacherep); } static void nfssvc_loss(SVCXPRT *xprt) { uint32_t ack; ack = 0; SVC_ACK(xprt, &ack); nfsrc_trimcache(xprt->xp_sockref, ack, 1); } /* * Adds a socket to the list for servicing by nfsds. */ int nfsrvd_addsock(struct file *fp) { int siz; struct socket *so; int error = 0; SVCXPRT *xprt; static u_int64_t sockref = 0; so = fp->f_data; siz = sb_max_adj; error = soreserve(so, siz, siz); if (error) goto out; /* * Steal the socket from userland so that it doesn't close * unexpectedly. */ if (so->so_type == SOCK_DGRAM) xprt = svc_dg_create(nfsrvd_pool, so, 0, 0); else xprt = svc_vc_create(nfsrvd_pool, so, 0, 0); if (xprt) { fp->f_ops = &badfileops; fp->f_data = NULL; xprt->xp_sockref = ++sockref; if (nfs_minvers == NFS_VER2) svc_reg(xprt, NFS_PROG, NFS_VER2, nfssvc_program, NULL); if (nfs_minvers <= NFS_VER3 && nfs_maxvers >= NFS_VER3) svc_reg(xprt, NFS_PROG, NFS_VER3, nfssvc_program, NULL); if (nfs_maxvers >= NFS_VER4) svc_reg(xprt, NFS_PROG, NFS_VER4, nfssvc_program, NULL); if (so->so_type == SOCK_STREAM) svc_loss_reg(xprt, nfssvc_loss); SVC_RELEASE(xprt); } out: NFSEXITCODE(error); return (error); } /* * Called by nfssvc() for nfsds. Just loops around servicing rpc requests * until it is killed by a signal. */ int nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args) { char principal[MAXHOSTNAMELEN + 5]; struct proc *p; int error = 0; bool_t ret2, ret3, ret4; error = copyinstr(args->principal, principal, sizeof (principal), NULL); if (error) goto out; /* * Only the first nfsd actually does any work. The RPC code * adds threads to it as needed. Any extra processes offered * by nfsd just exit. If nfsd is new enough, it will call us * once with a structure that specifies how many threads to * use. */ NFSD_LOCK(); if (newnfs_numnfsd == 0) { + nfsdev_time = time_second; p = td->td_proc; PROC_LOCK(p); p->p_flag2 |= P2_AST_SU; PROC_UNLOCK(p); newnfs_numnfsd++; NFSD_UNLOCK(); - - /* An empty string implies AUTH_SYS only. */ - if (principal[0] != '\0') { - ret2 = rpc_gss_set_svc_name_call(principal, - "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, NFS_VER2); - ret3 = rpc_gss_set_svc_name_call(principal, - "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, NFS_VER3); - ret4 = rpc_gss_set_svc_name_call(principal, - "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, NFS_VER4); - - if (!ret2 || !ret3 || !ret4) - printf("nfsd: can't register svc name\n"); + error = nfsrv_createdevids(args, td); + if (error == 0) { + /* An empty string implies AUTH_SYS only. */ + if (principal[0] != '\0') { + ret2 = rpc_gss_set_svc_name_call(principal, + "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, + NFS_VER2); + ret3 = rpc_gss_set_svc_name_call(principal, + "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, + NFS_VER3); + ret4 = rpc_gss_set_svc_name_call(principal, + "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, + NFS_VER4); + + if (!ret2 || !ret3 || !ret4) + printf( + "nfsd: can't register svc name\n"); + } + + nfsrvd_pool->sp_minthreads = args->minthreads; + nfsrvd_pool->sp_maxthreads = args->maxthreads; + + svc_run(nfsrvd_pool); + + if (principal[0] != '\0') { + rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2); + rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3); + rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER4); + } } - - nfsrvd_pool->sp_minthreads = args->minthreads; - nfsrvd_pool->sp_maxthreads = args->maxthreads; - - svc_run(nfsrvd_pool); - - if (principal[0] != '\0') { - rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2); - rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3); - rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER4); - } - NFSD_LOCK(); newnfs_numnfsd--; nfsrvd_init(1); PROC_LOCK(p); p->p_flag2 &= ~P2_AST_SU; PROC_UNLOCK(p); } NFSD_UNLOCK(); out: NFSEXITCODE(error); return (error); } /* * Initialize the data structures for the server. * Handshake with any new nfsds starting up to avoid any chance of * corruption. */ void nfsrvd_init(int terminating) { NFSD_LOCK_ASSERT(); if (terminating) { nfsd_master_proc = NULL; NFSD_UNLOCK(); + nfsrv_freealllayoutsanddevids(); nfsrv_freeallbackchannel_xprts(); svcpool_close(nfsrvd_pool); NFSD_LOCK(); } else { NFSD_UNLOCK(); nfsrvd_pool = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsd)); nfsrvd_pool->sp_rcache = NULL; nfsrvd_pool->sp_assign = fhanew_assign; nfsrvd_pool->sp_done = fha_nd_complete; NFSD_LOCK(); } } Index: head/sys/fs/nfsserver/nfs_nfsdport.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdport.c (revision 335011) +++ head/sys/fs/nfsserver/nfs_nfsdport.c (revision 335012) @@ -1,3441 +1,5702 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include +#include /* * Functions that perform the vfs operations required by the routines in * nfsd_serv.c. It is hoped that this change will make the server more * portable. */ #include #include #include #include #include FEATURE(nfsd, "NFSv4 server"); extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern int nfsrv_useacl; extern int newnfs_numnfsd; extern struct mount nfsv4root_mnt; extern struct nfsrv_stablefirst nfsrv_stablefirst; extern void (*nfsd_call_servertimer)(void); extern SVCPOOL *nfsrvd_pool; extern struct nfsv4lock nfsd_suspend_lock; extern struct nfsclienthashhead *nfsclienthash; extern struct nfslockhashhead *nfslockhash; extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; extern struct nfsstatsv1 nfsstatsv1; +extern struct nfslayouthash *nfslayouthash; +extern int nfsrv_layouthashsize; +extern struct mtx nfsrv_dslock_mtx; +extern int nfs_pnfsiothreads; +extern struct nfsdontlisthead nfsrv_dontlisthead; +extern volatile int nfsrv_dontlistlen; +extern volatile int nfsrv_devidcnt; +extern int nfsrv_maxpnfsmirror; struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; NFSDLOCKMUTEX; +NFSSTATESPINLOCK; struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; struct mtx nfsrc_udpmtx; struct mtx nfs_v4root_mutex; +struct mtx nfsrv_dontlistlock_mtx; +struct mtx nfsrv_recalllock_mtx; struct nfsrvfh nfs_rootfh, nfs_pubfh; int nfs_pubfhset = 0, nfs_rootfhset = 0; struct proc *nfsd_master_proc = NULL; int nfsd_debuglevel = 0; static pid_t nfsd_master_pid = (pid_t)-1; static char nfsd_master_comm[MAXCOMLEN + 1]; static struct timeval nfsd_master_start; static uint32_t nfsv4_sysid = 0; +static fhandle_t zerofh; static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, struct ucred *); int nfsrv_enable_crossmntpt = 1; static int nfs_commit_blks; static int nfs_commit_miss; extern int nfsrv_issuedelegs; extern int nfsrv_dolocallocks; extern int nfsd_enable_stringtouid; +extern struct nfsdevicehead nfsrv_devidhead; +static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *, + NFSPROC_T *); +static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **, + int *, char *, fhandle_t *); +static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *, + NFSPROC_T *); +static int nfsrv_proxyds(struct nfsrv_descript *, struct vnode *, off_t, int, + struct ucred *, struct thread *, int, struct mbuf **, char *, + struct mbuf **, struct nfsvattr *, struct acl *); +static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *); +static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *, + NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **); +static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *, + NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **, + char *, int *); +static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, + struct vnode *, struct nfsmount **, int, struct acl *, int *); +static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, + struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *); +static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, + struct vnode *, struct nfsmount *, struct nfsvattr *); +static int nfsrv_putfhname(fhandle_t *, char *); +static int nfsrv_pnfslookupds(struct vnode *, struct vnode *, + struct pnfsdsfile *, struct vnode **, NFSPROC_T *); +static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, + struct vnode *, NFSPROC_T *); +static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *); +static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *, + NFSPROC_T *); +static int nfsrv_pnfsstatfs(struct statfs *); + +int nfs_pnfsio(task_fn_t *, void *); + SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 0, "Debug level for NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); +static int nfsrv_pnfsgetdsattr = 1; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, + &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC"); +/* + * nfsrv_dsdirsize can only be increased and only when the nfsd threads are + * not running. + * The dsN subdirectories for the increased values must have been created + * on all DS servers before this increase is done. + */ +u_int nfsrv_dsdirsize = 20; +static int +sysctl_dsdirsize(SYSCTL_HANDLER_ARGS) +{ + int error, newdsdirsize; + + newdsdirsize = nfsrv_dsdirsize; + error = sysctl_handle_int(oidp, &newdsdirsize, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 || + newnfs_numnfsd != 0) + return (EINVAL); + nfsrv_dsdirsize = newdsdirsize; + return (0); +} +SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, CTLTYPE_UINT | CTLFLAG_RW, 0, + sizeof(nfsrv_dsdirsize), sysctl_dsdirsize, "IU", + "Number of dsN subdirs on the DS servers"); + #define MAX_REORDERED_RPC 16 #define NUM_HEURISTIC 1031 #define NHUSE_INIT 64 #define NHUSE_INC 16 #define NHUSE_MAX 2048 static struct nfsheur { struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ off_t nh_nextoff; /* next offset for sequential detection */ int nh_use; /* use count for selection */ int nh_seqcount; /* heuristic */ } nfsheur[NUM_HEURISTIC]; /* * Heuristic to detect sequential operation. */ static struct nfsheur * nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) { struct nfsheur *nh; int hi, try; /* Locate best candidate. */ try = 32; hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; nh = &nfsheur[hi]; while (try--) { if (nfsheur[hi].nh_vp == vp) { nh = &nfsheur[hi]; break; } if (nfsheur[hi].nh_use > 0) --nfsheur[hi].nh_use; hi = (hi + 1) % NUM_HEURISTIC; if (nfsheur[hi].nh_use < nh->nh_use) nh = &nfsheur[hi]; } /* Initialize hint if this is a new file. */ if (nh->nh_vp != vp) { nh->nh_vp = vp; nh->nh_nextoff = uio->uio_offset; nh->nh_use = NHUSE_INIT; if (uio->uio_offset == 0) nh->nh_seqcount = 4; else nh->nh_seqcount = 1; } /* Calculate heuristic. */ if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || uio->uio_offset == nh->nh_nextoff) { /* See comments in vfs_vnops.c:sequential_heuristic(). */ nh->nh_seqcount += howmany(uio->uio_resid, 16384); if (nh->nh_seqcount > IO_SEQMAX) nh->nh_seqcount = IO_SEQMAX; } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { /* Probably a reordered RPC, leave seqcount alone. */ } else if (nh->nh_seqcount > 1) { nh->nh_seqcount /= 2; } else { nh->nh_seqcount = 0; } nh->nh_use += NHUSE_INC; if (nh->nh_use > NHUSE_MAX) nh->nh_use = NHUSE_MAX; return (nh); } /* * Get attributes into nfsvattr structure. */ int -nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, - struct thread *p, int vpislocked) +nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, + struct nfsrv_descript *nd, struct thread *p, int vpislocked, + nfsattrbit_t *attrbitp) { - int error, lockedit = 0; + int error, gotattr, lockedit = 0; + struct nfsvattr na; if (vpislocked == 0) { /* * When vpislocked == 0, the vnode is either exclusively * locked by this thread or not locked by this thread. * As such, shared lock it, if not exclusively locked. */ if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { lockedit = 1; NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); } } - error = VOP_GETATTR(vp, &nvap->na_vattr, cred); + + /* + * Acquire the Change, Size and TimeModify attributes, as required. + * This needs to be done for regular files if: + * - non-NFSv4 RPCs or + * - when attrbitp == NULL or + * - an NFSv4 RPC with any of the above attributes in attrbitp. + * A return of 0 for nfsrv_proxyds() indicates that it has acquired + * these attributes. nfsrv_proxyds() will return an error if the + * server is not a pNFS one. + */ + gotattr = 0; + if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL || + (nd->nd_flag & ND_NFSV4) == 0 || + NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) || + NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) || + NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) || + NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY))) { + error = nfsrv_proxyds(nd, vp, 0, 0, nd->nd_cred, p, + NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL); + if (error == 0) + gotattr = 1; + } + + error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); if (lockedit != 0) NFSVOPUNLOCK(vp, 0); + /* + * If we got the Change, Size and Modify Time from the DS, + * replace them. + */ + if (gotattr != 0) { + nvap->na_atime = na.na_atime; + nvap->na_mtime = na.na_mtime; + nvap->na_filerev = na.na_filerev; + nvap->na_size = na.na_size; + } + NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr, + error, (uintmax_t)na.na_filerev); + NFSEXITCODE(error); return (error); } /* * Get a file handle for a vnode. */ int nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) { int error; NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fhp->fh_fid); NFSEXITCODE(error); return (error); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use * vn_writechk() and VOP_ACCESSX() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write * case. * 2 - The owner is to be given access irrespective of mode bits for some * operations, so that processes that chmod after opening a file don't * break. */ int nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, u_int32_t *supportedtypep) { struct vattr vattr; int error = 0, getret = 0; if (vpislocked == 0) { if (NFSVOPLOCK(vp, LK_SHARED) != 0) { error = EPERM; goto out; } } if (accmode & VWRITE) { /* Just vn_writechk() changed to check rdonly */ /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character * device resident on the file system. */ if (NFSVNO_EXRDONLY(exp) || (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: error = EROFS; default: break; } } /* * If there's shared text associated with * the inode, try to free it up once. If * we fail, we can't allow writing. */ if (VOP_IS_TEXT(vp) && error == 0) error = ETXTBSY; } if (error != 0) { if (vpislocked == 0) NFSVOPUNLOCK(vp, 0); goto out; } /* * Should the override still be applied when ACLs are enabled? */ error = VOP_ACCESSX(vp, accmode, cred, p); if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { /* * Try again with VEXPLICIT_DENY, to see if the test for * deletion is supported. */ error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); if (error == 0) { if (vp->v_type == VDIR) { accmode &= ~(VDELETE | VDELETE_CHILD); accmode |= VWRITE; error = VOP_ACCESSX(vp, accmode, cred, p); } else if (supportedtypep != NULL) { *supportedtypep &= ~NFSACCESS_DELETE; } } } /* * Allow certain operations for the owner (reads and writes * on files that are already open). */ if (override != NFSACCCHK_NOOVERRIDE && (error == EPERM || error == EACCES)) { if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) error = 0; else if (override & NFSACCCHK_ALLOWOWNER) { getret = VOP_GETATTR(vp, &vattr, cred); if (getret == 0 && cred->cr_uid == vattr.va_uid) error = 0; } } if (vpislocked == 0) NFSVOPUNLOCK(vp, 0); out: NFSEXITCODE(error); return (error); } /* * Set attribute(s) vnop. */ int nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error; error = VOP_SETATTR(vp, &nvap->na_vattr, cred); + if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL || + nvap->na_vattr.va_gid != (gid_t)VNOVAL || + nvap->na_vattr.va_size != VNOVAL || + nvap->na_vattr.va_mode != (mode_t)VNOVAL || + nvap->na_vattr.va_atime.tv_sec != VNOVAL || + nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) { + /* For a pNFS server, set the attributes on the DS file. */ + error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETATTR, + NULL, NULL, NULL, nvap, NULL); + if (error == ENOENT) + error = 0; + } NFSEXITCODE(error); return (error); } /* * Set up nameidata for a lookup() call and do it. */ int nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p, struct vnode **retdirp) { struct componentname *cnp = &ndp->ni_cnd; int i; struct iovec aiov; struct uio auio; int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; int error = 0; char *cp; *retdirp = NULL; cnp->cn_nameptr = cnp->cn_pnbuf; ndp->ni_lcf = 0; /* * Extract and set starting directory. */ if (dp->v_type != VDIR) { if (islocked) vput(dp); else vrele(dp); nfsvno_relpathbuf(ndp); error = ENOTDIR; goto out1; } if (islocked) NFSVOPUNLOCK(dp, 0); VREF(dp); *retdirp = dp; if (NFSVNO_EXRDONLY(exp)) cnp->cn_flags |= RDONLY; ndp->ni_segflg = UIO_SYSSPACE; if (nd->nd_flag & ND_PUBLOOKUP) { ndp->ni_loopcnt = 0; if (cnp->cn_pnbuf[0] == '/') { vrele(dp); /* * Check for degenerate pathnames here, since lookup() * panics on them. */ for (i = 1; i < ndp->ni_pathlen; i++) if (cnp->cn_pnbuf[i] != '/') break; if (i == ndp->ni_pathlen) { error = NFSERR_ACCES; goto out; } dp = rootvnode; VREF(dp); } } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || (nd->nd_flag & ND_NFSV4) == 0) { /* * Only cross mount points for NFSv4 when doing a * mount while traversing the file system above * the mount point, unless nfsrv_enable_crossmntpt is set. */ cnp->cn_flags |= NOCROSSMOUNT; } /* * Initialize for scan, set ni_startdir and bump ref on dp again * because lookup() will dereference ni_startdir. */ cnp->cn_thread = p; ndp->ni_startdir = dp; ndp->ni_rootdir = rootvnode; ndp->ni_topdir = NULL; if (!lockleaf) cnp->cn_flags |= LOCKLEAF; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* * Call lookup() to do the real work. If an error occurs, * ndp->ni_vp and ni_dvp are left uninitialized or NULL and * we do not have to dereference anything before returning. * In either case ni_startdir will be dereferenced and NULLed * out. */ error = lookup(ndp); if (error) break; /* * Check for encountering a symbolic link. Trivial * termination occurs if no symlink encountered. */ if ((cnp->cn_flags & ISSYMLINK) == 0) { if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) nfsvno_relpathbuf(ndp); if (ndp->ni_vp && !lockleaf) NFSVOPUNLOCK(ndp->ni_vp, 0); break; } /* * Validate symlink */ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) NFSVOPUNLOCK(ndp->ni_dvp, 0); if (!(nd->nd_flag & ND_PUBLOOKUP)) { error = EINVAL; goto badlink2; } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; goto badlink2; } if (ndp->ni_pathlen > 1) cp = uma_zalloc(namei_zone, M_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = NULL; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink1: if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); badlink2: vrele(ndp->ni_dvp); vput(ndp->ni_vp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink1; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink1; } /* * Adjust or replace path */ if (ndp->ni_pathlen > 1) { NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; /* * Cleanup refs for next loop and check if root directory * should replace current directory. Normally ni_dvp * becomes the new base directory and is cleaned up when * we loop. Explicitly null pointers after invalidation * to clarify operation. */ vput(ndp->ni_vp); ndp->ni_vp = NULL; if (cnp->cn_pnbuf[0] == '/') { vrele(ndp->ni_dvp); ndp->ni_dvp = ndp->ni_rootdir; VREF(ndp->ni_dvp); } ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } if (!lockleaf) cnp->cn_flags &= ~LOCKLEAF; out: if (error) { nfsvno_relpathbuf(ndp); ndp->ni_vp = NULL; ndp->ni_dvp = NULL; ndp->ni_startdir = NULL; } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } out1: NFSEXITCODE2(error, nd); return (error); } /* * Set up a pathname buffer and return a pointer to it and, optionally * set a hash pointer. */ void nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) { struct componentname *cnp = &ndp->ni_cnd; cnp->cn_flags |= (NOMACCHECK | HASBUF); cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); if (hashpp != NULL) *hashpp = NULL; *bufpp = cnp->cn_pnbuf; } /* * Release the above path buffer, if not released by nfsvno_namei(). */ void nfsvno_relpathbuf(struct nameidata *ndp) { if ((ndp->ni_cnd.cn_flags & HASBUF) == 0) panic("nfsrelpath"); uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } /* * Readlink vnode op into an mbuf list. */ int nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) { struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; struct iovec *ivp = iv; struct uio io, *uiop = &io; struct mbuf *mp, *mp2 = NULL, *mp3 = NULL; int i, len, tlen, error = 0; len = 0; i = 0; while (len < NFS_MAXPATHLEN) { NFSMGET(mp); MCLGET(mp, M_WAITOK); mp->m_len = M_SIZE(mp); if (len == 0) { mp3 = mp2 = mp; } else { mp2->m_next = mp; mp2 = mp; } if ((len + mp->m_len) > NFS_MAXPATHLEN) { mp->m_len = NFS_MAXPATHLEN - len; len = NFS_MAXPATHLEN; } else { len += mp->m_len; } ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; i++; ivp++; } uiop->uio_iov = iv; uiop->uio_iovcnt = i; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; error = VOP_READLINK(vp, uiop, cred); if (error) { m_freem(mp3); *lenp = 0; goto out; } if (uiop->uio_resid > 0) { len -= uiop->uio_resid; tlen = NFSM_RNDUP(len); nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len); } *lenp = len; *mpp = mp3; *mpendp = mp; out: NFSEXITCODE(error); return (error); } /* * Read vnode op call into mbuf list. */ int nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) { struct mbuf *m; int i; struct iovec *iv; struct iovec *iv2; int error = 0, len, left, siz, tlen, ioflag = 0; struct mbuf *m2 = NULL, *m3; struct uio io, *uiop = &io; struct nfsheur *nh; + /* + * Attempt to read from a DS file. A return of ENOENT implies + * there is no DS file to read. + */ + error = nfsrv_proxyds(NULL, vp, off, cnt, cred, p, NFSPROC_READDS, mpp, + NULL, mpendp, NULL, NULL); + if (error != ENOENT) + return (error); + len = left = NFSM_RNDUP(cnt); m3 = NULL; /* * Generate the mbuf list with the uio_iov ref. to it. */ i = 0; while (left > 0) { NFSMGET(m); MCLGET(m, M_WAITOK); m->m_len = 0; siz = min(M_TRAILINGSPACE(m), left); left -= siz; i++; if (m3) m2->m_next = m; else m3 = m; m2 = m; } iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv2 = iv; m = m3; left = len; i = 0; while (left > 0) { if (m == NULL) panic("nfsvno_read iov"); siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { iv->iov_base = mtod(m, caddr_t) + m->m_len; iv->iov_len = siz; m->m_len += siz; left -= siz; iv++; i++; } m = m->m_next; } uiop->uio_iovcnt = i; uiop->uio_offset = off; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; nh = nfsrv_sequential_heuristic(uiop, vp); ioflag |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); free(iv2, M_TEMP); if (error) { m_freem(m3); *mpp = NULL; goto out; } nh->nh_nextoff = uiop->uio_offset; tlen = len - uiop->uio_resid; cnt = cnt < tlen ? cnt : tlen; tlen = NFSM_RNDUP(cnt); if (tlen == 0) { m_freem(m3); m3 = NULL; } else if (len != tlen || tlen != cnt) nfsrv_adj(m3, len - tlen, tlen - cnt); *mpp = m3; *mpendp = m2; out: NFSEXITCODE(error); return (error); } /* * Write vnode op from an mbuf list. */ int -nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable, +nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int *stable, struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) { struct iovec *ivp; int i, len; struct iovec *iv; int ioflags, error; struct uio io, *uiop = &io; struct nfsheur *nh; + /* + * Attempt to write to a DS file. A return of ENOENT implies + * there is no DS file to write. + */ + error = nfsrv_proxyds(NULL, vp, off, retlen, cred, p, NFSPROC_WRITEDS, + &mp, cp, NULL, NULL, NULL); + if (error != ENOENT) { + *stable = NFSWRITE_FILESYNC; + return (error); + } + ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv = ivp; uiop->uio_iovcnt = cnt; i = mtod(mp, caddr_t) + mp->m_len - cp; len = retlen; while (len > 0) { if (mp == NULL) panic("nfsvno_write"); if (i > 0) { i = min(i, len); ivp->iov_base = cp; ivp->iov_len = i; ivp++; len -= i; } mp = mp->m_next; if (mp) { i = mp->m_len; cp = mtod(mp, caddr_t); } } - if (stable == NFSWRITE_UNSTABLE) + if (*stable == NFSWRITE_UNSTABLE) ioflags = IO_NODELOCKED; else ioflags = (IO_SYNC | IO_NODELOCKED); uiop->uio_resid = retlen; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; NFSUIOPROC(uiop, p); uiop->uio_offset = off; nh = nfsrv_sequential_heuristic(uiop, vp); ioflags |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; error = VOP_WRITE(vp, uiop, ioflags, cred); if (error == 0) nh->nh_nextoff = uiop->uio_offset; free(iv, M_TEMP); NFSEXITCODE(error); return (error); } /* * Common code for creating a regular file (plus special files for V2). */ int nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp) { u_quad_t tempsize; int error; error = nd->nd_repstat; if (!error && ndp->ni_vp == NULL) { if (nvap->na_type == VREG || nvap->na_type == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); + /* For a pNFS server, create the data file on a DS. */ + if (error == 0 && nvap->na_type == VREG) { + /* + * Create a data file on a DS for a pNFS server. + * This function just returns if not + * running a pNFS DS or the creation fails. + */ + nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, + nd->nd_cred, p); + } vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); if (!error) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; error = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, nd->nd_cred); if (error != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; error = NFSERR_NOTSUPP; } } } /* * NFS V2 Only. nfsrvd_mknod() does this for V3. * (This implies, just get out on an error.) */ } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || nvap->na_type == VFIFO) { if (nvap->na_type == VCHR && rdev == 0xffffffff) nvap->na_type = VFIFO; if (nvap->na_type != VFIFO && (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV, 0))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } nvap->na_rdev = rdev; error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); if (error) goto out; } else { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = ENXIO; goto out; } *vpp = ndp->ni_vp; } else { /* * Handle cases where error is already set and/or * the file exists. * 1 - clean up the lookup * 2 - iff !error and na_size set, truncate it */ vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); *vpp = ndp->ni_vp; if (ndp->ni_dvp == *vpp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (!error && nvap->na_size != VNOVAL) { error = nfsvno_accchk(*vpp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (!error) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; error = VOP_SETATTR(*vpp, &nvap->na_vattr, nd->nd_cred); } } if (error) vput(*vpp); } out: NFSEXITCODE(error); return (error); } /* * Do a mknod vnode op. */ int nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p) { int error = 0; enum vtype vtyp; vtyp = nvap->na_type; /* * Iff doesn't exist, create it. */ if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = NFSERR_BADTYPE; goto out; } if (vtyp == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); } else { if (nvap->na_type != VFIFO && (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); /* * Since VOP_MKNOD returns the ni_vp, I can't * see any reason to do the lookup. */ } out: NFSEXITCODE(error); return (error); } /* * Mkdir vnode op. */ int nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp != NULL) { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); nfsvno_relpathbuf(ndp); error = EEXIST; goto out; } error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); out: NFSEXITCODE(error); return (error); } /* * symlink vnode op. */ int nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr, pathcp); vput(ndp->ni_dvp); vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); /* * Although FreeBSD still had the lookup code in * it for 7/current, there doesn't seem to be any * point, since VOP_SYMLINK() returns the ni_vp. * Just vput it for v2. */ if (!not_v2 && !error) vput(ndp->ni_vp); out: NFSEXITCODE(error); return (error); } /* * Parse symbolic link arguments. * This function has an ugly side effect. It will malloc() an area for * the symlink and set iov_base to point to it, only if it succeeds. * So, if it returns with uiop->uio_iov->iov_base != NULL, that must * be FREE'd later. */ int nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, struct thread *p, char **pathcpp, int *lenp) { u_int32_t *tl; char *pathcp = NULL; int error = 0, len; struct nfsv2_sattr *sp; *pathcpp = NULL; *lenp = 0; if ((nd->nd_flag & ND_NFSV3) && (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len > NFS_MAXPATHLEN || len <= 0) { error = EBADRPC; goto nfsmout; } pathcp = malloc(len + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, pathcp, len); if (error) goto nfsmout; if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); } *pathcpp = pathcp; *lenp = len; NFSEXITCODE2(0, nd); return (0); nfsmout: if (pathcp) free(pathcp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Remove a non-directory object. */ int nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { - struct vnode *vp; - int error = 0; + struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS]; + int error = 0, mirrorcnt; + char fname[PNFS_FILENAME_LEN + 1]; + fhandle_t fh; vp = ndp->ni_vp; + dsdvp[0] = NULL; if (vp->v_type == VDIR) error = NFSERR_ISDIR; else if (is_v4) error = nfsrv_checkremove(vp, 1, p); + if (error == 0) + nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh); if (!error) error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); + if (error == 0 && dsdvp[0] != NULL) + nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Remove a directory. */ int nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *vp; int error = 0; vp = ndp->ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (ndp->ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_vflag & VV_ROOT) error = EBUSY; out: if (!error) error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Rename vnode op. */ int nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) { - struct vnode *fvp, *tvp, *tdvp; - int error = 0; + struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS]; + int error = 0, mirrorcnt; + char fname[PNFS_FILENAME_LEN + 1]; + fhandle_t fh; + dsdvp[0] = NULL; fvp = fromndp->ni_vp; if (ndstat) { vrele(fromndp->ni_dvp); vrele(fvp); error = ndstat; goto out1; } tdvp = tondp->ni_dvp; tvp = tondp->ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; goto out; } if (tvp->v_type == VDIR && tvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } /* * A rename to '.' or '..' results in a prematurely * unlocked vnode on FreeBSD5, so I'm just going to fail that * here. */ if ((tondp->ni_cnd.cn_namelen == 1 && tondp->ni_cnd.cn_nameptr[0] == '.') || (tondp->ni_cnd.cn_namelen == 2 && tondp->ni_cnd.cn_nameptr[0] == '.' && tondp->ni_cnd.cn_nameptr[1] == '.')) { error = EINVAL; goto out; } } if (fvp->v_type == VDIR && fvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp->v_mount != tdvp->v_mount) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp == tdvp) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; goto out; } if (fvp == tvp) { /* * If source and destination are the same, there is nothing to * do. Set error to -1 to indicate this. */ error = -1; goto out; } if (ndflag & ND_NFSV4) { if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(fvp, 0, p); NFSVOPUNLOCK(fvp, 0); } else error = EPERM; if (tvp && !error) error = nfsrv_checkremove(tvp, 1, p); } else { /* * For NFSv2 and NFSv3, try to get rid of the delegation, so * that the NFSv4 client won't be confused by the rename. * Since nfsd_recalldelegation() can only be called on an * unlocked vnode at this point and fvp is the file that will * still exist after the rename, just do fvp. */ nfsd_recalldelegation(fvp, p); } + if (error == 0 && tvp != NULL) { + nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh); + NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup" + " dsdvp=%p\n", dsdvp[0]); + } out: if (!error) { error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, &tondp->ni_cnd); } else { if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fromndp->ni_dvp); vrele(fvp); if (error == -1) error = 0; } + + /* + * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and + * if the rename succeeded, the DS file for the tvp needs to be + * removed. + */ + if (error == 0 && dsdvp[0] != NULL) { + nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p); + NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n"); + } + vrele(tondp->ni_startdir); nfsvno_relpathbuf(tondp); out1: vrele(fromndp->ni_startdir); nfsvno_relpathbuf(fromndp); NFSEXITCODE(error); return (error); } /* * Link vnode op. */ int nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *xp; int error = 0; xp = ndp->ni_vp; if (xp != NULL) { error = EEXIST; } else { xp = ndp->ni_dvp; if (vp->v_mount != xp->v_mount) error = EXDEV; } if (!error) { NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) == 0) error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); else error = EPERM; if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); NFSVOPUNLOCK(vp, 0); } else { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vrele(ndp->ni_vp); } nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Do the fsync() appropriate for the commit. */ int nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, struct thread *td) { int error = 0; /* * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of * file is done. At this time VOP_FSYNC does not accept offset and * byte count parameters so call VOP_FSYNC the whole file for now. * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. * File systems that do not use the buffer cache (as indicated * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). */ if (cnt == 0 || cnt > MAX_COMMIT_COUNT || (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { /* * Give up and do the whole thing */ if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); } else { /* * Locate and synchronously write any buffers that fall * into the requested range. Note: we are assuming that * f_iosize is a power of 2. */ int iosize = vp->v_mount->mnt_stat.f_iosize; int iomask = iosize - 1; struct bufobj *bo; daddr_t lblkno; /* * Align to iosize boundary, super-align to page boundary. */ if (off & iomask) { cnt += off & iomask; off &= ~(u_quad_t)iomask; } if (off & PAGE_MASK) { cnt += off & PAGE_MASK; off &= ~(u_quad_t)PAGE_MASK; } lblkno = off / iosize; if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, off, off + cnt, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } bo = &vp->v_bufobj; BO_LOCK(bo); while (cnt > 0) { struct buf *bp; /* * If we have a buffer and it is marked B_DELWRI we * have to lock and write it. Otherwise the prior * write is assumed to have already been committed. * * gbincore() can return invalid buffers now so we * have to check that bit as well (though B_DELWRI * should not be set if B_INVAL is set there could be * a race here since we haven't locked the buffer). */ if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); continue; /* retry */ } if ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI) { bremfree(bp); bp->b_flags &= ~B_ASYNC; bwrite(bp); ++nfs_commit_miss; } else BUF_UNLOCK(bp); BO_LOCK(bo); } ++nfs_commit_blks; if (cnt < iosize) break; cnt -= iosize; ++lblkno; } BO_UNLOCK(bo); } NFSEXITCODE(error); return (error); } /* * Statfs vnode op. */ int nfsvno_statfs(struct vnode *vp, struct statfs *sf) { + struct statfs *tsf; int error; + tsf = NULL; + if (nfsrv_devidcnt > 0) { + /* For a pNFS service, get the DS numbers. */ + tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO); + error = nfsrv_pnfsstatfs(tsf); + if (error != 0) { + free(tsf, M_TEMP); + tsf = NULL; + } + } error = VFS_STATFS(vp->v_mount, sf); if (error == 0) { + if (tsf != NULL) { + sf->f_blocks = tsf->f_blocks; + sf->f_bavail = tsf->f_bavail; + sf->f_bfree = tsf->f_bfree; + sf->f_bsize = tsf->f_bsize; + } /* * Since NFS handles these values as unsigned on the * wire, there is no way to represent negative values, * so set them to 0. Without this, they will appear * to be very large positive values for clients like * Solaris10. */ if (sf->f_bavail < 0) sf->f_bavail = 0; if (sf->f_ffree < 0) sf->f_ffree = 0; } + free(tsf, M_TEMP); NFSEXITCODE(error); return (error); } /* * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but * must handle nfsrv_opencheck() calls after any other access checks. */ void nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, struct nfsexstuff *exp, struct vnode **vpp) { struct vnode *vp = NULL; u_quad_t tempsize; struct nfsexstuff nes; if (ndp->ni_vp == NULL) nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, NULL, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { if (ndp->ni_vp == NULL) { vrele(ndp->ni_startdir); nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); + /* For a pNFS server, create the data file on a DS. */ + if (nd->nd_repstat == 0) { + /* + * Create a data file on a DS for a pNFS server. + * This function just returns if not + * running a pNFS DS or the creation fails. + */ + nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr, + cred, p); + } vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); if (!nd->nd_repstat) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, cred); if (nd->nd_repstat != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; nd->nd_repstat = NFSERR_NOTSUPP; } else NFSSETBIT_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS); } else { nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp); } } vp = ndp->ni_vp; } else { if (ndp->ni_startdir) vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vp = ndp->ni_vp; if (create == NFSV4OPEN_CREATE) { if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); } if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { if (ndp->ni_cnd.cn_flags & RDONLY) NFSVNO_SETEXRDONLY(&nes); else NFSVNO_EXINIT(&nes); nd->nd_repstat = nfsvno_accchk(vp, VWRITE, cred, &nes, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; nd->nd_repstat = VOP_SETATTR(vp, &nvap->na_vattr, cred); } } else if (vp->v_type == VREG) { nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); } } } else { if (ndp->ni_cnd.cn_flags & HASBUF) nfsvno_relpathbuf(ndp); if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { vrele(ndp->ni_startdir); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vput(ndp->ni_vp); } } *vpp = vp; NFSEXITCODE2(0, nd); } /* * Updates the file rev and sets the mtime and ctime * to the current clock time, returning the va_filerev and va_Xtime * values. * Return ESTALE to indicate the vnode is VI_DOOMED. */ int nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, - struct ucred *cred, struct thread *p) + struct nfsrv_descript *nd, struct thread *p) { struct vattr va; VATTR_NULL(&va); vfs_timestamp(&va.va_mtime); if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) != 0) return (ESTALE); } - (void) VOP_SETATTR(vp, &va, cred); - (void) nfsvno_getattr(vp, nvap, cred, p, 1); + (void) VOP_SETATTR(vp, &va, nd->nd_cred); + (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL); return (0); } /* * Glue routine to nfsv4_fillattr(). */ int nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) { + struct statfs *sf; int error; + sf = NULL; + if (nfsrv_devidcnt > 0 && + (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) || + NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) || + NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) { + sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO); + error = nfsrv_pnfsstatfs(sf); + if (error != 0) { + free(sf, M_TEMP); + sf = NULL; + } + } error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, - mounted_on_fileno); + mounted_on_fileno, sf); + free(sf, M_TEMP); NFSEXITCODE2(0, nd); return (error); } /* Since the Readdir vnode ops vary, put the entire functions in here. */ /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR * - calls VOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The NFSM_CLGET macro is used here so that the reply will be packed * tightly in mbuf clusters. * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but they might confuse clients * for other os'. * - it trims out records with d_type == DT_WHT * these cannot be seen through NFS (unless we extend the protocol) * The alternate call nfsrvd_readdirplus() does lookups as well. * PS: The NFS protocol spec. does not clarify what the "count" byte * argument is a count of.. just name strings and file id's or the * entire reply rpc or ... * I tried just file name and id sizes and it confused the Sun client, * so I am using the full rpc size now. The "paranoia.." comment refers * to including the status longwords that are not a part of the dir. * "entry" structures, but are in the rpc. */ int nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct thread *p, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct nfsvattr at; int nlen, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, ncookies; u_int64_t off, toff, verf; u_long *cookies = NULL, *cookiep; struct uio io; struct iovec iv; int is_ufs; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); off = fxdr_unsigned(u_quad_t, *tl++); } else { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; verf = fxdr_hyper(tl); tl += 2; } toff = off; cnt = fxdr_unsigned(int, *tl); if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); fullsiz = siz; if (nd->nd_flag & ND_NFSV3) { - nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, - p, 1); + nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, + NULL); #if 0 /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ if (!nd->nd_repstat && toff && verf != at.na_filerev) nd->nd_repstat = NFSERR_BAD_COOKIE; #endif } if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (nd->nd_repstat == 0 && cnt == 0) { if (nd->nd_flag & ND_NFSV2) /* NFSv2 does not have NFSERR_TOOSMALL */ nd->nd_repstat = EPERM; else nd->nd_repstat = NFSERR_TOOSMALL; } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; rbuf = malloc(siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (nd->nd_flag & ND_NFSV3) { - getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); + getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; } /* * Handles the failed cases. nd->nd_repstat == 0 past here. */ if (nd->nd_repstat) { vput(vp); free(rbuf, M_TEMP); if (cookies) free(cookies, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); } else { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; } *tl++ = newnfs_false; *tl = newnfs_true; free(rbuf, M_TEMP); free(cookies, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } vput(vp); /* * dirlen is the size of the reply, including all XDR and must * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate * if the XDR should be included in "count", but to be safe, we do. * (Include the two booleans at the end of the reply in dirlen now.) */ if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; } else { dirlen = 2 * NFSX_UNSIGNED; } /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN) { if (nd->nd_flag & ND_NFSV3) dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); else dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); if (dirlen > cnt) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry. */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; } else { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; } *tl = txdr_unsigned(dp->d_fileno); (void) nfsm_strtom(nd, dp->d_name, nlen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; } else NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(*cookiep); } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos < cend) eofflag = 0; NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; free(rbuf, M_TEMP); free(cookies, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Readdirplus for V3 and Readdir for V4. */ int nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct thread *p, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct vnode *nvp; fhandle_t nfh; struct nfsvattr nva, at, *nvap = &nva; struct mbuf *mb0, *mb1; struct nfsreferral *refp; int nlen, r, error = 0, getret = 1, usevget = 1; int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; caddr_t bpos0, bpos1; u_int64_t off, toff, verf; u_long *cookies = NULL, *cookiep; nfsattrbit_t attrbits, rderrbits, savbits; struct uio io; struct iovec iv; struct componentname cn; int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; struct mount *mp, *new_mp; uint64_t mounted_on_fileno; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); off = fxdr_hyper(tl); toff = off; tl += 2; verf = fxdr_hyper(tl); tl += 2; siz = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); /* * Use the server's maximum data transfer size as the upper bound * on reply datalen. */ if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); /* * siz is a "hint" of how much directory information (name, fileid, * cookie) should be in the reply. At least one client "hints" 0, * so I set it to cnt for that case. I also round it up to the * next multiple of DIRBLKSIZ. */ if (siz <= 0) siz = cnt; siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); if (nd->nd_flag & ND_NFSV4) { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; NFSSET_ATTRBIT(&savbits, &attrbits); NFSCLRNOTFILLABLE_ATTRBIT(&attrbits); NFSZERO_ATTRBIT(&rderrbits); NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); } else { NFSZERO_ATTRBIT(&attrbits); } fullsiz = siz; - nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); + nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); if (!nd->nd_repstat) { if (off && verf != at.na_filerev) { /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ #if 0 if (nd->nd_flag & ND_NFSV4) { nd->nd_repstat = NFSERR_NOTSAME; } else { nd->nd_repstat = NFSERR_BAD_COOKIE; } #endif } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) { nd->nd_repstat = NFSERR_BAD_COOKIE; } } if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (!nd->nd_repstat && cnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; rbuf = malloc(siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; - getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); + getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (!nd->nd_repstat) nd->nd_repstat = getret; if (nd->nd_repstat) { vput(vp); if (cookies) free(cookies, M_TEMP); free(rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; *tl++ = newnfs_false; *tl = newnfs_true; free(cookies, M_TEMP); free(rbuf, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || ((nd->nd_flag & ND_NFSV4) && ((dp->d_namlen == 1 && dp->d_name[0] == '.') || (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } /* * Busy the file system so that the mount point won't go away * and, as such, VFS_VGET() can be used safely. */ mp = vp->v_mount; vfs_ref(mp); NFSVOPUNLOCK(vp, 0); nd->nd_repstat = vfs_busy(mp, 0); vfs_rel(mp); if (nd->nd_repstat != 0) { vrele(vp); free(cookies, M_TEMP); free(rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * Check to see if entries in this directory can be safely acquired * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. * ZFS snapshot directories need VOP_LOOKUP(), so that any * automount of the snapshot directory that is required will * be done. * This needs to be done here for NFSv4, since NFSv4 never does * a VFS_VGET() for "." or "..". */ if (is_zfs == 1) { r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); if (r == EOPNOTSUPP) { usevget = 0; cn.cn_nameiop = LOOKUP; cn.cn_lkflags = LK_SHARED | LK_RETRY; cn.cn_cred = nd->nd_cred; cn.cn_thread = p; } else if (r == 0) vput(nvp); } /* * Save this position, in case there is an error before one entry * is created. */ mb0 = nd->nd_mb; bpos0 = nd->nd_bpos; /* * Fill in the first part of the reply. * dirlen is the reply length in bytes and cannot exceed cnt. * (Include the two booleans at the end of the reply in dirlen now, * so we recognize when we have exceeded cnt.) */ if (nd->nd_flag & ND_NFSV3) { dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; nfsrv_postopattr(nd, getret, &at); } else { dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; } NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); txdr_hyper(at.na_filerev, tl); /* * Save this position, in case there is an empty reply needed. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; /* Loop through the records and build reply */ entrycnt = 0; while (cpos < cend && ncookies > 0 && dirlen < cnt) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN && ((nd->nd_flag & ND_NFSV3) || nlen > 2 || (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) || (nlen == 1 && dp->d_name[0] != '.'))) { /* * Save the current position in the reply, in case * this entry exceeds cnt. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; /* * For readdir_and_lookup get the vnode using * the file number. */ nvp = NULL; refp = NULL; r = 0; at_root = 0; needs_unbusy = 0; new_mp = mp; mounted_on_fileno = (uint64_t)dp->d_fileno; if ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&savbits)) { if (nd->nd_flag & ND_NFSV4) refp = nfsv4root_getreferral(NULL, vp, dp->d_fileno); if (refp == NULL) { if (usevget) r = VFS_VGET(mp, dp->d_fileno, LK_SHARED, &nvp); else r = EOPNOTSUPP; if (r == EOPNOTSUPP) { if (usevget) { usevget = 0; cn.cn_nameiop = LOOKUP; cn.cn_lkflags = LK_SHARED | LK_RETRY; cn.cn_cred = nd->nd_cred; cn.cn_thread = p; } cn.cn_nameptr = dp->d_name; cn.cn_namelen = nlen; cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF; if (nlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.') cn.cn_flags |= ISDOTDOT; if (NFSVOPLOCK(vp, LK_SHARED) != 0) { nd->nd_repstat = EPERM; break; } if ((vp->v_vflag & VV_ROOT) != 0 && (cn.cn_flags & ISDOTDOT) != 0) { vref(vp); nvp = vp; r = 0; } else { r = VOP_LOOKUP(vp, &nvp, &cn); if (vp != nvp) NFSVOPUNLOCK(vp, 0); } } /* * For NFSv4, check to see if nvp is * a mount point and get the mount * point vnode, as required. */ if (r == 0 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && nvp->v_mountedhere != NULL) { new_mp = nvp->v_mountedhere; r = vfs_busy(new_mp, 0); vput(nvp); nvp = NULL; if (r == 0) { r = VFS_ROOT(new_mp, LK_SHARED, &nvp); needs_unbusy = 1; if (r == 0) at_root = 1; } } } if (!r) { if (refp == NULL && ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&attrbits))) { r = nfsvno_getfh(nvp, &nfh, p); if (!r) - r = nfsvno_getattr(nvp, nvap, - nd->nd_cred, p, 1); + r = nfsvno_getattr(nvp, nvap, nd, p, + 1, &attrbits); if (r == 0 && is_zfs == 1 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && vp->v_mount != nvp->v_mount) { /* * For a ZFS snapshot, there is a * pseudo mount that does not set * v_mountedhere, so it needs to * be detected via a different * mount structure. */ at_root = 1; if (new_mp == mp) new_mp = nvp->v_mount; } } } else { nvp = NULL; } if (r) { if (!NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR)) { if (nvp != NULL) vput(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); nd->nd_repstat = r; break; } } } /* * Build the directory record xdr */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(dp->d_fileno); dirlen += nfsm_strtom(nd, dp->d_name, nlen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = txdr_unsigned(*cookiep); nfsrv_postopattr(nd, 0, nvap); dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); if (nvp != NULL) vput(nvp); } else { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(*cookiep); dirlen += nfsm_strtom(nd, dp->d_name, nlen); if (nvp != NULL) { supports_nfsv4acls = nfs_supportsnfsv4acls(nvp); NFSVOPUNLOCK(nvp, 0); } else supports_nfsv4acls = 0; if (refp != NULL) { dirlen += nfsrv_putreferralattr(nd, &savbits, refp, 0, &nd->nd_repstat); if (nd->nd_repstat) { if (nvp != NULL) vrele(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); break; } } else if (r) { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &rderrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } else { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &attrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } if (nvp != NULL) vrele(nvp); dirlen += (3 * NFSX_UNSIGNED); } if (needs_unbusy != 0) vfs_unbusy(new_mp); if (dirlen <= cnt) entrycnt++; } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } vrele(vp); vfs_unbusy(mp); /* * If dirlen > cnt, we must strip off the last entry. If that * results in an empty reply, report NFSERR_TOOSMALL. */ if (dirlen > cnt || nd->nd_repstat) { if (!nd->nd_repstat && entrycnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (nd->nd_repstat) { newnfs_trimtrailing(nd, mb0, bpos0); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); } else newnfs_trimtrailing(nd, mb1, bpos1); eofflag = 0; } else if (cpos < cend) eofflag = 0; if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; } free(cookies, M_TEMP); free(rbuf, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Get the settable attributes out of the mbuf list. * (Return 0 or EBADRPC) */ int nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; struct nfsv2_sattr *sp; int error = 0, toclient = 0; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); /* * Some old clients didn't fill in the high order 16bits. * --> check the low order 2 bytes for 0xffff */ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) nvap->na_mode = nfstov_mode(sp->sa_mode); if (sp->sa_uid != newnfs_xdrneg1) nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); if (sp->sa_gid != newnfs_xdrneg1) nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); if (sp->sa_size != newnfs_xdrneg1) nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { #ifdef notyet fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); #else nvap->na_atime.tv_sec = fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); nvap->na_atime.tv_nsec = 0; #endif } if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); break; case ND_NFSV3: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_uid = fxdr_unsigned(uid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_gid = fxdr_unsigned(gid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); nvap->na_size = fxdr_hyper(tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_atime); toclient = 1; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; break; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; break; } break; case ND_NFSV4: error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Handle the setable attributes for V4. * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. */ int nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; int attrsum = 0; int i, j; int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; int toclient = 0; u_char *cp, namestr[NFSV4_SMALLSTR + 1]; uid_t uid; gid_t gid; error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsize = fxdr_unsigned(int, *tl); /* * Loop around getting the setable attributes. If an unsupported * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. */ if (retnotsup) { nd->nd_repstat = NFSERR_ATTRNOTSUPP; bitpos = NFSATTRBIT_MAX; } else { bitpos = 0; } for (; bitpos < NFSATTRBIT_MAX; bitpos++) { if (attrsum > attrsize) { error = NFSERR_BADXDR; goto nfsmout; } if (NFSISSET_ATTRBIT(attrbitp, bitpos)) switch (bitpos) { case NFSATTRBIT_SIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (vp != NULL && vp->v_type != VREG) { error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL; goto nfsmout; } nvap->na_size = fxdr_hyper(tl); attrsum += NFSX_HYPER; break; case NFSATTRBIT_ACL: error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, p); if (error) goto nfsmout; if (aceerr && !nd->nd_repstat) nd->nd_repstat = aceerr; attrsum += aclsize; break; case NFSATTRBIT_ARCHIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HIDDEN: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MIMETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); break; case NFSATTRBIT_MODE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid, p); if (!nd->nd_repstat) nvap->na_uid = uid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_OWNERGROUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid, p); if (!nd->nd_repstat) nvap->na_gid = gid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_SYSTEM: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESSSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_atime); toclient = 1; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; } break; case NFSATTRBIT_TIMEBACKUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; } break; default: nd->nd_repstat = NFSERR_ATTRNOTSUPP; /* * set bitpos so we drop out of the loop. */ bitpos = NFSATTRBIT_MAX; break; } } /* * some clients pad the attrlist, so we need to skip over the * padding. */ if (attrsum > attrsize) { error = NFSERR_BADXDR; } else { attrsize = NFSM_RNDUP(attrsize); if (attrsum < attrsize) error = nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Check/setup export credentials. */ int nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, struct ucred *credanon) { int error = 0; /* * Check/setup credentials. */ if (nd->nd_flag & ND_GSS) exp->nes_exflag &= ~MNT_EXPORTANON; /* * Check to see if the operation is allowed for this security flavor. * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. * Also, allow Secinfo, so that it can acquire the correct flavor(s). */ if (nfsvno_testexp(nd, exp) && nd->nd_procnum != NFSV4OP_SECINFO && nd->nd_procnum != NFSPROC_FSINFO) { if (nd->nd_flag & ND_NFSV4) error = NFSERR_WRONGSEC; else error = (NFSERR_AUTHERR | AUTH_TOOWEAK); goto out; } /* * Check to see if the file system is exported V4 only. */ if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { error = NFSERR_PROGNOTV4; goto out; } /* * Now, map the user credentials. * (Note that ND_AUTHNONE will only be set for an NFSv3 * Fsinfo RPC. If set for anything else, this code might need * to change.) */ if (NFSVNO_EXPORTED(exp)) { if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || NFSVNO_EXPORTANON(exp) || (nd->nd_flag & ND_AUTHNONE) != 0) { nd->nd_cred->cr_uid = credanon->cr_uid; nd->nd_cred->cr_gid = credanon->cr_gid; crsetgroups(nd->nd_cred, credanon->cr_ngroups, credanon->cr_groups); } else if ((nd->nd_flag & ND_GSS) == 0) { /* * If using AUTH_SYS, call nfsrv_getgrpscred() to see * if there is a replacement credential with a group * list set up by "nfsuserd -manage-gids". * If there is no replacement, nfsrv_getgrpscred() * simply returns its argument. */ nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); } } out: NFSEXITCODE2(error, nd); return (error); } /* * Check exports. */ int nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, struct ucred **credp) { int i, error, *secflavors; error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, &secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } } else { /* Copy the security flavors. */ for (i = 0; i < exp->nes_numsecflavor; i++) exp->nes_secflavors[i] = secflavors[i]; } NFSEXITCODE(error); return (error); } /* * Get a vnode for a file handle and export stuff. */ int nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct ucred **credp) { int i, error, *secflavors; *credp = NULL; exp->nes_numsecflavor = 0; error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); if (error != 0) /* Make sure the server replies ESTALE to the client. */ error = ESTALE; if (nam && !error) { error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, &secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } else { vput(*vpp); } } else { /* Copy the security flavors. */ for (i = 0; i < exp->nes_numsecflavor; i++) exp->nes_secflavors[i] = secflavors[i]; } } NFSEXITCODE(error); return (error); } /* * nfsd_fhtovp() - convert a fh to a vnode ptr * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling nfsvno_fhtovp() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * for AUTH_SYS * - if mpp != NULL, return the mount point so that it can * be used for vn_finished_write() by the caller */ void nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct mount **mpp, int startwrite, struct thread *p) { struct mount *mp; struct ucred *credanon; fhandle_t *fhp; fhp = (fhandle_t *)nfp->nfsrvfh_data; /* * Check for the special case of the nfsv4root_fh. */ mp = vfs_busyfs(&fhp->fh_fsid); if (mpp != NULL) *mpp = mp; if (mp == NULL) { *vpp = NULL; nd->nd_repstat = ESTALE; goto out; } if (startwrite) { vn_start_write(NULL, mpp, V_WAIT); if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) lktype = LK_EXCLUSIVE; } nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, &credanon); vfs_unbusy(mp); /* * For NFSv4 without a pseudo root fs, unexported file handles * can be returned, so that Lookup works everywhere. */ if (!nd->nd_repstat && exp->nes_exflag == 0 && !(nd->nd_flag & ND_NFSV4)) { vput(*vpp); nd->nd_repstat = EACCES; } /* * Personally, I've never seen any point in requiring a * reserved port#, since only in the rare case where the * clients are all boxes with secure system privileges, * does it provide any enhanced security, but... some people * believe it to be useful and keep putting this code back in. * (There is also some "security checker" out there that * complains if the nfs server doesn't enforce this.) * However, note the following: * RFC3530 (NFSv4) specifies that a reserved port# not be * required. * RFC2623 recommends that, if a reserved port# is checked for, * that there be a way to turn that off--> ifdef'd. */ #ifdef NFS_REQRSVPORT if (!nd->nd_repstat) { struct sockaddr_in *saddr; struct sockaddr_in6 *saddr6; saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); if (!(nd->nd_flag & ND_NFSV4) && ((saddr->sin_family == AF_INET && ntohs(saddr->sin_port) >= IPPORT_RESERVED) || (saddr6->sin6_family == AF_INET6 && ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { vput(*vpp); nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); } } #endif /* NFS_REQRSVPORT */ /* * Check/setup credentials. */ if (!nd->nd_repstat) { nd->nd_saveduid = nd->nd_cred->cr_uid; nd->nd_repstat = nfsd_excred(nd, exp, credanon); if (nd->nd_repstat) vput(*vpp); } if (credanon != NULL) crfree(credanon); if (nd->nd_repstat) { if (startwrite) vn_finished_write(mp); *vpp = NULL; if (mpp != NULL) *mpp = NULL; } out: NFSEXITCODE2(0, nd); } /* * glue for fp. */ static int fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) { struct filedesc *fdp; struct file *fp; int error = 0; fdp = p->td_proc->p_fd; if (fd < 0 || fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { error = EBADF; goto out; } *fpp = fp; out: NFSEXITCODE(error); return (error); } /* * Called from nfssvc() to update the exports list. Just call * vfs_export(). This has to be done, since the v4 root fake fs isn't * in the mount list. */ int nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) { struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; int error = 0; struct nameidata nd; fhandle_t fh; error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) nfs_rootfhset = 0; else if (error == 0) { if (nfsexargp->fspec == NULL) { error = EPERM; goto out; } /* * If fspec != NULL, this is the v4root path. */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec, p); if ((error = namei(&nd)) != 0) goto out; error = nfsvno_getfh(nd.ni_vp, &fh, p); vrele(nd.ni_vp); if (!error) { nfs_rootfh.nfsrvfh_len = NFSX_MYFH; NFSBCOPY((caddr_t)&fh, nfs_rootfh.nfsrvfh_data, sizeof (fhandle_t)); nfs_rootfhset = 1; } } out: NFSEXITCODE(error); return (error); } /* * This function needs to test to see if the system is near its limit * for memory allocation via malloc() or mget() and return True iff * either of these resources are near their limit. * XXX (For now, this is just a stub.) */ int nfsrv_testmalloclimit = 0; int nfsrv_mallocmget_limit(void) { static int printmesg = 0; static int testval = 1; if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { if ((printmesg++ % 100) == 0) printf("nfsd: malloc/mget near limit\n"); return (1); } return (0); } /* * BSD specific initialization of a mount point. */ void nfsd_mntinit(void) { static int inited = 0; if (inited) return; inited = 1; nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist); nfsv4root_mnt.mnt_export = NULL; TAILQ_INIT(&nfsv4root_opt); TAILQ_INIT(&nfsv4root_newopt); nfsv4root_mnt.mnt_opt = &nfsv4root_opt; nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; nfsv4root_mnt.mnt_nvnodelistsize = 0; nfsv4root_mnt.mnt_activevnodelistsize = 0; } /* * Get a vnode for a file handle, without checking exports, etc. */ struct vnode * nfsvno_getvp(fhandle_t *fhp) { struct mount *mp; struct vnode *vp; int error; mp = vfs_busyfs(&fhp->fh_fsid); if (mp == NULL) return (NULL); error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error) return (NULL); return (vp); } /* * Do a local VOP_ADVLOCK(). */ int nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, u_int64_t end, struct thread *td) { int error = 0; struct flock fl; u_int64_t tlen; if (nfsrv_dolocallocks == 0) goto out; ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); fl.l_whence = SEEK_SET; fl.l_type = ftype; fl.l_start = (off_t)first; if (end == NFS64BITSSET) { fl.l_len = 0; } else { tlen = end - first; fl.l_len = (off_t)tlen; } /* * For FreeBSD8, the l_pid and l_sysid must be set to the same * values for all calls, so that all locks will be held by the * nfsd server. (The nfsd server handles conflicts between the * various clients.) * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 * bytes, so it can't be put in l_sysid. */ if (nfsv4_sysid == 0) nfsv4_sysid = nlm_acquire_next_sysid(); fl.l_pid = (pid_t)0; fl.l_sysid = (int)nfsv4_sysid; if (ftype == F_UNLCK) error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, (F_POSIX | F_REMOTE)); else error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, (F_POSIX | F_REMOTE)); out: NFSEXITCODE(error); return (error); } /* * Check the nfsv4 root exports. */ int nfsvno_v4rootexport(struct nfsrv_descript *nd) { struct ucred *credanon; int exflags, error = 0, numsecflavor, *secflavors, i; error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, &credanon, &numsecflavor, &secflavors); if (error) { error = NFSERR_PROGUNAVAIL; goto out; } if (credanon != NULL) crfree(credanon); for (i = 0; i < numsecflavor; i++) { if (secflavors[i] == AUTH_SYS) nd->nd_flag |= ND_EXAUTHSYS; else if (secflavors[i] == RPCSEC_GSS_KRB5) nd->nd_flag |= ND_EXGSS; else if (secflavors[i] == RPCSEC_GSS_KRB5I) nd->nd_flag |= ND_EXGSSINTEGRITY; else if (secflavors[i] == RPCSEC_GSS_KRB5P) nd->nd_flag |= ND_EXGSSPRIVACY; } out: NFSEXITCODE(error); return (error); } /* * Nfs server pseudo system call for the nfsd's */ /* * MPSAFE */ static int nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfsd_addsock_args sockarg; struct nfsd_nfsd_args nfsdarg; + struct nfsd_nfsd_oargs onfsdarg; + struct nfsd_pnfsd_args pnfsdarg; + struct vnode *vp, *nvp, *curdvp; + struct pnfsdsfile *pf; + struct nfsdevice *ds, *fds; cap_rights_t rights; - int error; + int buflen, error, ret; + char *buf, *cp, *cp2, *cp3; + char fname[PNFS_FILENAME_LEN + 1]; if (uap->flag & NFSSVC_NFSDADDSOCK) { error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); if (error) goto out; /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. */ error = fget(td, sockarg.sock, cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); if (error != 0) goto out; if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); error = EPERM; goto out; } error = nfsrvd_addsock(fp); fdrop(fp, td); } else if (uap->flag & NFSSVC_NFSDNFSD) { if (uap->argp == NULL) { error = EINVAL; goto out; } - error = copyin(uap->argp, (caddr_t)&nfsdarg, - sizeof (nfsdarg)); + if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { + error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg)); + if (error == 0) { + nfsdarg.principal = onfsdarg.principal; + nfsdarg.minthreads = onfsdarg.minthreads; + nfsdarg.maxthreads = onfsdarg.maxthreads; + nfsdarg.version = 1; + nfsdarg.addr = NULL; + nfsdarg.addrlen = 0; + nfsdarg.dnshost = NULL; + nfsdarg.dnshostlen = 0; + nfsdarg.mirrorcnt = 1; + } + } else + error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg)); if (error) goto out; + if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 && + nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 && + nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 && + nfsdarg.mirrorcnt >= 1 && + nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS && + nfsdarg.addr != NULL && nfsdarg.dnshost != NULL && + nfsdarg.dspath != NULL) { + NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d" + " mirrorcnt=%d\n", nfsdarg.addrlen, + nfsdarg.dspathlen, nfsdarg.dnshostlen, + nfsdarg.mirrorcnt); + cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK); + error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen); + if (error != 0) { + free(cp, M_TEMP); + goto out; + } + cp[nfsdarg.addrlen] = '\0'; /* Ensure nul term. */ + nfsdarg.addr = cp; + cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK); + error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen); + if (error != 0) { + free(nfsdarg.addr, M_TEMP); + free(cp, M_TEMP); + goto out; + } + cp[nfsdarg.dnshostlen] = '\0'; /* Ensure nul term. */ + nfsdarg.dnshost = cp; + cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK); + error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen); + if (error != 0) { + free(nfsdarg.addr, M_TEMP); + free(nfsdarg.dnshost, M_TEMP); + free(cp, M_TEMP); + goto out; + } + cp[nfsdarg.dspathlen] = '\0'; /* Ensure nul term. */ + nfsdarg.dspath = cp; + } else { + nfsdarg.addr = NULL; + nfsdarg.addrlen = 0; + nfsdarg.dnshost = NULL; + nfsdarg.dnshostlen = 0; + nfsdarg.dspath = NULL; + nfsdarg.dspathlen = 0; + nfsdarg.mirrorcnt = 1; + } error = nfsrvd_nfsd(td, &nfsdarg); + free(nfsdarg.addr, M_TEMP); + free(nfsdarg.dnshost, M_TEMP); + free(nfsdarg.dspath, M_TEMP); + } else if (uap->flag & NFSSVC_PNFSDS) { + error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg)); + if (error == 0 && pnfsdarg.op == PNFSDOP_DELDSSERVER) { + cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); + error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1, + NULL); + if (error == 0) + error = nfsrv_deldsserver(cp, td); + free(cp, M_TEMP); + } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) { + cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); + buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS; + buf = malloc(buflen, M_TEMP, M_WAITOK); + error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1, + NULL); + NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error); + if (error == 0 && pnfsdarg.dspath != NULL) { + cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); + error = copyinstr(pnfsdarg.dspath, cp2, + PATH_MAX + 1, NULL); + NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n", + error); + } else + cp2 = NULL; + if (error == 0 && pnfsdarg.curdspath != NULL) { + cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); + error = copyinstr(pnfsdarg.curdspath, cp3, + PATH_MAX + 1, NULL); + NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n", + error); + } else + cp3 = NULL; + curdvp = NULL; + fds = NULL; + if (error == 0) + error = nfsrv_mdscopymr(cp, cp2, cp3, buf, + &buflen, fname, td, &vp, &nvp, &pf, &ds, + &fds); + NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error); + if (error == 0) { + if (pf->dsf_dir >= nfsrv_dsdirsize) { + printf("copymr: dsdir out of range\n"); + pf->dsf_dir = 0; + } + NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen); + error = nfsrv_copymr(vp, nvp, + ds->nfsdev_dsdir[pf->dsf_dir], ds, pf, + (struct pnfsdsfile *)buf, + buflen / sizeof(*pf), td->td_ucred, td); + vput(vp); + vput(nvp); + if (fds != NULL && error == 0) { + curdvp = fds->nfsdev_dsdir[pf->dsf_dir]; + ret = vn_lock(curdvp, LK_EXCLUSIVE); + if (ret == 0) { + nfsrv_dsremove(curdvp, fname, + td->td_ucred, td); + NFSVOPUNLOCK(curdvp, 0); + } + } + NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error); + } + free(cp, M_TEMP); + free(cp2, M_TEMP); + free(cp3, M_TEMP); + free(buf, M_TEMP); + } } else { error = nfssvc_srvcall(td, uap, td->td_ucred); } out: NFSEXITCODE(error); return (error); } static int nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { struct nfsex_args export; struct file *fp = NULL; int stablefd, len; struct nfsd_clid adminrevoke; struct nfsd_dumplist dumplist; struct nfsd_dumpclients *dumpclients; struct nfsd_dumplocklist dumplocklist; struct nfsd_dumplocks *dumplocks; struct nameidata nd; vnode_t vp; int error = EINVAL, igotlock; struct proc *procp; static int suspend_nfsd = 0; if (uap->flag & NFSSVC_PUBLICFH) { NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); error = copyin(uap->argp, &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); if (!error) nfs_pubfhset = 1; } else if (uap->flag & NFSSVC_V4ROOTEXPORT) { error = copyin(uap->argp,(caddr_t)&export, sizeof (struct nfsex_args)); if (!error) error = nfsrv_v4rootexport(&export, cred, p); } else if (uap->flag & NFSSVC_NOPUBLICFH) { nfs_pubfhset = 0; error = 0; } else if (uap->flag & NFSSVC_STABLERESTART) { error = copyin(uap->argp, (caddr_t)&stablefd, sizeof (int)); if (!error) error = fp_getfvp(p, stablefd, &fp, &vp); if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) error = EBADF; if (!error && newnfs_numnfsd != 0) error = EPERM; if (!error) { nfsrv_stablefirst.nsf_fp = fp; nfsrv_setupstable(p); } } else if (uap->flag & NFSSVC_ADMINREVOKE) { error = copyin(uap->argp, (caddr_t)&adminrevoke, sizeof (struct nfsd_clid)); if (!error) error = nfsrv_adminrevoke(&adminrevoke, p); } else if (uap->flag & NFSSVC_DUMPCLIENTS) { error = copyin(uap->argp, (caddr_t)&dumplist, sizeof (struct nfsd_dumplist)); if (!error && (dumplist.ndl_size < 1 || dumplist.ndl_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) { len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; dumpclients = (struct nfsd_dumpclients *)malloc(len, M_TEMP, M_WAITOK); nfsrv_dumpclients(dumpclients, dumplist.ndl_size); error = copyout(dumpclients, CAST_USER_ADDR_T(dumplist.ndl_list), len); free(dumpclients, M_TEMP); } } else if (uap->flag & NFSSVC_DUMPLOCKS) { error = copyin(uap->argp, (caddr_t)&dumplocklist, sizeof (struct nfsd_dumplocklist)); if (!error && (dumplocklist.ndllck_size < 1 || dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) error = nfsrv_lookupfilename(&nd, dumplocklist.ndllck_fname, p); if (!error) { len = sizeof (struct nfsd_dumplocks) * dumplocklist.ndllck_size; dumplocks = (struct nfsd_dumplocks *)malloc(len, M_TEMP, M_WAITOK); nfsrv_dumplocks(nd.ni_vp, dumplocks, dumplocklist.ndllck_size, p); vput(nd.ni_vp); error = copyout(dumplocks, CAST_USER_ADDR_T(dumplocklist.ndllck_list), len); free(dumplocks, M_TEMP); } } else if (uap->flag & NFSSVC_BACKUPSTABLE) { procp = p->td_proc; PROC_LOCK(procp); nfsd_master_pid = procp->p_pid; bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); nfsd_master_start = procp->p_stats->p_start; nfsd_master_proc = procp; PROC_UNLOCK(procp); } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd == 0) { /* Lock out all nfsd threads */ do { igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0 && suspend_nfsd == 0); suspend_nfsd = 1; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd != 0) { nfsv4_unlock(&nfsd_suspend_lock, 0); suspend_nfsd = 0; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } NFSEXITCODE(error); return (error); } /* * Check exports. * Returns 0 if ok, 1 otherwise. */ int nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) { int i; /* * This seems odd, but allow the case where the security flavor * list is empty. This happens when NFSv4 is traversing non-exported * file systems. Exported file systems should always have a non-empty * security flavor list. */ if (exp->nes_numsecflavor == 0) return (0); for (i = 0; i < exp->nes_numsecflavor; i++) { /* * The tests for privacy and integrity must be first, * since ND_GSS is set for everything but AUTH_SYS. */ if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && (nd->nd_flag & ND_GSSPRIVACY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && (nd->nd_flag & ND_GSSINTEGRITY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && (nd->nd_flag & ND_GSS)) return (0); if (exp->nes_secflavors[i] == AUTH_SYS && (nd->nd_flag & ND_GSS) == 0) return (0); } return (1); } /* * Calculate a hash value for the fid in a file handle. */ uint32_t nfsrv_hashfh(fhandle_t *fhp) { uint32_t hashval; hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); return (hashval); } /* * Calculate a hash value for the sessionid. */ uint32_t nfsrv_hashsessionid(uint8_t *sessionid) { uint32_t hashval; hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); return (hashval); } /* * Signal the userland master nfsd to backup the stable restart file. */ void nfsrv_backupstable(void) { struct proc *procp; if (nfsd_master_proc != NULL) { procp = pfind(nfsd_master_pid); /* Try to make sure it is the correct process. */ if (procp == nfsd_master_proc && procp->p_stats->p_start.tv_sec == nfsd_master_start.tv_sec && procp->p_stats->p_start.tv_usec == nfsd_master_start.tv_usec && strcmp(procp->p_comm, nfsd_master_comm) == 0) kern_psignal(procp, SIGUSR2); else nfsd_master_proc = NULL; if (procp != NULL) PROC_UNLOCK(procp); } } +/* + * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror. + * The arguments are in a structure, so that they can be passed through + * taskqueue for a kernel process to execute this function. + */ +struct nfsrvdscreate { + int done; + int inprog; + struct task tsk; + struct ucred *tcred; + struct vnode *dvp; + NFSPROC_T *p; + struct pnfsdsfile *pf; + int err; + fhandle_t fh; + struct vattr va; + struct vattr createva; +}; + +int +nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap, + fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa, + char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp) +{ + struct vnode *nvp; + struct nameidata named; + struct vattr va; + char *bufp; + u_long *hashp; + struct nfsnode *np; + struct nfsmount *nmp; + int error; + + NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE, + LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); + nfsvno_setpathbuf(&named, &bufp, &hashp); + named.ni_cnd.cn_lkflags = LK_EXCLUSIVE; + named.ni_cnd.cn_thread = p; + named.ni_cnd.cn_nameptr = bufp; + if (fnamep != NULL) { + strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1); + named.ni_cnd.cn_namelen = strlen(bufp); + } else + named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp); + NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp); + + /* Create the date file in the DS mount. */ + error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); + if (error == 0) { + error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap); + NFSVOPUNLOCK(dvp, 0); + if (error == 0) { + /* Set the ownership of the file. */ + error = VOP_SETATTR(nvp, nvap, tcred); + NFSD_DEBUG(4, "nfsrv_dscreate:" + " setattr-uid=%d\n", error); + if (error != 0) + vput(nvp); + } + if (error != 0) + printf("pNFS: pnfscreate failed=%d\n", error); + } else + printf("pNFS: pnfscreate vnlock=%d\n", error); + if (error == 0) { + np = VTONFS(nvp); + nmp = VFSTONFS(nvp->v_mount); + if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs") + != 0 || nmp->nm_nam->sa_len > sizeof( + struct sockaddr_in6) || + np->n_fhp->nfh_len != NFSX_MYFH) { + printf("Bad DS file: fstype=%s salen=%d" + " fhlen=%d\n", + nvp->v_mount->mnt_vfc->vfc_name, + nmp->nm_nam->sa_len, np->n_fhp->nfh_len); + error = ENOENT; + } + + /* Set extattrs for the DS on the MDS file. */ + if (error == 0) { + if (dsa != NULL) { + error = VOP_GETATTR(nvp, &va, tcred); + if (error == 0) { + dsa->dsa_filerev = va.va_filerev; + dsa->dsa_size = va.va_size; + dsa->dsa_atime = va.va_atime; + dsa->dsa_mtime = va.va_mtime; + } + } + if (error == 0) { + NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, + NFSX_MYFH); + NFSBCOPY(nmp->nm_nam, &pf->dsf_sin, + nmp->nm_nam->sa_len); + NFSBCOPY(named.ni_cnd.cn_nameptr, + pf->dsf_filename, + sizeof(pf->dsf_filename)); + } + } else + printf("pNFS: pnfscreate can't get DS" + " attr=%d\n", error); + if (nvpp != NULL && error == 0) + *nvpp = nvp; + else + vput(nvp); + } + nfsvno_relpathbuf(&named); + return (error); +} + +/* + * Start up the thread that will execute nfsrv_dscreate(). + */ +static void +start_dscreate(void *arg, int pending) +{ + struct nfsrvdscreate *dsc; + + dsc = (struct nfsrvdscreate *)arg; + dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh, + dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL); + dsc->done = 1; + NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err); +} + +/* + * Create a pNFS data file on the Data Server(s). + */ +static void +nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred, + NFSPROC_T *p) +{ + struct nfsrvdscreate *dsc, *tdsc; + struct nfsdevice *ds, *mds; + struct mount *mp; + struct pnfsdsfile *pf, *tpf; + struct pnfsdsattr dsattr; + struct vattr va; + struct vnode *dvp[NFSDEV_MAXMIRRORS]; + struct nfsmount *nmp; + fhandle_t fh; + uid_t vauid; + gid_t vagid; + u_short vamode; + struct ucred *tcred; + int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret; + int failpos, timo; + + /* Get a DS server directory in a round-robin order. */ + mirrorcnt = 1; + NFSDDSLOCK(); + TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { + if (ds->nfsdev_nmp != NULL) + break; + } + if (ds == NULL) { + NFSDDSUNLOCK(); + NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n"); + return; + } + i = dsdir[0] = ds->nfsdev_nextdir; + ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize; + dvp[0] = ds->nfsdev_dsdir[i]; + if (nfsrv_maxpnfsmirror > 1) { + mds = TAILQ_NEXT(ds, nfsdev_list); + TAILQ_FOREACH_FROM(mds, &nfsrv_devidhead, nfsdev_list) { + if (mds->nfsdev_nmp != NULL) { + dsdir[mirrorcnt] = i; + dvp[mirrorcnt] = mds->nfsdev_dsdir[i]; + mirrorcnt++; + if (mirrorcnt >= nfsrv_maxpnfsmirror) + break; + } + } + } + /* Put at end of list to implement round-robin usage. */ + TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); + TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); + NFSDDSUNLOCK(); + dsc = NULL; + if (mirrorcnt > 1) + tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP, + M_WAITOK | M_ZERO); + tpf = pf = malloc(sizeof(*pf) * mirrorcnt, M_TEMP, M_WAITOK | M_ZERO); + + error = nfsvno_getfh(vp, &fh, p); + if (error == 0) + error = VOP_GETATTR(vp, &va, cred); + if (error == 0) { + /* Set the attributes for "vp" to Setattr the DS vp. */ + vauid = va.va_uid; + vagid = va.va_gid; + vamode = va.va_mode; + VATTR_NULL(&va); + va.va_uid = vauid; + va.va_gid = vagid; + va.va_mode = vamode; + va.va_size = 0; + } else + printf("pNFS: pnfscreate getfh+attr=%d\n", error); + + NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid, + cred->cr_gid); + /* Make data file name based on FH. */ + tcred = newnfs_getcred(); + + /* + * Create the file on each DS mirror, using kernel process(es) for the + * additional mirrors. + */ + failpos = -1; + for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) { + tpf->dsf_dir = dsdir[i]; + tdsc->tcred = tcred; + tdsc->p = p; + tdsc->pf = tpf; + tdsc->createva = *vap; + tdsc->fh = fh; + tdsc->va = va; + tdsc->dvp = dvp[i]; + tdsc->done = 0; + tdsc->inprog = 0; + tdsc->err = 0; + ret = EIO; + if (nfs_pnfsiothreads != 0) { + ret = nfs_pnfsio(start_dscreate, tdsc); + NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret); + } + if (ret != 0) { + ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL, + NULL, tcred, p, NULL); + if (ret != 0) { + KASSERT(error == 0, ("nfsrv_dscreate err=%d", + error)); + if (failpos == -1 && nfsds_failerr(ret)) + failpos = i; + else + error = ret; + } + } + } + if (error == 0) { + tpf->dsf_dir = dsdir[mirrorcnt - 1]; + error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf, + &dsattr, NULL, tcred, p, NULL); + if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) { + failpos = mirrorcnt - 1; + error = 0; + } + } + timo = hz / 50; /* Wait for 20msec. */ + if (timo < 1) + timo = 1; + /* Wait for kernel task(s) to complete. */ + for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) { + while (tdsc->inprog != 0 && tdsc->done == 0) + tsleep(&tdsc->tsk, PVFS, "srvdcr", timo); + if (tdsc->err != 0) { + if (failpos == -1 && nfsds_failerr(tdsc->err)) + failpos = i; + else if (error == 0) + error = tdsc->err; + } + } + + /* + * If failpos has been set, that mirror has failed, so it needs + * to be disabled. + */ + if (failpos >= 0) { + nmp = VFSTONFS(dvp[failpos]->v_mount); + NFSLOCKMNT(nmp); + if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | + NFSMNTP_CANCELRPCS)) == 0) { + nmp->nm_privflag |= NFSMNTP_CANCELRPCS; + NFSUNLOCKMNT(nmp); + ds = nfsrv_deldsnmp(nmp, p); + NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos, + ds); + if (ds != NULL) + nfsrv_killrpcs(nmp); + NFSLOCKMNT(nmp); + nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; + wakeup(nmp); + } + NFSUNLOCKMNT(nmp); + } + + NFSFREECRED(tcred); + if (error == 0) { + ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp"); + error = vn_start_write(vp, &mp, V_WAIT); + if (error == 0) { + error = vn_extattr_set(vp, IO_NODELOCKED, + EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", + sizeof(*pf) * mirrorcnt, (char *)pf, p); + if (error == 0) + error = vn_extattr_set(vp, IO_NODELOCKED, + EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", + sizeof(dsattr), (char *)&dsattr, p); + vn_finished_write(mp); + if (error != 0) + printf("pNFS: pnfscreate setextattr=%d\n", + error); + } else + printf("pNFS: pnfscreate startwrite=%d\n", error); + } else + printf("pNFS: pnfscreate=%d\n", error); + free(pf, M_TEMP); + free(dsc, M_TEMP); +} + +/* + * Get the information needed to remove the pNFS Data Server file from the + * Metadata file. Upon success, ddvp is set non-NULL to the locked + * DS directory vnode. The caller must unlock *ddvp when done with it. + */ +static void +nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp, + int *mirrorcntp, char *fname, fhandle_t *fhp) +{ + struct vattr va; + struct ucred *tcred; + char *buf; + int buflen, error; + + dvpp[0] = NULL; + /* If not an exported regular file or not a pNFS server, just return. */ + if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || + nfsrv_devidcnt == 0) + return; + + /* Check to see if this is the last hard link. */ + tcred = newnfs_getcred(); + error = VOP_GETATTR(vp, &va, tcred); + NFSFREECRED(tcred); + if (error != 0) { + printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error); + return; + } + if (va.va_nlink > 1) + return; + + error = nfsvno_getfh(vp, fhp, p); + if (error != 0) { + printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error); + return; + } + + buflen = 1024; + buf = malloc(buflen, M_TEMP, M_WAITOK); + /* Get the directory vnode for the DS mount and the file handle. */ + error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp, + NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL); + free(buf, M_TEMP); + if (error != 0) + printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error); +} + +/* + * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror. + * The arguments are in a structure, so that they can be passed through + * taskqueue for a kernel process to execute this function. + */ +struct nfsrvdsremove { + int done; + int inprog; + struct task tsk; + struct ucred *tcred; + struct vnode *dvp; + NFSPROC_T *p; + int err; + char fname[PNFS_FILENAME_LEN + 1]; +}; + +static int +nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred, + NFSPROC_T *p) +{ + struct nameidata named; + struct vnode *nvp; + char *bufp; + u_long *hashp; + int error; + + error = NFSVOPLOCK(dvp, LK_EXCLUSIVE); + if (error != 0) + return (error); + named.ni_cnd.cn_nameiop = DELETE; + named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; + named.ni_cnd.cn_cred = tcred; + named.ni_cnd.cn_thread = p; + named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; + nfsvno_setpathbuf(&named, &bufp, &hashp); + named.ni_cnd.cn_nameptr = bufp; + named.ni_cnd.cn_namelen = strlen(fname); + strlcpy(bufp, fname, NAME_MAX); + NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp); + error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); + NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error); + if (error == 0) { + error = VOP_REMOVE(dvp, nvp, &named.ni_cnd); + vput(nvp); + } + NFSVOPUNLOCK(dvp, 0); + nfsvno_relpathbuf(&named); + if (error != 0) + printf("pNFS: nfsrv_pnfsremove failed=%d\n", error); + return (error); +} + +/* + * Start up the thread that will execute nfsrv_dsremove(). + */ +static void +start_dsremove(void *arg, int pending) +{ + struct nfsrvdsremove *dsrm; + + dsrm = (struct nfsrvdsremove *)arg; + dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred, + dsrm->p); + dsrm->done = 1; + NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err); +} + +/* + * Remove a pNFS data file from a Data Server. + * nfsrv_pnfsremovesetup() must have been called before the MDS file was + * removed to set up the dvp and fill in the FH. + */ +static void +nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp, + NFSPROC_T *p) +{ + struct ucred *tcred; + struct nfsrvdsremove *dsrm, *tdsrm; + struct nfsdevice *ds; + struct nfsmount *nmp; + int failpos, i, ret, timo; + + tcred = newnfs_getcred(); + dsrm = NULL; + if (mirrorcnt > 1) + dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK); + /* + * Remove the file on each DS mirror, using kernel process(es) for the + * additional mirrors. + */ + failpos = -1; + for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { + tdsrm->tcred = tcred; + tdsrm->p = p; + tdsrm->dvp = dvp[i]; + strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1); + tdsrm->inprog = 0; + tdsrm->done = 0; + tdsrm->err = 0; + ret = EIO; + if (nfs_pnfsiothreads != 0) { + ret = nfs_pnfsio(start_dsremove, tdsrm); + NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret); + } + if (ret != 0) { + ret = nfsrv_dsremove(dvp[i], fname, tcred, p); + if (failpos == -1 && nfsds_failerr(ret)) + failpos = i; + } + } + ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p); + if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret)) + failpos = mirrorcnt - 1; + timo = hz / 50; /* Wait for 20msec. */ + if (timo < 1) + timo = 1; + /* Wait for kernel task(s) to complete. */ + for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) { + while (tdsrm->inprog != 0 && tdsrm->done == 0) + tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo); + if (failpos == -1 && nfsds_failerr(tdsrm->err)) + failpos = i; + } + + /* + * If failpos has been set, that mirror has failed, so it needs + * to be disabled. + */ + if (failpos >= 0) { + nmp = VFSTONFS(dvp[failpos]->v_mount); + NFSLOCKMNT(nmp); + if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | + NFSMNTP_CANCELRPCS)) == 0) { + nmp->nm_privflag |= NFSMNTP_CANCELRPCS; + NFSUNLOCKMNT(nmp); + ds = nfsrv_deldsnmp(nmp, p); + NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos, + ds); + if (ds != NULL) + nfsrv_killrpcs(nmp); + NFSLOCKMNT(nmp); + nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; + wakeup(nmp); + } + NFSUNLOCKMNT(nmp); + } + + /* Get rid all layouts for the file. */ + nfsrv_freefilelayouts(fhp); + + NFSFREECRED(tcred); + free(dsrm, M_TEMP); +} + +/* + * Generate a file name based on the file handle and put it in *bufp. + * Return the number of bytes generated. + */ +static int +nfsrv_putfhname(fhandle_t *fhp, char *bufp) +{ + int i; + uint8_t *cp; + const uint8_t *hexdigits = "0123456789abcdef"; + + cp = (uint8_t *)fhp; + for (i = 0; i < sizeof(*fhp); i++) { + bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf]; + bufp[2 * i + 1] = hexdigits[*cp++ & 0xf]; + } + bufp[2 * i] = '\0'; + return (2 * i); +} + +/* + * Update the Metadata file's attributes from the DS file when a Read/Write + * layout is returned. + * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN + * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file. + */ +int +nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) +{ + struct ucred *tcred; + int error; + + /* Do this as root so that it won't fail with EACCES. */ + tcred = newnfs_getcred(); + error = nfsrv_proxyds(NULL, vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN, + NULL, NULL, NULL, nap, NULL); + NFSFREECRED(tcred); + return (error); +} + +/* + * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file. + */ +static int +nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred, + NFSPROC_T *p) +{ + int error; + + error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETACL, + NULL, NULL, NULL, NULL, aclp); + return (error); +} + +static int +nfsrv_proxyds(struct nfsrv_descript *nd, struct vnode *vp, off_t off, int cnt, + struct ucred *cred, struct thread *p, int ioproc, struct mbuf **mpp, + char *cp, struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp) +{ + struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp; + fhandle_t fh[NFSDEV_MAXMIRRORS]; + struct vnode *dvp[NFSDEV_MAXMIRRORS]; + struct nfsdevice *ds; + struct pnfsdsattr dsattr; + char *buf; + int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt; + + NFSD_DEBUG(4, "in nfsrv_proxyds\n"); + /* + * If not a regular file, not exported or not a pNFS server, + * just return ENOENT. + */ + if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 || + nfsrv_devidcnt == 0) + return (ENOENT); + + buflen = 1024; + buf = malloc(buflen, M_TEMP, M_WAITOK); + error = 0; + + /* + * For Getattr, get the Change attribute (va_filerev) and size (va_size) + * from the MetaData file's extended attribute. + */ + if (ioproc == NFSPROC_GETATTR) { + error = vn_extattr_get(vp, IO_NODELOCKED, + EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf, + p); + if (error == 0 && buflen != sizeof(dsattr)) + error = ENXIO; + if (error == 0) { + NFSBCOPY(buf, &dsattr, buflen); + nap->na_filerev = dsattr.dsa_filerev; + nap->na_size = dsattr.dsa_size; + nap->na_atime = dsattr.dsa_atime; + nap->na_mtime = dsattr.dsa_mtime; + + /* + * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr() + * returns 0, just return now. nfsrv_checkdsattr() + * returns 0 if there is no Read/Write layout + * plus either an Open/Write_access or Write + * delegation issued to a client for the file. + */ + if (nfsrv_pnfsgetdsattr == 0 || + nfsrv_checkdsattr(nd, vp, p) == 0) { + free(buf, M_TEMP); + return (error); + } + } + + /* + * Clear ENOATTR so the code below will attempt to do a + * nfsrv_getattrdsrpc() to get the attributes and (re)create + * the extended attribute. + */ + if (error == ENOATTR) + error = 0; + } + + origmircnt = -1; + trycnt = 0; +tryagain: + if (error == 0) { + buflen = 1024; + error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen, + &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL, + NULL, NULL); + if (error == 0) { + for (i = 0; i < mirrorcnt; i++) + nmp[i] = VFSTONFS(dvp[i]->v_mount); + } else + printf("pNFS: proxy getextattr sockaddr=%d\n", error); + } else + printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error); + if (error == 0) { + failpos = -1; + if (origmircnt == -1) + origmircnt = mirrorcnt; + /* + * If failpos is set to a mirror#, then that mirror has + * failed and will be disabled. For Read and Getattr, the + * function only tries one mirror, so if that mirror has + * failed, it will need to be retried. As such, increment + * tryitagain for these cases. + * For Write, Setattr and Setacl, the function tries all + * mirrors and will not return an error for the case where + * one mirror has failed. For these cases, the functioning + * mirror(s) will have been modified, so a retry isn't + * necessary. These functions will set failpos for the + * failed mirror#. + */ + if (ioproc == NFSPROC_READDS) { + error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0], + mpp, mpp2); + if (nfsds_failerr(error) && mirrorcnt > 1) { + /* + * Setting failpos will cause the mirror + * to be disabled and then a retry of this + * read is required. + */ + failpos = 0; + error = 0; + trycnt++; + } + } else if (ioproc == NFSPROC_WRITEDS) + error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp, + &nmp[0], mirrorcnt, mpp, cp, &failpos); + else if (ioproc == NFSPROC_SETATTR) + error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0], + mirrorcnt, nap, &failpos); + else if (ioproc == NFSPROC_SETACL) + error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0], + mirrorcnt, aclp, &failpos); + else { + error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, + vp, nmp[mirrorcnt - 1], nap); + if (nfsds_failerr(error) && mirrorcnt > 1) { + /* + * Setting failpos will cause the mirror + * to be disabled and then a retry of this + * getattr is required. + */ + failpos = mirrorcnt - 1; + error = 0; + trycnt++; + } + } + ds = NULL; + if (failpos >= 0) { + failnmp = nmp[failpos]; + NFSLOCKMNT(failnmp); + if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM | + NFSMNTP_CANCELRPCS)) == 0) { + failnmp->nm_privflag |= NFSMNTP_CANCELRPCS; + NFSUNLOCKMNT(failnmp); + ds = nfsrv_deldsnmp(failnmp, p); + NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n", + failpos, ds); + if (ds != NULL) + nfsrv_killrpcs(failnmp); + NFSLOCKMNT(failnmp); + failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; + wakeup(failnmp); + } + NFSUNLOCKMNT(failnmp); + } + for (i = 0; i < mirrorcnt; i++) + NFSVOPUNLOCK(dvp[i], 0); + NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error, + trycnt); + /* Try the Read/Getattr again if a mirror was deleted. */ + if (ds != NULL && trycnt > 0 && trycnt < origmircnt) + goto tryagain; + } else { + /* Return ENOENT for any Extended Attribute error. */ + error = ENOENT; + } + free(buf, M_TEMP); + NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error); + return (error); +} + +/* + * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended + * attribute. + * newnmpp - If it points to a non-NULL nmp, that is the destination and needs + * to be checked. If it points to a NULL nmp, then it returns + * a suitable destination. + * curnmp - If non-NULL, it is the source mount for the copy. + */ +int +nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp, + int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp, + char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp, + struct nfsmount *curnmp, int *ippos, int *dsdirp) +{ + struct vnode *dvp, *nvp, **tdvpp; + struct nfsmount *nmp, *newnmp; + struct sockaddr *sad; + struct sockaddr_in *sin; + struct nfsdevice *ds, *fndds; + struct pnfsdsfile *pf; + uint32_t dsdir; + int error, fhiszero, fnd, gotone, i, mirrorcnt; + + ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp"); + *mirrorcntp = 1; + tdvpp = dvpp; + if (nvpp != NULL) + *nvpp = NULL; + if (dvpp != NULL) + *dvpp = NULL; + if (ippos != NULL) + *ippos = -1; + if (newnmpp != NULL) + newnmp = *newnmpp; + else + newnmp = NULL; + error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, + "pnfsd.dsfile", buflenp, buf, p); + mirrorcnt = *buflenp / sizeof(*pf); + if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS || + *buflenp != sizeof(*pf) * mirrorcnt)) + error = ENOATTR; + + pf = (struct pnfsdsfile *)buf; + /* If curnmp != NULL, check for a match in the mirror list. */ + if (curnmp != NULL && error == 0) { + fnd = 0; + for (i = 0; i < mirrorcnt; i++, pf++) { + sad = (struct sockaddr *)&pf->dsf_sin; + if (nfsaddr2_match(sad, curnmp->nm_nam)) { + if (ippos != NULL) + *ippos = i; + fnd = 1; + break; + } + } + if (fnd == 0) + error = ENXIO; + } + + gotone = 0; + pf = (struct pnfsdsfile *)buf; + NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt, + error); + for (i = 0; i < mirrorcnt && error == 0; i++, pf++) { + fhiszero = 0; + sad = (struct sockaddr *)&pf->dsf_sin; + sin = &pf->dsf_sin; + dsdir = pf->dsf_dir; + if (dsdir >= nfsrv_dsdirsize) { + printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir); + error = ENOATTR; + } else if (nvpp != NULL && newnmp != NULL && + nfsaddr2_match(sad, newnmp->nm_nam)) + error = EEXIST; + if (error == 0) { + if (ippos != NULL && curnmp == NULL && + sad->sa_family == AF_INET && + sin->sin_addr.s_addr == 0) + *ippos = i; + if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0) + fhiszero = 1; + /* Use the socket address to find the mount point. */ + fndds = NULL; + NFSDDSLOCK(); + TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { + if (ds->nfsdev_nmp != NULL) { + dvp = ds->nfsdev_dvp; + nmp = VFSTONFS(dvp->v_mount); + if (nmp != ds->nfsdev_nmp) + printf("different2 nmp %p %p\n", + nmp, ds->nfsdev_nmp); + if (nfsaddr2_match(sad, nmp->nm_nam)) + fndds = ds; + else if (newnmpp != NULL && + newnmp == NULL && + (*newnmpp == NULL || fndds == NULL)) + /* + * Return a destination for the + * copy in newnmpp. Choose the + * last valid one before the + * source mirror, so it isn't + * always the first one. + */ + *newnmpp = nmp; + } + } + NFSDDSUNLOCK(); + if (fndds != NULL) { + dvp = fndds->nfsdev_dsdir[dsdir]; + if (lktype != 0 || fhiszero != 0 || + (nvpp != NULL && *nvpp == NULL)) { + if (fhiszero != 0) + error = vn_lock(dvp, + LK_EXCLUSIVE); + else if (lktype != 0) + error = vn_lock(dvp, lktype); + else + error = vn_lock(dvp, LK_SHARED); + /* + * If the file handle is all 0's, try to + * do a Lookup against the DS to acquire + * it. + * If dvpp == NULL or the Lookup fails, + * unlock dvp after the call. + */ + if (error == 0 && (fhiszero != 0 || + (nvpp != NULL && *nvpp == NULL))) { + error = nfsrv_pnfslookupds(vp, + dvp, pf, &nvp, p); + if (error == 0) { + if (fhiszero != 0) + nfsrv_pnfssetfh( + vp, pf, + nvp, p); + if (nvpp != NULL && + *nvpp == NULL) { + *nvpp = nvp; + *dsdirp = dsdir; + } else + vput(nvp); + } + if (error != 0 || lktype == 0) + NFSVOPUNLOCK(dvp, 0); + } + } + if (error == 0) { + gotone++; + NFSD_DEBUG(4, "gotone=%d\n", gotone); + if (devid != NULL) { + NFSBCOPY(fndds->nfsdev_deviceid, + devid, NFSX_V4DEVICEID); + devid += NFSX_V4DEVICEID; + } + if (dvpp != NULL) + *tdvpp++ = dvp; + if (fhp != NULL) + NFSBCOPY(&pf->dsf_fh, fhp++, + NFSX_MYFH); + if (fnamep != NULL && gotone == 1) + strlcpy(fnamep, + pf->dsf_filename, + sizeof(pf->dsf_filename)); + } else + NFSD_DEBUG(4, "nfsrv_dsgetsockmnt " + "err=%d\n", error); + } + } + } + if (error == 0 && gotone == 0) + error = ENOENT; + + NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone, + error); + if (error == 0) + *mirrorcntp = gotone; + else { + if (gotone > 0 && dvpp != NULL) { + /* + * If the error didn't occur on the first one and + * dvpp != NULL, the one(s) prior to the failure will + * have locked dvp's that need to be unlocked. + */ + for (i = 0; i < gotone; i++) { + NFSVOPUNLOCK(*dvpp, 0); + *dvpp++ = NULL; + } + } + /* + * If it found the vnode to be copied from before a failure, + * it needs to be vput()'d. + */ + if (nvpp != NULL && *nvpp != NULL) { + vput(*nvpp); + *nvpp = NULL; + } + } + return (error); +} + +/* + * Set the extended attribute for the Change attribute. + */ +static int +nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p) +{ + struct pnfsdsattr dsattr; + struct mount *mp; + int error; + + ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp"); + error = vn_start_write(vp, &mp, V_WAIT); + if (error == 0) { + dsattr.dsa_filerev = nap->na_filerev; + dsattr.dsa_size = nap->na_size; + dsattr.dsa_atime = nap->na_atime; + dsattr.dsa_mtime = nap->na_mtime; + error = vn_extattr_set(vp, IO_NODELOCKED, + EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", + sizeof(dsattr), (char *)&dsattr, p); + vn_finished_write(mp); + } + if (error != 0) + printf("pNFS: setextattr=%d\n", error); + return (error); +} + +static int +nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, + NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp) +{ + uint32_t *tl; + struct nfsrv_descript *nd; + nfsv4stateid_t st; + struct mbuf *m, *m2; + int error = 0, retlen, tlen, trimlen; + + NFSD_DEBUG(4, "in nfsrv_readdsrpc\n"); + nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); + *mpp = NULL; + /* + * Use a stateid where other is an alternating 01010 pattern and + * seqid is 0xffffffff. This value is not defined as special by + * the RFC and is used by the FreeBSD NFS server to indicate an + * MDS->DS proxy operation. + */ + st.other[0] = 0x55555555; + st.other[1] = 0x55555555; + st.other[2] = 0x55555555; + st.seqid = 0xffffffff; + nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp), + NULL, NULL, 0, 0); + nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); + txdr_hyper(off, tl); + *(tl + 2) = txdr_unsigned(len); + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) { + free(nd, M_TEMP); + return (error); + } + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); + NFSM_STRSIZ(retlen, len); + if (retlen > 0) { + /* Trim off the pre-data XDR from the mbuf chain. */ + m = nd->nd_mrep; + while (m != NULL && m != nd->nd_md) { + if (m->m_next == nd->nd_md) { + m->m_next = NULL; + m_freem(nd->nd_mrep); + nd->nd_mrep = m = nd->nd_md; + } else + m = m->m_next; + } + if (m == NULL) { + printf("nfsrv_readdsrpc: busted mbuf list\n"); + error = ENOENT; + goto nfsmout; + } + + /* + * Now, adjust first mbuf so that any XDR before the + * read data is skipped over. + */ + trimlen = nd->nd_dpos - mtod(m, char *); + if (trimlen > 0) { + m->m_len -= trimlen; + NFSM_DATAP(m, trimlen); + } + + /* + * Truncate the mbuf chain at retlen bytes of data, + * plus XDR padding that brings the length up to a + * multiple of 4. + */ + tlen = NFSM_RNDUP(retlen); + do { + if (m->m_len >= tlen) { + m->m_len = tlen; + tlen = 0; + m2 = m->m_next; + m->m_next = NULL; + m_freem(m2); + break; + } + tlen -= m->m_len; + m = m->m_next; + } while (m != NULL); + if (tlen > 0) { + printf("nfsrv_readdsrpc: busted mbuf list\n"); + error = ENOENT; + goto nfsmout; + } + *mpp = nd->nd_mrep; + *mpendp = m; + nd->nd_mrep = NULL; + } + } else + error = nd->nd_repstat; +nfsmout: + /* If nd->nd_mrep is already NULL, this is a no-op. */ + m_freem(nd->nd_mrep); + free(nd, M_TEMP); + NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error); + return (error); +} + +/* + * Do a write RPC on a DS data file, using this structure for the arguments, + * so that this function can be executed by a separate kernel process. + */ +struct nfsrvwritedsdorpc { + int done; + int inprog; + struct task tsk; + fhandle_t fh; + off_t off; + int len; + struct nfsmount *nmp; + struct ucred *cred; + NFSPROC_T *p; + struct mbuf *m; + int err; +}; + +static int +nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len, + struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript *nd; + nfsattrbit_t attrbits; + nfsv4stateid_t st; + int commit, error, retlen; + + nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); + nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp, + sizeof(fhandle_t), NULL, NULL, 0, 0); + + /* + * Use a stateid where other is an alternating 01010 pattern and + * seqid is 0xffffffff. This value is not defined as special by + * the RFC and is used by the FreeBSD NFS server to indicate an + * MDS->DS proxy operation. + */ + st.other[0] = 0x55555555; + st.other[1] = 0x55555555; + st.other[2] = 0x55555555; + st.seqid = 0xffffffff; + nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); + txdr_hyper(off, tl); + tl += 2; + /* + * Do all writes FileSync, since the server doesn't hold onto dirty + * buffers. Since clients should be accessing the DS servers directly + * using the pNFS layouts, this just needs to work correctly as a + * fallback. + */ + *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); + *tl = txdr_unsigned(len); + NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len); + + /* Put data in mbuf chain. */ + nd->nd_mb->m_next = m; + + /* Set nd_mb and nd_bpos to end of data. */ + while (m->m_next != NULL) + m = m->m_next; + nd->nd_mb = m; + nd->nd_bpos = mtod(m, char *) + m->m_len; + NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len); + + /* Do a Getattr for Size, Change and Modify Time. */ + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + (void) nfsrv_putattrbit(nd, &attrbits); + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, + cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) { + free(nd, M_TEMP); + return (error); + } + NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat); + /* Get rid of weak cache consistency data for now. */ + if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == + (ND_NFSV4 | ND_V4WCCATTR)) { + error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, + NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); + NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error); + if (error != 0) + goto nfsmout; + /* + * Get rid of Op# and status for next op. + */ + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (*++tl != 0) + nd->nd_flag |= ND_NOMOREDATA; + } + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); + retlen = fxdr_unsigned(int, *tl++); + commit = fxdr_unsigned(int, *tl); + if (commit != NFSWRITE_FILESYNC) + error = NFSERR_IO; + NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n", + retlen, commit, error); + } else + error = nd->nd_repstat; + /* We have no use for the Write Verifier since we use FileSync. */ + + /* + * Get the Change, Size, Access Time and Modify Time attributes and set + * on the Metadata file, so its attributes will be what the file's + * would be if it had been written. + */ + if (error == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, + NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); + } + NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error); +nfsmout: + m_freem(nd->nd_mrep); + free(nd, M_TEMP); + NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error); + return (error); +} + +/* + * Start up the thread that will execute nfsrv_writedsdorpc(). + */ +static void +start_writedsdorpc(void *arg, int pending) +{ + struct nfsrvwritedsdorpc *drpc; + + drpc = (struct nfsrvwritedsdorpc *)arg; + drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off, + drpc->len, NULL, drpc->m, drpc->cred, drpc->p); + drpc->done = 1; + NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err); +} + +static int +nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred, + NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, + struct mbuf **mpp, char *cp, int *failposp) +{ + struct nfsrvwritedsdorpc *drpc, *tdrpc; + struct nfsvattr na; + struct mbuf *m; + int error, i, offs, ret, timo; + + NFSD_DEBUG(4, "in nfsrv_writedsrpc\n"); + KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain")); + drpc = NULL; + if (mirrorcnt > 1) + tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, + M_WAITOK); + + /* Calculate offset in mbuf chain that data starts. */ + offs = cp - mtod(*mpp, char *); + NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len); + + /* + * Do the write RPC for every DS, using a separate kernel process + * for every DS except the last one. + */ + error = 0; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + tdrpc->done = 0; + tdrpc->fh = *fhp; + tdrpc->off = off; + tdrpc->len = len; + tdrpc->nmp = *nmpp; + tdrpc->cred = cred; + tdrpc->p = p; + tdrpc->inprog = 0; + tdrpc->err = 0; + tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); + ret = EIO; + if (nfs_pnfsiothreads != 0) { + ret = nfs_pnfsio(start_writedsdorpc, tdrpc); + NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n", + ret); + } + if (ret != 0) { + ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL, + tdrpc->m, cred, p); + if (nfsds_failerr(ret) && *failposp == -1) + *failposp = i; + else if (error == 0 && ret != 0) + error = ret; + } + nmpp++; + fhp++; + } + m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK); + ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p); + if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) + *failposp = mirrorcnt - 1; + else if (error == 0 && ret != 0) + error = ret; + if (error == 0) + error = nfsrv_setextattr(vp, &na, p); + NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error); + tdrpc = drpc; + timo = hz / 50; /* Wait for 20msec. */ + if (timo < 1) + timo = 1; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + /* Wait for RPCs on separate threads to complete. */ + while (tdrpc->inprog != 0 && tdrpc->done == 0) + tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo); + if (nfsds_failerr(tdrpc->err) && *failposp == -1) + *failposp = i; + else if (error == 0 && tdrpc->err != 0) + error = tdrpc->err; + } + free(drpc, M_TEMP); + return (error); +} + +static int +nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, + struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, + struct nfsvattr *dsnap) +{ + uint32_t *tl; + struct nfsrv_descript *nd; + nfsv4stateid_t st; + nfsattrbit_t attrbits; + int error; + + NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n"); + nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); + /* + * Use a stateid where other is an alternating 01010 pattern and + * seqid is 0xffffffff. This value is not defined as special by + * the RFC and is used by the FreeBSD NFS server to indicate an + * MDS->DS proxy operation. + */ + st.other[0] = 0x55555555; + st.other[1] = 0x55555555; + st.other[2] = 0x55555555; + st.seqid = 0xffffffff; + nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp), + NULL, NULL, 0, 0); + nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); + nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0); + + /* Do a Getattr for Size, Change, Access Time and Modify Time. */ + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_GETATTR); + (void) nfsrv_putattrbit(nd, &attrbits); + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) { + free(nd, M_TEMP); + return (error); + } + NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n", + nd->nd_repstat); + /* Get rid of weak cache consistency data for now. */ + if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == + (ND_NFSV4 | ND_V4WCCATTR)) { + error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, + NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); + NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error); + if (error != 0) + goto nfsmout; + /* + * Get rid of Op# and status for next op. + */ + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + if (*++tl != 0) + nd->nd_flag |= ND_NOMOREDATA; + } + error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); + if (error != 0) + goto nfsmout; + if (nd->nd_repstat != 0) + error = nd->nd_repstat; + /* + * Get the Change, Size, Access Time and Modify Time attributes and set + * on the Metadata file, so its attributes will be what the file's + * would be if it had been written. + */ + if (error == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL, + NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); + } + NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error); +nfsmout: + m_freem(nd->nd_mrep); + free(nd, M_TEMP); + NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error); + return (error); +} + +struct nfsrvsetattrdsdorpc { + int done; + int inprog; + struct task tsk; + fhandle_t fh; + struct nfsmount *nmp; + struct vnode *vp; + struct ucred *cred; + NFSPROC_T *p; + struct nfsvattr na; + struct nfsvattr dsna; + int err; +}; + +/* + * Start up the thread that will execute nfsrv_setattrdsdorpc(). + */ +static void +start_setattrdsdorpc(void *arg, int pending) +{ + struct nfsrvsetattrdsdorpc *drpc; + + drpc = (struct nfsrvsetattrdsdorpc *)arg; + drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p, + drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna); + drpc->done = 1; +} + +static int +nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, + struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, + struct nfsvattr *nap, int *failposp) +{ + struct nfsrvsetattrdsdorpc *drpc, *tdrpc; + struct nfsvattr na; + int error, i, ret, timo; + + NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n"); + drpc = NULL; + if (mirrorcnt > 1) + tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, + M_WAITOK); + + /* + * Do the setattr RPC for every DS, using a separate kernel process + * for every DS except the last one. + */ + error = 0; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + tdrpc->done = 0; + tdrpc->inprog = 0; + tdrpc->fh = *fhp; + tdrpc->nmp = *nmpp; + tdrpc->vp = vp; + tdrpc->cred = cred; + tdrpc->p = p; + tdrpc->na = *nap; + tdrpc->err = 0; + ret = EIO; + if (nfs_pnfsiothreads != 0) { + ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc); + NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n", + ret); + } + if (ret != 0) { + ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, + &na); + if (nfsds_failerr(ret) && *failposp == -1) + *failposp = i; + else if (error == 0 && ret != 0) + error = ret; + } + nmpp++; + fhp++; + } + ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na); + if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) + *failposp = mirrorcnt - 1; + else if (error == 0 && ret != 0) + error = ret; + if (error == 0) + error = nfsrv_setextattr(vp, &na, p); + NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error); + tdrpc = drpc; + timo = hz / 50; /* Wait for 20msec. */ + if (timo < 1) + timo = 1; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + /* Wait for RPCs on separate threads to complete. */ + while (tdrpc->inprog != 0 && tdrpc->done == 0) + tsleep(&tdrpc->tsk, PVFS, "srvsads", timo); + if (nfsds_failerr(tdrpc->err) && *failposp == -1) + *failposp = i; + else if (error == 0 && tdrpc->err != 0) + error = tdrpc->err; + } + free(drpc, M_TEMP); + return (error); +} + +/* + * Do a Setattr of an NFSv4 ACL on the DS file. + */ +static int +nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, + struct vnode *vp, struct nfsmount *nmp, struct acl *aclp) +{ + struct nfsrv_descript *nd; + nfsv4stateid_t st; + nfsattrbit_t attrbits; + int error; + + NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n"); + nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); + /* + * Use a stateid where other is an alternating 01010 pattern and + * seqid is 0xffffffff. This value is not defined as special by + * the RFC and is used by the FreeBSD NFS server to indicate an + * MDS->DS proxy operation. + */ + st.other[0] = 0x55555555; + st.other[1] = 0x55555555; + st.other[2] = 0x55555555; + st.seqid = 0xffffffff; + nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp), + NULL, NULL, 0, 0); + nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); + /* + * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(), + * so passing in the metadata "vp" will be ok, since it is of + * the same type (VREG). + */ + nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL, + NULL, 0, 0, 0, 0, 0, NULL); + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) { + free(nd, M_TEMP); + return (error); + } + NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n", + nd->nd_repstat); + error = nd->nd_repstat; + m_freem(nd->nd_mrep); + free(nd, M_TEMP); + return (error); +} + +struct nfsrvsetacldsdorpc { + int done; + int inprog; + struct task tsk; + fhandle_t fh; + struct nfsmount *nmp; + struct vnode *vp; + struct ucred *cred; + NFSPROC_T *p; + struct acl *aclp; + int err; +}; + +/* + * Start up the thread that will execute nfsrv_setacldsdorpc(). + */ +static void +start_setacldsdorpc(void *arg, int pending) +{ + struct nfsrvsetacldsdorpc *drpc; + + drpc = (struct nfsrvsetacldsdorpc *)arg; + drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p, + drpc->vp, drpc->nmp, drpc->aclp); + drpc->done = 1; +} + +static int +nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, + struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp, + int *failposp) +{ + struct nfsrvsetacldsdorpc *drpc, *tdrpc; + int error, i, ret, timo; + + NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n"); + drpc = NULL; + if (mirrorcnt > 1) + tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, + M_WAITOK); + + /* + * Do the setattr RPC for every DS, using a separate kernel process + * for every DS except the last one. + */ + error = 0; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + tdrpc->done = 0; + tdrpc->inprog = 0; + tdrpc->fh = *fhp; + tdrpc->nmp = *nmpp; + tdrpc->vp = vp; + tdrpc->cred = cred; + tdrpc->p = p; + tdrpc->aclp = aclp; + tdrpc->err = 0; + ret = EIO; + if (nfs_pnfsiothreads != 0) { + ret = nfs_pnfsio(start_setacldsdorpc, tdrpc); + NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n", + ret); + } + if (ret != 0) { + ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, + aclp); + if (nfsds_failerr(ret) && *failposp == -1) + *failposp = i; + else if (error == 0 && ret != 0) + error = ret; + } + nmpp++; + fhp++; + } + ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp); + if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) + *failposp = mirrorcnt - 1; + else if (error == 0 && ret != 0) + error = ret; + NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error); + tdrpc = drpc; + timo = hz / 50; /* Wait for 20msec. */ + if (timo < 1) + timo = 1; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + /* Wait for RPCs on separate threads to complete. */ + while (tdrpc->inprog != 0 && tdrpc->done == 0) + tsleep(&tdrpc->tsk, PVFS, "srvacds", timo); + if (nfsds_failerr(tdrpc->err) && *failposp == -1) + *failposp = i; + else if (error == 0 && tdrpc->err != 0) + error = tdrpc->err; + } + free(drpc, M_TEMP); + return (error); +} + +/* + * Getattr call to the DS for the Modify, Size and Change attributes. + */ +static int +nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, + struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap) +{ + struct nfsrv_descript *nd; + int error; + nfsattrbit_t attrbits; + + NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n"); + nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); + nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp, + sizeof(fhandle_t), NULL, NULL, 0, 0); + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); + (void) nfsrv_putattrbit(nd, &attrbits); + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) { + free(nd, M_TEMP); + return (error); + } + NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n", + nd->nd_repstat); + if (nd->nd_repstat == 0) { + error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, + NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, + NULL, NULL); + /* + * We can only save the updated values in the extended + * attribute if the vp is exclusively locked. + * This should happen when any of the following operations + * occur on the vnode: + * Close, Delegreturn, LayoutCommit, LayoutReturn + * As such, the updated extended attribute should get saved + * before nfsrv_checkdsattr() returns 0 and allows the cached + * attributes to be returned without calling this function. + */ + if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { + error = nfsrv_setextattr(vp, nap, p); + NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n", + error); + } + } else + error = nd->nd_repstat; + m_freem(nd->nd_mrep); + free(nd, M_TEMP); + NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error); + return (error); +} + +/* + * Get the device id and file handle for a DS file. + */ +int +nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp, + fhandle_t *fhp, char *devid) +{ + int buflen, error; + char *buf; + + buflen = 1024; + buf = malloc(buflen, M_TEMP, M_WAITOK); + error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL, + fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL); + free(buf, M_TEMP); + return (error); +} + +/* + * Do a Lookup against the DS for the filename. + */ +static int +nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf, + struct vnode **nvpp, NFSPROC_T *p) +{ + struct nameidata named; + struct ucred *tcred; + char *bufp; + u_long *hashp; + struct vnode *nvp; + int error; + + tcred = newnfs_getcred(); + named.ni_cnd.cn_nameiop = LOOKUP; + named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY; + named.ni_cnd.cn_cred = tcred; + named.ni_cnd.cn_thread = p; + named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME; + nfsvno_setpathbuf(&named, &bufp, &hashp); + named.ni_cnd.cn_nameptr = bufp; + named.ni_cnd.cn_namelen = strlen(pf->dsf_filename); + strlcpy(bufp, pf->dsf_filename, NAME_MAX); + NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp); + error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd); + NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error); + NFSFREECRED(tcred); + nfsvno_relpathbuf(&named); + if (error == 0) + *nvpp = nvp; + NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error); + return (error); +} + +/* + * Set the file handle to the correct one. + */ +static void +nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, struct vnode *nvp, + NFSPROC_T *p) +{ + struct mount *mp; + struct nfsnode *np; + int ret; + + np = VTONFS(nvp); + NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH); + /* + * We can only do a setextattr for an exclusively + * locked vp. Instead of trying to upgrade a shared + * lock, just leave dsf_fh zeroed out and it will + * keep doing this lookup until it is done with an + * exclusively locked vp. + */ + if (NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) { + ret = vn_start_write(vp, &mp, V_WAIT); + NFSD_DEBUG(4, "nfsrv_pnfssetfh: vn_start_write=%d\n", + ret); + if (ret == 0) { + ret = vn_extattr_set(vp, IO_NODELOCKED, + EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", + sizeof(*pf), (char *)pf, p); + vn_finished_write(mp); + NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft " + "vn_extattr_set=%d\n", ret); + } + } + NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret); +} + +/* + * Cause RPCs waiting on "nmp" to fail. This is called for a DS mount point + * when the DS has failed. + */ +void +nfsrv_killrpcs(struct nfsmount *nmp) +{ + + /* + * Call newnfs_nmcancelreqs() to cause + * any RPCs in progress on the mount point to + * fail. + * This will cause any process waiting for an + * RPC to complete while holding a vnode lock + * on the mounted-on vnode (such as "df" or + * a non-forced "umount") to fail. + * This will unlock the mounted-on vnode so + * a forced dismount can succeed. + * The NFSMNTP_CANCELRPCS flag should be set when this function is + * called. + */ + newnfs_nmcancelreqs(nmp); +} + +/* + * Sum up the statfs info for each of the DSs, so that the client will + * receive the total for all DSs. + */ +static int +nfsrv_pnfsstatfs(struct statfs *sf) +{ + struct statfs *tsf; + struct nfsdevice *ds; + struct vnode **dvpp, **tdvpp, *dvp; + uint64_t tot; + int cnt, error = 0, i; + + if (nfsrv_devidcnt <= 0) + return (ENXIO); + dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK); + tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK); + + /* Get an array of the dvps for the DSs. */ + tdvpp = dvpp; + i = 0; + NFSDDSLOCK(); + TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { + if (ds->nfsdev_nmp != NULL) { + if (++i > nfsrv_devidcnt) + break; + *tdvpp++ = ds->nfsdev_dvp; + } + } + NFSDDSUNLOCK(); + cnt = i; + + /* Do a VFS_STATFS() for each of the DSs and sum them up. */ + tdvpp = dvpp; + for (i = 0; i < cnt && error == 0; i++) { + dvp = *tdvpp++; + error = VFS_STATFS(dvp->v_mount, tsf); + if (error == 0) { + if (sf->f_bsize == 0) { + if (tsf->f_bsize > 0) + sf->f_bsize = tsf->f_bsize; + else + sf->f_bsize = 8192; + } + if (tsf->f_blocks > 0) { + if (sf->f_bsize != tsf->f_bsize) { + tot = tsf->f_blocks * tsf->f_bsize; + sf->f_blocks += (tot / sf->f_bsize); + } else + sf->f_blocks += tsf->f_blocks; + } + if (tsf->f_bfree > 0) { + if (sf->f_bsize != tsf->f_bsize) { + tot = tsf->f_bfree * tsf->f_bsize; + sf->f_bfree += (tot / sf->f_bsize); + } else + sf->f_bfree += tsf->f_bfree; + } + if (tsf->f_bavail > 0) { + if (sf->f_bsize != tsf->f_bsize) { + tot = tsf->f_bavail * tsf->f_bsize; + sf->f_bavail += (tot / sf->f_bsize); + } else + sf->f_bavail += tsf->f_bavail; + } + } + } + free(tsf, M_TEMP); + free(dvpp, M_TEMP); + return (error); +} + +/* + * Set an NFSv4 acl. + */ +int +nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) +{ + int error; + + if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { + error = NFSERR_ATTRNOTSUPP; + goto out; + } + /* + * With NFSv4 ACLs, chmod(2) may need to add additional entries. + * Make sure it has enough room for that - splitting every entry + * into two and appending "canonical six" entries at the end. + * Cribbed out of kern/vfs_acl.c - Rick M. + */ + if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { + error = NFSERR_ATTRNOTSUPP; + goto out; + } + error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); + if (error == 0) { + error = nfsrv_dssetacl(vp, aclp, cred, p); + if (error == ENOENT) + error = 0; + } + +out: + NFSEXITCODE(error); + return (error); +} + extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfsd_modevent(module_t mod, int type, void *data) { int error = 0, i; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) goto out; newnfs_portinit(); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, MTX_DEF); mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, MTX_DEF); } mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); + mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF); + mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF); lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); nfsrvd_initcache(); nfsd_init(); NFSD_LOCK(); nfsrvd_init(0); NFSD_UNLOCK(); nfsd_mntinit(); #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; #endif nfsd_call_servertimer = nfsrv_servertimer; nfsd_call_nfsd = nfssvc_nfsd; loaded = 1; break; case MOD_UNLOAD: if (newnfs_numnfsd != 0) { error = EBUSY; break; } #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = NULL; vn_deleg_ops.vndeleg_disable = NULL; #endif nfsd_call_servertimer = NULL; nfsd_call_nfsd = NULL; /* Clean out all NFSv4 state. */ nfsrv_throwawayallstate(curthread); /* Clean the NFS server reply cache */ nfsrvd_cleancache(); /* Free up the krpc server pool. */ if (nfsrvd_pool != NULL) svcpool_destroy(nfsrvd_pool); /* and get rid of the locks */ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_destroy(&nfsrchash_table[i].mtx); mtx_destroy(&nfsrcahash_table[i].mtx); } mtx_destroy(&nfsrc_udpmtx); mtx_destroy(&nfs_v4root_mutex); mtx_destroy(&nfsv4root_mnt.mnt_mtx); + mtx_destroy(&nfsrv_dontlistlock_mtx); + mtx_destroy(&nfsrv_recalllock_mtx); for (i = 0; i < nfsrv_sessionhashsize; i++) mtx_destroy(&nfssessionhash[i].mtx); + if (nfslayouthash != NULL) { + for (i = 0; i < nfsrv_layouthashsize; i++) + mtx_destroy(&nfslayouthash[i].mtx); + free(nfslayouthash, M_NFSDSESSION); + } lockdestroy(&nfsv4root_mnt.mnt_explock); free(nfsclienthash, M_NFSDCLIENT); free(nfslockhash, M_NFSDLOCKFILE); free(nfssessionhash, M_NFSDSESSION); loaded = 0; break; default: error = EOPNOTSUPP; break; } out: NFSEXITCODE(error); return (error); } static moduledata_t nfsd_mod = { "nfsd", nfsd_modevent, NULL, }; DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfsd, 1); MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); MODULE_DEPEND(nfsd, nfslock, 1, 1, 1); MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); MODULE_DEPEND(nfsd, krpc, 1, 1, 1); MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); Index: head/sys/fs/nfsserver/nfs_nfsdserv.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdserv.c (revision 335011) +++ head/sys/fs/nfsserver/nfs_nfsdserv.c (revision 335012) @@ -1,4198 +1,4769 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * nfs version 2, 3 and 4 server calls to vnode ops * - these routines generally have 3 phases * 1 - break down and validate rpc request in mbuf list * 2 - do the vnode ops for the request, usually by calling a nfsvno_XXX() * function in nfsd_port.c * 3 - build the rpc reply in an mbuf list * For nfsv4, these functions are called for each Op within the Compound RPC. */ #ifndef APPLEKEXT #include /* Global vars */ extern u_int32_t newnfs_false, newnfs_true; extern enum vtype nv34tov_type[8]; extern struct timeval nfsboottime; extern int nfs_rootfhset; extern int nfsrv_enable_crossmntpt; extern int nfsrv_statehashsize; +extern int nfsrv_layouthashsize; +extern time_t nfsdev_time; +extern volatile int nfsrv_devidcnt; +extern int nfsd_debuglevel; +extern u_long sb_max_adj; +extern int nfsrv_pnfsatime; +extern int nfsrv_maxpnfsmirror; #endif /* !APPLEKEXT */ static int nfs_async = 0; SYSCTL_DECL(_vfs_nfsd); SYSCTL_INT(_vfs_nfsd, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "Tell client that writes were synced even though they were not"); +extern int nfsrv_doflexfile; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, default_flexfile, CTLFLAG_RW, + &nfsrv_doflexfile, 0, "Make Flex File Layout the default for pNFS"); /* * This list defines the GSS mechanisms supported. * (Don't ask me how you get these strings from the RFC stuff like * iso(1), org(3)... but someone did it, so I don't need to know.) */ static struct nfsgss_mechlist nfsgss_mechlist[] = { { 9, "\052\206\110\206\367\022\001\002\002", 11 }, { 0, "", 0 }, }; /* local functions */ static void nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp, int pathlen); static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp); /* * nfs access service (not a part of NFS V2) */ APPLESTATIC int nfsrvd_access(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int getret, error = 0; struct nfsvattr nva; u_int32_t testmode, nfsmode, supported = 0; accmode_t deletebit; if (nd->nd_repstat) { nfsrv_postopattr(nd, 1, &nva); goto out; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nfsmode = fxdr_unsigned(u_int32_t, *tl); if ((nd->nd_flag & ND_NFSV4) && (nfsmode & ~(NFSACCESS_READ | NFSACCESS_LOOKUP | NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE | NFSACCESS_EXECUTE))) { nd->nd_repstat = NFSERR_INVAL; vput(vp); goto out; } if (nfsmode & NFSACCESS_READ) { supported |= NFSACCESS_READ; if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_READ; } if (nfsmode & NFSACCESS_MODIFY) { supported |= NFSACCESS_MODIFY; if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_MODIFY; } if (nfsmode & NFSACCESS_EXTEND) { supported |= NFSACCESS_EXTEND; if (nfsvno_accchk(vp, VWRITE | VAPPEND, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_EXTEND; } if (nfsmode & NFSACCESS_DELETE) { supported |= NFSACCESS_DELETE; if (vp->v_type == VDIR) deletebit = VDELETE_CHILD; else deletebit = VDELETE; if (nfsvno_accchk(vp, deletebit, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_DELETE; } if (vnode_vtype(vp) == VDIR) testmode = NFSACCESS_LOOKUP; else testmode = NFSACCESS_EXECUTE; if (nfsmode & testmode) { supported |= (nfsmode & testmode); if (nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~testmode; } nfsmode &= supported; if (nd->nd_flag & ND_NFSV3) { - getret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); + getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); nfsrv_postopattr(nd, getret, &nva); } vput(vp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(supported); } else NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsmode); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs getattr service */ APPLESTATIC int nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct nfsvattr nva; fhandle_t fh; int at_root = 0, error = 0, supports_nfsv4acls; struct nfsreferral *refp; nfsattrbit_t attrbits, tmpbits; struct mount *mp; struct vnode *tvp = NULL; struct vattr va; uint64_t mounted_on_fileno = 0; accmode_t accmode; if (nd->nd_repstat) goto out; if (nd->nd_flag & ND_NFSV4) { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) { vput(vp); goto out; } /* * Check for a referral. */ refp = nfsv4root_getreferral(vp, NULL, 0); if (refp != NULL) { (void) nfsrv_putreferralattr(nd, &attrbits, refp, 1, &nd->nd_repstat); vput(vp); goto out; } if (nd->nd_repstat == 0) { accmode = 0; NFSSET_ATTRBIT(&tmpbits, &attrbits); /* * GETATTR with write-only attr time_access_set and time_modify_set * should return NFS4ERR_INVAL. */ if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEACCESSSET) || NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEMODIFYSET)){ error = NFSERR_INVAL; vput(vp); goto out; } if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_ACL)) { NFSCLRBIT_ATTRBIT(&tmpbits, NFSATTRBIT_ACL); accmode |= VREAD_ACL; } if (NFSNONZERO_ATTRBIT(&tmpbits)) accmode |= VREAD_ATTRIBUTES; if (accmode != 0) nd->nd_repstat = nfsvno_accchk(vp, accmode, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); } } if (!nd->nd_repstat) - nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); + nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_FILEHANDLE)) nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) nd->nd_repstat = nfsrv_checkgetattr(nd, vp, - &nva, &attrbits, nd->nd_cred, p); + &nva, &attrbits, p); if (nd->nd_repstat == 0) { supports_nfsv4acls = nfs_supportsnfsv4acls(vp); mp = vp->v_mount; if (nfsrv_enable_crossmntpt != 0 && vp->v_type == VDIR && (vp->v_vflag & VV_ROOT) != 0 && vp != rootvnode) { tvp = mp->mnt_vnodecovered; VREF(tvp); at_root = 1; } else at_root = 0; vfs_ref(mp); NFSVOPUNLOCK(vp, 0); if (at_root != 0) { if ((nd->nd_repstat = NFSVOPLOCK(tvp, LK_SHARED)) == 0) { nd->nd_repstat = VOP_GETATTR( tvp, &va, nd->nd_cred); vput(tvp); } else vrele(tvp); if (nd->nd_repstat == 0) mounted_on_fileno = (uint64_t) va.va_fileid; else at_root = 0; } if (nd->nd_repstat == 0) nd->nd_repstat = vfs_busy(mp, 0); vfs_rel(mp); if (nd->nd_repstat == 0) { (void)nfsvno_fillattr(nd, mp, vp, &nva, &fh, 0, &attrbits, nd->nd_cred, p, isdgram, 1, supports_nfsv4acls, at_root, mounted_on_fileno); vfs_unbusy(mp); } vrele(vp); } else vput(vp); } else { nfsrv_fillattr(nd, &nva); vput(vp); } } else { vput(vp); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs setattr service */ APPLESTATIC int nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nfsvattr nva, nva2; u_int32_t *tl; int preat_ret = 1, postat_ret = 1, gcheck = 0, error = 0; + int gotproxystateid; struct timespec guard = { 0, 0 }; nfsattrbit_t attrbits, retbits; nfsv4stateid_t stateid; NFSACL_T *aclp = NULL; if (nd->nd_repstat) { nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); goto out; } #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif + gotproxystateid = 0; NFSVNO_ATTRINIT(&nva); - NFSZERO_ATTRBIT(&retbits); if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); - NFSBCOPY((caddr_t)tl,(caddr_t)stateid.other,NFSX_STATEIDOTHER); + stateid.other[0] = *tl++; + stateid.other[1] = *tl++; + stateid.other[2] = *tl; + if (stateid.other[0] == 0x55555555 && + stateid.other[1] == 0x55555555 && + stateid.other[2] == 0x55555555 && + stateid.seqid == 0xffffffff) + gotproxystateid = 1; } error = nfsrv_sattr(nd, vp, &nva, &attrbits, aclp, p); if (error) goto nfsmout; - preat_ret = nfsvno_getattr(vp, &nva2, nd->nd_cred, p, 1); + + /* For NFSv4, only va_uid is used from nva2. */ + NFSZERO_ATTRBIT(&retbits); + NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER); + preat_ret = nfsvno_getattr(vp, &nva2, nd, p, 1, &retbits); if (!nd->nd_repstat) nd->nd_repstat = preat_ret; + + NFSZERO_ATTRBIT(&retbits); if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); gcheck = fxdr_unsigned(int, *tl); if (gcheck) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &guard); } if (!nd->nd_repstat && gcheck && (nva2.na_ctime.tv_sec != guard.tv_sec || nva2.na_ctime.tv_nsec != guard.tv_nsec)) nd->nd_repstat = NFSERR_NOT_SYNC; if (nd->nd_repstat) { vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); goto out; } } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); /* * Now that we have all the fields, lets do it. * If the size is being changed write access is required, otherwise * just check for a read only file system. */ if (!nd->nd_repstat) { if (NFSVNO_NOTSETSIZE(&nva)) { if (NFSVNO_EXRDONLY(exp) || (vfs_flags(vnode_mount(vp)) & MNT_RDONLY)) nd->nd_repstat = EROFS; } else { if (vnode_vtype(vp) != VREG) nd->nd_repstat = EINVAL; else if (nva2.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp)) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); } } - if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) + /* + * Proxy operations from the MDS are allowed via the all 0s special + * stateid. + */ + if (nd->nd_repstat == 0 && (nd->nd_flag & ND_NFSV4) != 0 && + gotproxystateid == 0) nd->nd_repstat = nfsrv_checksetattr(vp, nd, &stateid, &nva, &attrbits, exp, p); if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) { /* * For V4, try setting the attrbutes in sets, so that the * reply bitmap will be correct for an error case. */ if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, uid, nva.na_uid); NFSVNO_SETATTRVAL(&nva2, gid, nva.na_gid); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNERGROUP); } } if (!nd->nd_repstat && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SIZE)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, size, nva.na_size); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SIZE); } if (!nd->nd_repstat && (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET))) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, atime, nva.na_atime); NFSVNO_SETATTRVAL(&nva2, mtime, nva.na_mtime); if (nva.na_vaflags & VA_UTIMES_NULL) { nva2.na_vaflags |= VA_UTIMES_NULL; NFSVNO_SETACTIVE(&nva2, vaflags); } nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEACCESSSET); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEMODIFYSET); } } if (!nd->nd_repstat && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODE)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, mode, nva.na_mode); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODE); } #ifdef NFS4_ACL_EXTATTR_NAME if (!nd->nd_repstat && aclp->acl_cnt > 0 && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_ACL)) { nd->nd_repstat = nfsrv_setacl(vp, aclp, nd->nd_cred, p); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_ACL); } #endif } else if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_setattr(vp, &nva, nd->nd_cred, p, exp); } if (nd->nd_flag & (ND_NFSV2 | ND_NFSV3)) { - postat_ret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); + postat_ret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = postat_ret; } vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); else if (nd->nd_flag & ND_NFSV4) (void) nfsrv_putattrbit(nd, &retbits); else if (!nd->nd_repstat) nfsrv_fillattr(nd, &nva); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV4) { /* * For all nd_repstat, the V4 reply includes a bitmap, * even NFSERR_BADXDR, which is what this will end up * returning. */ (void) nfsrv_putattrbit(nd, &retbits); } NFSEXITCODE2(error, nd); return (error); } /* * nfs lookup rpc * (Also performs lookup parent for v4) */ APPLESTATIC int nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nameidata named; vnode_t vp, dirp = NULL; int error = 0, dattr_ret = 1; struct nfsvattr nva, dattr; char *bufp; u_long *hashp; if (nd->nd_repstat) { nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } /* * For some reason, if dp is a symlink, the error * returned is supposed to be NFSERR_SYMLINK and not NFSERR_NOTDIR. */ if (dp->v_type == VLNK && (nd->nd_flag & ND_NFSV4)) { nd->nd_repstat = NFSERR_SYMLINK; vrele(dp); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (nd->nd_repstat) { if (dirp) { if (nd->nd_flag & ND_NFSV3) - dattr_ret = nfsvno_getattr(dirp, &dattr, - nd->nd_cred, p, 0); + dattr_ret = nfsvno_getattr(dirp, &dattr, nd, p, + 0, NULL); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } if (named.ni_startdir) vrele(named.ni_startdir); nfsvno_relpathbuf(&named); vp = named.ni_vp; if ((nd->nd_flag & ND_NFSV4) != 0 && !NFSVNO_EXPORTED(exp) && vp->v_type != VDIR && vp->v_type != VLNK) /* * Only allow lookup of VDIR and VLNK for traversal of * non-exported volumes during NFSv4 mounting. */ nd->nd_repstat = ENOENT; if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) - nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); + nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (vpp != NULL && nd->nd_repstat == 0) *vpp = vp; else vput(vp); if (dirp) { if (nd->nd_flag & ND_NFSV3) - dattr_ret = nfsvno_getattr(dirp, &dattr, nd->nd_cred, - p, 0); + dattr_ret = nfsvno_getattr(dirp, &dattr, nd, p, 0, + NULL); vrele(dirp); } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } if (nd->nd_flag & ND_NFSV2) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_fillattr(nd, &nva); } else if (nd->nd_flag & ND_NFSV3) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_postopattr(nd, 0, &nva); nfsrv_postopattr(nd, dattr_ret, &dattr); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs readlink service */ APPLESTATIC int nfsrvd_readlink(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; mbuf_t mp = NULL, mpend = NULL; int getret = 1, len; struct nfsvattr nva; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &nva); goto out; } if (vnode_vtype(vp) != VLNK) { if (nd->nd_flag & ND_NFSV2) nd->nd_repstat = ENXIO; else nd->nd_repstat = EINVAL; } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_readlink(vp, nd->nd_cred, p, &mp, &mpend, &len); if (nd->nd_flag & ND_NFSV3) - getret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); + getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); if (nd->nd_repstat) goto out; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(len); mbuf_setnext(nd->nd_mb, mp); nd->nd_mb = mpend; nd->nd_bpos = NFSMTOD(mpend, caddr_t) + mbuf_len(mpend); out: NFSEXITCODE2(0, nd); return (0); } /* * nfs read service */ APPLESTATIC int nfsrvd_read(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; - int error = 0, cnt, getret = 1, reqlen, eof = 0; + int error = 0, cnt, getret = 1, gotproxystateid, reqlen, eof = 0; mbuf_t m2, m3; struct nfsvattr nva; off_t off = 0x0; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &nva); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_int32_t, *tl++); reqlen = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; reqlen = fxdr_unsigned(int, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3*NFSX_UNSIGNED); reqlen = fxdr_unsigned(int, *(tl + 6)); } if (reqlen > NFS_SRVMAXDATA(nd)) { reqlen = NFS_SRVMAXDATA(nd); } else if (reqlen < 0) { error = EBADRPC; goto nfsmout; } + gotproxystateid = 0; if (nd->nd_flag & ND_NFSV4) { stp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS); lop->lo_flags = NFSLCK_READ; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK1 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; + /* + * Don't allow the client to use a special stateid for a DS op. + */ + if ((nd->nd_flag & ND_DSSERVER) != 0 && + ((stp->ls_stateid.other[0] == 0x0 && + stp->ls_stateid.other[1] == 0x0 && + stp->ls_stateid.other[2] == 0x0) || + (stp->ls_stateid.other[0] == 0xffffffff && + stp->ls_stateid.other[1] == 0xffffffff && + stp->ls_stateid.other[2] == 0xffffffff) || + stp->ls_stateid.seqid != 0)) + nd->nd_repstat = NFSERR_BADSTATEID; + /* However, allow the proxy stateid. */ + if (stp->ls_stateid.seqid == 0xffffffff && + stp->ls_stateid.other[0] == 0x55555555 && + stp->ls_stateid.other[1] == 0x55555555 && + stp->ls_stateid.other[2] == 0x55555555) + gotproxystateid = 1; off = fxdr_hyper(tl); lop->lo_first = off; tl += 2; lop->lo_end = off + reqlen; /* * Paranoia, just in case it wraps around. */ if (lop->lo_end < off) lop->lo_end = NFS64BITSSET; } if (vnode_vtype(vp) != VREG) { if (nd->nd_flag & ND_NFSV3) nd->nd_repstat = EINVAL; else nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR : EINVAL; } - getret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); + getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; if (!nd->nd_repstat && (nva.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } - if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) + /* + * DS reads are marked by ND_DSSERVER or use the proxy special + * stateid. + */ + if (nd->nd_repstat == 0 && (nd->nd_flag & (ND_NFSV4 | ND_DSSERVER)) == + ND_NFSV4 && gotproxystateid == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); goto out; } if (off >= nva.na_size) { cnt = 0; eof = 1; } else if (reqlen == 0) cnt = 0; else if ((off + reqlen) >= nva.na_size) { cnt = nva.na_size - off; eof = 1; } else cnt = reqlen; m3 = NULL; if (cnt > 0) { nd->nd_repstat = nfsvno_read(vp, off, cnt, nd->nd_cred, p, &m3, &m2); if (!(nd->nd_flag & ND_NFSV4)) { - getret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); + getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = getret; } if (nd->nd_repstat) { vput(vp); if (m3) mbuf_freem(m3); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); goto out; } } vput(vp); if (nd->nd_flag & ND_NFSV2) { nfsrv_fillattr(nd, &nva); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); } else { if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &nva); NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(cnt); } else NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (eof) *tl++ = newnfs_true; else *tl++ = newnfs_false; } *tl = txdr_unsigned(cnt); if (m3) { mbuf_setnext(nd->nd_mb, m3); nd->nd_mb = m2; nd->nd_bpos = NFSMTOD(m2, caddr_t) + mbuf_len(m2); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs write service */ APPLESTATIC int nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { int i, cnt; u_int32_t *tl; mbuf_t mp; struct nfsvattr nva, forat; int aftat_ret = 1, retlen, len, error = 0, forat_ret = 1; - int stable = NFSWRITE_FILESYNC; + int gotproxystateid, stable = NFSWRITE_FILESYNC; off_t off; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; + nfsattrbit_t attrbits; if (nd->nd_repstat) { nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); goto out; } + gotproxystateid = 0; if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_int32_t, *++tl); tl += 2; retlen = len = fxdr_unsigned(int32_t, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 3; stable = fxdr_unsigned(int, *tl++); retlen = len = fxdr_unsigned(int32_t, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 4 * NFSX_UNSIGNED); stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK2 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; + /* + * Don't allow the client to use a special stateid for a DS op. + */ + if ((nd->nd_flag & ND_DSSERVER) != 0 && + ((stp->ls_stateid.other[0] == 0x0 && + stp->ls_stateid.other[1] == 0x0 && + stp->ls_stateid.other[2] == 0x0) || + (stp->ls_stateid.other[0] == 0xffffffff && + stp->ls_stateid.other[1] == 0xffffffff && + stp->ls_stateid.other[2] == 0xffffffff) || + stp->ls_stateid.seqid != 0)) + nd->nd_repstat = NFSERR_BADSTATEID; + /* However, allow the proxy stateid. */ + if (stp->ls_stateid.seqid == 0xffffffff && + stp->ls_stateid.other[0] == 0x55555555 && + stp->ls_stateid.other[1] == 0x55555555 && + stp->ls_stateid.other[2] == 0x55555555) + gotproxystateid = 1; off = fxdr_hyper(tl); lop->lo_first = off; tl += 2; stable = fxdr_unsigned(int, *tl++); retlen = len = fxdr_unsigned(int32_t, *tl); lop->lo_end = off + len; /* * Paranoia, just in case it wraps around, which shouldn't * ever happen anyhow. */ if (lop->lo_end < lop->lo_first) lop->lo_end = NFS64BITSSET; } /* * Loop through the mbuf chain, counting how many mbufs are a * part of this write operation, so the iovec size is known. */ cnt = 0; mp = nd->nd_md; i = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - nd->nd_dpos; while (len > 0) { if (i > 0) { len -= i; cnt++; } mp = mbuf_next(mp); if (!mp) { if (len > 0) { error = EBADRPC; goto nfsmout; } } else i = mbuf_len(mp); } if (retlen > NFS_SRVMAXIO || retlen < 0) nd->nd_repstat = EIO; if (vnode_vtype(vp) != VREG && !nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nd->nd_repstat = EINVAL; else nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR : EINVAL; } - forat_ret = nfsvno_getattr(vp, &forat, nd->nd_cred, p, 1); + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); + forat_ret = nfsvno_getattr(vp, &forat, nd, p, 1, &attrbits); if (!nd->nd_repstat) nd->nd_repstat = forat_ret; if (!nd->nd_repstat && (forat.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); - if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { + /* + * DS reads are marked by ND_DSSERVER or use the proxy special + * stateid. + */ + if (nd->nd_repstat == 0 && (nd->nd_flag & (ND_NFSV4 | ND_DSSERVER)) == + ND_NFSV4 && gotproxystateid == 0) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); - } if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); goto out; } /* * For NFS Version 2, it is not obvious what a write of zero length * should do, but I might as well be consistent with Version 3, * which is to return ok so long as there are no permission problems. */ if (retlen > 0) { - nd->nd_repstat = nfsvno_write(vp, off, retlen, cnt, stable, + nd->nd_repstat = nfsvno_write(vp, off, retlen, cnt, &stable, nd->nd_md, nd->nd_dpos, nd->nd_cred, p); error = nfsm_advance(nd, NFSM_RNDUP(retlen), -1); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV4) aftat_ret = 0; else - aftat_ret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); + aftat_ret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); vput(vp); if (!nd->nd_repstat) nd->nd_repstat = aftat_ret; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); if (nd->nd_repstat) goto out; NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(retlen); /* * If nfs_async is set, then pretend the write was FILESYNC. * Warning: Doing this violates RFC1813 and runs a risk * of data written by a client being lost when the server * crashes/reboots. */ if (stable == NFSWRITE_UNSTABLE && nfs_async == 0) *tl++ = txdr_unsigned(stable); else *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(nfsboottime.tv_usec); } else if (!nd->nd_repstat) nfsrv_fillattr(nd, &nva); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs create service (creates regular files for V2 and V3. Spec. files for V2.) * now does a truncate to 0 length via. setattr if it already exists * The core creation routine has been extracted out into nfsrv_creatsub(), * so it can also be used by nfsrv_open() for V4. */ APPLESTATIC int nfsrvd_create(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nfsv2_sattr *sp; struct nameidata named; u_int32_t *tl; int error = 0, tsize, dirfor_ret = 1, diraft_ret = 1; int how = NFSCREATE_UNCHECKED, exclusive_flag = 0; NFSDEV_T rdev = 0; vnode_t vp = NULL, dirp = NULL; fhandle_t fh; char *bufp; u_long *hashp; enum vtype vtyp; int32_t cverf[2], tverf[2] = { 0, 0 }; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { NFSVNO_ATTRINIT(&nva); if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vtyp = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode)); if (vtyp == VNON) vtyp = VREG; NFSVNO_SETATTRVAL(&nva, type, vtyp); NFSVNO_SETATTRVAL(&nva, mode, nfstov_mode(sp->sa_mode)); switch (nva.na_type) { case VREG: tsize = fxdr_unsigned(int32_t, sp->sa_size); if (tsize != -1) NFSVNO_SETATTRVAL(&nva, size, (u_quad_t)tsize); break; case VCHR: case VBLK: case VFIFO: rdev = fxdr_unsigned(NFSDEV_T, sp->sa_size); break; default: break; } } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSCREATE_GUARDED: case NFSCREATE_UNCHECKED: error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p); if (error) goto nfsmout; break; case NFSCREATE_EXCLUSIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; exclusive_flag = 1; break; } NFSVNO_SETATTRVAL(&nva, type, VREG); } } if (nd->nd_repstat) { nfsvno_relpathbuf(&named); if (nd->nd_flag & ND_NFSV3) { - dirfor_ret = nfsvno_getattr(dp, &dirfor, nd->nd_cred, - p, 1); + dirfor_ret = nfsvno_getattr(dp, &dirfor, nd, p, 1, + NULL); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } vput(dp); goto out; } nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); if (dirp) { if (nd->nd_flag & ND_NFSV2) { vrele(dirp); dirp = NULL; } else { - dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, - p, 0); + dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, + NULL); } } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) vrele(dirp); goto out; } if (!(nd->nd_flag & ND_NFSV2)) { switch (how) { case NFSCREATE_GUARDED: if (named.ni_vp) nd->nd_repstat = EEXIST; break; case NFSCREATE_UNCHECKED: break; case NFSCREATE_EXCLUSIVE: if (named.ni_vp == NULL) NFSVNO_SETATTRVAL(&nva, mode, 0); break; } } /* * Iff doesn't exist, create it * otherwise just truncate to 0 length * should I set the mode too ? */ nd->nd_repstat = nfsvno_createsub(nd, &named, &vp, &nva, &exclusive_flag, cverf, rdev, p, exp); if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) - nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, - p, 1); + nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, + NULL); vput(vp); if (!nd->nd_repstat) { tverf[0] = nva.na_atime.tv_sec; tverf[1] = nva.na_atime.tv_nsec; } } if (nd->nd_flag & ND_NFSV2) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0); nfsrv_fillattr(nd, &nva); } } else { if (exclusive_flag && !nd->nd_repstat && (cverf[0] != tverf[0] || cverf[1] != tverf[1])) nd->nd_repstat = EEXIST; - diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, 0); + diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(dp); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } /* * nfs v3 mknod service (and v4 create) */ APPLESTATIC int nfsrvd_mknod(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; u_int32_t *tl; struct nameidata named; int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen; u_int32_t major, minor; enum vtype vtyp = VNON; nfstype nfs4type = NFNON; vnode_t vp, dirp = NULL; nfsattrbit_t attrbits; char *bufp = NULL, *pathcp = NULL; u_long *hashp, cnflags; NFSACL_T *aclp = NULL; NFSVNO_ATTRINIT(&nva); cnflags = (LOCKPARENT | SAVESTART); if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif /* * For V4, the creation stuff is here, Yuck! */ if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); vtyp = nfsv34tov_type(*tl); nfs4type = fxdr_unsigned(nfstype, *tl); switch (nfs4type) { case NFLNK: error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen); if (error) goto nfsmout; break; case NFCHR: case NFBLK: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_int32_t, *tl++); minor = fxdr_unsigned(u_int32_t, *tl); nva.na_rdev = NFSMAKEDEV(major, minor); break; case NFSOCK: case NFFIFO: break; case NFDIR: cnflags = (LOCKPARENT | SAVENAME); break; default: nd->nd_repstat = NFSERR_BADTYPE; vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif goto out; } } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, cnflags | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); vtyp = nfsv34tov_type(*tl); } error = nfsrv_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error) goto nfsmout; nva.na_type = vtyp; if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV3) && (vtyp == VCHR || vtyp == VBLK)) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_int32_t, *tl++); minor = fxdr_unsigned(u_int32_t, *tl); nva.na_rdev = NFSMAKEDEV(major, minor); } } - dirfor_ret = nfsvno_getattr(dp, &dirfor, nd->nd_cred, p, 0); + dirfor_ret = nfsvno_getattr(dp, &dirfor, nd, p, 0, NULL); if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) { if (!dirfor_ret && NFSVNO_ISSETGID(&nva) && dirfor.na_gid == nva.na_gid) NFSVNO_UNSET(&nva, gid); nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); } if (nd->nd_repstat) { vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif nfsvno_relpathbuf(&named); if (pathcp) free(pathcp, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } /* * Yuck! For V4, mkdir and link are here and some V4 clients don't fill * in va_mode, so we'll have to set a default here. */ if (NFSVNO_NOTSETMODE(&nva)) { if (vtyp == VLNK) nva.na_mode = 0755; else nva.na_mode = 0400; } if (vtyp == VDIR) named.ni_cnd.cn_flags |= WILLBEDIR; nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); if (nd->nd_repstat) { if (dirp) { if (nd->nd_flag & ND_NFSV3) - dirfor_ret = nfsvno_getattr(dirp, &dirfor, - nd->nd_cred, p, 0); + dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, + p, 0, NULL); vrele(dirp); } #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (dirp) - dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); + dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); if ((nd->nd_flag & ND_NFSV4) && (vtyp == VDIR || vtyp == VLNK)) { if (vtyp == VDIR) { nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p, exp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif goto out; } else if (vtyp == VLNK) { nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p, exp, pathcp, pathlen); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif free(pathcp, M_TEMP); goto out; } } nd->nd_repstat = nfsvno_mknod(&named, &nva, nd->nd_cred, p); if (!nd->nd_repstat) { vp = named.ni_vp; nfsrv_fixattr(nd, vp, &nva, aclp, p, &attrbits, exp); nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if ((nd->nd_flag & ND_NFSV3) && !nd->nd_repstat) - nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, - p, 1); + nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, + NULL); if (vpp != NULL && nd->nd_repstat == 0) { NFSVOPUNLOCK(vp, 0); *vpp = vp; } else vput(vp); } - diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, 0); + diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } else { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); (void) nfsrv_putattrbit(nd, &attrbits); } } if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif out: NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (bufp) nfsvno_relpathbuf(&named); if (pathcp) free(pathcp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * nfs remove service */ APPLESTATIC int nfsrvd_remove(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1; vnode_t dirp = NULL; struct nfsvattr dirfor, diraft; char *bufp; u_long *hashp; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, DELETE, LOCKPARENT | LOCKLEAF); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vput(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); } else { vput(dp); nfsvno_relpathbuf(&named); } if (dirp) { if (!(nd->nd_flag & ND_NFSV2)) { - dirfor_ret = nfsvno_getattr(dirp, &dirfor, - nd->nd_cred, p, 0); + dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, + NULL); } else { vrele(dirp); dirp = NULL; } } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { if (vnode_vtype(named.ni_vp) == VDIR) nd->nd_repstat = nfsvno_rmdirsub(&named, 1, nd->nd_cred, p, exp); else nd->nd_repstat = nfsvno_removesub(&named, 1, nd->nd_cred, p, exp); } else if (nd->nd_procnum == NFSPROC_RMDIR) { nd->nd_repstat = nfsvno_rmdirsub(&named, 0, nd->nd_cred, p, exp); } else { nd->nd_repstat = nfsvno_removesub(&named, 0, nd->nd_cred, p, exp); } } if (!(nd->nd_flag & ND_NFSV2)) { if (dirp) { - diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred, - p, 0); + diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, + NULL); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); } } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs rename service */ APPLESTATIC int nfsrvd_rename(struct nfsrv_descript *nd, int isdgram, vnode_t dp, vnode_t todp, NFSPROC_T *p, struct nfsexstuff *exp, struct nfsexstuff *toexp) { u_int32_t *tl; int error = 0, fdirfor_ret = 1, fdiraft_ret = 1; int tdirfor_ret = 1, tdiraft_ret = 1; struct nameidata fromnd, tond; vnode_t fdirp = NULL, tdirp = NULL, tdp = NULL; struct nfsvattr fdirfor, fdiraft, tdirfor, tdiraft; struct nfsexstuff tnes; struct nfsrvfh tfh; char *bufp, *tbufp = NULL; u_long *hashp; fhandle_t fh; if (nd->nd_repstat) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); goto out; } if (!(nd->nd_flag & ND_NFSV2)) - fdirfor_ret = nfsvno_getattr(dp, &fdirfor, nd->nd_cred, p, 1); + fdirfor_ret = nfsvno_getattr(dp, &fdirfor, nd, p, 1, NULL); tond.ni_cnd.cn_nameiop = 0; tond.ni_startdir = NULL; NFSNAMEICNDSET(&fromnd.ni_cnd, nd->nd_cred, DELETE, WANTPARENT | SAVESTART); nfsvno_setpathbuf(&fromnd, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &fromnd.ni_pathlen); if (error) { vput(dp); if (todp) vrele(todp); nfsvno_relpathbuf(&fromnd); goto out; } /* * Unlock dp in this code section, so it is unlocked before * tdp gets locked. This avoids a potential LOR if tdp is the * parent directory of dp. */ if (nd->nd_flag & ND_NFSV4) { tdp = todp; tnes = *toexp; if (dp != tdp) { NFSVOPUNLOCK(dp, 0); - tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd->nd_cred, - p, 0); /* Might lock tdp. */ + /* Might lock tdp. */ + tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 0, + NULL); } else { - tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd->nd_cred, - p, 1); + tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1, + NULL); NFSVOPUNLOCK(dp, 0); } } else { tfh.nfsrvfh_len = 0; error = nfsrv_mtofh(nd, &tfh); if (error == 0) error = nfsvno_getfh(dp, &fh, p); if (error) { vput(dp); /* todp is always NULL except NFSv4 */ nfsvno_relpathbuf(&fromnd); goto out; } /* If this is the same file handle, just VREF() the vnode. */ if (tfh.nfsrvfh_len == NFSX_MYFH && !NFSBCMP(tfh.nfsrvfh_data, &fh, NFSX_MYFH)) { VREF(dp); tdp = dp; tnes = *exp; - tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd->nd_cred, - p, 1); + tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1, + NULL); NFSVOPUNLOCK(dp, 0); } else { NFSVOPUNLOCK(dp, 0); nd->nd_cred->cr_uid = nd->nd_saveduid; nfsd_fhtovp(nd, &tfh, LK_EXCLUSIVE, &tdp, &tnes, NULL, 0, p); /* Locks tdp. */ if (tdp) { - tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, - nd->nd_cred, p, 1); + tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, + p, 1, NULL); NFSVOPUNLOCK(tdp, 0); } } } NFSNAMEICNDSET(&tond.ni_cnd, nd->nd_cred, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART); nfsvno_setpathbuf(&tond, &tbufp, &hashp); if (!nd->nd_repstat) { error = nfsrv_parsename(nd, tbufp, hashp, &tond.ni_pathlen); if (error) { if (tdp) vrele(tdp); vrele(dp); nfsvno_relpathbuf(&fromnd); nfsvno_relpathbuf(&tond); goto out; } } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } if (tdp) vrele(tdp); vrele(dp); nfsvno_relpathbuf(&fromnd); nfsvno_relpathbuf(&tond); goto out; } /* * Done parsing, now down to business. */ nd->nd_repstat = nfsvno_namei(nd, &fromnd, dp, 0, exp, p, &fdirp); if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } if (fdirp) vrele(fdirp); if (tdp) vrele(tdp); nfsvno_relpathbuf(&tond); goto out; } if (vnode_vtype(fromnd.ni_vp) == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; nd->nd_repstat = nfsvno_namei(nd, &tond, tdp, 0, &tnes, p, &tdirp); nd->nd_repstat = nfsvno_rename(&fromnd, &tond, nd->nd_repstat, nd->nd_flag, nd->nd_cred, p); if (fdirp) - fdiraft_ret = nfsvno_getattr(fdirp, &fdiraft, nd->nd_cred, p, - 0); + fdiraft_ret = nfsvno_getattr(fdirp, &fdiraft, nd, p, 0, NULL); if (tdirp) - tdiraft_ret = nfsvno_getattr(tdirp, &tdiraft, nd->nd_cred, p, - 0); + tdiraft_ret = nfsvno_getattr(tdirp, &tdiraft, nd, p, 0, NULL); if (fdirp) vrele(fdirp); if (tdirp) vrele(tdirp); if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 10 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(fdirfor.na_filerev, tl); tl += 2; txdr_hyper(fdiraft.na_filerev, tl); tl += 2; *tl++ = newnfs_false; txdr_hyper(tdirfor.na_filerev, tl); tl += 2; txdr_hyper(tdiraft.na_filerev, tl); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs link service */ APPLESTATIC int nfsrvd_link(struct nfsrv_descript *nd, int isdgram, vnode_t vp, vnode_t tovp, NFSPROC_T *p, struct nfsexstuff *exp, struct nfsexstuff *toexp) { struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1, getret = 1; vnode_t dirp = NULL, dp = NULL; struct nfsvattr dirfor, diraft, at; struct nfsexstuff tnes; struct nfsrvfh dfh; char *bufp; u_long *hashp; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSVOPUNLOCK(vp, 0); if (vnode_vtype(vp) == VDIR) { if (nd->nd_flag & ND_NFSV4) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; if (tovp) vrele(tovp); } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { dp = tovp; tnes = *toexp; } else { error = nfsrv_mtofh(nd, &dfh); if (error) { vrele(vp); /* tovp is always NULL unless NFSv4 */ goto out; } nfsd_fhtovp(nd, &dfh, LK_EXCLUSIVE, &dp, &tnes, NULL, 0, p); if (dp) NFSVOPUNLOCK(dp, 0); } } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVENAME | NOCACHE); if (!nd->nd_repstat) { nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(vp); if (dp) vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, &tnes, p, &dirp); } else { if (dp) vrele(dp); nfsvno_relpathbuf(&named); } } if (dirp) { if (nd->nd_flag & ND_NFSV2) { vrele(dirp); dirp = NULL; } else { - dirfor_ret = nfsvno_getattr(dirp, &dirfor, - nd->nd_cred, p, 0); + dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, + NULL); } } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_link(&named, vp, nd->nd_cred, p, exp); if (nd->nd_flag & ND_NFSV3) - getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 0); + getret = nfsvno_getattr(vp, &at, nd, p, 0, NULL); if (dirp) { - diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, 0); + diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); vrele(dirp); } vrele(vp); if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &at); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs symbolic link service */ APPLESTATIC int nfsrvd_symlink(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nameidata named; int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen; vnode_t dirp = NULL; char *bufp, *pathcp = NULL; u_long *hashp; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (vpp) *vpp = NULL; NFSVNO_ATTRINIT(&nva); NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVESTART | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (!error && !nd->nd_repstat) error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen); if (error) { vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) { vrele(dirp); dirp = NULL; } /* * And call nfsrvd_symlinksub() to do the common code. It will * return EBADRPC upon a parsing error, 0 otherwise. */ if (!nd->nd_repstat) { if (dirp != NULL) - dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, - p, 0); + dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, + NULL); nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, NULL, NULL, p, exp, pathcp, pathlen); } else if (dirp != NULL) { - dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); + dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); vrele(dirp); } if (pathcp) free(pathcp, M_TEMP); if (nd->nd_flag & ND_NFSV3) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } out: NFSEXITCODE2(error, nd); return (error); } /* * Common code for creating a symbolic link. */ static void nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp, int pathlen) { u_int32_t *tl; nd->nd_repstat = nfsvno_symlink(ndp, nvap, pathcp, pathlen, !(nd->nd_flag & ND_NFSV2), nd->nd_saveduid, nd->nd_cred, p, exp); if (!nd->nd_repstat && !(nd->nd_flag & ND_NFSV2)) { nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp); if (nd->nd_flag & ND_NFSV3) { nd->nd_repstat = nfsvno_getfh(ndp->ni_vp, fhp, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(ndp->ni_vp, - nvap, nd->nd_cred, p, 1); + nvap, nd, p, 1, NULL); } if (vpp != NULL && nd->nd_repstat == 0) { NFSVOPUNLOCK(ndp->ni_vp, 0); *vpp = ndp->ni_vp; } else vput(ndp->ni_vp); } if (dirp) { - *diraft_retp = nfsvno_getattr(dirp, diraftp, nd->nd_cred, p, 0); + *diraft_retp = nfsvno_getattr(dirp, diraftp, nd, p, 0, NULL); vrele(dirp); } if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirforp->na_filerev, tl); tl += 2; txdr_hyper(diraftp->na_filerev, tl); (void) nfsrv_putattrbit(nd, attrbitp); } NFSEXITCODE2(0, nd); } /* * nfs mkdir service */ APPLESTATIC int nfsrvd_mkdir(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1; vnode_t dirp = NULL; char *bufp; u_long *hashp; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVENAME | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { NFSVNO_ATTRINIT(&nva); if (nd->nd_flag & ND_NFSV3) { error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p); if (error) goto nfsmout; } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nva.na_mode = nfstov_mode(*tl++); } } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) { vrele(dirp); dirp = NULL; } if (nd->nd_repstat) { if (dirp != NULL) { - dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, - p, 0); + dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, + NULL); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (dirp != NULL) - dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); + dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL); /* * Call nfsrvd_mkdirsub() for the code common to V4 as well. */ nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, NULL, NULL, p, exp); if (nd->nd_flag & ND_NFSV3) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_fillattr(nd, &nva); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } /* * Code common to mkdir for V2,3 and 4. */ static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp) { vnode_t vp; u_int32_t *tl; NFSVNO_SETATTRVAL(nvap, type, VDIR); nd->nd_repstat = nfsvno_mkdir(ndp, nvap, nd->nd_saveduid, nd->nd_cred, p, exp); if (!nd->nd_repstat) { vp = ndp->ni_vp; nfsrv_fixattr(nd, vp, nvap, aclp, p, attrbitp, exp); nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) - nd->nd_repstat = nfsvno_getattr(vp, nvap, nd->nd_cred, - p, 1); + nd->nd_repstat = nfsvno_getattr(vp, nvap, nd, p, 1, + NULL); if (vpp && !nd->nd_repstat) { NFSVOPUNLOCK(vp, 0); *vpp = vp; } else { vput(vp); } } if (dirp) { - *diraft_retp = nfsvno_getattr(dirp, diraftp, nd->nd_cred, p, 0); + *diraft_retp = nfsvno_getattr(dirp, diraftp, nd, p, 0, NULL); vrele(dirp); } if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirforp->na_filerev, tl); tl += 2; txdr_hyper(diraftp->na_filerev, tl); (void) nfsrv_putattrbit(nd, attrbitp); } NFSEXITCODE2(0, nd); } /* * nfs commit service */ APPLESTATIC int nfsrvd_commit(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct nfsvattr bfor, aft; u_int32_t *tl; int error = 0, for_ret = 1, aft_ret = 1, cnt; u_int64_t off; if (nd->nd_repstat) { nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft); goto out; } /* Return NFSERR_ISDIR in NFSv4 when commit on a directory. */ if (vp->v_type != VREG) { if (nd->nd_flag & ND_NFSV3) error = NFSERR_NOTSUPP; else error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); /* * XXX At this time VOP_FSYNC() does not accept offset and byte * count parameters, so these arguments are useless (someday maybe). */ off = fxdr_hyper(tl); tl += 2; cnt = fxdr_unsigned(int, *tl); if (nd->nd_flag & ND_NFSV3) - for_ret = nfsvno_getattr(vp, &bfor, nd->nd_cred, p, 1); + for_ret = nfsvno_getattr(vp, &bfor, nd, p, 1, NULL); nd->nd_repstat = nfsvno_fsync(vp, off, cnt, nd->nd_cred, p); if (nd->nd_flag & ND_NFSV3) { - aft_ret = nfsvno_getattr(vp, &aft, nd->nd_cred, p, 1); + aft_ret = nfsvno_getattr(vp, &aft, nd, p, 1, NULL); nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft); } vput(vp); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(nfsboottime.tv_usec); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs statfs service */ APPLESTATIC int nfsrvd_statfs(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct statfs *sf; u_int32_t *tl; int getret = 1; struct nfsvattr at; u_quad_t tval; sf = NULL; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); nd->nd_repstat = nfsvno_statfs(vp, sf); - getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); + getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); if (nd->nd_repstat) goto out; if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, NFSX_V2STATFS); *tl++ = txdr_unsigned(NFS_V2MAXDATA); *tl++ = txdr_unsigned(sf->f_bsize); *tl++ = txdr_unsigned(sf->f_blocks); *tl++ = txdr_unsigned(sf->f_bfree); *tl = txdr_unsigned(sf->f_bavail); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_V3STATFS); tval = (u_quad_t)sf->f_blocks; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_bfree; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_bavail; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_files; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_ffree; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_ffree; txdr_hyper(tval, tl); tl += 2; *tl = 0; } out: free(sf, M_STATFS); NFSEXITCODE2(0, nd); return (0); } /* * nfs fsinfo service */ APPLESTATIC int nfsrvd_fsinfo(struct nfsrv_descript *nd, int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsfsinfo fs; int getret = 1; struct nfsvattr at; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } - getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); + getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); nfsvno_getfs(&fs, isdgram); vput(vp); nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, NFSX_V3FSINFO); *tl++ = txdr_unsigned(fs.fs_rtmax); *tl++ = txdr_unsigned(fs.fs_rtpref); *tl++ = txdr_unsigned(fs.fs_rtmult); *tl++ = txdr_unsigned(fs.fs_wtmax); *tl++ = txdr_unsigned(fs.fs_wtpref); *tl++ = txdr_unsigned(fs.fs_wtmult); *tl++ = txdr_unsigned(fs.fs_dtpref); txdr_hyper(fs.fs_maxfilesize, tl); tl += 2; txdr_nfsv3time(&fs.fs_timedelta, tl); tl += 2; *tl = txdr_unsigned(fs.fs_properties); out: NFSEXITCODE2(0, nd); return (0); } /* * nfs pathconf service */ APPLESTATIC int nfsrvd_pathconf(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct nfsv3_pathconf *pc; int getret = 1; long linkmax, namemax, chownres, notrunc; struct nfsvattr at; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } nd->nd_repstat = nfsvno_pathconf(vp, _PC_LINK_MAX, &linkmax, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_pathconf(vp, _PC_NAME_MAX, &namemax, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat=nfsvno_pathconf(vp, _PC_CHOWN_RESTRICTED, &chownres, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_pathconf(vp, _PC_NO_TRUNC, ¬runc, nd->nd_cred, p); - getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); + getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL); vput(vp); nfsrv_postopattr(nd, getret, &at); if (!nd->nd_repstat) { NFSM_BUILD(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); pc->pc_linkmax = txdr_unsigned(linkmax); pc->pc_namemax = txdr_unsigned(namemax); pc->pc_notrunc = txdr_unsigned(notrunc); pc->pc_chownrestricted = txdr_unsigned(chownres); /* * These should probably be supported by VOP_PATHCONF(), but * until msdosfs is exportable (why would you want to?), the * Unix defaults should be ok. */ pc->pc_caseinsensitive = newnfs_false; pc->pc_casepreserving = newnfs_true; } out: NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 lock service */ APPLESTATIC int nfsrvd_lock(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp = NULL; struct nfslock *lop; struct nfslockconflict cf; int error = 0; u_short flags = NFSLCK_LOCK, lflags; u_int64_t offset, len; nfsv4stateid_t stateid; nfsquad_t clientid; NFSM_DISSECT(tl, u_int32_t *, 7 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: lflags = NFSLCK_READ; break; case NFSV4LOCKT_WRITEW: flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: lflags = NFSLCK_WRITE; break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (*tl++ == newnfs_true) flags |= NFSLCK_RECLAIM; offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; if (*tl == newnfs_true) flags |= NFSLCK_OPENTOLOCK; if (flags & NFSLCK_OPENTOLOCK) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED + NFSX_STATEID); i = fxdr_unsigned(int, *(tl+4+(NFSX_STATEID / NFSX_UNSIGNED))); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = nd->nd_rp; stp->ls_seq = fxdr_unsigned(int, *tl++); stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + + /* + * For the special stateid of other all 0s and seqid == 1, set + * the stateid to the current stateid, if it is set. + */ + if ((nd->nd_flag & ND_NFSV41) != 0 && + stp->ls_stateid.seqid == 1 && + stp->ls_stateid.other[0] == 0 && + stp->ls_stateid.other[1] == 0 && + stp->ls_stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stp->ls_stateid = nd->nd_curstateid; + stp->ls_stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + stp->ls_opentolockseq = fxdr_unsigned(int, *tl++); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK3 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stp = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + + /* + * For the special stateid of other all 0s and seqid == 1, set + * the stateid to the current stateid, if it is set. + */ + if ((nd->nd_flag & ND_NFSV41) != 0 && + stp->ls_stateid.seqid == 1 && + stp->ls_stateid.other[0] == 0 && + stp->ls_stateid.other[1] == 0 && + stp->ls_stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stp->ls_stateid = nd->nd_curstateid; + stp->ls_stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + stp->ls_seq = fxdr_unsigned(int, *tl); clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK4 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } } lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); lop->lo_first = offset; if (len == NFS64BITSSET) { lop->lo_end = NFS64BITSSET; } else { lop->lo_end = offset + len; if (lop->lo_end <= lop->lo_first) nd->nd_repstat = NFSERR_INVAL; } lop->lo_flags = lflags; stp->ls_flags = flags; stp->ls_uid = nd->nd_cred->cr_uid; /* * Do basic access checking. */ if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } if (!nd->nd_repstat) { if (lflags & NFSLCK_WRITE) { nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } else { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } } /* * We call nfsrv_lockctrl() even if nd_repstat set, so that the * seqid# gets updated. nfsrv_lockctrl() will return the value * of nd_repstat, if it gets that far. */ nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, &stateid, exp, nd, p); if (lop) free(lop, M_NFSDLOCK); if (stp) free(stp, M_NFSDSTATE); if (!nd->nd_repstat) { + /* For NFSv4.1, set the Current StateID. */ + if ((nd->nd_flag & ND_NFSV41) != 0) { + nd->nd_curstateid = stateid; + nd->nd_flag |= ND_CURSTATEID; + } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } else if (nd->nd_repstat == NFSERR_DENIED) { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); txdr_hyper(cf.cl_first, tl); tl += 2; if (cf.cl_end == NFS64BITSSET) len = NFS64BITSSET; else len = cf.cl_end - cf.cl_first; txdr_hyper(len, tl); tl += 2; if (cf.cl_flags == NFSLCK_WRITE) *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); else *tl++ = txdr_unsigned(NFSV4LOCKT_READ); *tl++ = stateid.other[0]; *tl = stateid.other[1]; (void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen); } vput(vp); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 lock test service */ APPLESTATIC int nfsrvd_lockt(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp = NULL; struct nfslock lo, *lop = &lo; struct nfslockconflict cf; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; u_int64_t len; NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *(tl + 7)); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = NULL; stp->ls_flags = NFSLCK_TEST; stp->ls_uid = nd->nd_cred->cr_uid; i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: lo.lo_flags = NFSLCK_READ; break; case NFSV4LOCKT_WRITEW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: lo.lo_flags = NFSLCK_WRITE; break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } lo.lo_first = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); if (len == NFS64BITSSET) { lo.lo_end = NFS64BITSSET; } else { lo.lo_end = lo.lo_first + len; if (lo.lo_end <= lo.lo_first) nd->nd_repstat = NFSERR_INVAL; } tl += 2; clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK5 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } if (!nd->nd_repstat) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { if (nd->nd_repstat == NFSERR_DENIED) { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); txdr_hyper(cf.cl_first, tl); tl += 2; if (cf.cl_end == NFS64BITSSET) len = NFS64BITSSET; else len = cf.cl_end - cf.cl_first; txdr_hyper(len, tl); tl += 2; if (cf.cl_flags == NFSLCK_WRITE) *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); else *tl++ = txdr_unsigned(NFSV4LOCKT_READ); *tl++ = stp->ls_stateid.other[0]; *tl = stp->ls_stateid.other[1]; (void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen); } } vput(vp); if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 unlock service */ APPLESTATIC int nfsrvd_locku(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp; struct nfslock *lop; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; u_int64_t len; NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED + NFSX_STATEID); stp = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); stp->ls_flags = NFSLCK_UNLOCK; lop->lo_flags = NFSLCK_UNLOCK; stp->ls_op = nd->nd_rp; i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: break; case NFSV4LOCKT_WRITEW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: break; default: nd->nd_repstat = NFSERR_BADXDR; free(stp, M_NFSDSTATE); free(lop, M_NFSDLOCK); goto nfsmout; } stp->ls_ownerlen = 0; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_seq = fxdr_unsigned(int, *tl++); stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + + /* + * For the special stateid of other all 0s and seqid == 1, set the + * stateid to the current stateid, if it is set. + */ + if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && + stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && + stp->ls_stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stp->ls_stateid = nd->nd_curstateid; + stp->ls_stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + lop->lo_first = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); if (len == NFS64BITSSET) { lop->lo_end = NFS64BITSSET; } else { lop->lo_end = lop->lo_first + len; if (lop->lo_end <= lop->lo_first) nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK6 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } /* * Call nfsrv_lockctrl() even if nd_repstat is set, so that the * seqid# gets incremented. nfsrv_lockctrl() will return the * value of nd_repstat, if it gets that far. */ nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (stp) free(stp, M_NFSDSTATE); if (lop) free(lop, M_NFSDLOCK); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 open service */ APPLESTATIC int nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, __unused fhandle_t *fhp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i, retext; struct nfsstate *stp = NULL; int error = 0, create, claim, exclusive_flag = 0; u_int32_t rflags = NFSV4OPEN_LOCKTYPEPOSIX, acemask; int how = NFSCREATE_UNCHECKED; int32_t cverf[2], tverf[2] = { 0, 0 }; vnode_t vp = NULL, dirp = NULL; struct nfsvattr nva, dirfor, diraft; struct nameidata named; nfsv4stateid_t stateid, delegstateid; nfsattrbit_t attrbits; nfsquad_t clientid; char *bufp = NULL; u_long *hashp; NFSACL_T *aclp = NULL; #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif NFSZERO_ATTRBIT(&attrbits); named.ni_startdir = NULL; named.ni_cnd.cn_nameiop = 0; NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *(tl + 5)); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = nd->nd_rp; stp->ls_flags = NFSLCK_OPEN; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); i = fxdr_unsigned(int, *tl++); retext = 0; if ((i & (NFSV4OPEN_WANTDELEGMASK | NFSV4OPEN_WANTSIGNALDELEG | NFSV4OPEN_WANTPUSHDELEG)) != 0 && (nd->nd_flag & ND_NFSV41) != 0) { retext = 1; /* For now, ignore these. */ i &= ~(NFSV4OPEN_WANTPUSHDELEG | NFSV4OPEN_WANTSIGNALDELEG); switch (i & NFSV4OPEN_WANTDELEGMASK) { case NFSV4OPEN_WANTANYDELEG: stp->ls_flags |= (NFSLCK_WANTRDELEG | NFSLCK_WANTWDELEG); i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTREADDELEG: stp->ls_flags |= NFSLCK_WANTRDELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTWRITEDELEG: stp->ls_flags |= NFSLCK_WANTWDELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTNODELEG: stp->ls_flags |= NFSLCK_WANTNODELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTCANCEL: printf("NFSv4: ignore Open WantCancel\n"); i &= ~NFSV4OPEN_WANTDELEGMASK; break; default: /* nd_repstat will be set to NFSERR_INVAL below. */ break; } } switch (i) { case NFSV4OPEN_ACCESSREAD: stp->ls_flags |= NFSLCK_READACCESS; break; case NFSV4OPEN_ACCESSWRITE: stp->ls_flags |= NFSLCK_WRITEACCESS; break; case NFSV4OPEN_ACCESSBOTH: stp->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); break; default: nd->nd_repstat = NFSERR_INVAL; } i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4OPEN_DENYNONE: break; case NFSV4OPEN_DENYREAD: stp->ls_flags |= NFSLCK_READDENY; break; case NFSV4OPEN_DENYWRITE: stp->ls_flags |= NFSLCK_WRITEDENY; break; case NFSV4OPEN_DENYBOTH: stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY); break; default: nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK7 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; NFSVNO_ATTRINIT(&nva); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); create = fxdr_unsigned(int, *tl); if (!nd->nd_repstat) - nd->nd_repstat = nfsvno_getattr(dp, &dirfor, nd->nd_cred, p, 0); + nd->nd_repstat = nfsvno_getattr(dp, &dirfor, nd, p, 0, NULL); if (create == NFSV4OPEN_CREATE) { nva.na_type = VREG; nva.na_mode = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSCREATE_UNCHECKED: case NFSCREATE_GUARDED: error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error) goto nfsmout; /* * If the na_gid being set is the same as that of * the directory it is going in, clear it, since * that is what will be set by default. This allows * a user that isn't in that group to do the create. */ if (!nd->nd_repstat && NFSVNO_ISSETGID(&nva) && nva.na_gid == dirfor.na_gid) NFSVNO_UNSET(&nva, gid); if (!nd->nd_repstat) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); break; case NFSCREATE_EXCLUSIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; break; case NFSCREATE_EXCLUSIVE41: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; error = nfsv4_sattr(nd, vp, &nva, &attrbits, aclp, p); if (error != 0) goto nfsmout; if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET)) nd->nd_repstat = NFSERR_INVAL; /* * If the na_gid being set is the same as that of * the directory it is going in, clear it, since * that is what will be set by default. This allows * a user that isn't in that group to do the create. */ if (nd->nd_repstat == 0 && NFSVNO_ISSETGID(&nva) && nva.na_gid == dirfor.na_gid) NFSVNO_UNSET(&nva, gid); if (nd->nd_repstat == 0) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } } else if (create != NFSV4OPEN_NOCREATE) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } /* * Now, handle the claim, which usually includes looking up a * name in the directory referenced by dp. The exception is * NFSV4OPEN_CLAIMPREVIOUS. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); claim = fxdr_unsigned(int, *tl); if (claim == NFSV4OPEN_CLAIMDELEGATECUR) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl,(caddr_t)stateid.other,NFSX_STATEIDOTHER); stp->ls_flags |= NFSLCK_DELEGCUR; } else if (claim == NFSV4OPEN_CLAIMDELEGATEPREV) { stp->ls_flags |= NFSLCK_DELEGPREV; } if (claim == NFSV4OPEN_CLAIMNULL || claim == NFSV4OPEN_CLAIMDELEGATECUR || claim == NFSV4OPEN_CLAIMDELEGATEPREV) { if (!nd->nd_repstat && create == NFSV4OPEN_CREATE && claim != NFSV4OPEN_CLAIMNULL) nd->nd_repstat = NFSERR_INVAL; if (nd->nd_repstat) { nd->nd_repstat = nfsrv_opencheck(clientid, &stateid, stp, NULL, nd, p, nd->nd_repstat); goto nfsmout; } if (create == NFSV4OPEN_CREATE) NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); else NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif free(stp, M_NFSDSTATE); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (create == NFSV4OPEN_CREATE) { switch (how) { case NFSCREATE_UNCHECKED: if (named.ni_vp) { /* * Clear the setable attribute bits, except * for Size, if it is being truncated. */ NFSZERO_ATTRBIT(&attrbits); if (NFSVNO_ISSETSIZE(&nva)) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); } break; case NFSCREATE_GUARDED: if (named.ni_vp && !nd->nd_repstat) nd->nd_repstat = EEXIST; break; case NFSCREATE_EXCLUSIVE: exclusive_flag = 1; if (!named.ni_vp) nva.na_mode = 0; break; case NFSCREATE_EXCLUSIVE41: exclusive_flag = 1; break; } } nfsvno_open(nd, &named, clientid, &stateid, stp, &exclusive_flag, &nva, cverf, create, aclp, &attrbits, nd->nd_cred, p, exp, &vp); } else if (claim == NFSV4OPEN_CLAIMPREVIOUS || claim == NFSV4OPEN_CLAIMFH) { if (claim == NFSV4OPEN_CLAIMPREVIOUS) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); switch (i) { case NFSV4OPEN_DELEGATEREAD: stp->ls_flags |= NFSLCK_DELEGREAD; break; case NFSV4OPEN_DELEGATEWRITE: stp->ls_flags |= NFSLCK_DELEGWRITE; case NFSV4OPEN_DELEGATENONE: break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp->ls_flags |= NFSLCK_RECLAIM; } else { /* CLAIM_NULL_FH */ if (nd->nd_repstat == 0 && create == NFSV4OPEN_CREATE) nd->nd_repstat = NFSERR_INVAL; } vp = dp; NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) == 0) nd->nd_repstat = nfsrv_opencheck(clientid, &stateid, stp, vp, nd, p, nd->nd_repstat); else nd->nd_repstat = NFSERR_PERM; } else { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } /* * Do basic access checking. */ if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { /* * The IETF working group decided that this is the correct * error return for all non-regular files. */ nd->nd_repstat = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_SYMLINK; } if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_WRITEACCESS)) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_READACCESS)) { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } if (!nd->nd_repstat) { - nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); + nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) { tverf[0] = nva.na_atime.tv_sec; tverf[1] = nva.na_atime.tv_nsec; } } if (!nd->nd_repstat && exclusive_flag && (cverf[0] != tverf[0] || cverf[1] != tverf[1])) nd->nd_repstat = EEXIST; /* * Do the open locking/delegation stuff. */ if (!nd->nd_repstat) nd->nd_repstat = nfsrv_openctrl(nd, vp, &stp, clientid, &stateid, &delegstateid, &rflags, exp, p, nva.na_filerev); /* * vp must be unlocked before the call to nfsvno_getattr(dirp,...) * below, to avoid a deadlock with the lookup in nfsvno_namei() above. * (ie: Leave the NFSVOPUNLOCK() about here.) */ if (vp) NFSVOPUNLOCK(vp, 0); if (stp) free(stp, M_NFSDSTATE); if (!nd->nd_repstat && dirp) - nd->nd_repstat = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, - 0); + nd->nd_repstat = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL); if (!nd->nd_repstat) { + /* For NFSv4.1, set the Current StateID. */ + if ((nd->nd_flag & ND_NFSV41) != 0) { + nd->nd_curstateid = stateid; + nd->nd_flag |= ND_CURSTATEID; + } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (claim == NFSV4OPEN_CLAIMPREVIOUS) { *tl++ = newnfs_true; *tl++ = 0; *tl++ = 0; *tl++ = 0; *tl++ = 0; } else { *tl++ = newnfs_false; /* Since dirp is not locked */ txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); tl += 2; } *tl = txdr_unsigned(rflags & NFSV4OPEN_RFLAGS); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (rflags & NFSV4OPEN_READDELEGATE) *tl = txdr_unsigned(NFSV4OPEN_DELEGATEREAD); else if (rflags & NFSV4OPEN_WRITEDELEGATE) *tl = txdr_unsigned(NFSV4OPEN_DELEGATEWRITE); else if (retext != 0) { *tl = txdr_unsigned(NFSV4OPEN_DELEGATENONEEXT); if ((rflags & NFSV4OPEN_WDCONTENTION) != 0) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CONTENTION); *tl = newnfs_false; } else if ((rflags & NFSV4OPEN_WDRESOURCE) != 0) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_RESOURCE); *tl = newnfs_false; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_NOTWANTED); } } else *tl = txdr_unsigned(NFSV4OPEN_DELEGATENONE); if (rflags & (NFSV4OPEN_READDELEGATE|NFSV4OPEN_WRITEDELEGATE)) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID+NFSX_UNSIGNED); *tl++ = txdr_unsigned(delegstateid.seqid); NFSBCOPY((caddr_t)delegstateid.other, (caddr_t)tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (rflags & NFSV4OPEN_RECALL) *tl = newnfs_true; else *tl = newnfs_false; if (rflags & NFSV4OPEN_WRITEDELEGATE) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_LIMITSIZE); txdr_hyper(nva.na_size, tl); } NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4ACE_ALLOWEDTYPE); *tl++ = txdr_unsigned(0x0); acemask = NFSV4ACE_ALLFILESMASK; if (nva.na_mode & S_IRUSR) acemask |= NFSV4ACE_READMASK; if (nva.na_mode & S_IWUSR) acemask |= NFSV4ACE_WRITEMASK; if (nva.na_mode & S_IXUSR) acemask |= NFSV4ACE_EXECUTEMASK; *tl = txdr_unsigned(acemask); (void) nfsm_strtom(nd, "OWNER@", 6); } *vpp = vp; } else if (vp) { vrele(vp); } if (dirp) vrele(dirp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 close service */ APPLESTATIC int nfsrvd_close(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate st, *stp = &st; - int error = 0; + int error = 0, writeacc; nfsv4stateid_t stateid; nfsquad_t clientid; + struct nfsvattr na; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); + + /* + * For the special stateid of other all 0s and seqid == 1, set the + * stateid to the current stateid, if it is set. + */ + if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && + stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && + stp->ls_stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) + stp->ls_stateid = nd->nd_curstateid; + else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + stp->ls_flags = NFSLCK_CLOSE; clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK8 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } - nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p); + nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p, + &writeacc); + /* For pNFS, update the attributes. */ + if (writeacc != 0 || nfsrv_pnfsatime != 0) + nfsrv_updatemdsattr(vp, &na, p); vput(vp); if (!nd->nd_repstat) { + /* + * If the stateid that has been closed is the current stateid, + * unset it. + */ + if ((nd->nd_flag & ND_CURSTATEID) != 0 && + stateid.other[0] == nd->nd_curstateid.other[0] && + stateid.other[1] == nd->nd_curstateid.other[1] && + stateid.other[2] == nd->nd_curstateid.other[2]) + nd->nd_flag &= ~ND_CURSTATEID; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 delegpurge service */ APPLESTATIC int nfsrvd_delegpurge(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK9 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_delegupdate(nd, clientid, NULL, NULL, - NFSV4OP_DELEGPURGE, nd->nd_cred, p); + NFSV4OP_DELEGPURGE, nd->nd_cred, p, NULL); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 delegreturn service */ APPLESTATIC int nfsrvd_delegreturn(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; - int error = 0; + int error = 0, writeacc; nfsv4stateid_t stateid; nfsquad_t clientid; + struct nfsvattr na; NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other, NFSX_STATEIDOTHER); clientid.lval[0] = stateid.other[0]; clientid.lval[1] = stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK10 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_delegupdate(nd, clientid, &stateid, vp, - NFSV4OP_DELEGRETURN, nd->nd_cred, p); + NFSV4OP_DELEGRETURN, nd->nd_cred, p, &writeacc); + /* For pNFS, update the attributes. */ + if (writeacc != 0 || nfsrv_pnfsatime != 0) + nfsrv_updatemdsattr(vp, &na, p); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 get file handle service */ APPLESTATIC int nfsrvd_getfh(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { fhandle_t fh; nd->nd_repstat = nfsvno_getfh(vp, &fh, p); vput(vp); if (!nd->nd_repstat) (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0); NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 open confirm service */ APPLESTATIC int nfsrvd_openconfirm(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate st, *stp = &st; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); stp->ls_seq = fxdr_unsigned(u_int32_t, *tl); stp->ls_flags = NFSLCK_CONFIRM; clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK11 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } - nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p); + nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p, + NULL); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 open downgrade service */ APPLESTATIC int nfsrvd_opendowngrade(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate st, *stp = &st; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; /* opendowngrade can only work on a file object.*/ if (vp->v_type != VREG) { error = NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + + /* + * For the special stateid of other all 0s and seqid == 1, set the + * stateid to the current stateid, if it is set. + */ + if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 && + stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 && + stp->ls_stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) + stp->ls_stateid = nd->nd_curstateid; + else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); i = fxdr_unsigned(int, *tl++); if ((nd->nd_flag & ND_NFSV41) != 0) i &= ~NFSV4OPEN_WANTDELEGMASK; switch (i) { case NFSV4OPEN_ACCESSREAD: stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_DOWNGRADE); break; case NFSV4OPEN_ACCESSWRITE: stp->ls_flags = (NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE); break; case NFSV4OPEN_ACCESSBOTH: stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE); break; default: nd->nd_repstat = NFSERR_INVAL; } i = fxdr_unsigned(int, *tl); switch (i) { case NFSV4OPEN_DENYNONE: break; case NFSV4OPEN_DENYREAD: stp->ls_flags |= NFSLCK_READDENY; break; case NFSV4OPEN_DENYWRITE: stp->ls_flags |= NFSLCK_WRITEDENY; break; case NFSV4OPEN_DENYBOTH: stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY); break; default: nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK12 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } if (!nd->nd_repstat) nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, - nd, p); + nd, p, NULL); if (!nd->nd_repstat) { + /* For NFSv4.1, set the Current StateID. */ + if ((nd->nd_flag & ND_NFSV41) != 0) { + nd->nd_curstateid = stateid; + nd->nd_flag |= ND_CURSTATEID; + } NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 renew lease service */ APPLESTATIC int nfsrvd_renew(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK13 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_RENEWOP|CLOPS_RENEW), NULL, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 security info service */ APPLESTATIC int nfsrvd_secinfo(struct nfsrv_descript *nd, int isdgram, vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int len; struct nameidata named; vnode_t dirp = NULL, vp; struct nfsrvfh fh; struct nfsexstuff retnes; u_int32_t *sizp; int error = 0, savflag, i; char *bufp; u_long *hashp; /* * All this just to get the export flags for the name. */ NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vput(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); } else { vput(dp); nfsvno_relpathbuf(&named); } if (dirp) vrele(dirp); if (nd->nd_repstat) goto out; vrele(named.ni_startdir); nfsvno_relpathbuf(&named); fh.nfsrvfh_len = NFSX_MYFH; vp = named.ni_vp; nd->nd_repstat = nfsvno_getfh(vp, (fhandle_t *)fh.nfsrvfh_data, p); vput(vp); savflag = nd->nd_flag; if (!nd->nd_repstat) { nfsd_fhtovp(nd, &fh, LK_SHARED, &vp, &retnes, NULL, 0, p); if (vp) vput(vp); } nd->nd_flag = savflag; if (nd->nd_repstat) goto out; /* * Finally have the export flags for name, so we can create * the security info. */ len = 0; NFSM_BUILD(sizp, u_int32_t *, NFSX_UNSIGNED); for (i = 0; i < retnes.nes_numsecflavor; i++) { if (retnes.nes_secflavors[i] == AUTH_SYS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(RPCAUTH_UNIX); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCNONE); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5I) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5P) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY); len++; } } *sizp = txdr_unsigned(len); out: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 set client id service */ APPLESTATIC int nfsrvd_setclientid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int i; int error = 0, idlen; struct nfsclient *clp = NULL; struct sockaddr_in *rad; u_char *verf, *ucp, *ucp2, addrbuf[24]; nfsquad_t clientid, confirm; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto out; } NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED); verf = (u_char *)tl; tl += (NFSX_VERF / NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i > NFSV4_OPAQUELIMIT || i <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } idlen = i; if (nd->nd_flag & ND_GSS) i += nd->nd_princlen; clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK | M_ZERO); clp->lc_stateid = malloc(sizeof(struct nfsstatehead) * nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK); NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx); clp->lc_req.nr_nam = malloc(sizeof(*clp->lc_req.nr_nam), M_SONAME, M_WAITOK | M_ZERO); NFSSOCKADDRSIZE(clp->lc_req.nr_nam, sizeof (struct sockaddr_in)); clp->lc_req.nr_cred = NULL; NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); if (error) goto nfsmout; if (nd->nd_flag & ND_GSS) { clp->lc_flags = LCL_GSS; if (nd->nd_flag & ND_GSSINTEGRITY) clp->lc_flags |= LCL_GSSINTEGRITY; else if (nd->nd_flag & ND_GSSPRIVACY) clp->lc_flags |= LCL_GSSPRIVACY; } else { clp->lc_flags = 0; } if ((nd->nd_flag & ND_GSS) && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; clp->lc_namelen = nd->nd_princlen; clp->lc_name = &clp->lc_id[idlen]; NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen); } else { clp->lc_uid = nd->nd_cred->cr_uid; clp->lc_gid = nd->nd_cred->cr_gid; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); clp->lc_program = fxdr_unsigned(u_int32_t, *tl); error = nfsrv_getclientipaddr(nd, clp); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); clp->lc_callback = fxdr_unsigned(u_int32_t, *tl); /* * nfsrv_setclient() does the actual work of adding it to the * client list. If there is no error, the structure has been * linked into the client list and clp should no longer be used * here. When an error is returned, it has not been linked in, * so it should be free'd. */ nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p); if (nd->nd_repstat == NFSERR_CLIDINUSE) { if (clp->lc_flags & LCL_TCPCALLBACK) (void) nfsm_strtom(nd, "tcp", 3); else (void) nfsm_strtom(nd, "udp", 3); rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); ucp = (u_char *)&rad->sin_addr.s_addr; ucp2 = (u_char *)&rad->sin_port; sprintf(addrbuf, "%d.%d.%d.%d.%d.%d", ucp[0] & 0xff, ucp[1] & 0xff, ucp[2] & 0xff, ucp[3] & 0xff, ucp2[0] & 0xff, ucp2[1] & 0xff); (void) nfsm_strtom(nd, addrbuf, strlen(addrbuf)); } if (clp) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER); *tl++ = clientid.lval[0]; *tl++ = clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; } out: NFSEXITCODE2(0, nd); return (0); nfsmout: if (clp) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 set client id confirm service */ APPLESTATIC int nfsrvd_setclientidcfrm(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid, confirm; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; /* * nfsrv_getclient() searches the client list for a match and * returns the appropriate NFSERR status. */ nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_CONFIRM|CLOPS_RENEW), NULL, NULL, confirm, 0, nd, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 verify service */ APPLESTATIC int nfsrvd_verify(struct nfsrv_descript *nd, int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { int error = 0, ret, fhsize = NFSX_MYFH; struct nfsvattr nva; struct statfs *sf; struct nfsfsinfo fs; fhandle_t fh; sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); - nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); + nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_statfs(vp, sf); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) { nfsvno_getfs(&fs, isdgram); error = nfsv4_loadattr(nd, vp, &nva, NULL, &fh, fhsize, NULL, sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, p, nd->nd_cred); if (!error) { if (nd->nd_procnum == NFSV4OP_NVERIFY) { if (ret == 0) nd->nd_repstat = NFSERR_SAME; else if (ret != NFSERR_NOTSAME) nd->nd_repstat = ret; } else if (ret) nd->nd_repstat = ret; } } vput(vp); free(sf, M_STATFS); NFSEXITCODE2(error, nd); return (error); } /* * nfs openattr rpc */ APPLESTATIC int nfsrvd_openattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, __unused vnode_t *vpp, __unused fhandle_t *fhp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0, createdir; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); createdir = fxdr_unsigned(int, *tl); nd->nd_repstat = NFSERR_NOTSUPP; nfsmout: vrele(dp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 release lock owner service */ APPLESTATIC int nfsrvd_releaselckown(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate *stp = NULL; int error = 0, len; nfsquad_t clientid; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp = malloc(sizeof (struct nfsstate) + len, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = len; stp->ls_op = NULL; stp->ls_flags = NFSLCK_RELEASE; stp->ls_uid = nd->nd_cred->cr_uid; clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK14 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, len); if (error) goto nfsmout; nd->nd_repstat = nfsrv_releaselckown(stp, clientid, p); free(stp, M_NFSDSTATE); NFSEXITCODE2(0, nd); return (0); nfsmout: if (stp) free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 exchange_id service */ APPLESTATIC int nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0, i, idlen; struct nfsclient *clp = NULL; nfsquad_t clientid, confirm; uint8_t *verf; uint32_t sp4type, v41flags; uint64_t owner_minor; struct timespec verstime; struct sockaddr_in *sad, *rad; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED); verf = (uint8_t *)tl; tl += (NFSX_VERF / NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i > NFSV4_OPAQUELIMIT || i <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } idlen = i; if (nd->nd_flag & ND_GSS) i += nd->nd_princlen; clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK | M_ZERO); clp->lc_stateid = malloc(sizeof(struct nfsstatehead) * nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK); NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx); clp->lc_req.nr_nam = malloc(sizeof(*clp->lc_req.nr_nam), M_SONAME, M_WAITOK | M_ZERO); NFSSOCKADDRSIZE(clp->lc_req.nr_nam, sizeof (struct sockaddr_in)); sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); rad->sin_family = AF_INET; rad->sin_addr.s_addr = 0; rad->sin_port = 0; if (sad->sin_family == AF_INET) rad->sin_addr.s_addr = sad->sin_addr.s_addr; clp->lc_req.nr_cred = NULL; NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); if (error != 0) goto nfsmout; if ((nd->nd_flag & ND_GSS) != 0) { clp->lc_flags = LCL_GSS | LCL_NFSV41; if ((nd->nd_flag & ND_GSSINTEGRITY) != 0) clp->lc_flags |= LCL_GSSINTEGRITY; else if ((nd->nd_flag & ND_GSSPRIVACY) != 0) clp->lc_flags |= LCL_GSSPRIVACY; } else clp->lc_flags = LCL_NFSV41; if ((nd->nd_flag & ND_GSS) != 0 && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; clp->lc_namelen = nd->nd_princlen; clp->lc_name = &clp->lc_id[idlen]; NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen); } else { clp->lc_uid = nd->nd_cred->cr_uid; clp->lc_gid = nd->nd_cred->cr_gid; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); v41flags = fxdr_unsigned(uint32_t, *tl++); if ((v41flags & ~(NFSV4EXCH_SUPPMOVEDREFER | NFSV4EXCH_SUPPMOVEDMIGR | NFSV4EXCH_BINDPRINCSTATEID | NFSV4EXCH_MASKPNFS | NFSV4EXCH_UPDCONFIRMEDRECA)) != 0) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } if ((v41flags & NFSV4EXCH_UPDCONFIRMEDRECA) != 0) confirm.lval[1] = 1; else confirm.lval[1] = 0; - v41flags = NFSV4EXCH_USENONPNFS; + if (nfsrv_devidcnt == 0) + v41flags = NFSV4EXCH_USENONPNFS | NFSV4EXCH_USEPNFSDS; + else + v41flags = NFSV4EXCH_USEPNFSMDS; sp4type = fxdr_unsigned(uint32_t, *tl); if (sp4type != NFSV4EXCH_SP4NONE) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } /* * nfsrv_setclient() does the actual work of adding it to the * client list. If there is no error, the structure has been * linked into the client list and clp should no longer be used * here. When an error is returned, it has not been linked in, * so it should be free'd. */ nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p); if (clp != NULL) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } if (nd->nd_repstat == 0) { if (confirm.lval[1] != 0) v41flags |= NFSV4EXCH_CONFIRMEDR; NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + 3 * NFSX_UNSIGNED); *tl++ = clientid.lval[0]; /* ClientID */ *tl++ = clientid.lval[1]; *tl++ = txdr_unsigned(confirm.lval[0]); /* SequenceID */ *tl++ = txdr_unsigned(v41flags); /* Exch flags */ *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); /* No SSV */ owner_minor = 0; /* Owner */ txdr_hyper(owner_minor, tl); /* Minor */ (void)nfsm_strtom(nd, nd->nd_cred->cr_prison->pr_hostuuid, strlen(nd->nd_cred->cr_prison->pr_hostuuid)); /* Major */ (void)nfsm_strtom(nd, nd->nd_cred->cr_prison->pr_hostuuid, strlen(nd->nd_cred->cr_prison->pr_hostuuid)); /* Scope */ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(1); (void)nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); (void)nfsm_strtom(nd, version, strlen(version)); NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ verstime.tv_nsec = 0; txdr_nfsv4time(&verstime, tl); } NFSEXITCODE2(0, nd); return (0); nfsmout: if (clp != NULL) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 create session service */ APPLESTATIC int nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0; nfsquad_t clientid, confirm; struct nfsdsession *sep = NULL; uint32_t rdmacnt; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } sep = (struct nfsdsession *)malloc(sizeof(struct nfsdsession), M_NFSDSESSION, M_WAITOK | M_ZERO); sep->sess_refcnt = 1; mtx_init(&sep->sess_cbsess.nfsess_mtx, "nfscbsession", NULL, MTX_DEF); NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; confirm.lval[0] = fxdr_unsigned(uint32_t, *tl++); sep->sess_crflags = fxdr_unsigned(uint32_t, *tl); /* Persistent sessions and RDMA are not supported. */ sep->sess_crflags &= NFSV4CRSESS_CONNBACKCHAN; /* Fore channel attributes. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Header pad always 0. */ sep->sess_maxreq = fxdr_unsigned(uint32_t, *tl++); + if (sep->sess_maxreq > sb_max_adj - NFS_MAXXDR) { + sep->sess_maxreq = sb_max_adj - NFS_MAXXDR; + printf("Consider increasing kern.ipc.maxsockbuf\n"); + } sep->sess_maxresp = fxdr_unsigned(uint32_t, *tl++); + if (sep->sess_maxresp > sb_max_adj - NFS_MAXXDR) { + sep->sess_maxresp = sb_max_adj - NFS_MAXXDR; + printf("Consider increasing kern.ipc.maxsockbuf\n"); + } sep->sess_maxrespcached = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxops = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxslots = fxdr_unsigned(uint32_t, *tl++); if (sep->sess_maxslots > NFSV4_SLOTS) sep->sess_maxslots = NFSV4_SLOTS; rdmacnt = fxdr_unsigned(uint32_t, *tl); if (rdmacnt > 1) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } else if (rdmacnt == 1) NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); /* Back channel attributes. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Header pad always 0. */ sep->sess_cbmaxreq = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxresp = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxrespcached = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxops = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbsess.nfsess_foreslots = fxdr_unsigned(uint32_t, *tl++); rdmacnt = fxdr_unsigned(uint32_t, *tl); if (rdmacnt > 1) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } else if (rdmacnt == 1) NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); sep->sess_cbprogram = fxdr_unsigned(uint32_t, *tl); /* * nfsrv_getclient() searches the client list for a match and * returns the appropriate NFSERR status. */ nd->nd_repstat = nfsrv_getclient(clientid, CLOPS_CONFIRM | CLOPS_RENEW, NULL, sep, confirm, sep->sess_cbprogram, nd, p); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(sep->sess_sessionid, tl, NFSX_V4SESSIONID); NFSM_BUILD(tl, uint32_t *, 18 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(confirm.lval[0]); /* sequenceid */ *tl++ = txdr_unsigned(sep->sess_crflags); /* Fore channel attributes. */ *tl++ = 0; *tl++ = txdr_unsigned(sep->sess_maxreq); *tl++ = txdr_unsigned(sep->sess_maxresp); *tl++ = txdr_unsigned(sep->sess_maxrespcached); *tl++ = txdr_unsigned(sep->sess_maxops); *tl++ = txdr_unsigned(sep->sess_maxslots); *tl++ = txdr_unsigned(1); *tl++ = txdr_unsigned(0); /* No RDMA. */ /* Back channel attributes. */ *tl++ = 0; *tl++ = txdr_unsigned(sep->sess_cbmaxreq); *tl++ = txdr_unsigned(sep->sess_cbmaxresp); *tl++ = txdr_unsigned(sep->sess_cbmaxrespcached); *tl++ = txdr_unsigned(sep->sess_cbmaxops); *tl++ = txdr_unsigned(sep->sess_cbsess.nfsess_foreslots); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(0); /* No RDMA. */ } nfsmout: if (nd->nd_repstat != 0 && sep != NULL) free(sep, M_NFSDSESSION); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 sequence service */ APPLESTATIC int nfsrvd_sequence(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; uint32_t highest_slotid, sequenceid, sflags, target_highest_slotid; int cache_this, error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(tl, nd->nd_sessionid, NFSX_V4SESSIONID); NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); sequenceid = fxdr_unsigned(uint32_t, *tl++); nd->nd_slotid = fxdr_unsigned(uint32_t, *tl++); highest_slotid = fxdr_unsigned(uint32_t, *tl++); if (*tl == newnfs_true) cache_this = 1; else cache_this = 0; nd->nd_flag |= ND_HASSEQUENCE; nd->nd_repstat = nfsrv_checksequence(nd, sequenceid, &highest_slotid, &target_highest_slotid, cache_this, &sflags, p); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(nd->nd_sessionid, tl, NFSX_V4SESSIONID); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(sequenceid); *tl++ = txdr_unsigned(nd->nd_slotid); *tl++ = txdr_unsigned(highest_slotid); *tl++ = txdr_unsigned(target_highest_slotid); *tl = txdr_unsigned(sflags); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 reclaim complete service */ APPLESTATIC int nfsrvd_reclaimcomplete(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) nd->nd_repstat = NFSERR_NOTSUPP; else nd->nd_repstat = nfsrv_checkreclaimcomplete(nd); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 destroy clientid service */ APPLESTATIC int nfsrvd_destroyclientid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsquad_t clientid; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; nd->nd_repstat = nfsrv_destroyclient(clientid, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 bind connection to session service */ APPLESTATIC int nfsrvd_bindconnsess(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; uint8_t sessid[NFSX_V4SESSIONID]; int error = 0, foreaft; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED); NFSBCOPY(tl, sessid, NFSX_V4SESSIONID); tl += (NFSX_V4SESSIONID / NFSX_UNSIGNED); foreaft = fxdr_unsigned(int, *tl++); if (*tl == newnfs_true) { /* RDMA is not supported. */ nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } nd->nd_repstat = nfsrv_bindconnsess(nd, sessid, &foreaft); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED); NFSBCOPY(sessid, tl, NFSX_V4SESSIONID); tl += (NFSX_V4SESSIONID / NFSX_UNSIGNED); *tl++ = txdr_unsigned(foreaft); *tl = newnfs_false; } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 destroy session service */ APPLESTATIC int nfsrvd_destroysession(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint8_t *cp, sessid[NFSX_V4SESSIONID]; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(cp, uint8_t *, NFSX_V4SESSIONID); NFSBCOPY(cp, sessid, NFSX_V4SESSIONID); nd->nd_repstat = nfsrv_destroysession(nd, sessid); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 free stateid service */ APPLESTATIC int nfsrvd_freestateid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + + /* + * For the special stateid of other all 0s and seqid == 1, set the + * stateid to the current stateid, if it is set. + */ + if (stateid.seqid == 1 && stateid.other[0] == 0 && + stateid.other[1] == 0 && stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stateid = nd->nd_curstateid; + stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + nd->nd_repstat = nfsrv_freestateid(nd, &stateid, p); + + /* If the current stateid has been free'd, unset it. */ + if (nd->nd_repstat == 0 && (nd->nd_flag & ND_CURSTATEID) != 0 && + stateid.other[0] == nd->nd_curstateid.other[0] && + stateid.other[1] == nd->nd_curstateid.other[1] && + stateid.other[2] == nd->nd_curstateid.other[2]) + nd->nd_flag &= ~ND_CURSTATEID; +nfsmout: + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfsv4 layoutget service + */ +APPLESTATIC int +nfsrvd_layoutget(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) +{ + uint32_t *tl; + nfsv4stateid_t stateid; + int error = 0, layoutlen, layouttype, iomode, maxcnt, retonclose; + uint64_t offset, len, minlen; + char *layp; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER + + NFSX_STATEID); + tl++; /* Signal layout available. Ignore for now. */ + layouttype = fxdr_unsigned(int, *tl++); + iomode = fxdr_unsigned(int, *tl++); + offset = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); tl += 2; + minlen = fxdr_hyper(tl); tl += 2; + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + maxcnt = fxdr_unsigned(int, *tl); + NFSD_DEBUG(4, "layoutget ltyp=%d iom=%d off=%ju len=%ju mlen=%ju\n", + layouttype, iomode, (uintmax_t)offset, (uintmax_t)len, + (uintmax_t)minlen); + if (len < minlen || + (minlen != UINT64_MAX && offset + minlen < offset) || + (len != UINT64_MAX && offset + len < offset)) { + nd->nd_repstat = NFSERR_INVAL; + goto nfsmout; + } + + /* + * For the special stateid of other all 0s and seqid == 1, set the + * stateid to the current stateid, if it is set. + */ + if (stateid.seqid == 1 && stateid.other[0] == 0 && + stateid.other[1] == 0 && stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stateid = nd->nd_curstateid; + stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + + layp = NULL; + if (layouttype == NFSLAYOUT_NFSV4_1_FILES && nfsrv_maxpnfsmirror == 1) + layp = malloc(NFSX_V4FILELAYOUT, M_TEMP, M_WAITOK); + else if (layouttype == NFSLAYOUT_FLEXFILE) + layp = malloc(NFSX_V4FLEXLAYOUT(nfsrv_maxpnfsmirror), M_TEMP, + M_WAITOK); + else + nd->nd_repstat = NFSERR_UNKNLAYOUTTYPE; + if (layp != NULL) + nd->nd_repstat = nfsrv_layoutget(nd, vp, exp, layouttype, + &iomode, &offset, &len, minlen, &stateid, maxcnt, + &retonclose, &layoutlen, layp, nd->nd_cred, p); + NFSD_DEBUG(4, "nfsrv_layoutget stat=%u layoutlen=%d\n", nd->nd_repstat, + layoutlen); + if (nd->nd_repstat == 0) { + /* For NFSv4.1, set the Current StateID. */ + if ((nd->nd_flag & ND_NFSV41) != 0) { + nd->nd_curstateid = stateid; + nd->nd_flag |= ND_CURSTATEID; + } + NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + NFSX_STATEID + + 2 * NFSX_HYPER); + *tl++ = txdr_unsigned(retonclose); + *tl++ = txdr_unsigned(stateid.seqid); + NFSBCOPY(stateid.other, tl, NFSX_STATEIDOTHER); + tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + *tl++ = txdr_unsigned(1); /* Only returns one layout. */ + txdr_hyper(offset, tl); tl += 2; + txdr_hyper(len, tl); tl += 2; + *tl++ = txdr_unsigned(iomode); + *tl = txdr_unsigned(layouttype); + nfsm_strtom(nd, layp, layoutlen); + } else if (nd->nd_repstat == NFSERR_LAYOUTTRYLATER) { + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = newnfs_false; + } + free(layp, M_TEMP); +nfsmout: + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfsv4 layoutcommit service + */ +APPLESTATIC int +nfsrvd_layoutcommit(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) +{ + uint32_t *tl; + nfsv4stateid_t stateid; + int error = 0, hasnewoff, hasnewmtime, layouttype, maxcnt, reclaim; + int hasnewsize; + uint64_t offset, len, newoff, newsize; + struct timespec newmtime; + char *layp; + + layp = NULL; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + 2 * NFSX_HYPER + + NFSX_STATEID); + offset = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); tl += 2; + reclaim = fxdr_unsigned(int, *tl++); + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + /* + * For the special stateid of other all 0s and seqid == 1, set the + * stateid to the current stateid, if it is set. + */ + if (stateid.seqid == 1 && stateid.other[0] == 0 && + stateid.other[1] == 0 && stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stateid = nd->nd_curstateid; + stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + + hasnewoff = fxdr_unsigned(int, *tl); + if (hasnewoff != 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); + newoff = fxdr_hyper(tl); tl += 2; + } else + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + hasnewmtime = fxdr_unsigned(int, *tl); + if (hasnewmtime != 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_V4TIME + 2 * NFSX_UNSIGNED); + fxdr_nfsv4time(tl, &newmtime); + tl += (NFSX_V4TIME / NFSX_UNSIGNED); + } else + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + layouttype = fxdr_unsigned(int, *tl++); + maxcnt = fxdr_unsigned(int, *tl); + if (maxcnt > 0) { + layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK); + error = nfsrv_mtostr(nd, layp, maxcnt); + if (error != 0) + goto nfsmout; + } + nd->nd_repstat = nfsrv_layoutcommit(nd, vp, layouttype, hasnewoff, + newoff, offset, len, hasnewmtime, &newmtime, reclaim, &stateid, + maxcnt, layp, &hasnewsize, &newsize, nd->nd_cred, p); + NFSD_DEBUG(4, "nfsrv_layoutcommit stat=%u\n", nd->nd_repstat); + if (nd->nd_repstat == 0) { + if (hasnewsize != 0) { + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); + *tl++ = newnfs_true; + txdr_hyper(newsize, tl); + } else { + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = newnfs_false; + } + } +nfsmout: + free(layp, M_TEMP); + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfsv4 layoutreturn service + */ +APPLESTATIC int +nfsrvd_layoutreturn(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) +{ + uint32_t *tl, *layp; + nfsv4stateid_t stateid; + int error = 0, fnd, kind, layouttype, iomode, maxcnt, reclaim; + uint64_t offset, len; + + layp = NULL; + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); + reclaim = *tl++; + layouttype = fxdr_unsigned(int, *tl++); + iomode = fxdr_unsigned(int, *tl++); + kind = fxdr_unsigned(int, *tl); + NFSD_DEBUG(4, "layoutreturn recl=%d ltyp=%d iom=%d kind=%d\n", reclaim, + layouttype, iomode, kind); + if (kind == NFSV4LAYOUTRET_FILE) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + + NFSX_UNSIGNED); + offset = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); tl += 2; + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + + /* + * For the special stateid of other all 0s and seqid == 1, set + * the stateid to the current stateid, if it is set. + */ + if (stateid.seqid == 1 && stateid.other[0] == 0 && + stateid.other[1] == 0 && stateid.other[2] == 0) { + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + stateid = nd->nd_curstateid; + stateid.seqid = 0; + } else { + nd->nd_repstat = NFSERR_BADSTATEID; + goto nfsmout; + } + } + + maxcnt = fxdr_unsigned(int, *tl); + if (maxcnt > 0) { + layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK); + error = nfsrv_mtostr(nd, (char *)layp, maxcnt); + if (error != 0) + goto nfsmout; + } + } else { + if (reclaim == newnfs_true) { + nd->nd_repstat = NFSERR_INVAL; + goto nfsmout; + } + offset = len = 0; + maxcnt = 0; + } + nd->nd_repstat = nfsrv_layoutreturn(nd, vp, layouttype, iomode, + offset, len, reclaim, kind, &stateid, maxcnt, layp, &fnd, + nd->nd_cred, p); + NFSD_DEBUG(4, "nfsrv_layoutreturn stat=%u fnd=%d\n", nd->nd_repstat, + fnd); + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + if (fnd != 0) { + *tl = newnfs_true; + NFSM_BUILD(tl, uint32_t *, NFSX_STATEID); + *tl++ = txdr_unsigned(stateid.seqid); + NFSBCOPY(stateid.other, tl, NFSX_STATEIDOTHER); + } else + *tl = newnfs_false; + } +nfsmout: + free(layp, M_TEMP); + vput(vp); + NFSEXITCODE2(error, nd); + return (error); +} + +/* + * nfsv4 getdeviceinfo service + */ +APPLESTATIC int +nfsrvd_getdevinfo(struct nfsrv_descript *nd, __unused int isdgram, + __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) +{ + uint32_t *tl, maxcnt, notify[NFSV4_NOTIFYBITMAP]; + int cnt, devaddrlen, error = 0, i, layouttype; + char devid[NFSX_V4DEVICEID], *devaddr; + time_t dev_time; + + if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { + nd->nd_repstat = NFSERR_WRONGSEC; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_V4DEVICEID); + NFSBCOPY(tl, devid, NFSX_V4DEVICEID); + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + layouttype = fxdr_unsigned(int, *tl++); + maxcnt = fxdr_unsigned(uint32_t, *tl++); + cnt = fxdr_unsigned(int, *tl); + NFSD_DEBUG(4, "getdevinfo ltyp=%d maxcnt=%u bitcnt=%d\n", layouttype, + maxcnt, cnt); + if (cnt > NFSV4_NOTIFYBITMAP || cnt < 0) { + nd->nd_repstat = NFSERR_INVAL; + goto nfsmout; + } + if (cnt > 0) { + NFSM_DISSECT(tl, uint32_t *, cnt * NFSX_UNSIGNED); + for (i = 0; i < cnt; i++) + notify[i] = fxdr_unsigned(uint32_t, *tl++); + } + for (i = cnt; i < NFSV4_NOTIFYBITMAP; i++) + notify[i] = 0; + + /* + * Check that the device id is not stale. Device ids are recreated + * each time the nfsd threads are restarted. + */ + NFSBCOPY(devid, &dev_time, sizeof(dev_time)); + if (dev_time != nfsdev_time) { + nd->nd_repstat = NFSERR_NOENT; + goto nfsmout; + } + + /* Look for the device id. */ + nd->nd_repstat = nfsrv_getdevinfo(devid, layouttype, &maxcnt, + notify, &devaddrlen, &devaddr); + NFSD_DEBUG(4, "nfsrv_getdevinfo stat=%u\n", nd->nd_repstat); + if (nd->nd_repstat == 0) { + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(layouttype); + nfsm_strtom(nd, devaddr, devaddrlen); + cnt = 0; + for (i = 0; i < NFSV4_NOTIFYBITMAP; i++) { + if (notify[i] != 0) + cnt = i + 1; + } + NFSM_BUILD(tl, uint32_t *, (cnt + 1) * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(cnt); + for (i = 0; i < cnt; i++) + *tl++ = txdr_unsigned(notify[i]); + } else if (nd->nd_repstat == NFSERR_TOOSMALL) { + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(maxcnt); + } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 test stateid service */ APPLESTATIC int nfsrvd_teststateid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t *stateidp = NULL, *tstateidp; int cnt, error = 0, i, ret; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt <= 0 || cnt > 1024) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stateidp = mallocarray(cnt, sizeof(nfsv4stateid_t), M_TEMP, M_WAITOK); tstateidp = stateidp; for (i = 0; i < cnt; i++) { NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); tstateidp->seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, tstateidp->other, NFSX_STATEIDOTHER); tstateidp++; } NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(cnt); tstateidp = stateidp; for (i = 0; i < cnt; i++) { ret = nfsrv_teststateid(nd, tstateidp, p); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(ret); tstateidp++; } nfsmout: free(stateidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 service not supported */ APPLESTATIC int nfsrvd_notsupp(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { nd->nd_repstat = NFSERR_NOTSUPP; NFSEXITCODE2(0, nd); return (0); } Index: head/sys/fs/nfsserver/nfs_nfsdsocket.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdsocket.c (revision 335011) +++ head/sys/fs/nfsserver/nfs_nfsdsocket.c (revision 335012) @@ -1,1126 +1,1142 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Socket operations for use by the nfs server. */ #ifndef APPLEKEXT #include extern struct nfsstatsv1 nfsstatsv1; extern struct nfsrvfh nfs_pubfh, nfs_rootfh; extern int nfs_pubfhset, nfs_rootfhset; extern struct nfsv4lock nfsv4rootfs_lock; extern struct nfsrv_stablefirst nfsrv_stablefirst; extern struct nfsclienthashhead *nfsclienthash; extern int nfsrv_clienthashsize; extern int nfsrc_floodlevel, nfsrc_tcpsavedreplies; extern int nfsd_debuglevel; +extern int nfsrv_layouthighwater; +extern volatile int nfsrv_layoutcnt; NFSV4ROOTLOCKMUTEX; NFSSTATESPINLOCK; int (*nfsrv3_procs0[NFS_V3NPROCS])(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_getattr, nfsrvd_setattr, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_access, nfsrvd_readlink, nfsrvd_read, nfsrvd_write, nfsrvd_create, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_remove, nfsrvd_remove, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_readdir, nfsrvd_readdirplus, nfsrvd_statfs, nfsrvd_fsinfo, nfsrvd_pathconf, nfsrvd_commit, }; int (*nfsrv3_procs1[NFS_V3NPROCS])(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_lookup, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_mkdir, nfsrvd_symlink, nfsrvd_mknod, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, }; int (*nfsrv3_procs2[NFS_V3NPROCS])(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, nfsrvd_rename, nfsrvd_link, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, }; int (*nfsrv4_ops0[NFSV41_NOPS])(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_access, nfsrvd_close, nfsrvd_commit, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_delegpurge, nfsrvd_delegreturn, nfsrvd_getattr, nfsrvd_getfh, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_lock, nfsrvd_lockt, nfsrvd_locku, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_verify, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_openconfirm, nfsrvd_opendowngrade, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_read, nfsrvd_readdirplus, nfsrvd_readlink, nfsrvd_remove, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_renew, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_secinfo, nfsrvd_setattr, nfsrvd_setclientid, nfsrvd_setclientidcfrm, nfsrvd_verify, nfsrvd_write, nfsrvd_releaselckown, nfsrvd_notsupp, nfsrvd_bindconnsess, nfsrvd_exchangeid, nfsrvd_createsession, nfsrvd_destroysession, nfsrvd_freestateid, nfsrvd_notsupp, + nfsrvd_getdevinfo, nfsrvd_notsupp, + nfsrvd_layoutcommit, + nfsrvd_layoutget, + nfsrvd_layoutreturn, nfsrvd_notsupp, - nfsrvd_notsupp, - nfsrvd_notsupp, - nfsrvd_notsupp, - nfsrvd_notsupp, nfsrvd_sequence, nfsrvd_notsupp, nfsrvd_teststateid, nfsrvd_notsupp, nfsrvd_destroyclientid, nfsrvd_reclaimcomplete, }; int (*nfsrv4_ops1[NFSV41_NOPS])(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_mknod, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_lookup, nfsrvd_lookup, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, nfsrvd_open, nfsrvd_openattr, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0, }; int (*nfsrv4_ops2[NFSV41_NOPS])(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *) = { (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, nfsrvd_link, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, nfsrvd_rename, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, (int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0, }; #endif /* !APPLEKEXT */ /* * Static array that defines which nfs rpc's are nonidempotent */ static int nfsrv_nonidempotent[NFS_V3NPROCS] = { FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, }; /* * This static array indicates whether or not the RPC modifies the * file system. */ static int nfs_writerpc[NFS_NPROCS] = { 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; /* local functions */ static void nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, u_int32_t minorvers, NFSPROC_T *p); /* * This static array indicates which server procedures require the extra * arguments to return the current file handle for V2, 3. */ static int nfs_retfh[NFS_V3NPROCS] = { 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0 }; extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; static int nfsv3to4op[NFS_V3NPROCS] = { NFSPROC_NULL, NFSV4OP_GETATTR, NFSV4OP_SETATTR, NFSV4OP_LOOKUP, NFSV4OP_ACCESS, NFSV4OP_READLINK, NFSV4OP_READ, NFSV4OP_WRITE, NFSV4OP_V3CREATE, NFSV4OP_MKDIR, NFSV4OP_SYMLINK, NFSV4OP_MKNOD, NFSV4OP_REMOVE, NFSV4OP_RMDIR, NFSV4OP_RENAME, NFSV4OP_LINK, NFSV4OP_READDIR, NFSV4OP_READDIRPLUS, NFSV4OP_FSSTAT, NFSV4OP_FSINFO, NFSV4OP_PATHCONF, NFSV4OP_COMMIT, }; static struct mtx nfsrvd_statmtx; MTX_SYSINIT(nfsst, &nfsrvd_statmtx, "NFSstat", MTX_DEF); static void nfsrvd_statstart(int op, struct bintime *now) { if (op > (NFSV42_NOPS + NFSV4OP_FAKENOPS)) { printf("%s: op %d invalid\n", __func__, op); return; } mtx_lock(&nfsrvd_statmtx); if (nfsstatsv1.srvstartcnt == nfsstatsv1.srvdonecnt) { if (now != NULL) nfsstatsv1.busyfrom = *now; else binuptime(&nfsstatsv1.busyfrom); } nfsstatsv1.srvrpccnt[op]++; nfsstatsv1.srvstartcnt++; mtx_unlock(&nfsrvd_statmtx); } static void nfsrvd_statend(int op, uint64_t bytes, struct bintime *now, struct bintime *then) { struct bintime dt, lnow; if (op > (NFSV42_NOPS + NFSV4OP_FAKENOPS)) { printf("%s: op %d invalid\n", __func__, op); return; } if (now == NULL) { now = &lnow; binuptime(now); } mtx_lock(&nfsrvd_statmtx); nfsstatsv1.srvbytes[op] += bytes; nfsstatsv1.srvops[op]++; if (then != NULL) { dt = *now; bintime_sub(&dt, then); bintime_add(&nfsstatsv1.srvduration[op], &dt); } dt = *now; bintime_sub(&dt, &nfsstatsv1.busyfrom); bintime_add(&nfsstatsv1.busytime, &dt); nfsstatsv1.busyfrom = *now; nfsstatsv1.srvdonecnt++; mtx_unlock(&nfsrvd_statmtx); } /* * Do an RPC. Basically, get the file handles translated to vnode pointers * and then call the appropriate server routine. The server routines are * split into groups, based on whether they use a file handle or file * handle plus name or ... * The NFS V4 Compound RPC is performed separately by nfsrvd_compound(). */ APPLESTATIC void nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, u_int32_t minorvers, NFSPROC_T *p) { int error = 0, lktype; vnode_t vp; mount_t mp = NULL; struct nfsrvfh fh; struct nfsexstuff nes; /* * Get a locked vnode for the first file handle */ if (!(nd->nd_flag & ND_NFSV4)) { KASSERT(nd->nd_repstat == 0, ("nfsrvd_dorpc")); /* * For NFSv3, if the malloc/mget allocation is near limits, * return NFSERR_DELAY. */ if ((nd->nd_flag & ND_NFSV3) && nfsrv_mallocmget_limit()) { nd->nd_repstat = NFSERR_DELAY; vp = NULL; } else { error = nfsrv_mtofh(nd, &fh); if (error) { if (error != EBADRPC) printf("nfs dorpc err1=%d\n", error); nd->nd_repstat = NFSERR_GARBAGE; goto out; } if (nd->nd_procnum == NFSPROC_READ || nd->nd_procnum == NFSPROC_WRITE || nd->nd_procnum == NFSPROC_READDIR || nd->nd_procnum == NFSPROC_READDIRPLUS || nd->nd_procnum == NFSPROC_READLINK || nd->nd_procnum == NFSPROC_GETATTR || nd->nd_procnum == NFSPROC_ACCESS || nd->nd_procnum == NFSPROC_FSSTAT || nd->nd_procnum == NFSPROC_FSINFO) lktype = LK_SHARED; else lktype = LK_EXCLUSIVE; if (nd->nd_flag & ND_PUBLOOKUP) nfsd_fhtovp(nd, &nfs_pubfh, lktype, &vp, &nes, &mp, nfs_writerpc[nd->nd_procnum], p); else nfsd_fhtovp(nd, &fh, lktype, &vp, &nes, &mp, nfs_writerpc[nd->nd_procnum], p); if (nd->nd_repstat == NFSERR_PROGNOTV4) goto out; } } /* * For V2 and 3, set the ND_SAVEREPLY flag for the recent request * cache, as required. * For V4, nfsrvd_compound() does this. */ if (!(nd->nd_flag & ND_NFSV4) && nfsrv_nonidempotent[nd->nd_procnum]) nd->nd_flag |= ND_SAVEREPLY; nfsrvd_rephead(nd); /* * If nd_repstat is non-zero, just fill in the reply status * to complete the RPC reply for V2. Otherwise, you must do * the RPC. */ if (nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) { *nd->nd_errp = nfsd_errmap(nd); nfsrvd_statstart(nfsv3to4op[nd->nd_procnum], /*now*/ NULL); nfsrvd_statend(nfsv3to4op[nd->nd_procnum], /*bytes*/ 0, /*now*/ NULL, /*then*/ NULL); if (mp != NULL && nfs_writerpc[nd->nd_procnum] != 0) vn_finished_write(mp); goto out; } /* * Now the procedure can be performed. For V4, nfsrvd_compound() * works through the sub-rpcs, otherwise just call the procedure. * The procedures are in three groups with different arguments. * The group is indicated by the value in nfs_retfh[]. */ if (nd->nd_flag & ND_NFSV4) { nfsrvd_compound(nd, isdgram, tag, taglen, minorvers, p); } else { struct bintime start_time; binuptime(&start_time); nfsrvd_statstart(nfsv3to4op[nd->nd_procnum], &start_time); if (nfs_retfh[nd->nd_procnum] == 1) { if (vp) NFSVOPUNLOCK(vp, 0); error = (*(nfsrv3_procs1[nd->nd_procnum]))(nd, isdgram, vp, NULL, (fhandle_t *)fh.nfsrvfh_data, p, &nes); } else if (nfs_retfh[nd->nd_procnum] == 2) { error = (*(nfsrv3_procs2[nd->nd_procnum]))(nd, isdgram, vp, NULL, p, &nes, NULL); } else { error = (*(nfsrv3_procs0[nd->nd_procnum]))(nd, isdgram, vp, p, &nes); } if (mp != NULL && nfs_writerpc[nd->nd_procnum] != 0) vn_finished_write(mp); nfsrvd_statend(nfsv3to4op[nd->nd_procnum], /*bytes*/ 0, /*now*/ NULL, /*then*/ &start_time); } if (error) { if (error != EBADRPC) printf("nfs dorpc err2=%d\n", error); nd->nd_repstat = NFSERR_GARBAGE; } *nd->nd_errp = nfsd_errmap(nd); /* * Don't cache certain reply status values. */ if (nd->nd_repstat && (nd->nd_flag & ND_SAVEREPLY) && (nd->nd_repstat == NFSERR_GARBAGE || nd->nd_repstat == NFSERR_BADXDR || nd->nd_repstat == NFSERR_MOVED || nd->nd_repstat == NFSERR_DELAY || nd->nd_repstat == NFSERR_BADSEQID || nd->nd_repstat == NFSERR_RESOURCE || nd->nd_repstat == NFSERR_SERVERFAULT || nd->nd_repstat == NFSERR_STALECLIENTID || nd->nd_repstat == NFSERR_STALESTATEID || nd->nd_repstat == NFSERR_OLDSTATEID || nd->nd_repstat == NFSERR_BADSTATEID || nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_NOGRACE)) nd->nd_flag &= ~ND_SAVEREPLY; out: NFSEXITCODE2(0, nd); } /* * Breaks down a compound RPC request and calls the server routines for * the subprocedures. * Some suboperations are performed directly here to simplify file handle<--> * vnode pointer handling. */ static void nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen, u_int32_t minorvers, NFSPROC_T *p) { int i, lktype, op, op0 = 0, statsinprog = 0; u_int32_t *tl; struct nfsclient *clp, *nclp; int numops, error = 0, igotlock; u_int32_t retops = 0, *retopsp = NULL, *repp; vnode_t vp, nvp, savevp; struct nfsrvfh fh; mount_t new_mp, temp_mp = NULL; struct ucred *credanon; struct nfsexstuff nes, vpnes, savevpnes; fsid_t cur_fsid, save_fsid; static u_int64_t compref = 0; struct bintime start_time; NFSVNO_EXINIT(&vpnes); NFSVNO_EXINIT(&savevpnes); /* * Put the seq# of the current compound RPC in nfsrv_descript. * (This is used by nfsrv_checkgetattr(), to see if the write * delegation was created by the same compound RPC as the one * with that Getattr in it.) * Don't worry about the 64bit number wrapping around. It ain't * gonna happen before this server gets shut down/rebooted. */ nd->nd_compref = compref++; /* * Check for and optionally get a lock on the root. This lock means that * no nfsd will be fiddling with the V4 file system and state stuff. It * is required when the V4 root is being changed, the stable storage * restart file is being updated, or callbacks are being done. * When any of the nfsd are processing an NFSv4 compound RPC, they must * either hold a reference count (nfs_usecnt) or the lock. When * nfsrv_unlock() is called to release the lock, it can optionally * also get a reference count, which saves the need for a call to * nfsrv_getref() after nfsrv_unlock(). */ /* * First, check to see if we need to wait for an update lock. */ igotlock = 0; NFSLOCKV4ROOTMUTEX(); if (nfsrv_stablefirst.nsf_flags & NFSNSF_NEEDLOCK) igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); else igotlock = nfsv4_lock(&nfsv4rootfs_lock, 0, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); if (igotlock) { /* * If I got the lock, I can update the stable storage file. * Done when the grace period is over or a client has long * since expired. */ nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NEEDLOCK; if ((nfsrv_stablefirst.nsf_flags & (NFSNSF_GRACEOVER | NFSNSF_UPDATEDONE)) == NFSNSF_GRACEOVER) nfsrv_updatestable(p); /* * If at least one client has long since expired, search * the client list for them, write a REVOKE record on the * stable storage file and then remove them from the client * list. */ if (nfsrv_stablefirst.nsf_flags & NFSNSF_EXPIREDCLIENT) { nfsrv_stablefirst.nsf_flags &= ~NFSNSF_EXPIREDCLIENT; for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { if (clp->lc_flags & LCL_EXPIREIT) { if (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg)) nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); nfsrv_zapclient(clp, p); } } } } NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else { /* * If we didn't get the lock, we need to get a refcnt, * which also checks for and waits for the lock. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); } /* * If flagged, search for open owners that haven't had any opens * for a long time. */ if (nfsrv_stablefirst.nsf_flags & NFSNSF_NOOPENS) { nfsrv_throwawayopens(p); } + /* Do a CBLAYOUTRECALL callback if over the high water mark. */ + if (nfsrv_layoutcnt > nfsrv_layouthighwater) + nfsrv_recalloldlayout(p); + savevp = vp = NULL; save_fsid.val[0] = save_fsid.val[1] = 0; cur_fsid.val[0] = cur_fsid.val[1] = 0; /* If taglen < 0, there was a parsing error in nfsd_getminorvers(). */ if (taglen < 0) { error = EBADRPC; goto nfsmout; } (void) nfsm_strtom(nd, tag, taglen); NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION) nd->nd_repstat = NFSERR_MINORVERMISMATCH; if (nd->nd_repstat) numops = 0; else numops = fxdr_unsigned(int, *tl); /* * Loop around doing the sub ops. * vp - is an unlocked vnode pointer for the CFH * savevp - is an unlocked vnode pointer for the SAVEDFH * (at some future date, it might turn out to be more appropriate * to keep the file handles instead of vnode pointers?) * savevpnes and vpnes - are the export flags for the above. */ for (i = 0; i < numops; i++) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED); *repp = *tl; op = fxdr_unsigned(int, *tl); NFSD_DEBUG(4, "op=%d\n", op); binuptime(&start_time); nfsrvd_statstart(op, &start_time); statsinprog = 1; if (op < NFSV4OP_ACCESS || (op >= NFSV4OP_NOPS && (nd->nd_flag & ND_NFSV41) == 0) || (op >= NFSV41_NOPS && (nd->nd_flag & ND_NFSV41) != 0)) { nd->nd_repstat = NFSERR_OPILLEGAL; *repp++ = txdr_unsigned(NFSV4OP_OPILLEGAL); *repp = nfsd_errmap(nd); retops++; break; } else { repp++; } if (i == 0) op0 = op; if (i == numops - 1) nd->nd_flag |= ND_LASTOP; /* * Check for a referral on the current FH and, if so, return * NFSERR_MOVED for all ops that allow it, except Getattr. */ if (vp != NULL && op != NFSV4OP_GETATTR && nfsv4root_getreferral(vp, NULL, 0) != NULL && nfsrv_errmoved(op)) { nd->nd_repstat = NFSERR_MOVED; *repp = nfsd_errmap(nd); retops++; break; } /* * For NFSv4.1, check for a Sequence Operation being first * or one of the other allowed operations by itself. */ if ((nd->nd_flag & ND_NFSV41) != 0) { if (i != 0 && op == NFSV4OP_SEQUENCE) nd->nd_repstat = NFSERR_SEQUENCEPOS; else if (i == 0 && op != NFSV4OP_SEQUENCE && op != NFSV4OP_EXCHANGEID && op != NFSV4OP_CREATESESSION && op != NFSV4OP_BINDCONNTOSESS && op != NFSV4OP_DESTROYCLIENTID && op != NFSV4OP_DESTROYSESSION) nd->nd_repstat = NFSERR_OPNOTINSESS; else if (i != 0 && op0 != NFSV4OP_SEQUENCE) nd->nd_repstat = NFSERR_NOTONLYOP; if (nd->nd_repstat != 0) { *repp = nfsd_errmap(nd); retops++; break; } } nd->nd_procnum = op; /* * If over flood level, reply NFSERR_RESOURCE, if at the first * Op. (Since a client recovery from NFSERR_RESOURCE can get * really nasty for certain Op sequences, I'll play it safe * and only return the error at the beginning.) The cache * will still function over flood level, but uses lots of * mbufs.) * If nfsrv_mallocmget_limit() returns True, the system is near * to its limit for memory that malloc()/mget() can allocate. */ if (i == 0 && (nd->nd_rp == NULL || nd->nd_rp->rc_refcnt == 0) && (nfsrv_mallocmget_limit() || nfsrc_tcpsavedreplies > nfsrc_floodlevel)) { if (nfsrc_tcpsavedreplies > nfsrc_floodlevel) printf("nfsd server cache flooded, try " "increasing vfs.nfsd.tcphighwater\n"); nd->nd_repstat = NFSERR_RESOURCE; *repp = nfsd_errmap(nd); if (op == NFSV4OP_SETATTR) { /* * Setattr replies require a bitmap. * even for errors like these. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = 0; } retops++; break; } if (nfsv4_opflag[op].savereply) nd->nd_flag |= ND_SAVEREPLY; switch (op) { case NFSV4OP_PUTFH: error = nfsrv_mtofh(nd, &fh); if (error) goto nfsmout; if (!nd->nd_repstat) nfsd_fhtovp(nd, &fh, LK_SHARED, &nvp, &nes, NULL, 0, p); /* For now, allow this for non-export FHs */ if (!nd->nd_repstat) { if (vp) vrele(vp); vp = nvp; cur_fsid = vp->v_mount->mnt_stat.f_fsid; NFSVOPUNLOCK(vp, 0); vpnes = nes; } break; case NFSV4OP_PUTPUBFH: if (nfs_pubfhset) nfsd_fhtovp(nd, &nfs_pubfh, LK_SHARED, &nvp, &nes, NULL, 0, p); else nd->nd_repstat = NFSERR_NOFILEHANDLE; if (!nd->nd_repstat) { if (vp) vrele(vp); vp = nvp; cur_fsid = vp->v_mount->mnt_stat.f_fsid; NFSVOPUNLOCK(vp, 0); vpnes = nes; } break; case NFSV4OP_PUTROOTFH: if (nfs_rootfhset) { nfsd_fhtovp(nd, &nfs_rootfh, LK_SHARED, &nvp, &nes, NULL, 0, p); if (!nd->nd_repstat) { if (vp) vrele(vp); vp = nvp; cur_fsid = vp->v_mount->mnt_stat.f_fsid; NFSVOPUNLOCK(vp, 0); vpnes = nes; } } else nd->nd_repstat = NFSERR_NOFILEHANDLE; break; case NFSV4OP_SAVEFH: if (vp && NFSVNO_EXPORTED(&vpnes)) { nd->nd_repstat = 0; /* If vp == savevp, a no-op */ if (vp != savevp) { if (savevp) vrele(savevp); VREF(vp); savevp = vp; savevpnes = vpnes; save_fsid = cur_fsid; } + if ((nd->nd_flag & ND_CURSTATEID) != 0) { + nd->nd_savedcurstateid = + nd->nd_curstateid; + nd->nd_flag |= ND_SAVEDCURSTATEID; + } } else { nd->nd_repstat = NFSERR_NOFILEHANDLE; } break; case NFSV4OP_RESTOREFH: if (savevp) { nd->nd_repstat = 0; /* If vp == savevp, a no-op */ if (vp != savevp) { VREF(savevp); vrele(vp); vp = savevp; vpnes = savevpnes; cur_fsid = save_fsid; + } + if ((nd->nd_flag & ND_SAVEDCURSTATEID) != 0) { + nd->nd_curstateid = + nd->nd_savedcurstateid; + nd->nd_flag |= ND_CURSTATEID; } } else { nd->nd_repstat = NFSERR_RESTOREFH; } break; default: /* * Allow a Lookup, Getattr, GetFH, Secinfo on an * non-exported directory if * nfs_rootfhset. Do I need to allow any other Ops? * (You can only have a non-exported vpnes if * nfs_rootfhset is true. See nfsd_fhtovp()) * Allow AUTH_SYS to be used for file systems * exported GSS only for certain Ops, to allow * clients to do mounts more easily. */ if (nfsv4_opflag[op].needscfh && vp) { if (!NFSVNO_EXPORTED(&vpnes) && op != NFSV4OP_LOOKUP && op != NFSV4OP_GETATTR && op != NFSV4OP_GETFH && op != NFSV4OP_ACCESS && op != NFSV4OP_READLINK && op != NFSV4OP_SECINFO) nd->nd_repstat = NFSERR_NOFILEHANDLE; else if (nfsvno_testexp(nd, &vpnes) && op != NFSV4OP_LOOKUP && op != NFSV4OP_GETFH && op != NFSV4OP_GETATTR && op != NFSV4OP_SECINFO) nd->nd_repstat = NFSERR_WRONGSEC; if (nd->nd_repstat) { if (op == NFSV4OP_SETATTR) { /* * Setattr reply requires a bitmap * even for errors like these. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = 0; } break; } } if (nfsv4_opflag[op].retfh == 1) { if (!vp) { nd->nd_repstat = NFSERR_NOFILEHANDLE; break; } VREF(vp); if (nfsv4_opflag[op].modifyfs) vn_start_write(vp, &temp_mp, V_WAIT); error = (*(nfsrv4_ops1[op]))(nd, isdgram, vp, &nvp, (fhandle_t *)fh.nfsrvfh_data, p, &vpnes); if (!error && !nd->nd_repstat) { if (op == NFSV4OP_LOOKUP || op == NFSV4OP_LOOKUPP) { new_mp = nvp->v_mount; if (cur_fsid.val[0] != new_mp->mnt_stat.f_fsid.val[0] || cur_fsid.val[1] != new_mp->mnt_stat.f_fsid.val[1]) { /* crossed a server mount point */ nd->nd_repstat = nfsvno_checkexp(new_mp, nd->nd_nam, &nes, &credanon); if (!nd->nd_repstat) nd->nd_repstat = nfsd_excred(nd, &nes, credanon); if (credanon != NULL) crfree(credanon); if (!nd->nd_repstat) { vpnes = nes; cur_fsid = new_mp->mnt_stat.f_fsid; } } /* Lookup ops return a locked vnode */ NFSVOPUNLOCK(nvp, 0); } if (!nd->nd_repstat) { vrele(vp); vp = nvp; } else vrele(nvp); } if (nfsv4_opflag[op].modifyfs) vn_finished_write(temp_mp); } else if (nfsv4_opflag[op].retfh == 2) { if (vp == NULL || savevp == NULL) { nd->nd_repstat = NFSERR_NOFILEHANDLE; break; } else if (cur_fsid.val[0] != save_fsid.val[0] || cur_fsid.val[1] != save_fsid.val[1]) { nd->nd_repstat = NFSERR_XDEV; break; } if (nfsv4_opflag[op].modifyfs) vn_start_write(savevp, &temp_mp, V_WAIT); if (NFSVOPLOCK(savevp, LK_EXCLUSIVE) == 0) { VREF(vp); VREF(savevp); error = (*(nfsrv4_ops2[op]))(nd, isdgram, savevp, vp, p, &savevpnes, &vpnes); } else nd->nd_repstat = NFSERR_PERM; if (nfsv4_opflag[op].modifyfs) vn_finished_write(temp_mp); } else { if (nfsv4_opflag[op].retfh != 0) panic("nfsrvd_compound"); if (nfsv4_opflag[op].needscfh) { if (vp != NULL) { lktype = nfsv4_opflag[op].lktype; if (nfsv4_opflag[op].modifyfs) { vn_start_write(vp, &temp_mp, V_WAIT); if (op == NFSV4OP_WRITE && MNT_SHARED_WRITES(temp_mp)) lktype = LK_SHARED; } if (NFSVOPLOCK(vp, lktype) == 0) VREF(vp); else nd->nd_repstat = NFSERR_PERM; } else { nd->nd_repstat = NFSERR_NOFILEHANDLE; if (op == NFSV4OP_SETATTR) { /* * Setattr reply requires a * bitmap even for errors like * these. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = 0; } break; } if (nd->nd_repstat == 0) error = (*(nfsrv4_ops0[op]))(nd, isdgram, vp, p, &vpnes); if (nfsv4_opflag[op].modifyfs) vn_finished_write(temp_mp); } else { error = (*(nfsrv4_ops0[op]))(nd, isdgram, NULL, p, &vpnes); } } } if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) { nd->nd_repstat = NFSERR_BADXDR; } else { nd->nd_repstat = error; printf("nfsv4 comperr0=%d\n", error); } error = 0; } if (statsinprog != 0) { nfsrvd_statend(op, /*bytes*/ 0, /*now*/ NULL, /*then*/ &start_time); statsinprog = 0; } retops++; if (nd->nd_repstat) { *repp = nfsd_errmap(nd); break; } else { *repp = 0; /* NFS4_OK */ } } nfsmout: if (statsinprog != 0) { nfsrvd_statend(op, /*bytes*/ 0, /*now*/ NULL, /*then*/ &start_time); statsinprog = 0; } if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) nd->nd_repstat = NFSERR_BADXDR; else printf("nfsv4 comperr1=%d\n", error); } if (taglen == -1) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = 0; } else { *retopsp = txdr_unsigned(retops); } if (vp) vrele(vp); if (savevp) vrele(savevp); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); NFSEXITCODE2(0, nd); } Index: head/sys/fs/nfsserver/nfs_nfsdstate.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdstate.c (revision 335011) +++ head/sys/fs/nfsserver/nfs_nfsdstate.c (revision 335012) @@ -1,6325 +1,8367 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT +#include #include struct nfsrv_stablefirst nfsrv_stablefirst; int nfsrv_issuedelegs = 0; int nfsrv_dolocallocks = 0; struct nfsv4lock nfsv4rootfs_lock; +time_t nfsdev_time = 0; +int nfsrv_layouthashsize; +volatile int nfsrv_layoutcnt = 0; extern int newnfs_numnfsd; extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_lease; extern struct timeval nfsboottime; extern u_int32_t newnfs_true, newnfs_false; +extern struct mtx nfsrv_dslock_mtx; +extern struct mtx nfsrv_recalllock_mtx; +extern struct mtx nfsrv_dontlistlock_mtx; extern int nfsd_debuglevel; +extern u_int nfsrv_dsdirsize; +extern struct nfsdevicehead nfsrv_devidhead; +extern int nfsrv_doflexfile; +extern int nfsrv_maxpnfsmirror; NFSV4ROOTLOCKMUTEX; NFSSTATESPINLOCK; +extern struct nfsdontlisthead nfsrv_dontlisthead; +extern volatile int nfsrv_devidcnt; +extern struct nfslayouthead nfsrv_recalllisthead; SYSCTL_DECL(_vfs_nfsd); int nfsrv_statehashsize = NFSSTATEHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN, &nfsrv_statehashsize, 0, "Size of state hash table set via loader.conf"); int nfsrv_clienthashsize = NFSCLIENTHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN, &nfsrv_clienthashsize, 0, "Size of client hash table set via loader.conf"); int nfsrv_lockhashsize = NFSLOCKHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN, &nfsrv_lockhashsize, 0, "Size of file handle hash table set via loader.conf"); int nfsrv_sessionhashsize = NFSSESSIONHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN, &nfsrv_sessionhashsize, 0, "Size of session hash table set via loader.conf"); +int nfsrv_layouthighwater = NFSLAYOUTHIGHWATER; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN, + &nfsrv_layouthighwater, 0, + "High water mark for number of layouts set via loader.conf"); + static int nfsrv_v4statelimit = NFSRV_V4STATELIMIT; SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN, &nfsrv_v4statelimit, 0, "High water limit for NFSv4 opens+locks+delegations"); static int nfsrv_writedelegifpos = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW, &nfsrv_writedelegifpos, 0, "Issue a write delegation for read opens if possible"); static int nfsrv_allowreadforwriteopen = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW, &nfsrv_allowreadforwriteopen, 0, "Allow Reads to be done with Write Access StateIDs"); +int nfsrv_pnfsatime = 0; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW, + &nfsrv_pnfsatime, 0, + "For pNFS service, do Getattr ops to keep atime up-to-date"); + +int nfsrv_flexlinuxhack = 0; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW, + &nfsrv_flexlinuxhack, 0, + "For Linux clients, hack around Flex File Layout bug"); + /* * Hash lists for nfs V4. */ struct nfsclienthashhead *nfsclienthash; struct nfslockhashhead *nfslockhash; struct nfssessionhash *nfssessionhash; +struct nfslayouthash *nfslayouthash; +volatile int nfsrv_dontlistlen = 0; #endif /* !APPLEKEXT */ static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0; static time_t nfsrvboottime; static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0; static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER; static int nfsrv_nogsscallback = 0; static volatile int nfsrv_writedelegcnt = 0; /* local functions */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp); static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p); static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freenfslock(struct nfslock *lop); static void nfsrv_freenfslockfile(struct nfslockfile *lfp); static void nfsrv_freedeleg(struct nfsstate *); static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, u_int32_t flags, struct nfsstate **stpp); static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp); static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p); static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit); static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp); static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp); static int nfsrv_getipnumber(u_char *cp); static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid); static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags); static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, - struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p); + struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p); static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, uint32_t callback, int op, const char *optag, struct nfsdsession **sepp); static u_int32_t nfsrv_nextclientindex(void); static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp); static void nfsrv_markstable(struct nfsclient *clp); static void nfsrv_markreclaim(struct nfsclient *clp); static int nfsrv_checkstable(struct nfsclient *clp); static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct vnode *vp, NFSPROC_T *p); static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp); static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p); static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp); static time_t nfsrv_leaseexpiry(void); static void nfsrv_delaydelegtimeout(struct nfsstate *stp); static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op); static int nfsrv_nootherstate(struct nfsstate *stp); static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p); static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p); static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end); static void nfsrv_locklf(struct nfslockfile *lfp); static void nfsrv_unlocklf(struct nfslockfile *lfp); static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid); static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid); static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp, int dont_replycache, struct nfsdsession **sepp); static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp); +static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp, + nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p); +static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp); +static void nfsrv_freelayoutlist(nfsquad_t clientid); +static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, + int iomode); +static void nfsrv_freealllayouts(void); +static void nfsrv_freedevid(struct nfsdevice *ds); +static int nfsrv_setdsserver(char *dspathp, NFSPROC_T *p, + struct nfsdevice **dsp); +static int nfsrv_delds(char *devid, NFSPROC_T *p); +static void nfsrv_deleteds(struct nfsdevice *fndds); +static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost); +static void nfsrv_freealldevids(void); +static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, + int maxcnt, NFSPROC_T *p); +static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, + fhandle_t *fhp, struct nfslayout *lyp, struct nfslayouthead *lyheadp, + int laytype, NFSPROC_T *p); +static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype, + NFSPROC_T *, struct nfslayout **lypp); +static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt); +static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, + fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs); +static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, + int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs); +static int nfsrv_dontlayout(fhandle_t *fhp); +static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf, + vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p, + vnode_t *tvpp); /* * Scan the client list for a match and either return the current one, * create a new entry or return an error. * If returning a non-error, the clp structure must either be linked into * the client list or free'd. */ APPLESTATIC int nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p) { struct nfsclient *clp = NULL, *new_clp = *new_clpp; int i, error = 0, ret; struct nfsstate *stp, *tstp; struct sockaddr_in *sad, *rad; struct nfsdsession *sep, *nsep; int zapit = 0, gotit, hasstate = 0, igotlock; static u_int64_t confirm_index = 0; /* * Check for state resource limit exceeded. */ if (nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } if (nfsrv_issuedelegs == 0 || ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0)) /* * Don't do callbacks when delegations are disabled or * for AUTH_GSS unless enabled via nfsrv_nogsscallback. * If establishing a callback connection is attempted * when a firewall is blocking the callback path, the * server may wait too long for the connect attempt to * succeed during the Open. Some clients, such as Linux, * may timeout and give up on the Open before the server * replies. Also, since AUTH_GSS callbacks are not * yet interoperability tested, they might cause the * server to crap out, if they get past the Init call to * the client. */ new_clp->lc_program = 0; /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (new_clp->lc_idlen == clp->lc_idlen && !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } if (gotit == 0) i++; } if (!gotit || (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) { if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) { /* * For NFSv4.1, if confirmp->lval[1] is non-zero, the * client is trying to update a confirmed clientid. */ NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); confirmp->lval[1] = 0; error = NFSERR_NOENT; goto out; } /* * Get rid of the old one. */ if (i != nfsrv_clienthashsize) { LIST_REMOVE(clp, lc_hash); nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); zapit = 1; } /* * Add it after assigning a client id to it. */ new_clp->lc_flags |= LCL_NEEDSCONFIRM; if ((nd->nd_flag & ND_NFSV41) != 0) new_clp->lc_confirm.lval[0] = confirmp->lval[0] = ++confirm_index; else confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = (u_int32_t)nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); LIST_INIT(&new_clp->lc_open); LIST_INIT(&new_clp->lc_deleg); LIST_INIT(&new_clp->lc_olddeleg); LIST_INIT(&new_clp->lc_session); for (i = 0; i < nfsrv_statehashsize; i++) LIST_INIT(&new_clp->lc_stateid[i]); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); if (zapit) nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* * Now, handle the cases where the id is already issued. */ if (nfsrv_notsamecredname(nd, clp)) { /* * Check to see if there is expired state that should go away. */ if (clp->lc_expiry < NFSD_MONOSEC && (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); } /* * If there is outstanding state, then reply NFSERR_CLIDINUSE per * RFC3530 Sec. 8.1.2 last para. */ if (!LIST_EMPTY(&clp->lc_deleg)) { hasstate = 1; } else if (LIST_EMPTY(&clp->lc_open)) { hasstate = 0; } else { hasstate = 0; /* Look for an Open on the OpenOwner */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { if (!LIST_EMPTY(&stp->ls_open)) { hasstate = 1; break; } } } if (hasstate) { /* * If the uid doesn't match, return NFSERR_CLIDINUSE after * filling out the correct ipaddr and portnum. */ sad = NFSSOCKADDR(new_clp->lc_req.nr_nam, struct sockaddr_in *); rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); sad->sin_addr.s_addr = rad->sin_addr.s_addr; sad->sin_port = rad->sin_port; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIDINUSE; goto out; } } if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) { /* * If the verifier has changed, the client has rebooted * and a new client id is issued. The old state info * can be thrown away once the SETCLIENTID_CONFIRM occurs. */ LIST_REMOVE(clp, lc_hash); /* Get rid of all sessions on this clientid. */ LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) { ret = nfsrv_freesession(sep, NULL); if (ret != 0) printf("nfsrv_setclient: verifier changed free" " session failed=%d\n", ret); } new_clp->lc_flags |= LCL_NEEDSCONFIRM; if ((nd->nd_flag & ND_NFSV41) != 0) new_clp->lc_confirm.lval[0] = confirmp->lval[0] = ++confirm_index; else confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); /* * Save the state until confirmed. */ LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsd clp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* For NFSv4.1, mark that we found a confirmed clientid. */ if ((nd->nd_flag & ND_NFSV41) != 0) { clientidp->lval[0] = clp->lc_clientid.lval[0]; clientidp->lval[1] = clp->lc_clientid.lval[1]; confirmp->lval[0] = 0; /* Ignored by client */ confirmp->lval[1] = 1; } else { /* * id and verifier match, so update the net address info * and get rid of any existing callback authentication * handle, so a new one will be acquired. */ LIST_REMOVE(clp, lc_hash); new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN); new_clp->lc_expiry = nfsrv_leaseexpiry(); confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = clp->lc_clientid.lval[0]; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = clp->lc_clientid.lval[1]; new_clp->lc_delegtime = clp->lc_delegtime; new_clp->lc_stateindex = clp->lc_stateindex; new_clp->lc_statemaxindex = clp->lc_statemaxindex; new_clp->lc_cbref = 0; LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; } NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); if ((nd->nd_flag & ND_NFSV41) == 0) { /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsdclp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; } out: NFSEXITCODE2(error, nd); return (error); } /* * Check to see if the client id exists and optionally confirm it. */ APPLESTATIC int nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp, struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int i; struct nfsclienthashhead *hp; int error = 0, igotlock, doneok; struct nfssessionhash *shp; struct nfsdsession *sep; uint64_t sessid[2]; static uint64_t next_sess = 0; if (clpp) *clpp = NULL; if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 || opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } /* * If called with opflags == CLOPS_RENEW, the State Lock is * already held. Otherwise, we need to get either that or, * for the case of Confirm, lock out the nfsd threads. */ if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); /* * Create a new sessionid here, since we need to do it where * there is a mutex held to serialize update of next_sess. */ if ((nd->nd_flag & ND_NFSV41) != 0) { sessid[0] = ++next_sess; sessid[1] = clientid.qval; } NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSLOCKSTATE(); } /* For NFSv4.1, the clp is acquired from the associated session. */ if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 && opflags == CLOPS_RENEW) { clp = NULL; if ((nd->nd_flag & ND_HASSEQUENCE) != 0) { shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep != NULL) clp = sep->sess_clp; NFSUNLOCKSESSION(shp); } } else { hp = NFSCLIENTHASH(clientid); LIST_FOREACH(clp, hp, lc_hash) { if (clp->lc_clientid.lval[1] == clientid.lval[1]) break; } } if (clp == NULL) { if (opflags & CLOPS_CONFIRM) error = NFSERR_STALECLIENTID; else error = NFSERR_EXPIRED; } else if (clp->lc_flags & LCL_ADMINREVOKED) { /* * If marked admin revoked, just return the error. */ error = NFSERR_ADMINREVOKED; } if (error) { if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } goto out; } /* * Perform any operations specified by the opflags. */ if (opflags & CLOPS_CONFIRM) { if (((nd->nd_flag & ND_NFSV41) != 0 && clp->lc_confirm.lval[0] != confirm.lval[0]) || ((nd->nd_flag & ND_NFSV41) == 0 && clp->lc_confirm.qval != confirm.qval)) error = NFSERR_STALECLIENTID; else if (nfsrv_notsamecredname(nd, clp)) error = NFSERR_CLIDINUSE; if (!error) { if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) == LCL_NEEDSCONFIRM) { /* * Hang onto the delegations (as old delegations) * for an Open with CLAIM_DELEGATE_PREV unless in * grace, but get rid of the rest of the state. */ nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_olddeleg); if (nfsrv_checkgrace(nd, clp, 0)) { /* In grace, so just delete delegations */ nfsrv_freedeleglist(&clp->lc_deleg); } else { LIST_FOREACH(stp, &clp->lc_deleg, ls_list) stp->ls_flags |= NFSLCK_OLDDELEG; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg, ls_list); } if ((nd->nd_flag & ND_NFSV41) != 0) clp->lc_program = cbprogram; } clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN); if (clp->lc_program) clp->lc_flags |= LCL_NEEDSCBNULL; /* For NFSv4.1, link the session onto the client. */ if (nsep != NULL) { /* Hold a reference on the xprt for a backchannel. */ if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) { if (clp->lc_req.nr_client == NULL) clp->lc_req.nr_client = (struct __rpc_client *) clnt_bck_create(nd->nd_xprt->xp_socket, cbprogram, NFSV4_CBVERS); if (clp->lc_req.nr_client != NULL) { SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = clp->lc_req.nr_client->cl_private; /* Disable idle timeout. */ nd->nd_xprt->xp_idletimeout = 0; nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt; } else nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN; } NFSBCOPY(sessid, nsep->sess_sessionid, NFSX_V4SESSIONID); NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid, NFSX_V4SESSIONID); shp = NFSSESSIONHASH(nsep->sess_sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); LIST_INSERT_HEAD(&shp->list, nsep, sess_hash); LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list); nsep->sess_clp = clp; NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); } } } else if (clp->lc_flags & LCL_NEEDSCONFIRM) { error = NFSERR_EXPIRED; } /* * If called by the Renew Op, we must check the principal. */ if (!error && (opflags & CLOPS_RENEWOP)) { if (nfsrv_notsamecredname(nd, clp)) { doneok = 0; for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if ((stp->ls_flags & NFSLCK_OPEN) && stp->ls_uid == nd->nd_cred->cr_uid) { doneok = 1; break; } } } if (!doneok) error = NFSERR_ACCES; } if (!error && (clp->lc_flags & LCL_CBDOWN)) error = NFSERR_CBPATHDOWN; } if ((!error || error == NFSERR_CBPATHDOWN) && (opflags & CLOPS_RENEW)) { clp->lc_expiry = nfsrv_leaseexpiry(); } if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } if (clpp) *clpp = clp; out: NFSEXITCODE2(error, nd); return (error); } /* * Perform the NFSv4.1 destroy clientid. */ int nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p) { struct nfsclient *clp; struct nfsclienthashhead *hp; int error = 0, i, igotlock; if (nfsrvboottime != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0); NFSUNLOCKV4ROOTMUTEX(); hp = NFSCLIENTHASH(clientid); LIST_FOREACH(clp, hp, lc_hash) { if (clp->lc_clientid.lval[1] == clientid.lval[1]) break; } if (clp == NULL) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* Just return ok, since it is gone. */ goto out; } + /* + * Free up all layouts on the clientid. Should the client return the + * layouts? + */ + nfsrv_freelayoutlist(clientid); + /* Scan for state on the clientid. */ for (i = 0; i < nfsrv_statehashsize; i++) if (!LIST_EMPTY(&clp->lc_stateid[i])) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIENTIDBUSY; goto out; } if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIENTIDBUSY; goto out; } /* Destroy the clientid and return ok. */ nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); nfsrv_zapclient(clp, p); out: NFSEXITCODE2(error, nd); return (error); } /* * Called from the new nfssvc syscall to admin revoke a clientid. * Returns 0 for success, error otherwise. */ APPLESTATIC int nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p) { struct nfsclient *clp = NULL; int i, error = 0; int gotit, igotlock; /* * First, lock out the nfsd so that state won't change while the * revocation record is being written to the stable storage restart * file. */ NFSLOCKV4ROOTMUTEX(); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (revokep->nclid_idlen == clp->lc_idlen && !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } i++; } if (!gotit) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); error = EPERM; goto out; } /* * Now, write out the revocation record */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); /* * and clear out the state, marking the clientid revoked. */ clp->lc_flags &= ~LCL_CALLBACKSON; clp->lc_flags |= LCL_ADMINREVOKED; nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); out: NFSEXITCODE(error); return (error); } /* * Dump out stats for all clients. Called from nfssvc(2), that is used * nfsstatsv1. */ APPLESTATIC void nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt) { struct nfsclient *clp; int i = 0, cnt = 0; /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired while dumping the clients. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); /* * Rattle through the client lists until done. */ while (i < nfsrv_clienthashsize && cnt < maxcnt) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) { nfsrv_dumpaclient(clp, &dumpp[cnt]); cnt++; clp = LIST_NEXT(clp, lc_hash); } i++; } if (cnt < maxcnt) dumpp[cnt].ndcl_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd. */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp) { struct nfsstate *stp, *openstp, *lckownstp; struct nfslock *lop; struct sockaddr *sad; struct sockaddr_in *rad; struct sockaddr_in6 *rad6; dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0; dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0; dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0; dumpp->ndcl_flags = clp->lc_flags; dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen; NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen); sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *); dumpp->ndcl_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr; } /* * Now, scan the state lists and total up the opens and locks. */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { dumpp->ndcl_nopenowners++; LIST_FOREACH(openstp, &stp->ls_open, ls_list) { dumpp->ndcl_nopens++; LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) { dumpp->ndcl_nlockowners++; LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) { dumpp->ndcl_nlocks++; } } } } /* * and the delegation lists. */ LIST_FOREACH(stp, &clp->lc_deleg, ls_list) { dumpp->ndcl_ndelegs++; } LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { dumpp->ndcl_nolddelegs++; } } /* * Dump out lock stats for a file. */ APPLESTATIC void nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt, NFSPROC_T *p) { struct nfsstate *stp; struct nfslock *lop; int cnt = 0; struct nfslockfile *lfp; struct sockaddr *sad; struct sockaddr_in *rad; struct sockaddr_in6 *rad6; int ret; fhandle_t nfh; ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock on it cannot be acquired while dumping the locks. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); if (!ret) ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0); if (ret) { ldumpp[0].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); return; } /* * For each open share on file, dump it out. */ stp = LIST_FIRST(&lfp->lf_open); while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_openowner->ls_ownerlen; NFSBCOPY(stp->ls_openowner->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_openowner->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *); ldumpp[cnt].ndlck_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr; } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * and all locks. */ lop = LIST_FIRST(&lfp->lf_lock); while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) { stp = lop->lo_stp; ldumpp[cnt].ndlck_flags = lop->lo_flags; ldumpp[cnt].ndlck_first = lop->lo_first; ldumpp[cnt].ndlck_end = lop->lo_end; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen; NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *); ldumpp[cnt].ndlck_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr; } lop = LIST_NEXT(lop, lo_lckfile); cnt++; } /* * and the delegations. */ stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = 0; ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *); ldumpp[cnt].ndlck_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr; } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * If list isn't full, mark end of list by setting the client name * to zero length. */ if (cnt < maxcnt) ldumpp[cnt].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Server timer routine. It can scan any linked list, so long * as it holds the spin/mutex lock and there is no exclusive lock on * nfsv4rootfs_lock. * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok * to do this from a callout, since the spin locks work. For * Darwin, I'm not sure what will work correctly yet.) * Should be called once per second. */ APPLESTATIC void nfsrv_servertimer(void) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int got_ref, i; /* * Make sure nfsboottime is set. This is used by V3 as well * as V4. Note that nfsboottime is not nfsrvboottime, which is * only used by the V4 server for leases. */ if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); /* * If server hasn't started yet, just return. */ NFSLOCKSTATE(); if (nfsrv_stablefirst.nsf_eograce == 0) { NFSUNLOCKSTATE(); return; } if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) { if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) && NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); NFSUNLOCKSTATE(); return; } /* * Try and get a reference count on the nfsv4rootfs_lock so that * no nfsd thread can acquire an exclusive lock on it before this * call is done. If it is already exclusively locked, just return. */ NFSLOCKV4ROOTMUTEX(); got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); if (got_ref == 0) { NFSUNLOCKSTATE(); return; } /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i])) { nclp = LIST_NEXT(clp, lc_hash); if (!(clp->lc_flags & LCL_EXPIREIT)) { if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC && ((LIST_EMPTY(&clp->lc_deleg) && LIST_EMPTY(&clp->lc_open)) || nfsrv_clients > nfsrv_clienthighwater)) || (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC || (clp->lc_expiry < NFSD_MONOSEC && (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) { /* * Lease has expired several nfsrv_lease times ago: * PLUS * - no state is associated with it * OR * - above high water mark for number of clients * (nfsrv_clienthighwater should be large enough * that this only occurs when clients fail to * use the same nfs_client_id4.id. Maybe somewhat * higher that the maximum number of clients that * will mount this server?) * OR * Lease has expired a very long time ago * OR * Lease has expired PLUS the number of opens + locks * has exceeded 90% of capacity * * --> Mark for expiry. The actual expiry will be done * by an nfsd sometime soon. */ clp->lc_flags |= LCL_EXPIREIT; nfsrv_stablefirst.nsf_flags |= (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT); } else { /* * If there are no opens, increment no open tick cnt * If time exceeds NFSNOOPEN, mark it to be thrown away * otherwise, if there is an open, reset no open time * Hopefully, this will avoid excessive re-creation * of open owners and subsequent open confirms. */ stp = LIST_FIRST(&clp->lc_open); while (stp != LIST_END(&clp->lc_open)) { nstp = LIST_NEXT(stp, ls_list); if (LIST_EMPTY(&stp->ls_open)) { stp->ls_noopens++; if (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit) nfsrv_stablefirst.nsf_flags |= NFSNSF_NOOPENS; } else { stp->ls_noopens = 0; } stp = nstp; } } } clp = nclp; } } NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * The following set of functions free up the various data structures. */ /* * Clear out all open/lock state related to this nfsclient. * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that * there are no other active nfsd threads. */ APPLESTATIC void nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; struct nfsdsession *sep, *nsep; LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) nfsrv_freeopenowner(stp, 1, p); if ((clp->lc_flags & LCL_ADMINREVOKED) == 0) LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) (void)nfsrv_freesession(sep, NULL); } /* * Free a client that has been cleaned. It should also already have been * removed from the lists. * (Just to be safe w.r.t. newnfs_disconnect(), call this function when * softclock interrupts are enabled.) */ APPLESTATIC void nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p) { #ifdef notyet if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) == (LCL_GSS | LCL_CALLBACKSON) && (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) && clp->lc_handlelen > 0) { clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE; clp->lc_hand.nfsh_flag |= NFSG_DESTROYED; (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL, - NULL, 0, NULL, NULL, NULL, p); + NULL, 0, NULL, NULL, NULL, 0, p); } #endif newnfs_disconnect(&clp->lc_req); free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); NFSLOCKSTATE(); nfsstatsv1.srvclients--; nfsrv_openpluslock--; nfsrv_clients--; NFSUNLOCKSTATE(); } /* * Free a list of delegation state structures. * (This function will also free all nfslockfile structures that no * longer have associated state.) */ APPLESTATIC void nfsrv_freedeleglist(struct nfsstatehead *sthp) { struct nfsstate *stp, *nstp; LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) { nfsrv_freedeleg(stp); } LIST_INIT(sthp); } /* * Free up a delegation. */ static void nfsrv_freedeleg(struct nfsstate *stp) { struct nfslockfile *lfp; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0) nfsrv_writedelegcnt--; lfp = stp->ls_lfp; if (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && nfsv4_testlock(&lfp->lf_locallock_lck) == 0) nfsrv_freenfslockfile(lfp); free(stp, M_NFSDSTATE); nfsstatsv1.srvdelegates--; nfsrv_openpluslock--; nfsrv_delegatecnt--; } /* * This function frees an open owner and all associated opens. */ static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; LIST_REMOVE(stp, ls_list); /* * Now, free all associated opens. */ nstp = LIST_FIRST(&stp->ls_open); while (nstp != LIST_END(&stp->ls_open)) { tstp = nstp; nstp = LIST_NEXT(nstp, ls_list); (void) nfsrv_freeopen(tstp, NULL, cansleep, p); } if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); nfsstatsv1.srvopenowners--; nfsrv_openpluslock--; } /* * This function frees an open (nfsstate open structure) with all associated * lock_owners and locks. It also frees the nfslockfile structure iff there * are no other opens on the file. * Returns 1 if it free'd the nfslockfile, 0 otherwise. */ static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; struct nfslockfile *lfp; int ret; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); lfp = stp->ls_lfp; /* * Now, free all lockowners associated with this open. */ LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp) nfsrv_freelockowner(tstp, vp, cansleep, p); /* * The nfslockfile is freed here if there are no locks * associated with the open. * If there are locks associated with the open, the * nfslockfile structure can be freed via nfsrv_freelockowner(). * Acquire the state mutex to avoid races with calls to * nfsrv_getlockfile(). */ if (cansleep != 0) NFSLOCKSTATE(); if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) { nfsrv_freenfslockfile(lfp); ret = 1; } else ret = 0; if (cansleep != 0) NFSUNLOCKSTATE(); free(stp, M_NFSDSTATE); nfsstatsv1.srvopens--; nfsrv_openpluslock--; return (ret); } /* * Frees a lockowner and all associated locks. */ static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); nfsrv_freeallnfslocks(stp, vp, cansleep, p); if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); nfsstatsv1.srvlockowners--; nfsrv_openpluslock--; } /* * Free all the nfs locks on a lockowner. */ static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfslock *lop, *nlop; struct nfsrollback *rlp, *nrlp; struct nfslockfile *lfp = NULL; int gottvp = 0; vnode_t tvp = NULL; uint64_t first, end; if (vp != NULL) ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked"); lop = LIST_FIRST(&stp->ls_lock); while (lop != LIST_END(&stp->ls_lock)) { nlop = LIST_NEXT(lop, lo_lckowner); /* * Since all locks should be for the same file, lfp should * not change. */ if (lfp == NULL) lfp = lop->lo_lfp; else if (lfp != lop->lo_lfp) panic("allnfslocks"); /* * If vp is NULL and cansleep != 0, a vnode must be acquired * from the file handle. This only occurs when called from * nfsrv_cleanclient(). */ if (gottvp == 0) { if (nfsrv_dolocallocks == 0) tvp = NULL; else if (vp == NULL && cansleep != 0) { tvp = nfsvno_getvp(&lfp->lf_fh); NFSVOPUNLOCK(tvp, 0); } else tvp = vp; gottvp = 1; } if (tvp != NULL) { if (cansleep == 0) panic("allnfs2"); first = lop->lo_first; end = lop->lo_end; nfsrv_freenfslock(lop); nfsrv_localunlock(tvp, lfp, first, end, p); LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } else nfsrv_freenfslock(lop); lop = nlop; } if (vp == NULL && tvp != NULL) vrele(tvp); } /* * Free an nfslock structure. */ static void nfsrv_freenfslock(struct nfslock *lop) { if (lop->lo_lckfile.le_prev != NULL) { LIST_REMOVE(lop, lo_lckfile); nfsstatsv1.srvlocks--; nfsrv_openpluslock--; } LIST_REMOVE(lop, lo_lckowner); free(lop, M_NFSDLOCK); } /* * This function frees an nfslockfile structure. */ static void nfsrv_freenfslockfile(struct nfslockfile *lfp) { LIST_REMOVE(lfp, lf_hash); free(lfp, M_NFSDLOCKFILE); } /* * This function looks up an nfsstate structure via stateid. */ static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags, struct nfsstate **stpp) { struct nfsstate *stp; struct nfsstatehead *hp; int error = 0; *stpp = NULL; hp = NFSSTATEHASH(clp, *stateidp); LIST_FOREACH(stp, hp, ls_hash) { if (!NFSBCMP(stp->ls_stateid.other, stateidp->other, NFSX_STATEIDOTHER)) break; } /* * If no state id in list, return NFSERR_BADSTATEID. */ if (stp == LIST_END(hp)) { error = NFSERR_BADSTATEID; goto out; } *stpp = stp; out: NFSEXITCODE(error); return (error); } /* * This function gets an nfsstate structure via owner string. */ static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp) { struct nfsstate *stp; *stpp = NULL; LIST_FOREACH(stp, hp, ls_list) { if (new_stp->ls_ownerlen == stp->ls_ownerlen && !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) { *stpp = stp; return; } } } /* * Lock control function called to update lock status. * Returns 0 upon success, -1 if there is no lock and the flags indicate * that one isn't to be created and an NFSERR_xxx for other errors. * The structures new_stp and new_lop are passed in as pointers that should * be set to NULL if the structure is used and shouldn't be free'd. * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are * never used and can safely be allocated on the stack. For all other * cases, *new_stpp and *new_lopp should be malloc'd before the call, * in case they are used. */ APPLESTATIC int nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp, struct nfslock **new_lopp, struct nfslockconflict *cfp, nfsquad_t clientid, nfsv4stateid_t *stateidp, __unused struct nfsexstuff *exp, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfslock *lop; struct nfsstate *new_stp = *new_stpp; struct nfslock *new_lop = *new_lopp; struct nfsstate *tstp, *mystp, *nstp; int specialid = 0; struct nfslockfile *lfp; struct nfslock *other_lop = NULL; struct nfsstate *stp, *lckstp = NULL; struct nfsclient *clp = NULL; u_int32_t bits; int error = 0, haslock = 0, ret, reterr; int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0; fhandle_t nfh; uint64_t first, end; uint32_t lock_flags; if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Note the special cases of "all 1s" or "all 0s" stateids and * let reads with all 1s go ahead. */ if (new_stp->ls_stateid.seqid == 0x0 && new_stp->ls_stateid.other[0] == 0x0 && new_stp->ls_stateid.other[1] == 0x0 && new_stp->ls_stateid.other[2] == 0x0) specialid = 1; else if (new_stp->ls_stateid.seqid == 0xffffffff && new_stp->ls_stateid.other[0] == 0xffffffff && new_stp->ls_stateid.other[1] == 0xffffffff && new_stp->ls_stateid.other[2] == 0xffffffff) specialid = 2; } /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, specialid); if (error) goto out; /* * Check for state resource limit exceeded. */ if ((new_stp->ls_flags & NFSLCK_LOCK) && nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } /* * For the lock case, get another nfslock structure, * just in case we need it. * Malloc now, before we start sifting through the linked lists, * in case we have to wait for memory. */ tryagain: if (new_stp->ls_flags & NFSLCK_LOCK) other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); filestruct_locked = 0; reterr = 0; lfp = NULL; /* * Get the lockfile structure for CFH now, so we can do a sanity * check against the stateid, before incrementing the seqid#, since * we want to return NFSERR_BADSTATEID on failure and the seqid# * shouldn't be incremented for this case. * If nfsrv_getlockfile() returns -1, it means "not found", which * will be handled later. * If we are doing Lock/LockU and local locking is enabled, sleep * lock the nfslockfile structure. */ getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p); NFSLOCKSTATE(); if (getlckret == 0) { if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 && nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) { getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 1); if (getlckret == 0) filestruct_locked = 1; } else getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 0); } if (getlckret != 0 && getlckret != -1) reterr = getlckret; if (filestruct_locked != 0) { LIST_INIT(&lfp->lf_rollback); if ((new_stp->ls_flags & NFSLCK_LOCK)) { /* * For local locking, do the advisory locking now, so * that any conflict can be detected. A failure later * can be rolled back locally. If an error is returned, * struct nfslockfile has been unlocked and any local * locking rolled back. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1"); vnode_unlocked = 1; NFSVOPUNLOCK(vp, 0); } reterr = nfsrv_locallock(vp, lfp, (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)), new_lop->lo_first, new_lop->lo_end, cfp, p); NFSLOCKSTATE(); } } if (specialid == 0) { if (new_stp->ls_flags & NFSLCK_TEST) { /* * RFC 3530 does not list LockT as an op that renews a * lease, but the consensus seems to be that it is ok * for a server to do so. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); /* * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid * error returns for LockT, just go ahead and test for a lock, * since there are no locks for this client, but other locks * can conflict. (ie. same client will always be false) */ if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED) error = 0; lckstp = new_stp; } else { error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) /* * Look up the stateid */ error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * do some sanity checks for an unconfirmed open or a * stateid that refers to the wrong file, for an open stateid */ if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) && ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) || (getlckret == 0 && stp->ls_lfp != lfp))){ /* * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID * The only exception is using SETATTR with SIZE. * */ if ((new_stp->ls_flags & (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR) error = NFSERR_BADSTATEID; } if (error == 0 && (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) && getlckret == 0 && stp->ls_lfp != lfp) error = NFSERR_BADSTATEID; /* * If the lockowner stateid doesn't refer to the same file, * I believe that is considered ok, since some clients will * only create a single lockowner and use that for all locks * on all files. * For now, log it as a diagnostic, instead of considering it * a BadStateid. */ if (error == 0 && (stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 && getlckret == 0 && stp->ls_lfp != lfp) { #ifdef DIAGNOSTIC printf("Got a lock statid for different file open\n"); #endif /* error = NFSERR_BADSTATEID; */ } if (error == 0) { if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) { /* * If haslock set, we've already checked the seqid. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); else error = NFSERR_BADSTATEID; } if (!error) nfsrv_getowner(&stp->ls_open, new_stp, &lckstp); if (lckstp) /* * I believe this should be an error, but it * isn't obvious what NFSERR_xxx would be * appropriate, so I'll use NFSERR_INVAL for now. */ error = NFSERR_INVAL; else lckstp = new_stp; } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) { /* * If haslock set, ditto above. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = NFSERR_BADSTATEID; else error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp, new_stp->ls_op); } lckstp = stp; } else { lckstp = stp; } } /* * If the seqid part of the stateid isn't the same, return * NFSERR_OLDSTATEID for cases other than I/O Ops. * For I/O Ops, only return NFSERR_OLDSTATEID if * nfsrv_returnoldstateid is set. (The consensus on the email * list was that most clients would prefer to not receive * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that * is what will happen, so I use the nfsrv_returnoldstateid to * allow for either server configuration.) */ if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid && (((nd->nd_flag & ND_NFSV41) == 0 && (!(new_stp->ls_flags & NFSLCK_CHECK) || nfsrv_returnoldstateid)) || ((nd->nd_flag & ND_NFSV41) != 0 && new_stp->ls_stateid.seqid != 0))) error = NFSERR_OLDSTATEID; } } /* * Now we can check for grace. */ if (!error) error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If we successfully Reclaimed state, note that. */ if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error) nfsrv_markstable(clp); /* * At this point, either error == NFSERR_BADSTATEID or the * seqid# has been updated, so we can return any error. * If error == 0, there may be an error in: * nd_repstat - Set by the calling function. * reterr - Set above, if getting the nfslockfile structure * or acquiring the local lock failed. * (If both of these are set, nd_repstat should probably be * returned, since that error was detected before this * function call.) */ if (error != 0 || nd->nd_repstat != 0 || reterr != 0) { if (error == 0) { if (nd->nd_repstat != 0) error = nd->nd_repstat; else error = reterr; } if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2"); vnode_unlocked = 1; NFSVOPUNLOCK(vp, 0); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Check the nfsrv_getlockfile return. * Returned -1 if no structure found. */ if (getlckret == -1) { error = NFSERR_EXPIRED; /* * Called from lockt, so no lock is OK. */ if (new_stp->ls_flags & NFSLCK_TEST) { error = 0; } else if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Called to check for a lock, OK if the stateid is all * 1s or all 0s, but there should be an nfsstate * otherwise. * (ie. If there is no open, I'll assume no share * deny bits.) */ if (specialid) error = 0; else error = NFSERR_BADSTATEID; } NFSUNLOCKSTATE(); goto out; } /* * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict. * For NFSLCK_CHECK, allow a read if write access is granted, * but check for a deny. For NFSLCK_LOCK, require correct access, * which implies a conflicting deny can't exist. */ if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) { /* * Four kinds of state id: * - specialid (all 0s or all 1s), only for NFSLCK_CHECK * - stateid for an open * - stateid for a delegation * - stateid for a lock owner */ if (!specialid) { if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { delegation = 1; mystp = stp; nfsrv_delaydelegtimeout(stp); } else if (stp->ls_flags & NFSLCK_OPEN) { mystp = stp; } else { mystp = stp->ls_openstp; } /* * If locking or checking, require correct access * bit set. */ if (((new_stp->ls_flags & NFSLCK_LOCK) && !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) & mystp->ls_flags & NFSLCK_ACCESSBITS)) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) == (NFSLCK_CHECK | NFSLCK_READACCESS) && !(mystp->ls_flags & NFSLCK_READACCESS) && nfsrv_allowreadforwriteopen == 0) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) == (NFSLCK_CHECK | NFSLCK_WRITEACCESS) && !(mystp->ls_flags & NFSLCK_WRITEACCESS))) { if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl3"); vnode_unlocked = 1; NFSVOPUNLOCK(vp, 0); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); error = NFSERR_OPENMODE; goto out; } } else mystp = NULL; if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) { /* * Check for a conflicting deny bit. */ LIST_FOREACH(tstp, &lfp->lf_open, ls_file) { if (tstp != mystp) { bits = tstp->ls_flags; bits >>= NFSLCK_SHIFT; if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) { KASSERT(vnode_unlocked == 0, ("nfsrv_lockctrl: vnode unlocked1")); ret = nfsrv_clientconflict(tstp->ls_clp, &haslock, vp, p); if (ret == 1) { /* * nfsrv_clientconflict unlocks state * when it returns non-zero. */ lckstp = NULL; goto tryagain; } if (ret == 0) NFSUNLOCKSTATE(); if (ret == 2) error = NFSERR_PERM; else error = NFSERR_OPENMODE; goto out; } } } /* We're outta here */ NFSUNLOCKSTATE(); goto out; } } /* * For setattr, just get rid of all the Delegations for other clients. */ if (new_stp->ls_flags & NFSLCK_SETATTR) { KASSERT(vnode_unlocked == 0, ("nfsrv_lockctrl: vnode unlocked2")); ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p); if (ret) { /* * nfsrv_cleandeleg() unlocks state when it * returns non-zero. */ if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } if (!(new_stp->ls_flags & NFSLCK_CHECK) || (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg))) { NFSUNLOCKSTATE(); goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * I currently believe the conflict algorithm to be: * For Lock Ops (Lock/LockT/LockU) * - there is a conflict iff a different client has a write delegation * For Reading (Read Op) * - there is a conflict iff a different client has a write delegation * (the specialids are always a different client) * For Writing (Write/Setattr of size) * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (I don't understand why this isn't allowed, but that seems to be * the current consensus?) */ tstp = LIST_FIRST(&lfp->lf_deleg); while (tstp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(tstp, ls_file); if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))|| ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_READ))) && clp != tstp->ls_clp && (tstp->ls_flags & NFSLCK_DELEGWRITE)) || ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_WRITE) && (clp != tstp->ls_clp || (tstp->ls_flags & NFSLCK_DELEGREAD)))) { ret = 0; if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4"); NFSVOPUNLOCK(vp, 0); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); vnode_unlocked = 0; if ((vp->v_iflag & VI_DOOMED) != 0) ret = NFSERR_SERVERFAULT; NFSLOCKSTATE(); } if (ret == 0) ret = nfsrv_delegconflict(tstp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict unlocks state when it * returns non-zero, which it always does. */ if (other_lop) { free(other_lop, M_NFSDLOCK); other_lop = NULL; } if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } /* Never gets here. */ } tstp = nstp; } /* * Handle the unlock case by calling nfsrv_updatelock(). * (Should I have done some access checking above for unlock? For now, * just let it happen.) */ if (new_stp->ls_flags & NFSLCK_UNLOCK) { first = new_lop->lo_first; end = new_lop->lo_end; nfsrv_updatelock(stp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(stp->ls_stateid.seqid); if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = stp->ls_stateid.seqid = 1; stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; if (filestruct_locked != 0) { NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5"); vnode_unlocked = 1; NFSVOPUNLOCK(vp, 0); } /* Update the local locks. */ nfsrv_localunlock(vp, lfp, first, end, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Search for a conflicting lock. A lock conflicts if: * - the lock range overlaps and * - at least one lock is a write lock and * - it is not owned by the same lock owner */ if (!delegation) { LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { if (new_lop->lo_end > lop->lo_first && new_lop->lo_first < lop->lo_end && (new_lop->lo_flags == NFSLCK_WRITE || lop->lo_flags == NFSLCK_WRITE) && lckstp != lop->lo_stp && (clp != lop->lo_stp->ls_clp || lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen || NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner, lckstp->ls_ownerlen))) { if (other_lop) { free(other_lop, M_NFSDLOCK); other_lop = NULL; } if (vnode_unlocked != 0) ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock, NULL, p); else ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock, vp, p); if (ret == 1) { if (filestruct_locked != 0) { if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6"); NFSVOPUNLOCK(vp, 0); } /* Roll back local locks. */ nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); vnode_unlocked = 0; if ((vp->v_iflag & VI_DOOMED) != 0) { error = NFSERR_SERVERFAULT; goto out; } } /* * nfsrv_clientconflict() unlocks state when it * returns non-zero. */ lckstp = NULL; goto tryagain; } /* * Found a conflicting lock, so record the conflict and * return the error. */ if (cfp != NULL && ret == 0) { cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0]; cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1]; cfp->cl_first = lop->lo_first; cfp->cl_end = lop->lo_end; cfp->cl_flags = lop->lo_flags; cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen; NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner, cfp->cl_ownerlen); } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else if (new_stp->ls_flags & NFSLCK_CHECK) error = NFSERR_LOCKED; else error = NFSERR_DENIED; if (filestruct_locked != 0 && ret == 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7"); vnode_unlocked = 1; NFSVOPUNLOCK(vp, 0); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } if (ret == 0) NFSUNLOCKSTATE(); goto out; } } } /* * We only get here if there was no lock that conflicted. */ if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) { NFSUNLOCKSTATE(); goto out; } /* * We only get here when we are creating or modifying a lock. * There are two variants: * - exist_lock_owner where lock_owner exists * - open_to_lock_owner with new lock_owner */ first = new_lop->lo_first; end = new_lop->lo_end; lock_flags = new_lop->lo_flags; if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) { nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(lckstp->ls_stateid.seqid); if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = lckstp->ls_stateid.seqid = 1; stateidp->other[0] = lckstp->ls_stateid.other[0]; stateidp->other[1] = lckstp->ls_stateid.other[1]; stateidp->other[2] = lckstp->ls_stateid.other[2]; } else { /* * The new open_to_lock_owner case. * Link the new nfsstate into the lists. */ new_stp->ls_seq = new_stp->ls_opentolockseq; nfsrvd_refcache(new_stp->ls_op); stateidp->seqid = new_stp->ls_stateid.seqid = 1; stateidp->other[0] = new_stp->ls_stateid.other[0] = clp->lc_clientid.lval[0]; stateidp->other[1] = new_stp->ls_stateid.other[1] = clp->lc_clientid.lval[1]; stateidp->other[2] = new_stp->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_stp->ls_clp = clp; LIST_INIT(&new_stp->ls_lock); new_stp->ls_openstp = stp; new_stp->ls_lfp = lfp; nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp, lfp); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid), new_stp, ls_hash); LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list); *new_lopp = NULL; *new_stpp = NULL; nfsstatsv1.srvlockowners++; nfsrv_openpluslock++; } if (filestruct_locked != 0) { NFSUNLOCKSTATE(); nfsrv_locallock_commit(lfp, lock_flags, first, end); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); out: if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (vnode_unlocked != 0) { NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) error = NFSERR_SERVERFAULT; } if (other_lop) free(other_lop, M_NFSDLOCK); NFSEXITCODE2(error, nd); return (error); } /* * Check for state errors for Open. * repstat is passed back out as an error if more critical errors * are not detected. */ APPLESTATIC int nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd, NFSPROC_T *p, int repstat) { struct nfsstate *stp, *nstp; struct nfsclient *clp; struct nfsstate *ownerstp; struct nfslockfile *lfp, *new_lfp; int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; /* * Check for state resource limit exceeded. * Technically this should be SMP protected, but the worst * case error is "out by one or two" on the count when it * returns NFSERR_RESOURCE and the limit is just a rather * arbitrary high water mark, so no harm is done. */ if (nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } tryagain: new_lfp = malloc(sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); if (vp) getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the nfsclient structure. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (!error && ownerstp) { error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp, new_stp->ls_op); /* * If the OpenOwner hasn't been confirmed, assume the * old one was a replay and this one is ok. * See: RFC3530 Sec. 14.2.18. */ if (error == NFSERR_BADSEQID && (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM)) error = 0; } /* * Check for grace. */ if (!error) error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If none of the above errors occurred, let repstat be * returned. */ if (repstat && !error) error = repstat; if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } free(new_lfp, M_NFSDLOCKFILE); goto out; } /* * If vp == NULL, the file doesn't exist yet, so return ok. * (This always happens on the first pass, so haslock must be 0.) */ if (vp == NULL) { NFSUNLOCKSTATE(); free(new_lfp, M_NFSDLOCKFILE); goto out; } /* * Get the structure for the underlying file. */ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) free(new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && (((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) || stateidp->seqid == stp->ls_stateid.seqid) && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } } /* * Check for access/deny bit conflicts. I check for the same * owner as well, in case the client didn't bother. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) && (((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks * state when it returns non-zero. */ goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) * I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current consensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) * Don't check for a Reclaim, since that will be dealt with * by nfsrv_openctrl(). */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ if (ret == -1) goto tryagain; error = ret; goto out; } } stp = nstp; } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE2(error, nd); return (error); } /* * Open control function to create/update open state for an open. */ APPLESTATIC int nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp, nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp, NFSPROC_T *p, u_quad_t filerev) { struct nfsstate *new_stp = *new_stpp; struct nfsstate *stp, *nstp; struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg; struct nfslockfile *lfp, *new_lfp; struct nfsclient *clp; int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1; int readonly = 0, cbret = 1, getfhret = 0; int gotstate = 0, len = 0; u_char *clidp = NULL; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). * (Paranoia, should have been detected by nfsrv_opencheck().) * If an error does show up, return NFSERR_EXPIRED, since the * the seqid# has already been incremented. */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) { printf("Nfsd: openctrl unexpected restart err=%d\n", error); error = NFSERR_EXPIRED; goto out; } clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK); tryagain: new_lfp = malloc(sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); new_open = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); new_deleg = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the client structure. Since the linked lists could be changed * by other nfsd processes if this process does a tsleep(), one of * two things must be done. * 1 - don't tsleep() * or * 2 - get the nfsv4_lock() { indicated by haslock == 1 } * before using the lists, since this lock stops the other * nfsd. This should only be used for rare cases, since it * essentially single threads the nfsd. * At this time, it is only done for cases where the stable * storage file must be written prior to completion of state * expiration. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) && clp->lc_program) { /* * This happens on the first open for a client * that supports callbacks. */ NFSUNLOCKSTATE(); /* * Although nfsrv_docallback() will sleep, clp won't * go away, since they are only removed when the * nfsv4_lock() has blocked the nfsd threads. The * fields in clp can change, but having multiple * threads do this Null callback RPC should be * harmless. */ cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL, - NULL, 0, NULL, NULL, NULL, p); + NULL, 0, NULL, NULL, NULL, 0, p); NFSLOCKSTATE(); clp->lc_flags &= ~LCL_NEEDSCBNULL; if (!cbret) clp->lc_flags |= LCL_CALLBACKSON; } /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (error) { NFSUNLOCKSTATE(); printf("Nfsd: openctrl unexpected state err=%d\n", error); free(new_lfp, M_NFSDLOCKFILE); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } if (new_stp->ls_flags & NFSLCK_RECLAIM) nfsrv_markstable(clp); /* * Get the structure for the underlying file. */ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) free(new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected getlockfile err=%d\n", error); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && (((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) || stateidp->seqid == stp->ls_stateid.seqid) && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected expiry\n"); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } /* * Don't issue a Delegation, since one already exists and * delay delegation timeout, as required. */ delegate = 0; nfsrv_delaydelegtimeout(stp); } /* * Check for access/deny bit conflicts. I also check for the * same owner, since the client might not have bothered to check. * Also, note an open for the same file and owner, if found, * which is all we do here for Delegate_Cur, since conflict * checking is already done. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (ownerstp && stp->ls_openowner == ownerstp) openstp = stp; if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) { /* * If another client has the file open, the only * delegation that can be issued is a Read delegation * and only if it is a Read open with Deny none. */ if (clp != stp->ls_clp) { if ((stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) writedeleg = 0; else delegate = 0; } if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks state * when it returns non-zero. */ free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); openstp = NULL; goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); printf("nfsd openctrl unexpected client cnfl\n"); goto out; } } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) * I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current consensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD)) writedeleg = 0; else delegate = 0; if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { if (new_stp->ls_flags & NFSLCK_RECLAIM) { delegate = 2; } else { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ printf("Nfsd openctrl unexpected deleg cnfl\n"); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (ret == -1) { openstp = NULL; goto tryagain; } error = ret; goto out; } } } stp = nstp; } } /* * We only get here if there was no open that conflicted. * If an open for the owner exists, or in the access/deny bits. * Otherwise it is a new open. If the open_owner hasn't been * confirmed, replace the open with the new one needing confirmation, * otherwise add the open. */ if (new_stp->ls_flags & NFSLCK_DELEGPREV) { /* * Handle NFSLCK_DELEGPREV by searching the old delegations for * a match. If found, just move the old delegation to the current * delegation list and issue open. If not found, return * NFSERR_EXPIRED. */ LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { if (stp->ls_lfp == lfp) { /* Found it */ if (stp->ls_clp != clp) panic("olddeleg clp"); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_hash); stp->ls_flags &= ~NFSLCK_OLDDELEG; stp->ls_stateid.seqid = delegstateidp->seqid = 1; stp->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; stp->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; stp->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); stp->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, stp->ls_stateid), stp, ls_hash); if (stp->ls_flags & NFSLCK_DELEGWRITE) *rflagsp |= NFSV4OPEN_WRITEDELEGATE; else *rflagsp |= NFSV4OPEN_READDELEGATE; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; /* * Now, do the associated open. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)| NFSLCK_OPEN; if (stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; break; } } if (stp == LIST_END(&clp->lc_olddeleg)) error = NFSERR_EXPIRED; } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { /* * Scan to see that no delegation for this client and file * doesn't already exist. * There also shouldn't yet be an Open for this file and * openowner. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_clp == clp) break; } if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) { /* * This is the Claim_Previous case with a delegation * type != Delegate_None. */ /* * First, add the delegation. (Although we must issue the * delegation, we can also ask for an immediate return.) */ new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (new_stp->ls_flags & NFSLCK_DELEGWRITE) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; if (delegate == 2 || nfsrv_issuedelegs == 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) || !NFSVNO_DELEGOK(vp)) *rflagsp |= NFSV4OPEN_RECALL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; /* * Now, do the associated open. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) | NFSLCK_OPEN; if (new_stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; } else { error = NFSERR_RECLAIMCONFLICT; } } else if (ownerstp) { if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) { /* Replace the open */ if (ownerstp->ls_op) nfsrvd_derefcache(ownerstp->ls_op); ownerstp->ls_op = new_stp->ls_op; nfsrvd_refcache(ownerstp->ls_op); ownerstp->ls_seq = new_stp->ls_seq; *rflagsp |= NFSV4OPEN_RESULTCONFIRM; stp = LIST_FIRST(&ownerstp->ls_open); stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; stp->ls_stateid.seqid = 1; stp->ls_uid = new_stp->ls_uid; if (lfp != stp->ls_lfp) { LIST_REMOVE(stp, ls_file); LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file); stp->ls_lfp = lfp; } openstp = stp; } else if (openstp) { openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS); openstp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && openstp->ls_stateid.seqid == 0) openstp->ls_stateid.seqid = 1; /* * This is where we can choose to issue a delegation. */ if (delegate == 0 || writedeleg == 0 || NFSVNO_EXRDONLY(exp) || (readonly != 0 && nfsrv_writedelegifpos == 0) || !NFSVNO_DELEGOK(vp) || (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else if (nfsrv_issuedelegs == 0 || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; nfsrv_writedelegcnt++; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } else { new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)| NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; new_open->ls_openowner = ownerstp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; /* * This is where we can choose to issue a delegation. */ if (delegate == 0 || (writedeleg == 0 && readonly == 0) || !NFSVNO_DELEGOK(vp) || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else if (nfsrv_issuedelegs == 0 || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } } else { /* * New owner case. Start the open_owner sequence with a * Needs confirmation (unless a reclaim) and hang the * new open off it. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; LIST_INIT(&new_open->ls_open); new_open->ls_openowner = new_stp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); if (new_stp->ls_flags & NFSLCK_RECLAIM) { new_stp->ls_flags = 0; } else if ((nd->nd_flag & ND_NFSV41) != 0) { /* NFSv4.1 never needs confirmation. */ new_stp->ls_flags = 0; /* * This is where we can choose to issue a delegation. */ if (delegate && nfsrv_issuedelegs && (writedeleg || readonly) && (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) == LCL_CALLBACKSON && !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) && NFSVNO_DELEGOK(vp) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0)) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } /* * Since NFSv4.1 never does an OpenConfirm, the first * open state will be acquired here. */ if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, clidp, len); gotstate = 1; } } else { *rflagsp |= NFSV4OPEN_RESULTCONFIRM; new_stp->ls_flags = NFSLCK_NEEDSCONFIRM; } nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; *new_stpp = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } if (!error) { stateidp->seqid = openstp->ls_stateid.seqid; stateidp->other[0] = openstp->ls_stateid.other[0]; stateidp->other[1] = openstp->ls_stateid.other[1]; stateidp->other[2] = openstp->ls_stateid.other[2]; } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (new_open) free(new_open, M_NFSDSTATE); if (new_deleg) free(new_deleg, M_NFSDSTATE); /* * If the NFSv4.1 client just acquired its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: free(clidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Open update. Does the confirm, downgrade and close. */ APPLESTATIC int nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid, - nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p) + nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p, + int *retwriteaccessp) { struct nfsstate *stp; struct nfsclient *clp; struct nfslockfile *lfp; u_int32_t bits; int error = 0, gotstate = 0, len = 0; u_char *clidp = NULL; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK); NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error) error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * Sanity check the open. */ if (!error && (!(stp->ls_flags & NFSLCK_OPEN) || (!(new_stp->ls_flags & NFSLCK_CONFIRM) && (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) || ((new_stp->ls_flags & NFSLCK_CONFIRM) && (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))))) error = NFSERR_BADSTATEID; if (!error) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid && (((nd->nd_flag & ND_NFSV41) == 0 && !(new_stp->ls_flags & NFSLCK_CONFIRM)) || ((nd->nd_flag & ND_NFSV41) != 0 && new_stp->ls_stateid.seqid != 0))) error = NFSERR_OLDSTATEID; if (!error && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) error = NFSERR_ISDIR; else error = NFSERR_INVAL; } if (error) { /* * If a client tries to confirm an Open with a bad * seqid# and there are no byte range locks or other Opens * on the openowner, just throw it away, so the next use of the * openowner will start a fresh seq#. */ if (error == NFSERR_BADSEQID && (new_stp->ls_flags & NFSLCK_CONFIRM) && nfsrv_nootherstate(stp)) nfsrv_freeopenowner(stp->ls_openowner, 0, p); NFSUNLOCKSTATE(); goto out; } /* * Set the return stateid. */ stateidp->seqid = stp->ls_stateid.seqid + 1; if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = 1; stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; /* * Now, handle the three cases. */ if (new_stp->ls_flags & NFSLCK_CONFIRM) { /* * If the open doesn't need confirmation, it seems to me that * there is a client error, but I'll just log it and keep going? */ if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) printf("Nfsv4d: stray open confirm\n"); stp->ls_openowner->ls_flags = 0; stp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 0) stp->ls_stateid.seqid = 1; if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, clidp, len); gotstate = 1; } NFSUNLOCKSTATE(); } else if (new_stp->ls_flags & NFSLCK_CLOSE) { lfp = stp->ls_lfp; + if (retwriteaccessp != NULL) { + if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0) + *retwriteaccessp = 1; + else + *retwriteaccessp = 0; + } if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) { /* Get the lf lock */ nfsrv_locklf(lfp); NFSUNLOCKSTATE(); ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate"); NFSVOPUNLOCK(vp, 0); if (nfsrv_freeopen(stp, vp, 1, p) == 0) { NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); } NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); } else { (void) nfsrv_freeopen(stp, NULL, 0, p); NFSUNLOCKSTATE(); } } else { /* * Update the share bits, making sure that the new set are a * subset of the old ones. */ bits = (new_stp->ls_flags & NFSLCK_SHAREBITS); if (~(stp->ls_flags) & bits) { NFSUNLOCKSTATE(); error = NFSERR_INVAL; goto out; } stp->ls_flags = (bits | NFSLCK_OPEN); stp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 0) stp->ls_stateid.seqid = 1; NFSUNLOCKSTATE(); } /* * If the client just confirmed its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: free(clidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Delegation update. Does the purge and return. */ APPLESTATIC int nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid, nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred, - NFSPROC_T *p) + NFSPROC_T *p, int *retwriteaccessp) { struct nfsstate *stp; struct nfsclient *clp; int error = 0; fhandle_t fh; /* * Do a sanity check against the file handle for DelegReturn. */ if (vp) { error = nfsvno_getfh(vp, &fh, p); if (error) goto out; } /* * Check for restart conditions (client and server). */ if (op == NFSV4OP_DELEGRETURN) error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN, stateidp, 0); else error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE, stateidp, 0); NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ if (!error) error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error) { if (error == NFSERR_CBPATHDOWN) error = 0; if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN) error = NFSERR_STALESTATEID; } if (!error && op == NFSV4OP_DELEGRETURN) { error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp); if (!error && stp->ls_stateid.seqid != stateidp->seqid && ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0)) error = NFSERR_OLDSTATEID; } /* * NFSERR_EXPIRED means that the state has gone away, * so Delegations have been purged. Just return ok. */ if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) { NFSUNLOCKSTATE(); error = 0; goto out; } if (error) { NFSUNLOCKSTATE(); goto out; } if (op == NFSV4OP_DELEGRETURN) { if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh, sizeof (fhandle_t))) { NFSUNLOCKSTATE(); error = NFSERR_BADSTATEID; goto out; } + if (retwriteaccessp != NULL) { + if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0) + *retwriteaccessp = 1; + else + *retwriteaccessp = 0; + } nfsrv_freedeleg(stp); } else { nfsrv_freedeleglist(&clp->lc_olddeleg); } NFSUNLOCKSTATE(); error = 0; out: NFSEXITCODE(error); return (error); } /* * Release lock owner. */ APPLESTATIC int nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid, NFSPROC_T *p) { struct nfsstate *stp, *nstp, *openstp, *ownstp; struct nfsclient *clp; int error = 0; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; NFSLOCKSTATE(); /* * Get the lock owner by name. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, NULL, p); if (error) { NFSUNLOCKSTATE(); goto out; } LIST_FOREACH(ownstp, &clp->lc_open, ls_list) { LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) { stp = LIST_FIRST(&openstp->ls_open); while (stp != LIST_END(&openstp->ls_open)) { nstp = LIST_NEXT(stp, ls_list); /* * If the owner matches, check for locks and * then free or return an error. */ if (stp->ls_ownerlen == new_stp->ls_ownerlen && !NFSBCMP(stp->ls_owner, new_stp->ls_owner, stp->ls_ownerlen)){ if (LIST_EMPTY(&stp->ls_lock)) { nfsrv_freelockowner(stp, NULL, 0, p); } else { NFSUNLOCKSTATE(); error = NFSERR_LOCKSHELD; goto out; } } stp = nstp; } } } NFSUNLOCKSTATE(); out: NFSEXITCODE(error); return (error); } /* * Get the file handle for a lock structure. */ static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p) { fhandle_t *fhp = NULL; int error; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL")); fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfh"); } error = nfsvno_getfh(vp, fhp, p); NFSEXITCODE(error); return (error); } /* * Get an nfs lock structure. Allocate one, as required, and return a * pointer to it. * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock. */ static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit) { struct nfslockfile *lfp; fhandle_t *fhp = NULL, *tfhp; struct nfslockhashhead *hp; struct nfslockfile *new_lfp = NULL; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { new_lfp = *new_lfpp; fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfile"); } hp = NFSLOCKHASH(fhp); LIST_FOREACH(lfp, hp, lf_hash) { tfhp = &lfp->lf_fh; if (NFSVNO_CMPFH(fhp, tfhp)) { if (lockit) nfsrv_locklf(lfp); *lfpp = lfp; return (0); } } if (!(flags & NFSLCK_OPEN)) return (-1); /* * No match, so chain the new one into the list. */ LIST_INIT(&new_lfp->lf_open); LIST_INIT(&new_lfp->lf_lock); LIST_INIT(&new_lfp->lf_deleg); LIST_INIT(&new_lfp->lf_locallock); LIST_INIT(&new_lfp->lf_rollback); new_lfp->lf_locallock_lck.nfslock_usecnt = 0; new_lfp->lf_locallock_lck.nfslock_lock = 0; new_lfp->lf_usecount = 0; LIST_INSERT_HEAD(hp, new_lfp, lf_hash); *lfpp = new_lfp; *new_lfpp = NULL; return (0); } /* * This function adds a nfslock lock structure to the list for the associated * nfsstate and nfslockfile structures. It will be inserted after the * entry pointed at by insert_lop. */ static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp) { struct nfslock *lop, *nlop; new_lop->lo_stp = stp; new_lop->lo_lfp = lfp; if (stp != NULL) { /* Insert in increasing lo_first order */ lop = LIST_FIRST(&lfp->lf_lock); if (lop == LIST_END(&lfp->lf_lock) || new_lop->lo_first <= lop->lo_first) { LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile); } else { nlop = LIST_NEXT(lop, lo_lckfile); while (nlop != LIST_END(&lfp->lf_lock) && nlop->lo_first < new_lop->lo_first) { lop = nlop; nlop = LIST_NEXT(lop, lo_lckfile); } LIST_INSERT_AFTER(lop, new_lop, lo_lckfile); } } else { new_lop->lo_lckfile.le_prev = NULL; /* list not used */ } /* * Insert after insert_lop, which is overloaded as stp or lfp for * an empty list. */ if (stp == NULL && (struct nfslockfile *)insert_lop == lfp) LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner); else if ((struct nfsstate *)insert_lop == stp) LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner); else LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner); if (stp != NULL) { nfsstatsv1.srvlocks++; nfsrv_openpluslock++; } } /* * This function updates the locking for a lock owner and given file. It * maintains a list of lock ranges ordered on increasing file offset that * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style). * It always adds new_lop to the list and sometimes uses the one pointed * at by other_lopp. */ static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp) { struct nfslock *new_lop = *new_lopp; struct nfslock *lop, *tlop, *ilop; struct nfslock *other_lop = *other_lopp; int unlock = 0, myfile = 0; u_int64_t tmp; /* * Work down the list until the lock is merged. */ if (new_lop->lo_flags & NFSLCK_UNLOCK) unlock = 1; if (stp != NULL) { ilop = (struct nfslock *)stp; lop = LIST_FIRST(&stp->ls_lock); } else { ilop = (struct nfslock *)lfp; lop = LIST_FIRST(&lfp->lf_locallock); } while (lop != NULL) { /* * Only check locks for this file that aren't before the start of * new lock's range. */ if (lop->lo_lfp == lfp) { myfile = 1; if (lop->lo_end >= new_lop->lo_first) { if (new_lop->lo_end < lop->lo_first) { /* * If the new lock ends before the start of the * current lock's range, no merge, just insert * the new lock. */ break; } if (new_lop->lo_flags == lop->lo_flags || (new_lop->lo_first <= lop->lo_first && new_lop->lo_end >= lop->lo_end)) { /* * This lock can be absorbed by the new lock/unlock. * This happens when it covers the entire range * of the old lock or is contiguous * with the old lock and is of the same type or an * unlock. */ if (lop->lo_first < new_lop->lo_first) new_lop->lo_first = lop->lo_first; if (lop->lo_end > new_lop->lo_end) new_lop->lo_end = lop->lo_end; tlop = lop; lop = LIST_NEXT(lop, lo_lckowner); nfsrv_freenfslock(tlop); continue; } /* * All these cases are for contiguous locks that are not the * same type, so they can't be merged. */ if (new_lop->lo_first <= lop->lo_first) { /* * This case is where the new lock overlaps with the * first part of the old lock. Move the start of the * old lock to just past the end of the new lock. The * new lock will be inserted in front of the old, since * ilop hasn't been updated. (We are done now.) */ lop->lo_first = new_lop->lo_end; break; } if (new_lop->lo_end >= lop->lo_end) { /* * This case is where the new lock overlaps with the * end of the old lock's range. Move the old lock's * end to just before the new lock's first and insert * the new lock after the old lock. * Might not be done yet, since the new lock could * overlap further locks with higher ranges. */ lop->lo_end = new_lop->lo_first; ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); continue; } /* * The final case is where the new lock's range is in the * middle of the current lock's and splits the current lock * up. Use *other_lopp to handle the second part of the * split old lock range. (We are done now.) * For unlock, we use new_lop as other_lop and tmp, since * other_lop and new_lop are the same for this case. * We noted the unlock case above, so we don't need * new_lop->lo_flags any longer. */ tmp = new_lop->lo_first; if (other_lop == NULL) { if (!unlock) panic("nfsd srv update unlock"); other_lop = new_lop; *new_lopp = NULL; } other_lop->lo_first = new_lop->lo_end; other_lop->lo_end = lop->lo_end; other_lop->lo_flags = lop->lo_flags; other_lop->lo_stp = stp; other_lop->lo_lfp = lfp; lop->lo_end = tmp; nfsrv_insertlock(other_lop, lop, stp, lfp); *other_lopp = NULL; ilop = lop; break; } } ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); if (myfile && (lop == NULL || lop->lo_lfp != lfp)) break; } /* * Insert the new lock in the list at the appropriate place. */ if (!unlock) { nfsrv_insertlock(new_lop, ilop, stp, lfp); *new_lopp = NULL; } } /* * This function handles sequencing of locks, etc. * It returns an error that indicates what the caller should do. */ static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op) { int error = 0; if ((nd->nd_flag & ND_NFSV41) != 0) /* NFSv4.1 ignores the open_seqid and lock_seqid. */ goto out; if (op != nd->nd_rp) panic("nfsrvstate checkseqid"); if (!(op->rc_flag & RC_INPROG)) panic("nfsrvstate not inprog"); if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) { printf("refcnt=%d\n", stp->ls_op->rc_refcnt); panic("nfsrvstate op refcnt"); } if ((stp->ls_seq + 1) == seqid) { if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); stp->ls_op = op; nfsrvd_refcache(op); stp->ls_seq = seqid; goto out; } else if (stp->ls_seq == seqid && stp->ls_op && op->rc_xid == stp->ls_op->rc_xid && op->rc_refcnt == 0 && op->rc_reqlen == stp->ls_op->rc_reqlen && op->rc_cksum == stp->ls_op->rc_cksum) { if (stp->ls_op->rc_flag & RC_INPROG) { error = NFSERR_DONTREPLY; goto out; } nd->nd_rp = stp->ls_op; nd->nd_rp->rc_flag |= RC_INPROG; nfsrvd_delcache(op); error = NFSERR_REPLYFROMCACHE; goto out; } error = NFSERR_BADSEQID; out: NFSEXITCODE2(error, nd); return (error); } /* * Get the client ip address for callbacks. If the strings can't be parsed, * just set lc_program to 0 to indicate no callbacks are possible. * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set * the address to the client's transport address. This won't be used * for callbacks, but can be printed out by nfsstats for info.) * Return error if the xdr can't be parsed, 0 otherwise. */ APPLESTATIC int nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp) { u_int32_t *tl; u_char *cp, *cp2; int i, j; struct sockaddr_in *rad, *sad; u_char protocol[5], addr[24]; int error = 0, cantparse = 0; union { in_addr_t ival; u_char cval[4]; } ip; union { in_port_t sval; u_char cval[2]; } port; rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); rad->sin_family = AF_INET; rad->sin_len = sizeof (struct sockaddr_in); rad->sin_addr.s_addr = 0; rad->sin_port = 0; clp->lc_req.nr_client = NULL; clp->lc_req.nr_lock = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i >= 3 && i <= 4) { error = nfsrv_mtostr(nd, protocol, i); if (error) goto nfsmout; if (!strcmp(protocol, "tcp")) { clp->lc_flags |= LCL_TCPCALLBACK; clp->lc_req.nr_sotype = SOCK_STREAM; clp->lc_req.nr_soproto = IPPROTO_TCP; } else if (!strcmp(protocol, "udp")) { clp->lc_req.nr_sotype = SOCK_DGRAM; clp->lc_req.nr_soproto = IPPROTO_UDP; } else { cantparse = 1; } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (i == 0) { cantparse = 1; } else if (!cantparse && i <= 23 && i >= 11) { error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; /* * Parse out the address fields. We expect 6 decimal numbers * separated by '.'s. */ cp = addr; i = 0; while (*cp && i < 6) { cp2 = cp; while (*cp2 && *cp2 != '.') cp2++; if (*cp2) *cp2++ = '\0'; else if (i != 5) { cantparse = 1; break; } j = nfsrv_getipnumber(cp); if (j >= 0) { if (i < 4) ip.cval[3 - i] = j; else port.cval[5 - i] = j; } else { cantparse = 1; break; } cp = cp2; i++; } if (!cantparse) { if (ip.ival != 0x0) { rad->sin_addr.s_addr = htonl(ip.ival); rad->sin_port = htons(port.sval); } else { cantparse = 1; } } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } if (cantparse) { sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); if (sad->sin_family == AF_INET) { rad->sin_addr.s_addr = sad->sin_addr.s_addr; rad->sin_port = 0x0; } clp->lc_program = 0; } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Turn a string of up to three decimal digits into a number. Return -1 upon * error. */ static int nfsrv_getipnumber(u_char *cp) { int i = 0, j = 0; while (*cp) { if (j > 2 || *cp < '0' || *cp > '9') return (-1); i *= 10; i += (*cp - '0'); cp++; j++; } if (i < 256) return (i); return (-1); } /* * This function checks for restart conditions. */ static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid) { int ret = 0; /* * First check for a server restart. Open, LockT, ReleaseLockOwner * and DelegPurge have a clientid, the rest a stateid. */ if (flags & (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) { if (clientid.lval[0] != nfsrvboottime) { ret = NFSERR_STALECLIENTID; goto out; } } else if (stateidp->other[0] != nfsrvboottime && specialid == 0) { ret = NFSERR_STALESTATEID; goto out; } /* * Read, Write, Setattr and LockT can return NFSERR_GRACE and do * not use a lock/open owner seqid#, so the check can be done now. * (The others will be checked, as required, later.) */ if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST))) goto out; NFSLOCKSTATE(); ret = nfsrv_checkgrace(NULL, NULL, flags); NFSUNLOCKSTATE(); out: NFSEXITCODE(ret); return (ret); } /* * Check for grace. */ static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags) { int error = 0, notreclaimed; struct nfsrv_stable *sp; if ((nfsrv_stablefirst.nsf_flags & (NFSNSF_UPDATEDONE | NFSNSF_GRACEOVER)) == 0) { /* * First, check to see if all of the clients have done a * ReclaimComplete. If so, grace can end now. */ notreclaimed = 0; LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) { notreclaimed = 1; break; } } if (notreclaimed == 0) nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); } if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) { if (flags & NFSLCK_RECLAIM) { error = NFSERR_NOGRACE; goto out; } } else { if (!(flags & NFSLCK_RECLAIM)) { error = NFSERR_GRACE; goto out; } if (nd != NULL && clp != NULL && (nd->nd_flag & ND_NFSV41) != 0 && (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) { error = NFSERR_NOGRACE; goto out; } /* * If grace is almost over and we are still getting Reclaims, * extend grace a bit. */ if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; } out: NFSEXITCODE(error); return (error); } /* * Do a server callback. */ static int -nfsrv_docallback(struct nfsclient *clp, int procnum, - nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, - struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p) +nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, + int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, + int laytype, NFSPROC_T *p) { mbuf_t m; u_int32_t *tl; - struct nfsrv_descript nfsd, *nd = &nfsd; + struct nfsrv_descript *nd; struct ucred *cred; int error = 0; u_int32_t callback; struct nfsdsession *sep = NULL; + uint64_t tval; + nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); cred = newnfs_getcred(); NFSLOCKSTATE(); /* mostly for lc_cbref++ */ if (clp->lc_flags & LCL_NEEDSCONFIRM) { NFSUNLOCKSTATE(); panic("docallb"); } clp->lc_cbref++; /* * Fill the callback program# and version into the request * structure for newnfs_connect() to use. */ clp->lc_req.nr_prog = clp->lc_program; #ifdef notnow if ((clp->lc_flags & LCL_NFSV41) != 0) clp->lc_req.nr_vers = NFSV41_CBVERS; else #endif clp->lc_req.nr_vers = NFSV4_CBVERS; /* * First, fill in some of the fields of nd and cr. */ nd->nd_flag = ND_NFSV4; if (clp->lc_flags & LCL_GSS) nd->nd_flag |= ND_KERBV; if ((clp->lc_flags & LCL_NFSV41) != 0) nd->nd_flag |= ND_NFSV41; nd->nd_repstat = 0; cred->cr_uid = clp->lc_uid; cred->cr_gid = clp->lc_gid; callback = clp->lc_callback; NFSUNLOCKSTATE(); cred->cr_ngroups = 1; /* * Get the first mbuf for the request. */ MGET(m, M_WAITOK, MT_DATA); mbuf_setlen(m, 0); nd->nd_mreq = nd->nd_mb = m; nd->nd_bpos = NFSMTOD(m, caddr_t); /* * and build the callback request. */ if (procnum == NFSV4OP_CBGETATTR) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR, "CB Getattr", &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); (void)nfsrv_putattrbit(nd, attrbitp); } else if (procnum == NFSV4OP_CBRECALL) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL, "CB Recall", &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(stateidp->seqid); NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (trunc) *tl = newnfs_true; else *tl = newnfs_false; (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); + } else if (procnum == NFSV4OP_CBLAYOUTRECALL) { + NFSD_DEBUG(4, "docallback layout recall\n"); + nd->nd_procnum = NFSV4PROC_CBCOMPOUND; + error = nfsrv_cbcallargs(nd, clp, callback, + NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep); + NFSD_DEBUG(4, "aft cbcallargs=%d\n", error); + if (error != 0) { + mbuf_freem(nd->nd_mreq); + goto errout; + } + NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(laytype); + *tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY); + *tl++ = newnfs_true; + *tl = txdr_unsigned(NFSV4LAYOUTRET_FILE); + nfsm_fhtom(nd, (uint8_t *)fhp, NFSX_MYFH, 0); + NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID); + tval = 0; + txdr_hyper(tval, tl); tl += 2; + tval = UINT64_MAX; + txdr_hyper(tval, tl); tl += 2; + *tl++ = txdr_unsigned(stateidp->seqid); + NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER); + tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); + NFSD_DEBUG(4, "aft args\n"); } else if (procnum == NFSV4PROC_CBNULL) { nd->nd_procnum = NFSV4PROC_CBNULL; if ((clp->lc_flags & LCL_NFSV41) != 0) { error = nfsv4_getcbsession(clp, &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } } } else { error = NFSERR_SERVERFAULT; mbuf_freem(nd->nd_mreq); goto errout; } /* * Call newnfs_connect(), as required, and then newnfs_request(). */ (void) newnfs_sndlock(&clp->lc_req.nr_lock); if (clp->lc_req.nr_client == NULL) { if ((clp->lc_flags & LCL_NFSV41) != 0) { error = ECONNREFUSED; nfsrv_freesession(sep, NULL); } else if (nd->nd_procnum == NFSV4PROC_CBNULL) error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 1); else error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 3); } newnfs_sndunlock(&clp->lc_req.nr_lock); + NFSD_DEBUG(4, "aft sndunlock=%d\n", error); if (!error) { if ((nd->nd_flag & ND_NFSV41) != 0) { KASSERT(sep != NULL, ("sep NULL")); if (sep->sess_cbsess.nfsess_xprt != NULL) error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, NULL, cred, clp->lc_program, clp->lc_req.nr_vers, NULL, 1, NULL, &sep->sess_cbsess); else { /* * This should probably never occur, but if a * client somehow does an RPC without a * SequenceID Op that causes a callback just * after the nfsd threads have been terminated * and restared we could conceivably get here * without a backchannel xprt. */ printf("nfsrv_docallback: no xprt\n"); error = ECONNREFUSED; } + NFSD_DEBUG(4, "aft newnfs_request=%d\n", error); nfsrv_freesession(sep, NULL); } else error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, NULL, cred, clp->lc_program, clp->lc_req.nr_vers, NULL, 1, NULL, NULL); } errout: NFSFREECRED(cred); /* * If error is set here, the Callback path isn't working * properly, so twiddle the appropriate LCL_ flags. * (nd_repstat != 0 indicates the Callback path is working, * but the callback failed on the client.) */ if (error) { /* * Mark the callback pathway down, which disabled issuing * of delegations and gets Renew to return NFSERR_CBPATHDOWN. */ NFSLOCKSTATE(); clp->lc_flags |= LCL_CBDOWN; NFSUNLOCKSTATE(); } else { /* * Callback worked. If the callback path was down, disable * callbacks, so no more delegations will be issued. (This * is done on the assumption that the callback pathway is * flakey.) */ NFSLOCKSTATE(); if (clp->lc_flags & LCL_CBDOWN) clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON); NFSUNLOCKSTATE(); - if (nd->nd_repstat) + if (nd->nd_repstat) { error = nd->nd_repstat; - else if (error == 0 && procnum == NFSV4OP_CBGETATTR) + NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n", + procnum, error); + } else if (error == 0 && procnum == NFSV4OP_CBGETATTR) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, NULL); mbuf_freem(nd->nd_mrep); } NFSLOCKSTATE(); clp->lc_cbref--; if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) { clp->lc_flags &= ~LCL_WAKEUPWANTED; wakeup(clp); } NFSUNLOCKSTATE(); + free(nd, M_TEMP); NFSEXITCODE(error); return (error); } /* * Set up the compound RPC for the callback. */ static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, uint32_t callback, int op, const char *optag, struct nfsdsession **sepp) { uint32_t *tl; int error, len; len = strlen(optag); (void)nfsm_strtom(nd, optag, len); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if ((nd->nd_flag & ND_NFSV41) != 0) { *tl++ = txdr_unsigned(NFSV41_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(2); *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE); error = nfsv4_setcbsequence(nd, clp, 1, sepp); if (error != 0) return (error); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(op); } else { *tl++ = txdr_unsigned(NFSV4_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(op); } return (0); } /* * Return the next index# for a clientid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around, * it should be rebooted. * At an average rate of one new client per second, it will wrap around in * approximately 136 years. (I think the server will have been shut * down or rebooted before then.) */ static u_int32_t nfsrv_nextclientindex(void) { static u_int32_t client_index = 0; client_index++; if (client_index != 0) return (client_index); printf("%s: out of clientids\n", __func__); return (client_index); } /* * Return the next index# for a stateid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around * (will a BSD server stay up that long?), find * new start and end values. */ static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp) { struct nfsstate *stp; int i; u_int32_t canuse, min_index, max_index; if (!(clp->lc_flags & LCL_INDEXNOTOK)) { clp->lc_stateindex++; if (clp->lc_stateindex != clp->lc_statemaxindex) return (clp->lc_stateindex); } /* * Yuck, we've hit the end. * Look for a new min and max. */ min_index = 0; max_index = 0xffffffff; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if (stp->ls_stateid.other[2] > 0x80000000) { if (stp->ls_stateid.other[2] < max_index) max_index = stp->ls_stateid.other[2]; } else { if (stp->ls_stateid.other[2] > min_index) min_index = stp->ls_stateid.other[2]; } } } /* * Yikes, highly unlikely, but I'll handle it anyhow. */ if (min_index == 0x80000000 && max_index == 0x80000001) { canuse = 0; /* * Loop around until we find an unused entry. Return that * and set LCL_INDEXNOTOK, so the search will continue next time. * (This is one of those rare cases where a goto is the * cleanest way to code the loop.) */ tryagain: for (i = 0; i < nfsrv_statehashsize; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if (stp->ls_stateid.other[2] == canuse) { canuse++; goto tryagain; } } } clp->lc_flags |= LCL_INDEXNOTOK; return (canuse); } /* * Ok to start again from min + 1. */ clp->lc_stateindex = min_index + 1; clp->lc_statemaxindex = max_index; clp->lc_flags &= ~LCL_INDEXNOTOK; return (clp->lc_stateindex); } /* * The following functions handle the stable storage file that deals with * the edge conditions described in RFC3530 Sec. 8.6.3. * The file is as follows: * - a single record at the beginning that has the lease time of the * previous server instance (before the last reboot) and the nfsrvboottime * values for the previous server boots. * These previous boot times are used to ensure that the current * nfsrvboottime does not, somehow, get set to a previous one. * (This is important so that Stale ClientIDs and StateIDs can * be recognized.) * The number of previous nfsvrboottime values precedes the list. * - followed by some number of appended records with: * - client id string * - flag that indicates it is a record revoking state via lease * expiration or similar * OR has successfully acquired state. * These structures vary in length, with the client string at the end, up * to NFSV4_OPAQUELIMIT in size. * * At the end of the grace period, the file is truncated, the first * record is rewritten with updated information and any acquired state * records for successful reclaims of state are written. * * Subsequent records are appended when the first state is issued to * a client and when state is revoked for a client. * * When reading the file in, state issued records that come later in * the file override older ones, since the append log is in cronological order. * If, for some reason, the file can't be read, the grace period is * immediately terminated and all reclaims get NFSERR_NOGRACE. */ /* * Read in the stable storage file. Called by nfssvc() before the nfsd * processes start servicing requests. */ APPLESTATIC void nfsrv_setupstable(NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfsrv_stable *sp, *nsp; struct nfst_rec *tsp; int error, i, tryagain; off_t off = 0; ssize_t aresid, len; /* * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without * a reboot, so state has not been lost. */ if (sf->nsf_flags & NFSNSF_UPDATEDONE) return; /* * Set Grace over just until the file reads successfully. */ nfsrvboottime = time_second; LIST_INIT(&sf->nsf_head); sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; if (sf->nsf_fp == NULL) return; error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid || sf->nsf_numboots == 0 || sf->nsf_numboots > NFSNSF_MAXNUMBOOTS) return; /* * Now, read in the boottimes. */ sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) * sizeof (time_t), M_TEMP, M_WAITOK); off = sizeof (struct nfsf_rec); error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid) { free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } /* * Make sure this nfsrvboottime is different from all recorded * previous ones. */ do { tryagain = 0; for (i = 0; i < sf->nsf_numboots; i++) { if (nfsrvboottime == sf->nsf_bootvals[i]) { nfsrvboottime++; tryagain = 1; break; } } } while (tryagain); sf->nsf_flags |= NFSNSF_OK; off += (sf->nsf_numboots * sizeof (time_t)); /* * Read through the file, building a list of records for grace * checking. * Each record is between sizeof (struct nfst_rec) and * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1 * and is actually sizeof (struct nfst_rec) + nst_len - 1. */ tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK); do { error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid; if (error || (len > 0 && (len < sizeof (struct nfst_rec) || len < (sizeof (struct nfst_rec) + tsp->len - 1)))) { /* * Yuck, the file has been corrupted, so just return * after clearing out any restart state, so the grace period * is over. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { LIST_REMOVE(sp, nst_list); free(sp, M_TEMP); } free(tsp, M_TEMP); sf->nsf_flags &= ~NFSNSF_OK; free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } if (len > 0) { off += sizeof (struct nfst_rec) + tsp->len - 1; /* * Search the list for a matching client. */ LIST_FOREACH(sp, &sf->nsf_head, nst_list) { if (tsp->len == sp->nst_len && !NFSBCMP(tsp->client, sp->nst_client, tsp->len)) break; } if (sp == LIST_END(&sf->nsf_head)) { sp = (struct nfsrv_stable *)malloc(tsp->len + sizeof (struct nfsrv_stable) - 1, M_TEMP, M_WAITOK); NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec, sizeof (struct nfst_rec) + tsp->len - 1); LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list); } else { if (tsp->flag == NFSNST_REVOKE) sp->nst_flag |= NFSNST_REVOKE; else /* * A subsequent timestamp indicates the client * did a setclientid/confirm and any previous * revoke is no longer relevant. */ sp->nst_flag &= ~NFSNST_REVOKE; } } } while (len > 0); free(tsp, M_TEMP); sf->nsf_flags = NFSNSF_OK; sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease + NFSRV_LEASEDELTA; } /* * Update the stable storage file, now that the grace period is over. */ APPLESTATIC void nfsrv_updatestable(NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfsrv_stable *sp, *nsp; int i; struct nfsvattr nva; vnode_t vp; #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000) mount_t mp = NULL; #endif int error; if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE)) return; sf->nsf_flags |= NFSNSF_UPDATEDONE; /* * Ok, we need to rewrite the stable storage file. * - truncate to 0 length * - write the new first structure * - loop through the data structures, writing out any that * have timestamps older than the old boot */ if (sf->nsf_bootvals) { sf->nsf_numboots++; for (i = sf->nsf_numboots - 2; i >= 0; i--) sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i]; } else { sf->nsf_numboots = 1; sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t), M_TEMP, M_WAITOK); } sf->nsf_bootvals[0] = nfsrvboottime; sf->nsf_lease = nfsrv_lease; NFSVNO_ATTRINIT(&nva); NFSVNO_SETATTRVAL(&nva, size, 0); vp = NFSFPVNODE(sf->nsf_fp); vn_start_write(vp, &mp, V_WAIT); if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p, NULL); NFSVOPUNLOCK(vp, 0); } else error = EPERM; vn_finished_write(mp); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0, UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), (off_t)(sizeof (struct nfsf_rec)), UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); return; } sf->nsf_flags |= NFSNSF_OK; /* * Loop through the list and write out timestamp records for * any clients that successfully reclaimed state. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { if (sp->nst_flag & NFSNST_GOTSTATE) { nfsrv_writestable(sp->nst_client, sp->nst_len, NFSNST_NEWSTATE, p); sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE; } LIST_REMOVE(sp, nst_list); free(sp, M_TEMP); } nfsrv_backupstable(); } /* * Append a record to the stable storage file. */ APPLESTATIC void nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfst_rec *sp; int error; if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL) return; sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + len - 1, M_TEMP, M_WAITOK); sp->len = len; NFSBCOPY(client, sp->client, len); sp->flag = flag; error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp), (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0, UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p); free(sp, M_TEMP); if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); } } /* * This function is called during the grace period to mark a client * that successfully reclaimed state. */ static void nfsrv_markstable(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First find the client structure. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } if (sp == LIST_END(&nfsrv_stablefirst.nsf_head)) return; /* * Now, just mark it and set the nfsclient back pointer. */ sp->nst_flag |= NFSNST_GOTSTATE; sp->nst_clp = clp; } /* * This function is called when a NFSv4.1 client does a ReclaimComplete. * Very similar to nfsrv_markstable(), except for the flag being set. */ static void nfsrv_markreclaim(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First find the client structure. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } if (sp == LIST_END(&nfsrv_stablefirst.nsf_head)) return; /* * Now, just set the flag. */ sp->nst_flag |= NFSNST_RECLAIMED; } /* * This function is called for a reclaim, to see if it gets grace. * It returns 0 if a reclaim is allowed, 1 otherwise. */ static int nfsrv_checkstable(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First, find the entry for the client. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } /* * If not in the list, state was revoked or no state was issued * since the previous reboot, a reclaim is denied. */ if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) || (sp->nst_flag & NFSNST_REVOKE) || !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK)) return (1); return (0); } /* * Test for and try to clear out a conflicting client. This is called by * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients * a found. * The trick here is that it can't revoke a conflicting client with an * expired lease unless it holds the v4root lock, so... * If no v4root lock, get the lock and return 1 to indicate "try again". * Return 0 to indicate the conflict can't be revoked and 1 to indicate * the revocation worked and the conflicting client is "bye, bye", so it * can be tried again. * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK(). * Unlocks State before a non-zero value is returned. */ static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp, NFSPROC_T *p) { int gotlock, lktype = 0; /* * If lease hasn't expired, we can't fix it. */ if (clp->lc_expiry >= NFSD_MONOSEC || !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) return (0); if (*haslockp == 0) { NFSUNLOCKSTATE(); if (vp != NULL) { lktype = NFSVOPISLOCKED(vp); NFSVOPUNLOCK(vp, 0); } NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; if (vp != NULL) { NFSVOPLOCK(vp, lktype | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) != 0) return (2); } return (1); } NFSUNLOCKSTATE(); /* * Ok, we can expire the conflicting client. */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); nfsrv_zapclient(clp, p); return (1); } /* * Resolve a delegation conflict. * Returns 0 to indicate the conflict was resolved without sleeping. * Return -1 to indicate that the caller should check for conflicts again. * Return > 0 for an error that should be returned, normally NFSERR_DELAY. * * Also, manipulate the nfsv4root_lock, as required. It isn't changed * for a return of 0, since there was no sleep and it could be required * later. It is released for a return of NFSERR_DELAY, since the caller * will return that error. It is released when a sleep was done waiting * for the delegation to be returned or expire (so that other nfsds can * handle ops). Then, it must be acquired for the write to stable storage. * (This function is somewhat similar to nfsrv_clientconflict(), but * the semantics differ in a couple of subtle ways. The return of 0 * indicates the conflict was resolved without sleeping here, not * that the conflict can't be resolved and the handling of nfsv4root_lock * differs, as noted above.) * Unlocks State before returning a non-zero value. */ static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp) { struct nfsclient *clp = stp->ls_clp; int gotlock, error, lktype = 0, retrycnt, zapped_clp; nfsv4stateid_t tstateid; fhandle_t tfh; /* * If the conflict is with an old delegation... */ if (stp->ls_flags & NFSLCK_OLDDELEG) { /* * You can delete it, if it has expired. */ if (clp->lc_delegtime < NFSD_MONOSEC) { nfsrv_freedeleg(stp); NFSUNLOCKSTATE(); error = -1; goto out; } NFSUNLOCKSTATE(); /* * During this delay, the old delegation could expire or it * could be recovered by the client via an Open with * CLAIM_DELEGATE_PREV. * Release the nfsv4root_lock, if held. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * It's a current delegation, so: * - check to see if the delegation has expired * - if so, get the v4root lock and then expire it */ if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) { /* * - do a recall callback, since not yet done * For now, never allow truncate to be set. To use * truncate safely, it must be guaranteed that the * Remove, Rename or Setattr with size of 0 will * succeed and that would require major changes to * the VFS/Vnode OPs. * Set the expiry time large enough so that it won't expire * until after the callback, then set it correctly, once * the callback is done. (The delegation will now time * out whether or not the Recall worked ok. The timeout * will be extended when ops are done on the delegation * stateid, up to the timelimit.) */ stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_flags |= NFSLCK_DELEGRECALL; /* * Loop NFSRV_CBRETRYCNT times while the CBRecall replies * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done * in order to try and avoid a race that could happen * when a CBRecall request passed the Open reply with * the delegation in it when transitting the network. * Since nfsrv_docallback will sleep, don't use stp after * the call. */ NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid, sizeof (tstateid)); NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh, sizeof (tfh)); NFSUNLOCKSTATE(); if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } retrycnt = 0; do { error = nfsrv_docallback(clp, NFSV4OP_CBRECALL, - &tstateid, 0, &tfh, NULL, NULL, p); + &tstateid, 0, &tfh, NULL, NULL, 0, p); retrycnt++; } while ((error == NFSERR_BADSTATEID || error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT); error = NFSERR_DELAY; goto out; } if (clp->lc_expiry >= NFSD_MONOSEC && stp->ls_delegtime >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); /* * A recall has been done, but it has not yet expired. * So, RETURN_DELAY. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * If we don't yet have the lock, just get it and then return, * since we need that before deleting expired state, such as * this delegation. * When getting the lock, unlock the vnode, so other nfsds that * are in progress, won't get stuck waiting for the vnode lock. */ if (*haslockp == 0) { NFSUNLOCKSTATE(); if (vp != NULL) { lktype = NFSVOPISLOCKED(vp); NFSVOPUNLOCK(vp, 0); } NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; if (vp != NULL) { NFSVOPLOCK(vp, lktype | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) != 0) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_PERM; goto out; } } error = -1; goto out; } NFSUNLOCKSTATE(); /* * Ok, we can delete the expired delegation. * First, write the Revoke record to stable storage and then * clear out the conflict. * Since all other nfsd threads are now blocked, we can safely * sleep without the state changing. */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); if (clp->lc_expiry < NFSD_MONOSEC) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); zapped_clp = 1; } else { nfsrv_freedeleg(stp); zapped_clp = 0; } if (zapped_clp) nfsrv_zapclient(clp, p); error = -1; out: NFSEXITCODE(error); return (error); } /* * Check for a remove allowed, if remove is set to 1 and get rid of * delegations. */ APPLESTATIC int nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p) { struct nfsstate *stp; struct nfslockfile *lfp; int error, haslock = 0; fhandle_t nfh; /* * First, get the lock file structure. * (A return of -1 means no associated state, so remove ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); tryagain: NFSLOCKSTATE(); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (error == -1) error = 0; goto out; } /* * Now, we must Recall any delegations. */ error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p); if (error) { /* * nfsrv_cleandeleg() unlocks state for non-zero * return. */ if (error == -1) goto tryagain; if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Now, look for a conflicting open share. */ if (remove) { /* * If the entry in the directory was the last reference to the * corresponding filesystem object, the object can be destroyed * */ if(lfp->lf_usecount>1) LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (stp->ls_flags & NFSLCK_WRITEDENY) { error = NFSERR_FILEOPEN; break; } } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE(error); return (error); } /* * Clear out all delegations for the file referred to by lfp. * May return NFSERR_DELAY, if there will be a delay waiting for * delegations to expire. * Returns -1 to indicate it slept while recalling a delegation. * This function has the side effect of deleting the nfslockfile structure, * if it no longer has associated state and didn't have to sleep. * Unlocks State before a non-zero value is returned. */ static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; int ret = 0; stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp) { ret = nfsrv_delegconflict(stp, haslockp, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ goto out; } } stp = nstp; } out: NFSEXITCODE(ret); return (ret); } /* * There are certain operations that, when being done outside of NFSv4, * require that any NFSv4 delegation for the file be recalled. * This function is to be called for those cases: * VOP_RENAME() - When a delegation is being recalled for any reason, * the client may have to do Opens against the server, using the file's * final component name. If the file has been renamed on the server, * that component name will be incorrect and the Open will fail. * VOP_REMOVE() - Theoretically, a client could Open a file after it has * been removed on the server, if there is a delegation issued to * that client for the file. I say "theoretically" since clients * normally do an Access Op before the Open and that Access Op will * fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so * they will detect the file's removal in the same manner. (There is * one case where RFC3530 allows a client to do an Open without first * doing an Access Op, which is passage of a check against the ACE * returned with a Write delegation, but current practice is to ignore * the ACE and always do an Access Op.) * Since the functions can only be called with an unlocked vnode, this * can't be done at this time. * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range * locks locally in the client, which are not visible to the server. To * deal with this, issuing of delegations for a vnode must be disabled * and all delegations for the vnode recalled. This is done via the * second function, using the VV_DISABLEDELEG vflag on the vnode. */ APPLESTATIC void nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p) { time_t starttime; int error; /* * First, check to see if the server is currently running and it has * been called for a regular file when issuing delegations. */ if (newnfs_numnfsd == 0 || vp->v_type != VREG || nfsrv_issuedelegs == 0) return; KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp)); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired by another thread. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); /* * Now, call nfsrv_checkremove() in a loop while it returns * NFSERR_DELAY. Return upon any other error or when timed out. */ starttime = NFSD_MONOSEC; do { if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(vp, 0, p); NFSVOPUNLOCK(vp, 0); } else error = EPERM; if (error == NFSERR_DELAY) { if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO) break; /* Sleep for a short period of time */ (void) nfs_catnap(PZERO, 0, "nfsremove"); } } while (error == NFSERR_DELAY); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } APPLESTATIC void nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p) { #ifdef VV_DISABLEDELEG /* * First, flag issuance of delegations disabled. */ atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG); #endif /* * Then call nfsd_recalldelegation() to get rid of all extant * delegations. */ nfsd_recalldelegation(vp, p); } /* * Check for conflicting locks, etc. and then get rid of delegations. * (At one point I thought that I should get rid of delegations for any * Setattr, since it could potentially disallow the I/O op (read or write) * allowed by the delegation. However, Setattr Ops that aren't changing * the size get a stateid of all 0s, so you can't tell if it is a delegation * for the same client or a different one, so I decided to only get rid * of delegations for other clients when the size is being changed.) * In general, a Setattr can disable NFS I/O Ops that are outstanding, such * as Write backs, even if there is no delegation, so it really isn't any * different?) */ APPLESTATIC int nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct nfsexstuff *exp, NFSPROC_T *p) { struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; int error = 0; nfsquad_t clientid; if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) { stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_first = nvap->na_size; } else { stp->ls_flags = 0; lop->lo_first = 0; } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL)) stp->ls_flags |= NFSLCK_SETATTR; if (stp->ls_flags == 0) goto out; lop->lo_end = NFS64BITSSET; lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = stateidp->seqid; clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0]; clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1]; stp->ls_stateid.other[2] = stateidp->other[2]; error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, stateidp, exp, nd, p); out: NFSEXITCODE2(error, nd); return (error); } /* * Check for a write delegation and do a CBGETATTR if there is one, updating * the attributes, as required. * Should I return an error if I can't get the attributes? (For now, I'll * just return ok. */ APPLESTATIC int nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp, - struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred, - NFSPROC_T *p) + struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p) { struct nfsstate *stp; struct nfslockfile *lfp; struct nfsclient *clp; struct nfsvattr nva; fhandle_t nfh; int error = 0; nfsattrbit_t cbbits; u_quad_t delegfilerev; NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits); if (!NFSNONZERO_ATTRBIT(&cbbits)) goto out; if (nfsrv_writedelegcnt == 0) goto out; /* * Get the lock file structure. * (A return of -1 means no associated state, so return ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); NFSLOCKSTATE(); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (error == -1) error = 0; goto out; } /* * Now, look for a write delegation. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_flags & NFSLCK_DELEGWRITE) break; } if (stp == LIST_END(&lfp->lf_deleg)) { NFSUNLOCKSTATE(); goto out; } clp = stp->ls_clp; delegfilerev = stp->ls_filerev; /* * If the Write delegation was issued as a part of this Compound RPC * or if we have an Implied Clientid (used in a previous Op in this * compound) and it is the client the delegation was issued to, * just return ok. * I also assume that it is from the same client iff the network * host IP address is the same as the callback address. (Not * exactly correct by the RFC, but avoids a lot of Getattr * callbacks.) */ if (nd->nd_compref == stp->ls_compref || ((nd->nd_flag & ND_IMPLIEDCLID) && clp->lc_clientid.qval == nd->nd_clientid.qval) || nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) { NFSUNLOCKSTATE(); goto out; } /* * We are now done with the delegation state structure, * so the statelock can be released and we can now tsleep(). */ /* * Now, we must do the CB Getattr callback, to see if Change or Size * has changed. */ if (clp->lc_expiry >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); NFSVNO_ATTRINIT(&nva); nva.na_filerev = NFS64BITSSET; error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL, - 0, &nfh, &nva, &cbbits, p); + 0, &nfh, &nva, &cbbits, 0, p); if (!error) { if ((nva.na_filerev != NFS64BITSSET && nva.na_filerev > delegfilerev) || (NFSVNO_ISSETSIZE(&nva) && nva.na_size != nvap->na_size)) { - error = nfsvno_updfilerev(vp, nvap, cred, p); + error = nfsvno_updfilerev(vp, nvap, nd, p); if (NFSVNO_ISSETSIZE(&nva)) nvap->na_size = nva.na_size; } } else error = 0; /* Ignore callback errors for now. */ } else { NFSUNLOCKSTATE(); } out: NFSEXITCODE2(error, nd); return (error); } /* * This function looks for openowners that haven't had any opens for * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS * is set. */ APPLESTATIC void nfsrv_throwawayopens(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int i; NFSLOCKSTATE(); nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS; /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) { if (LIST_EMPTY(&stp->ls_open) && (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit)) nfsrv_freeopenowner(stp, 0, p); } } } NFSUNLOCKSTATE(); } /* * This function checks to see if the credentials are the same. * Returns 1 for not same, 0 otherwise. */ static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp) { if (nd->nd_flag & ND_GSS) { if (!(clp->lc_flags & LCL_GSS)) return (1); if (clp->lc_flags & LCL_NAME) { if (nd->nd_princlen != clp->lc_namelen || NFSBCMP(nd->nd_principal, clp->lc_name, clp->lc_namelen)) return (1); else return (0); } if (nd->nd_cred->cr_uid == clp->lc_uid) return (0); else return (1); } else if (clp->lc_flags & LCL_GSS) return (1); /* * For AUTH_SYS, allow the same uid or root. (This is underspecified * in RFC3530, which talks about principals, but doesn't say anything * about uids for AUTH_SYS.) */ if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0) return (0); else return (1); } /* * Calculate the lease expiry time. */ static time_t nfsrv_leaseexpiry(void) { if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC) return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA)); return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA); } /* * Delay the delegation timeout as far as ls_delegtimelimit, as required. */ static void nfsrv_delaydelegtimeout(struct nfsstate *stp) { if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) return; if ((stp->ls_delegtime + 15) > NFSD_MONOSEC && stp->ls_delegtime < stp->ls_delegtimelimit) { stp->ls_delegtime += nfsrv_lease; if (stp->ls_delegtime > stp->ls_delegtimelimit) stp->ls_delegtime = stp->ls_delegtimelimit; } } /* * This function checks to see if there is any other state associated * with the openowner for this Open. * It returns 1 if there is no other state, 0 otherwise. */ static int nfsrv_nootherstate(struct nfsstate *stp) { struct nfsstate *tstp; LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) { if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock)) return (0); } return (1); } /* * Create a list of lock deltas (changes to local byte range locking * that can be rolled back using the list) and apply the changes via * nfsvno_advlock(). Optionally, lock the list. It is expected that either * the rollback or update function will be called after this. * It returns an error (and rolls back, as required), if any nfsvno_advlock() * call fails. If it returns an error, it will unlock the list. */ static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfslock *lop, *nlop; int error = 0; /* Loop through the list of locks. */ lop = LIST_FIRST(&lfp->lf_locallock); while (first < end && lop != NULL) { nlop = LIST_NEXT(lop, lo_lckowner); if (first >= lop->lo_end) { /* not there yet */ lop = nlop; } else if (first < lop->lo_first) { /* new one starts before entry in list */ if (end <= lop->lo_first) { /* no overlap between old and new */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, lop->lo_first, cfp, p); if (error != 0) break; first = lop->lo_first; } } else { /* new one overlaps this entry in list */ if (end <= lop->lo_end) { /* overlaps all of new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, lop->lo_end, cfp, p); if (error != 0) break; first = lop->lo_end; lop = nlop; } } } if (first < end && error == 0) /* handle fragment past end of list */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); NFSEXITCODE(error); return (error); } /* * Local lock unlock. Unlock all byte ranges that are no longer locked * by NFSv4. To do this, unlock any subranges of first-->end that * do not overlap with the byte ranges of any lock in the lfp->lf_lock * list. This list has all locks for the file held by other * tuples. The list is ordered by increasing * lo_first value, but may have entries that overlap each other, for * the case of read locks. */ static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p) { struct nfslock *lop; uint64_t first, end, prevfirst; first = init_first; end = init_end; while (first < init_end) { /* Loop through all nfs locks, adjusting first and end */ prevfirst = 0; LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { KASSERT(prevfirst <= lop->lo_first, ("nfsv4 locks out of order")); KASSERT(lop->lo_first < lop->lo_end, ("nfsv4 bogus lock")); prevfirst = lop->lo_first; if (first >= lop->lo_first && first < lop->lo_end) /* * Overlaps with initial part, so trim * off that initial part by moving first past * it. */ first = lop->lo_end; else if (end > lop->lo_first && lop->lo_first > first) { /* * This lock defines the end of the * segment to unlock, so set end to the * start of it and break out of the loop. */ end = lop->lo_first; break; } if (first >= end) /* * There is no segment left to do, so * break out of this loop and then exit * the outer while() since first will be set * to end, which must equal init_end here. */ break; } if (first < end) { /* Unlock this segment */ (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK, NFSLCK_READ, first, end, NULL, p); nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK, first, end); } /* * Now move past this segment and look for any further * segment in the range, if there is one. */ first = end; end = init_end; } } /* * Do the local lock operation and update the rollback list, as required. * Perform the rollback and return the error if nfsvno_advlock() fails. */ static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfsrollback *rlp; int error = 0, ltype, oldltype; if (flags & NFSLCK_WRITE) ltype = F_WRLCK; else if (flags & NFSLCK_READ) ltype = F_RDLCK; else ltype = F_UNLCK; if (oldflags & NFSLCK_WRITE) oldltype = F_WRLCK; else if (oldflags & NFSLCK_READ) oldltype = F_RDLCK; else oldltype = F_UNLCK; if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK)) /* nothing to do */ goto out; error = nfsvno_advlock(vp, ltype, first, end, p); if (error != 0) { if (cfp != NULL) { cfp->cl_clientid.lval[0] = 0; cfp->cl_clientid.lval[1] = 0; cfp->cl_first = 0; cfp->cl_end = NFS64BITSSET; cfp->cl_flags = NFSLCK_WRITE; cfp->cl_ownerlen = 5; NFSBCOPY("LOCAL", cfp->cl_owner, 5); } nfsrv_locallock_rollback(vp, lfp, p); } else if (ltype != F_UNLCK) { rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK, M_WAITOK); rlp->rlck_first = first; rlp->rlck_end = end; rlp->rlck_type = oldltype; LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list); } out: NFSEXITCODE(error); return (error); } /* * Roll back local lock changes and free up the rollback list. */ static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p) { struct nfsrollback *rlp, *nrlp; LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) { (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first, rlp->rlck_end, p); free(rlp, M_NFSDROLLBACK); } LIST_INIT(&lfp->lf_rollback); } /* * Update local lock list and delete rollback list (ie now committed to the * local locks). Most of the work is done by the internal function. */ static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end) { struct nfsrollback *rlp, *nrlp; struct nfslock *new_lop, *other_lop; new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); if (flags & (NFSLCK_READ | NFSLCK_WRITE)) other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); else other_lop = NULL; new_lop->lo_flags = flags; new_lop->lo_first = first; new_lop->lo_end = end; nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp); if (new_lop != NULL) free(new_lop, M_NFSDLOCK); if (other_lop != NULL) free(other_lop, M_NFSDLOCK); /* and get rid of the rollback list */ LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } /* * Lock the struct nfslockfile for local lock updating. */ static void nfsrv_locklf(struct nfslockfile *lfp) { int gotlock; /* lf_usecount ensures *lfp won't be free'd */ lfp->lf_usecount++; do { gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL, NFSSTATEMUTEXPTR, NULL); } while (gotlock == 0); lfp->lf_usecount--; } /* * Unlock the struct nfslockfile after local lock updating. */ static void nfsrv_unlocklf(struct nfslockfile *lfp) { nfsv4_unlock(&lfp->lf_locallock_lck, 0); } /* * Clear out all state for the NFSv4 server. * Must be called by a thread that can sleep when no nfsds are running. */ void nfsrv_throwawayallstate(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfslockfile *lfp, *nlfp; int i; /* * For each client, clean out the state and then free the structure. */ for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } } /* * Also, free up any remaining lock file structures. */ for (i = 0; i < nfsrv_lockhashsize; i++) { - LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) + LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) { + printf("nfsd unload: fnd a lock file struct\n"); nfsrv_freenfslockfile(lfp); + } } + + /* And get rid of the deviceid structures and layouts. */ + nfsrv_freealllayoutsanddevids(); } /* * Check the sequence# for the session and slot provided as an argument. * Also, renew the lease if the session will return NFS_OK. */ int nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid, uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this, uint32_t *sflagsp, NFSPROC_T *p) { struct nfsdsession *sep; struct nfssessionhash *shp; int error; SVCXPRT *savxprt; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); return (NFSERR_BADSESSION); } error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp, sep->sess_slots, NULL, NFSV4_SLOTS - 1); if (error != 0) { NFSUNLOCKSESSION(shp); return (error); } if (cache_this != 0) nd->nd_flag |= ND_SAVEREPLY; /* Renew the lease. */ sep->sess_clp->lc_expiry = nfsrv_leaseexpiry(); nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval; nd->nd_flag |= ND_IMPLIEDCLID; /* * If this session handles the backchannel, save the nd_xprt for this * RPC, since this is the one being used. * RFC-5661 specifies that the fore channel will be implicitly * bound by a Sequence operation. However, since some NFSv4.1 clients * erroneously assumed that the back channel would be implicitly * bound as well, do the implicit binding unless a * BindConnectiontoSession has already been done on the session. */ if (sep->sess_clp->lc_req.nr_client != NULL && sep->sess_cbsess.nfsess_xprt != nd->nd_xprt && (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0 && (sep->sess_clp->lc_flags & LCL_DONEBINDCONN) == 0) { NFSD_DEBUG(2, "nfsrv_checksequence: implicit back channel bind\n"); savxprt = sep->sess_cbsess.nfsess_xprt; SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = sep->sess_clp->lc_req.nr_client->cl_private; nd->nd_xprt->xp_idletimeout = 0; /* Disable timeout. */ sep->sess_cbsess.nfsess_xprt = nd->nd_xprt; if (savxprt != NULL) SVC_RELEASE(savxprt); } *sflagsp = 0; if (sep->sess_clp->lc_req.nr_client == NULL) *sflagsp |= NFSV4SEQ_CBPATHDOWN; NFSUNLOCKSESSION(shp); if (error == NFSERR_EXPIRED) { *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED; error = 0; } else if (error == NFSERR_ADMINREVOKED) { *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED; error = 0; } *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1; return (0); } /* * Check/set reclaim complete for this session/clientid. */ int nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd) { struct nfsdsession *sep; struct nfssessionhash *shp; int error = 0; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } /* Check to see if reclaim complete has already happened. */ if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) error = NFSERR_COMPLETEALREADY; else { sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE; nfsrv_markreclaim(sep->sess_clp); } NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (error); } /* * Cache the reply in a session slot. */ void nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat, struct mbuf **m) { struct nfsdsession *sep; struct nfssessionhash *shp; shp = NFSSESSIONHASH(sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); printf("nfsrv_cache_session: no session\n"); m_freem(*m); return; } nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m); NFSUNLOCKSESSION(shp); } /* * Search for a session that matches the sessionid. */ static struct nfsdsession * nfsrv_findsession(uint8_t *sessionid) { struct nfsdsession *sep; struct nfssessionhash *shp; shp = NFSSESSIONHASH(sessionid); LIST_FOREACH(sep, &shp->list, sess_hash) { if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID)) break; } return (sep); } /* * Destroy a session. */ int nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid) { int error, igotlock, samesess; samesess = 0; if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) && (nd->nd_flag & ND_HASSEQUENCE) != 0) { samesess = 1; if ((nd->nd_flag & ND_LASTOP) == 0) return (NFSERR_BADSESSION); } /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0); NFSUNLOCKV4ROOTMUTEX(); error = nfsrv_freesession(NULL, sessionid); if (error == 0 && samesess != 0) nd->nd_flag &= ~ND_HASSEQUENCE; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); return (error); } /* * Bind a connection to a session. * For now, only certain variants are supported, since the current session * structure can only handle a single backchannel entry, which will be * applied to all connections if it is set. */ int nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp) { struct nfssessionhash *shp; struct nfsdsession *sep; struct nfsclient *clp; SVCXPRT *savxprt; int error; error = 0; shp = NFSSESSIONHASH(sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); if (sep != NULL) { clp = sep->sess_clp; if (*foreaftp == NFSCDFC4_BACK || *foreaftp == NFSCDFC4_BACK_OR_BOTH || *foreaftp == NFSCDFC4_FORE_OR_BOTH) { /* Try to set up a backchannel. */ if (clp->lc_req.nr_client == NULL) { NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire " "backchannel\n"); clp->lc_req.nr_client = (struct __rpc_client *) clnt_bck_create(nd->nd_xprt->xp_socket, sep->sess_cbprogram, NFSV4_CBVERS); } if (clp->lc_req.nr_client != NULL) { NFSD_DEBUG(2, "nfsrv_bindconnsess: set up " "backchannel\n"); savxprt = sep->sess_cbsess.nfsess_xprt; SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = clp->lc_req.nr_client->cl_private; /* Disable idle timeout. */ nd->nd_xprt->xp_idletimeout = 0; sep->sess_cbsess.nfsess_xprt = nd->nd_xprt; if (savxprt != NULL) SVC_RELEASE(savxprt); sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN; clp->lc_flags |= LCL_DONEBINDCONN; if (*foreaftp == NFSCDFS4_BACK) *foreaftp = NFSCDFS4_BACK; else *foreaftp = NFSCDFS4_BOTH; } else if (*foreaftp != NFSCDFC4_BACK) { NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set " "up backchannel\n"); sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN; clp->lc_flags |= LCL_DONEBINDCONN; *foreaftp = NFSCDFS4_FORE; } else { error = NFSERR_NOTSUPP; printf("nfsrv_bindconnsess: Can't add " "backchannel\n"); } } else { NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n"); clp->lc_flags |= LCL_DONEBINDCONN; *foreaftp = NFSCDFS4_FORE; } } else error = NFSERR_BADSESSION; NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (error); } /* * Free up a session structure. */ static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid) { struct nfssessionhash *shp; int i; NFSLOCKSTATE(); if (sep == NULL) { shp = NFSSESSIONHASH(sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); } else { shp = NFSSESSIONHASH(sep->sess_sessionid); NFSLOCKSESSION(shp); } if (sep != NULL) { sep->sess_refcnt--; if (sep->sess_refcnt > 0) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (NFSERR_BACKCHANBUSY); } LIST_REMOVE(sep, sess_hash); LIST_REMOVE(sep, sess_list); } NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); if (sep == NULL) return (NFSERR_BADSESSION); for (i = 0; i < NFSV4_SLOTS; i++) if (sep->sess_slots[i].nfssl_reply != NULL) m_freem(sep->sess_slots[i].nfssl_reply); if (sep->sess_cbsess.nfsess_xprt != NULL) SVC_RELEASE(sep->sess_cbsess.nfsess_xprt); free(sep, M_NFSDSESSION); return (0); } /* * Free a stateid. * RFC5661 says that it should fail when there are associated opens, locks * or delegations. Since stateids represent opens, I don't see how you can * free an open stateid (it will be free'd when closed), so this function * only works for lock stateids (freeing the lock_owner) or delegations. */ int nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int error; NFSLOCKSTATE(); /* * Look up the stateid */ error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) { /* First, check for a delegation. */ LIST_FOREACH(stp, &clp->lc_deleg, ls_list) { if (!NFSBCMP(stp->ls_stateid.other, stateidp->other, NFSX_STATEIDOTHER)) break; } if (stp != NULL) { nfsrv_freedeleg(stp); NFSUNLOCKSTATE(); return (error); } } /* Not a delegation, try for a lock_owner. */ if (error == 0) error = nfsrv_getstate(clp, stateidp, 0, &stp); if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0)) /* Not a lock_owner stateid. */ error = NFSERR_LOCKSHELD; if (error == 0 && !LIST_EMPTY(&stp->ls_lock)) error = NFSERR_LOCKSHELD; if (error == 0) nfsrv_freelockowner(stp, NULL, 0, p); NFSUNLOCKSTATE(); return (error); } /* * Test a stateid. */ int nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int error; NFSLOCKSTATE(); /* * Look up the stateid */ error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) error = nfsrv_getstate(clp, stateidp, 0, &stp); if (error == 0 && stateidp->seqid != 0 && SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid)) error = NFSERR_OLDSTATEID; NFSUNLOCKSTATE(); return (error); } /* * Generate the xdr for an NFSv4.1 CBSequence Operation. */ static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp, int dont_replycache, struct nfsdsession **sepp) { struct nfsdsession *sep; uint32_t *tl, slotseq = 0; int maxslot, slotpos; uint8_t sessionid[NFSX_V4SESSIONID]; int error; error = nfsv4_getcbsession(clp, sepp); if (error != 0) return (error); sep = *sepp; (void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot, &slotseq, sessionid); KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot")); /* Build the Sequence arguments. */ NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED); bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; nd->nd_slotseq = tl; *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl++ = txdr_unsigned(maxslot); if (dont_replycache == 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl = 0; /* No referring call list, for now. */ nd->nd_flag |= ND_HASSEQUENCE; return (0); } /* * Get a session for the callback. */ static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp) { struct nfsdsession *sep; NFSLOCKSTATE(); LIST_FOREACH(sep, &clp->lc_session, sess_list) { if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) break; } if (sep == NULL) { NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } sep->sess_refcnt++; *sepp = sep; NFSUNLOCKSTATE(); return (0); } /* * Free up all backchannel xprts. This needs to be done when the nfsd threads * exit, since those transports will all be going away. * This is only called after all the nfsd threads are done performing RPCs, * so locking shouldn't be an issue. */ APPLESTATIC void nfsrv_freeallbackchannel_xprts(void) { struct nfsdsession *sep; struct nfsclient *clp; SVCXPRT *xprt; int i; for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { LIST_FOREACH(sep, &clp->lc_session, sess_list) { xprt = sep->sess_cbsess.nfsess_xprt; sep->sess_cbsess.nfsess_xprt = NULL; if (xprt != NULL) SVC_RELEASE(xprt); } } } +} + +/* + * Do a layout commit. Actually just call nfsrv_updatemdsattr(). + * I have no idea if the rest of these arguments will ever be useful? + */ +int +nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype, + int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len, + int hasnewmtime, struct timespec *newmtimep, int reclaim, + nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep, + uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p) +{ + struct nfsvattr na; + int error; + + error = nfsrv_updatemdsattr(vp, &na, p); + if (error == 0) { + *hasnewsizep = 1; + *newsizep = na.na_size; + } + return (error); +} + +/* + * Try and get a layout. + */ +int +nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp, + int layouttype, int *iomode, uint64_t *offset, uint64_t *len, + uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose, + int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p) +{ + struct nfslayouthash *lhyp; + struct nfslayout *lyp; + char *devid; + fhandle_t fh, *dsfhp; + int error, mirrorcnt; + + if (nfsrv_devidcnt == 0) + return (NFSERR_UNKNLAYOUTTYPE); + + if (*offset != 0) + printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset, + (uintmax_t)*len); + error = nfsvno_getfh(vp, &fh, p); + NFSD_DEBUG(4, "layoutget getfh=%d\n", error); + if (error != 0) + return (error); + + /* + * For now, all layouts are for entire files. + * Only issue Read/Write layouts if requested for a non-readonly fs. + */ + if (NFSVNO_EXRDONLY(exp)) { + if (*iomode == NFSLAYOUTIOMODE_RW) + return (NFSERR_LAYOUTTRYLATER); + *iomode = NFSLAYOUTIOMODE_READ; + } + if (*iomode != NFSLAYOUTIOMODE_RW) + *iomode = NFSLAYOUTIOMODE_READ; + + /* + * Check to see if a write layout can be issued for this file. + * This is used during mirror recovery to avoid RW layouts being + * issued for a file while it is being copied to the recovered + * mirror. + */ + if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0) + return (NFSERR_LAYOUTTRYLATER); + + *retonclose = 0; + *offset = 0; + *len = UINT64_MAX; + + /* First, see if a layout already exists and return if found. */ + lhyp = NFSLAYOUTHASH(&fh); + NFSLOCKLAYOUT(lhyp); + error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp); + NFSD_DEBUG(4, "layoutget findlay=%d\n", error); + /* + * Not sure if the seqid must be the same, so I won't check it. + */ + if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] || + stateidp->other[1] != lyp->lay_stateid.other[1] || + stateidp->other[2] != lyp->lay_stateid.other[2])) { + if ((lyp->lay_flags & NFSLAY_CALLB) == 0) { + NFSUNLOCKLAYOUT(lhyp); + NFSD_DEBUG(1, "ret bad stateid\n"); + return (NFSERR_BADSTATEID); + } + /* + * I believe we get here because there is a race between + * the client processing the CBLAYOUTRECALL and the layout + * being deleted here on the server. + * The client has now done a LayoutGet with a non-layout + * stateid, as it would when there is no layout. + * As such, free this layout and set error == NFSERR_BADSTATEID + * so the code below will create a new layout structure as + * would happen if no layout was found. + * "lyp" will be set before being used below, but set it NULL + * as a safety belt. + */ + nfsrv_freelayout(&lhyp->list, lyp); + lyp = NULL; + error = NFSERR_BADSTATEID; + } + if (error == 0) { + if (lyp->lay_layoutlen > maxcnt) { + NFSUNLOCKLAYOUT(lhyp); + NFSD_DEBUG(1, "ret layout too small\n"); + return (NFSERR_TOOSMALL); + } + if (*iomode == NFSLAYOUTIOMODE_RW) + lyp->lay_flags |= NFSLAY_RW; + else + lyp->lay_flags |= NFSLAY_READ; + NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen); + *layoutlenp = lyp->lay_layoutlen; + if (++lyp->lay_stateid.seqid == 0) + lyp->lay_stateid.seqid = 1; + stateidp->seqid = lyp->lay_stateid.seqid; + NFSUNLOCKLAYOUT(lhyp); + NFSD_DEBUG(4, "ret fnd layout\n"); + return (0); + } + NFSUNLOCKLAYOUT(lhyp); + + /* Find the device id and file handle. */ + dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK); + devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK); + error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid); + NFSD_DEBUG(4, "layoutget devandfh=%d\n", error); + if (error == 0) { + if (layouttype == NFSLAYOUT_NFSV4_1_FILES) { + if (NFSX_V4FILELAYOUT > maxcnt) + error = NFSERR_TOOSMALL; + else + lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp, + devid, vp->v_mount->mnt_stat.f_fsid); + } else { + if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt) + error = NFSERR_TOOSMALL; + else + lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt, + &fh, dsfhp, devid, + vp->v_mount->mnt_stat.f_fsid); + } + } + free(dsfhp, M_TEMP); + free(devid, M_TEMP); + if (error != 0) + return (error); + + /* + * Now, add this layout to the list. + */ + error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p); + NFSD_DEBUG(4, "layoutget addl=%d\n", error); + /* + * The lyp will be set to NULL by nfsrv_addlayout() if it + * linked the new structure into the lists. + */ + free(lyp, M_NFSDSTATE); + return (error); +} + +/* + * Generate a File Layout. + */ +static struct nfslayout * +nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp, + fhandle_t *dsfhp, char *devid, fsid_t fs) +{ + uint32_t *tl; + struct nfslayout *lyp; + uint64_t pattern_offset; + + lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE, + M_WAITOK | M_ZERO); + lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES; + if (iomode == NFSLAYOUTIOMODE_RW) + lyp->lay_flags = NFSLAY_RW; + else + lyp->lay_flags = NFSLAY_READ; + NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp)); + lyp->lay_clientid.qval = nd->nd_clientid.qval; + lyp->lay_fsid = fs; + + /* Fill in the xdr for the files layout. */ + tl = (uint32_t *)lyp->lay_xdr; + NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */ + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + + /* + * Make the stripe size as many 64K blocks as will fit in the stripe + * mask. Since there is only one stripe, the stripe size doesn't really + * matter, except that the Linux client will only handle an exact + * multiple of their PAGE_SIZE (usually 4K). I chose 64K as a value + * that should cover most/all arches w.r.t. PAGE_SIZE. + */ + *tl++ = txdr_unsigned(NFSFLAYUTIL_STRIPE_MASK & ~0xffff); + *tl++ = 0; /* 1st stripe index. */ + pattern_offset = 0; + txdr_hyper(pattern_offset, tl); tl += 2; /* Pattern offset. */ + *tl++ = txdr_unsigned(1); /* 1 file handle. */ + *tl++ = txdr_unsigned(NFSX_V4PNFSFH); + NFSBCOPY(dsfhp, tl, sizeof(*dsfhp)); + lyp->lay_layoutlen = NFSX_V4FILELAYOUT; + return (lyp); +} + +#define FLEX_OWNERID "999" +#define FLEX_UID0 "0" +/* + * Generate a Flex File Layout. + * The FLEX_OWNERID can be any string of 3 decimal digits. Although this + * string goes on the wire, it isn't supposed to be used by the client, + * since this server uses tight coupling. + * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use + * a string of "0". This works around the Linux Flex File Layout driver bug + * which uses the synthetic uid/gid strings for the "tightly coupled" case. + */ +static struct nfslayout * +nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt, + fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs) +{ + uint32_t *tl; + struct nfslayout *lyp; + uint64_t lenval; + int i; + + lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt), + M_NFSDSTATE, M_WAITOK | M_ZERO); + lyp->lay_type = NFSLAYOUT_FLEXFILE; + if (iomode == NFSLAYOUTIOMODE_RW) + lyp->lay_flags = NFSLAY_RW; + else + lyp->lay_flags = NFSLAY_READ; + NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp)); + lyp->lay_clientid.qval = nd->nd_clientid.qval; + lyp->lay_fsid = fs; + lyp->lay_mirrorcnt = mirrorcnt; + + /* Fill in the xdr for the files layout. */ + tl = (uint32_t *)lyp->lay_xdr; + lenval = 0; + txdr_hyper(lenval, tl); tl += 2; /* Stripe unit. */ + *tl++ = txdr_unsigned(mirrorcnt); /* # of mirrors. */ + for (i = 0; i < mirrorcnt; i++) { + *tl++ = txdr_unsigned(1); /* One stripe. */ + NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */ + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + devid += NFSX_V4DEVICEID; + *tl++ = txdr_unsigned(1); /* Efficiency. */ + *tl++ = 0; /* Proxy Stateid. */ + *tl++ = 0x55555555; + *tl++ = 0x55555555; + *tl++ = 0x55555555; + *tl++ = txdr_unsigned(1); /* 1 file handle. */ + *tl++ = txdr_unsigned(NFSX_V4PNFSFH); + NFSBCOPY(dsfhp, tl, sizeof(*dsfhp)); + tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED); + dsfhp++; + if (nfsrv_flexlinuxhack != 0) { + *tl++ = txdr_unsigned(strlen(FLEX_UID0)); + *tl = 0; /* 0 pad string. */ + NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0)); + *tl++ = txdr_unsigned(strlen(FLEX_UID0)); + *tl = 0; /* 0 pad string. */ + NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0)); + } else { + *tl++ = txdr_unsigned(strlen(FLEX_OWNERID)); + NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED); + *tl++ = txdr_unsigned(strlen(FLEX_OWNERID)); + NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED); + } + } + *tl++ = txdr_unsigned(0); /* ff_flags. */ + *tl = txdr_unsigned(60); /* Status interval hint. */ + lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt); + return (lyp); +} + +/* + * Parse and process Flex File errors returned via LayoutReturn. + */ +static void +nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt, + NFSPROC_T *p) +{ + uint32_t *tl; + int cnt, errcnt, i, j, opnum, stat; + char devid[NFSX_V4DEVICEID]; + + tl = layp; + cnt = fxdr_unsigned(int, *tl++); + NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt); + for (i = 0; i < cnt; i++) { + /* Skip offset, length and stateid for now. */ + tl += (4 + NFSX_STATEID / NFSX_UNSIGNED); + errcnt = fxdr_unsigned(int, *tl++); + NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt); + for (j = 0; j < errcnt; j++) { + NFSBCOPY(tl, devid, NFSX_V4DEVICEID); + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + stat = fxdr_unsigned(int, *tl++); + opnum = fxdr_unsigned(int, *tl++); + NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum, + stat); + /* + * Except for NFSERR_ACCES errors for Reading, + * shut the mirror down. + */ + if (opnum != NFSV4OP_READ || stat != NFSERR_ACCES) + nfsrv_delds(devid, p); + } + } +} + +/* + * This function removes all flex file layouts which has a mirror with + * a device id that matches the argument. + * Called when the DS represented by the device id has failed. + */ +void +nfsrv_flexmirrordel(char *devid, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfslayout *lyp, *nlyp; + struct nfslayouthash *lhyp; + struct nfslayouthead loclyp; + int i, j; + + NFSD_DEBUG(4, "flexmirrordel\n"); + /* Move all layouts found onto a local list. */ + TAILQ_INIT(&loclyp); + for (i = 0; i < nfsrv_layouthashsize; i++) { + lhyp = &nfslayouthash[i]; + NFSLOCKLAYOUT(lhyp); + TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { + if (lyp->lay_type == NFSLAYOUT_FLEXFILE && + lyp->lay_mirrorcnt > 1) { + NFSD_DEBUG(4, "possible match\n"); + tl = lyp->lay_xdr; + tl += 3; + for (j = 0; j < lyp->lay_mirrorcnt; j++) { + tl++; + if (NFSBCMP(devid, tl, NFSX_V4DEVICEID) + == 0) { + /* Found one. */ + NFSD_DEBUG(4, "fnd one\n"); + TAILQ_REMOVE(&lhyp->list, lyp, + lay_list); + TAILQ_INSERT_HEAD(&loclyp, lyp, + lay_list); + break; + } + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED + + NFSM_RNDUP(NFSX_V4PNFSFH) / + NFSX_UNSIGNED + 11 * NFSX_UNSIGNED); + } + } + } + NFSUNLOCKLAYOUT(lhyp); + } + + /* Now, try to do a Layout recall for each one found. */ + TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) { + NFSD_DEBUG(4, "do layout recall\n"); + /* + * The layout stateid.seqid needs to be incremented + * before doing a LAYOUT_RECALL callback. + * Set lay_trycnt to UINT16_MAX so it won't set up a retry. + */ + if (++lyp->lay_stateid.seqid == 0) + lyp->lay_stateid.seqid = 1; + lyp->lay_trycnt = UINT16_MAX; + nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid, + &lyp->lay_fh, lyp, &loclyp, lyp->lay_type, p); + nfsrv_freelayout(&loclyp, lyp); + } +} + +/* + * Do a recall callback to the client for this layout. + */ +static int +nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp, + struct nfslayout *lyp, struct nfslayouthead *lyheadp, int laytype, + NFSPROC_T *p) +{ + struct nfsclient *clp; + int error; + + NFSD_DEBUG(4, "nfsrv_recalllayout\n"); + error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0), + 0, NULL, p); + NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error); + if (error != 0) + return (error); + if ((clp->lc_flags & LCL_NFSV41) != 0) { + error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL, + stateidp, 0, fhp, NULL, NULL, laytype, p); + /* If lyp != NULL, handle an error return here. */ + if (error != 0 && lyp != NULL) { + NFSDRECALLLOCK(); + if (error == NFSERR_NOMATCHLAYOUT) { + /* + * Mark it returned, since there is no layout. + */ + if ((lyp->lay_flags & NFSLAY_RECALL) != 0) { + lyp->lay_flags |= NFSLAY_RETURNED; + wakeup(lyp); + } + NFSDRECALLUNLOCK(); + } else if ((lyp->lay_flags & NFSLAY_RETURNED) == 0 && + lyp->lay_trycnt < 10) { + /* + * Clear recall, so it can be tried again + * and put it at the end of the list to + * delay the retry a little longer. + */ + lyp->lay_flags &= ~NFSLAY_RECALL; + lyp->lay_trycnt++; + TAILQ_REMOVE(lyheadp, lyp, lay_list); + TAILQ_INSERT_TAIL(lyheadp, lyp, lay_list); + NFSDRECALLUNLOCK(); + nfs_catnap(PVFS, 0, "nfsrclay"); + } else + NFSDRECALLUNLOCK(); + } + } else + printf("nfsrv_recalllayout: clp not NFSv4.1\n"); + return (error); +} + +/* + * Find a layout to recall when we exceed our high water mark. + */ +void +nfsrv_recalloldlayout(NFSPROC_T *p) +{ + struct nfslayouthash *lhyp; + struct nfslayout *lyp; + nfsquad_t clientid; + nfsv4stateid_t stateid; + fhandle_t fh; + int error, laytype, ret; + + lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize]; + NFSLOCKLAYOUT(lhyp); + TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) { + if ((lyp->lay_flags & NFSLAY_CALLB) == 0) { + lyp->lay_flags |= NFSLAY_CALLB; + /* + * The layout stateid.seqid needs to be incremented + * before doing a LAYOUT_RECALL callback. + */ + if (++lyp->lay_stateid.seqid == 0) + lyp->lay_stateid.seqid = 1; + clientid = lyp->lay_clientid; + stateid = lyp->lay_stateid; + fh = lyp->lay_fh; + laytype = lyp->lay_type; + break; + } + } + NFSUNLOCKLAYOUT(lhyp); + if (lyp != NULL) { + error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, NULL, + laytype, p); + if (error != 0 && error != NFSERR_NOMATCHLAYOUT) + printf("recallold=%d\n", error); + if (error != 0) { + NFSLOCKLAYOUT(lhyp); + /* + * Since the hash list was unlocked, we need to + * find it again. + */ + ret = nfsrv_findlayout(&clientid, &fh, laytype, p, + &lyp); + if (ret == 0 && + (lyp->lay_flags & NFSLAY_CALLB) != 0 && + lyp->lay_stateid.other[0] == stateid.other[0] && + lyp->lay_stateid.other[1] == stateid.other[1] && + lyp->lay_stateid.other[2] == stateid.other[2]) { + /* + * The client no longer knows this layout, so + * it can be free'd now. + */ + if (error == NFSERR_NOMATCHLAYOUT) + nfsrv_freelayout(&lhyp->list, lyp); + else { + /* + * Leave it to be tried later by + * clearing NFSLAY_CALLB and moving + * it to the head of the list, so it + * won't be tried again for a while. + */ + lyp->lay_flags &= ~NFSLAY_CALLB; + TAILQ_REMOVE(&lhyp->list, lyp, + lay_list); + TAILQ_INSERT_HEAD(&lhyp->list, lyp, + lay_list); + } + } + NFSUNLOCKLAYOUT(lhyp); + } + } +} + +/* + * Try and return layout(s). + */ +int +nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp, + int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim, + int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp, + struct ucred *cred, NFSPROC_T *p) +{ + struct nfsvattr na; + struct nfslayouthash *lhyp; + struct nfslayout *lyp; + fhandle_t fh; + int error = 0; + + *fndp = 0; + if (kind == NFSV4LAYOUTRET_FILE) { + error = nfsvno_getfh(vp, &fh, p); + if (error == 0) { + error = nfsrv_updatemdsattr(vp, &na, p); + if (error != 0) + printf("nfsrv_layoutreturn: updatemdsattr" + " failed=%d\n", error); + } + if (error == 0) { + if (reclaim == newnfs_true) { + error = nfsrv_checkgrace(NULL, NULL, + NFSLCK_RECLAIM); + if (error != NFSERR_NOGRACE) + error = 0; + return (error); + } + lhyp = NFSLAYOUTHASH(&fh); + NFSDRECALLLOCK(); + NFSLOCKLAYOUT(lhyp); + error = nfsrv_findlayout(&nd->nd_clientid, &fh, + layouttype, p, &lyp); + NFSD_DEBUG(4, "layoutret findlay=%d\n", error); + if (error == 0 && + stateidp->other[0] == lyp->lay_stateid.other[0] && + stateidp->other[1] == lyp->lay_stateid.other[1] && + stateidp->other[2] == lyp->lay_stateid.other[2]) { + NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d" + " %x %x %x laystateid %d %x %x %x" + " off=%ju len=%ju flgs=0x%x\n", + stateidp->seqid, stateidp->other[0], + stateidp->other[1], stateidp->other[2], + lyp->lay_stateid.seqid, + lyp->lay_stateid.other[0], + lyp->lay_stateid.other[1], + lyp->lay_stateid.other[2], + (uintmax_t)offset, (uintmax_t)len, + lyp->lay_flags); + if (++lyp->lay_stateid.seqid == 0) + lyp->lay_stateid.seqid = 1; + stateidp->seqid = lyp->lay_stateid.seqid; + if (offset == 0 && len == UINT64_MAX) { + if ((iomode & NFSLAYOUTIOMODE_READ) != + 0) + lyp->lay_flags &= ~NFSLAY_READ; + if ((iomode & NFSLAYOUTIOMODE_RW) != 0) + lyp->lay_flags &= ~NFSLAY_RW; + if ((lyp->lay_flags & (NFSLAY_READ | + NFSLAY_RW)) == 0) + nfsrv_freelayout(&lhyp->list, + lyp); + else + *fndp = 1; + } else + *fndp = 1; + } + NFSUNLOCKLAYOUT(lhyp); + /* Search the nfsrv_recalllist for a match. */ + TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { + if (NFSBCMP(&lyp->lay_fh, &fh, + sizeof(fh)) == 0 && + lyp->lay_clientid.qval == + nd->nd_clientid.qval && + stateidp->other[0] == + lyp->lay_stateid.other[0] && + stateidp->other[1] == + lyp->lay_stateid.other[1] && + stateidp->other[2] == + lyp->lay_stateid.other[2]) { + lyp->lay_flags |= NFSLAY_RETURNED; + wakeup(lyp); + error = 0; + } + } + NFSDRECALLUNLOCK(); + } + if (layouttype == NFSLAYOUT_FLEXFILE) + nfsrv_flexlayouterr(nd, layp, maxcnt, p); + } else if (kind == NFSV4LAYOUTRET_FSID) + nfsrv_freelayouts(&nd->nd_clientid, + &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode); + else if (kind == NFSV4LAYOUTRET_ALL) + nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode); + else + error = NFSERR_INVAL; + if (error == -1) + error = 0; + return (error); +} + +/* + * Look for an existing layout. + */ +static int +nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype, + NFSPROC_T *p, struct nfslayout **lypp) +{ + struct nfslayouthash *lhyp; + struct nfslayout *lyp; + int ret; + + *lypp = NULL; + ret = 0; + lhyp = NFSLAYOUTHASH(fhp); + TAILQ_FOREACH(lyp, &lhyp->list, lay_list) { + if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 && + lyp->lay_clientid.qval == clientidp->qval && + lyp->lay_type == laytype) + break; + } + if (lyp != NULL) + *lypp = lyp; + else + ret = -1; + return (ret); +} + +/* + * Add the new layout, as required. + */ +static int +nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp, + nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p) +{ + struct nfsclient *clp; + struct nfslayouthash *lhyp; + struct nfslayout *lyp, *nlyp; + fhandle_t *fhp; + int error; + + KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0, + ("nfsrv_layoutget: no nd_clientid\n")); + lyp = *lypp; + fhp = &lyp->lay_fh; + NFSLOCKSTATE(); + error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, + NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); + if (error != 0) { + NFSUNLOCKSTATE(); + return (error); + } + lyp->lay_stateid.seqid = stateidp->seqid = 1; + lyp->lay_stateid.other[0] = stateidp->other[0] = + clp->lc_clientid.lval[0]; + lyp->lay_stateid.other[1] = stateidp->other[1] = + clp->lc_clientid.lval[1]; + lyp->lay_stateid.other[2] = stateidp->other[2] = + nfsrv_nextstateindex(clp); + NFSUNLOCKSTATE(); + + lhyp = NFSLAYOUTHASH(fhp); + NFSLOCKLAYOUT(lhyp); + TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) { + if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 && + nlyp->lay_clientid.qval == nd->nd_clientid.qval) + break; + } + if (nlyp != NULL) { + /* A layout already exists, so use it. */ + nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)); + NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen); + *layoutlenp = nlyp->lay_layoutlen; + if (++nlyp->lay_stateid.seqid == 0) + nlyp->lay_stateid.seqid = 1; + stateidp->seqid = nlyp->lay_stateid.seqid; + stateidp->other[0] = nlyp->lay_stateid.other[0]; + stateidp->other[1] = nlyp->lay_stateid.other[1]; + stateidp->other[2] = nlyp->lay_stateid.other[2]; + NFSUNLOCKLAYOUT(lhyp); + return (0); + } + + /* Insert the new layout in the lists. */ + *lypp = NULL; + atomic_add_int(&nfsrv_layoutcnt, 1); + NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen); + *layoutlenp = lyp->lay_layoutlen; + TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list); + NFSUNLOCKLAYOUT(lhyp); + return (0); +} + +/* + * Get the devinfo for a deviceid. + */ +int +nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt, + uint32_t *notify, int *devaddrlen, char **devaddr) +{ + struct nfsdevice *ds; + + if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype != + NFSLAYOUT_FLEXFILE) || + (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES)) + return (NFSERR_UNKNLAYOUTTYPE); + + /* + * Now, search for the device id. Note that the structures won't go + * away, but the order changes in the list. As such, the lock only + * needs to be held during the search through the list. + */ + NFSDDSLOCK(); + TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { + if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 && + ds->nfsdev_nmp != NULL) + break; + } + NFSDDSUNLOCK(); + if (ds == NULL) + return (NFSERR_NOENT); + + /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. */ + *devaddrlen = 0; + if (layouttype == NFSLAYOUT_NFSV4_1_FILES) { + *devaddrlen = ds->nfsdev_fileaddrlen; + *devaddr = ds->nfsdev_fileaddr; + } else if (layouttype == NFSLAYOUT_FLEXFILE) { + *devaddrlen = ds->nfsdev_flexaddrlen; + *devaddr = ds->nfsdev_flexaddr; + } + if (*devaddrlen == 0) + return (NFSERR_UNKNLAYOUTTYPE); + + /* + * The XDR overhead is 3 unsigned values: layout_type, + * length_of_address and notify bitmap. + * If the notify array is changed to not all zeros, the + * count of unsigned values must be increased. + */ + if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) + + 3 * NFSX_UNSIGNED) { + *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED; + return (NFSERR_TOOSMALL); + } + return (0); +} + +/* + * Free a list of layout state structures. + */ +static void +nfsrv_freelayoutlist(nfsquad_t clientid) +{ + struct nfslayouthash *lhyp; + struct nfslayout *lyp, *nlyp; + int i; + + for (i = 0; i < nfsrv_layouthashsize; i++) { + lhyp = &nfslayouthash[i]; + NFSLOCKLAYOUT(lhyp); + TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { + if (lyp->lay_clientid.qval == clientid.qval) + nfsrv_freelayout(&lhyp->list, lyp); + } + NFSUNLOCKLAYOUT(lhyp); + } +} + +/* + * Free up a layout. + */ +static void +nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp) +{ + + NFSD_DEBUG(4, "Freelayout=%p\n", lyp); + atomic_add_int(&nfsrv_layoutcnt, -1); + TAILQ_REMOVE(lhp, lyp, lay_list); + free(lyp, M_NFSDSTATE); +} + +/* + * Free up a device id. + */ +void +nfsrv_freeonedevid(struct nfsdevice *ds) +{ + int i; + + atomic_add_int(&nfsrv_devidcnt, -1); + vrele(ds->nfsdev_dvp); + for (i = 0; i < nfsrv_dsdirsize; i++) + if (ds->nfsdev_dsdir[i] != NULL) + vrele(ds->nfsdev_dsdir[i]); + free(ds->nfsdev_fileaddr, M_NFSDSTATE); + free(ds->nfsdev_flexaddr, M_NFSDSTATE); + free(ds->nfsdev_host, M_NFSDSTATE); + free(ds, M_NFSDSTATE); +} + +/* + * Free up a device id and its mirrors. + */ +static void +nfsrv_freedevid(struct nfsdevice *ds) +{ + + TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); + nfsrv_freeonedevid(ds); +} + +/* + * Free all layouts and device ids. + * Done when the nfsd threads are shut down since there may be a new + * modified device id list created when the nfsd is restarted. + */ +void +nfsrv_freealllayoutsanddevids(void) +{ + struct nfsdontlist *mrp, *nmrp; + struct nfslayout *lyp, *nlyp; + + /* Get rid of the deviceid structures. */ + nfsrv_freealldevids(); + TAILQ_INIT(&nfsrv_devidhead); + nfsrv_devidcnt = 0; + + /* Get rid of all layouts. */ + nfsrv_freealllayouts(); + + /* Get rid of any nfsdontlist entries. */ + LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp) + free(mrp, M_NFSDSTATE); + LIST_INIT(&nfsrv_dontlisthead); + nfsrv_dontlistlen = 0; + + /* Free layouts in the recall list. */ + TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp) + nfsrv_freelayout(&nfsrv_recalllisthead, lyp); + TAILQ_INIT(&nfsrv_recalllisthead); +} + +/* + * Free layouts that match the arguments. + */ +static void +nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode) +{ + struct nfslayouthash *lhyp; + struct nfslayout *lyp, *nlyp; + int i; + + for (i = 0; i < nfsrv_layouthashsize; i++) { + lhyp = &nfslayouthash[i]; + NFSLOCKLAYOUT(lhyp); + TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { + if (clid->qval != lyp->lay_clientid.qval) + continue; + if (fs != NULL && (fs->val[0] != lyp->lay_fsid.val[0] || + fs->val[1] != lyp->lay_fsid.val[1])) + continue; + if (laytype != lyp->lay_type) + continue; + if ((iomode & NFSLAYOUTIOMODE_READ) != 0) + lyp->lay_flags &= ~NFSLAY_READ; + if ((iomode & NFSLAYOUTIOMODE_RW) != 0) + lyp->lay_flags &= ~NFSLAY_RW; + if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0) + nfsrv_freelayout(&lhyp->list, lyp); + } + NFSUNLOCKLAYOUT(lhyp); + } +} + +/* + * Free all layouts for the argument file. + */ +void +nfsrv_freefilelayouts(fhandle_t *fhp) +{ + struct nfslayouthash *lhyp; + struct nfslayout *lyp, *nlyp; + + lhyp = NFSLAYOUTHASH(fhp); + NFSLOCKLAYOUT(lhyp); + TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { + if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0) + nfsrv_freelayout(&lhyp->list, lyp); + } + NFSUNLOCKLAYOUT(lhyp); +} + +/* + * Free all layouts. + */ +static void +nfsrv_freealllayouts(void) +{ + struct nfslayouthash *lhyp; + struct nfslayout *lyp, *nlyp; + int i; + + for (i = 0; i < nfsrv_layouthashsize; i++) { + lhyp = &nfslayouthash[i]; + NFSLOCKLAYOUT(lhyp); + TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) + nfsrv_freelayout(&lhyp->list, lyp); + NFSUNLOCKLAYOUT(lhyp); + } +} + +/* + * Look up the mount path for the DS server. + */ +static int +nfsrv_setdsserver(char *dspathp, NFSPROC_T *p, struct nfsdevice **dsp) +{ + struct nameidata nd; + struct nfsdevice *ds; + int error, i; + char *dsdirpath; + size_t dsdirsize; + + NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp); + *dsp = NULL; + NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, + dspathp, p); + error = namei(&nd); + NFSD_DEBUG(4, "lookup=%d\n", error); + if (error != 0) + return (error); + if (nd.ni_vp->v_type != VDIR) { + vput(nd.ni_vp); + NFSD_DEBUG(4, "dspath not dir\n"); + return (ENOTDIR); + } + if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { + vput(nd.ni_vp); + NFSD_DEBUG(4, "dspath not an NFS mount\n"); + return (ENXIO); + } + + /* + * Allocate a DS server structure with the NFS mounted directory + * vnode reference counted, so that a non-forced dismount will + * fail with EBUSY. + */ + *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t), + M_NFSDSTATE, M_WAITOK | M_ZERO); + ds->nfsdev_dvp = nd.ni_vp; + ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount); + NFSVOPUNLOCK(nd.ni_vp, 0); + + dsdirsize = strlen(dspathp) + 16; + dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK); + /* Now, create the DS directory structures. */ + for (i = 0; i < nfsrv_dsdirsize; i++) { + snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, + UIO_SYSSPACE, dsdirpath, p); + error = namei(&nd); + NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error); + if (error != 0) + break; + if (nd.ni_vp->v_type != VDIR) { + vput(nd.ni_vp); + error = ENOTDIR; + NFSD_DEBUG(4, "dsdirpath not a VDIR\n"); + break; + } + if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { + vput(nd.ni_vp); + error = ENXIO; + NFSD_DEBUG(4, "dsdirpath not an NFS mount\n"); + break; + } + ds->nfsdev_dsdir[i] = nd.ni_vp; + NFSVOPUNLOCK(nd.ni_vp, 0); + } + free(dsdirpath, M_TEMP); + + TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); + atomic_add_int(&nfsrv_devidcnt, 1); + return (error); +} + +/* + * Look up the mount path for the DS server and delete it. + */ +int +nfsrv_deldsserver(char *dspathp, NFSPROC_T *p) +{ + struct mount *mp; + struct nfsmount *nmp; + struct nfsdevice *ds; + int error; + + NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp); + /* + * Search for the path in the mount list. Avoid looking the path + * up, since this mount point may be hung, with associated locked + * vnodes, etc. + * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked + * until this completes. + * As noted in the man page, this should be done before any forced + * dismount on the mount point, but at least the handshake on + * NFSMNTP_CANCELRPCS should make it safe. + */ + error = 0; + ds = NULL; + nmp = NULL; + mtx_lock(&mountlist_mtx); + TAILQ_FOREACH(mp, &mountlist, mnt_list) { + if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 && + strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 && + mp->mnt_data != NULL) { + nmp = VFSTONFS(mp); + NFSLOCKMNT(nmp); + if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | + NFSMNTP_CANCELRPCS)) == 0) { + nmp->nm_privflag |= NFSMNTP_CANCELRPCS; + NFSUNLOCKMNT(nmp); + } else { + NFSUNLOCKMNT(nmp); + nmp = NULL; + } + break; + } + } + mtx_unlock(&mountlist_mtx); + + if (nmp != NULL) { + ds = nfsrv_deldsnmp(nmp, p); + NFSD_DEBUG(4, "deldsnmp=%p\n", ds); + if (ds != NULL) { + nfsrv_killrpcs(nmp); + NFSD_DEBUG(4, "aft killrpcs\n"); + } else + error = ENXIO; + NFSLOCKMNT(nmp); + nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; + wakeup(nmp); + NFSUNLOCKMNT(nmp); + } else + error = EINVAL; + return (error); +} + +/* + * Search for and remove a DS entry which matches the "nmp" argument. + * The nfsdevice structure pointer is returned so that the caller can + * free it via nfsrv_freeonedevid(). + */ +struct nfsdevice * +nfsrv_deldsnmp(struct nfsmount *nmp, NFSPROC_T *p) +{ + struct nfsdevice *fndds; + + NFSD_DEBUG(4, "deldsdvp\n"); + NFSDDSLOCK(); + fndds = nfsv4_findmirror(nmp); + if (fndds != NULL) + nfsrv_deleteds(fndds); + NFSDDSUNLOCK(); + if (fndds != NULL) { + nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p); + printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host); + } + return (fndds); +} + +/* + * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid. + * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount + * point. + * Also, returns an error instead of the nfsdevice found. + */ +static int +nfsrv_delds(char *devid, NFSPROC_T *p) +{ + struct nfsdevice *ds, *fndds; + struct nfsmount *nmp; + int fndmirror; + + NFSD_DEBUG(4, "delds\n"); + /* + * Search the DS server list for a match with devid. + * Remove the DS entry if found and there is a mirror. + */ + fndds = NULL; + nmp = NULL; + fndmirror = 0; + NFSDDSLOCK(); + TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { + if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 && + ds->nfsdev_nmp != NULL) { + NFSD_DEBUG(4, "fnd main ds\n"); + fndds = ds; + } else if (ds->nfsdev_nmp != NULL) + fndmirror = 1; + if (fndds != NULL && fndmirror != 0) + break; + } + if (fndds != NULL && fndmirror != 0) { + nmp = fndds->nfsdev_nmp; + NFSLOCKMNT(nmp); + if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | + NFSMNTP_CANCELRPCS)) == 0) { + nmp->nm_privflag |= NFSMNTP_CANCELRPCS; + NFSUNLOCKMNT(nmp); + nfsrv_deleteds(fndds); + } else { + NFSUNLOCKMNT(nmp); + nmp = NULL; + } + } + NFSDDSUNLOCK(); + if (fndds != NULL && nmp != NULL) { + nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p); + printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host); + nfsrv_killrpcs(nmp); + NFSLOCKMNT(nmp); + nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; + wakeup(nmp); + NFSUNLOCKMNT(nmp); + return (0); + } + return (ENXIO); +} + +/* + * Mark a DS as disabled by setting nfsdev_nmp = NULL. + */ +static void +nfsrv_deleteds(struct nfsdevice *fndds) +{ + + NFSD_DEBUG(4, "deleteds: deleting a mirror\n"); + fndds->nfsdev_nmp = NULL; +} + +/* + * Fill in the addr structures for the File and Flex File layouts. + */ +static void +nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost) +{ + uint32_t *tl; + char *netprot; + int addrlen; + static uint64_t new_devid = 0; + + if (strchr(addr, ':') != NULL) + netprot = "tcp6"; + else + netprot = "tcp"; + + /* Fill in the device id. */ + NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time)); + new_devid++; + NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)], + sizeof(new_devid)); + + /* + * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4 + * as defined in RFC5661) in XDR. + */ + addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) + + 6 * NFSX_UNSIGNED; + NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot); + ds->nfsdev_fileaddrlen = addrlen; + tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO); + ds->nfsdev_fileaddr = (char *)tl; + *tl++ = txdr_unsigned(1); /* One stripe with index 0. */ + *tl++ = 0; + *tl++ = txdr_unsigned(1); /* One multipath list */ + *tl++ = txdr_unsigned(1); /* with one entry in it. */ + /* The netaddr for this one entry. */ + *tl++ = txdr_unsigned(strlen(netprot)); + NFSBCOPY(netprot, tl, strlen(netprot)); + tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED); + *tl++ = txdr_unsigned(strlen(addr)); + NFSBCOPY(addr, tl, strlen(addr)); + + /* + * Fill in the flex file addr (actually the ff_device_addr4 + * as defined for Flexible File Layout) in XDR. + */ + addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) + + 9 * NFSX_UNSIGNED; + ds->nfsdev_flexaddrlen = addrlen; + tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO); + ds->nfsdev_flexaddr = (char *)tl; + *tl++ = txdr_unsigned(1); /* One multipath entry. */ + /* The netaddr for this one entry. */ + *tl++ = txdr_unsigned(strlen(netprot)); + NFSBCOPY(netprot, tl, strlen(netprot)); + tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED); + *tl++ = txdr_unsigned(strlen(addr)); + NFSBCOPY(addr, tl, strlen(addr)); + tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED); + *tl++ = txdr_unsigned(1); /* One NFS Version. */ + *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */ + *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */ + *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */ + *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */ + *tl = newnfs_true; /* Tightly coupled. */ + + ds->nfsdev_hostnamelen = strlen(dnshost); + ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE, + M_WAITOK); + NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1); +} + + +/* + * Create the device id list. + * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument + * is misconfigured. + */ +int +nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p) +{ + struct nfsdevice *ds; + char *addrp, *dnshostp, *dspathp; + int error, i; + + addrp = args->addr; + dnshostp = args->dnshost; + dspathp = args->dspath; + nfsrv_maxpnfsmirror = args->mirrorcnt; + if (addrp == NULL || dnshostp == NULL || dspathp == NULL) + return (0); + + /* + * Loop around for each nul-terminated string in args->addr, + * args->dnshost and args->dnspath. + */ + while (addrp < (args->addr + args->addrlen) && + dnshostp < (args->dnshost + args->dnshostlen) && + dspathp < (args->dspath + args->dspathlen)) { + error = nfsrv_setdsserver(dspathp, p, &ds); + if (error != 0) { + /* Free all DS servers. */ + nfsrv_freealldevids(); + nfsrv_devidcnt = 0; + return (ENXIO); + } + nfsrv_allocdevid(ds, addrp, dnshostp); + addrp += (strlen(addrp) + 1); + dnshostp += (strlen(dnshostp) + 1); + dspathp += (strlen(dspathp) + 1); + } + if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) { + /* Free all DS servers. */ + nfsrv_freealldevids(); + nfsrv_devidcnt = 0; + nfsrv_maxpnfsmirror = 1; + return (ENXIO); + } + + /* + * Allocate the nfslayout hash table now, since this is a pNFS server. + * Make it 1% of the high water mark and at least 100. + */ + if (nfslayouthash == NULL) { + nfsrv_layouthashsize = nfsrv_layouthighwater / 100; + if (nfsrv_layouthashsize < 100) + nfsrv_layouthashsize = 100; + nfslayouthash = mallocarray(nfsrv_layouthashsize, + sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK | + M_ZERO); + for (i = 0; i < nfsrv_layouthashsize; i++) { + mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF); + TAILQ_INIT(&nfslayouthash[i].list); + } + } + return (0); +} + +/* + * Free all device ids. + */ +static void +nfsrv_freealldevids(void) +{ + struct nfsdevice *ds, *nds; + + TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds) + nfsrv_freedevid(ds); +} + +/* + * Check to see if there is a Read/Write Layout plus either: + * - A Write Delegation + * or + * - An Open with Write_access. + * Return 1 if this is the case and 0 otherwise. + * This function is used by nfsrv_proxyds() to decide if doing a Proxy + * Getattr RPC to the Data Server (DS) is necessary. + */ +#define NFSCLIDVECSIZE 6 +APPLESTATIC int +nfsrv_checkdsattr(struct nfsrv_descript *nd, vnode_t vp, NFSPROC_T *p) +{ + fhandle_t fh, *tfhp; + struct nfsstate *stp; + struct nfslayout *lyp; + struct nfslayouthash *lhyp; + struct nfslockhashhead *hp; + struct nfslockfile *lfp; + nfsquad_t clid[NFSCLIDVECSIZE]; + int clidcnt, ret; + + ret = nfsvno_getfh(vp, &fh, p); + if (ret != 0) + return (0); + + /* First check for a Read/Write Layout. */ + clidcnt = 0; + lhyp = NFSLAYOUTHASH(&fh); + NFSLOCKLAYOUT(lhyp); + TAILQ_FOREACH(lyp, &lhyp->list, lay_list) { + if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && + ((lyp->lay_flags & NFSLAY_RW) != 0 || + ((lyp->lay_flags & NFSLAY_READ) != 0 && + nfsrv_pnfsatime != 0))) { + if (clidcnt < NFSCLIDVECSIZE) + clid[clidcnt].qval = lyp->lay_clientid.qval; + clidcnt++; + } + } + NFSUNLOCKLAYOUT(lhyp); + if (clidcnt == 0) { + /* None found, so return 0. */ + return (0); + } + + /* Get the nfslockfile for this fh. */ + NFSLOCKSTATE(); + hp = NFSLOCKHASH(&fh); + LIST_FOREACH(lfp, hp, lf_hash) { + tfhp = &lfp->lf_fh; + if (NFSVNO_CMPFH(&fh, tfhp)) + break; + } + if (lfp == NULL) { + /* None found, so return 0. */ + NFSUNLOCKSTATE(); + return (0); + } + + /* Now, look for a Write delegation for this clientid. */ + LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { + if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 && + nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0) + break; + } + if (stp != NULL) { + /* Found one, so return 1. */ + NFSUNLOCKSTATE(); + return (1); + } + + /* No Write delegation, so look for an Open with Write_access. */ + LIST_FOREACH(stp, &lfp->lf_open, ls_file) { + KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0, + ("nfsrv_checkdsattr: Non-open in Open list\n")); + if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 && + nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0) + break; + } + NFSUNLOCKSTATE(); + if (stp != NULL) + return (1); + return (0); +} + +/* + * Look for a matching clientid in the vector. Return 1 if one might match. + */ +static int +nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt) +{ + int i; + + /* If too many for the vector, return 1 since there might be a match. */ + if (clidcnt > NFSCLIDVECSIZE) + return (1); + + for (i = 0; i < clidcnt; i++) + if (clidvec[i].qval == clid.qval) + return (1); + return (0); +} + +/* + * Check the don't list for "vp" and see if issuing an rw layout is allowed. + * Return 1 if issuing an rw layout isn't allowed, 0 otherwise. + */ +static int +nfsrv_dontlayout(fhandle_t *fhp) +{ + struct nfsdontlist *mrp; + int ret; + + if (nfsrv_dontlistlen == 0) + return (0); + ret = 0; + NFSDDONTLISTLOCK(); + LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) { + if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 && + (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) { + ret = 1; + break; + } + } + NFSDDONTLISTUNLOCK(); + return (ret); +} + +#define PNFSDS_COPYSIZ 65536 +/* + * Create a new file on a DS and copy the contents of an extant DS file to it. + * This can be used for recovery of a DS file onto a recovered DS. + * The steps are: + * - When called, the MDS file's vnode is locked, blocking LayoutGet operations. + * - Disable issuing of read/write layouts for the file via the nfsdontlist, + * so that they will be disabled after the MDS file's vnode is unlocked. + * - Set up the nfsrv_recalllist so that recall of read/write layouts can + * be done. + * - Unlock the MDS file's vnode, so that the client(s) can perform proxied + * writes, LayoutCommits and LayoutReturns for the file when completing the + * LayoutReturn requested by the LayoutRecall callback. + * - Issue a LayoutRecall callback for all read/write layouts and wait for + * them to be returned. (If the LayoutRecall callback replies + * NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.) + * - Exclusively lock the MDS file's vnode. This ensures that no proxied + * writes are in progress or can occur during the DS file copy. + * It also blocks Setattr operations. + * - Create the file on the recovered mirror. + * - Copy the file from the operational DS. + * - Copy any ACL from the MDS file to the new DS file. + * - Set the modify time of the new DS file to that of the MDS file. + * - Update the extended attribute for the MDS file. + * - Enable issuing of rw layouts by deleting the nfsdontlist entry. + * - The caller will unlock the MDS file's vnode allowing operations + * to continue normally, since it is now on the mirror again. + */ +int +nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds, + struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt, + struct ucred *cred, NFSPROC_T *p) +{ + struct nfsdontlist *mrp, *nmrp; + struct nfslayouthash *lhyp; + struct nfslayout *lyp, *nlyp; + struct nfslayouthead thl; + struct mount *mp; + struct acl *aclp; + struct vattr va; + struct timespec mtime; + fhandle_t fh; + vnode_t tvp; + off_t rdpos, wrpos; + ssize_t aresid; + char *dat; + int didprintf, ret, retacl, xfer; + + ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp"); + ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp"); + /* + * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag + * so that no more RW layouts will get issued. + */ + ret = nfsvno_getfh(vp, &fh, p); + if (ret != 0) { + NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret); + return (ret); + } + nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK); + nmrp->nfsmr_flags = NFSMR_DONTLAYOUT; + NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh)); + NFSDDONTLISTLOCK(); + LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) { + if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0) + break; + } + if (mrp == NULL) { + LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list); + mrp = nmrp; + nmrp = NULL; + nfsrv_dontlistlen++; + NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n"); + } else { + NFSDDONTLISTUNLOCK(); + free(nmrp, M_NFSDSTATE); + NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n"); + return (ENXIO); + } + NFSDDONTLISTUNLOCK(); + + /* + * Search for all RW layouts for this file. Move them to the + * recall list, so they can be recalled and their return noted. + */ + lhyp = NFSLAYOUTHASH(&fh); + NFSDRECALLLOCK(); + NFSLOCKLAYOUT(lhyp); + TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { + if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && + (lyp->lay_flags & NFSLAY_RW) != 0) { + TAILQ_REMOVE(&lhyp->list, lyp, lay_list); + TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list); + lyp->lay_trycnt = 0; + } + } + NFSUNLOCKLAYOUT(lhyp); + NFSDRECALLUNLOCK(); + + ret = 0; + didprintf = 0; + TAILQ_INIT(&thl); + /* Unlock the MDS vp, so that a LayoutReturn can be done on it. */ + NFSVOPUNLOCK(vp, 0); + /* Now, do a recall for all layouts not yet recalled. */ +tryagain: + NFSDRECALLLOCK(); + TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { + if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && + (lyp->lay_flags & NFSLAY_RECALL) == 0) { + lyp->lay_flags |= NFSLAY_RECALL; + /* + * The layout stateid.seqid needs to be incremented + * before doing a LAYOUT_RECALL callback. + */ + if (++lyp->lay_stateid.seqid == 0) + lyp->lay_stateid.seqid = 1; + NFSDRECALLUNLOCK(); + nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid, + &lyp->lay_fh, lyp, &nfsrv_recalllisthead, + lyp->lay_type, p); + NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n"); + goto tryagain; + } + } + + /* Now wait for them to be returned. */ +tryagain2: + TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { + if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) { + if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) { + TAILQ_REMOVE(&nfsrv_recalllisthead, lyp, + lay_list); + TAILQ_INSERT_HEAD(&thl, lyp, lay_list); + NFSD_DEBUG(4, + "nfsrv_copymr: layout returned\n"); + } else { + ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR, + PVFS | PCATCH, "nfsmrl", hz); + NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n", + ret); + if (ret == EINTR || ret == ERESTART) + break; + if ((lyp->lay_flags & NFSLAY_RETURNED) == 0 && + didprintf == 0) { + printf("nfsrv_copymr: layout not " + "returned\n"); + didprintf = 1; + } + } + goto tryagain2; + } + } + NFSDRECALLUNLOCK(); + /* We can now get rid of the layouts that have been returned. */ + TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp) + nfsrv_freelayout(&thl, lyp); + + /* + * LK_EXCLUSIVE lock the MDS vnode, so that any + * proxied writes through the MDS will be blocked until we have + * completed the copy and update of the extended attributes. + * This will also ensure that any attributes and ACL will not be + * changed until the copy is complete. + */ + NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); + if ((vp->v_iflag & VI_DOOMED) != 0) { + NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n"); + ret = ESTALE; + } + + /* Create the data file on the recovered DS. */ + if (ret == 0) + ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp); + + /* Copy the DS file, if created successfully. */ + if (ret == 0) { + /* + * Get any NFSv4 ACL on the MDS file, so that it can be set + * on the new DS file. + */ + aclp = acl_alloc(M_WAITOK | M_ZERO); + retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); + if (retacl != 0 && retacl != ENOATTR) + NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl); + dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK); + rdpos = wrpos = 0; + mp = NULL; + ret = vn_start_write(tvp, &mp, V_WAIT | PCATCH); + aresid = 0; + while (ret == 0 && aresid == 0) { + ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ, + rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL, + &aresid, p); + xfer = PNFSDS_COPYSIZ - aresid; + if (ret == 0 && xfer > 0) { + rdpos += xfer; + ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer, + wrpos, UIO_SYSSPACE, IO_NODELOCKED, + cred, NULL, NULL, p); + if (ret == 0) + wrpos += xfer; + } + } + + /* If there is an ACL and the copy succeeded, set the ACL. */ + if (ret == 0 && retacl == 0) { + ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p); + /* + * Don't consider these as errors, since VOP_GETACL() + * can return an ACL when they are not actually + * supported. For example, for UFS, VOP_GETACL() + * will return a trivial ACL based on the uid/gid/mode + * when there is no ACL on the file. + * This case should be recognized as a trivial ACL + * by UFS's VOP_SETACL() and succeed, but... + */ + if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM) + ret = 0; + } + + if (mp != NULL) + vn_finished_write(mp); + if (ret == 0) + ret = VOP_FSYNC(tvp, MNT_WAIT, p); + + /* Set the DS data file's modify time that of the MDS file. */ + if (ret == 0) + ret = VOP_GETATTR(vp, &va, cred); + if (ret == 0) { + mtime = va.va_mtime; + VATTR_NULL(&va); + va.va_mtime = mtime; + ret = VOP_SETATTR(tvp, &va, cred); + } + + vput(tvp); + acl_free(aclp); + free(dat, M_TEMP); + } + + /* Update the extended attributes for the newly created DS file. */ + if (ret == 0) { + mp = NULL; + ret = vn_start_write(vp, &mp, V_WAIT | PCATCH); + if (ret == 0) + ret = vn_extattr_set(vp, IO_NODELOCKED, + EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", + sizeof(*wpf) * mirrorcnt, (char *)wpf, p); + if (mp != NULL) + vn_finished_write(mp); + } + + /* Get rid of the dontlist entry, so that Layouts can be issued. */ + NFSDDONTLISTLOCK(); + LIST_REMOVE(mrp, nfsmr_list); + NFSDDONTLISTUNLOCK(); + free(mrp, M_NFSDSTATE); + return (ret); +} + +/* + * Create a data storage file on the recovered DS. + */ +static int +nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf, + vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p, + vnode_t *tvpp) +{ + struct vattr va, nva; + int error; + + /* Make data file name based on FH. */ + error = VOP_GETATTR(vp, &va, cred); + if (error == 0) { + /* Set the attributes for "vp" to Setattr the DS vp. */ + VATTR_NULL(&nva); + nva.va_uid = va.va_uid; + nva.va_gid = va.va_gid; + nva.va_mode = va.va_mode; + nva.va_size = 0; + VATTR_NULL(&va); + va.va_type = VREG; + va.va_mode = nva.va_mode; + NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf); + error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL, + pf->dsf_filename, cred, p, tvpp); + } + return (error); +} + +/* + * Look up the MDS file shared locked, and then get the extended attribute + * to find the extant DS file to be copied to the new mirror. + * If successful, *vpp is set to the MDS file's vp and *nvpp is + * set to a DS data file for the MDS file, both exclusively locked. + * The "buf" argument has the pnfsdsfile structure from the MDS file + * in it and buflen is set to its length. + */ +int +nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf, + int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp, + struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp, + struct nfsdevice **fdsp) +{ + struct nameidata nd; + struct vnode *vp, *curvp; + struct pnfsdsfile *pf; + struct nfsmount *nmp, *curnmp; + int dsdir, error, mirrorcnt, ippos; + + vp = NULL; + curvp = NULL; + curnmp = NULL; + *dsp = NULL; + *fdsp = NULL; + if (dspathp == NULL && curdspathp != NULL) + return (EPERM); + + /* + * Look up the MDS file shared locked. The lock will be upgraded + * to an exclusive lock after any rw layouts have been returned. + */ + NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, + mdspathp, p); + error = namei(&nd); + NFSD_DEBUG(4, "lookup=%d\n", error); + if (error != 0) + return (error); + if (nd.ni_vp->v_type != VREG) { + vput(nd.ni_vp); + NFSD_DEBUG(4, "mdspath not reg\n"); + return (EISDIR); + } + vp = nd.ni_vp; + + if (curdspathp != NULL) { + /* + * Look up the current DS path and find the nfsdev structure for + * it. + */ + NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, + UIO_SYSSPACE, curdspathp, p); + error = namei(&nd); + NFSD_DEBUG(4, "ds lookup=%d\n", error); + if (error != 0) { + vput(vp); + return (error); + } + if (nd.ni_vp->v_type != VDIR) { + vput(nd.ni_vp); + vput(vp); + NFSD_DEBUG(4, "curdspath not dir\n"); + return (ENOTDIR); + } + if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { + vput(nd.ni_vp); + vput(vp); + NFSD_DEBUG(4, "curdspath not an NFS mount\n"); + return (ENXIO); + } + curnmp = VFSTONFS(nd.ni_vp->v_mount); + + /* Search the nfsdev list for a match. */ + NFSDDSLOCK(); + *fdsp = nfsv4_findmirror(curnmp); + NFSDDSUNLOCK(); + if (*fdsp == NULL) + curnmp = NULL; + if (curnmp == NULL) { + vput(nd.ni_vp); + vput(vp); + NFSD_DEBUG(4, "mdscopymr: no current ds\n"); + return (ENXIO); + } + curvp = nd.ni_vp; + } + + if (dspathp != NULL) { + /* Look up the nfsdev path and find the nfsdev structure. */ + NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, + UIO_SYSSPACE, dspathp, p); + error = namei(&nd); + NFSD_DEBUG(4, "ds lookup=%d\n", error); + if (error != 0) { + vput(vp); + if (curvp != NULL) + vput(curvp); + return (error); + } + if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) { + vput(nd.ni_vp); + vput(vp); + if (curvp != NULL) + vput(curvp); + NFSD_DEBUG(4, "dspath not dir\n"); + if (nd.ni_vp == curvp) + return (EPERM); + return (ENOTDIR); + } + if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { + vput(nd.ni_vp); + vput(vp); + if (curvp != NULL) + vput(curvp); + NFSD_DEBUG(4, "dspath not an NFS mount\n"); + return (ENXIO); + } + nmp = VFSTONFS(nd.ni_vp->v_mount); + + /* Search the nfsdev list for a match. */ + NFSDDSLOCK(); + *dsp = nfsv4_findmirror(nmp); + NFSDDSUNLOCK(); + if (*dsp == NULL) { + vput(nd.ni_vp); + vput(vp); + if (curvp != NULL) + vput(curvp); + NFSD_DEBUG(4, "mdscopymr: no ds\n"); + return (ENXIO); + } + } else { + nd.ni_vp = NULL; + nmp = NULL; + } + + /* + * Get a vp for an available DS data file using the extended + * attribute on the MDS file. + * If there is a valid entry for the new DS in the extended attribute + * on the MDS file (as checked via the nmp argument), + * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur. + */ + error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p, + NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir); + if (curvp != NULL) + vput(curvp); + if (nd.ni_vp == NULL) { + if (error == 0 && nmp != NULL) { + /* Search the nfsdev list for a match. */ + NFSDDSLOCK(); + *dsp = nfsv4_findmirror(nmp); + NFSDDSUNLOCK(); + } + if (error == 0 && (nmp == NULL || *dsp == NULL)) { + if (nvpp != NULL && *nvpp != NULL) { + vput(*nvpp); + *nvpp = NULL; + } + error = ENXIO; + } + } else + vput(nd.ni_vp); + + /* + * When dspathp != NULL and curdspathp == NULL, this is a recovery + * and is only allowed if there is a 0.0.0.0 IP address entry. + * When curdspathp != NULL, the ippos will be set to that entry. + */ + if (error == 0 && dspathp != NULL && ippos == -1) { + if (nvpp != NULL && *nvpp != NULL) { + vput(*nvpp); + *nvpp = NULL; + } + error = ENXIO; + } + if (error == 0) { + *vpp = vp; + + pf = (struct pnfsdsfile *)buf; + if (ippos == -1) { + /* If no zeroip pnfsdsfile, add one. */ + ippos = *buflenp / sizeof(*pf); + *buflenp += sizeof(*pf); + pf += ippos; + pf->dsf_dir = dsdir; + strlcpy(pf->dsf_filename, fname, + sizeof(pf->dsf_filename)); + } else + pf += ippos; + *pfp = pf; + } else + vput(vp); + return (error); } Index: head/sys/fs/nfsserver/nfs_nfsdsubs.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdsubs.c (revision 335011) +++ head/sys/fs/nfsserver/nfs_nfsdsubs.c (revision 335012) @@ -1,2122 +1,2132 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #include extern u_int32_t newnfs_true, newnfs_false; extern int nfs_pubfhset; extern struct nfsclienthashhead *nfsclienthash; extern int nfsrv_clienthashsize; extern struct nfslockhashhead *nfslockhash; extern int nfsrv_lockhashsize; extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; extern int nfsrv_useacl; extern uid_t nfsrv_defaultuid; extern gid_t nfsrv_defaultgid; char nfs_v2pubfh[NFSX_V2FH]; +struct nfsdontlisthead nfsrv_dontlisthead; +struct nfslayouthead nfsrv_recalllisthead; static nfstype newnfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, NFCHR, NFNON }; extern nfstype nfsv34_type[9]; #endif /* !APPLEKEXT */ static u_int32_t nfsrv_isannfserr(u_int32_t); SYSCTL_DECL(_vfs_nfsd); static int enable_checkutf8 = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_checkutf8, CTLFLAG_RW, &enable_checkutf8, 0, "Enable the NFSv4 check for the UTF8 compliant name required by rfc3530"); static int enable_nobodycheck = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_nobodycheck, CTLFLAG_RW, &enable_nobodycheck, 0, "Enable the NFSv4 check when setting user nobody as owner"); static int enable_nogroupcheck = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_nogroupcheck, CTLFLAG_RW, &enable_nogroupcheck, 0, "Enable the NFSv4 check when setting group nogroup as owner"); static char nfsrv_hexdigit(char, int *); /* * Maps errno values to nfs error numbers. * Use NFSERR_IO as the catch all for ones not specifically defined in * RFC 1094. (It now includes the errors added for NFSv3.) */ static u_char nfsrv_v2errmap[NFSERR_REMOTE] = { NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NODEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, NFSERR_MLINK, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, NFSERR_REMOTE, }; /* * Maps errno values to nfs error numbers. * Although it is not obvious whether or not NFS clients really care if * a returned error value is in the specified list for the procedure, the * safest thing to do is filter them appropriately. For Version 2, the * X/Open XNFS document is the only specification that defines error values * for each RPC (The RFC simply lists all possible error values for all RPCs), * so I have decided to not do this for Version 2. * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. */ static short nfsv3err_null[] = { 0, 0, }; static short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_PERM, NFSERR_IO, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOT_SYNC, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_ACCES, NFSERR_NAMETOL, NFSERR_IO, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOSPC, NFSERR_INVAL, NFSERR_FBIG, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_create[] = { NFSERR_IO, NFSERR_EXIST, NFSERR_NAMETOL, NFSERR_ACCES, NFSERR_IO, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_EXIST, NFSERR_ACCES, NFSERR_NAMETOL, NFSERR_IO, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NAMETOL, NFSERR_NOSPC, NFSERR_IO, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_mknod[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NAMETOL, NFSERR_NOSPC, NFSERR_IO, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, NFSERR_BADTYPE, 0, }; static short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_ACCES, NFSERR_NAMETOL, NFSERR_IO, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_NAMETOL, NFSERR_IO, NFSERR_EXIST, NFSERR_INVAL, NFSERR_ROFS, NFSERR_NOTEMPTY, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_rename[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NAMETOL, NFSERR_XDEV, NFSERR_IO, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NOTEMPTY, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_link[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NAMETOL, NFSERR_IO, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_NOTSUPP, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_fsstat[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, NFSERR_DELAY, 0, }; static short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, nfsv3err_lookup, nfsv3err_access, nfsv3err_readlink, nfsv3err_read, nfsv3err_write, nfsv3err_create, nfsv3err_mkdir, nfsv3err_symlink, nfsv3err_mknod, nfsv3err_remove, nfsv3err_rmdir, nfsv3err_rename, nfsv3err_link, nfsv3err_readdir, nfsv3err_readdirplus, nfsv3err_fsstat, nfsv3err_fsinfo, nfsv3err_pathconf, nfsv3err_commit, }; /* * And the same for V4. */ static short nfsv4err_null[] = { 0, 0, }; static short nfsv4err_access[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_close[] = { NFSERR_EXPIRED, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADSEQID, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKSHELD, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_OLDSTATEID, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_commit[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_ISDIR, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_create[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ATTRNOTSUPP, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADOWNER, NFSERR_BADTYPE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXIST, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_NOTDIR, NFSERR_PERM, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_delegpurge[] = { NFSERR_SERVERFAULT, NFSERR_BADXDR, NFSERR_NOTSUPP, NFSERR_LEASEMOVED, NFSERR_MOVED, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALECLIENTID, 0, }; static short nfsv4err_delegreturn[] = { NFSERR_SERVERFAULT, NFSERR_ADMINREVOKED, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_EXPIRED, NFSERR_INVAL, NFSERR_LEASEMOVED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOTSUPP, NFSERR_OLDSTATEID, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_getattr[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_getfh[] = { NFSERR_BADHANDLE, NFSERR_BADHANDLE, NFSERR_FHEXPIRED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_link[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXIST, NFSERR_FHEXPIRED, NFSERR_FILEOPEN, NFSERR_INVAL, NFSERR_IO, NFSERR_ISDIR, NFSERR_MLINK, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_NOTDIR, NFSERR_NOTSUPP, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_WRONGSEC, NFSERR_XDEV, 0, }; static short nfsv4err_lock[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADRANGE, NFSERR_BADSEQID, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_DEADLOCK, NFSERR_DELAY, NFSERR_DENIED, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKNOTSUPP, NFSERR_LOCKRANGE, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOGRACE, NFSERR_OLDSTATEID, NFSERR_OPENMODE, NFSERR_RECLAIMBAD, NFSERR_RECLAIMCONFLICT, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALECLIENTID, NFSERR_STALESTATEID, 0, }; static short nfsv4err_lockt[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BADRANGE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DENIED, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKRANGE, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALECLIENTID, 0, }; static short nfsv4err_locku[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADRANGE, NFSERR_BADSEQID, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKRANGE, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_OLDSTATEID, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_lookup[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADXDR, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOTDIR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_SYMLINK, NFSERR_WRONGSEC, 0, }; static short nfsv4err_lookupp[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_FHEXPIRED, NFSERR_IO, NFSERR_MOVED, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOTDIR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_nverify[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ATTRNOTSUPP, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SAME, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_open[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_ATTRNOTSUPP, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADOWNER, NFSERR_BADSEQID, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXIST, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_IO, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOGRACE, NFSERR_NOSPC, NFSERR_NOTDIR, NFSERR_NOTSUPP, NFSERR_PERM, NFSERR_RECLAIMBAD, NFSERR_RECLAIMCONFLICT, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_SHAREDENIED, NFSERR_STALE, NFSERR_STALECLIENTID, NFSERR_SYMLINK, NFSERR_WRONGSEC, 0, }; static short nfsv4err_openattr[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_FHEXPIRED, NFSERR_IO, NFSERR_MOVED, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_NOTSUPP, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_openconfirm[] = { NFSERR_SERVERFAULT, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADSEQID, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_OLDSTATEID, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_opendowngrade[] = { NFSERR_SERVERFAULT, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADSEQID, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_OLDSTATEID, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_putfh[] = { NFSERR_SERVERFAULT, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_FHEXPIRED, NFSERR_MOVED, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_WRONGSEC, 0, }; static short nfsv4err_putpubfh[] = { NFSERR_SERVERFAULT, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_WRONGSEC, 0, }; static short nfsv4err_putrootfh[] = { NFSERR_SERVERFAULT, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_WRONGSEC, 0, }; static short nfsv4err_read[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_EXPIRED, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_IO, NFSERR_INVAL, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NXIO, NFSERR_OLDSTATEID, NFSERR_OPENMODE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_readdir[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOTDIR, NFSERR_NOTSAME, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_TOOSMALL, 0, }; static short nfsv4err_readlink[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADHANDLE, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_IO, NFSERR_ISDIR, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOTSUPP, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_remove[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_FILEOPEN, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOTDIR, NFSERR_NOTEMPTY, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_rename[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXIST, NFSERR_FHEXPIRED, NFSERR_FILEOPEN, NFSERR_INVAL, NFSERR_IO, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_NOTDIR, NFSERR_NOTEMPTY, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_WRONGSEC, NFSERR_XDEV, 0, }; static short nfsv4err_renew[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_BADXDR, NFSERR_CBPATHDOWN, NFSERR_EXPIRED, NFSERR_LEASEMOVED, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALECLIENTID, 0, }; static short nfsv4err_restorefh[] = { NFSERR_SERVERFAULT, NFSERR_BADHANDLE, NFSERR_FHEXPIRED, NFSERR_MOVED, NFSERR_RESOURCE, NFSERR_RESTOREFH, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_WRONGSEC, 0, }; static short nfsv4err_savefh[] = { NFSERR_SERVERFAULT, NFSERR_BADHANDLE, NFSERR_FHEXPIRED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_secinfo[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADNAME, NFSERR_BADXDR, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_MOVED, NFSERR_NAMETOL, NFSERR_NOENT, NFSERR_NOFILEHANDLE, NFSERR_NOTDIR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_setattr[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_ATTRNOTSUPP, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADOWNER, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXPIRED, NFSERR_FBIG, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_INVAL, NFSERR_IO, NFSERR_ISDIR, NFSERR_LOCKED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_OLDSTATEID, NFSERR_OPENMODE, NFSERR_PERM, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_setclientid[] = { NFSERR_SERVERFAULT, NFSERR_BADXDR, NFSERR_CLIDINUSE, NFSERR_INVAL, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_WRONGSEC, 0, }; static short nfsv4err_setclientidconfirm[] = { NFSERR_SERVERFAULT, NFSERR_BADXDR, NFSERR_CLIDINUSE, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALECLIENTID, 0, }; static short nfsv4err_verify[] = { NFSERR_SERVERFAULT, NFSERR_ACCES, NFSERR_ATTRNOTSUPP, NFSERR_BADCHAR, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_FHEXPIRED, NFSERR_INVAL, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOTSAME, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALE, 0, }; static short nfsv4err_write[] = { NFSERR_IO, NFSERR_ACCES, NFSERR_ADMINREVOKED, NFSERR_BADHANDLE, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_DELAY, NFSERR_DQUOT, NFSERR_EXPIRED, NFSERR_FBIG, NFSERR_FHEXPIRED, NFSERR_GRACE, NFSERR_INVAL, NFSERR_IO, NFSERR_ISDIR, NFSERR_LEASEMOVED, NFSERR_LOCKED, NFSERR_MOVED, NFSERR_NOFILEHANDLE, NFSERR_NOSPC, NFSERR_NXIO, NFSERR_OLDSTATEID, NFSERR_OPENMODE, NFSERR_RESOURCE, NFSERR_ROFS, NFSERR_SERVERFAULT, NFSERR_STALE, NFSERR_STALESTATEID, 0, }; static short nfsv4err_releaselockowner[] = { NFSERR_SERVERFAULT, NFSERR_ADMINREVOKED, NFSERR_BADXDR, NFSERR_EXPIRED, NFSERR_LEASEMOVED, NFSERR_LOCKSHELD, NFSERR_RESOURCE, NFSERR_SERVERFAULT, NFSERR_STALECLIENTID, 0, }; static short *nfsrv_v4errmap[] = { nfsv4err_null, nfsv4err_null, nfsv4err_null, nfsv4err_access, nfsv4err_close, nfsv4err_commit, nfsv4err_create, nfsv4err_delegpurge, nfsv4err_delegreturn, nfsv4err_getattr, nfsv4err_getfh, nfsv4err_link, nfsv4err_lock, nfsv4err_lockt, nfsv4err_locku, nfsv4err_lookup, nfsv4err_lookupp, nfsv4err_nverify, nfsv4err_open, nfsv4err_openattr, nfsv4err_openconfirm, nfsv4err_opendowngrade, nfsv4err_putfh, nfsv4err_putpubfh, nfsv4err_putrootfh, nfsv4err_read, nfsv4err_readdir, nfsv4err_readlink, nfsv4err_remove, nfsv4err_rename, nfsv4err_renew, nfsv4err_restorefh, nfsv4err_savefh, nfsv4err_secinfo, nfsv4err_setattr, nfsv4err_setclientid, nfsv4err_setclientidconfirm, nfsv4err_verify, nfsv4err_write, nfsv4err_releaselockowner, }; /* * A fiddled version of m_adj() that ensures null fill to a long * boundary and only trims off the back end */ APPLESTATIC void nfsrv_adj(mbuf_t mp, int len, int nul) { mbuf_t m; int count, i; char *cp; /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ count = 0; m = mp; for (;;) { count += mbuf_len(m); if (mbuf_next(m) == NULL) break; m = mbuf_next(m); } if (mbuf_len(m) > len) { mbuf_setlen(m, mbuf_len(m) - len); if (nul > 0) { cp = NFSMTOD(m, caddr_t) + mbuf_len(m) - nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ for (m = mp; m; m = mbuf_next(m)) { if (mbuf_len(m) >= count) { mbuf_setlen(m, count); if (nul > 0) { cp = NFSMTOD(m, caddr_t) + mbuf_len(m) - nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } break; } count -= mbuf_len(m); } for (m = mbuf_next(m); m; m = mbuf_next(m)) mbuf_setlen(m, 0); } /* * Make these functions instead of macros, so that the kernel text size * doesn't get too big... */ APPLESTATIC void nfsrv_wcc(struct nfsrv_descript *nd, int before_ret, struct nfsvattr *before_nvap, int after_ret, struct nfsvattr *after_nvap) { u_int32_t *tl; if (before_ret) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } else { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(before_nvap->na_size, tl); tl += 2; txdr_nfsv3time(&(before_nvap->na_mtime), tl); tl += 2; txdr_nfsv3time(&(before_nvap->na_ctime), tl); } nfsrv_postopattr(nd, after_ret, after_nvap); } APPLESTATIC void nfsrv_postopattr(struct nfsrv_descript *nd, int after_ret, struct nfsvattr *after_nvap) { u_int32_t *tl; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (after_ret) *tl = newnfs_false; else { *tl = newnfs_true; nfsrv_fillattr(nd, after_nvap); } } /* * Fill in file attributes for V2 and 3. For V4, call a separate * routine that sifts through all the attribute bits. */ APPLESTATIC void nfsrv_fillattr(struct nfsrv_descript *nd, struct nfsvattr *nvap) { struct nfs_fattr *fp; int fattr_size; /* * Build space for the attribute structure. */ if (nd->nd_flag & ND_NFSV3) fattr_size = NFSX_V3FATTR; else fattr_size = NFSX_V2FATTR; NFSM_BUILD(fp, struct nfs_fattr *, fattr_size); /* * Now just fill it all in. */ fp->fa_nlink = txdr_unsigned(nvap->na_nlink); fp->fa_uid = txdr_unsigned(nvap->na_uid); fp->fa_gid = txdr_unsigned(nvap->na_gid); if (nd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv34_type(nvap->na_type); fp->fa_mode = vtonfsv34_mode(nvap->na_mode); txdr_hyper(nvap->na_size, &fp->fa3_size); txdr_hyper(nvap->na_bytes, &fp->fa3_used); fp->fa3_rdev.specdata1 = txdr_unsigned(NFSMAJOR(nvap->na_rdev)); fp->fa3_rdev.specdata2 = txdr_unsigned(NFSMINOR(nvap->na_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(nvap->na_fsid); txdr_hyper(nvap->na_fileid, &fp->fa3_fileid); txdr_nfsv3time(&nvap->na_atime, &fp->fa3_atime); txdr_nfsv3time(&nvap->na_mtime, &fp->fa3_mtime); txdr_nfsv3time(&nvap->na_ctime, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(nvap->na_type); fp->fa_mode = vtonfsv2_mode(nvap->na_type, nvap->na_mode); fp->fa2_size = txdr_unsigned(nvap->na_size); fp->fa2_blocksize = txdr_unsigned(nvap->na_blocksize); if (nvap->na_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(nvap->na_rdev); fp->fa2_blocks = txdr_unsigned(nvap->na_bytes / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(nvap->na_fsid); fp->fa2_fileid = txdr_unsigned(nvap->na_fileid); txdr_nfsv2time(&nvap->na_atime, &fp->fa2_atime); txdr_nfsv2time(&nvap->na_mtime, &fp->fa2_mtime); txdr_nfsv2time(&nvap->na_ctime, &fp->fa2_ctime); } } /* * This function gets a file handle out of an mbuf list. * It returns 0 for success, EBADRPC otherwise. * If sets the third flagp argument to 1 if the file handle is * the public file handle. * For NFSv4, if the length is incorrect, set nd_repstat == NFSERR_BADHANDLE */ APPLESTATIC int nfsrv_mtofh(struct nfsrv_descript *nd, struct nfsrvfh *fhp) { u_int32_t *tl; int error = 0, len, copylen; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len == 0 && nfs_pubfhset && (nd->nd_flag & ND_NFSV3) && nd->nd_procnum == NFSPROC_LOOKUP) { nd->nd_flag |= ND_PUBLOOKUP; goto nfsmout; } - if (len < NFSRV_MINFH || len > NFSRV_MAXFH) { + copylen = len; + + /* If len == NFSX_V4PNFSFH the RPC is a pNFS DS one. */ + if (len == NFSX_V4PNFSFH && (nd->nd_flag & ND_NFSV41) != 0) { + copylen = NFSX_MYFH; + len = NFSM_RNDUP(len); + nd->nd_flag |= ND_DSSERVER; + } else if (len < NFSRV_MINFH || len > NFSRV_MAXFH) { if (nd->nd_flag & ND_NFSV4) { if (len > 0 && len <= NFSX_V4FHMAX) { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; nd->nd_repstat = NFSERR_BADHANDLE; goto nfsmout; } else { error = EBADRPC; goto nfsmout; } } else { error = EBADRPC; goto nfsmout; } } - copylen = len; } else { /* * For NFSv2, the file handle is always 32 bytes on the * wire, but this server only cares about the first * NFSRV_MAXFH bytes. */ len = NFSX_V2FH; copylen = NFSRV_MAXFH; } NFSM_DISSECT(tl, u_int32_t *, len); if ((nd->nd_flag & ND_NFSV2) && nfs_pubfhset && nd->nd_procnum == NFSPROC_LOOKUP && !NFSBCMP((caddr_t)tl, nfs_v2pubfh, NFSX_V2FH)) { nd->nd_flag |= ND_PUBLOOKUP; goto nfsmout; } NFSBCOPY(tl, (caddr_t)fhp->nfsrvfh_data, copylen); fhp->nfsrvfh_len = copylen; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Map errnos to NFS error numbers. For Version 3 and 4 also filter out error * numbers not specified for the associated procedure. * NFSPROC_NOOP is a special case, where the high order bits of nd_repstat * should be cleared. NFSPROC_NOOP is used to return errors when a valid * RPC procedure is not involved. * Returns the error number in XDR. */ APPLESTATIC int nfsd_errmap(struct nfsrv_descript *nd) { short *defaulterrp, *errp; if (!nd->nd_repstat) return (0); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { if (nd->nd_procnum == NFSPROC_NOOP) return (txdr_unsigned(nd->nd_repstat & 0xffff)); if (nd->nd_flag & ND_NFSV3) errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; else if (nd->nd_repstat == EBADRPC) return (txdr_unsigned(NFSERR_BADXDR)); else if (nd->nd_repstat == NFSERR_MINORVERMISMATCH || nd->nd_repstat == NFSERR_OPILLEGAL) return (txdr_unsigned(nd->nd_repstat)); else if ((nd->nd_flag & ND_NFSV41) != 0) { if (nd->nd_repstat == EOPNOTSUPP) nd->nd_repstat = NFSERR_NOTSUPP; nd->nd_repstat = nfsrv_isannfserr(nd->nd_repstat); return (txdr_unsigned(nd->nd_repstat)); } else errp = defaulterrp = nfsrv_v4errmap[nd->nd_procnum]; while (*++errp) if (*errp == nd->nd_repstat) return (txdr_unsigned(nd->nd_repstat)); return (txdr_unsigned(*defaulterrp)); } if (nd->nd_repstat <= NFSERR_REMOTE) return (txdr_unsigned(nfsrv_v2errmap[nd->nd_repstat - 1])); return (txdr_unsigned(NFSERR_IO)); } /* * Check to see if the error is a valid NFS one. If not, replace it with * NFSERR_IO. */ static u_int32_t nfsrv_isannfserr(u_int32_t errval) { if (errval == NFSERR_OK) return (errval); if (errval >= NFSERR_BADHANDLE && errval <= NFSERR_DELEGREVOKED) return (errval); if (errval > 0 && errval <= NFSERR_REMOTE) return (nfsrv_v2errmap[errval - 1]); return (NFSERR_IO); } /* * Check to see if setting a uid/gid is permitted when creating a new * file object. (Called when uid and/or gid is specified in the * settable attributes for V4. */ APPLESTATIC int nfsrv_checkuidgid(struct nfsrv_descript *nd, struct nfsvattr *nvap) { int error = 0; /* * If not setting either uid nor gid, it's OK. */ if (NFSVNO_NOTSETUID(nvap) && NFSVNO_NOTSETGID(nvap)) goto out; if ((NFSVNO_ISSETUID(nvap) && nvap->na_uid == nfsrv_defaultuid && enable_nobodycheck == 1) || (NFSVNO_ISSETGID(nvap) && nvap->na_gid == nfsrv_defaultgid && enable_nogroupcheck == 1)) { error = NFSERR_BADOWNER; goto out; } if (nd->nd_cred->cr_uid == 0) goto out; if ((NFSVNO_ISSETUID(nvap) && nvap->na_uid != nd->nd_cred->cr_uid) || (NFSVNO_ISSETGID(nvap) && nvap->na_gid != nd->nd_cred->cr_gid && !groupmember(nvap->na_gid, nd->nd_cred))) error = NFSERR_PERM; out: NFSEXITCODE2(error, nd); return (error); } /* * and this routine fixes up the settable attributes for V4 if allowed * by nfsrv_checkuidgid(). */ APPLESTATIC void nfsrv_fixattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, NFSACL_T *aclp, NFSPROC_T *p, nfsattrbit_t *attrbitp, struct nfsexstuff *exp) { int change = 0; struct nfsvattr nva; uid_t tuid; int error; nfsattrbit_t nattrbits; /* * Maybe this should be done for V2 and 3 but it never has been * and nobody seems to be upset, so I think it's best not to change * the V2 and 3 semantics. */ if ((nd->nd_flag & ND_NFSV4) == 0) goto out; NFSVNO_ATTRINIT(&nva); NFSZERO_ATTRBIT(&nattrbits); tuid = nd->nd_cred->cr_uid; if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) && NFSVNO_ISSETUID(nvap) && nvap->na_uid != nd->nd_cred->cr_uid) { if (nd->nd_cred->cr_uid == 0) { nva.na_uid = nvap->na_uid; change++; NFSSETBIT_ATTRBIT(&nattrbits, NFSATTRBIT_OWNER); } else { NFSCLRBIT_ATTRBIT(attrbitp, NFSATTRBIT_OWNER); } } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESSSET) && NFSVNO_ISSETATIME(nvap)) { nva.na_atime = nvap->na_atime; change++; NFSSETBIT_ATTRBIT(&nattrbits, NFSATTRBIT_TIMEACCESSSET); } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFYSET) && NFSVNO_ISSETMTIME(nvap)) { nva.na_mtime = nvap->na_mtime; change++; NFSSETBIT_ATTRBIT(&nattrbits, NFSATTRBIT_TIMEMODIFYSET); } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) && NFSVNO_ISSETGID(nvap)) { if (nvap->na_gid == nd->nd_cred->cr_gid || groupmember(nvap->na_gid, nd->nd_cred)) { nd->nd_cred->cr_uid = 0; nva.na_gid = nvap->na_gid; change++; NFSSETBIT_ATTRBIT(&nattrbits, NFSATTRBIT_OWNERGROUP); } else { NFSCLRBIT_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP); } } if (change) { error = nfsvno_setattr(vp, &nva, nd->nd_cred, p, exp); if (error) { NFSCLRALL_ATTRBIT(attrbitp, &nattrbits); } } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) && NFSVNO_ISSETSIZE(nvap) && nvap->na_size != (u_quad_t)0) { NFSCLRBIT_ATTRBIT(attrbitp, NFSATTRBIT_SIZE); } #ifdef NFS4_ACL_EXTATTR_NAME if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL) && nfsrv_useacl != 0 && aclp != NULL) { if (aclp->acl_cnt > 0) { error = nfsrv_setacl(vp, aclp, nd->nd_cred, p); if (error) { NFSCLRBIT_ATTRBIT(attrbitp, NFSATTRBIT_ACL); } } } else #endif NFSCLRBIT_ATTRBIT(attrbitp, NFSATTRBIT_ACL); nd->nd_cred->cr_uid = tuid; out: NFSEXITCODE2(0, nd); } /* * Translate an ASCII hex digit to it's binary value. Return -1 if the * char isn't a hex digit. */ static char nfsrv_hexdigit(char c, int *err) { *err = 0; if (c >= '0' && c <= '9') return (c - '0'); if (c >= 'a' && c <= 'f') return (c - 'a' + ((char)10)); if (c >= 'A' && c <= 'F') return (c - 'A' + ((char)10)); /* Not valid ! */ *err = 1; return (1); /* BOGUS */ } /* * Check to see if NFSERR_MOVED can be returned for this op. Return 1 iff * it can be. */ APPLESTATIC int nfsrv_errmoved(int op) { short *errp; errp = nfsrv_v4errmap[op]; while (*errp != 0) { if (*errp == NFSERR_MOVED) return (1); errp++; } return (0); } /* * Fill in attributes for a Referral. * (Return the number of bytes of XDR created.) */ APPLESTATIC int nfsrv_putreferralattr(struct nfsrv_descript *nd, nfsattrbit_t *retbitp, struct nfsreferral *refp, int getattr, int *reterrp) { u_int32_t *tl, *retnump; u_char *cp, *cp2; int prefixnum, retnum = 0, i, len, bitpos, rderrbit = 0, nonrefbit = 0; int fslocationsbit = 0; nfsattrbit_t tmpbits, refbits; NFSREFERRAL_ATTRBIT(&refbits); if (getattr) NFSCLRBIT_ATTRBIT(&refbits, NFSATTRBIT_RDATTRERROR); else if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_RDATTRERROR)) rderrbit = 1; if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_FSLOCATIONS)) fslocationsbit = 1; /* * Check for the case where unsupported referral attributes are * requested. */ NFSSET_ATTRBIT(&tmpbits, retbitp); NFSCLRALL_ATTRBIT(&tmpbits, &refbits); if (NFSNONZERO_ATTRBIT(&tmpbits)) nonrefbit = 1; if (nonrefbit && !fslocationsbit && (getattr || !rderrbit)) { *reterrp = NFSERR_MOVED; return (0); } /* * Now we can fill in the attributes. */ NFSSET_ATTRBIT(&tmpbits, retbitp); NFSCLRNOT_ATTRBIT(&tmpbits, &refbits); /* * Put out the attribute bitmap for the ones being filled in * and get the field for the number of attributes returned. */ prefixnum = nfsrv_putattrbit(nd, &tmpbits); NFSM_BUILD(retnump, u_int32_t *, NFSX_UNSIGNED); prefixnum += NFSX_UNSIGNED; /* * Now, loop around filling in the attributes for each bit set. */ for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (NFSISSET_ATTRBIT(&tmpbits, bitpos)) { switch (bitpos) { case NFSATTRBIT_TYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFDIR); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_BUILD(tl, u_int32_t *, NFSX_V4FSID); *tl++ = 0; *tl++ = txdr_unsigned(NFSV4ROOT_FSID0); *tl++ = 0; *tl = txdr_unsigned(NFSV4ROOT_REFERRAL); retnum += NFSX_V4FSID; break; case NFSATTRBIT_RDATTRERROR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (nonrefbit) *tl = txdr_unsigned(NFSERR_MOVED); else *tl = 0; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSLOCATIONS: retnum += nfsm_strtom(nd, "/", 1); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(refp->nfr_srvcnt); retnum += NFSX_UNSIGNED; cp = refp->nfr_srvlist; for (i = 0; i < refp->nfr_srvcnt; i++) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(1); retnum += NFSX_UNSIGNED; cp2 = STRCHR(cp, ':'); if (cp2 != NULL) len = cp2 - cp; else len = 1; retnum += nfsm_strtom(nd, cp, len); if (cp2 != NULL) cp = cp2 + 1; cp2 = STRCHR(cp, ','); if (cp2 != NULL) len = cp2 - cp; else len = strlen(cp); retnum += nfsm_strtom(nd, cp, len); if (cp2 != NULL) cp = cp2 + 1; } break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(refp->nfr_dfileno, tl); retnum += NFSX_HYPER; break; default: printf("EEK! Bad V4 refattr bitpos=%d\n", bitpos); } } } *retnump = txdr_unsigned(retnum); return (retnum + prefixnum); } /* * Parse a file name out of a request. */ APPLESTATIC int nfsrv_parsename(struct nfsrv_descript *nd, char *bufp, u_long *hashp, NFSPATHLEN_T *outlenp) { char *fromcp, *tocp, val = '\0'; mbuf_t md; int i; int rem, len, error = 0, pubtype = 0, outlen = 0, percent = 0; char digit; u_int32_t *tl; u_long hash = 0; if (hashp != NULL) *hashp = 0; tocp = bufp; /* * For V4, check for lookup parent. * Otherwise, get the component name. */ if ((nd->nd_flag & ND_NFSV4) && nd->nd_procnum == NFSV4OP_LOOKUPP) { *tocp++ = '.'; hash += ((u_char)'.'); *tocp++ = '.'; hash += ((u_char)'.'); outlen = 2; } else { /* * First, get the name length. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len > NFS_MAXNAMLEN) { nd->nd_repstat = NFSERR_NAMETOL; error = 0; goto nfsmout; } else if (len <= 0) { nd->nd_repstat = NFSERR_INVAL; error = 0; goto nfsmout; } /* * Now, copy the component name into the buffer. */ fromcp = nd->nd_dpos; md = nd->nd_md; rem = NFSMTOD(md, caddr_t) + mbuf_len(md) - fromcp; for (i = 0; i < len; i++) { while (rem == 0) { md = mbuf_next(md); if (md == NULL) { error = EBADRPC; goto nfsmout; } fromcp = NFSMTOD(md, caddr_t); rem = mbuf_len(md); } if (*fromcp == '\0') { nd->nd_repstat = EACCES; error = 0; goto nfsmout; } /* * For lookups on the public filehandle, do some special * processing on the name. (The public file handle is the * root of the public file system for this server.) */ if (nd->nd_flag & ND_PUBLOOKUP) { /* * If the first char is ASCII, it is a canonical * path, otherwise it is a native path. (RFC2054 * doesn't actually state what it is if the first * char isn't ASCII or 0x80, so I assume native.) * pubtype == 1 -> native path * pubtype == 2 -> canonical path */ if (i == 0) { if (*fromcp & 0x80) { /* * Since RFC2054 doesn't indicate * that a native path of just 0x80 * isn't allowed, I'll replace the * 0x80 with '/' instead of just * throwing it away. */ *fromcp = '/'; pubtype = 1; } else { pubtype = 2; } } /* * '/' only allowed in a native path */ if (*fromcp == '/' && pubtype != 1) { nd->nd_repstat = EACCES; error = 0; goto nfsmout; } /* * For the special case of 2 hex digits after a * '%' in an absolute path, calculate the value. * percent == 1 -> indicates "get first hex digit" * percent == 2 -> indicates "get second hex digit" */ if (percent > 0) { digit = nfsrv_hexdigit(*fromcp, &error); if (error) { nd->nd_repstat = EACCES; error = 0; goto nfsmout; } if (percent == 1) { val = (digit << 4); percent = 2; } else { val += digit; percent = 0; *tocp++ = val; hash += ((u_char)val); outlen++; } } else { if (*fromcp == '%' && pubtype == 2) { /* * Must be followed by 2 hex digits */ if ((len - i) < 3) { nd->nd_repstat = EACCES; error = 0; goto nfsmout; } percent = 1; } else { *tocp++ = *fromcp; hash += ((u_char)*fromcp); outlen++; } } } else { /* * Normal, non lookup on public, name. */ if (*fromcp == '/') { if (nd->nd_flag & ND_NFSV4) nd->nd_repstat = NFSERR_BADNAME; else nd->nd_repstat = EACCES; error = 0; goto nfsmout; } hash += ((u_char)*fromcp); *tocp++ = *fromcp; outlen++; } fromcp++; rem--; } nd->nd_md = md; nd->nd_dpos = fromcp; i = NFSM_RNDUP(len) - len; if (i > 0) { if (rem >= i) { nd->nd_dpos += i; } else { error = nfsm_advance(nd, i, rem); if (error) goto nfsmout; } } /* * For v4, don't allow lookups of '.' or '..' and * also check for non-utf8 strings. */ if (nd->nd_flag & ND_NFSV4) { if ((outlen == 1 && bufp[0] == '.') || (outlen == 2 && bufp[0] == '.' && bufp[1] == '.')) { nd->nd_repstat = NFSERR_BADNAME; error = 0; goto nfsmout; } if (enable_checkutf8 == 1 && nfsrv_checkutf8((u_int8_t *)bufp, outlen)) { nd->nd_repstat = NFSERR_INVAL; error = 0; goto nfsmout; } } } *tocp = '\0'; *outlenp = (size_t)outlen; if (hashp != NULL) *hashp = hash; nfsmout: NFSEXITCODE2(error, nd); return (error); } void nfsd_init(void) { int i; static int inited = 0; if (inited) return; inited = 1; /* * Initialize client queues. Don't free/reinitialize * them when nfsds are restarted. */ nfsclienthash = malloc(sizeof(struct nfsclienthashhead) * nfsrv_clienthashsize, M_NFSDCLIENT, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_clienthashsize; i++) LIST_INIT(&nfsclienthash[i]); nfslockhash = malloc(sizeof(struct nfslockhashhead) * nfsrv_lockhashsize, M_NFSDLOCKFILE, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lockhashsize; i++) LIST_INIT(&nfslockhash[i]); nfssessionhash = malloc(sizeof(struct nfssessionhash) * nfsrv_sessionhashsize, M_NFSDSESSION, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_sessionhashsize; i++) { mtx_init(&nfssessionhash[i].mtx, "nfssm", NULL, MTX_DEF); LIST_INIT(&nfssessionhash[i].list); } + LIST_INIT(&nfsrv_dontlisthead); + TAILQ_INIT(&nfsrv_recalllisthead); /* and the v2 pubfh should be all zeros */ NFSBZERO(nfs_v2pubfh, NFSX_V2FH); } /* * Check the v4 root exports. * Return 0 if ok, 1 otherwise. */ int nfsd_checkrootexp(struct nfsrv_descript *nd) { if ((nd->nd_flag & (ND_GSS | ND_EXAUTHSYS)) == ND_EXAUTHSYS) return (0); if ((nd->nd_flag & (ND_GSSINTEGRITY | ND_EXGSSINTEGRITY)) == (ND_GSSINTEGRITY | ND_EXGSSINTEGRITY)) return (0); if ((nd->nd_flag & (ND_GSSPRIVACY | ND_EXGSSPRIVACY)) == (ND_GSSPRIVACY | ND_EXGSSPRIVACY)) return (0); if ((nd->nd_flag & (ND_GSS | ND_GSSINTEGRITY | ND_GSSPRIVACY | ND_EXGSS)) == (ND_GSS | ND_EXGSS)) return (0); return (1); } /* * Parse the first part of an NFSv4 compound to find out what the minor * version# is. */ void nfsd_getminorvers(struct nfsrv_descript *nd, u_char *tag, u_char **tagstrp, int *taglenp, u_int32_t *minversp) { uint32_t *tl; int error = 0, taglen = -1; u_char *tagstr = NULL; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); taglen = fxdr_unsigned(int, *tl); if (taglen < 0 || taglen > NFSV4_OPAQUELIMIT) { error = EBADRPC; goto nfsmout; } if (taglen <= NFSV4_SMALLSTR) tagstr = tag; else tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, tagstr, taglen); if (error != 0) goto nfsmout; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); *minversp = fxdr_unsigned(u_int32_t, *tl); *tagstrp = tagstr; if (*minversp == NFSV41_MINORVERSION) nd->nd_flag |= ND_NFSV41; nfsmout: if (error != 0) { if (tagstr != NULL && taglen > NFSV4_SMALLSTR) free(tagstr, M_TEMP); taglen = -1; } *taglenp = taglen; } Index: head/sys/nfs/nfs_nfssvc.c =================================================================== --- head/sys/nfs/nfs_nfssvc.c (revision 335011) +++ head/sys/nfs/nfs_nfssvc.c (revision 335012) @@ -1,154 +1,154 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_nfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct syscall_helper_data nfssvc_syscalls[] = { SYSCALL_INIT_HELPER(nfssvc), SYSCALL_INIT_LAST }; /* * This tiny module simply handles the nfssvc() system call. The other * nfs modules that use the system call register themselves by setting * the nfsd_call_xxx function pointers non-NULL. */ int (*nfsd_call_nfsserver)(struct thread *, struct nfssvc_args *) = NULL; int (*nfsd_call_nfscommon)(struct thread *, struct nfssvc_args *) = NULL; int (*nfsd_call_nfscl)(struct thread *, struct nfssvc_args *) = NULL; int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *) = NULL; /* * Nfs server pseudo system call for the nfsd's */ int sys_nfssvc(struct thread *td, struct nfssvc_args *uap) { int error; KASSERT(!mtx_owned(&Giant), ("nfssvc(): called with Giant")); AUDIT_ARG_CMD(uap->flag); /* Allow anyone to get the stats. */ if ((uap->flag & ~NFSSVC_GETSTATS) != 0) { error = priv_check(td, PRIV_NFS_DAEMON); if (error != 0) return (error); } error = EINVAL; if ((uap->flag & (NFSSVC_ADDSOCK | NFSSVC_OLDNFSD | NFSSVC_NFSD)) && nfsd_call_nfsserver != NULL) error = (*nfsd_call_nfsserver)(td, uap); else if ((uap->flag & (NFSSVC_CBADDSOCK | NFSSVC_NFSCBD | NFSSVC_DUMPMNTOPTS | NFSSVC_FORCEDISM)) && nfsd_call_nfscl != NULL) error = (*nfsd_call_nfscl)(td, uap); else if ((uap->flag & (NFSSVC_IDNAME | NFSSVC_GETSTATS | NFSSVC_GSSDADDPORT | NFSSVC_GSSDADDFIRST | NFSSVC_GSSDDELETEALL | NFSSVC_NFSUSERDPORT | NFSSVC_NFSUSERDDELPORT)) && nfsd_call_nfscommon != NULL) error = (*nfsd_call_nfscommon)(td, uap); else if ((uap->flag & (NFSSVC_NFSDNFSD | NFSSVC_NFSDADDSOCK | NFSSVC_PUBLICFH | NFSSVC_V4ROOTEXPORT | NFSSVC_NOPUBLICFH | NFSSVC_STABLERESTART | NFSSVC_ADMINREVOKE | NFSSVC_DUMPCLIENTS | NFSSVC_DUMPLOCKS | NFSSVC_BACKUPSTABLE | - NFSSVC_SUSPENDNFSD | NFSSVC_RESUMENFSD)) && + NFSSVC_SUSPENDNFSD | NFSSVC_RESUMENFSD | NFSSVC_PNFSDS)) && nfsd_call_nfsd != NULL) error = (*nfsd_call_nfsd)(td, uap); if (error == EINTR || error == ERESTART) error = 0; return (error); } /* * Called once to initialize data structures... */ static int nfssvc_modevent(module_t mod, int type, void *data) { int error = 0; switch (type) { case MOD_LOAD: error = syscall_helper_register(nfssvc_syscalls, SY_THR_STATIC_KLD); break; case MOD_UNLOAD: if (nfsd_call_nfsserver != NULL || nfsd_call_nfscommon != NULL || nfsd_call_nfscl != NULL || nfsd_call_nfsd != NULL) { error = EBUSY; break; } syscall_helper_unregister(nfssvc_syscalls); break; default: error = EOPNOTSUPP; break; } return error; } static moduledata_t nfssvc_mod = { "nfssvc", nfssvc_modevent, NULL, }; DECLARE_MODULE(nfssvc, nfssvc_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfssvc, 1); Index: head/sys/nfs/nfssvc.h =================================================================== --- head/sys/nfs/nfssvc.h (revision 335011) +++ head/sys/nfs/nfssvc.h (revision 335012) @@ -1,84 +1,85 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSSVC_H_ #define _NFS_NFSSVC_H_ /* * Flags for nfssvc() system call. */ #define NFSSVC_OLDNFSD 0x004 #define NFSSVC_ADDSOCK 0x008 #define NFSSVC_NFSD 0x010 /* * and ones for nfsv4. */ #define NFSSVC_NOPUBLICFH 0x00000020 #define NFSSVC_STABLERESTART 0x00000040 #define NFSSVC_NFSDNFSD 0x00000080 #define NFSSVC_NFSDADDSOCK 0x00000100 #define NFSSVC_IDNAME 0x00000200 #define NFSSVC_GSSDDELETEALL 0x00000400 #define NFSSVC_GSSDADDPORT 0x00000800 #define NFSSVC_NFSUSERDPORT 0x00001000 #define NFSSVC_NFSUSERDDELPORT 0x00002000 #define NFSSVC_V4ROOTEXPORT 0x00004000 #define NFSSVC_ADMINREVOKE 0x00008000 #define NFSSVC_DUMPCLIENTS 0x00010000 #define NFSSVC_DUMPLOCKS 0x00020000 #define NFSSVC_GSSDADDFIRST 0x00040000 #define NFSSVC_PUBLICFH 0x00080000 #define NFSSVC_NFSCBD 0x00100000 #define NFSSVC_CBADDSOCK 0x00200000 #define NFSSVC_GETSTATS 0x00400000 #define NFSSVC_BACKUPSTABLE 0x00800000 #define NFSSVC_ZEROCLTSTATS 0x01000000 /* modifier for GETSTATS */ #define NFSSVC_ZEROSRVSTATS 0x02000000 /* modifier for GETSTATS */ #define NFSSVC_SUSPENDNFSD 0x04000000 #define NFSSVC_RESUMENFSD 0x08000000 #define NFSSVC_DUMPMNTOPTS 0x10000000 #define NFSSVC_NEWSTRUCT 0x20000000 #define NFSSVC_FORCEDISM 0x40000000 +#define NFSSVC_PNFSDS 0x80000000 /* Argument structure for NFSSVC_DUMPMNTOPTS. */ struct nfscl_dumpmntopts { char *ndmnt_fname; /* File Name */ size_t ndmnt_blen; /* Size of buffer */ void *ndmnt_buf; /* and the buffer */ }; #endif /* _NFS_NFSSVC_H */