Index: head/sys/fs/nfs/nfs.h =================================================================== --- head/sys/fs/nfs/nfs.h (revision 244041) +++ head/sys/fs/nfs/nfs.h (revision 244042) @@ -1,650 +1,667 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFS_H_ #define _NFS_NFS_H_ /* * Tunable constants for nfs */ #define NFS_MAXIOVEC 34 #define NFS_TICKINTVL 500 /* Desired time for a tick (msec) */ #define NFS_HZ (hz / nfscl_ticks) /* Ticks/sec */ #define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ #define NFS_TCPTIMEO 300 /* TCP timeout */ #define NFS_MAXRCVTIMEO 60 /* 1 minute in seconds */ #define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFSV4_CALLBACKTIMEO (2 * NFS_HZ) /* Timeout in ticks */ #define NFSV4_CALLBACKRETRY 5 /* Number of retries before failure */ +#define NFSV4_CBSLOTS 8 /* Number of slots for session */ #define NFSV4_CBRETRYCNT 4 /* # of CBRecall retries upon err */ #define NFSV4_UPCALLTIMEO (15 * NFS_HZ) /* Timeout in ticks for upcalls */ /* to gssd or nfsuserd */ #define NFSV4_UPCALLRETRY 4 /* Number of retries before failure */ #define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ #define NFS_RETRANS 10 /* Num of retrans for soft mounts */ #define NFS_RETRANS_TCP 2 /* Num of retrans for TCP soft mounts */ #define NFS_MAXGRPS 16 /* Max. size of groups list */ #define NFS_TRYLATERDEL 15 /* Maximum delay timeout (sec) */ #ifndef NFS_REMOVETIMEO #define NFS_REMOVETIMEO 15 /* # sec to wait for delegret in local syscall */ #endif #ifndef NFS_MINATTRTIMO #define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */ #endif #ifndef NFS_MAXATTRTIMO #define NFS_MAXATTRTIMO 60 #endif #define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ #define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ #define NFS_READDIRSIZE 8192 /* Def. readdir size */ #define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ #define NFS_MAXRAHEAD 16 /* Max. read ahead # blocks */ #define NFS_MAXASYNCDAEMON 64 /* Max. number async_daemons runnable */ #define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */ #ifndef NFSRV_LEASE #define NFSRV_LEASE 120 /* Lease time in seconds for V4 */ #endif /* assigned to nfsrv_lease */ #ifndef NFSRV_STALELEASE #define NFSRV_STALELEASE (5 * nfsrv_lease) #endif #ifndef NFSRV_MOULDYLEASE #define NFSRV_MOULDYLEASE 604800 /* One week (in sec) */ #endif #ifndef NFSCLIENTHASHSIZE #define NFSCLIENTHASHSIZE 20 /* Size of server client hash table */ #endif #ifndef NFSLOCKHASHSIZE #define NFSLOCKHASHSIZE 20 /* Size of server nfslock hash table */ #endif #define NFSSTATEHASHSIZE 10 /* Size of server stateid hash table */ #ifndef NFSUSERHASHSIZE #define NFSUSERHASHSIZE 30 /* Size of user id hash table */ #endif #ifndef NFSGROUPHASHSIZE #define NFSGROUPHASHSIZE 5 /* Size of group id hash table */ #endif #ifndef NFSCLDELEGHIGHWATER #define NFSCLDELEGHIGHWATER 10000 /* limit for client delegations */ #endif +#ifndef NFSCLLAYOUTHIGHWATER +#define NFSCLLAYOUTHIGHWATER 10000 /* limit for client pNFS layouts */ +#endif #ifndef NFSNOOPEN /* Inactive open owner (sec) */ #define NFSNOOPEN 120 #endif #define NFSRV_LEASEDELTA 15 /* # of seconds to delay beyond lease */ #define NFS_IDMAXSIZE 4 /* max sizeof (in_addr_t) */ #ifndef NFSRVCACHE_UDPTIMEOUT #define NFSRVCACHE_UDPTIMEOUT 30 /* # of sec to hold cached rpcs(udp) */ #endif #ifndef NFSRVCACHE_UDPHIGHWATER #define NFSRVCACHE_UDPHIGHWATER 500 /* Max # of udp cache entries */ #endif #ifndef NFSRVCACHE_TCPTIMEOUT #define NFSRVCACHE_TCPTIMEOUT (3600*12) /*#of sec to hold cached rpcs(tcp) */ #endif #ifndef NFSRVCACHE_FLOODLEVEL #define NFSRVCACHE_FLOODLEVEL 16384 /* Very high water mark for cache */ #endif #ifndef NFSRV_CLIENTHIGHWATER #define NFSRV_CLIENTHIGHWATER 1000 #endif #ifndef NFSRV_MAXDUMPLIST #define NFSRV_MAXDUMPLIST 10000 #endif #ifndef NFS_ACCESSCACHESIZE #define NFS_ACCESSCACHESIZE 8 #endif #define NFSV4_CBPORT 7745 /* Callback port for testing */ /* * This macro defines the high water mark for issuing V4 delegations. * (It is currently set at a conservative 20% of NFSRV_V4STATELIMIT. This * may want to increase when clients can make more effective use of * delegations.) */ #define NFSRV_V4DELEGLIMIT(c) (((c) * 5) > NFSRV_V4STATELIMIT) #define NFS_READDIRBLKSIZ DIRBLKSIZ /* Minimal nm_readdirsize */ /* * Oddballs */ #define NFS_CMPFH(n, f, s) \ ((n)->n_fhp->nfh_len == (s) && !NFSBCMP((n)->n_fhp->nfh_fh, (caddr_t)(f), (s))) #define NFSRV_CMPFH(nf, ns, f, s) \ ((ns) == (s) && !NFSBCMP((caddr_t)(nf), (caddr_t)(f), (s))) #define NFS_CMPTIME(t1, t2) \ ((t1).tv_sec == (t2).tv_sec && (t1).tv_nsec == (t2).tv_nsec) #define NFS_SETTIME(t) do { \ (t).tv_sec = time.tv_sec; (t).tv_nsec = 1000 * time.tv_usec; } while (0) #define NFS_SRVMAXDATA(n) \ (((n)->nd_flag & (ND_NFSV3 | ND_NFSV4)) ? \ NFS_MAXDATA : NFS_V2MAXDATA) #define NFS64BITSSET 0xffffffffffffffffull #define NFS64BITSMINUS1 0xfffffffffffffffeull /* * Structures for the nfssvc(2) syscall. Not that anyone but nfsd, mount_nfs * and nfsloaduser should ever try and use it. */ struct nfsd_addsock_args { int sock; /* Socket to serve */ caddr_t name; /* Client addr for connection based sockets */ int namelen; /* Length of name */ }; /* * nfsd argument for new krpc. */ struct nfsd_nfsd_args { const char *principal; /* GSS-API service principal name */ int minthreads; /* minimum service thread count */ int maxthreads; /* maximum service thread count */ }; /* * Arguments for use by the callback daemon. */ struct nfsd_nfscbd_args { const char *principal; /* GSS-API service principal name */ }; struct nfscbd_args { int sock; /* Socket to serve */ caddr_t name; /* Client addr for connection based sockets */ int namelen; /* Length of name */ u_short port; /* Port# for callbacks */ }; struct nfsd_idargs { int nid_flag; /* Flags (see below) */ uid_t nid_uid; /* user/group id */ gid_t nid_gid; int nid_usermax; /* Upper bound on user name cache */ int nid_usertimeout;/* User name timeout (minutes) */ u_char *nid_name; /* Name */ int nid_namelen; /* and its length */ }; struct nfsd_clid { int nclid_idlen; /* Length of client id */ u_char nclid_id[NFSV4_OPAQUELIMIT]; /* and name */ }; struct nfsd_dumplist { int ndl_size; /* Number of elements */ void *ndl_list; /* and the list of elements */ }; struct nfsd_dumpclients { u_int32_t ndcl_flags; /* LCL_xxx flags */ u_int32_t ndcl_nopenowners; /* Number of openowners */ u_int32_t ndcl_nopens; /* and opens */ u_int32_t ndcl_nlockowners; /* and of lockowners */ u_int32_t ndcl_nlocks; /* and of locks */ u_int32_t ndcl_ndelegs; /* and of delegations */ u_int32_t ndcl_nolddelegs; /* and old delegations */ sa_family_t ndcl_addrfam; /* Callback address */ union { struct in_addr sin_addr; struct in6_addr sin6_addr; } ndcl_cbaddr; struct nfsd_clid ndcl_clid; /* and client id */ }; struct nfsd_dumplocklist { char *ndllck_fname; /* File Name */ int ndllck_size; /* Number of elements */ void *ndllck_list; /* and the list of elements */ }; struct nfsd_dumplocks { u_int32_t ndlck_flags; /* state flags NFSLCK_xxx */ nfsv4stateid_t ndlck_stateid; /* stateid */ u_int64_t ndlck_first; /* lock byte range */ u_int64_t ndlck_end; struct nfsd_clid ndlck_owner; /* Owner of open/lock */ sa_family_t ndlck_addrfam; /* Callback address */ union { struct in_addr sin_addr; struct in6_addr sin6_addr; } ndlck_cbaddr; struct nfsd_clid ndlck_clid; /* and client id */ }; /* * Structure for referral information. */ struct nfsreferral { u_char *nfr_srvlist; /* List of servers */ int nfr_srvcnt; /* number of servers */ vnode_t nfr_vp; /* vnode for referral */ u_int32_t nfr_dfileno; /* assigned dir inode# */ }; /* * Flags for lc_flags and opsflags for nfsrv_getclient(). */ #define LCL_NEEDSCONFIRM 0x00000001 #define LCL_DONTCLEAN 0x00000002 #define LCL_WAKEUPWANTED 0x00000004 #define LCL_TCPCALLBACK 0x00000008 #define LCL_CALLBACKSON 0x00000010 #define LCL_INDEXNOTOK 0x00000020 #define LCL_STAMPEDSTABLE 0x00000040 #define LCL_EXPIREIT 0x00000080 #define LCL_CBDOWN 0x00000100 #define LCL_KERBV 0x00000400 #define LCL_NAME 0x00000800 #define LCL_NEEDSCBNULL 0x00001000 #define LCL_GSSINTEGRITY 0x00002000 #define LCL_GSSPRIVACY 0x00004000 #define LCL_ADMINREVOKED 0x00008000 #define LCL_GSS LCL_KERBV /* Or of all mechs */ /* * Bits for flags in nfslock and nfsstate. * The access, deny, NFSLCK_READ and NFSLCK_WRITE bits must be defined as * below, in the correct order, so the shifts work for tests. */ #define NFSLCK_READACCESS 0x00000001 #define NFSLCK_WRITEACCESS 0x00000002 #define NFSLCK_ACCESSBITS (NFSLCK_READACCESS | NFSLCK_WRITEACCESS) #define NFSLCK_SHIFT 2 #define NFSLCK_READDENY 0x00000004 #define NFSLCK_WRITEDENY 0x00000008 #define NFSLCK_DENYBITS (NFSLCK_READDENY | NFSLCK_WRITEDENY) #define NFSLCK_SHAREBITS \ (NFSLCK_READACCESS|NFSLCK_WRITEACCESS|NFSLCK_READDENY|NFSLCK_WRITEDENY) #define NFSLCK_LOCKSHIFT 4 #define NFSLCK_READ 0x00000010 #define NFSLCK_WRITE 0x00000020 #define NFSLCK_BLOCKING 0x00000040 #define NFSLCK_RECLAIM 0x00000080 #define NFSLCK_OPENTOLOCK 0x00000100 #define NFSLCK_TEST 0x00000200 #define NFSLCK_LOCK 0x00000400 #define NFSLCK_UNLOCK 0x00000800 #define NFSLCK_OPEN 0x00001000 #define NFSLCK_CLOSE 0x00002000 #define NFSLCK_CHECK 0x00004000 #define NFSLCK_RELEASE 0x00008000 #define NFSLCK_NEEDSCONFIRM 0x00010000 #define NFSLCK_CONFIRM 0x00020000 #define NFSLCK_DOWNGRADE 0x00040000 #define NFSLCK_DELEGREAD 0x00080000 #define NFSLCK_DELEGWRITE 0x00100000 #define NFSLCK_DELEGCUR 0x00200000 #define NFSLCK_DELEGPREV 0x00400000 #define NFSLCK_OLDDELEG 0x00800000 #define NFSLCK_DELEGRECALL 0x01000000 #define NFSLCK_SETATTR 0x02000000 #define NFSLCK_DELEGPURGE 0x04000000 #define NFSLCK_DELEGRETURN 0x08000000 /* And bits for nid_flag */ #define NFSID_INITIALIZE 0x0001 #define NFSID_ADDUID 0x0002 #define NFSID_DELUID 0x0004 #define NFSID_ADDUSERNAME 0x0008 #define NFSID_DELUSERNAME 0x0010 #define NFSID_ADDGID 0x0020 #define NFSID_DELGID 0x0040 #define NFSID_ADDGROUPNAME 0x0080 #define NFSID_DELGROUPNAME 0x0100 /* * fs.nfs sysctl(3) identifiers */ #define NFS_NFSSTATS 1 /* struct: struct nfsstats */ #define FS_NFS_NAMES { \ { 0, 0 }, \ { "nfsstats", CTLTYPE_STRUCT }, \ } /* * Here is the definition of the attribute bits array and macros that * manipulate it. * THE MACROS MUST BE MANUALLY MODIFIED IF NFSATTRBIT_MAXWORDS CHANGES!! * It is (NFSATTRBIT_MAX + 31) / 32. */ #define NFSATTRBIT_MAXWORDS 2 typedef struct { u_int32_t bits[NFSATTRBIT_MAXWORDS]; } nfsattrbit_t; #define NFSZERO_ATTRBIT(b) do { (b)->bits[0] = 0; (b)->bits[1] = 0; } while (0) #define NFSSET_ATTRBIT(t, f) do { (t)->bits[0] = (f)->bits[0]; \ (t)->bits[1] = (f)->bits[1]; } while (0) #define NFSSETSUPP_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_SUPP0; \ (b)->bits[1] = (NFSATTRBIT_SUPP1 | NFSATTRBIT_SUPPSETONLY); } while (0) #define NFSISSET_ATTRBIT(b, p) ((b)->bits[(p) / 32] & (1 << ((p) % 32))) #define NFSSETBIT_ATTRBIT(b, p) ((b)->bits[(p) / 32] |= (1 << ((p) % 32))) #define NFSCLRBIT_ATTRBIT(b, p) ((b)->bits[(p) / 32] &= ~(1 << ((p) % 32))) #define NFSCLRALL_ATTRBIT(b, a) do { \ (b)->bits[0] &= ~((a)->bits[0]); \ (b)->bits[1] &= ~((a)->bits[1]); \ } while (0) #define NFSCLRNOT_ATTRBIT(b, a) do { \ (b)->bits[0] &= ((a)->bits[0]); \ (b)->bits[1] &= ((a)->bits[1]); \ } while (0) #define NFSCLRNOTFILLABLE_ATTRBIT(b) do { \ (b)->bits[0] &= NFSATTRBIT_SUPP0; \ (b)->bits[1] &= NFSATTRBIT_SUPP1; } while (0) #define NFSCLRNOTSETABLE_ATTRBIT(b) do { \ (b)->bits[0] &= NFSATTRBIT_SETABLE0; \ (b)->bits[1] &= NFSATTRBIT_SETABLE1; } while (0) #define NFSNONZERO_ATTRBIT(b) ((b)->bits[0] || (b)->bits[1]) #define NFSEQUAL_ATTRBIT(b, p) \ ((b)->bits[0] == (p)->bits[0] && (b)->bits[1] == (p)->bits[1]) #define NFSGETATTR_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_GETATTR0; \ (b)->bits[1] = NFSATTRBIT_GETATTR1; } while (0) #define NFSWCCATTR_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_WCCATTR0; \ (b)->bits[1] = NFSATTRBIT_WCCATTR1; } while (0) #define NFSWRITEGETATTR_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_WRITEGETATTR0; \ (b)->bits[1] = NFSATTRBIT_WRITEGETATTR1; } while (0) #define NFSCBGETATTR_ATTRBIT(b, c) do { \ (c)->bits[0] = ((b)->bits[0] & NFSATTRBIT_CBGETATTR0); \ (c)->bits[1] = ((b)->bits[1] & NFSATTRBIT_CBGETATTR1); } while (0) #define NFSPATHCONF_GETATTRBIT(b) do { \ (b)->bits[0] = NFSGETATTRBIT_PATHCONF0; \ (b)->bits[1] = NFSGETATTRBIT_PATHCONF1; } while (0) #define NFSSTATFS_GETATTRBIT(b) do { \ (b)->bits[0] = NFSGETATTRBIT_STATFS0; \ (b)->bits[1] = NFSGETATTRBIT_STATFS1; } while (0) #define NFSISSETSTATFS_ATTRBIT(b) \ (((b)->bits[0] & NFSATTRBIT_STATFS0) || \ ((b)->bits[1] & NFSATTRBIT_STATFS1)) #define NFSCLRSTATFS_ATTRBIT(b) do { \ (b)->bits[0] &= ~NFSATTRBIT_STATFS0; \ (b)->bits[1] &= ~NFSATTRBIT_STATFS1; } while (0) #define NFSREADDIRPLUS_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_READDIRPLUS0; \ (b)->bits[1] = NFSATTRBIT_READDIRPLUS1; } while (0) #define NFSREFERRAL_ATTRBIT(b) do { \ (b)->bits[0] = NFSATTRBIT_REFERRAL0; \ (b)->bits[1] = NFSATTRBIT_REFERRAL1; } while (0) /* * Store uid, gid creds that were used when the stateid was acquired. * The RPC layer allows NFS_MAXGRPS + 1 groups to go out on the wire, * so that's how many gets stored here. */ struct nfscred { uid_t nfsc_uid; gid_t nfsc_groups[NFS_MAXGRPS + 1]; int nfsc_ngroups; }; /* * Constants that define the file handle for the V4 root directory. * (The FSID must never be used by other file systems that are exported.) */ #define NFSV4ROOT_FSID0 ((int32_t) -1) #define NFSV4ROOT_FSID1 ((int32_t) -1) #define NFSV4ROOT_REFERRAL ((int32_t) -2) #define NFSV4ROOT_INO 2 /* It's traditional */ #define NFSV4ROOT_GEN 1 /* * The set of signals the interrupt an I/O in progress for NFSMNT_INT mounts. * What should be in this set is open to debate, but I believe that since * I/O system calls on ufs are never interrupted by signals the set should * be minimal. My reasoning is that many current programs that use signals * such as SIGALRM will not expect file I/O system calls to be interrupted * by them and break. */ #if defined(_KERNEL) || defined(KERNEL) struct uio; struct buf; struct vattr; struct nameidata; /* XXX */ /* * Socket errors ignored for connectionless sockets? * For now, ignore them all */ #define NFSIGNORE_SOERROR(s, e) \ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ ((s) & PR_CONNREQUIRED) == 0) /* * This structure holds socket information for a connection. Used by the * client and the server for callbacks. */ struct nfssockreq { NFSSOCKADDR_T nr_nam; int nr_sotype; int nr_soproto; int nr_soflags; struct ucred *nr_cred; int nr_lock; NFSMUTEX_T nr_mtx; u_int32_t nr_prog; u_int32_t nr_vers; struct __rpc_client *nr_client; }; /* * And associated nr_lock bits. */ #define NFSR_SNDLOCK 0x01 #define NFSR_WANTSND 0x02 #define NFSR_RCVLOCK 0x04 #define NFSR_WANTRCV 0x08 #define NFSR_RESERVEDPORT 0x10 #define NFSR_LOCALHOST 0x20 /* * Queue head for nfsreq's */ TAILQ_HEAD(nfsreqhead, nfsreq); /* This is the only nfsreq R_xxx flag still used. */ #define R_DONTRECOVER 0x00000100 /* don't initiate recovery when this rpc gets a stale state reply */ /* * Network address hash list element */ union nethostaddr { struct in_addr had_inet; struct in6_addr had_inet6; }; /* * Structure of list of mechanisms. */ struct nfsgss_mechlist { int len; const u_char *str; int totlen; }; #define KERBV_MECH 0 /* position in list */ /* * This structure is used by the server for describing each request. */ struct nfsrv_descript { mbuf_t nd_mrep; /* Request mbuf list */ mbuf_t nd_md; /* Current dissect mbuf */ mbuf_t nd_mreq; /* Reply mbuf list */ mbuf_t nd_mb; /* Current build mbuf */ NFSSOCKADDR_T nd_nam; /* and socket addr */ NFSSOCKADDR_T nd_nam2; /* return socket addr */ caddr_t nd_dpos; /* Current dissect pos */ caddr_t nd_bpos; /* Current build pos */ u_int16_t nd_procnum; /* RPC # */ u_int32_t nd_flag; /* nd_flag */ u_int32_t nd_repstat; /* Reply status */ int *nd_errp; /* Pointer to ret status */ u_int32_t nd_retxid; /* Reply xid */ struct nfsrvcache *nd_rp; /* Assoc. cache entry */ struct timeval nd_starttime; /* Time RPC initiated */ fhandle_t nd_fh; /* File handle */ struct ucred *nd_cred; /* Credentials */ uid_t nd_saveduid; /* Saved uid */ u_int64_t nd_sockref; /* Rcv socket ref# */ u_int64_t nd_compref; /* Compound RPC ref# */ time_t nd_tcpconntime; /* Time TCP connection est. */ nfsquad_t nd_clientid; /* Implied clientid */ int nd_gssnamelen; /* principal name length */ char *nd_gssname; /* principal name */ + uint32_t *nd_slotseq; /* ptr to slot seq# in req */ }; #define nd_princlen nd_gssnamelen #define nd_principal nd_gssname /* Bits for "nd_flag" */ #define ND_DONTSAVEREPLY 0x00000001 #define ND_SAVEREPLY 0x00000002 #define ND_NFSV2 0x00000004 #define ND_NFSV3 0x00000008 #define ND_NFSV4 0x00000010 #define ND_KERBV 0x00000020 #define ND_GSSINTEGRITY 0x00000040 #define ND_GSSPRIVACY 0x00000080 #define ND_WINDOWVERF 0x00000100 #define ND_GSSINITREPLY 0x00000200 #define ND_STREAMSOCK 0x00000400 #define ND_PUBLOOKUP 0x00000800 #define ND_USEGSSNAME 0x00001000 #define ND_SAMETCPCONN 0x00002000 #define ND_IMPLIEDCLID 0x00004000 #define ND_NOMOREDATA 0x00008000 #define ND_V4WCCATTR 0x00010000 #define ND_NFSCB 0x00020000 #define ND_AUTHNONE 0x00040000 #define ND_EXAUTHSYS 0x00080000 #define ND_EXGSS 0x00100000 #define ND_EXGSSINTEGRITY 0x00200000 #define ND_EXGSSPRIVACY 0x00400000 #define ND_INCRSEQID 0x00800000 #define ND_NFSCL 0x01000000 +#define ND_NFSV41 0x02000000 +#define ND_HASSEQUENCE 0x04000000 /* * ND_GSS should be the "or" of all GSS type authentications. */ #define ND_GSS (ND_KERBV) struct nfsv4_opflag { int retfh; int needscfh; int savereply; int modifyfs; int lktype; + int needsseq; }; /* * Flags used to indicate what to do w.r.t. seqid checking. */ #define NFSRVSEQID_FIRST 0x01 #define NFSRVSEQID_LAST 0x02 #define NFSRVSEQID_OPEN 0x04 /* * assign a doubly linked list to a new head * and prepend one list into another. */ #define LIST_NEWHEAD(nhead, ohead, field) do { \ if (((nhead)->lh_first = (ohead)->lh_first) != NULL) \ (ohead)->lh_first->field.le_prev = &(nhead)->lh_first; \ (ohead)->lh_first = NULL; \ } while (0) #define LIST_PREPEND(head, phead, lelm, field) do { \ if ((head)->lh_first != NULL) { \ (lelm)->field.le_next = (head)->lh_first; \ (lelm)->field.le_next->field.le_prev = \ &(lelm)->field.le_next; \ } \ (head)->lh_first = (phead)->lh_first; \ (head)->lh_first->field.le_prev = &(head)->lh_first; \ } while (0) /* * File handle structure for client. Malloc'd to the correct length with * malloc type M_NFSFH. */ struct nfsfh { u_int16_t nfh_len; /* Length of file handle */ u_int8_t nfh_fh[1]; /* and the file handle */ }; /* * File handle structure for server. The NFSRV_MAXFH constant is * set in nfsdport.h. I use a 32bit length, so that alignment is * preserved. */ struct nfsrvfh { u_int32_t nfsrvfh_len; u_int8_t nfsrvfh_data[NFSRV_MAXFH]; }; /* * This structure is used for sleep locks on the NFSv4 nfsd threads and * NFSv4 client data structures. */ struct nfsv4lock { u_int32_t nfslock_usecnt; u_int8_t nfslock_lock; }; #define NFSV4LOCK_LOCK 0x01 #define NFSV4LOCK_LOCKWANTED 0x02 #define NFSV4LOCK_WANTED 0x04 /* * Values for the override argument for nfsvno_accchk(). */ #define NFSACCCHK_NOOVERRIDE 0 #define NFSACCCHK_ALLOWROOT 1 #define NFSACCCHK_ALLOWOWNER 2 /* * and values for the vpislocked argument for nfsvno_accchk(). */ #define NFSACCCHK_VPNOTLOCKED 0 #define NFSACCCHK_VPISLOCKED 1 + +/* + * Slot for the NFSv4.1 Sequence Op. + */ +struct nfsslot { + int nfssl_inprog; + uint32_t nfssl_seq; + struct mbuf *nfssl_reply; +}; #endif /* _KERNEL */ #endif /* _NFS_NFS_H */ Index: head/sys/fs/nfs/nfs_commonkrpc.c =================================================================== --- head/sys/fs/nfs/nfs_commonkrpc.c (revision 244041) +++ head/sys/fs/nfs/nfs_commonkrpc.c (revision 244042) @@ -1,1137 +1,1216 @@ /*- * Copyright (c) 1989, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Socket operations for use by nfs */ #include "opt_kdtrace.h" #include "opt_kgssapi.h" #include "opt_nfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_nfsclient_nfs23_start_probe_func_t dtrace_nfscl_nfs234_start_probe; dtrace_nfsclient_nfs23_done_probe_func_t dtrace_nfscl_nfs234_done_probe; /* * Registered probes by RPC type. */ -uint32_t nfscl_nfs2_start_probes[NFS_NPROCS + 1]; -uint32_t nfscl_nfs2_done_probes[NFS_NPROCS + 1]; +uint32_t nfscl_nfs2_start_probes[NFSV41_NPROCS + 1]; +uint32_t nfscl_nfs2_done_probes[NFSV41_NPROCS + 1]; -uint32_t nfscl_nfs3_start_probes[NFS_NPROCS + 1]; -uint32_t nfscl_nfs3_done_probes[NFS_NPROCS + 1]; +uint32_t nfscl_nfs3_start_probes[NFSV41_NPROCS + 1]; +uint32_t nfscl_nfs3_done_probes[NFSV41_NPROCS + 1]; -uint32_t nfscl_nfs4_start_probes[NFS_NPROCS + 1]; -uint32_t nfscl_nfs4_done_probes[NFS_NPROCS + 1]; +uint32_t nfscl_nfs4_start_probes[NFSV41_NPROCS + 1]; +uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; #endif NFSSTATESPINLOCK; NFSREQSPINLOCK; +NFSDLOCKMUTEX; extern struct nfsstats newnfsstats; extern struct nfsreqhead nfsd_reqq; extern int nfscl_ticks; extern void (*ncl_call_invalcaches)(struct vnode *); +extern int nfs_numnfscbd; +extern int nfscl_debuglevel; +SVCPOOL *nfscbd_pool; static int nfsrv_gsscallbackson = 0; static int nfs_bufpackets = 4; static int nfs_reconnects; static int nfs3_jukebox_delay = 10; static int nfs_skip_wcc_data_onerr = 1; static int nfs_keytab_enctype = ETYPE_DES_CBC_CRC; SYSCTL_DECL(_vfs_nfs); SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, "Buffer reservation size 2 < x < 64"); SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, "Number of times the nfs client has had to reconnect"); SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0, "Number of seconds to delay a retry after receiving EJUKEBOX"); SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0, "Disable weak cache consistency checking when server returns an error"); SYSCTL_INT(_vfs_nfs, OID_AUTO, keytab_enctype, CTLFLAG_RW, &nfs_keytab_enctype, 0, "Encryption type for the keytab entry used by nfs"); static void nfs_down(struct nfsmount *, struct thread *, const char *, int, int); static void nfs_up(struct nfsmount *, struct thread *, const char *, int, int); static int nfs_msg(struct thread *, const char *, const char *, int); struct nfs_cached_auth { int ca_refs; /* refcount, including 1 from the cache */ uid_t ca_uid; /* uid that corresponds to this auth */ AUTH *ca_auth; /* RPC auth handle */ }; static int nfsv2_procid[NFS_V3NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; /* * Initialize sockets and congestion for a new NFS connection. * We do not free the sockaddr if error. */ int newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, struct ucred *cred, NFSPROC_T *p, int callback_retry_mult) { int rcvreserve, sndreserve; int pktscale; struct sockaddr *saddr; struct ucred *origcred; CLIENT *client; struct netconfig *nconf; struct socket *so; int one = 1, retries, error = 0; struct thread *td = curthread; + SVCXPRT *xprt; struct timeval timo; /* * We need to establish the socket using the credentials of * the mountpoint. Some parts of this process (such as * sobind() and soconnect()) will use the curent thread's * credential instead of the socket credential. To work * around this, temporarily change the current thread's * credential to that of the mountpoint. * * XXX: It would be better to explicitly pass the correct * credential to sobind() and soconnect(). */ origcred = td->td_ucred; /* * Use the credential in nr_cred, if not NULL. */ if (nrp->nr_cred != NULL) td->td_ucred = nrp->nr_cred; else td->td_ucred = cred; saddr = nrp->nr_nam; if (saddr->sa_family == AF_INET) if (nrp->nr_sotype == SOCK_DGRAM) nconf = getnetconfigent("udp"); else nconf = getnetconfigent("tcp"); else if (nrp->nr_sotype == SOCK_DGRAM) nconf = getnetconfigent("udp6"); else nconf = getnetconfigent("tcp6"); pktscale = nfs_bufpackets; if (pktscale < 2) pktscale = 2; if (pktscale > 64) pktscale = 64; /* * soreserve() can fail if sb_max is too small, so shrink pktscale * and try again if there is an error. * Print a log message suggesting increasing sb_max. * Creating a socket and doing this is necessary since, if the * reservation sizes are too large and will make soreserve() fail, * the connection will work until a large send is attempted and * then it will loop in the krpc code. */ so = NULL; saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *); error = socreate(saddr->sa_family, &so, nrp->nr_sotype, nrp->nr_soproto, td->td_ucred, td); if (error) { td->td_ucred = origcred; goto out; } do { if (error != 0 && pktscale > 2) pktscale--; if (nrp->nr_sotype == SOCK_DGRAM) { if (nmp != NULL) { sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * pktscale; rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * pktscale; } else { sndreserve = rcvreserve = 1024 * pktscale; } } else { if (nrp->nr_sotype != SOCK_STREAM) panic("nfscon sotype"); if (nmp != NULL) { sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + sizeof (u_int32_t)) * pktscale; rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + sizeof (u_int32_t)) * pktscale; } else { sndreserve = rcvreserve = 1024 * pktscale; } } error = soreserve(so, sndreserve, rcvreserve); } while (error != 0 && pktscale > 2); soclose(so); if (error) { td->td_ucred = origcred; goto out; } client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog, nrp->nr_vers, sndreserve, rcvreserve); CLNT_CONTROL(client, CLSET_WAITCHAN, "newnfsreq"); if (nmp != NULL) { if ((nmp->nm_flag & NFSMNT_INT)) CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); if ((nmp->nm_flag & NFSMNT_RESVPORT)) CLNT_CONTROL(client, CLSET_PRIVPORT, &one); if (NFSHASSOFT(nmp)) { if (nmp->nm_sotype == SOCK_DGRAM) /* * For UDP, the large timeout for a reconnect * will be set to "nm_retry * nm_timeo / 2", so * we only want to do 2 reconnect timeout * retries. */ retries = 2; else retries = nmp->nm_retry; } else retries = INT_MAX; + if (NFSHASNFSV4N(nmp)) { + /* + * Make sure the nfscbd_pool doesn't get destroyed + * while doing this. + */ + NFSD_LOCK(); + if (nfs_numnfscbd > 0) { + nfs_numnfscbd++; + NFSD_UNLOCK(); + xprt = svc_vc_create_backchannel(nfscbd_pool); + CLNT_CONTROL(client, CLSET_BACKCHANNEL, xprt); + NFSD_LOCK(); + nfs_numnfscbd--; + if (nfs_numnfscbd == 0) + wakeup(&nfs_numnfscbd); + } + NFSD_UNLOCK(); + } } else { /* * Three cases: * - Null RPC callback to client * - Non-Null RPC callback to client, wait a little longer * - upcalls to nfsuserd and gssd (clp == NULL) */ if (callback_retry_mult == 0) { retries = NFSV4_UPCALLRETRY; CLNT_CONTROL(client, CLSET_PRIVPORT, &one); } else { retries = NFSV4_CALLBACKRETRY * callback_retry_mult; } } CLNT_CONTROL(client, CLSET_RETRIES, &retries); if (nmp != NULL) { /* * For UDP, there are 2 timeouts: * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer * that does a retransmit of an RPC request using the same * socket and xid. This is what you normally want to do, * since NFS servers depend on "same xid" for their * Duplicate Request Cache. * - timeout specified in CLNT_CALL_MBUF(), which specifies when * retransmits on the same socket should fail and a fresh * socket created. Each of these timeouts counts as one * CLSET_RETRIES as set above. * Set the initial retransmit timeout for UDP. This timeout * doesn't exist for TCP and the following call just fails, * which is ok. */ timo.tv_sec = nmp->nm_timeo / NFS_HZ; timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); } mtx_lock(&nrp->nr_mtx); if (nrp->nr_client != NULL) { /* * Someone else already connected. */ CLNT_RELEASE(client); } else { nrp->nr_client = client; } /* * Protocols that do not require connections may be optionally left * unconnected for servers that reply from a port other than NFS_PORT. */ if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) { mtx_unlock(&nrp->nr_mtx); CLNT_CONTROL(client, CLSET_CONNECT, &one); } else { mtx_unlock(&nrp->nr_mtx); } /* Restore current thread's credentials. */ td->td_ucred = origcred; out: NFSEXITCODE(error); return (error); } /* * NFS disconnect. Clean up and unlink. */ void newnfs_disconnect(struct nfssockreq *nrp) { CLIENT *client; mtx_lock(&nrp->nr_mtx); if (nrp->nr_client != NULL) { client = nrp->nr_client; nrp->nr_client = NULL; mtx_unlock(&nrp->nr_mtx); rpc_gss_secpurge_call(client); CLNT_CLOSE(client); CLNT_RELEASE(client); } else { mtx_unlock(&nrp->nr_mtx); } } static AUTH * nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, char *srv_principal, gss_OID mech_oid, struct ucred *cred) { rpc_gss_service_t svc; AUTH *auth; #ifdef notyet rpc_gss_options_req_t req_options; #endif switch (secflavour) { case RPCSEC_GSS_KRB5: case RPCSEC_GSS_KRB5I: case RPCSEC_GSS_KRB5P: if (!mech_oid) { if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid)) return (NULL); } if (secflavour == RPCSEC_GSS_KRB5) svc = rpc_gss_svc_none; else if (secflavour == RPCSEC_GSS_KRB5I) svc = rpc_gss_svc_integrity; else svc = rpc_gss_svc_privacy; #ifdef notyet req_options.req_flags = GSS_C_MUTUAL_FLAG; req_options.time_req = 0; req_options.my_cred = GSS_C_NO_CREDENTIAL; req_options.input_channel_bindings = NULL; req_options.enc_type = nfs_keytab_enctype; auth = rpc_gss_secfind_call(nrp->nr_client, cred, clnt_principal, srv_principal, mech_oid, svc, &req_options); #else /* * Until changes to the rpcsec_gss code are committed, * there is no support for host based initiator * principals. As such, that case cannot yet be handled. */ if (clnt_principal == NULL) auth = rpc_gss_secfind_call(nrp->nr_client, cred, srv_principal, mech_oid, svc); else auth = NULL; #endif if (auth != NULL) return (auth); /* fallthrough */ case AUTH_SYS: default: return (authunix_create(cred)); } } /* * Callback from the RPC code to generate up/down notifications. */ struct nfs_feedback_arg { struct nfsmount *nf_mount; int nf_lastmsg; /* last tprintf */ int nf_tprintfmsg; struct thread *nf_td; }; static void nfs_feedback(int type, int proc, void *arg) { struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; struct nfsmount *nmp = nf->nf_mount; struct timeval now; getmicrouptime(&now); switch (type) { case FEEDBACK_REXMIT2: case FEEDBACK_RECONNECT: if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) { nfs_down(nmp, nf->nf_td, "not responding", 0, NFSSTA_TIMEO); nf->nf_tprintfmsg = TRUE; nf->nf_lastmsg = now.tv_sec; } break; case FEEDBACK_OK: nfs_up(nf->nf_mount, nf->nf_td, "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); break; } } /* * newnfs_request - goes something like this * - does the rpc by calling the krpc layer * - break down rpc header and return with nfs reply * nb: always frees up nd_mreq mbuf list */ int newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp, struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers, - u_char *retsum, int toplevel, u_int64_t *xidp) + u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *sep) { - u_int32_t *tl; + u_int32_t retseq, retval, *tl; time_t waituntil; - int i, j, set_sigset = 0, timeo; + int i = 0, j = 0, opcnt, set_sigset = 0, slot; int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS; + int freeslot, timeo; u_int16_t procnum; u_int trylater_delay = 1; struct nfs_feedback_arg nf; struct timeval timo, now; AUTH *auth; struct rpc_callextra ext; enum clnt_stat stat; struct nfsreq *rep = NULL; char *srv_principal = NULL; sigset_t oldset; struct ucred *authcred; if (xidp != NULL) *xidp = 0; /* Reject requests while attempting a forced unmount. */ if (nmp != NULL && (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)) { m_freem(nd->nd_mreq); return (ESTALE); } /* * Set authcred, which is used to acquire RPC credentials to * the cred argument, by default. The crhold() should not be * necessary, but will ensure that some future code change * doesn't result in the credential being free'd prematurely. */ authcred = crhold(cred); /* For client side interruptible mounts, mask off the signals. */ if (nmp != NULL && td != NULL && NFSHASINT(nmp)) { newnfs_set_sigmask(td, &oldset); set_sigset = 1; } /* * XXX if not already connected call nfs_connect now. Longer * term, change nfs_mount to call nfs_connect unconditionally * and let clnt_reconnect_create handle reconnects. */ if (nrp->nr_client == NULL) newnfs_connect(nmp, nrp, cred, td, 0); /* * For a client side mount, nmp is != NULL and clp == NULL. For * server calls (callbacks or upcalls), nmp == NULL. */ if (clp != NULL) { NFSLOCKSTATE(); if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) { secflavour = RPCSEC_GSS_KRB5; if (nd->nd_procnum != NFSPROC_NULL) { if (clp->lc_flags & LCL_GSSINTEGRITY) secflavour = RPCSEC_GSS_KRB5I; else if (clp->lc_flags & LCL_GSSPRIVACY) secflavour = RPCSEC_GSS_KRB5P; } } NFSUNLOCKSTATE(); } else if (nmp != NULL && NFSHASKERB(nmp) && nd->nd_procnum != NFSPROC_NULL) { if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0) nd->nd_flag |= ND_USEGSSNAME; if ((nd->nd_flag & ND_USEGSSNAME) != 0) { /* * If there is a client side host based credential, * use that, otherwise use the system uid, if set. * The system uid is in the nmp->nm_sockreq.nr_cred * credentials. */ if (nmp->nm_krbnamelen > 0) { usegssname = 1; } else if (nmp->nm_uid != (uid_t)-1) { KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } } else if (nmp->nm_krbnamelen == 0 && nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) { /* * If there is no host based principal name and * the system uid is set and this is root, use the * system uid, since root won't have user * credentials in a credentials cache file. * The system uid is in the nmp->nm_sockreq.nr_cred * credentials. */ KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } if (NFSHASINTEGRITY(nmp)) secflavour = RPCSEC_GSS_KRB5I; else if (NFSHASPRIVACY(nmp)) secflavour = RPCSEC_GSS_KRB5P; else secflavour = RPCSEC_GSS_KRB5; srv_principal = NFSMNT_SRVKRBNAME(nmp); } else if (nmp != NULL && !NFSHASKERB(nmp) && nd->nd_procnum != NFSPROC_NULL && (nd->nd_flag & ND_USEGSSNAME) != 0) { /* * Use the uid that did the mount when the RPC is doing * NFSv4 system operations, as indicated by the * ND_USEGSSNAME flag, for the AUTH_SYS case. * The credentials in nm_sockreq.nr_cred were used for the * mount. */ KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } if (nmp != NULL) { bzero(&nf, sizeof(struct nfs_feedback_arg)); nf.nf_mount = nmp; nf.nf_td = td; getmicrouptime(&now); nf.nf_lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay)); } if (nd->nd_procnum == NFSPROC_NULL) auth = authnone_create(); else if (usegssname) auth = nfs_getauth(nrp, secflavour, nmp->nm_krbname, srv_principal, NULL, authcred); else auth = nfs_getauth(nrp, secflavour, NULL, srv_principal, NULL, authcred); crfree(authcred); if (auth == NULL) { m_freem(nd->nd_mreq); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (EACCES); } bzero(&ext, sizeof(ext)); ext.rc_auth = auth; if (nmp != NULL) { ext.rc_feedback = nfs_feedback; ext.rc_feedback_arg = &nf; } procnum = nd->nd_procnum; if ((nd->nd_flag & ND_NFSV4) && nd->nd_procnum != NFSPROC_NULL && nd->nd_procnum != NFSV4PROC_CBCOMPOUND) procnum = NFSV4PROC_COMPOUND; if (nmp != NULL) { NFSINCRGLOBAL(newnfsstats.rpcrequests); /* Map the procnum to the old NFSv2 one, as required. */ if ((nd->nd_flag & ND_NFSV2) != 0) { if (nd->nd_procnum < NFS_V3NPROCS) procnum = nfsv2_procid[nd->nd_procnum]; else procnum = NFSV2PROC_NOOP; } /* * Now only used for the R_DONTRECOVER case, but until that is * supported within the krpc code, I need to keep a queue of * outstanding RPCs for nfsv4 client requests. */ if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND) MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSDREQ, M_WAITOK); #ifdef KDTRACE_HOOKS if (dtrace_nfscl_nfs234_start_probe != NULL) { uint32_t probe_id; int probe_procnum; if (nd->nd_flag & ND_NFSV4) { probe_id = nfscl_nfs4_start_probes[nd->nd_procnum]; probe_procnum = nd->nd_procnum; } else if (nd->nd_flag & ND_NFSV3) { probe_id = nfscl_nfs3_start_probes[procnum]; probe_procnum = procnum; } else { probe_id = nfscl_nfs2_start_probes[nd->nd_procnum]; probe_procnum = procnum; } if (probe_id != 0) (dtrace_nfscl_nfs234_start_probe) (probe_id, vp, nd->nd_mreq, cred, probe_procnum); } #endif } trycnt = 0; + freeslot = -1; /* Set to slot that needs to be free'd */ tryagain: + slot = -1; /* Slot that needs a sequence# increment. */ /* * This timeout specifies when a new socket should be created, * along with new xid values. For UDP, this should be done * infrequently, since retransmits of RPC requests should normally * use the same xid. */ if (nmp == NULL) { timo.tv_usec = 0; if (clp == NULL) timo.tv_sec = NFSV4_UPCALLTIMEO; else timo.tv_sec = NFSV4_CALLBACKTIMEO; } else { if (nrp->nr_sotype != SOCK_DGRAM) { timo.tv_usec = 0; if ((nmp->nm_flag & NFSMNT_NFSV4)) timo.tv_sec = INT_MAX; else timo.tv_sec = NFS_TCPTIMEO; } else { if (NFSHASSOFT(nmp)) { /* * CLSET_RETRIES is set to 2, so this should be * half of the total timeout required. */ timeo = nmp->nm_retry * nmp->nm_timeo / 2; if (timeo < 1) timeo = 1; timo.tv_sec = timeo / NFS_HZ; timo.tv_usec = (timeo % NFS_HZ) * 1000000 / NFS_HZ; } else { /* For UDP hard mounts, use a large value. */ timo.tv_sec = NFS_MAXTIMEO / NFS_HZ; timo.tv_usec = 0; } } if (rep != NULL) { rep->r_flags = 0; rep->r_nmp = nmp; /* * Chain request into list of outstanding requests. */ NFSLOCKREQ(); TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain); NFSUNLOCKREQ(); } } nd->nd_mrep = NULL; stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo); if (rep != NULL) { /* * RPC done, unlink the request. */ NFSLOCKREQ(); TAILQ_REMOVE(&nfsd_reqq, rep, r_chain); NFSUNLOCKREQ(); } /* * If there was a successful reply and a tprintf msg. * tprintf a response. */ if (stat == RPC_SUCCESS) { error = 0; } else if (stat == RPC_TIMEDOUT) { error = ETIMEDOUT; } else if (stat == RPC_VERSMISMATCH) { error = EOPNOTSUPP; } else if (stat == RPC_PROGVERSMISMATCH) { error = EPROTONOSUPPORT; } else { error = EACCES; } if (error) { m_freem(nd->nd_mreq); AUTH_DESTROY(auth); if (rep != NULL) FREE((caddr_t)rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (error); } KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n")); /* * Search for any mbufs that are not a multiple of 4 bytes long * or with m_data not longword aligned. * These could cause pointer alignment problems, so copy them to * well aligned mbufs. */ newnfs_realign(&nd->nd_mrep); nd->nd_md = nd->nd_mrep; nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); nd->nd_repstat = 0; if (nd->nd_procnum != NFSPROC_NULL) { + /* If sep == NULL, set it to the default in nmp. */ + if (sep == NULL && nmp != NULL) + sep = NFSMNT_MDSSESSION(nmp); /* * and now the actual NFS xdr. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl); + if (nd->nd_repstat >= 10000) + NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum, + (int)nd->nd_repstat); + + /* + * Get rid of the tag, return count and SEQUENCE result for + * NFSv4. + */ + if ((nd->nd_flag & ND_NFSV4) != 0) { + NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); + i = fxdr_unsigned(int, *tl); + error = nfsm_advance(nd, NFSM_RNDUP(i), -1); + if (error) + goto nfsmout; + NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); + opcnt = fxdr_unsigned(int, *tl++); + i = fxdr_unsigned(int, *tl++); + j = fxdr_unsigned(int, *tl); + if (j >= 10000) + NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j); + /* + * If the first op is Sequence, free up the slot. + */ + if (nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) + NFSCL_DEBUG(1, "failed seq=%d\n", j); + if (nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + + 5 * NFSX_UNSIGNED); + mtx_lock(&sep->nfsess_mtx); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + retseq = fxdr_unsigned(uint32_t, *tl++); + slot = fxdr_unsigned(int, *tl++); + freeslot = slot; + if (retseq != sep->nfsess_slotseq[slot]) + printf("retseq diff 0x%x\n", retseq); + retval = fxdr_unsigned(uint32_t, *++tl); + if ((retval + 1) < sep->nfsess_foreslots) + sep->nfsess_foreslots = (retval + 1); + else if ((retval + 1) > sep->nfsess_foreslots) + sep->nfsess_foreslots = (retval < 64) ? + (retval + 1) : 64; + mtx_unlock(&sep->nfsess_mtx); + + /* Grab the op and status for the next one. */ + if (opcnt > 1) { + NFSM_DISSECT(tl, uint32_t *, + 2 * NFSX_UNSIGNED); + i = fxdr_unsigned(int, *tl++); + j = fxdr_unsigned(int, *tl); + } + } + } if (nd->nd_repstat != 0) { if (((nd->nd_repstat == NFSERR_DELAY || nd->nd_repstat == NFSERR_GRACE) && (nd->nd_flag & ND_NFSV4) && nd->nd_procnum != NFSPROC_DELEGRETURN && nd->nd_procnum != NFSPROC_SETATTR && nd->nd_procnum != NFSPROC_READ && + nd->nd_procnum != NFSPROC_READDS && nd->nd_procnum != NFSPROC_WRITE && + nd->nd_procnum != NFSPROC_WRITEDS && nd->nd_procnum != NFSPROC_OPEN && nd->nd_procnum != NFSPROC_CREATE && nd->nd_procnum != NFSPROC_OPENCONFIRM && nd->nd_procnum != NFSPROC_OPENDOWNGRADE && nd->nd_procnum != NFSPROC_CLOSE && nd->nd_procnum != NFSPROC_LOCK && nd->nd_procnum != NFSPROC_LOCKU) || (nd->nd_repstat == NFSERR_DELAY && (nd->nd_flag & ND_NFSV4) == 0) || nd->nd_repstat == NFSERR_RESOURCE) { if (trylater_delay > NFS_TRYLATERDEL) trylater_delay = NFS_TRYLATERDEL; waituntil = NFSD_MONOSEC + trylater_delay; while (NFSD_MONOSEC < waituntil) (void) nfs_catnap(PZERO, 0, "nfstry"); trylater_delay *= 2; + if (slot != -1) { + mtx_lock(&sep->nfsess_mtx); + sep->nfsess_slotseq[slot]++; + *nd->nd_slotseq = txdr_unsigned( + sep->nfsess_slotseq[slot]); + mtx_unlock(&sep->nfsess_mtx); + } m_freem(nd->nd_mrep); nd->nd_mrep = NULL; goto tryagain; } /* * If the File Handle was stale, invalidate the * lookup cache, just in case. * (vp != NULL implies a client side call) */ if (nd->nd_repstat == ESTALE && vp != NULL) { cache_purge(vp); if (ncl_call_invalcaches != NULL) (*ncl_call_invalcaches)(vp); } } - - /* - * Get rid of the tag, return count, and PUTFH result for V4. - */ - if (nd->nd_flag & ND_NFSV4) { - NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); - i = fxdr_unsigned(int, *tl); - error = nfsm_advance(nd, NFSM_RNDUP(i), -1); - if (error) - goto nfsmout; - NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); - i = fxdr_unsigned(int, *++tl); - + if ((nd->nd_flag & ND_NFSV4) != 0) { + /* Free the slot, as required. */ + if (freeslot != -1) + nfsv4_freeslot(sep, freeslot); /* - * If the first op's status is non-zero, mark that - * there is no more data to process. + * If this op is Putfh, throw its results away. */ - if (*++tl) - nd->nd_flag |= ND_NOMOREDATA; - - /* - * If the first op is Putfh, throw its results away - * and toss the op# and status for the first op. - */ - if (nmp != NULL && i == NFSV4OP_PUTFH && *tl == 0) { + if (j >= 10000) + NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j); + if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) { NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); j = fxdr_unsigned(int, *tl); + if (j >= 10000) + NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i, + j); /* * All Compounds that do an Op that must * be in sequence consist of NFSV4OP_PUTFH * followed by one of these. As such, we * can determine if the seqid# should be * incremented, here. */ if ((i == NFSV4OP_OPEN || i == NFSV4OP_OPENCONFIRM || i == NFSV4OP_OPENDOWNGRADE || i == NFSV4OP_CLOSE || i == NFSV4OP_LOCK || i == NFSV4OP_LOCKU) && (j == 0 || (j != NFSERR_STALECLIENTID && j != NFSERR_STALESTATEID && j != NFSERR_BADSTATEID && j != NFSERR_BADSEQID && j != NFSERR_BADXDR && j != NFSERR_RESOURCE && j != NFSERR_NOFILEHANDLE))) nd->nd_flag |= ND_INCRSEQID; - /* - * If the first op's status is non-zero, mark - * that there is no more data to process. - */ - if (j) - nd->nd_flag |= ND_NOMOREDATA; } + /* + * If this op's status is non-zero, mark + * that there is no more data to process. + */ + if (j) + nd->nd_flag |= ND_NOMOREDATA; /* * If R_DONTRECOVER is set, replace the stale error * reply, so that recovery isn't initiated. */ if ((nd->nd_repstat == NFSERR_STALECLIENTID || + nd->nd_repstat == NFSERR_BADSESSION || nd->nd_repstat == NFSERR_STALESTATEID) && rep != NULL && (rep->r_flags & R_DONTRECOVER)) nd->nd_repstat = NFSERR_STALEDONTRECOVER; } } #ifdef KDTRACE_HOOKS if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) { uint32_t probe_id; int probe_procnum; if (nd->nd_flag & ND_NFSV4) { probe_id = nfscl_nfs4_done_probes[nd->nd_procnum]; probe_procnum = nd->nd_procnum; } else if (nd->nd_flag & ND_NFSV3) { probe_id = nfscl_nfs3_done_probes[procnum]; probe_procnum = procnum; } else { probe_id = nfscl_nfs2_done_probes[nd->nd_procnum]; probe_procnum = procnum; } if (probe_id != 0) (dtrace_nfscl_nfs234_done_probe)(probe_id, vp, nd->nd_mreq, cred, probe_procnum, 0); } #endif m_freem(nd->nd_mreq); AUTH_DESTROY(auth); if (rep != NULL) FREE((caddr_t)rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (0); nfsmout: mbuf_freem(nd->nd_mrep); mbuf_freem(nd->nd_mreq); AUTH_DESTROY(auth); if (rep != NULL) FREE((caddr_t)rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (error); } /* * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and * wait for all requests to complete. This is used by forced unmounts * to terminate any outstanding RPCs. */ int newnfs_nmcancelreqs(struct nfsmount *nmp) { if (nmp->nm_sockreq.nr_client != NULL) CLNT_CLOSE(nmp->nm_sockreq.nr_client); return (0); } /* * Any signal that can interrupt an NFS operation in an intr mount * should be added to this set. SIGSTOP and SIGKILL cannot be masked. */ int newnfs_sig_set[] = { SIGINT, SIGTERM, SIGHUP, SIGKILL, SIGSTOP, SIGQUIT }; /* * Check to see if one of the signals in our subset is pending on * the process (in an intr mount). */ static int nfs_sig_pending(sigset_t set) { int i; for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++) if (SIGISMEMBER(set, newnfs_sig_set[i])) return (1); return (0); } /* * The set/restore sigmask functions are used to (temporarily) overwrite * the process p_sigmask during an RPC call (for example). These are also * used in other places in the NFS client that might tsleep(). */ void newnfs_set_sigmask(struct thread *td, sigset_t *oldset) { sigset_t newset; int i; struct proc *p; SIGFILLSET(newset); if (td == NULL) td = curthread; /* XXX */ p = td->td_proc; /* Remove the NFS set of signals from newset */ PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++) { /* * But make sure we leave the ones already masked * by the process, ie. remove the signal from the * temporary signalmask only if it wasn't already * in p_sigmask. */ if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) && !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i])) SIGDELSET(newset, newnfs_sig_set[i]); } mtx_unlock(&p->p_sigacts->ps_mtx); PROC_UNLOCK(p); kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0); } void newnfs_restore_sigmask(struct thread *td, sigset_t *set) { if (td == NULL) td = curthread; /* XXX */ kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); } /* * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the * old one after msleep() returns. */ int newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) { sigset_t oldset; int error; struct proc *p; if ((priority & PCATCH) == 0) return msleep(ident, mtx, priority, wmesg, timo); if (td == NULL) td = curthread; /* XXX */ newnfs_set_sigmask(td, &oldset); error = msleep(ident, mtx, priority, wmesg, timo); newnfs_restore_sigmask(td, &oldset); p = td->td_proc; return (error); } /* * Test for a termination condition pending on the process. * This is used for NFSMNT_INT mounts. */ int newnfs_sigintr(struct nfsmount *nmp, struct thread *td) { struct proc *p; sigset_t tmpset; /* Terminate all requests while attempting a forced unmount. */ if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) return (EIO); if (!(nmp->nm_flag & NFSMNT_INT)) return (0); if (td == NULL) return (0); p = td->td_proc; PROC_LOCK(p); tmpset = p->p_siglist; SIGSETOR(tmpset, td->td_siglist); SIGSETNAND(tmpset, td->td_sigmask); mtx_lock(&p->p_sigacts->ps_mtx); SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); mtx_unlock(&p->p_sigacts->ps_mtx); if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) && nfs_sig_pending(tmpset)) { PROC_UNLOCK(p); return (EINTR); } PROC_UNLOCK(p); return (0); } static int nfs_msg(struct thread *td, const char *server, const char *msg, int error) { struct proc *p; p = td ? td->td_proc : NULL; if (error) { tprintf(p, LOG_INFO, "newnfs server %s: %s, error %d\n", server, msg, error); } else { tprintf(p, LOG_INFO, "newnfs server %s: %s\n", server, msg); } return (0); } static void nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, int error, int flags) { if (nmp == NULL) return; mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { nmp->nm_state |= NFSSTA_TIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 0); } else mtx_unlock(&nmp->nm_mtx); mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state |= NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 0); } else mtx_unlock(&nmp->nm_mtx); nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); } static void nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, int flags, int tprintfmsg) { if (nmp == NULL) return; if (tprintfmsg) { nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); } mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { nmp->nm_state &= ~NFSSTA_TIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 1); } else mtx_unlock(&nmp->nm_mtx); mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state &= ~NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 1); } else mtx_unlock(&nmp->nm_mtx); } Index: head/sys/fs/nfs/nfs_commonport.c =================================================================== --- head/sys/fs/nfs/nfs_commonport.c (revision 244041) +++ head/sys/fs/nfs/nfs_commonport.c (revision 244042) @@ -1,628 +1,634 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Functions that need to be different for different versions of BSD * kernel should be kept here, along with any global storage specific * to this BSD variant. */ #include #include #include #include #include #include #include #include #include #include extern int nfscl_ticks; extern int nfsrv_nfsuserd; extern struct nfssockreq nfsrv_nfsuserdsock; extern void (*nfsd_call_recall)(struct vnode *, int, struct ucred *, struct thread *); extern int nfsrv_useacl; struct mount nfsv4root_mnt; int newnfs_numnfsd = 0; struct nfsstats newnfsstats; int nfs_numnfscbd = 0; int nfscl_debuglevel = 0; char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; struct callout newnfsd_callout; void (*nfsd_call_servertimer)(void) = NULL; void (*ncl_call_invalcaches)(struct vnode *) = NULL; static int nfs_realign_test; static int nfs_realign_count; SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "New NFS filesystem"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, "Number of realign tests done"); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, "Number of mbuf realignments done"); SYSCTL_STRING(_vfs_nfs, OID_AUTO, callback_addr, CTLFLAG_RW, nfsv4_callbackaddr, sizeof(nfsv4_callbackaddr), "NFSv4 callback addr for server to use"); SYSCTL_INT(_vfs_nfs, OID_AUTO, debuglevel, CTLFLAG_RW, &nfscl_debuglevel, 0, "Debug level for new nfs client"); /* * Defines for malloc * (Here for FreeBSD, since they allocate storage.) */ MALLOC_DEFINE(M_NEWNFSRVCACHE, "NFSD srvcache", "NFSD Server Request Cache"); MALLOC_DEFINE(M_NEWNFSDCLIENT, "NFSD V4client", "NFSD V4 Client Id"); MALLOC_DEFINE(M_NEWNFSDSTATE, "NFSD V4state", "NFSD V4 State (Openowner, Open, Lockowner, Delegation"); MALLOC_DEFINE(M_NEWNFSDLOCK, "NFSD V4lock", "NFSD V4 byte range lock"); MALLOC_DEFINE(M_NEWNFSDLOCKFILE, "NFSD lckfile", "NFSD Open/Lock file"); MALLOC_DEFINE(M_NEWNFSSTRING, "NFSD string", "NFSD V4 long string"); MALLOC_DEFINE(M_NEWNFSUSERGROUP, "NFSD usrgroup", "NFSD V4 User/group map"); MALLOC_DEFINE(M_NEWNFSDREQ, "NFS req", "NFS request header"); MALLOC_DEFINE(M_NEWNFSFH, "NFS fh", "NFS file handle"); MALLOC_DEFINE(M_NEWNFSCLOWNER, "NFSCL owner", "NFSCL Open Owner"); MALLOC_DEFINE(M_NEWNFSCLOPEN, "NFSCL open", "NFSCL Open"); MALLOC_DEFINE(M_NEWNFSCLDELEG, "NFSCL deleg", "NFSCL Delegation"); MALLOC_DEFINE(M_NEWNFSCLCLIENT, "NFSCL client", "NFSCL Client"); MALLOC_DEFINE(M_NEWNFSCLLOCKOWNER, "NFSCL lckown", "NFSCL Lock Owner"); MALLOC_DEFINE(M_NEWNFSCLLOCK, "NFSCL lck", "NFSCL Lock"); MALLOC_DEFINE(M_NEWNFSV4NODE, "NEWNFSnode", "New nfs vnode"); MALLOC_DEFINE(M_NEWNFSDIRECTIO, "NEWdirectio", "New nfs Direct IO buffer"); MALLOC_DEFINE(M_NEWNFSDIROFF, "NFSCL diroffdiroff", "New NFS directory offset data"); MALLOC_DEFINE(M_NEWNFSDROLLBACK, "NFSD rollback", "New NFS local lock rollback"); +MALLOC_DEFINE(M_NEWNFSLAYOUT, "NFSCL layout", "NFSv4.1 Layout"); +MALLOC_DEFINE(M_NEWNFSFLAYOUT, "NFSCL flayout", "NFSv4.1 File Layout"); +MALLOC_DEFINE(M_NEWNFSDEVINFO, "NFSCL devinfo", "NFSv4.1 Device Info"); +MALLOC_DEFINE(M_NEWNFSSOCKREQ, "NFSCL sockreq", "NFS Sock Req"); +MALLOC_DEFINE(M_NEWNFSCLDS, "NFSCL session", "NFSv4.1 Session"); +MALLOC_DEFINE(M_NEWNFSLAYRECALL, "NFSCL layrecall", "NFSv4.1 Layout Recall"); /* * Definition of mutex locks. * newnfsd_mtx is used in nfsrvd_nfsd() to protect the nfs socket list * and assorted other nfsd structures. */ struct mtx newnfsd_mtx; struct mtx nfs_sockl_mutex; struct mtx nfs_state_mutex; struct mtx nfs_nameid_mutex; struct mtx nfs_req_mutex; struct mtx nfs_slock_mutex; /* local functions */ static int nfssvc_call(struct thread *, struct nfssvc_args *, struct ucred *); #ifdef __NO_STRICT_ALIGNMENT /* * These architectures don't need re-alignment, so just return. */ void newnfs_realign(struct mbuf **pm) { return; } #else /* !__NO_STRICT_ALIGNMENT */ /* * newnfs_realign: * * Check for badly aligned mbuf data and realign by copying the unaligned * portion of the data into a new mbuf chain and freeing the portions * of the old chain that were replaced. * * We cannot simply realign the data within the existing mbuf chain * because the underlying buffers may contain other rpc commands and * we cannot afford to overwrite them. * * We would prefer to avoid this situation entirely. The situation does * not occur with NFS/UDP and is supposed to only occassionally occur * with TCP. Use vfs.nfs.realign_count and realign_test to check this. * */ void newnfs_realign(struct mbuf **pm) { struct mbuf *m, *n; int off, space; ++nfs_realign_test; while ((m = *pm) != NULL) { if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { /* * NB: we can't depend on m_pkthdr.len to help us * decide what to do here. May not be worth doing * the m_length calculation as m_copyback will * expand the mbuf chain below as needed. */ space = m_length(m, NULL); if (space >= MINCLSIZE) { /* NB: m_copyback handles space > MCLBYTES */ n = m_getcl(M_WAITOK, MT_DATA, 0); } else n = m_get(M_WAITOK, MT_DATA); if (n == NULL) return; /* * Align the remainder of the mbuf chain. */ n->m_len = 0; off = 0; while (m != NULL) { m_copyback(n, off, m->m_len, mtod(m, caddr_t)); off += m->m_len; m = m->m_next; } m_freem(*pm); *pm = n; ++nfs_realign_count; break; } pm = &m->m_next; } } #endif /* __NO_STRICT_ALIGNMENT */ #ifdef notdef static void nfsrv_object_create(struct vnode *vp, struct thread *td) { if (vp == NULL || vp->v_type != VREG) return; (void) vfs_object_create(vp, td, td->td_ucred); } #endif /* * Look up a file name. Basically just initialize stuff and call namei(). */ int nfsrv_lookupfilename(struct nameidata *ndp, char *fname, NFSPROC_T *p) { int error; NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fname, p); error = namei(ndp); if (!error) { NDFREE(ndp, NDF_ONLY_PNBUF); } return (error); } /* * Copy NFS uid, gids to the cred structure. */ void newnfs_copycred(struct nfscred *nfscr, struct ucred *cr) { KASSERT(nfscr->nfsc_ngroups >= 0, ("newnfs_copycred: negative nfsc_ngroups")); cr->cr_uid = nfscr->nfsc_uid; crsetgroups(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups); } /* * Map args from nfsmsleep() to msleep(). */ int nfsmsleep(void *chan, void *mutex, int prio, const char *wmesg, struct timespec *ts) { u_int64_t nsecval; int error, timeo; if (ts) { timeo = hz * ts->tv_sec; nsecval = (u_int64_t)ts->tv_nsec; nsecval = ((nsecval * ((u_int64_t)hz)) + 500000000) / 1000000000; timeo += (int)nsecval; } else { timeo = 0; } error = msleep(chan, (struct mtx *)mutex, prio, wmesg, timeo); return (error); } /* * Get the file system info for the server. For now, just assume FFS. */ void nfsvno_getfs(struct nfsfsinfo *sip, int isdgram) { int pref; /* * XXX * There should be file system VFS OP(s) to get this information. * For now, assume ufs. */ if (isdgram) pref = NFS_MAXDGRAMDATA; else pref = NFS_MAXDATA; sip->fs_rtmax = NFS_MAXDATA; sip->fs_rtpref = pref; sip->fs_rtmult = NFS_FABLKSIZE; sip->fs_wtmax = NFS_MAXDATA; sip->fs_wtpref = pref; sip->fs_wtmult = NFS_FABLKSIZE; sip->fs_dtpref = pref; sip->fs_maxfilesize = 0xffffffffffffffffull; sip->fs_timedelta.tv_sec = 0; sip->fs_timedelta.tv_nsec = 1; sip->fs_properties = (NFSV3FSINFO_LINK | NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); } /* * Do the pathconf vnode op. */ int nfsvno_pathconf(struct vnode *vp, int flag, register_t *retf, struct ucred *cred, struct thread *p) { int error; error = VOP_PATHCONF(vp, flag, retf); if (error == EOPNOTSUPP || error == EINVAL) { /* * Some file systems return EINVAL for name arguments not * supported and some return EOPNOTSUPP for this case. * So the NFSv3 Pathconf RPC doesn't fail for these cases, * just fake them. */ switch (flag) { case _PC_LINK_MAX: *retf = LINK_MAX; break; case _PC_NAME_MAX: *retf = NAME_MAX; break; case _PC_CHOWN_RESTRICTED: *retf = 1; break; case _PC_NO_TRUNC: *retf = 1; break; default: /* * Only happens if a _PC_xxx is added to the server, * but this isn't updated. */ *retf = 0; printf("nfsrvd pathconf flag=%d not supp\n", flag); }; error = 0; } NFSEXITCODE(error); return (error); } /* Fake nfsrv_atroot. Just return 0 */ int nfsrv_atroot(struct vnode *vp, long *retp) { return (0); } /* * Set the credentials to refer to root. * If only the various BSDen could agree on whether cr_gid is a separate * field or cr_groups[0]... */ void newnfs_setroot(struct ucred *cred) { cred->cr_uid = 0; cred->cr_groups[0] = 0; cred->cr_ngroups = 1; } /* * Get the client credential. Used for Renew and recovery. */ struct ucred * newnfs_getcred(void) { struct ucred *cred; struct thread *td = curthread; cred = crdup(td->td_ucred); newnfs_setroot(cred); return (cred); } /* * Nfs timer routine * Call the nfsd's timer function once/sec. */ void newnfs_timer(void *arg) { static time_t lasttime = 0; /* * Call the server timer, if set up. * The argument indicates if it is the next second and therefore * leases should be checked. */ if (lasttime != NFSD_MONOSEC) { lasttime = NFSD_MONOSEC; if (nfsd_call_servertimer != NULL) (*nfsd_call_servertimer)(); } callout_reset(&newnfsd_callout, nfscl_ticks, newnfs_timer, NULL); } /* * Sleep for a short period of time unless errval == NFSERR_GRACE, where * the sleep should be for 5 seconds. * Since lbolt doesn't exist in FreeBSD-CURRENT, just use a timeout on * an event that never gets a wakeup. Only return EINTR or 0. */ int nfs_catnap(int prio, int errval, const char *wmesg) { static int non_event; int ret; if (errval == NFSERR_GRACE) ret = tsleep(&non_event, prio, wmesg, 5 * hz); else ret = tsleep(&non_event, prio, wmesg, 1); if (ret != EINTR) ret = 0; return (ret); } /* * Get referral. For now, just fail. */ struct nfsreferral * nfsv4root_getreferral(struct vnode *vp, struct vnode *dvp, u_int32_t fileno) { return (NULL); } static int nfssvc_nfscommon(struct thread *td, struct nfssvc_args *uap) { int error; error = nfssvc_call(td, uap, td->td_ucred); NFSEXITCODE(error); return (error); } static int nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { int error = EINVAL; struct nfsd_idargs nid; if (uap->flag & NFSSVC_IDNAME) { error = copyin(uap->argp, (caddr_t)&nid, sizeof (nid)); if (error) goto out; error = nfssvc_idname(&nid); goto out; } else if (uap->flag & NFSSVC_GETSTATS) { error = copyout(&newnfsstats, CAST_USER_ADDR_T(uap->argp), sizeof (newnfsstats)); if (error == 0) { if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) { newnfsstats.attrcache_hits = 0; newnfsstats.attrcache_misses = 0; newnfsstats.lookupcache_hits = 0; newnfsstats.lookupcache_misses = 0; newnfsstats.direofcache_hits = 0; newnfsstats.direofcache_misses = 0; newnfsstats.accesscache_hits = 0; newnfsstats.accesscache_misses = 0; newnfsstats.biocache_reads = 0; newnfsstats.read_bios = 0; newnfsstats.read_physios = 0; newnfsstats.biocache_writes = 0; newnfsstats.write_bios = 0; newnfsstats.write_physios = 0; newnfsstats.biocache_readlinks = 0; newnfsstats.readlink_bios = 0; newnfsstats.biocache_readdirs = 0; newnfsstats.readdir_bios = 0; newnfsstats.rpcretries = 0; newnfsstats.rpcrequests = 0; newnfsstats.rpctimeouts = 0; newnfsstats.rpcunexpected = 0; newnfsstats.rpcinvalid = 0; bzero(newnfsstats.rpccnt, sizeof(newnfsstats.rpccnt)); } if ((uap->flag & NFSSVC_ZEROSRVSTATS) != 0) { newnfsstats.srvrpc_errs = 0; newnfsstats.srv_errs = 0; newnfsstats.srvcache_inproghits = 0; newnfsstats.srvcache_idemdonehits = 0; newnfsstats.srvcache_nonidemdonehits = 0; newnfsstats.srvcache_misses = 0; newnfsstats.srvcache_tcppeak = 0; newnfsstats.srvclients = 0; newnfsstats.srvopenowners = 0; newnfsstats.srvopens = 0; newnfsstats.srvlockowners = 0; newnfsstats.srvlocks = 0; newnfsstats.srvdelegates = 0; newnfsstats.clopenowners = 0; newnfsstats.clopens = 0; newnfsstats.cllockowners = 0; newnfsstats.cllocks = 0; newnfsstats.cldelegates = 0; newnfsstats.cllocalopenowners = 0; newnfsstats.cllocalopens = 0; newnfsstats.cllocallockowners = 0; newnfsstats.cllocallocks = 0; bzero(newnfsstats.srvrpccnt, sizeof(newnfsstats.srvrpccnt)); bzero(newnfsstats.cbrpccnt, sizeof(newnfsstats.cbrpccnt)); } } goto out; } else if (uap->flag & NFSSVC_NFSUSERDPORT) { u_short sockport; error = copyin(uap->argp, (caddr_t)&sockport, sizeof (u_short)); if (!error) error = nfsrv_nfsuserdport(sockport, p); } else if (uap->flag & NFSSVC_NFSUSERDDELPORT) { nfsrv_nfsuserddelport(); error = 0; } out: NFSEXITCODE(error); return (error); } /* * called by all three modevent routines, so that it gets things * initialized soon enough. */ void newnfs_portinit(void) { static int inited = 0; if (inited) return; inited = 1; /* Initialize SMP locks used by both client and server. */ mtx_init(&newnfsd_mtx, "newnfsd_mtx", NULL, MTX_DEF); mtx_init(&nfs_state_mutex, "nfs_state_mutex", NULL, MTX_DEF); } /* * Determine if the file system supports NFSv4 ACLs. * Return 1 if it does, 0 otherwise. */ int nfs_supportsnfsv4acls(struct vnode *vp) { int error; register_t retval; ASSERT_VOP_LOCKED(vp, "nfs supports nfsv4acls"); if (nfsrv_useacl == 0) return (0); error = VOP_PATHCONF(vp, _PC_ACL_NFS4, &retval); if (error == 0 && retval != 0) return (1); return (0); } extern int (*nfsd_call_nfscommon)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfscommon_modevent(module_t mod, int type, void *data) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) goto out; newnfs_portinit(); mtx_init(&nfs_nameid_mutex, "nfs_nameid_mutex", NULL, MTX_DEF); mtx_init(&nfs_sockl_mutex, "nfs_sockl_mutex", NULL, MTX_DEF); mtx_init(&nfs_slock_mutex, "nfs_slock_mutex", NULL, MTX_DEF); mtx_init(&nfs_req_mutex, "nfs_req_mutex", NULL, MTX_DEF); mtx_init(&nfsrv_nfsuserdsock.nr_mtx, "nfsuserd", NULL, MTX_DEF); callout_init(&newnfsd_callout, CALLOUT_MPSAFE); newnfs_init(); nfsd_call_nfscommon = nfssvc_nfscommon; loaded = 1; break; case MOD_UNLOAD: if (newnfs_numnfsd != 0 || nfsrv_nfsuserd != 0 || nfs_numnfscbd != 0) { error = EBUSY; break; } nfsd_call_nfscommon = NULL; callout_drain(&newnfsd_callout); /* and get rid of the mutexes */ mtx_destroy(&nfs_nameid_mutex); mtx_destroy(&newnfsd_mtx); mtx_destroy(&nfs_state_mutex); mtx_destroy(&nfs_sockl_mutex); mtx_destroy(&nfs_slock_mutex); mtx_destroy(&nfs_req_mutex); mtx_destroy(&nfsrv_nfsuserdsock.nr_mtx); loaded = 0; break; default: error = EOPNOTSUPP; break; } out: NFSEXITCODE(error); return error; } static moduledata_t nfscommon_mod = { "nfscommon", nfscommon_modevent, NULL, }; DECLARE_MODULE(nfscommon, nfscommon_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfscommon, 1); MODULE_DEPEND(nfscommon, nfssvc, 1, 1, 1); MODULE_DEPEND(nfscommon, krpc, 1, 1, 1); Index: head/sys/fs/nfs/nfs_commonsubs.c =================================================================== --- head/sys/fs/nfs/nfs_commonsubs.c (revision 244041) +++ head/sys/fs/nfs/nfs_commonsubs.c (revision 244042) @@ -1,3512 +1,3768 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #ifndef APPLEKEXT #include "opt_inet6.h" #include /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; /* And other global data */ nfstype nfsv34_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; enum vtype newnv2tov_type[8] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv34tov_type[8]={ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; struct timeval nfsboottime; /* Copy boottime once, so it never changes */ int nfscl_ticks; int nfsrv_useacl = 1; struct nfssockreq nfsrv_nfsuserdsock; int nfsrv_nfsuserd = 0; struct nfsreqhead nfsd_reqq; uid_t nfsrv_defaultuid; gid_t nfsrv_defaultgid; int nfsrv_lease = NFSRV_LEASE; int ncl_mbuf_mlen = MLEN; NFSNAMEIDMUTEX; NFSSOCKMUTEX; /* * This array of structures indicates, for V4: * retfh - which of 3 types of calling args are used * 0 - doesn't change cfh or use a sfh * 1 - replaces cfh with a new one (unless it returns an error status) * 2 - uses cfh and sfh * needscfh - if the op wants a cfh and premtime * 0 - doesn't use a cfh * 1 - uses a cfh, but doesn't want pre-op attributes * 2 - uses a cfh and wants pre-op attributes * savereply - indicates a non-idempotent Op * 0 - not non-idempotent * 1 - non-idempotent * Ops that are ordered via seqid# are handled separately from these * non-idempotent Ops. * Define it here, since it is used by both the client and server. */ -struct nfsv4_opflag nfsv4_opflag[NFSV4OP_NOPS] = { - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* undef */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* undef */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* undef */ - { 0, 1, 0, 0, LK_SHARED }, /* Access */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* Close */ - { 0, 2, 0, 1, LK_EXCLUSIVE }, /* Commit */ - { 1, 2, 1, 1, LK_EXCLUSIVE }, /* Create */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* Delegpurge */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* Delegreturn */ - { 0, 1, 0, 0, LK_SHARED }, /* Getattr */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* GetFH */ - { 2, 1, 1, 1, LK_EXCLUSIVE }, /* Link */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* Lock */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* LockT */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* LockU */ - { 1, 1, 0, 0, LK_EXCLUSIVE }, /* Lookup */ - { 1, 1, 0, 0, LK_EXCLUSIVE }, /* Lookupp */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* NVerify */ - { 1, 1, 0, 1, LK_EXCLUSIVE }, /* Open */ - { 1, 1, 0, 0, LK_EXCLUSIVE }, /* OpenAttr */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* OpenConfirm */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* OpenDowngrade */ - { 1, 0, 0, 0, LK_EXCLUSIVE }, /* PutFH */ - { 1, 0, 0, 0, LK_EXCLUSIVE }, /* PutPubFH */ - { 1, 0, 0, 0, LK_EXCLUSIVE }, /* PutRootFH */ - { 0, 1, 0, 0, LK_SHARED }, /* Read */ - { 0, 1, 0, 0, LK_SHARED }, /* Readdir */ - { 0, 1, 0, 0, LK_SHARED }, /* ReadLink */ - { 0, 2, 1, 1, LK_EXCLUSIVE }, /* Remove */ - { 2, 1, 1, 1, LK_EXCLUSIVE }, /* Rename */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* Renew */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* RestoreFH */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* SaveFH */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* SecInfo */ - { 0, 2, 1, 1, LK_EXCLUSIVE }, /* Setattr */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* SetClientID */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* SetClientIDConfirm */ - { 0, 1, 0, 0, LK_EXCLUSIVE }, /* Verify */ - { 0, 2, 1, 1, LK_EXCLUSIVE }, /* Write */ - { 0, 0, 0, 0, LK_EXCLUSIVE }, /* ReleaseLockOwner */ +struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS] = { + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* undef */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* undef */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* undef */ + { 0, 1, 0, 0, LK_SHARED, 1 }, /* Access */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Close */ + { 0, 2, 0, 1, LK_EXCLUSIVE, 1 }, /* Commit */ + { 1, 2, 1, 1, LK_EXCLUSIVE, 1 }, /* Create */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Delegpurge */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Delegreturn */ + { 0, 1, 0, 0, LK_SHARED, 1 }, /* Getattr */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* GetFH */ + { 2, 1, 1, 1, LK_EXCLUSIVE, 1 }, /* Link */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Lock */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* LockT */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* LockU */ + { 1, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Lookup */ + { 1, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Lookupp */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* NVerify */ + { 1, 1, 0, 1, LK_EXCLUSIVE, 1 }, /* Open */ + { 1, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* OpenAttr */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* OpenConfirm */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* OpenDowngrade */ + { 1, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* PutFH */ + { 1, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* PutPubFH */ + { 1, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* PutRootFH */ + { 0, 1, 0, 0, LK_SHARED, 1 }, /* Read */ + { 0, 1, 0, 0, LK_SHARED, 1 }, /* Readdir */ + { 0, 1, 0, 0, LK_SHARED, 1 }, /* ReadLink */ + { 0, 2, 1, 1, LK_EXCLUSIVE, 1 }, /* Remove */ + { 2, 1, 1, 1, LK_EXCLUSIVE, 1 }, /* Rename */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Renew */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* RestoreFH */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* SaveFH */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* SecInfo */ + { 0, 2, 1, 1, LK_EXCLUSIVE, 1 }, /* Setattr */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* SetClientID */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* SetClientIDConfirm */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Verify */ + { 0, 2, 1, 1, LK_EXCLUSIVE, 1 }, /* Write */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* ReleaseLockOwner */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Backchannel Ctrl */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Bind Conn to Sess */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 0 }, /* Exchange ID */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 0 }, /* Create Session */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 0 }, /* Destroy Session */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Free StateID */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Get Dir Deleg */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Get Device Info */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Get Device List */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Layout Commit */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Layout Get */ + { 0, 1, 0, 0, LK_EXCLUSIVE, 1 }, /* Layout Return */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Secinfo No name */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Sequence */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Set SSV */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Test StateID */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Want Delegation */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 0 }, /* Destroy ClientID */ + { 0, 0, 0, 0, LK_EXCLUSIVE, 1 }, /* Reclaim Complete */ }; #endif /* !APPLEKEXT */ static int ncl_mbuf_mhlen = MHLEN; static int nfsrv_usercnt = 0; static int nfsrv_dnsnamelen; static u_char *nfsrv_dnsname = NULL; static int nfsrv_usermax = 999999999; static struct nfsuserhashhead nfsuserhash[NFSUSERHASHSIZE]; static struct nfsuserhashhead nfsusernamehash[NFSUSERHASHSIZE]; static struct nfsuserhashhead nfsgrouphash[NFSGROUPHASHSIZE]; static struct nfsuserhashhead nfsgroupnamehash[NFSGROUPHASHSIZE]; static struct nfsuserlruhead nfsuserlruhead; /* * This static array indicates whether or not the RPC generates a large * reply. This is used by nfs_reply() to decide whether or not an mbuf * cluster should be allocated. (If a cluster is required by an RPC * marked 0 in this array, the code will still work, just not quite as * efficiently.) */ -static int nfs_bigreply[NFS_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, +int nfs_bigreply[NFSV41_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0 }; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 }; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); static void nfsv4_wanted(struct nfsv4lock *lp); static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len); static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name, NFSPROC_T *p); static void nfsrv_removeuser(struct nfsusrgrp *usrp); static int nfsrv_getrefstr(struct nfsrv_descript *, u_char **, u_char **, int *, int *); static void nfsrv_refstrbigenough(int, u_char **, u_char **, int *); #ifndef APPLE /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbufuio(struct nfsrv_descript *nd, struct uio *uiop, int siz) { char *mbufcp, *uiocp; int xfer, left, len; mbuf_t mp; long uiosiz, rem; int error = 0; mp = nd->nd_md; mbufcp = nd->nd_dpos; len = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - mbufcp; rem = NFSM_RNDUP(siz) - siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) { error = EBADRPC; goto out; } left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mbuf_next(mp); if (mp == NULL) { error = EBADRPC; goto out; } mbufcp = NFSMTOD(mp, caddr_t); len = mbuf_len(mp); } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) NFSBCOPY(mbufcp, uiocp, xfer); else copyout(mbufcp, CAST_USER_ADDR_T(uiocp), xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base = (void *) ((char *)uiop->uio_iov->iov_base + uiosiz); uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } nd->nd_dpos = mbufcp; nd->nd_md = mp; if (rem > 0) { if (len < rem) error = nfsm_advance(nd, rem, len); else nd->nd_dpos += rem; } out: NFSEXITCODE2(error, nd); return (error); } #endif /* !APPLE */ /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macro NFSM_DISSECT for tough * cases. */ APPLESTATIC void * nfsm_dissct(struct nfsrv_descript *nd, int siz) { mbuf_t mp2; int siz2, xfer; caddr_t p; int left; caddr_t retp; retp = NULL; left = NFSMTOD(nd->nd_md, caddr_t) + mbuf_len(nd->nd_md) - nd->nd_dpos; while (left == 0) { nd->nd_md = mbuf_next(nd->nd_md); if (nd->nd_md == NULL) return (retp); left = mbuf_len(nd->nd_md); nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); } if (left >= siz) { retp = nd->nd_dpos; nd->nd_dpos += siz; } else if (mbuf_next(nd->nd_md) == NULL) { return (retp); } else if (siz > ncl_mbuf_mhlen) { panic("nfs S too big"); } else { NFSMGET(mp2); mbuf_setnext(mp2, mbuf_next(nd->nd_md)); mbuf_setnext(nd->nd_md, mp2); mbuf_setlen(nd->nd_md, mbuf_len(nd->nd_md) - left); nd->nd_md = mp2; retp = p = NFSMTOD(mp2, caddr_t); NFSBCOPY(nd->nd_dpos, p, left); /* Copy what was left */ siz2 = siz - left; p += left; mp2 = mbuf_next(mp2); /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (NULL); xfer = (siz2 > mbuf_len(mp2)) ? mbuf_len(mp2) : siz2; if (xfer > 0) { NFSBCOPY(NFSMTOD(mp2, caddr_t), p, xfer); NFSM_DATAP(mp2, xfer); mbuf_setlen(mp2, mbuf_len(mp2) - xfer); p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mbuf_next(mp2); } mbuf_setlen(nd->nd_md, siz); nd->nd_md = mp2; nd->nd_dpos = NFSMTOD(mp2, caddr_t); } return (retp); } /* * Advance the position in the mbuf chain. * If offs == 0, this is a no-op, but it is simpler to just return from * here than check for offs > 0 for all calls to nfsm_advance. * If left == -1, it should be calculated here. */ APPLESTATIC int nfsm_advance(struct nfsrv_descript *nd, int offs, int left) { int error = 0; if (offs == 0) goto out; /* * A negative offs should be considered a serious problem. */ if (offs < 0) panic("nfsrv_advance"); /* * If left == -1, calculate it here. */ if (left == -1) left = NFSMTOD(nd->nd_md, caddr_t) + mbuf_len(nd->nd_md) - nd->nd_dpos; /* * Loop around, advancing over the mbuf data. */ while (offs > left) { offs -= left; nd->nd_md = mbuf_next(nd->nd_md); if (nd->nd_md == NULL) { error = EBADRPC; goto out; } left = mbuf_len(nd->nd_md); nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); } nd->nd_dpos += offs; out: NFSEXITCODE(error); return (error); } /* * Copy a string into mbuf(s). * Return the number of bytes output, including XDR overheads. */ APPLESTATIC int nfsm_strtom(struct nfsrv_descript *nd, const char *cp, int siz) { mbuf_t m2; int xfer, left; mbuf_t m1; int rem, bytesize; u_int32_t *tl; char *cp2; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(siz); rem = NFSM_RNDUP(siz) - siz; bytesize = NFSX_UNSIGNED + siz + rem; m2 = nd->nd_mb; cp2 = nd->nd_bpos; left = M_TRAILINGSPACE(m2); /* * Loop around copying the string to mbuf(s). */ while (siz > 0) { if (left == 0) { if (siz > ncl_mbuf_mlen) NFSMCLGET(m1, M_WAITOK); else NFSMGET(m1); mbuf_setlen(m1, 0); mbuf_setnext(m2, m1); m2 = m1; cp2 = NFSMTOD(m2, caddr_t); left = M_TRAILINGSPACE(m2); } if (left >= siz) xfer = siz; else xfer = left; NFSBCOPY(cp, cp2, xfer); cp += xfer; mbuf_setlen(m2, mbuf_len(m2) + xfer); siz -= xfer; left -= xfer; if (siz == 0 && rem) { if (left < rem) panic("nfsm_strtom"); NFSBZERO(cp2 + xfer, rem); mbuf_setlen(m2, mbuf_len(m2) + rem); } } nd->nd_mb = m2; nd->nd_bpos = NFSMTOD(m2, caddr_t) + mbuf_len(m2); return (bytesize); } /* * Called once to initialize data structures... */ APPLESTATIC void newnfs_init(void) { static int nfs_inited = 0; if (nfs_inited) return; nfs_inited = 1; newnfs_true = txdr_unsigned(TRUE); newnfs_false = txdr_unsigned(FALSE); newnfs_xdrneg1 = txdr_unsigned(-1); nfscl_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfscl_ticks < 1) nfscl_ticks = 1; NFSSETBOOTTIME(nfsboottime); /* * Initialize reply list and start timer */ TAILQ_INIT(&nfsd_reqq); NFS_TIMERINIT; } /* * Put a file handle in an mbuf list. * If the size argument == 0, just use the default size. * set_true == 1 if there should be an newnfs_true prepended on the file handle. * Return the number of bytes output, including XDR overhead. */ APPLESTATIC int nfsm_fhtom(struct nfsrv_descript *nd, u_int8_t *fhp, int size, int set_true) { u_int32_t *tl; u_int8_t *cp; int fullsiz, rem, bytesize = 0; if (size == 0) size = NFSX_MYFH; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: if (size > NFSX_V2FH) panic("fh size > NFSX_V2FH for NFSv2"); NFSM_BUILD(cp, u_int8_t *, NFSX_V2FH); NFSBCOPY(fhp, cp, size); if (size < NFSX_V2FH) NFSBZERO(cp + size, NFSX_V2FH - size); bytesize = NFSX_V2FH; break; case ND_NFSV3: case ND_NFSV4: fullsiz = NFSM_RNDUP(size); rem = fullsiz - size; if (set_true) { bytesize = 2 * NFSX_UNSIGNED + fullsiz; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; } else { bytesize = NFSX_UNSIGNED + fullsiz; } (void) nfsm_strtom(nd, fhp, size); break; }; return (bytesize); } /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ APPLESTATIC int nfsaddr_match(int family, union nethostaddr *haddr, NFSSOCKADDR_T nam) { struct sockaddr_in *inetaddr; switch (family) { case AF_INET: inetaddr = NFSSOCKADDR(nam, struct sockaddr_in *); if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inet.s_addr) return (1); break; #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *inetaddr6; inetaddr6 = NFSSOCKADDR(nam, struct sockaddr_in6 *); /* XXX - should test sin6_scope_id ? */ if (inetaddr6->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&inetaddr6->sin6_addr, &haddr->had_inet6)) return (1); } break; #endif }; return (0); } /* * Similar to the above, but takes to NFSSOCKADDR_T args. */ APPLESTATIC int nfsaddr2_match(NFSSOCKADDR_T nam1, NFSSOCKADDR_T nam2) { struct sockaddr_in *addr1, *addr2; struct sockaddr *inaddr; inaddr = NFSSOCKADDR(nam1, struct sockaddr *); switch (inaddr->sa_family) { case AF_INET: addr1 = NFSSOCKADDR(nam1, struct sockaddr_in *); addr2 = NFSSOCKADDR(nam2, struct sockaddr_in *); if (addr2->sin_family == AF_INET && addr1->sin_addr.s_addr == addr2->sin_addr.s_addr) return (1); break; #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *inet6addr1, *inet6addr2; inet6addr1 = NFSSOCKADDR(nam1, struct sockaddr_in6 *); inet6addr2 = NFSSOCKADDR(nam2, struct sockaddr_in6 *); /* XXX - should test sin6_scope_id ? */ if (inet6addr2->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&inet6addr1->sin6_addr, &inet6addr2->sin6_addr)) return (1); } break; #endif }; return (0); } /* * Trim the stuff already dissected off the mbuf list. */ APPLESTATIC void newnfs_trimleading(nd) struct nfsrv_descript *nd; { mbuf_t m, n; int offs; /* * First, free up leading mbufs. */ if (nd->nd_mrep != nd->nd_md) { m = nd->nd_mrep; while (mbuf_next(m) != nd->nd_md) { if (mbuf_next(m) == NULL) panic("nfsm trim leading"); m = mbuf_next(m); } mbuf_setnext(m, NULL); mbuf_freem(nd->nd_mrep); } m = nd->nd_md; /* * Now, adjust this mbuf, based on nd_dpos. */ offs = nd->nd_dpos - NFSMTOD(m, caddr_t); if (offs == mbuf_len(m)) { n = m; m = mbuf_next(m); if (m == NULL) panic("nfsm trim leading2"); mbuf_setnext(n, NULL); mbuf_freem(n); } else if (offs > 0) { mbuf_setlen(m, mbuf_len(m) - offs); NFSM_DATAP(m, offs); } else if (offs < 0) panic("nfsm trimleading offs"); nd->nd_mrep = m; nd->nd_md = m; nd->nd_dpos = NFSMTOD(m, caddr_t); } /* * Trim trailing data off the mbuf list being built. */ APPLESTATIC void newnfs_trimtrailing(nd, mb, bpos) struct nfsrv_descript *nd; mbuf_t mb; caddr_t bpos; { if (mbuf_next(mb)) { mbuf_freem(mbuf_next(mb)); mbuf_setnext(mb, NULL); } mbuf_setlen(mb, bpos - NFSMTOD(mb, caddr_t)); nd->nd_mb = mb; nd->nd_bpos = bpos; } /* * Dissect a file handle on the client. */ APPLESTATIC int nfsm_getfh(struct nfsrv_descript *nd, struct nfsfh **nfhpp) { u_int32_t *tl; struct nfsfh *nfhp; int error, len; *nfhpp = NULL; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { error = EBADRPC; goto nfsmout; } } else len = NFSX_V2FH; MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + len, M_NFSFH, M_WAITOK); error = nfsrv_mtostr(nd, nfhp->nfh_fh, len); if (error) { FREE((caddr_t)nfhp, M_NFSFH); goto nfsmout; } nfhp->nfh_len = len; *nfhpp = nfhp; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Break down the nfsv4 acl. * If the aclp == NULL or won't fit in an acl, just discard the acl info. */ APPLESTATIC int nfsrv_dissectacl(struct nfsrv_descript *nd, NFSACL_T *aclp, int *aclerrp, int *aclsizep, __unused NFSPROC_T *p) { u_int32_t *tl; int i, aclsize; int acecnt, error = 0, aceerr = 0, acesize; *aclerrp = 0; if (aclp) aclp->acl_cnt = 0; /* * Parse out the ace entries and expect them to conform to * what can be supported by R/W/X bits. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); aclsize = NFSX_UNSIGNED; acecnt = fxdr_unsigned(int, *tl); if (acecnt > ACL_MAX_ENTRIES) aceerr = NFSERR_ATTRNOTSUPP; if (nfsrv_useacl == 0) aceerr = NFSERR_ATTRNOTSUPP; for (i = 0; i < acecnt; i++) { if (aclp && !aceerr) error = nfsrv_dissectace(nd, &aclp->acl_entry[i], &aceerr, &acesize, p); else error = nfsrv_skipace(nd, &acesize); if (error) goto nfsmout; aclsize += acesize; } if (aclp && !aceerr) aclp->acl_cnt = acecnt; if (aceerr) *aclerrp = aceerr; if (aclsizep) *aclsizep = aclsize; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Skip over an NFSv4 ace entry. Just dissect the xdr and discard it. */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep) { u_int32_t *tl; int error, len = 0; NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 3)); error = nfsm_advance(nd, NFSM_RNDUP(len), -1); nfsmout: *acesizep = NFSM_RNDUP(len) + (4 * NFSX_UNSIGNED); NFSEXITCODE2(error, nd); return (error); } /* * Get attribute bits from an mbuf list. * Returns EBADRPC for a parsing error, 0 otherwise. * If the clearinvalid flag is set, clear the bits not supported. */ APPLESTATIC int nfsrv_getattrbits(struct nfsrv_descript *nd, nfsattrbit_t *attrbitp, int *cntp, int *retnotsupp) { u_int32_t *tl; int cnt, i, outcnt; int error = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (cnt > NFSATTRBIT_MAXWORDS) { outcnt = NFSATTRBIT_MAXWORDS; if (retnotsupp) *retnotsupp = NFSERR_ATTRNOTSUPP; } else { outcnt = cnt; } NFSZERO_ATTRBIT(attrbitp); if (outcnt > 0) { NFSM_DISSECT(tl, u_int32_t *, outcnt * NFSX_UNSIGNED); for (i = 0; i < outcnt; i++) attrbitp->bits[i] = fxdr_unsigned(u_int32_t, *tl++); } if (cnt > outcnt) error = nfsm_advance(nd, (cnt - outcnt) * NFSX_UNSIGNED, -1); if (cntp) *cntp = NFSX_UNSIGNED + (cnt * NFSX_UNSIGNED); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Get the attributes for V4. * If the compare flag is true, test for any attribute changes, * otherwise return the attribute values. * These attributes cover fields in "struct vattr", "struct statfs", * "struct nfsfsinfo", the file handle and the lease duration. * The value of retcmpp is set to 1 if all attributes are the same, * and 0 otherwise. * Returns EBADRPC if it can't be parsed, 0 otherwise. */ APPLESTATIC int nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nap, struct nfsfh **nfhpp, fhandle_t *fhp, int fhsize, struct nfsv3_pathconf *pc, struct statfs *sbp, struct nfsstatfs *sfp, struct nfsfsinfo *fsp, NFSACL_T *aclp, int compare, int *retcmpp, u_int32_t *leasep, u_int32_t *rderrp, NFSPROC_T *p, struct ucred *cred) { u_int32_t *tl; int i = 0, j, k, l = 0, m, bitpos, attrsum = 0; int error, tfhsize, aceerr, attrsize, cnt, retnotsup; u_char *cp, *cp2, namestr[NFSV4_SMALLSTR + 1]; nfsattrbit_t attrbits, retattrbits, checkattrbits; struct nfsfh *tnfhp; struct nfsreferral *refp; u_quad_t tquad; nfsquad_t tnfsquad; struct timespec temptime; uid_t uid; gid_t gid; long fid; u_int32_t freenum = 0, tuint; u_int64_t uquad = 0, thyp, thyp2; #ifdef QUOTA struct dqblk dqb; uid_t savuid; #endif if (compare) { retnotsup = 0; error = nfsrv_getattrbits(nd, &attrbits, NULL, &retnotsup); } else { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); } if (error) goto nfsmout; if (compare) { *retcmpp = retnotsup; } else { /* * Just set default values to some of the important ones. */ if (nap != NULL) { nap->na_type = VREG; nap->na_mode = 0; nap->na_rdev = (NFSDEV_T)0; nap->na_mtime.tv_sec = 0; nap->na_mtime.tv_nsec = 0; nap->na_gen = 0; nap->na_flags = 0; nap->na_blocksize = NFS_FABLKSIZE; } if (sbp != NULL) { sbp->f_bsize = NFS_FABLKSIZE; sbp->f_blocks = 0; sbp->f_bfree = 0; sbp->f_bavail = 0; sbp->f_files = 0; sbp->f_ffree = 0; } if (fsp != NULL) { fsp->fs_rtmax = 8192; fsp->fs_rtpref = 8192; fsp->fs_maxname = NFS_MAXNAMLEN; fsp->fs_wtmax = 8192; fsp->fs_wtpref = 8192; fsp->fs_wtmult = NFS_FABLKSIZE; fsp->fs_dtpref = 8192; fsp->fs_maxfilesize = 0xffffffffffffffffull; fsp->fs_timedelta.tv_sec = 0; fsp->fs_timedelta.tv_nsec = 1; fsp->fs_properties = (NFSV3_FSFLINK | NFSV3_FSFSYMLINK | NFSV3_FSFHOMOGENEOUS | NFSV3_FSFCANSETTIME); } if (pc != NULL) { pc->pc_linkmax = LINK_MAX; pc->pc_namemax = NAME_MAX; pc->pc_notrunc = 0; pc->pc_chownrestricted = 0; pc->pc_caseinsensitive = 0; pc->pc_casepreserving = 1; } } /* * Loop around getting the attributes. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsize = fxdr_unsigned(int, *tl); for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (attrsum > attrsize) { error = NFSERR_BADXDR; goto nfsmout; } if (NFSISSET_ATTRBIT(&attrbits, bitpos)) switch (bitpos) { case NFSATTRBIT_SUPPORTEDATTRS: retnotsup = 0; if (compare || nap == NULL) error = nfsrv_getattrbits(nd, &retattrbits, &cnt, &retnotsup); else error = nfsrv_getattrbits(nd, &nap->na_suppattr, &cnt, &retnotsup); if (error) goto nfsmout; if (compare && !(*retcmpp)) { NFSSETSUPP_ATTRBIT(&checkattrbits); if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits) || retnotsup) *retcmpp = NFSERR_NOTSAME; } attrsum += cnt; break; case NFSATTRBIT_TYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (nap->na_type != nfsv34tov_type(*tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_type = nfsv34tov_type(*tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FHEXPIRETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (fxdr_unsigned(int, *tl) != NFSV4FHTYPE_PERSISTENT) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHANGE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (nap->na_filerev != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_filerev = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (nap->na_size != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_size = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_LINKSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFLINK) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFLINK; else fsp->fs_properties &= ~NFSV3_FSFLINK; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_SYMLINKSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFSYMLINK) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFSYMLINK; else fsp->fs_properties &= ~NFSV3_FSFSYMLINK; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NAMEDATTR: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (*tl != newnfs_false) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); thyp = fxdr_hyper(tl); tl += 2; thyp2 = fxdr_hyper(tl); if (compare) { if (*retcmpp == 0) { if (thyp != (u_int64_t) vfs_statfs(vnode_mount(vp))->f_fsid.val[0] || thyp2 != (u_int64_t) vfs_statfs(vnode_mount(vp))->f_fsid.val[1]) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_filesid[0] = thyp; nap->na_filesid[1] = thyp2; } attrsum += (4 * NFSX_UNSIGNED); break; case NFSATTRBIT_UNIQUEHANDLES: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_LEASETIME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (fxdr_unsigned(int, *tl) != nfsrv_lease && !(*retcmpp)) *retcmpp = NFSERR_NOTSAME; } else if (leasep != NULL) { *leasep = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_RDATTRERROR: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) *retcmpp = NFSERR_INVAL; } else if (rderrp != NULL) { *rderrp = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_ACL: if (compare) { if (!(*retcmpp)) { if (nfsrv_useacl) { NFSACL_T *naclp; naclp = acl_alloc(M_WAITOK); error = nfsrv_dissectacl(nd, naclp, &aceerr, &cnt, p); if (error) { acl_free(naclp); goto nfsmout; } if (aceerr || aclp == NULL || nfsrv_compareacl(aclp, naclp)) *retcmpp = NFSERR_NOTSAME; acl_free(naclp); } else { error = nfsrv_dissectacl(nd, NULL, &aceerr, &cnt, p); *retcmpp = NFSERR_ATTRNOTSUPP; } } } else { if (vp != NULL && aclp != NULL) error = nfsrv_dissectacl(nd, aclp, &aceerr, &cnt, p); else error = nfsrv_dissectacl(nd, NULL, &aceerr, &cnt, p); if (error) goto nfsmout; } attrsum += cnt; break; case NFSATTRBIT_ACLSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (nfsrv_useacl) { if (fxdr_unsigned(u_int32_t, *tl) != NFSV4ACE_SUPTYPES) *retcmpp = NFSERR_NOTSAME; } else { *retcmpp = NFSERR_ATTRNOTSUPP; } } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_ARCHIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CANSETTIME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFCANSETTIME) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFCANSETTIME; else fsp->fs_properties &= ~NFSV3_FSFCANSETTIME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_false) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_caseinsensitive = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEPRESERVING: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHOWNRESTRICTED: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_chownrestricted = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FILEHANDLE: error = nfsm_getfh(nd, &tnfhp); if (error) goto nfsmout; tfhsize = tnfhp->nfh_len; if (compare) { if (!(*retcmpp) && !NFSRV_CMPFH(tnfhp->nfh_fh, tfhsize, fhp, fhsize)) *retcmpp = NFSERR_NOTSAME; FREE((caddr_t)tnfhp, M_NFSFH); } else if (nfhpp != NULL) { *nfhpp = tnfhp; } else { FREE((caddr_t)tnfhp, M_NFSFH); } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(tfhsize)); break; case NFSATTRBIT_FILEID: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if ((u_int64_t)nap->na_fileid != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { if (*tl++) printf("NFSv4 fileid > 32bits\n"); nap->na_fileid = thyp; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESAVAIL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_afiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_afiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESFREE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_ffiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_ffiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESTOTAL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_tfiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_tfiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FSLOCATIONS: error = nfsrv_getrefstr(nd, &cp, &cp2, &l, &m); if (error) goto nfsmout; attrsum += l; if (compare && !(*retcmpp)) { refp = nfsv4root_getreferral(vp, NULL, 0); if (refp != NULL) { if (cp == NULL || cp2 == NULL || strcmp(cp, "/") || strcmp(cp2, refp->nfr_srvlist)) *retcmpp = NFSERR_NOTSAME; } else if (m == 0) { *retcmpp = NFSERR_NOTSAME; } } if (cp != NULL) free(cp, M_NFSSTRING); if (cp2 != NULL) free(cp2, M_NFSSTRING); break; case NFSATTRBIT_HIDDEN: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HOMOGENEOUS: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFHOMOGENEOUS) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFHOMOGENEOUS; else fsp->fs_properties &= ~NFSV3_FSFHOMOGENEOUS; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXFILESIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); tnfsquad.qval = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { tquad = NFSRV_MAXFILESIZE; if (tquad != tnfsquad.qval) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_maxfilesize = tnfsquad.qval; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MAXLINK: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fxdr_unsigned(int, *tl) != LINK_MAX) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXNAME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_maxname != fxdr_unsigned(u_int32_t, *tl)) *retcmpp = NFSERR_NOTSAME; } } else { tuint = fxdr_unsigned(u_int32_t, *tl); /* * Some Linux NFSv4 servers report this * as 0 or 4billion, so I'll set it to * NFS_MAXNAMLEN. If a server actually creates * a name longer than NFS_MAXNAMLEN, it will * get an error back. */ if (tuint == 0 || tuint > NFS_MAXNAMLEN) tuint = NFS_MAXNAMLEN; if (fsp != NULL) fsp->fs_maxname = tuint; if (pc != NULL) pc->pc_namemax = tuint; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXREAD: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (fsp->fs_rtmax != fxdr_unsigned(u_int32_t, *(tl + 1)) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *++tl); fsp->fs_rtpref = fsp->fs_rtmax; fsp->fs_dtpref = fsp->fs_rtpref; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MAXWRITE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (fsp->fs_wtmax != fxdr_unsigned(u_int32_t, *(tl + 1)) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_wtmax = fxdr_unsigned(int, *++tl); fsp->fs_wtpref = fsp->fs_wtmax; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MIMETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; break; case NFSATTRBIT_MODE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (nap->na_mode != nfstov_mode(*tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_mode = nfstov_mode(*tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NOTRUNC: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NUMLINKS: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); tuint = fxdr_unsigned(u_int32_t, *tl); if (compare) { if (!(*retcmpp)) { if ((u_int32_t)nap->na_nlink != tuint) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_nlink = tuint; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (compare) { if (!(*retcmpp)) { if (nfsv4_strtouid(nd, cp, j, &uid, p) || nap->na_uid != uid) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { if (nfsv4_strtouid(nd, cp, j, &uid, p)) nap->na_uid = nfsrv_defaultuid; else nap->na_uid = uid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); break; case NFSATTRBIT_OWNERGROUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (compare) { if (!(*retcmpp)) { if (nfsv4_strtogid(nd, cp, j, &gid, p) || nap->na_gid != gid) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { if (nfsv4_strtogid(nd, cp, j, &gid, p)) nap->na_gid = nfsrv_defaultgid; else nap->na_gid = gid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); break; case NFSATTRBIT_QUOTAHARD: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = sbp->f_bfree; else freenum = sbp->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bhardlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_QUOTASOFT: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = sbp->f_bfree; else freenum = sbp->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bsoftlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_QUOTAUSED: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { freenum = 0; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = dqb.dqb_curblocks; p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_RAWDEV: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4SPECDATA); j = fxdr_unsigned(int, *tl++); k = fxdr_unsigned(int, *tl); if (compare) { if (!(*retcmpp)) { if (nap->na_rdev != NFSMAKEDEV(j, k)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_rdev = NFSMAKEDEV(j, k); } attrsum += NFSX_V4SPECDATA; break; case NFSATTRBIT_SPACEAVAIL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_abytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_abytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACEFREE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_fbytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_fbytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACETOTAL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_tbytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_tbytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACEUSED: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if ((u_int64_t)nap->na_bytes != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_bytes = thyp; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SYSTEM: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESS: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_atime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_atime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEACCESSSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i == NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); attrsum += NFSX_V4TIME; } if (compare && !(*retcmpp)) *retcmpp = NFSERR_INVAL; break; case NFSATTRBIT_TIMEBACKUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEDELTA: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (fsp != NULL) { if (compare) { if (!(*retcmpp)) { if ((u_int32_t)fsp->fs_timedelta.tv_sec != fxdr_unsigned(u_int32_t, *(tl + 1)) || (u_int32_t)fsp->fs_timedelta.tv_nsec != (fxdr_unsigned(u_int32_t, *(tl + 2)) % 1000000000) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else { fxdr_nfsv4time(tl, &fsp->fs_timedelta); } } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMETADATA: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_ctime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_ctime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFY: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_mtime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_mtime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i == NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); attrsum += NFSX_V4TIME; } if (compare && !(*retcmpp)) *retcmpp = NFSERR_INVAL; break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if (*tl++) { *retcmpp = NFSERR_NOTSAME; } else { if (!vp || !nfsrv_atroot(vp, &fid)) fid = nap->na_fileid; if ((u_int64_t)fid != thyp) *retcmpp = NFSERR_NOTSAME; } } } else if (nap != NULL) { if (*tl++) printf("NFSv4 mounted on fileid > 32bits\n"); nap->na_mntonfileno = thyp; } attrsum += NFSX_HYPER; break; default: printf("EEK! nfsv4_loadattr unknown attr=%d\n", bitpos); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; /* * and get out of the loop, since we can't parse * the unknown attrbute data. */ bitpos = NFSATTRBIT_MAX; break; }; } /* * some clients pad the attrlist, so we need to skip over the * padding. */ if (attrsum > attrsize) { error = NFSERR_BADXDR; } else { attrsize = NFSM_RNDUP(attrsize); if (attrsum < attrsize) error = nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Implement sleep locks for newnfs. The nfslock_usecnt allows for a * shared lock and the NFSXXX_LOCK flag permits an exclusive lock. * The first argument is a pointer to an nfsv4lock structure. * The second argument is 1 iff a blocking lock is wanted. * If this argument is 0, the call waits until no thread either wants nor * holds an exclusive lock. * It returns 1 if the lock was acquired, 0 otherwise. * If several processes call this function concurrently wanting the exclusive * lock, one will get the lock and the rest will return without getting the * lock. (If the caller must have the lock, it simply calls this function in a * loop until the function returns 1 to indicate the lock was acquired.) * Any usecnt must be decremented by calling nfsv4_relref() before * calling nfsv4_lock(). It was done this way, so nfsv4_lock() could * be called in a loop. * The isleptp argument is set to indicate if the call slept, iff not NULL * and the mp argument indicates to check for a forced dismount, iff not * NULL. */ APPLESTATIC int nfsv4_lock(struct nfsv4lock *lp, int iwantlock, int *isleptp, void *mutex, struct mount *mp) { if (isleptp) *isleptp = 0; /* * If a lock is wanted, loop around until the lock is acquired by * someone and then released. If I want the lock, try to acquire it. * For a lock to be issued, no lock must be in force and the usecnt * must be zero. */ if (iwantlock) { if (!(lp->nfslock_lock & NFSV4LOCK_LOCK) && lp->nfslock_usecnt == 0) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; lp->nfslock_lock |= NFSV4LOCK_LOCK; return (1); } lp->nfslock_lock |= NFSV4LOCK_LOCKWANTED; } while (lp->nfslock_lock & (NFSV4LOCK_LOCK | NFSV4LOCK_LOCKWANTED)) { if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; return (0); } lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; (void) nfsmsleep(&lp->nfslock_lock, mutex, PZERO - 1, "nfsv4lck", NULL); if (iwantlock && !(lp->nfslock_lock & NFSV4LOCK_LOCK) && lp->nfslock_usecnt == 0) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; lp->nfslock_lock |= NFSV4LOCK_LOCK; return (1); } } return (0); } /* * Release the lock acquired by nfsv4_lock(). * The second argument is set to 1 to indicate the nfslock_usecnt should be * incremented, as well. */ APPLESTATIC void nfsv4_unlock(struct nfsv4lock *lp, int incref) { lp->nfslock_lock &= ~NFSV4LOCK_LOCK; if (incref) lp->nfslock_usecnt++; nfsv4_wanted(lp); } /* * Release a reference cnt. */ APPLESTATIC void nfsv4_relref(struct nfsv4lock *lp) { if (lp->nfslock_usecnt <= 0) panic("nfsv4root ref cnt"); lp->nfslock_usecnt--; if (lp->nfslock_usecnt == 0) nfsv4_wanted(lp); } /* * Get a reference cnt. * This function will wait for any exclusive lock to be released, but will * not wait for threads that want the exclusive lock. If priority needs * to be given to threads that need the exclusive lock, a call to nfsv4_lock() * with the 2nd argument == 0 should be done before calling nfsv4_getref(). * If the mp argument is not NULL, check for MNTK_UNMOUNTF being set and * return without getting a refcnt for that case. */ APPLESTATIC void nfsv4_getref(struct nfsv4lock *lp, int *isleptp, void *mutex, struct mount *mp) { if (isleptp) *isleptp = 0; /* * Wait for a lock held. */ while (lp->nfslock_lock & NFSV4LOCK_LOCK) { if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) return; lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; (void) nfsmsleep(&lp->nfslock_lock, mutex, - PZERO - 1, "nfsv4lck", NULL); + PZERO - 1, "nfsv4gr", NULL); } if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) return; lp->nfslock_usecnt++; } /* * Get a reference as above, but return failure instead of sleeping if * an exclusive lock is held. */ APPLESTATIC int nfsv4_getref_nonblock(struct nfsv4lock *lp) { if ((lp->nfslock_lock & NFSV4LOCK_LOCK) != 0) return (0); lp->nfslock_usecnt++; return (1); } /* * Test for a lock. Return 1 if locked, 0 otherwise. */ APPLESTATIC int nfsv4_testlock(struct nfsv4lock *lp) { if ((lp->nfslock_lock & NFSV4LOCK_LOCK) == 0 && lp->nfslock_usecnt == 0) return (0); return (1); } /* * Wake up anyone sleeping, waiting for this lock. */ static void nfsv4_wanted(struct nfsv4lock *lp) { if (lp->nfslock_lock & NFSV4LOCK_WANTED) { lp->nfslock_lock &= ~NFSV4LOCK_WANTED; wakeup((caddr_t)&lp->nfslock_lock); } } /* * Copy a string from an mbuf list into a character array. * Return EBADRPC if there is an mbuf error, * 0 otherwise. */ APPLESTATIC int nfsrv_mtostr(struct nfsrv_descript *nd, char *str, int siz) { char *cp; int xfer, len; mbuf_t mp; int rem, error = 0; mp = nd->nd_md; cp = nd->nd_dpos; len = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - cp; rem = NFSM_RNDUP(siz) - siz; while (siz > 0) { if (len > siz) xfer = siz; else xfer = len; NFSBCOPY(cp, str, xfer); str += xfer; siz -= xfer; if (siz > 0) { mp = mbuf_next(mp); if (mp == NULL) { error = EBADRPC; goto out; } cp = NFSMTOD(mp, caddr_t); len = mbuf_len(mp); } else { cp += xfer; len -= xfer; } } *str = '\0'; nd->nd_dpos = cp; nd->nd_md = mp; if (rem > 0) { if (len < rem) error = nfsm_advance(nd, rem, len); else nd->nd_dpos += rem; } out: NFSEXITCODE2(error, nd); return (error); } /* * Fill in the attributes as marked by the bitmap (V4). */ APPLESTATIC int nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, NFSACL_T *saclp, struct vattr *vap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, NFSPROC_T *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) { int bitpos, retnum = 0; u_int32_t *tl; int siz, prefixnum, error; u_char *cp, namestr[NFSV4_SMALLSTR]; nfsattrbit_t attrbits, retbits; nfsattrbit_t *retbitp = &retbits; u_int32_t freenum, *retnump; u_int64_t uquad; struct statfs fs; struct nfsfsinfo fsinf; struct timespec temptime; struct timeval curtime; NFSACL_T *aclp, *naclp = NULL; #ifdef QUOTA struct dqblk dqb; uid_t savuid; #endif /* * First, set the bits that can be filled and get fsinfo. */ NFSSET_ATTRBIT(retbitp, attrbitp); /* If p and cred are NULL, it is a client side call */ if (p == NULL && cred == NULL) { NFSCLRNOTSETABLE_ATTRBIT(retbitp); aclp = saclp; } else { NFSCLRNOTFILLABLE_ATTRBIT(retbitp); naclp = acl_alloc(M_WAITOK); aclp = naclp; } nfsvno_getfs(&fsinf, isdgram); #ifndef APPLE /* * Get the VFS_STATFS(), since some attributes need them. */ if (NFSISSETSTATFS_ATTRBIT(retbitp)) { error = VFS_STATFS(mp, &fs); if (error != 0) { if (reterr) { nd->nd_repstat = NFSERR_ACCES; return (0); } NFSCLRSTATFS_ATTRBIT(retbitp); } } #endif /* * And the NFSv4 ACL... */ if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_ACLSUPPORT) && (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0))) { NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACLSUPPORT); } if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_ACL)) { if (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0)) { NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACL); } else if (naclp != NULL) { if (NFSVOPLOCK(vp, LK_SHARED) == 0) { error = VOP_ACCESSX(vp, VREAD_ACL, cred, p); if (error == 0) error = VOP_GETACL(vp, ACL_TYPE_NFS4, naclp, cred, p); NFSVOPUNLOCK(vp, 0); } else error = NFSERR_PERM; if (error != 0) { if (reterr) { nd->nd_repstat = NFSERR_ACCES; return (0); } NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACL); } } } /* * Put out the attribute bitmap for the ones being filled in * and get the field for the number of attributes returned. */ prefixnum = nfsrv_putattrbit(nd, retbitp); NFSM_BUILD(retnump, u_int32_t *, NFSX_UNSIGNED); prefixnum += NFSX_UNSIGNED; /* * Now, loop around filling in the attributes for each bit set. */ for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (NFSISSET_ATTRBIT(retbitp, bitpos)) { switch (bitpos) { case NFSATTRBIT_SUPPORTEDATTRS: NFSSETSUPP_ATTRBIT(&attrbits); if (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0)) { NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACLSUPPORT); NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACL); } retnum += nfsrv_putattrbit(nd, &attrbits); break; case NFSATTRBIT_TYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vap->va_type); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FHEXPIRETYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4FHTYPE_PERSISTENT); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHANGE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_filerev, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_size, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_LINKSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_LINK) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_SYMLINKSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_SYMLINK) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NAMEDATTR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_BUILD(tl, u_int32_t *, NFSX_V4FSID); *tl++ = 0; *tl++ = txdr_unsigned(mp->mnt_stat.f_fsid.val[0]); *tl++ = 0; *tl = txdr_unsigned(mp->mnt_stat.f_fsid.val[1]); retnum += NFSX_V4FSID; break; case NFSATTRBIT_UNIQUEHANDLES: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_LEASETIME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsrv_lease); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_RDATTRERROR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(rderror); retnum += NFSX_UNSIGNED; break; /* * Recommended Attributes. (Only the supported ones.) */ case NFSATTRBIT_ACL: retnum += nfsrv_buildacl(nd, aclp, vnode_vtype(vp), p); break; case NFSATTRBIT_ACLSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4ACE_SUPTYPES); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CANSETTIME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_CANSETTIME) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEPRESERVING: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHOWNRESTRICTED: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FILEHANDLE: retnum += nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); break; case NFSATTRBIT_FILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(vap->va_fileid); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESAVAIL: /* * Check quota and use min(quota, f_ffree). */ freenum = fs.f_ffree; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_isoftlimit-dqb.dqb_curinodes, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(freenum); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESFREE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fs.f_ffree); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESTOTAL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fs.f_files); retnum += NFSX_HYPER; break; case NFSATTRBIT_FSLOCATIONS: NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = 0; retnum += 2 * NFSX_UNSIGNED; break; case NFSATTRBIT_HOMOGENEOUS: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_HOMOGENEOUS) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXFILESIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = NFSRV_MAXFILESIZE; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_MAXLINK: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(LINK_MAX); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXNAME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_MAXNAMLEN); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXREAD: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fsinf.fs_rtmax); retnum += NFSX_HYPER; break; case NFSATTRBIT_MAXWRITE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fsinf.fs_wtmax); retnum += NFSX_HYPER; break; case NFSATTRBIT_MODE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_mode(vap->va_mode); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NOTRUNC: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NUMLINKS: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(vap->va_nlink); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: cp = namestr; nfsv4_uidtostr(vap->va_uid, &cp, &siz, p); retnum += nfsm_strtom(nd, cp, siz); if (cp != namestr) free(cp, M_NFSSTRING); break; case NFSATTRBIT_OWNERGROUP: cp = namestr; nfsv4_gidtostr(vap->va_gid, &cp, &siz, p); retnum += nfsm_strtom(nd, cp, siz); if (cp != namestr) free(cp, M_NFSSTRING); break; case NFSATTRBIT_QUOTAHARD: if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = fs.f_bfree; else freenum = fs.f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bhardlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs.f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_QUOTASOFT: if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = fs.f_bfree; else freenum = fs.f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bsoftlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs.f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_QUOTAUSED: freenum = 0; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = dqb.dqb_curblocks; p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs.f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_RAWDEV: NFSM_BUILD(tl, u_int32_t *, NFSX_V4SPECDATA); *tl++ = txdr_unsigned(NFSMAJOR(vap->va_rdev)); *tl = txdr_unsigned(NFSMINOR(vap->va_rdev)); retnum += NFSX_V4SPECDATA; break; case NFSATTRBIT_SPACEAVAIL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0)) uquad = (u_int64_t)fs.f_bfree; else uquad = (u_int64_t)fs.f_bavail; uquad *= fs.f_bsize; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACEFREE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)fs.f_bfree; uquad *= fs.f_bsize; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACETOTAL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)fs.f_blocks; uquad *= fs.f_bsize; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACEUSED: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_bytes, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_TIMEACCESS: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_atime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEACCESSSET: NFSGETTIME(&curtime); if (vap->va_atime.tv_sec != curtime.tv_sec) { NFSM_BUILD(tl, u_int32_t *, NFSX_V4SETTIME); *tl++ = txdr_unsigned(NFSV4SATTRTIME_TOCLIENT); txdr_nfsv4time(&vap->va_atime, tl); retnum += NFSX_V4SETTIME; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4SATTRTIME_TOSERVER); retnum += NFSX_UNSIGNED; } break; case NFSATTRBIT_TIMEDELTA: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); temptime.tv_sec = 0; temptime.tv_nsec = 1000000000 / hz; txdr_nfsv4time(&temptime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMETADATA: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_ctime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFY: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_mtime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSGETTIME(&curtime); if (vap->va_mtime.tv_sec != curtime.tv_sec) { NFSM_BUILD(tl, u_int32_t *, NFSX_V4SETTIME); *tl++ = txdr_unsigned(NFSV4SATTRTIME_TOCLIENT); txdr_nfsv4time(&vap->va_mtime, tl); retnum += NFSX_V4SETTIME; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4SATTRTIME_TOSERVER); retnum += NFSX_UNSIGNED; } break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (at_root != 0) uquad = mounted_on_fileno; else uquad = (u_int64_t)vap->va_fileid; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; default: printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos); }; } } if (naclp != NULL) acl_free(naclp); *retnump = txdr_unsigned(retnum); return (retnum + prefixnum); } /* * Put the attribute bits onto an mbuf list. * Return the number of bytes of output generated. */ APPLESTATIC int nfsrv_putattrbit(struct nfsrv_descript *nd, nfsattrbit_t *attrbitp) { u_int32_t *tl; int cnt, i, bytesize; for (cnt = NFSATTRBIT_MAXWORDS; cnt > 0; cnt--) if (attrbitp->bits[cnt - 1]) break; bytesize = (cnt + 1) * NFSX_UNSIGNED; NFSM_BUILD(tl, u_int32_t *, bytesize); *tl++ = txdr_unsigned(cnt); for (i = 0; i < cnt; i++) *tl++ = txdr_unsigned(attrbitp->bits[i]); return (bytesize); } /* * Convert a uid to a string. * If the lookup fails, just output the digits. * uid - the user id * cpp - points to a buffer of size NFSV4_SMALLSTR * (malloc a larger one, as required) * retlenp - pointer to length to be returned */ APPLESTATIC void nfsv4_uidtostr(uid_t uid, u_char **cpp, int *retlenp, NFSPROC_T *p) { int i; struct nfsusrgrp *usrp; u_char *cp = *cpp; uid_t tmp; int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; cnt = 0; tryagain: NFSLOCKNAMEID(); if (nfsrv_dnsname) { /* * Always map nfsrv_defaultuid to "nobody". */ if (uid == nfsrv_defaultuid) { i = nfsrv_dnsnamelen + 7; if (i > len) { NFSUNLOCKNAMEID(); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY("nobody@", cp, 7); cp += 7; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); NFSUNLOCKNAMEID(); return; } hasampersand = 0; LIST_FOREACH(usrp, NFSUSERHASH(uid), lug_numhash) { if (usrp->lug_uid == uid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; /* * If the name doesn't already have an '@' * in it, append @domainname to it. */ for (i = 0; i < usrp->lug_namelen; i++) { if (usrp->lug_name[i] == '@') { hasampersand = 1; break; } } if (hasampersand) i = usrp->lug_namelen; else i = usrp->lug_namelen + nfsrv_dnsnamelen + 1; if (i > len) { NFSUNLOCKNAMEID(); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY(usrp->lug_name, cp, usrp->lug_namelen); if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); } TAILQ_REMOVE(&nfsuserlruhead, usrp, lug_lru); TAILQ_INSERT_TAIL(&nfsuserlruhead, usrp, lug_lru); NFSUNLOCKNAMEID(); return; } } NFSUNLOCKNAMEID(); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUID, uid, (gid_t)0, NULL, p); if (ret == 0 && cnt < 2) goto tryagain; } else { NFSUNLOCKNAMEID(); } /* * No match, just return a string of digits. */ tmp = uid; i = 0; while (tmp || i == 0) { tmp /= 10; i++; } len = (i > len) ? len : i; *retlenp = len; cp += (len - 1); tmp = uid; for (i = 0; i < len; i++) { *cp-- = '0' + (tmp % 10); tmp /= 10; } return; } /* * Convert a string to a uid. * If no conversion is possible return NFSERR_BADOWNER, otherwise * return 0. * If this is called from a client side mount using AUTH_SYS and the * string is made up entirely of digits, just convert the string to * a number. */ APPLESTATIC int nfsv4_strtouid(struct nfsrv_descript *nd, u_char *str, int len, uid_t *uidp, NFSPROC_T *p) { int i; char *cp, *endstr, *str0; struct nfsusrgrp *usrp; int cnt, ret; int error = 0; uid_t tuid; if (len == 0) { error = NFSERR_BADOWNER; goto out; } /* If a string of digits and an AUTH_SYS mount, just convert it. */ str0 = str; tuid = (uid_t)strtoul(str0, &endstr, 10); if ((endstr - str0) == len && (nd->nd_flag & (ND_KERBV | ND_NFSCL)) == ND_NFSCL) { *uidp = tuid; goto out; } /* * Look for an '@'. */ cp = strchr(str0, '@'); if (cp != NULL) i = (int)(cp++ - str0); else i = len; cnt = 0; tryagain: NFSLOCKNAMEID(); /* * If an '@' is found and the domain name matches, search for the name * with dns stripped off. * Mixed case alpahbetics will match for the domain name, but all * upper case will not. */ if (cnt == 0 && i < len && i > 0 && nfsrv_dnsname && (len - 1 - i) == nfsrv_dnsnamelen && !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) { len -= (nfsrv_dnsnamelen + 1); *(cp - 1) = '\0'; } /* * Check for the special case of "nobody". */ if (len == 6 && !NFSBCMP(str, "nobody", 6)) { *uidp = nfsrv_defaultuid; NFSUNLOCKNAMEID(); error = 0; goto out; } LIST_FOREACH(usrp, NFSUSERNAMEHASH(str, len), lug_namehash) { if (usrp->lug_namelen == len && !NFSBCMP(usrp->lug_name, str, len)) { if (usrp->lug_expiry < NFSD_MONOSEC) break; *uidp = usrp->lug_uid; TAILQ_REMOVE(&nfsuserlruhead, usrp, lug_lru); TAILQ_INSERT_TAIL(&nfsuserlruhead, usrp, lug_lru); NFSUNLOCKNAMEID(); error = 0; goto out; } } NFSUNLOCKNAMEID(); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUSER, (uid_t)0, (gid_t)0, str, p); if (ret == 0 && cnt < 2) goto tryagain; error = NFSERR_BADOWNER; out: NFSEXITCODE(error); return (error); } /* * Convert a gid to a string. * gid - the group id * cpp - points to a buffer of size NFSV4_SMALLSTR * (malloc a larger one, as required) * retlenp - pointer to length to be returned */ APPLESTATIC void nfsv4_gidtostr(gid_t gid, u_char **cpp, int *retlenp, NFSPROC_T *p) { int i; struct nfsusrgrp *usrp; u_char *cp = *cpp; gid_t tmp; int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; cnt = 0; tryagain: NFSLOCKNAMEID(); if (nfsrv_dnsname) { /* * Always map nfsrv_defaultgid to "nogroup". */ if (gid == nfsrv_defaultgid) { i = nfsrv_dnsnamelen + 8; if (i > len) { NFSUNLOCKNAMEID(); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY("nogroup@", cp, 8); cp += 8; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); NFSUNLOCKNAMEID(); return; } hasampersand = 0; LIST_FOREACH(usrp, NFSGROUPHASH(gid), lug_numhash) { if (usrp->lug_gid == gid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; /* * If the name doesn't already have an '@' * in it, append @domainname to it. */ for (i = 0; i < usrp->lug_namelen; i++) { if (usrp->lug_name[i] == '@') { hasampersand = 1; break; } } if (hasampersand) i = usrp->lug_namelen; else i = usrp->lug_namelen + nfsrv_dnsnamelen + 1; if (i > len) { NFSUNLOCKNAMEID(); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY(usrp->lug_name, cp, usrp->lug_namelen); if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); } TAILQ_REMOVE(&nfsuserlruhead, usrp, lug_lru); TAILQ_INSERT_TAIL(&nfsuserlruhead, usrp, lug_lru); NFSUNLOCKNAMEID(); return; } } NFSUNLOCKNAMEID(); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETGID, (uid_t)0, gid, NULL, p); if (ret == 0 && cnt < 2) goto tryagain; } else { NFSUNLOCKNAMEID(); } /* * No match, just return a string of digits. */ tmp = gid; i = 0; while (tmp || i == 0) { tmp /= 10; i++; } len = (i > len) ? len : i; *retlenp = len; cp += (len - 1); tmp = gid; for (i = 0; i < len; i++) { *cp-- = '0' + (tmp % 10); tmp /= 10; } return; } /* * Convert a string to a gid. * If no conversion is possible return NFSERR_BADOWNER, otherwise * return 0. * If this is called from a client side mount using AUTH_SYS and the * string is made up entirely of digits, just convert the string to * a number. */ APPLESTATIC int nfsv4_strtogid(struct nfsrv_descript *nd, u_char *str, int len, gid_t *gidp, NFSPROC_T *p) { int i; char *cp, *endstr, *str0; struct nfsusrgrp *usrp; int cnt, ret; int error = 0; gid_t tgid; if (len == 0) { error = NFSERR_BADOWNER; goto out; } /* If a string of digits and an AUTH_SYS mount, just convert it. */ str0 = str; tgid = (gid_t)strtoul(str0, &endstr, 10); if ((endstr - str0) == len && (nd->nd_flag & (ND_KERBV | ND_NFSCL)) == ND_NFSCL) { *gidp = tgid; goto out; } /* * Look for an '@'. */ cp = strchr(str0, '@'); if (cp != NULL) i = (int)(cp++ - str0); else i = len; cnt = 0; tryagain: NFSLOCKNAMEID(); /* * If an '@' is found and the dns name matches, search for the name * with the dns stripped off. */ if (cnt == 0 && i < len && i > 0 && nfsrv_dnsname && (len - 1 - i) == nfsrv_dnsnamelen && !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) { len -= (nfsrv_dnsnamelen + 1); *(cp - 1) = '\0'; } /* * Check for the special case of "nogroup". */ if (len == 7 && !NFSBCMP(str, "nogroup", 7)) { *gidp = nfsrv_defaultgid; NFSUNLOCKNAMEID(); error = 0; goto out; } LIST_FOREACH(usrp, NFSGROUPNAMEHASH(str, len), lug_namehash) { if (usrp->lug_namelen == len && !NFSBCMP(usrp->lug_name, str, len)) { if (usrp->lug_expiry < NFSD_MONOSEC) break; *gidp = usrp->lug_gid; TAILQ_REMOVE(&nfsuserlruhead, usrp, lug_lru); TAILQ_INSERT_TAIL(&nfsuserlruhead, usrp, lug_lru); NFSUNLOCKNAMEID(); error = 0; goto out; } } NFSUNLOCKNAMEID(); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETGROUP, (uid_t)0, (gid_t)0, str, p); if (ret == 0 && cnt < 2) goto tryagain; error = NFSERR_BADOWNER; out: NFSEXITCODE(error); return (error); } /* * Cmp len chars, allowing mixed case in the first argument to match lower * case in the second, but not if the first argument is all upper case. * Return 0 for a match, 1 otherwise. */ static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len) { int i; u_char tmp; int fndlower = 0; for (i = 0; i < len; i++) { if (*cp >= 'A' && *cp <= 'Z') { tmp = *cp++ + ('a' - 'A'); } else { tmp = *cp++; if (tmp >= 'a' && tmp <= 'z') fndlower = 1; } if (tmp != *cp2++) return (1); } if (fndlower) return (0); else return (1); } /* * Set the port for the nfsuserd. */ APPLESTATIC int nfsrv_nfsuserdport(u_short port, NFSPROC_T *p) { struct nfssockreq *rp; struct sockaddr_in *ad; int error; NFSLOCKNAMEID(); if (nfsrv_nfsuserd) { NFSUNLOCKNAMEID(); error = EPERM; goto out; } nfsrv_nfsuserd = 1; NFSUNLOCKNAMEID(); /* * Set up the socket record and connect. */ rp = &nfsrv_nfsuserdsock; rp->nr_client = NULL; rp->nr_sotype = SOCK_DGRAM; rp->nr_soproto = IPPROTO_UDP; rp->nr_lock = (NFSR_RESERVEDPORT | NFSR_LOCALHOST); rp->nr_cred = NULL; NFSSOCKADDRALLOC(rp->nr_nam); NFSSOCKADDRSIZE(rp->nr_nam, sizeof (struct sockaddr_in)); ad = NFSSOCKADDR(rp->nr_nam, struct sockaddr_in *); ad->sin_family = AF_INET; ad->sin_addr.s_addr = htonl((u_int32_t)0x7f000001); /* 127.0.0.1 */ ad->sin_port = port; rp->nr_prog = RPCPROG_NFSUSERD; rp->nr_vers = RPCNFSUSERD_VERS; error = newnfs_connect(NULL, rp, NFSPROCCRED(p), p, 0); if (error) { NFSSOCKADDRFREE(rp->nr_nam); nfsrv_nfsuserd = 0; } out: NFSEXITCODE(error); return (error); } /* * Delete the nfsuserd port. */ APPLESTATIC void nfsrv_nfsuserddelport(void) { NFSLOCKNAMEID(); if (nfsrv_nfsuserd == 0) { NFSUNLOCKNAMEID(); return; } nfsrv_nfsuserd = 0; NFSUNLOCKNAMEID(); newnfs_disconnect(&nfsrv_nfsuserdsock); NFSSOCKADDRFREE(nfsrv_nfsuserdsock.nr_nam); } /* * Do upcalls to the nfsuserd, for cache misses of the owner/ownergroup * name<-->id cache. * Returns 0 upon success, non-zero otherwise. */ static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript *nd; int len; struct nfsrv_descript nfsd; struct ucred *cred; int error; NFSLOCKNAMEID(); if (nfsrv_nfsuserd == 0) { NFSUNLOCKNAMEID(); error = EPERM; goto out; } NFSUNLOCKNAMEID(); nd = &nfsd; cred = newnfs_getcred(); nd->nd_flag = ND_GSSINITREPLY; nfsrvd_rephead(nd); nd->nd_procnum = procnum; if (procnum == RPCNFSUSERD_GETUID || procnum == RPCNFSUSERD_GETGID) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (procnum == RPCNFSUSERD_GETUID) *tl = txdr_unsigned(uid); else *tl = txdr_unsigned(gid); } else { len = strlen(name); (void) nfsm_strtom(nd, name, len); } error = newnfs_request(nd, NULL, NULL, &nfsrv_nfsuserdsock, NULL, NULL, - cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL); + cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL, NULL); NFSFREECRED(cred); if (!error) { mbuf_freem(nd->nd_mrep); error = nd->nd_repstat; } out: NFSEXITCODE(error); return (error); } /* * This function is called from the nfssvc(2) system call, to update the * kernel user/group name list(s) for the V4 owner and ownergroup attributes. */ APPLESTATIC int nfssvc_idname(struct nfsd_idargs *nidp) { struct nfsusrgrp *nusrp, *usrp, *newusrp; struct nfsuserhashhead *hp; int i; int error = 0; u_char *cp; if (nidp->nid_flag & NFSID_INITIALIZE) { cp = (u_char *)malloc(nidp->nid_namelen + 1, M_NFSSTRING, M_WAITOK); error = copyin(CAST_USER_ADDR_T(nidp->nid_name), cp, nidp->nid_namelen); NFSLOCKNAMEID(); if (nfsrv_dnsname) { /* * Free up all the old stuff and reinitialize hash lists. */ TAILQ_FOREACH_SAFE(usrp, &nfsuserlruhead, lug_lru, nusrp) { nfsrv_removeuser(usrp); } free(nfsrv_dnsname, M_NFSSTRING); nfsrv_dnsname = NULL; } TAILQ_INIT(&nfsuserlruhead); for (i = 0; i < NFSUSERHASHSIZE; i++) LIST_INIT(&nfsuserhash[i]); for (i = 0; i < NFSGROUPHASHSIZE; i++) LIST_INIT(&nfsgrouphash[i]); for (i = 0; i < NFSUSERHASHSIZE; i++) LIST_INIT(&nfsusernamehash[i]); for (i = 0; i < NFSGROUPHASHSIZE; i++) LIST_INIT(&nfsgroupnamehash[i]); /* * Put name in "DNS" string. */ if (!error) { nfsrv_dnsname = cp; nfsrv_dnsnamelen = nidp->nid_namelen; nfsrv_defaultuid = nidp->nid_uid; nfsrv_defaultgid = nidp->nid_gid; nfsrv_usercnt = 0; nfsrv_usermax = nidp->nid_usermax; } NFSUNLOCKNAMEID(); if (error) free(cp, M_NFSSTRING); goto out; } /* * malloc the new one now, so any potential sleep occurs before * manipulation of the lists. */ MALLOC(newusrp, struct nfsusrgrp *, sizeof (struct nfsusrgrp) + nidp->nid_namelen, M_NFSUSERGROUP, M_WAITOK); error = copyin(CAST_USER_ADDR_T(nidp->nid_name), newusrp->lug_name, nidp->nid_namelen); if (error) { free((caddr_t)newusrp, M_NFSUSERGROUP); goto out; } newusrp->lug_namelen = nidp->nid_namelen; NFSLOCKNAMEID(); /* * Delete old entries, as required. */ if (nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID)) { hp = NFSUSERHASH(nidp->nid_uid); LIST_FOREACH_SAFE(usrp, hp, lug_numhash, nusrp) { if (usrp->lug_uid == nidp->nid_uid) nfsrv_removeuser(usrp); } } if (nidp->nid_flag & (NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) { hp = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); LIST_FOREACH_SAFE(usrp, hp, lug_namehash, nusrp) { if (usrp->lug_namelen == newusrp->lug_namelen && !NFSBCMP(usrp->lug_name, newusrp->lug_name, usrp->lug_namelen)) nfsrv_removeuser(usrp); } } if (nidp->nid_flag & (NFSID_DELGID | NFSID_ADDGID)) { hp = NFSGROUPHASH(nidp->nid_gid); LIST_FOREACH_SAFE(usrp, hp, lug_numhash, nusrp) { if (usrp->lug_gid == nidp->nid_gid) nfsrv_removeuser(usrp); } } if (nidp->nid_flag & (NFSID_DELGROUPNAME | NFSID_ADDGROUPNAME)) { hp = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); LIST_FOREACH_SAFE(usrp, hp, lug_namehash, nusrp) { if (usrp->lug_namelen == newusrp->lug_namelen && !NFSBCMP(usrp->lug_name, newusrp->lug_name, usrp->lug_namelen)) nfsrv_removeuser(usrp); } } TAILQ_FOREACH_SAFE(usrp, &nfsuserlruhead, lug_lru, nusrp) { if (usrp->lug_expiry < NFSD_MONOSEC) nfsrv_removeuser(usrp); } while (nfsrv_usercnt >= nfsrv_usermax) { usrp = TAILQ_FIRST(&nfsuserlruhead); nfsrv_removeuser(usrp); } /* * Now, we can add the new one. */ if (nidp->nid_usertimeout) newusrp->lug_expiry = NFSD_MONOSEC + nidp->nid_usertimeout; else newusrp->lug_expiry = NFSD_MONOSEC + 5; if (nidp->nid_flag & (NFSID_ADDUID | NFSID_ADDUSERNAME)) { newusrp->lug_uid = nidp->nid_uid; LIST_INSERT_HEAD(NFSUSERHASH(newusrp->lug_uid), newusrp, lug_numhash); LIST_INSERT_HEAD(NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen), newusrp, lug_namehash); TAILQ_INSERT_TAIL(&nfsuserlruhead, newusrp, lug_lru); nfsrv_usercnt++; } else if (nidp->nid_flag & (NFSID_ADDGID | NFSID_ADDGROUPNAME)) { newusrp->lug_gid = nidp->nid_gid; LIST_INSERT_HEAD(NFSGROUPHASH(newusrp->lug_gid), newusrp, lug_numhash); LIST_INSERT_HEAD(NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen), newusrp, lug_namehash); TAILQ_INSERT_TAIL(&nfsuserlruhead, newusrp, lug_lru); nfsrv_usercnt++; } else FREE((caddr_t)newusrp, M_NFSUSERGROUP); NFSUNLOCKNAMEID(); out: NFSEXITCODE(error); return (error); } /* * Remove a user/group name element. */ static void nfsrv_removeuser(struct nfsusrgrp *usrp) { NFSNAMEIDREQUIRED(); LIST_REMOVE(usrp, lug_numhash); LIST_REMOVE(usrp, lug_namehash); TAILQ_REMOVE(&nfsuserlruhead, usrp, lug_lru); nfsrv_usercnt--; FREE((caddr_t)usrp, M_NFSUSERGROUP); } /* * This function scans a byte string and checks for UTF-8 compliance. * It returns 0 if it conforms and NFSERR_INVAL if not. */ APPLESTATIC int nfsrv_checkutf8(u_int8_t *cp, int len) { u_int32_t val = 0x0; int cnt = 0, gotd = 0, shift = 0; u_int8_t byte; static int utf8_shift[5] = { 7, 11, 16, 21, 26 }; int error = 0; /* * Here are what the variables are used for: * val - the calculated value of a multibyte char, used to check * that it was coded with the correct range * cnt - the number of 10xxxxxx bytes to follow * gotd - set for a char of Dxxx, so D800<->DFFF can be checked for * shift - lower order bits of range (ie. "val >> shift" should * not be 0, in other words, dividing by the lower bound * of the range should get a non-zero value) * byte - used to calculate cnt */ while (len > 0) { if (cnt > 0) { /* This handles the 10xxxxxx bytes */ if ((*cp & 0xc0) != 0x80 || (gotd && (*cp & 0x20))) { error = NFSERR_INVAL; goto out; } gotd = 0; val <<= 6; val |= (*cp & 0x3f); cnt--; if (cnt == 0 && (val >> shift) == 0x0) { error = NFSERR_INVAL; goto out; } } else if (*cp & 0x80) { /* first byte of multi byte char */ byte = *cp; while ((byte & 0x40) && cnt < 6) { cnt++; byte <<= 1; } if (cnt == 0 || cnt == 6) { error = NFSERR_INVAL; goto out; } val = (*cp & (0x3f >> cnt)); shift = utf8_shift[cnt - 1]; if (cnt == 2 && val == 0xd) /* Check for the 0xd800-0xdfff case */ gotd = 1; } cp++; len--; } if (cnt > 0) error = NFSERR_INVAL; out: NFSEXITCODE(error); return (error); } /* * Parse the xdr for an NFSv4 FsLocations attribute. Return two malloc'd * strings, one with the root path in it and the other with the list of * locations. The list is in the same format as is found in nfr_refs. * It is a "," separated list of entries, where each of them is of the * form :. For example * "nfsv4-test:/sub2,nfsv4-test2:/user/mnt,nfsv4-test2:/user/mnt2" * The nilp argument is set to 1 for the special case of a null fs_root * and an empty server list. * It returns NFSERR_BADXDR, if the xdr can't be parsed and returns the * number of xdr bytes parsed in sump. */ static int nfsrv_getrefstr(struct nfsrv_descript *nd, u_char **fsrootp, u_char **srvp, int *sump, int *nilp) { u_int32_t *tl; u_char *cp = NULL, *cp2 = NULL, *cp3, *str; int i, j, len, stringlen, cnt, slen, siz, xdrsum, error = 0, nsrv; struct list { SLIST_ENTRY(list) next; int len; u_char host[1]; } *lsp, *nlsp; SLIST_HEAD(, list) head; *fsrootp = NULL; *srvp = NULL; *nilp = 0; /* * Get the fs_root path and check for the special case of null path * and 0 length server list. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len < 0 || len > 10240) { error = NFSERR_BADXDR; goto nfsmout; } if (len == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl != 0) { error = NFSERR_BADXDR; goto nfsmout; } *nilp = 1; *sump = 2 * NFSX_UNSIGNED; error = 0; goto nfsmout; } cp = malloc(len + 1, M_NFSSTRING, M_WAITOK); error = nfsrv_mtostr(nd, cp, len); if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt <= 0) error = NFSERR_BADXDR; } if (error) goto nfsmout; /* * Now, loop through the location list and make up the srvlist. */ xdrsum = (2 * NFSX_UNSIGNED) + NFSM_RNDUP(len); cp2 = cp3 = malloc(1024, M_NFSSTRING, M_WAITOK); slen = 1024; siz = 0; for (i = 0; i < cnt; i++) { SLIST_INIT(&head); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nsrv = fxdr_unsigned(int, *tl); if (nsrv <= 0) { error = NFSERR_BADXDR; goto nfsmout; } /* * Handle the first server by putting it in the srvstr. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } nfsrv_refstrbigenough(siz + len + 3, &cp2, &cp3, &slen); if (cp3 != cp2) { *cp3++ = ','; siz++; } error = nfsrv_mtostr(nd, cp3, len); if (error) goto nfsmout; cp3 += len; *cp3++ = ':'; siz += (len + 1); xdrsum += (2 * NFSX_UNSIGNED) + NFSM_RNDUP(len); for (j = 1; j < nsrv; j++) { /* * Yuck, put them in an slist and process them later. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } lsp = (struct list *)malloc(sizeof (struct list) + len, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, lsp->host, len); if (error) goto nfsmout; xdrsum += NFSX_UNSIGNED + NFSM_RNDUP(len); lsp->len = len; SLIST_INSERT_HEAD(&head, lsp, next); } /* * Finally, we can get the path. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } nfsrv_refstrbigenough(siz + len + 1, &cp2, &cp3, &slen); error = nfsrv_mtostr(nd, cp3, len); if (error) goto nfsmout; xdrsum += NFSX_UNSIGNED + NFSM_RNDUP(len); str = cp3; stringlen = len; cp3 += len; siz += len; SLIST_FOREACH_SAFE(lsp, &head, next, nlsp) { nfsrv_refstrbigenough(siz + lsp->len + stringlen + 3, &cp2, &cp3, &slen); *cp3++ = ','; NFSBCOPY(lsp->host, cp3, lsp->len); cp3 += lsp->len; *cp3++ = ':'; NFSBCOPY(str, cp3, stringlen); cp3 += stringlen; *cp3 = '\0'; siz += (lsp->len + stringlen + 2); free((caddr_t)lsp, M_TEMP); } } *fsrootp = cp; *srvp = cp2; *sump = xdrsum; NFSEXITCODE2(0, nd); return (0); nfsmout: if (cp != NULL) free(cp, M_NFSSTRING); if (cp2 != NULL) free(cp2, M_NFSSTRING); NFSEXITCODE2(error, nd); return (error); } /* * Make the malloc'd space large enough. This is a pain, but the xdr * doesn't set an upper bound on the side, so... */ static void nfsrv_refstrbigenough(int siz, u_char **cpp, u_char **cpp2, int *slenp) { u_char *cp; int i; if (siz <= *slenp) return; cp = malloc(siz + 1024, M_NFSSTRING, M_WAITOK); NFSBCOPY(*cpp, cp, *slenp); free(*cpp, M_NFSSTRING); i = *cpp2 - *cpp; *cpp = cp; *cpp2 = cp + i; *slenp = siz + 1024; } /* * Initialize the reply header data structures. */ APPLESTATIC void nfsrvd_rephead(struct nfsrv_descript *nd) { mbuf_t mreq; /* * If this is a big reply, use a cluster. */ if ((nd->nd_flag & ND_GSSINITREPLY) == 0 && nfs_bigreply[nd->nd_procnum]) { NFSMCLGET(mreq, M_WAITOK); nd->nd_mreq = mreq; nd->nd_mb = mreq; } else { NFSMGET(mreq); nd->nd_mreq = mreq; nd->nd_mb = mreq; } nd->nd_bpos = NFSMTOD(mreq, caddr_t); mbuf_setlen(mreq, 0); if ((nd->nd_flag & ND_GSSINITREPLY) == 0) NFSM_BUILD(nd->nd_errp, int *, NFSX_UNSIGNED); } /* * Lock a socket against others. * Currently used to serialize connect/disconnect attempts. */ int newnfs_sndlock(int *flagp) { struct timespec ts; NFSLOCKSOCK(); while (*flagp & NFSR_SNDLOCK) { *flagp |= NFSR_WANTSND; ts.tv_sec = 0; ts.tv_nsec = 0; (void) nfsmsleep((caddr_t)flagp, NFSSOCKMUTEXPTR, PZERO - 1, "nfsndlck", &ts); } *flagp |= NFSR_SNDLOCK; NFSUNLOCKSOCK(); return (0); } /* * Unlock the stream socket for others. */ void newnfs_sndunlock(int *flagp) { NFSLOCKSOCK(); if ((*flagp & NFSR_SNDLOCK) == 0) panic("nfs sndunlock"); *flagp &= ~NFSR_SNDLOCK; if (*flagp & NFSR_WANTSND) { *flagp &= ~NFSR_WANTSND; wakeup((caddr_t)flagp); } NFSUNLOCKSOCK(); +} + +APPLESTATIC int +nfsv4_getipaddr(struct nfsrv_descript *nd, struct sockaddr_storage *sa, + int *isudp) +{ + struct sockaddr_in *sad; + struct sockaddr_in6 *sad6; + struct in_addr saddr; + uint32_t portnum, *tl; + int af = 0, i, j, k; + char addr[64], protocol[5], *cp; + int cantparse = 0, error = 0; + uint16_t portv; + + NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); + i = fxdr_unsigned(int, *tl); + if (i >= 3 && i <= 4) { + error = nfsrv_mtostr(nd, protocol, i); + if (error) + goto nfsmout; + if (strcmp(protocol, "tcp") == 0) { + af = AF_INET; + *isudp = 0; + } else if (strcmp(protocol, "udp") == 0) { + af = AF_INET; + *isudp = 1; + } else if (strcmp(protocol, "tcp6") == 0) { + af = AF_INET6; + *isudp = 0; + } else if (strcmp(protocol, "udp6") == 0) { + af = AF_INET6; + *isudp = 1; + } else + cantparse = 1; + } else { + cantparse = 1; + if (i > 0) { + error = nfsm_advance(nd, NFSM_RNDUP(i), -1); + if (error) + goto nfsmout; + } + } + NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); + i = fxdr_unsigned(int, *tl); + if (i < 0) { + error = NFSERR_BADXDR; + goto nfsmout; + } else if (cantparse == 0 && i >= 11 && i < 64) { + /* + * The shortest address is 11chars and the longest is < 64. + */ + error = nfsrv_mtostr(nd, addr, i); + if (error) + goto nfsmout; + + /* Find the port# at the end and extract that. */ + i = strlen(addr); + k = 0; + cp = &addr[i - 1]; + /* Count back two '.'s from end to get port# field. */ + for (j = 0; j < i; j++) { + if (*cp == '.') { + k++; + if (k == 2) + break; + } + cp--; + } + if (k == 2) { + /* + * The NFSv4 port# is appended as .N.N, where N is + * a decimal # in the range 0-255, just like an inet4 + * address. Cheat and use inet_aton(), which will + * return a Class A address and then shift the high + * order 8bits over to convert it to the port#. + */ + *cp++ = '\0'; + if (inet_aton(cp, &saddr) == 1) { + portnum = ntohl(saddr.s_addr); + portv = (uint16_t)((portnum >> 16) | + (portnum & 0xff)); + } else + cantparse = 1; + } else + cantparse = 1; + if (cantparse == 0) { + if (af == AF_INET) { + sad = (struct sockaddr_in *)sa; + if (inet_pton(af, addr, &sad->sin_addr) == 1) { + sad->sin_len = sizeof(*sad); + sad->sin_family = AF_INET; + sad->sin_port = htons(portv); + return (0); + } + } else { + sad6 = (struct sockaddr_in6 *)sa; + if (inet_pton(af, addr, &sad6->sin6_addr) + == 1) { + sad6->sin6_len = sizeof(*sad6); + sad6->sin6_family = AF_INET6; + sad6->sin6_port = htons(portv); + return (0); + } + } + } + } else { + if (i > 0) { + error = nfsm_advance(nd, NFSM_RNDUP(i), -1); + if (error) + goto nfsmout; + } + } + error = EPERM; +nfsmout: + return (error); +} + +/* + * Handle an NFSv4.1 Sequence request for the session. + */ +int +nfsv4_seqsession(uint32_t seqid, uint32_t slotid, uint32_t highslot, + struct nfsslot *slots, struct mbuf **reply, uint16_t maxslot) +{ + int error; + + error = 0; + *reply = NULL; + if (slotid > maxslot) + return (NFSERR_BADSLOT); + if (seqid == slots[slotid].nfssl_seq) { + /* A retry. */ + if (slots[slotid].nfssl_inprog != 0) + error = NFSERR_DELAY; + else if (slots[slotid].nfssl_reply != NULL) { + *reply = slots[slotid].nfssl_reply; + slots[slotid].nfssl_reply = NULL; + slots[slotid].nfssl_inprog = 1; + } else + error = NFSERR_SEQMISORDERED; + } else if ((slots[slotid].nfssl_seq + 1) == seqid) { + m_freem(slots[slotid].nfssl_reply); + slots[slotid].nfssl_reply = NULL; + slots[slotid].nfssl_inprog = 1; + slots[slotid].nfssl_seq++; + } else + error = NFSERR_SEQMISORDERED; + return (error); +} + +/* + * Cache this reply for the slot. + */ +void +nfsv4_seqsess_cacherep(uint32_t slotid, struct nfsslot *slots, struct mbuf *rep) +{ + + slots[slotid].nfssl_reply = rep; + slots[slotid].nfssl_inprog = 0; +} + +/* + * Generate the xdr for an NFSv4.1 Sequence Operation. + */ +APPLESTATIC void +nfsv4_setsequence(struct nfsrv_descript *nd, struct nfsclsession *sep, + int dont_replycache) +{ + uint32_t *tl, slotseq = 0; + int i, maxslot, slotpos; + uint64_t bitval; + uint8_t sessionid[NFSX_V4SESSIONID]; + + /* Find an unused slot. */ + slotpos = -1; + maxslot = -1; + mtx_lock(&sep->nfsess_mtx); + do { + bitval = 1; + for (i = 0; i < sep->nfsess_foreslots; i++) { + if ((bitval & sep->nfsess_slots) == 0) { + slotpos = i; + sep->nfsess_slots |= bitval; + sep->nfsess_slotseq[i]++; + slotseq = sep->nfsess_slotseq[i]; + break; + } + bitval <<= 1; + } + if (slotpos == -1) + (void)mtx_sleep(&sep->nfsess_slots, &sep->nfsess_mtx, + PZERO, "nfsclseq", 0); + } while (slotpos == -1); + /* Now, find the highest slot in use. (nfsc_slots is 64bits) */ + bitval = 1; + for (i = 0; i < 64; i++) { + if ((bitval & sep->nfsess_slots) != 0) + maxslot = i; + bitval <<= 1; + } + bcopy(sep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID); + mtx_unlock(&sep->nfsess_mtx); + KASSERT(maxslot >= 0, ("nfscl_setsequence neg maxslot")); + + /* Build the Sequence arguments. */ + NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); + bcopy(sessionid, tl, NFSX_V4SESSIONID); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + nd->nd_slotseq = tl; + *tl++ = txdr_unsigned(slotseq); + *tl++ = txdr_unsigned(slotpos); + *tl++ = txdr_unsigned(maxslot); + if (dont_replycache == 0) + *tl = newnfs_true; + else + *tl = newnfs_false; + nd->nd_flag |= ND_HASSEQUENCE; +} + +/* + * Free a session slot. + */ +APPLESTATIC void +nfsv4_freeslot(struct nfsclsession *sep, int slot) +{ + uint64_t bitval; + + bitval = 1; + if (slot > 0) + bitval <<= slot; + mtx_lock(&sep->nfsess_mtx); + if ((bitval & sep->nfsess_slots) == 0) + printf("freeing free slot!!\n"); + sep->nfsess_slots &= ~bitval; + wakeup(&sep->nfsess_slots); + mtx_unlock(&sep->nfsess_mtx); } Index: head/sys/fs/nfs/nfs_var.h =================================================================== --- head/sys/fs/nfs/nfs_var.h (revision 244041) +++ head/sys/fs/nfs/nfs_var.h (revision 244042) @@ -1,605 +1,657 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * XXX needs and because of typedefs */ struct uio; struct ucred; struct nfscred; NFSPROC_T; struct buf; struct sockaddr_in; struct nfs_dlmount; struct file; struct nfsmount; struct socket; struct nfsreq; struct nfssockreq; struct vattr; struct nameidata; struct nfsnode; struct nfsfh; struct sillyrename; struct componentname; struct nfsd_srvargs; struct nfsrv_descript; struct nfs_fattr; union nethostaddr; struct nfsstate; struct nfslock; struct nfsclient; struct nfslockconflict; struct nfsd_idargs; struct nfsd_clid; struct nfsusrgrp; struct nfsclowner; struct nfsclopen; struct nfsclopenhead; struct nfsclclient; +struct nfsclsession; struct nfscllockowner; struct nfscllock; struct nfscldeleg; +struct nfscllayout; +struct nfscldevinfo; struct nfsv4lock; struct nfsvattr; struct nfs_vattr; struct NFSSVCARGS; #ifdef __FreeBSD__ NFS_ACCESS_ARGS; NFS_OPEN_ARGS; NFS_GETATTR_ARGS; NFS_LOOKUP_ARGS; NFS_READDIR_ARGS; #endif /* nfs_nfsdstate.c */ int nfsrv_setclient(struct nfsrv_descript *, struct nfsclient **, nfsquad_t *, nfsquad_t *, NFSPROC_T *); int nfsrv_getclient(nfsquad_t, int, struct nfsclient **, nfsquad_t, struct nfsrv_descript *, NFSPROC_T *); int nfsrv_adminrevoke(struct nfsd_clid *, NFSPROC_T *); void nfsrv_dumpclients(struct nfsd_dumpclients *, int); void nfsrv_dumplocks(vnode_t, struct nfsd_dumplocks *, int, NFSPROC_T *); int nfsrv_lockctrl(vnode_t, struct nfsstate **, struct nfslock **, struct nfslockconflict *, nfsquad_t, nfsv4stateid_t *, struct nfsexstuff *, struct nfsrv_descript *, NFSPROC_T *); int nfsrv_openctrl(struct nfsrv_descript *, vnode_t, struct nfsstate **, nfsquad_t, nfsv4stateid_t *, nfsv4stateid_t *, u_int32_t *, struct nfsexstuff *, NFSPROC_T *, u_quad_t); int nfsrv_opencheck(nfsquad_t, nfsv4stateid_t *, struct nfsstate *, vnode_t, struct nfsrv_descript *, NFSPROC_T *, int); int nfsrv_openupdate(vnode_t, struct nfsstate *, nfsquad_t, nfsv4stateid_t *, struct nfsrv_descript *, NFSPROC_T *); int nfsrv_delegupdate(nfsquad_t, nfsv4stateid_t *, vnode_t, int, struct ucred *, NFSPROC_T *); int nfsrv_releaselckown(struct nfsstate *, nfsquad_t, NFSPROC_T *); void nfsrv_zapclient(struct nfsclient *, NFSPROC_T *); int nfssvc_idname(struct nfsd_idargs *); void nfsrv_servertimer(void); int nfsrv_getclientipaddr(struct nfsrv_descript *, struct nfsclient *); void nfsrv_setupstable(NFSPROC_T *); void nfsrv_updatestable(NFSPROC_T *); void nfsrv_writestable(u_char *, int, int, NFSPROC_T *); void nfsrv_throwawayopens(NFSPROC_T *); int nfsrv_checkremove(vnode_t, int, NFSPROC_T *); void nfsd_recalldelegation(vnode_t, NFSPROC_T *); void nfsd_disabledelegation(vnode_t, NFSPROC_T *); int nfsrv_checksetattr(vnode_t, struct nfsrv_descript *, nfsv4stateid_t *, struct nfsvattr *, nfsattrbit_t *, struct nfsexstuff *, NFSPROC_T *); int nfsrv_checkgetattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, nfsattrbit_t *, struct ucred *, NFSPROC_T *); int nfsrv_nfsuserdport(u_short, NFSPROC_T *); void nfsrv_nfsuserddelport(void); void nfsrv_throwawayallstate(NFSPROC_T *); /* nfs_nfsdserv.c */ int nfsrvd_access(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_getattr(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_setattr(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_lookup(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_readlink(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_read(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_write(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_create(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_mknod(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_remove(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_rename(struct nfsrv_descript *, int, vnode_t, vnode_t, NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *); int nfsrvd_link(struct nfsrv_descript *, int, vnode_t, vnode_t, NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *); int nfsrvd_symlink(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_mkdir(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_readdir(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_readdirplus(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_commit(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_statfs(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_fsinfo(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_close(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_delegpurge(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_delegreturn(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_getfh(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_lock(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_lockt(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_locku(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_openconfirm(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_opendowngrade(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_renew(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_secinfo(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_setclientid(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_setclientidcfrm(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_verify(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_open(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_openattr(struct nfsrv_descript *, int, vnode_t, vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_releaselckown(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); int nfsrvd_pathconf(struct nfsrv_descript *, int, vnode_t, NFSPROC_T *, struct nfsexstuff *); /* nfs_nfsdsocket.c */ void nfsrvd_rephead(struct nfsrv_descript *); void nfsrvd_dorpc(struct nfsrv_descript *, int, NFSPROC_T *); /* nfs_nfsdcache.c */ void nfsrvd_initcache(void); int nfsrvd_getcache(struct nfsrv_descript *, struct socket *); struct nfsrvcache *nfsrvd_updatecache(struct nfsrv_descript *, struct socket *); void nfsrvd_sentcache(struct nfsrvcache *, struct socket *, int); void nfsrvd_cleancache(void); void nfsrvd_refcache(struct nfsrvcache *); void nfsrvd_derefcache(struct nfsrvcache *); void nfsrvd_delcache(struct nfsrvcache *); /* nfs_commonsubs.c */ void newnfs_init(void); int nfsaddr_match(int, union nethostaddr *, NFSSOCKADDR_T); int nfsaddr2_match(NFSSOCKADDR_T, NFSSOCKADDR_T); int nfsm_strtom(struct nfsrv_descript *, const char *, int); int nfsm_mbufuio(struct nfsrv_descript *, struct uio *, int); int nfsm_fhtom(struct nfsrv_descript *, u_int8_t *, int, int); int nfsm_advance(struct nfsrv_descript *, int, int); void *nfsm_dissct(struct nfsrv_descript *, int); void newnfs_trimleading(struct nfsrv_descript *); void newnfs_trimtrailing(struct nfsrv_descript *, mbuf_t, caddr_t); void newnfs_copycred(struct nfscred *, struct ucred *); void newnfs_copyincred(struct ucred *, struct nfscred *); int nfsrv_dissectacl(struct nfsrv_descript *, NFSACL_T *, int *, int *, NFSPROC_T *); int nfsrv_getattrbits(struct nfsrv_descript *, nfsattrbit_t *, int *, int *); int nfsv4_loadattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, struct nfsfh **, fhandle_t *, int, struct nfsv3_pathconf *, struct statfs *, struct nfsstatfs *, struct nfsfsinfo *, NFSACL_T *, int, int *, u_int32_t *, u_int32_t *, NFSPROC_T *, struct ucred *); int nfsv4_lock(struct nfsv4lock *, int, int *, void *, struct mount *); void nfsv4_unlock(struct nfsv4lock *, int); void nfsv4_relref(struct nfsv4lock *); void nfsv4_getref(struct nfsv4lock *, int *, void *, struct mount *); int nfsv4_getref_nonblock(struct nfsv4lock *); int nfsv4_testlock(struct nfsv4lock *); int nfsrv_mtostr(struct nfsrv_descript *, char *, int); int nfsrv_checkutf8(u_int8_t *, int); int newnfs_sndlock(int *); void newnfs_sndunlock(int *); +int nfsv4_getipaddr(struct nfsrv_descript *, struct sockaddr_storage *, + int *); +int nfsv4_seqsession(uint32_t, uint32_t, uint32_t, struct nfsslot *, + struct mbuf **, uint16_t); +void nfsv4_seqsess_cacherep(uint32_t, struct nfsslot *, struct mbuf *); +void nfsv4_setsequence(struct nfsrv_descript *, struct nfsclsession *, int); +void nfsv4_freeslot(struct nfsclsession *, int); /* nfs_clcomsubs.c */ void nfsm_uiombuf(struct nfsrv_descript *, struct uio *, int); void nfscl_reqstart(struct nfsrv_descript *, int, struct nfsmount *, - u_int8_t *, int, u_int32_t **); + u_int8_t *, int, u_int32_t **, struct nfsclsession *); nfsuint64 *nfscl_getcookie(struct nfsnode *, off_t off, int); void nfscl_fillsattr(struct nfsrv_descript *, struct vattr *, vnode_t, int, u_int32_t); u_int8_t *nfscl_getmyip(struct nfsmount *, int *); int nfsm_getfh(struct nfsrv_descript *, struct nfsfh **); int nfscl_mtofh(struct nfsrv_descript *, struct nfsfh **, struct nfsvattr *, int *); int nfscl_postop_attr(struct nfsrv_descript *, struct nfsvattr *, int *, void *); int nfscl_wcc_data(struct nfsrv_descript *, vnode_t, struct nfsvattr *, int *, int *, void *); int nfsm_loadattr(struct nfsrv_descript *, struct nfsvattr *); int nfscl_request(struct nfsrv_descript *, vnode_t, NFSPROC_T *, struct ucred *, void *); void nfsm_stateidtom(struct nfsrv_descript *, nfsv4stateid_t *, int); /* nfs_nfsdsubs.c */ void nfsd_fhtovp(struct nfsrv_descript *, struct nfsrvfh *, int, vnode_t *, struct nfsexstuff *, mount_t *, int, NFSPROC_T *); int nfsd_excred(struct nfsrv_descript *, struct nfsexstuff *, struct ucred *); int nfsrv_mtofh(struct nfsrv_descript *, struct nfsrvfh *); int nfsrv_putattrbit(struct nfsrv_descript *, nfsattrbit_t *); void nfsrv_wcc(struct nfsrv_descript *, int, struct nfsvattr *, int, struct nfsvattr *); int nfsv4_fillattr(struct nfsrv_descript *, struct mount *, vnode_t, NFSACL_T *, struct vattr *, fhandle_t *, int, nfsattrbit_t *, struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t); void nfsrv_fillattr(struct nfsrv_descript *, struct nfsvattr *); void nfsrv_adj(mbuf_t, int, int); void nfsrv_postopattr(struct nfsrv_descript *, int, struct nfsvattr *); int nfsd_errmap(struct nfsrv_descript *); void nfsv4_uidtostr(uid_t, u_char **, int *, NFSPROC_T *); int nfsv4_strtouid(struct nfsrv_descript *, u_char *, int, uid_t *, NFSPROC_T *); void nfsv4_gidtostr(gid_t, u_char **, int *, NFSPROC_T *); int nfsv4_strtogid(struct nfsrv_descript *, u_char *, int, gid_t *, NFSPROC_T *); int nfsrv_checkuidgid(struct nfsrv_descript *, struct nfsvattr *); void nfsrv_fixattr(struct nfsrv_descript *, vnode_t, struct nfsvattr *, NFSACL_T *, NFSPROC_T *, nfsattrbit_t *, struct nfsexstuff *); int nfsrv_errmoved(int); int nfsrv_putreferralattr(struct nfsrv_descript *, nfsattrbit_t *, struct nfsreferral *, int, int *); int nfsrv_parsename(struct nfsrv_descript *, char *, u_long *, NFSPATHLEN_T *); void nfsd_init(void); int nfsd_checkrootexp(struct nfsrv_descript *); /* nfs_clvfsops.c */ void nfscl_retopts(struct nfsmount *, char *, size_t); /* nfs_commonport.c */ int nfsrv_checksockseqnum(struct socket *, tcp_seq); int nfsrv_getsockseqnum(struct socket *, tcp_seq *); int nfsrv_getsocksndseq(struct socket *, tcp_seq *, tcp_seq *); int nfsrv_lookupfilename(struct nameidata *, char *, NFSPROC_T *); void nfsrv_object_create(vnode_t, NFSPROC_T *); int nfsrv_mallocmget_limit(void); int nfsvno_v4rootexport(struct nfsrv_descript *); void newnfs_portinit(void); struct ucred *newnfs_getcred(void); void newnfs_setroot(struct ucred *); int nfs_catnap(int, int, const char *); struct nfsreferral *nfsv4root_getreferral(vnode_t, vnode_t, u_int32_t); int nfsvno_pathconf(vnode_t, int, register_t *, struct ucred *, NFSPROC_T *); int nfsrv_atroot(vnode_t, long *); void newnfs_timer(void *); int nfs_supportsnfsv4acls(vnode_t); /* nfs_commonacl.c */ int nfsrv_dissectace(struct nfsrv_descript *, struct acl_entry *, int *, int *, NFSPROC_T *); int nfsrv_buildacl(struct nfsrv_descript *, NFSACL_T *, enum vtype, NFSPROC_T *); int nfsrv_setacl(vnode_t, NFSACL_T *, struct ucred *, NFSPROC_T *); int nfsrv_compareacl(NFSACL_T *, NFSACL_T *); /* nfs_clrpcops.c */ int nfsrpc_null(vnode_t, struct ucred *, NFSPROC_T *); int nfsrpc_access(vnode_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *); int nfsrpc_accessrpc(vnode_t, u_int32_t, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, u_int32_t *, void *); int nfsrpc_open(vnode_t, int, struct ucred *, NFSPROC_T *); int nfsrpc_openrpc(struct nfsmount *, vnode_t, u_int8_t *, int, u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int, struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *, int, int); int nfsrpc_opendowngrade(vnode_t, u_int32_t, struct nfsclopen *, struct ucred *, NFSPROC_T *); int nfsrpc_close(vnode_t, int, NFSPROC_T *); int nfsrpc_closerpc(struct nfsrv_descript *, struct nfsmount *, struct nfsclopen *, struct ucred *, NFSPROC_T *, int); int nfsrpc_openconfirm(vnode_t, u_int8_t *, int, struct nfsclopen *, struct ucred *, NFSPROC_T *); -int nfsrpc_setclient(struct nfsmount *, struct nfsclclient *, +int nfsrpc_setclient(struct nfsmount *, struct nfsclclient *, int, struct ucred *, NFSPROC_T *); int nfsrpc_getattr(vnode_t, struct ucred *, NFSPROC_T *, struct nfsvattr *, void *); int nfsrpc_getattrnovp(struct nfsmount *, u_int8_t *, int, int, - struct ucred *, NFSPROC_T *, struct nfsvattr *, u_int64_t *); + struct ucred *, NFSPROC_T *, struct nfsvattr *, u_int64_t *, uint32_t *); int nfsrpc_setattr(vnode_t, struct vattr *, NFSACL_T *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_lookup(vnode_t, char *, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); int nfsrpc_readlink(vnode_t, struct uio *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_read(vnode_t, struct uio *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_write(vnode_t, struct uio *, int *, int *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *, int); int nfsrpc_mknod(vnode_t, char *, int, struct vattr *, u_int32_t, enum vtype, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); int nfsrpc_create(vnode_t, char *, int, struct vattr *, nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); int nfsrpc_remove(vnode_t, char *, int, vnode_t, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_rename(vnode_t, vnode_t, char *, int, vnode_t, vnode_t, char *, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, int *, int *, void *, void *); int nfsrpc_link(vnode_t, vnode_t, char *, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, int *, int *, void *); int nfsrpc_symlink(vnode_t, char *, int, char *, struct vattr *, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); int nfsrpc_mkdir(vnode_t, char *, int, struct vattr *, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); int nfsrpc_rmdir(vnode_t, char *, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_readdir(vnode_t, struct uio *, nfsuint64 *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, int *, void *); int nfsrpc_readdirplus(vnode_t, struct uio *, nfsuint64 *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, int *, void *); int nfsrpc_commit(vnode_t, u_quad_t, int, struct ucred *, - NFSPROC_T *, u_char *, struct nfsvattr *, int *, void *); + NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_advlock(vnode_t, off_t, int, struct flock *, int, struct ucred *, NFSPROC_T *, void *, int); int nfsrpc_lockt(struct nfsrv_descript *, vnode_t, struct nfsclclient *, u_int64_t, u_int64_t, struct flock *, struct ucred *, NFSPROC_T *, void *, int); int nfsrpc_lock(struct nfsrv_descript *, struct nfsmount *, vnode_t, u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short, struct ucred *, NFSPROC_T *, int); int nfsrpc_statfs(vnode_t, struct nfsstatfs *, struct nfsfsinfo *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_fsinfo(vnode_t, struct nfsfsinfo *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_pathconf(vnode_t, struct nfsv3_pathconf *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); -int nfsrpc_renew(struct nfsclclient *, struct ucred *, +int nfsrpc_renew(struct nfsclclient *, struct nfsclds *, struct ucred *, NFSPROC_T *); int nfsrpc_rellockown(struct nfsmount *, struct nfscllockowner *, uint8_t *, int, struct ucred *, NFSPROC_T *); int nfsrpc_getdirpath(struct nfsmount *, u_char *, struct ucred *, NFSPROC_T *); int nfsrpc_delegreturn(struct nfscldeleg *, struct ucred *, struct nfsmount *, NFSPROC_T *, int); int nfsrpc_getacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *); int nfsrpc_setacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *); +int nfsrpc_exchangeid(struct nfsmount *, struct nfsclclient *, + struct nfssockreq *, uint32_t, struct nfsclds **, struct ucred *, + NFSPROC_T *); +int nfsrpc_createsession(struct nfsmount *, struct nfsclsession *, + struct nfssockreq *, uint32_t, int, struct ucred *, NFSPROC_T *); +int nfsrpc_destroysession(struct nfsmount *, struct nfsclclient *, + struct ucred *, NFSPROC_T *); +int nfsrpc_destroyclient(struct nfsmount *, struct nfsclclient *, + struct ucred *, NFSPROC_T *); +int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t, uint64_t, + uint64_t, int, nfsv4stateid_t *, int *, struct nfsclflayouthead *, + struct ucred *, NFSPROC_T *, void *); +int nfsrpc_getdeviceinfo(struct nfsmount *, uint8_t *, int, uint32_t *, + struct nfscldevinfo **, struct ucred *, NFSPROC_T *); +int nfsrpc_layoutcommit(struct nfsmount *, uint8_t *, int, int, + uint64_t, uint64_t, uint64_t, nfsv4stateid_t *, int, int, uint8_t *, + struct ucred *, NFSPROC_T *, void *); +int nfsrpc_layoutreturn(struct nfsmount *, uint8_t *, int, int, int, uint32_t, + int, uint64_t, uint64_t, nfsv4stateid_t *, int, uint32_t *, struct ucred *, + NFSPROC_T *, void *); +int nfsrpc_reclaimcomplete(struct nfsmount *, struct ucred *, NFSPROC_T *); +int nfscl_doiods(vnode_t, struct uio *, int *, int *, uint32_t, + struct ucred *, NFSPROC_T *); +int nfscl_findlayoutforio(struct nfscllayout *, uint64_t, uint32_t, + struct nfsclflayout **); +void nfscl_freenfsclds(struct nfsclds *); /* nfs_clstate.c */ int nfscl_open(vnode_t, u_int8_t *, int, u_int32_t, int, struct ucred *, NFSPROC_T *, struct nfsclowner **, struct nfsclopen **, int *, int *, int); -int nfscl_getstateid(vnode_t, u_int8_t *, int, u_int32_t, struct ucred *, +int nfscl_getstateid(vnode_t, u_int8_t *, int, u_int32_t, int, struct ucred *, NFSPROC_T *, nfsv4stateid_t *, void **); void nfscl_ownerrelease(struct nfsclowner *, int, int, int); void nfscl_openrelease(struct nfsclopen *, int, int); -int nfscl_getcl(vnode_t, struct ucred *, NFSPROC_T *, +int nfscl_getcl(struct mount *, struct ucred *, NFSPROC_T *, int, struct nfsclclient **); struct nfsclclient *nfscl_findcl(struct nfsmount *); void nfscl_clientrelease(struct nfsclclient *); void nfscl_freelock(struct nfscllock *, int); void nfscl_freelockowner(struct nfscllockowner *, int); int nfscl_getbytelock(vnode_t, u_int64_t, u_int64_t, short, struct ucred *, NFSPROC_T *, struct nfsclclient *, int, void *, int, u_int8_t *, u_int8_t *, struct nfscllockowner **, int *, int *); int nfscl_relbytelock(vnode_t, u_int64_t, u_int64_t, struct ucred *, NFSPROC_T *, int, struct nfsclclient *, void *, int, struct nfscllockowner **, int *); int nfscl_checkwritelocked(vnode_t, struct flock *, struct ucred *, NFSPROC_T *, void *, int); void nfscl_lockrelease(struct nfscllockowner *, int, int); void nfscl_fillclid(u_int64_t, char *, u_int8_t *, u_int16_t); void nfscl_filllockowner(void *, u_int8_t *, int); void nfscl_freeopen(struct nfsclopen *, int); void nfscl_umount(struct nfsmount *, NFSPROC_T *); void nfscl_renewthread(struct nfsclclient *, NFSPROC_T *); void nfscl_initiate_recovery(struct nfsclclient *); int nfscl_hasexpired(struct nfsclclient *, u_int32_t, NFSPROC_T *); void nfscl_dumpstate(struct nfsmount *, int, int, int, int); void nfscl_dupopen(vnode_t, int); int nfscl_getclose(vnode_t, struct nfsclclient **); int nfscl_doclose(vnode_t, struct nfsclclient **, NFSPROC_T *); void nfsrpc_doclose(struct nfsmount *, struct nfsclopen *, NFSPROC_T *); int nfscl_deleg(mount_t, struct nfsclclient *, u_int8_t *, int, struct ucred *, NFSPROC_T *, struct nfscldeleg **); void nfscl_lockinit(struct nfsv4lock *); void nfscl_lockexcl(struct nfsv4lock *, void *); void nfscl_lockunlock(struct nfsv4lock *); void nfscl_lockderef(struct nfsv4lock *); void nfscl_docb(struct nfsrv_descript *, NFSPROC_T *); void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *, void *, int); int nfscl_lockt(vnode_t, struct nfsclclient *, u_int64_t, u_int64_t, struct flock *, NFSPROC_T *, void *, int); int nfscl_mustflush(vnode_t); int nfscl_nodeleg(vnode_t, int); int nfscl_removedeleg(vnode_t, NFSPROC_T *, nfsv4stateid_t *); int nfscl_getref(struct nfsmount *); void nfscl_relref(struct nfsmount *); int nfscl_renamedeleg(vnode_t, nfsv4stateid_t *, int *, vnode_t, nfsv4stateid_t *, int *, NFSPROC_T *); void nfscl_reclaimnode(vnode_t); void nfscl_newnode(vnode_t); void nfscl_delegmodtime(vnode_t); void nfscl_deleggetmodtime(vnode_t, struct timespec *); int nfscl_tryclose(struct nfsclopen *, struct ucred *, struct nfsmount *, NFSPROC_T *); void nfscl_cleanup(NFSPROC_T *); +int nfscl_layout(struct nfsmount *, vnode_t, u_int8_t *, int, nfsv4stateid_t *, + int, struct nfsclflayouthead *, struct nfscllayout **, struct ucred *, + NFSPROC_T *); +struct nfscllayout *nfscl_getlayout(struct nfsclclient *, uint8_t *, int, + uint64_t, struct nfsclflayout **, int *); +void nfscl_rellayout(struct nfscllayout *, int); +struct nfscldevinfo *nfscl_getdevinfo(struct nfsclclient *, uint8_t *, + struct nfscldevinfo *); +void nfscl_reldevinfo(struct nfscldevinfo *); +int nfscl_adddevinfo(struct nfsmount *, struct nfscldevinfo *, + struct nfsclflayout *); +void nfscl_freelayout(struct nfscllayout *); +void nfscl_freeflayout(struct nfsclflayout *); +void nfscl_freedevinfo(struct nfscldevinfo *); +int nfscl_layoutcommit(vnode_t, NFSPROC_T *); /* nfs_clport.c */ int nfscl_nget(mount_t, vnode_t, struct nfsfh *, struct componentname *, NFSPROC_T *, struct nfsnode **, void *, int); NFSPROC_T *nfscl_getparent(NFSPROC_T *); void nfscl_start_renewthread(struct nfsclclient *); void nfscl_loadsbinfo(struct nfsmount *, struct nfsstatfs *, void *); void nfscl_loadfsinfo (struct nfsmount *, struct nfsfsinfo *); void nfscl_delegreturn(struct nfscldeleg *, int, struct nfsmount *, struct ucred *, NFSPROC_T *); void nfsrvd_cbinit(int); int nfscl_checksattr(struct vattr *, struct nfsvattr *); int nfscl_ngetreopen(mount_t, u_int8_t *, int, NFSPROC_T *, struct nfsnode **); int nfscl_procdoesntexist(u_int8_t *); int nfscl_maperr(NFSPROC_T *, int, uid_t, gid_t); /* nfs_clsubs.c */ void nfscl_init(void); /* nfs_clbio.c */ int ncl_flush(vnode_t, int, struct ucred *, NFSPROC_T *, int, int); /* nfs_clnode.c */ void ncl_invalcaches(vnode_t); /* nfs_nfsdport.c */ int nfsvno_getattr(vnode_t, struct nfsvattr *, struct ucred *, NFSPROC_T *, int); int nfsvno_setattr(vnode_t, struct nfsvattr *, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_getfh(vnode_t, fhandle_t *, NFSPROC_T *); int nfsvno_accchk(vnode_t, accmode_t, struct ucred *, struct nfsexstuff *, NFSPROC_T *, int, int, u_int32_t *); int nfsvno_namei(struct nfsrv_descript *, struct nameidata *, vnode_t, int, struct nfsexstuff *, NFSPROC_T *, vnode_t *); void nfsvno_setpathbuf(struct nameidata *, char **, u_long **); void nfsvno_relpathbuf(struct nameidata *); int nfsvno_readlink(vnode_t, struct ucred *, NFSPROC_T *, mbuf_t *, mbuf_t *, int *); int nfsvno_read(vnode_t, off_t, int, struct ucred *, NFSPROC_T *, mbuf_t *, mbuf_t *); int nfsvno_write(vnode_t, off_t, int, int, int, mbuf_t, char *, struct ucred *, NFSPROC_T *); int nfsvno_createsub(struct nfsrv_descript *, struct nameidata *, vnode_t *, struct nfsvattr *, int *, int32_t *, NFSDEV_T, NFSPROC_T *, struct nfsexstuff *); int nfsvno_mknod(struct nameidata *, struct nfsvattr *, struct ucred *, NFSPROC_T *); int nfsvno_mkdir(struct nameidata *, struct nfsvattr *, uid_t, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_symlink(struct nameidata *, struct nfsvattr *, char *, int, int, uid_t, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_getsymlink(struct nfsrv_descript *, struct nfsvattr *, NFSPROC_T *, char **, int *); int nfsvno_removesub(struct nameidata *, int, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_rmdirsub(struct nameidata *, int, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_rename(struct nameidata *, struct nameidata *, u_int32_t, u_int32_t, struct ucred *, NFSPROC_T *); int nfsvno_link(struct nameidata *, vnode_t, struct ucred *, NFSPROC_T *, struct nfsexstuff *); int nfsvno_fsync(vnode_t, u_int64_t, int, struct ucred *, NFSPROC_T *); int nfsvno_statfs(vnode_t, struct statfs *); void nfsvno_getfs(struct nfsfsinfo *, int); void nfsvno_open(struct nfsrv_descript *, struct nameidata *, nfsquad_t, nfsv4stateid_t *, struct nfsstate *, int *, struct nfsvattr *, int32_t *, int, NFSACL_T *, nfsattrbit_t *, struct ucred *, NFSPROC_T *, struct nfsexstuff *, vnode_t *); void nfsvno_updfilerev(vnode_t, struct nfsvattr *, struct ucred *, NFSPROC_T *); int nfsvno_fillattr(struct nfsrv_descript *, struct mount *, vnode_t, struct nfsvattr *, fhandle_t *, int, nfsattrbit_t *, struct ucred *, NFSPROC_T *, int, int, int, int, uint64_t); int nfsrv_sattr(struct nfsrv_descript *, struct nfsvattr *, nfsattrbit_t *, NFSACL_T *, NFSPROC_T *); int nfsv4_sattr(struct nfsrv_descript *, struct nfsvattr *, nfsattrbit_t *, NFSACL_T *, NFSPROC_T *); int nfsvno_checkexp(mount_t, NFSSOCKADDR_T, struct nfsexstuff *, struct ucred **); int nfsvno_fhtovp(mount_t, fhandle_t *, NFSSOCKADDR_T, int, vnode_t *, struct nfsexstuff *, struct ucred **); vnode_t nfsvno_getvp(fhandle_t *); int nfsvno_advlock(vnode_t, int, u_int64_t, u_int64_t, NFSPROC_T *); int nfsrv_v4rootexport(void *, struct ucred *, NFSPROC_T *); int nfsvno_testexp(struct nfsrv_descript *, struct nfsexstuff *); uint32_t nfsrv_hashfh(fhandle_t *); void nfsrv_backupstable(void); /* nfs_commonkrpc.c */ int newnfs_nmcancelreqs(struct nfsmount *); void newnfs_set_sigmask(struct thread *, sigset_t *); void newnfs_restore_sigmask(struct thread *, sigset_t *); int newnfs_msleep(struct thread *, void *, struct mtx *, int, char *, int); int newnfs_request(struct nfsrv_descript *, struct nfsmount *, struct nfsclient *, struct nfssockreq *, vnode_t, NFSPROC_T *, - struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *); + struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *, + struct nfsclsession *); int newnfs_connect(struct nfsmount *, struct nfssockreq *, struct ucred *, NFSPROC_T *, int); void newnfs_disconnect(struct nfssockreq *); int newnfs_sigintr(struct nfsmount *, NFSPROC_T *); /* nfs_nfsdkrpc.c */ int nfsrvd_addsock(struct file *); int nfsrvd_nfsd(NFSPROC_T *, struct nfsd_nfsd_args *); void nfsrvd_init(int); /* nfs_clkrpc.c */ int nfscbd_addsock(struct file *); int nfscbd_nfsd(NFSPROC_T *, struct nfsd_nfscbd_args *); Index: head/sys/fs/nfs/nfscl.h =================================================================== --- head/sys/fs/nfs/nfscl.h (revision 244041) +++ head/sys/fs/nfs/nfscl.h (revision 244042) @@ -1,77 +1,77 @@ /*- * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSCL_H #define _NFS_NFSCL_H /* * Extra stuff for a NFSv4 nfsnode. * MALLOC'd to the correct length for the name and file handle. * n4_data has the file handle, followed by the file name. * The macro NFS4NODENAME() returns a pointer to the start of the * name. */ struct nfsv4node { u_int16_t n4_fhlen; u_int16_t n4_namelen; u_int8_t n4_data[1]; }; #define NFS4NODENAME(n) (&((n)->n4_data[(n)->n4_fhlen])) /* * Just a macro to convert the nfscl_reqstart arguments. */ #define NFSCL_REQSTART(n, p, v) \ nfscl_reqstart((n), (p), VFSTONFS((v)->v_mount), \ - VTONFS(v)->n_fhp->nfh_fh, VTONFS(v)->n_fhp->nfh_len, NULL) + VTONFS(v)->n_fhp->nfh_fh, VTONFS(v)->n_fhp->nfh_len, NULL, NULL) /* * These two macros convert between a lease duration and renew interval. * For now, just make the renew interval 1/2 the lease duration. * (They should be inverse operators.) */ #define NFSCL_RENEW(l) (((l) < 2) ? 1 : ((l) / 2)) #define NFSCL_LEASE(r) ((r) * 2) /* * These flag bits are used for the argument to nfscl_fillsattr() to * indicate special handling of the attributes. */ #define NFSSATTR_FULL 0x1 #define NFSSATTR_SIZE0 0x2 #define NFSSATTR_SIZENEG1 0x4 #define NFSSATTR_SIZERDEV 0x8 /* Use this macro for debug printfs. */ #define NFSCL_DEBUG(level, ...) do { \ if (nfscl_debuglevel >= (level)) \ printf(__VA_ARGS__); \ } while (0) #endif /* _NFS_NFSCL_H */ Index: head/sys/fs/nfs/nfsclstate.h =================================================================== --- head/sys/fs/nfs/nfsclstate.h (revision 244041) +++ head/sys/fs/nfs/nfsclstate.h (revision 244042) @@ -1,186 +1,370 @@ /*- * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSCLSTATE_H_ #define _NFS_NFSCLSTATE_H_ /* * Definitions for NFS V4 client state handling. */ LIST_HEAD(nfsclopenhead, nfsclopen); LIST_HEAD(nfscllockownerhead, nfscllockowner); SLIST_HEAD(nfscllockownerfhhead, nfscllockownerfh); LIST_HEAD(nfscllockhead, nfscllock); LIST_HEAD(nfsclhead, nfsclclient); LIST_HEAD(nfsclownerhead, nfsclowner); TAILQ_HEAD(nfscldeleghead, nfscldeleg); LIST_HEAD(nfscldeleghash, nfscldeleg); +TAILQ_HEAD(nfscllayouthead, nfscllayout); +LIST_HEAD(nfscllayouthash, nfscllayout); +LIST_HEAD(nfsclflayouthead, nfsclflayout); +LIST_HEAD(nfscldevinfohead, nfscldevinfo); +LIST_HEAD(nfsclrecalllayouthead, nfsclrecalllayout); #define NFSCLDELEGHASHSIZE 256 -#define NFSCLDELEGHASH(c, f, l) \ +#define NFSCLDELEGHASH(c, f, l) \ (&((c)->nfsc_deleghash[ncl_hash((f), (l)) % NFSCLDELEGHASHSIZE])) +#define NFSCLLAYOUTHASHSIZE 256 +#define NFSCLLAYOUTHASH(c, f, l) \ + (&((c)->nfsc_layouthash[ncl_hash((f), (l)) % NFSCLLAYOUTHASHSIZE])) +/* Structure for NFSv4.1 session stuff. */ +struct nfsclsession { + struct mtx nfsess_mtx; + struct nfsslot nfsess_cbslots[NFSV4_CBSLOTS]; + nfsquad_t nfsess_clientid; + uint32_t nfsess_slotseq[64]; /* Max for 64bit nm_slots */ + uint64_t nfsess_slots; + uint32_t nfsess_sequenceid; + uint32_t nfsess_maxcache; /* Max size for cached reply. */ + uint16_t nfsess_foreslots; + uint16_t nfsess_backslots; + uint8_t nfsess_sessionid[NFSX_V4SESSIONID]; +}; + +/* + * This structure holds the session, clientid and related information + * needed for an NFSv4.1 Meta Data Server (MDS) or Data Server (DS). + * It is malloc'd to the correct length. + */ +struct nfsclds { + TAILQ_ENTRY(nfsclds) nfsclds_list; + struct nfsclsession nfsclds_sess; + struct mtx nfsclds_mtx; + struct nfssockreq *nfsclds_sockp; + time_t nfsclds_expire; + uint16_t nfsclds_flags; + uint16_t nfsclds_servownlen; + uint8_t nfsclds_verf[NFSX_VERF]; + uint8_t nfsclds_serverown[0]; +}; + +/* + * Flags for nfsclds_flags. + */ +#define NFSCLDS_HASWRITEVERF 0x0001 +#define NFSCLDS_MDS 0x0002 +#define NFSCLDS_DS 0x0004 + struct nfsclclient { LIST_ENTRY(nfsclclient) nfsc_list; struct nfsclownerhead nfsc_owner; struct nfscldeleghead nfsc_deleg; struct nfscldeleghash nfsc_deleghash[NFSCLDELEGHASHSIZE]; - struct nfsv4lock nfsc_lock; - struct proc *nfsc_renewthread; - struct nfsmount *nfsc_nmp; - nfsquad_t nfsc_clientid; - time_t nfsc_expire; - u_int32_t nfsc_clientidrev; - u_int32_t nfsc_renew; - u_int32_t nfsc_cbident; - u_int16_t nfsc_flags; - u_int16_t nfsc_idlen; - u_int8_t nfsc_id[1]; /* Malloc'd to correct length */ + struct nfscllayouthead nfsc_layout; + struct nfscllayouthash nfsc_layouthash[NFSCLLAYOUTHASHSIZE]; + struct nfscldevinfohead nfsc_devinfo; + struct nfsv4lock nfsc_lock; + struct proc *nfsc_renewthread; + struct nfsmount *nfsc_nmp; + time_t nfsc_expire; + u_int32_t nfsc_clientidrev; + u_int32_t nfsc_rev; + u_int32_t nfsc_renew; + u_int32_t nfsc_cbident; + u_int16_t nfsc_flags; + u_int16_t nfsc_idlen; + u_int8_t nfsc_id[1]; /* Malloc'd to correct length */ }; /* * Bits for nfsc_flags. */ #define NFSCLFLAGS_INITED 0x0001 #define NFSCLFLAGS_HASCLIENTID 0x0002 #define NFSCLFLAGS_RECOVER 0x0004 #define NFSCLFLAGS_UMOUNT 0x0008 #define NFSCLFLAGS_HASTHREAD 0x0010 #define NFSCLFLAGS_AFINET6 0x0020 #define NFSCLFLAGS_EXPIREIT 0x0040 #define NFSCLFLAGS_FIRSTDELEG 0x0080 #define NFSCLFLAGS_GOTDELEG 0x0100 #define NFSCLFLAGS_RECVRINPROG 0x0200 struct nfsclowner { LIST_ENTRY(nfsclowner) nfsow_list; struct nfsclopenhead nfsow_open; struct nfsclclient *nfsow_clp; u_int32_t nfsow_seqid; u_int32_t nfsow_defunct; struct nfsv4lock nfsow_rwlock; u_int8_t nfsow_owner[NFSV4CL_LOCKNAMELEN]; }; /* * MALLOC'd to the correct length to accommodate the file handle. */ struct nfscldeleg { TAILQ_ENTRY(nfscldeleg) nfsdl_list; LIST_ENTRY(nfscldeleg) nfsdl_hash; struct nfsclownerhead nfsdl_owner; /* locally issued state */ struct nfscllockownerhead nfsdl_lock; nfsv4stateid_t nfsdl_stateid; struct acl_entry nfsdl_ace; /* Delegation ace */ struct nfsclclient *nfsdl_clp; struct nfsv4lock nfsdl_rwlock; /* for active I/O ops */ struct nfscred nfsdl_cred; /* Cred. used for Open */ time_t nfsdl_timestamp; /* used for stale cleanup */ u_int64_t nfsdl_sizelimit; /* Limit for file growth */ u_int64_t nfsdl_size; /* saved copy of file size */ u_int64_t nfsdl_change; /* and change attribute */ struct timespec nfsdl_modtime; /* local modify time */ u_int16_t nfsdl_fhlen; u_int8_t nfsdl_flags; u_int8_t nfsdl_fh[1]; /* must be last */ }; /* * nfsdl_flags bits. */ #define NFSCLDL_READ 0x01 #define NFSCLDL_WRITE 0x02 #define NFSCLDL_RECALL 0x04 #define NFSCLDL_NEEDRECLAIM 0x08 #define NFSCLDL_ZAPPED 0x10 #define NFSCLDL_MODTIMESET 0x20 #define NFSCLDL_DELEGRET 0x40 /* * MALLOC'd to the correct length to accommodate the file handle. */ struct nfsclopen { LIST_ENTRY(nfsclopen) nfso_list; struct nfscllockownerhead nfso_lock; nfsv4stateid_t nfso_stateid; struct nfsclowner *nfso_own; struct nfscred nfso_cred; /* Cred. used for Open */ u_int32_t nfso_mode; u_int32_t nfso_opencnt; u_int16_t nfso_fhlen; u_int8_t nfso_posixlock; /* 1 for POSIX type locking */ u_int8_t nfso_fh[1]; /* must be last */ }; /* * Return values for nfscl_open(). NFSCLOPEN_OK must == 0. */ #define NFSCLOPEN_OK 0 #define NFSCLOPEN_DOOPEN 1 #define NFSCLOPEN_DOOPENDOWNGRADE 2 #define NFSCLOPEN_SETCRED 3 struct nfscllockowner { LIST_ENTRY(nfscllockowner) nfsl_list; struct nfscllockhead nfsl_lock; struct nfsclopen *nfsl_open; NFSPROC_T *nfsl_inprog; nfsv4stateid_t nfsl_stateid; int nfsl_lockflags; u_int32_t nfsl_seqid; struct nfsv4lock nfsl_rwlock; u_int8_t nfsl_owner[NFSV4CL_LOCKNAMELEN]; u_int8_t nfsl_openowner[NFSV4CL_LOCKNAMELEN]; }; /* * Byte range entry for the above lock owner. */ struct nfscllock { LIST_ENTRY(nfscllock) nfslo_list; u_int64_t nfslo_first; u_int64_t nfslo_end; short nfslo_type; }; /* This structure is used to collect a list of lockowners to free up. */ struct nfscllockownerfh { SLIST_ENTRY(nfscllockownerfh) nfslfh_list; struct nfscllockownerhead nfslfh_lock; int nfslfh_len; uint8_t nfslfh_fh[NFSX_V4FHMAX]; }; + +/* + * MALLOC'd to the correct length to accommodate the file handle. + */ +struct nfscllayout { + TAILQ_ENTRY(nfscllayout) nfsly_list; + LIST_ENTRY(nfscllayout) nfsly_hash; + nfsv4stateid_t nfsly_stateid; + struct nfsv4lock nfsly_lock; + uint64_t nfsly_filesid[2]; + uint64_t nfsly_lastbyte; + struct nfsclflayouthead nfsly_flayread; + struct nfsclflayouthead nfsly_flayrw; + struct nfsclrecalllayouthead nfsly_recall; + time_t nfsly_timestamp; + struct nfsclclient *nfsly_clp; + uint16_t nfsly_flags; + uint16_t nfsly_fhlen; + uint8_t nfsly_fh[1]; +}; + +/* + * Flags for nfsly_flags. + */ +#define NFSLY_FILES 0x0001 +#define NFSLY_BLOCK 0x0002 +#define NFSLY_OBJECT 0x0004 +#define NFSLY_RECALL 0x0008 +#define NFSLY_RECALLFILE 0x0010 +#define NFSLY_RECALLFSID 0x0020 +#define NFSLY_RECALLALL 0x0040 +#define NFSLY_RETONCLOSE 0x0080 +#define NFSLY_WRITTEN 0x0100 /* Has been used to write to a DS. */ + +/* + * MALLOC'd to the correct length to accommodate the file handle list. + * These hang off of nfsly_flayread and nfsly_flayrw, sorted in increasing + * offset order. + * The nfsly_flayread list holds the ones with iomode == NFSLAYOUTIOMODE_READ, + * whereas the nfsly_flayrw holds the ones with iomode == NFSLAYOUTIOMODE_RW. + */ +struct nfsclflayout { + LIST_ENTRY(nfsclflayout) nfsfl_list; + uint8_t nfsfl_dev[NFSX_V4DEVICEID]; + uint64_t nfsfl_off; + uint64_t nfsfl_end; + uint64_t nfsfl_patoff; + struct nfscldevinfo *nfsfl_devp; + uint32_t nfsfl_iomode; + uint32_t nfsfl_util; + uint32_t nfsfl_stripe1; + uint16_t nfsfl_flags; + uint16_t nfsfl_fhcnt; + struct nfsfh *nfsfl_fh[1]; /* FH list for DS */ +}; + +/* + * Flags for nfsfl_flags. + */ +#define NFSFL_RECALL 0x0001 /* File layout has been recalled */ + +/* + * Structure that is used to store a LAYOUTRECALL. + */ +struct nfsclrecalllayout { + LIST_ENTRY(nfsclrecalllayout) nfsrecly_list; + uint64_t nfsrecly_off; + uint64_t nfsrecly_len; + int nfsrecly_recalltype; + uint32_t nfsrecly_iomode; + uint32_t nfsrecly_stateseqid; +}; + +/* + * Stores the NFSv4.1 Device Info. Malloc'd to the correct length to + * store the list of network connections and list of indices. + * nfsdi_data[] is allocated the following way: + * - nfsdi_addrcnt * struct nfsclds + * - stripe indices, each stored as one byte, since there can be many + * of them. (This implies a limit of 256 on nfsdi_addrcnt, since the + * indices select which address.) + */ +struct nfscldevinfo { + LIST_ENTRY(nfscldevinfo) nfsdi_list; + uint8_t nfsdi_deviceid[NFSX_V4DEVICEID]; + struct nfsclclient *nfsdi_clp; + uint32_t nfsdi_refcnt; + uint32_t nfsdi_layoutrefs; + uint16_t nfsdi_stripecnt; + uint16_t nfsdi_addrcnt; + struct nfsclds *nfsdi_data[0]; +}; + +/* These inline functions return values from nfsdi_data[]. */ +/* + * Return a pointer to the address at "pos". + */ +static __inline struct nfsclds ** +nfsfldi_addr(struct nfscldevinfo *ndi, int pos) +{ + + if (pos >= ndi->nfsdi_addrcnt) + return (NULL); + return (&ndi->nfsdi_data[pos]); +} + +/* + * Return the Nth ("pos") stripe index. + */ +static __inline int +nfsfldi_stripeindex(struct nfscldevinfo *ndi, int pos) +{ + uint8_t *valp; + + if (pos >= ndi->nfsdi_stripecnt) + return (-1); + valp = (uint8_t *)&ndi->nfsdi_data[ndi->nfsdi_addrcnt]; + valp += pos; + return ((int)*valp); +} + +/* + * Set the Nth ("pos") stripe index to "val". + */ +static __inline void +nfsfldi_setstripeindex(struct nfscldevinfo *ndi, int pos, uint8_t val) +{ + uint8_t *valp; + + if (pos >= ndi->nfsdi_stripecnt) + return; + valp = (uint8_t *)&ndi->nfsdi_data[ndi->nfsdi_addrcnt]; + valp += pos; + *valp = val; +} /* * Macro for incrementing the seqid#. */ #define NFSCL_INCRSEQID(s, n) do { \ if (((n)->nd_flag & ND_INCRSEQID)) \ (s)++; \ } while (0) #endif /* _NFS_NFSCLSTATE_H_ */ Index: head/sys/fs/nfs/nfsport.h =================================================================== --- head/sys/fs/nfs/nfsport.h (revision 244041) +++ head/sys/fs/nfs/nfsport.h (revision 244042) @@ -1,914 +1,999 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSPORT_H_ #define _NFS_NFSPORT_H_ /* * In general, I'm not fond of #includes in .h files, but this seems * to be the cleanest way to handle #include files for the ports. */ #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * For Darwin, these functions should be "static" when built in a kext. * (This is always defined as nil otherwise.) */ #define APPLESTATIC #include #include #include #include #include #include #include #include #include #include #include "opt_nfs.h" #include "opt_ufs.h" /* * These types must be defined before the nfs includes. */ #define NFSSOCKADDR_T struct sockaddr * #define NFSPROC_T struct thread #define NFSDEV_T dev_t #define NFSSVCARGS nfssvc_args #define NFSACL_T struct acl /* * These should be defined as the types used for the corresponding VOP's * argument type. */ #define NFS_ACCESS_ARGS struct vop_access_args #define NFS_OPEN_ARGS struct vop_open_args #define NFS_GETATTR_ARGS struct vop_getattr_args #define NFS_LOOKUP_ARGS struct vop_lookup_args #define NFS_READDIR_ARGS struct vop_readdir_args /* * Allocate mbufs. Must succeed and never set the mbuf ptr to NULL. */ #define NFSMGET(m) do { \ MGET((m), M_TRYWAIT, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGET((m), M_TRYWAIT, MT_DATA); \ } \ } while (0) #define NFSMGETHDR(m) do { \ MGETHDR((m), M_TRYWAIT, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGETHDR((m), M_TRYWAIT, MT_DATA); \ } \ } while (0) #define NFSMCLGET(m, w) do { \ MGET((m), M_TRYWAIT, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGET((m), M_TRYWAIT, MT_DATA); \ } \ MCLGET((m), (w)); \ } while (0) #define NFSMCLGETHDR(m, w) do { \ MGETHDR((m), M_TRYWAIT, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGETHDR((m), M_TRYWAIT, MT_DATA); \ } \ } while (0) #define NFSMTOD mtod /* * Client side constant for size of a lockowner name. */ #define NFSV4CL_LOCKNAMELEN 12 /* * Type for a mutex lock. */ #define NFSMUTEX_T struct mtx #endif /* _KERNEL */ /* * NFSv4 Operation numbers. */ #define NFSV4OP_ACCESS 3 #define NFSV4OP_CLOSE 4 #define NFSV4OP_COMMIT 5 #define NFSV4OP_CREATE 6 #define NFSV4OP_DELEGPURGE 7 #define NFSV4OP_DELEGRETURN 8 #define NFSV4OP_GETATTR 9 #define NFSV4OP_GETFH 10 #define NFSV4OP_LINK 11 #define NFSV4OP_LOCK 12 #define NFSV4OP_LOCKT 13 #define NFSV4OP_LOCKU 14 #define NFSV4OP_LOOKUP 15 #define NFSV4OP_LOOKUPP 16 #define NFSV4OP_NVERIFY 17 #define NFSV4OP_OPEN 18 #define NFSV4OP_OPENATTR 19 #define NFSV4OP_OPENCONFIRM 20 #define NFSV4OP_OPENDOWNGRADE 21 #define NFSV4OP_PUTFH 22 #define NFSV4OP_PUTPUBFH 23 #define NFSV4OP_PUTROOTFH 24 #define NFSV4OP_READ 25 #define NFSV4OP_READDIR 26 #define NFSV4OP_READLINK 27 #define NFSV4OP_REMOVE 28 #define NFSV4OP_RENAME 29 #define NFSV4OP_RENEW 30 #define NFSV4OP_RESTOREFH 31 #define NFSV4OP_SAVEFH 32 #define NFSV4OP_SECINFO 33 #define NFSV4OP_SETATTR 34 #define NFSV4OP_SETCLIENTID 35 #define NFSV4OP_SETCLIENTIDCFRM 36 #define NFSV4OP_VERIFY 37 #define NFSV4OP_WRITE 38 #define NFSV4OP_RELEASELCKOWN 39 /* * Must be one greater than the last Operation#. */ #define NFSV4OP_NOPS 40 +/* + * Additional Ops for NFSv4.1. + */ +#define NFSV4OP_BACKCHANNELCTL 40 +#define NFSV4OP_BINDCONNTOSESS 41 +#define NFSV4OP_EXCHANGEID 42 +#define NFSV4OP_CREATESESSION 43 +#define NFSV4OP_DESTROYSESSION 44 +#define NFSV4OP_FREESTATEID 45 +#define NFSV4OP_GETDIRDELEG 46 +#define NFSV4OP_GETDEVINFO 47 +#define NFSV4OP_GETDEVLIST 48 +#define NFSV4OP_LAYOUTCOMMIT 49 +#define NFSV4OP_LAYOUTGET 50 +#define NFSV4OP_LAYOUTRETURN 51 +#define NFSV4OP_SECINFONONAME 52 +#define NFSV4OP_SEQUENCE 53 +#define NFSV4OP_SETSSV 54 +#define NFSV4OP_TESTSTATEID 55 +#define NFSV4OP_WANTDELEG 56 +#define NFSV4OP_DESTROYCLIENTID 57 +#define NFSV4OP_RECLAIMCOMPL 58 + +/* + * Must be one more than last op#. + */ +#define NFSV41_NOPS 59 + /* Quirky case if the illegal op code */ #define NFSV4OP_OPILLEGAL 10044 /* * Fake NFSV4OP_xxx used for nfsstat. Start at NFSV4OP_NOPS. */ #define NFSV4OP_SYMLINK (NFSV4OP_NOPS) #define NFSV4OP_MKDIR (NFSV4OP_NOPS + 1) #define NFSV4OP_RMDIR (NFSV4OP_NOPS + 2) #define NFSV4OP_READDIRPLUS (NFSV4OP_NOPS + 3) #define NFSV4OP_MKNOD (NFSV4OP_NOPS + 4) #define NFSV4OP_FSSTAT (NFSV4OP_NOPS + 5) #define NFSV4OP_FSINFO (NFSV4OP_NOPS + 6) #define NFSV4OP_PATHCONF (NFSV4OP_NOPS + 7) #define NFSV4OP_V3CREATE (NFSV4OP_NOPS + 8) /* * This is the count of the fake operations listed above. */ #define NFSV4OP_FAKENOPS 9 /* * and the Callback OPs */ #define NFSV4OP_CBGETATTR 3 #define NFSV4OP_CBRECALL 4 /* * Must be one greater than the last Callback Operation#. */ #define NFSV4OP_CBNOPS 5 /* + * Additional Callback Ops for NFSv4.1 only. Not yet in nfsstats. + */ +#define NFSV4OP_CBLAYOUTRECALL 5 +#define NFSV4OP_CBNOTIFY 6 +#define NFSV4OP_CBPUSHDELEG 7 +#define NFSV4OP_CBRECALLANY 8 +#define NFSV4OP_CBRECALLOBJAVAIL 9 +#define NFSV4OP_CBRECALLSLOT 10 +#define NFSV4OP_CBSEQUENCE 11 +#define NFSV4OP_CBWANTCANCELLED 12 +#define NFSV4OP_CBNOTIFYLOCK 13 +#define NFSV4OP_CBNOTIFYDEVID 14 + +/* * The lower numbers -> 21 are used by NFSv2 and v3. These define higher * numbers used by NFSv4. * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is * one greater than the last number. */ #ifndef NFS_V3NPROCS #define NFS_V3NPROCS 22 #define NFSPROC_LOOKUPP 22 #define NFSPROC_SETCLIENTID 23 #define NFSPROC_SETCLIENTIDCFRM 24 #define NFSPROC_LOCK 25 #define NFSPROC_LOCKU 26 #define NFSPROC_OPEN 27 #define NFSPROC_CLOSE 28 #define NFSPROC_OPENCONFIRM 29 #define NFSPROC_LOCKT 30 #define NFSPROC_OPENDOWNGRADE 31 #define NFSPROC_RENEW 32 #define NFSPROC_PUTROOTFH 33 #define NFSPROC_RELEASELCKOWN 34 #define NFSPROC_DELEGRETURN 35 #define NFSPROC_RETDELEGREMOVE 36 #define NFSPROC_RETDELEGRENAME1 37 #define NFSPROC_RETDELEGRENAME2 38 #define NFSPROC_GETACL 39 #define NFSPROC_SETACL 40 /* * Must be defined as one higher than the last Proc# above. */ #define NFSV4_NPROCS 41 + +/* Additional procedures for NFSv4.1. */ +#define NFSPROC_EXCHANGEID 41 +#define NFSPROC_CREATESESSION 42 +#define NFSPROC_DESTROYSESSION 43 +#define NFSPROC_DESTROYCLIENT 44 +#define NFSPROC_FREESTATEID 45 +#define NFSPROC_LAYOUTGET 46 +#define NFSPROC_GETDEVICEINFO 47 +#define NFSPROC_LAYOUTCOMMIT 48 +#define NFSPROC_LAYOUTRETURN 49 +#define NFSPROC_RECLAIMCOMPL 50 +#define NFSPROC_WRITEDS 51 +#define NFSPROC_READDS 52 +#define NFSPROC_COMMITDS 53 + +/* + * Must be defined as one higher than the last NFSv4.1 Proc# above. + */ +#define NFSV41_NPROCS 54 + #endif /* NFS_V3NPROCS */ /* * Stats structure */ struct ext_nfsstats { int attrcache_hits; int attrcache_misses; int lookupcache_hits; int lookupcache_misses; int direofcache_hits; int direofcache_misses; int accesscache_hits; int accesscache_misses; int biocache_reads; int read_bios; int read_physios; int biocache_writes; int write_bios; int write_physios; int biocache_readlinks; int readlink_bios; int biocache_readdirs; int readdir_bios; int rpccnt[NFSV4_NPROCS]; int rpcretries; int srvrpccnt[NFSV4OP_NOPS + NFSV4OP_FAKENOPS]; int srvrpc_errs; int srv_errs; int rpcrequests; int rpctimeouts; int rpcunexpected; int rpcinvalid; int srvcache_inproghits; int srvcache_idemdonehits; int srvcache_nonidemdonehits; int srvcache_misses; int srvcache_tcppeak; int srvcache_size; int srvclients; int srvopenowners; int srvopens; int srvlockowners; int srvlocks; int srvdelegates; int cbrpccnt[NFSV4OP_CBNOPS]; int clopenowners; int clopens; int cllockowners; int cllocks; int cldelegates; int cllocalopenowners; int cllocalopens; int cllocallockowners; int cllocallocks; }; #ifdef _KERNEL /* * Define the ext_nfsstats as nfsstats for the kernel code. */ #define nfsstats ext_nfsstats /* * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code. */ #ifndef NFS_NPROCS #define NFS_NPROCS NFSV4_NPROCS #endif #include #include #include #include #include +#include #include #include #include #include #include #include -#include #include #include /* * Just to keep nfs_var.h happy. */ struct nfs_vattr { int junk; }; struct nfsvattr { struct vattr na_vattr; nfsattrbit_t na_suppattr; u_int32_t na_mntonfileno; u_int64_t na_filesid[2]; }; #define na_type na_vattr.va_type #define na_mode na_vattr.va_mode #define na_nlink na_vattr.va_nlink #define na_uid na_vattr.va_uid #define na_gid na_vattr.va_gid #define na_fsid na_vattr.va_fsid #define na_fileid na_vattr.va_fileid #define na_size na_vattr.va_size #define na_blocksize na_vattr.va_blocksize #define na_atime na_vattr.va_atime #define na_mtime na_vattr.va_mtime #define na_ctime na_vattr.va_ctime #define na_gen na_vattr.va_gen #define na_flags na_vattr.va_flags #define na_rdev na_vattr.va_rdev #define na_bytes na_vattr.va_bytes #define na_filerev na_vattr.va_filerev #define na_vaflags na_vattr.va_vaflags #include /* * This is the header structure used for the lists, etc. (It has the * above record in it. */ struct nfsrv_stablefirst { LIST_HEAD(, nfsrv_stable) nsf_head; /* Head of nfsrv_stable list */ time_t nsf_eograce; /* Time grace period ends */ time_t *nsf_bootvals; /* Previous boottime values */ struct file *nsf_fp; /* File table pointer */ u_char nsf_flags; /* NFSNSF_ flags */ struct nfsf_rec nsf_rec; /* and above first record */ }; #define nsf_lease nsf_rec.lease #define nsf_numboots nsf_rec.numboots /* NFSNSF_xxx flags */ #define NFSNSF_UPDATEDONE 0x01 #define NFSNSF_GRACEOVER 0x02 #define NFSNSF_NEEDLOCK 0x04 #define NFSNSF_EXPIREDCLIENT 0x08 #define NFSNSF_NOOPENS 0x10 #define NFSNSF_OK 0x20 /* * Maximum number of boot times allowed in record. Although there is * really no need for a fixed upper bound, this serves as a sanity check * for a corrupted file. */ #define NFSNSF_MAXNUMBOOTS 10000 /* * This structure defines the other records in the file. The * nst_client array is actually the size of the client string name. */ struct nfst_rec { u_int16_t len; u_char flag; u_char client[1]; }; /* and the values for flag */ #define NFSNST_NEWSTATE 0x1 #define NFSNST_REVOKE 0x2 #define NFSNST_GOTSTATE 0x4 /* * This structure is linked onto nfsrv_stablefirst for the duration of * reclaim. */ struct nfsrv_stable { LIST_ENTRY(nfsrv_stable) nst_list; struct nfsclient *nst_clp; struct nfst_rec nst_rec; }; #define nst_timestamp nst_rec.timestamp #define nst_len nst_rec.len #define nst_flag nst_rec.flag #define nst_client nst_rec.client /* * At some point the server will run out of kernel storage for * state structures. For FreeBSD5.2, this results in a panic * kmem_map is full. It happens at well over 1000000 opens plus * locks on a PIII-800 with 256Mbytes, so that is where I've set * the limit. If your server panics due to too many opens/locks, * decrease the size of NFSRV_V4STATELIMIT. If you find the server * returning NFS4ERR_RESOURCE a lot and have lots of memory, try * increasing it. */ #define NFSRV_V4STATELIMIT 500000 /* Max # of Opens + Locks */ /* * The type required differs with BSDen (just the second arg). */ void nfsrvd_rcv(struct socket *, void *, int); /* * Macros for handling socket addresses. (Hopefully this makes the code * more portable, since I've noticed some 'BSD don't have sockaddrs in * mbufs any more.) */ #define NFSSOCKADDR(a, t) ((t)(a)) #define NFSSOCKADDRALLOC(a) \ do { \ MALLOC((a), struct sockaddr *, sizeof (struct sockaddr), \ M_SONAME, M_WAITOK); \ NFSBZERO((a), sizeof (struct sockaddr)); \ } while (0) #define NFSSOCKADDRSIZE(a, s) ((a)->sa_len = (s)) #define NFSSOCKADDRFREE(a) \ do { \ if (a) \ FREE((caddr_t)(a), M_SONAME); \ } while (0) /* * These should be defined as a process or thread structure, as required * for signal handling, etc. */ #define NFSNEWCRED(c) (crdup(c)) #define NFSPROCCRED(p) ((p)->td_ucred) #define NFSFREECRED(c) (crfree(c)) #define NFSUIOPROC(u, p) ((u)->uio_td = NULL) #define NFSPROCP(p) ((p)->td_proc) /* * Define these so that cn_hash and its length is ignored. */ #define NFSCNHASHZERO(c) #define NFSCNHASH(c, v) #define NCHNAMLEN 9999999 /* * Define these to use the time of day clock. */ #define NFSGETTIME(t) (getmicrotime(t)) #define NFSGETNANOTIME(t) (getnanotime(t)) /* * These macros are defined to initialize and set the timer routine. */ #define NFS_TIMERINIT \ newnfs_timer(NULL) /* * Handle SMP stuff: */ #define NFSSTATESPINLOCK extern struct mtx nfs_state_mutex #define NFSLOCKSTATE() mtx_lock(&nfs_state_mutex) #define NFSUNLOCKSTATE() mtx_unlock(&nfs_state_mutex) #define NFSSTATEMUTEXPTR (&nfs_state_mutex) #define NFSREQSPINLOCK extern struct mtx nfs_req_mutex #define NFSLOCKREQ() mtx_lock(&nfs_req_mutex) #define NFSUNLOCKREQ() mtx_unlock(&nfs_req_mutex) #define NFSCACHEMUTEX extern struct mtx nfs_cache_mutex #define NFSCACHEMUTEXPTR (&nfs_cache_mutex) #define NFSLOCKCACHE() mtx_lock(&nfs_cache_mutex) #define NFSUNLOCKCACHE() mtx_unlock(&nfs_cache_mutex) #define NFSCACHELOCKREQUIRED() mtx_assert(&nfs_cache_mutex, MA_OWNED) #define NFSSOCKMUTEX extern struct mtx nfs_slock_mutex #define NFSSOCKMUTEXPTR (&nfs_slock_mutex) #define NFSLOCKSOCK() mtx_lock(&nfs_slock_mutex) #define NFSUNLOCKSOCK() mtx_unlock(&nfs_slock_mutex) #define NFSNAMEIDMUTEX extern struct mtx nfs_nameid_mutex #define NFSLOCKNAMEID() mtx_lock(&nfs_nameid_mutex) #define NFSUNLOCKNAMEID() mtx_unlock(&nfs_nameid_mutex) #define NFSNAMEIDREQUIRED() mtx_assert(&nfs_nameid_mutex, MA_OWNED) #define NFSCLSTATEMUTEX extern struct mtx nfs_clstate_mutex #define NFSCLSTATEMUTEXPTR (&nfs_clstate_mutex) #define NFSLOCKCLSTATE() mtx_lock(&nfs_clstate_mutex) #define NFSUNLOCKCLSTATE() mtx_unlock(&nfs_clstate_mutex) #define NFSDLOCKMUTEX extern struct mtx newnfsd_mtx #define NFSDLOCKMUTEXPTR (&newnfsd_mtx) #define NFSD_LOCK() mtx_lock(&newnfsd_mtx) #define NFSD_UNLOCK() mtx_unlock(&newnfsd_mtx) #define NFSD_LOCK_ASSERT() mtx_assert(&newnfsd_mtx, MA_OWNED) #define NFSD_UNLOCK_ASSERT() mtx_assert(&newnfsd_mtx, MA_NOTOWNED) #define NFSV4ROOTLOCKMUTEX extern struct mtx nfs_v4root_mutex #define NFSV4ROOTLOCKMUTEXPTR (&nfs_v4root_mutex) #define NFSLOCKV4ROOTMUTEX() mtx_lock(&nfs_v4root_mutex) #define NFSUNLOCKV4ROOTMUTEX() mtx_unlock(&nfs_v4root_mutex) #define NFSLOCKNODE(n) mtx_lock(&((n)->n_mtx)) #define NFSUNLOCKNODE(n) mtx_unlock(&((n)->n_mtx)) #define NFSLOCKMNT(m) mtx_lock(&((m)->nm_mtx)) #define NFSUNLOCKMNT(m) mtx_unlock(&((m)->nm_mtx)) #define NFSLOCKREQUEST(r) mtx_lock(&((r)->r_mtx)) #define NFSUNLOCKREQUEST(r) mtx_unlock(&((r)->r_mtx)) #define NFSPROCLISTLOCK() sx_slock(&allproc_lock) #define NFSPROCLISTUNLOCK() sx_sunlock(&allproc_lock) #define NFSLOCKSOCKREQ(r) mtx_lock(&((r)->nr_mtx)) #define NFSUNLOCKSOCKREQ(r) mtx_unlock(&((r)->nr_mtx)) +#define NFSLOCKDS(d) mtx_lock(&((d)->nfsclds_mtx)) +#define NFSUNLOCKDS(d) mtx_unlock(&((d)->nfsclds_mtx)) /* * Use these macros to initialize/free a mutex. */ #define NFSINITSOCKMUTEX(m) mtx_init((m), "nfssock", NULL, MTX_DEF) #define NFSFREEMUTEX(m) mtx_destroy((m)) int nfsmsleep(void *, void *, int, const char *, struct timespec *); /* * And weird vm stuff in the nfs server. */ #define PDIRUNLOCK 0x0 #define MAX_COMMIT_COUNT (1024 * 1024) /* * Define these to handle the type of va_rdev. */ #define NFSMAKEDEV(m, n) makedev((m), (n)) #define NFSMAJOR(d) major(d) #define NFSMINOR(d) minor(d) /* * Define this to be the macro that returns the minimum size required * for a directory entry. */ #define DIRENT_SIZE(dp) GENERIC_DIRSIZ(dp) /* * The vnode tag for nfsv4root. */ #define VT_NFSV4ROOT "nfsv4root" /* * Define whatever it takes to do a vn_rdwr(). */ #define NFSD_RDWR(r, v, b, l, o, s, i, c, a, p) \ vn_rdwr((r), (v), (b), (l), (o), (s), (i), (c), NULL, (a), (p)) /* * Macros for handling memory for different BSDen. * NFSBCOPY(src, dst, len) - copies len bytes, non-overlapping * NFSOVBCOPY(src, dst, len) - ditto, but data areas might overlap * NFSBCMP(cp1, cp2, len) - compare len bytes, return 0 if same * NFSBZERO(cp, len) - set len bytes to 0x0 */ #define NFSBCOPY(s, d, l) bcopy((s), (d), (l)) #define NFSOVBCOPY(s, d, l) ovbcopy((s), (d), (l)) #define NFSBCMP(s, d, l) bcmp((s), (d), (l)) #define NFSBZERO(s, l) bzero((s), (l)) /* * Some queue.h files don't have these dfined in them. */ #define LIST_END(head) NULL #define SLIST_END(head) NULL #define TAILQ_END(head) NULL /* * This must be defined to be a global variable that increments once * per second, but never stops or goes backwards, even when a "date" * command changes the TOD clock. It is used for delta times for * leases, etc. */ #define NFSD_MONOSEC time_uptime /* * Declare the malloc types. */ MALLOC_DECLARE(M_NEWNFSRVCACHE); MALLOC_DECLARE(M_NEWNFSDCLIENT); MALLOC_DECLARE(M_NEWNFSDSTATE); MALLOC_DECLARE(M_NEWNFSDLOCK); MALLOC_DECLARE(M_NEWNFSDLOCKFILE); MALLOC_DECLARE(M_NEWNFSSTRING); MALLOC_DECLARE(M_NEWNFSUSERGROUP); MALLOC_DECLARE(M_NEWNFSDREQ); MALLOC_DECLARE(M_NEWNFSFH); MALLOC_DECLARE(M_NEWNFSCLOWNER); MALLOC_DECLARE(M_NEWNFSCLOPEN); MALLOC_DECLARE(M_NEWNFSCLDELEG); MALLOC_DECLARE(M_NEWNFSCLCLIENT); MALLOC_DECLARE(M_NEWNFSCLLOCKOWNER); MALLOC_DECLARE(M_NEWNFSCLLOCK); MALLOC_DECLARE(M_NEWNFSDIROFF); MALLOC_DECLARE(M_NEWNFSV4NODE); MALLOC_DECLARE(M_NEWNFSDIRECTIO); MALLOC_DECLARE(M_NEWNFSMNT); MALLOC_DECLARE(M_NEWNFSDROLLBACK); +MALLOC_DECLARE(M_NEWNFSLAYOUT); +MALLOC_DECLARE(M_NEWNFSFLAYOUT); +MALLOC_DECLARE(M_NEWNFSDEVINFO); +MALLOC_DECLARE(M_NEWNFSSOCKREQ); +MALLOC_DECLARE(M_NEWNFSCLDS); +MALLOC_DECLARE(M_NEWNFSLAYRECALL); #define M_NFSRVCACHE M_NEWNFSRVCACHE #define M_NFSDCLIENT M_NEWNFSDCLIENT #define M_NFSDSTATE M_NEWNFSDSTATE #define M_NFSDLOCK M_NEWNFSDLOCK #define M_NFSDLOCKFILE M_NEWNFSDLOCKFILE #define M_NFSSTRING M_NEWNFSSTRING #define M_NFSUSERGROUP M_NEWNFSUSERGROUP #define M_NFSDREQ M_NEWNFSDREQ #define M_NFSFH M_NEWNFSFH #define M_NFSCLOWNER M_NEWNFSCLOWNER #define M_NFSCLOPEN M_NEWNFSCLOPEN #define M_NFSCLDELEG M_NEWNFSCLDELEG #define M_NFSCLCLIENT M_NEWNFSCLCLIENT #define M_NFSCLLOCKOWNER M_NEWNFSCLLOCKOWNER #define M_NFSCLLOCK M_NEWNFSCLLOCK #define M_NFSDIROFF M_NEWNFSDIROFF #define M_NFSV4NODE M_NEWNFSV4NODE #define M_NFSDIRECTIO M_NEWNFSDIRECTIO #define M_NFSDROLLBACK M_NEWNFSDROLLBACK +#define M_NFSLAYOUT M_NEWNFSLAYOUT +#define M_NFSFLAYOUT M_NEWNFSFLAYOUT +#define M_NFSDEVINFO M_NEWNFSDEVINFO +#define M_NFSSOCKREQ M_NEWNFSSOCKREQ +#define M_NFSCLDS M_NEWNFSCLDS +#define M_NFSLAYRECALL M_NEWNFSLAYRECALL #define NFSINT_SIGMASK(set) \ (SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) || \ SIGISMEMBER(set, SIGHUP) || SIGISMEMBER(set, SIGKILL) || \ SIGISMEMBER(set, SIGQUIT)) /* * Convert a quota block count to byte count. */ #define NFSQUOTABLKTOBYTE(q, b) (q) *= (b) /* * Define this as the largest file size supported. (It should probably * be available via a VFS_xxx Op, but it isn't. */ #define NFSRV_MAXFILESIZE ((u_int64_t)0x800000000000) /* * Set this macro to index() or strchr(), whichever is supported. */ #define STRCHR(s, c) strchr((s), (c)) /* * Set the n_time in the client write rpc, as required. */ #define NFSWRITERPC_SETTIME(w, n, v4) \ do { \ if (w) { \ (n)->n_mtime = (n)->n_vattr.na_vattr.va_mtime; \ if (v4) \ (n)->n_change = (n)->n_vattr.na_vattr.va_filerev; \ } \ } while (0) /* * Fake value, just to make the client work. */ #define NFS_LATTR_NOSHRINK 1 /* * Prototypes for functions where the arguments vary for different ports. */ int nfscl_loadattrcache(struct vnode **, struct nfsvattr *, void *, void *, int, int); void newnfs_realign(struct mbuf **); /* * If the port runs on an SMP box that can enforce Atomic ops with low * overheads, define these as atomic increments/decrements. If not, * don't worry about it, since these are used for stats that can be * "out by one" without disastrous consequences. */ #define NFSINCRGLOBAL(a) ((a)++) /* * Assorted funky stuff to make things work under Darwin8. */ /* * These macros checks for a field in vattr being set. */ #define NFSATTRISSET(t, v, a) ((v)->a != (t)VNOVAL) #define NFSATTRISSETTIME(v, a) ((v)->a.tv_sec != VNOVAL) /* * Manipulate mount flags. */ #define NFSSTA_HASWRITEVERF 0x00040000 /* Has write verifier */ #define NFSSTA_GOTFSINFO 0x00100000 /* Got the fsinfo */ +#define NFSSTA_NOLAYOUTCOMMIT 0x04000000 /* Don't do LayoutCommit */ +#define NFSSTA_SESSPERSIST 0x08000000 /* Has a persistent session */ #define NFSSTA_TIMEO 0x10000000 /* Experiencing a timeout */ #define NFSSTA_LOCKTIMEO 0x20000000 /* Experiencing a lockd timeout */ #define NFSSTA_HASSETFSID 0x40000000 /* Has set the fsid */ +#define NFSSTA_PNFS 0x80000000 /* pNFS is enabled */ #define NFSHASNFSV3(n) ((n)->nm_flag & NFSMNT_NFSV3) #define NFSHASNFSV4(n) ((n)->nm_flag & NFSMNT_NFSV4) +#define NFSHASNFSV4N(n) ((n)->nm_minorvers > 0) #define NFSHASNFSV3OR4(n) ((n)->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) #define NFSHASGOTFSINFO(n) ((n)->nm_state & NFSSTA_GOTFSINFO) #define NFSHASHASSETFSID(n) ((n)->nm_state & NFSSTA_HASSETFSID) #define NFSHASSTRICT3530(n) ((n)->nm_flag & NFSMNT_STRICT3530) #define NFSHASWRITEVERF(n) ((n)->nm_state & NFSSTA_HASWRITEVERF) #define NFSHASINT(n) ((n)->nm_flag & NFSMNT_INT) #define NFSHASSOFT(n) ((n)->nm_flag & NFSMNT_SOFT) #define NFSHASINTORSOFT(n) ((n)->nm_flag & (NFSMNT_INT | NFSMNT_SOFT)) #define NFSHASDUMBTIMR(n) ((n)->nm_flag & NFSMNT_DUMBTIMR) #define NFSHASNOCONN(n) ((n)->nm_flag & NFSMNT_MNTD) #define NFSHASKERB(n) ((n)->nm_flag & NFSMNT_KERB) #define NFSHASALLGSSNAME(n) ((n)->nm_flag & NFSMNT_ALLGSSNAME) #define NFSHASINTEGRITY(n) ((n)->nm_flag & NFSMNT_INTEGRITY) #define NFSHASPRIVACY(n) ((n)->nm_flag & NFSMNT_PRIVACY) #define NFSSETWRITEVERF(n) ((n)->nm_state |= NFSSTA_HASWRITEVERF) #define NFSSETHASSETFSID(n) ((n)->nm_state |= NFSSTA_HASSETFSID) +#define NFSHASPNFSOPT(n) ((n)->nm_flag & NFSMNT_PNFS) +#define NFSHASNOLAYOUTCOMMIT(n) ((n)->nm_state & NFSSTA_NOLAYOUTCOMMIT) +#define NFSHASSESSPERSIST(n) ((n)->nm_state & NFSSTA_SESSPERSIST) +#define NFSHASPNFS(n) ((n)->nm_state & NFSSTA_PNFS) /* * Gets the stats field out of the mount structure. */ #define vfs_statfs(m) (&((m)->mnt_stat)) /* * Set boottime. */ #define NFSSETBOOTTIME(b) ((b) = boottime) /* * The size of directory blocks in the buffer cache. * MUST BE in the range of PAGE_SIZE <= NFS_DIRBLKSIZ <= MAXBSIZE!! */ #define NFS_DIRBLKSIZ (16 * DIRBLKSIZ) /* Must be a multiple of DIRBLKSIZ */ /* * Define these macros to access mnt_flag fields. */ #define NFSMNT_RDONLY(m) ((m)->mnt_flag & MNT_RDONLY) #endif /* _KERNEL */ /* * Define a structure similar to ufs_args for use in exporting the V4 root. */ struct nfsex_args { char *fspec; struct export_args export; }; /* * These export flags should be defined, but there are no bits left. * Maybe a separate mnt_exflag field could be added or the mnt_flag * field increased to 64 bits? */ #ifndef MNT_EXSTRICTACCESS #define MNT_EXSTRICTACCESS 0x0 #endif #ifndef MNT_EXV4ONLY #define MNT_EXV4ONLY 0x0 #endif #ifdef _KERNEL /* * Define this to invalidate the attribute cache for the nfs node. */ #define NFSINVALATTRCACHE(n) ((n)->n_attrstamp = 0) /* Used for FreeBSD only */ void nfsd_mntinit(void); /* * Define these for vnode lock/unlock ops. * * These are good abstractions to macro out, so that they can be added to * later, for debugging or stats, etc. */ #define NFSVOPLOCK(v, f) vn_lock((v), (f)) #define NFSVOPUNLOCK(v, f) VOP_UNLOCK((v), (f)) #define NFSVOPISLOCKED(v) VOP_ISLOCKED((v)) /* * Define ncl_hash(). */ #define ncl_hash(f, l) (fnv_32_buf((f), (l), FNV1_32_INIT)) int newnfs_iosize(struct nfsmount *); #ifdef NFS_DEBUG extern int nfs_debug; #define NFS_DEBUG_ASYNCIO 1 /* asynchronous i/o */ #define NFS_DEBUG_WG 2 /* server write gathering */ #define NFS_DEBUG_RC 4 /* server request caching */ #define NFS_DPF(cat, args) \ do { \ if (nfs_debug & NFS_DEBUG_##cat) printf args; \ } while (0) #else #define NFS_DPF(cat, args) #endif int newnfs_vncmpf(struct vnode *, void *); #ifndef NFS_MINDIRATTRTIMO #define NFS_MINDIRATTRTIMO 3 /* VDIR attrib cache timeout in sec */ #endif #ifndef NFS_MAXDIRATTRTIMO #define NFS_MAXDIRATTRTIMO 60 #endif /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; u_int32_t r_flags; /* flags on request, see below */ struct nfsmount *r_nmp; /* Client mnt ptr */ struct mtx r_mtx; /* Mutex lock for this structure */ }; #ifndef NFS_MAXBSIZE #define NFS_MAXBSIZE MAXBSIZE #endif /* * This macro checks to see if issuing of delegations is allowed for this * vnode. */ #ifdef VV_DISABLEDELEG #define NFSVNO_DELEGOK(v) \ ((v) == NULL || ((v)->v_vflag & VV_DISABLEDELEG) == 0) #else #define NFSVNO_DELEGOK(v) (1) #endif /* * Define this as the flags argument for msleep() when catching signals * while holding a resource that other threads would block for, such as * a vnode lock. */ #define NFS_PCATCH (PCATCH | PBDRY) #endif /* _KERNEL */ #endif /* _NFS_NFSPORT_H */ Index: head/sys/fs/nfs/nfsproto.h =================================================================== --- head/sys/fs/nfs/nfsproto.h (revision 244041) +++ head/sys/fs/nfs/nfsproto.h (revision 244042) @@ -1,1092 +1,1216 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSPROTO_H_ #define _NFS_NFSPROTO_H_ /* * nfs definitions as per the Version 2, 3 and 4 specs */ /* * Constants as defined in the NFS Version 2, 3 and 4 specs. * "NFS: Network File System Protocol Specification" RFC1094 * and in the "NFS: Network File System Version 3 Protocol * Specification" */ #define NFS_PORT 2049 #define NFS_PROG 100003 #define NFS_CALLBCKPROG 0x40000000 /* V4 only */ #define NFS_VER2 2 #define NFS_VER3 3 #define NFS_VER4 4 #define NFS_V2MAXDATA 8192 #define NFS_MAXDGRAMDATA 16384 #define NFS_MAXDATA NFS_MAXBSIZE #define NFS_MAXPATHLEN 1024 #define NFS_MAXNAMLEN 255 #define NFS_MAXPKTHDR 404 #define NFS_MAXPACKET (NFS_MAXDATA + 2048) #define NFS_MINPACKET 20 #define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */ #define NFSV4_MINORVERSION 0 /* V4 Minor version */ +#define NFSV41_MINORVERSION 1 /* V4 Minor version */ #define NFSV4_CBVERS 1 /* V4 CB Version */ +#define NFSV41_CBVERS 4 /* V4.1 CB Version */ #define NFSV4_SMALLSTR 50 /* Strings small enough for stack */ /* Stat numbers for rpc returns (version 2, 3 and 4) */ /* * These numbers are hard-wired in the RFCs, so they can't be changed. * The code currently assumes that the ones < 10000 are the same as * sys/errno.h and that sys/errno.h will never go as high as 10000. * If the value in sys/errno.h of any entry listed below is changed, * the NFS code must be modified to do the mapping between them. * (You can ignore NFSERR_WFLUSH, since it is never actually used.) */ #define NFSERR_OK 0 #define NFSERR_PERM 1 #define NFSERR_NOENT 2 #define NFSERR_IO 5 #define NFSERR_NXIO 6 #define NFSERR_ACCES 13 #define NFSERR_EXIST 17 #define NFSERR_XDEV 18 /* Version 3, 4 only */ #define NFSERR_NODEV 19 #define NFSERR_NOTDIR 20 #define NFSERR_ISDIR 21 #define NFSERR_INVAL 22 /* Version 3, 4 only */ #define NFSERR_FBIG 27 #define NFSERR_NOSPC 28 #define NFSERR_ROFS 30 #define NFSERR_MLINK 31 /* Version 3, 4 only */ #define NFSERR_NAMETOL 63 #define NFSERR_NOTEMPTY 66 #define NFSERR_DQUOT 69 #define NFSERR_STALE 70 #define NFSERR_REMOTE 71 /* Version 3 only */ #define NFSERR_WFLUSH 99 /* Version 2 only */ #define NFSERR_BADHANDLE 10001 /* These are Version 3, 4 only */ #define NFSERR_NOT_SYNC 10002 /* Version 3 Only */ #define NFSERR_BAD_COOKIE 10003 #define NFSERR_NOTSUPP 10004 #define NFSERR_TOOSMALL 10005 #define NFSERR_SERVERFAULT 10006 #define NFSERR_BADTYPE 10007 #define NFSERR_DELAY 10008 /* Called NFSERR_JUKEBOX for V3 */ #define NFSERR_SAME 10009 /* These are Version 4 only */ #define NFSERR_DENIED 10010 #define NFSERR_EXPIRED 10011 #define NFSERR_LOCKED 10012 #define NFSERR_GRACE 10013 #define NFSERR_FHEXPIRED 10014 #define NFSERR_SHAREDENIED 10015 #define NFSERR_WRONGSEC 10016 #define NFSERR_CLIDINUSE 10017 #define NFSERR_RESOURCE 10018 #define NFSERR_MOVED 10019 #define NFSERR_NOFILEHANDLE 10020 #define NFSERR_MINORVERMISMATCH 10021 #define NFSERR_STALECLIENTID 10022 #define NFSERR_STALESTATEID 10023 #define NFSERR_OLDSTATEID 10024 #define NFSERR_BADSTATEID 10025 #define NFSERR_BADSEQID 10026 #define NFSERR_NOTSAME 10027 #define NFSERR_LOCKRANGE 10028 #define NFSERR_SYMLINK 10029 #define NFSERR_RESTOREFH 10030 #define NFSERR_LEASEMOVED 10031 #define NFSERR_ATTRNOTSUPP 10032 #define NFSERR_NOGRACE 10033 #define NFSERR_RECLAIMBAD 10034 #define NFSERR_RECLAIMCONFLICT 10035 #define NFSERR_BADXDR 10036 #define NFSERR_LOCKSHELD 10037 #define NFSERR_OPENMODE 10038 #define NFSERR_BADOWNER 10039 #define NFSERR_BADCHAR 10040 #define NFSERR_BADNAME 10041 #define NFSERR_BADRANGE 10042 #define NFSERR_LOCKNOTSUPP 10043 #define NFSERR_OPILLEGAL 10044 #define NFSERR_DEADLOCK 10045 #define NFSERR_FILEOPEN 10046 #define NFSERR_ADMINREVOKED 10047 #define NFSERR_CBPATHDOWN 10048 +/* NFSv4.1 specific errors. */ +#define NFSERR_BADIOMODE 10049 +#define NFSERR_BADLAYOUT 10050 +#define NFSERR_BADSESSIONDIGEST 10051 +#define NFSERR_BADSESSION 10052 +#define NFSERR_BADSLOT 10053 +#define NFSERR_COMPLETEALREADY 10054 +#define NFSERR_NOTBNDTOSESS 10055 +#define NFSERR_DELEGALREADYWANT 10056 +#define NFSERR_BACKCHANBUSY 10057 +#define NFSERR_LAYOUTTRYLATER 10058 +#define NFSERR_LAYOUTUNAVAIL 10059 +#define NFSERR_NOMATCHLAYOUT 10060 +#define NFSERR_RECALLCONFLICT 10061 +#define NFSERR_UNKNLAYOUTTYPE 10062 +#define NFSERR_SEQMISORDERED 10063 +#define NFSERR_SEQUENCEPOS 10064 +#define NFSERR_REQTOOBIG 10065 +#define NFSERR_REPTOOBIG 10066 +#define NFSERR_REPTOOBIGTOCACHE 10067 +#define NFSERR_RETRYUNCACHEDREP 10068 +#define NFSERR_UNSAFECOMPOUND 10069 +#define NFSERR_TOOMANYOPS 10070 +#define NFSERR_OPNOTINSESS 10071 +#define NFSERR_HASHALGUNSUPP 10072 +#define NFSERR_CLIENTIDBUSY 10074 +#define NFSERR_PNFSIOHOLE 10075 +#define NFSERR_SEQFALSERETRY 10076 +#define NFSERR_BADHIGHSLOT 10077 +#define NFSERR_DEADSESSION 10078 +#define NFSERR_ENCRALGUNSUPP 10079 +#define NFSERR_PNFSNOLAYOUT 10080 +#define NFSERR_NOTONLYOP 10081 +#define NFSERR_WRONGCRED 10082 +#define NFSERR_WRONGTYPE 10083 +#define NFSERR_DIRDELEGUNAVAIL 10084 +#define NFSERR_REJECTDELEG 10085 +#define NFSERR_RETURNCONFLICT 10086 +#define NFSERR_DELEGREVOKED 10087 + #define NFSERR_STALEWRITEVERF 30001 /* Fake return for nfs_commit() */ #define NFSERR_DONTREPLY 30003 /* Don't process request */ #define NFSERR_RETVOID 30004 /* Return void, not error */ #define NFSERR_REPLYFROMCACHE 30005 /* Reply from recent request cache */ #define NFSERR_STALEDONTRECOVER 30006 /* Don't initiate recovery */ #define NFSERR_RPCERR 0x40000000 /* Mark an RPC layer error */ #define NFSERR_AUTHERR 0x80000000 /* Mark an authentication error */ #define NFSERR_RPCMISMATCH (NFSERR_RPCERR | RPC_MISMATCH) #define NFSERR_PROGUNAVAIL (NFSERR_RPCERR | RPC_PROGUNAVAIL) #define NFSERR_PROGMISMATCH (NFSERR_RPCERR | RPC_PROGMISMATCH) #define NFSERR_PROGNOTV4 (NFSERR_RPCERR | 0xffff) #define NFSERR_PROCUNAVAIL (NFSERR_RPCERR | RPC_PROCUNAVAIL) #define NFSERR_GARBAGE (NFSERR_RPCERR | RPC_GARBAGE) /* Sizes in bytes of various nfs rpc components */ #define NFSX_UNSIGNED 4 #define NFSX_HYPER (2 * NFSX_UNSIGNED) /* specific to NFS Version 2 */ #define NFSX_V2FH 32 #define NFSX_V2FATTR 68 #define NFSX_V2SATTR 32 #define NFSX_V2COOKIE 4 #define NFSX_V2STATFS 20 /* specific to NFS Version 3 */ #define NFSX_V3FHMAX 64 /* max. allowed by protocol */ #define NFSX_V3FATTR 84 #define NFSX_V3SATTR 60 /* max. all fields filled in */ #define NFSX_V3SRVSATTR (sizeof (struct nfsv3_sattr)) #define NFSX_V3POSTOPATTR (NFSX_V3FATTR + NFSX_UNSIGNED) #define NFSX_V3WCCDATA (NFSX_V3POSTOPATTR + 8 * NFSX_UNSIGNED) #define NFSX_V3STATFS 52 #define NFSX_V3FSINFO 48 #define NFSX_V3PATHCONF 24 /* specific to NFS Version 4 */ #define NFSX_V4FHMAX 128 #define NFSX_V4FSID (2 * NFSX_HYPER) #define NFSX_V4SPECDATA (2 * NFSX_UNSIGNED) #define NFSX_V4TIME (NFSX_HYPER + NFSX_UNSIGNED) #define NFSX_V4SETTIME (NFSX_UNSIGNED + NFSX_V4TIME) +#define NFSX_V4SESSIONID 16 +#define NFSX_V4DEVICEID 16 /* sizes common to multiple NFS versions */ #define NFSX_FHMAX (NFSX_V4FHMAX) #define NFSX_MYFH (sizeof (fhandle_t)) /* size this server uses */ #define NFSX_VERF 8 #define NFSX_STATEIDOTHER 12 #define NFSX_STATEID (NFSX_UNSIGNED + NFSX_STATEIDOTHER) #define NFSX_GSSH 12 /* variants for multiple versions */ #define NFSX_STATFS(v3) ((v3) ? NFSX_V3STATFS : NFSX_V2STATFS) /* nfs rpc procedure numbers (before version mapping) */ #define NFSPROC_NULL 0 #define NFSPROC_GETATTR 1 #define NFSPROC_SETATTR 2 #define NFSPROC_LOOKUP 3 #define NFSPROC_ACCESS 4 #define NFSPROC_READLINK 5 #define NFSPROC_READ 6 #define NFSPROC_WRITE 7 #define NFSPROC_CREATE 8 #define NFSPROC_MKDIR 9 #define NFSPROC_SYMLINK 10 #define NFSPROC_MKNOD 11 #define NFSPROC_REMOVE 12 #define NFSPROC_RMDIR 13 #define NFSPROC_RENAME 14 #define NFSPROC_LINK 15 #define NFSPROC_READDIR 16 #define NFSPROC_READDIRPLUS 17 #define NFSPROC_FSSTAT 18 #define NFSPROC_FSINFO 19 #define NFSPROC_PATHCONF 20 #define NFSPROC_COMMIT 21 /* * The lower numbers -> 21 are used by NFSv2 and v3. These define higher * numbers used by NFSv4. * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is * one greater than the last number. */ #ifndef NFS_V3NPROCS #define NFS_V3NPROCS 22 #define NFSPROC_LOOKUPP 22 #define NFSPROC_SETCLIENTID 23 #define NFSPROC_SETCLIENTIDCFRM 24 #define NFSPROC_LOCK 25 #define NFSPROC_LOCKU 26 #define NFSPROC_OPEN 27 #define NFSPROC_CLOSE 28 #define NFSPROC_OPENCONFIRM 29 #define NFSPROC_LOCKT 30 #define NFSPROC_OPENDOWNGRADE 31 #define NFSPROC_RENEW 32 #define NFSPROC_PUTROOTFH 33 #define NFSPROC_RELEASELCKOWN 34 #define NFSPROC_DELEGRETURN 35 #define NFSPROC_RETDELEGREMOVE 36 #define NFSPROC_RETDELEGRENAME1 37 #define NFSPROC_RETDELEGRENAME2 38 #define NFSPROC_GETACL 39 #define NFSPROC_SETACL 40 /* * Must be defined as one higher than the last Proc# above. */ #define NFSV4_NPROCS 41 + +/* Additional procedures for NFSv4.1. */ +#define NFSPROC_EXCHANGEID 41 +#define NFSPROC_CREATESESSION 42 +#define NFSPROC_DESTROYSESSION 43 +#define NFSPROC_DESTROYCLIENT 44 +#define NFSPROC_FREESTATEID 45 +#define NFSPROC_LAYOUTGET 46 +#define NFSPROC_GETDEVICEINFO 47 +#define NFSPROC_LAYOUTCOMMIT 48 +#define NFSPROC_LAYOUTRETURN 49 +#define NFSPROC_RECLAIMCOMPL 50 +#define NFSPROC_WRITEDS 51 +#define NFSPROC_READDS 52 +#define NFSPROC_COMMITDS 53 + +/* + * Must be defined as one higher than the last NFSv4.1 Proc# above. + */ +#define NFSV41_NPROCS 54 + #endif /* NFS_V3NPROCS */ /* * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code. */ #ifndef NFS_NPROCS #define NFS_NPROCS NFSV4_NPROCS #endif /* * NFSPROC_NOOP is a fake op# that can't be the same as any V2/3/4 Procedure - * or Operation#. Since the NFS V4 Op #s go higher, use NFSV4OP_NOPS, which + * or Operation#. Since the NFS V4 Op #s go higher, use NFSV41_NOPS, which * is one greater than the highest Op#. */ -#define NFSPROC_NOOP NFSV4OP_NOPS +#define NFSPROC_NOOP NFSV41_NOPS /* Actual Version 2 procedure numbers */ #define NFSV2PROC_NULL 0 #define NFSV2PROC_GETATTR 1 #define NFSV2PROC_SETATTR 2 #define NFSV2PROC_NOOP 3 #define NFSV2PROC_ROOT NFSV2PROC_NOOP /* Obsolete */ #define NFSV2PROC_LOOKUP 4 #define NFSV2PROC_READLINK 5 #define NFSV2PROC_READ 6 #define NFSV2PROC_WRITECACHE NFSV2PROC_NOOP /* Obsolete */ #define NFSV2PROC_WRITE 8 #define NFSV2PROC_CREATE 9 #define NFSV2PROC_REMOVE 10 #define NFSV2PROC_RENAME 11 #define NFSV2PROC_LINK 12 #define NFSV2PROC_SYMLINK 13 #define NFSV2PROC_MKDIR 14 #define NFSV2PROC_RMDIR 15 #define NFSV2PROC_READDIR 16 #define NFSV2PROC_STATFS 17 /* * V4 Procedure numbers */ #define NFSV4PROC_COMPOUND 1 #define NFSV4PROC_CBNULL 0 #define NFSV4PROC_CBCOMPOUND 1 /* * Constants used by the Version 3 and 4 protocols for various RPCs */ #define NFSV3SATTRTIME_DONTCHANGE 0 #define NFSV3SATTRTIME_TOSERVER 1 #define NFSV3SATTRTIME_TOCLIENT 2 #define NFSV4SATTRTIME_TOSERVER 0 #define NFSV4SATTRTIME_TOCLIENT 1 #define NFSV4LOCKT_READ 1 #define NFSV4LOCKT_WRITE 2 #define NFSV4LOCKT_READW 3 #define NFSV4LOCKT_WRITEW 4 #define NFSV4LOCKT_RELEASE 5 #define NFSV4OPEN_NOCREATE 0 #define NFSV4OPEN_CREATE 1 #define NFSV4OPEN_CLAIMNULL 0 #define NFSV4OPEN_CLAIMPREVIOUS 1 #define NFSV4OPEN_CLAIMDELEGATECUR 2 #define NFSV4OPEN_CLAIMDELEGATEPREV 3 #define NFSV4OPEN_DELEGATENONE 0 #define NFSV4OPEN_DELEGATEREAD 1 #define NFSV4OPEN_DELEGATEWRITE 2 #define NFSV4OPEN_LIMITSIZE 1 #define NFSV4OPEN_LIMITBLOCKS 2 /* * Nfs V4 ACE stuff */ #define NFSV4ACE_ALLOWEDTYPE 0x00000000 #define NFSV4ACE_DENIEDTYPE 0x00000001 #define NFSV4ACE_AUDITTYPE 0x00000002 #define NFSV4ACE_ALARMTYPE 0x00000003 #define NFSV4ACE_SUPALLOWED 0x00000001 #define NFSV4ACE_SUPDENIED 0x00000002 #define NFSV4ACE_SUPAUDIT 0x00000004 #define NFSV4ACE_SUPALARM 0x00000008 #define NFSV4ACE_SUPTYPES (NFSV4ACE_SUPALLOWED | NFSV4ACE_SUPDENIED) #define NFSV4ACE_FILEINHERIT 0x00000001 #define NFSV4ACE_DIRECTORYINHERIT 0x00000002 #define NFSV4ACE_NOPROPAGATEINHERIT 0x00000004 #define NFSV4ACE_INHERITONLY 0x00000008 #define NFSV4ACE_SUCCESSFULACCESS 0x00000010 #define NFSV4ACE_FAILEDACCESS 0x00000020 #define NFSV4ACE_IDENTIFIERGROUP 0x00000040 #define NFSV4ACE_READDATA 0x00000001 #define NFSV4ACE_LISTDIRECTORY 0x00000001 #define NFSV4ACE_WRITEDATA 0x00000002 #define NFSV4ACE_ADDFILE 0x00000002 #define NFSV4ACE_APPENDDATA 0x00000004 #define NFSV4ACE_ADDSUBDIRECTORY 0x00000004 #define NFSV4ACE_READNAMEDATTR 0x00000008 #define NFSV4ACE_WRITENAMEDATTR 0x00000010 #define NFSV4ACE_EXECUTE 0x00000020 #define NFSV4ACE_SEARCH 0x00000020 #define NFSV4ACE_DELETECHILD 0x00000040 #define NFSV4ACE_READATTRIBUTES 0x00000080 #define NFSV4ACE_WRITEATTRIBUTES 0x00000100 #define NFSV4ACE_DELETE 0x00010000 #define NFSV4ACE_READACL 0x00020000 #define NFSV4ACE_WRITEACL 0x00040000 #define NFSV4ACE_WRITEOWNER 0x00080000 #define NFSV4ACE_SYNCHRONIZE 0x00100000 /* * Here are the mappings between mode bits and acl mask bits for * directories and other files. * (Named attributes have not been included, since named attributes are * not yet supported.) * The mailing list seems to indicate that NFSV4ACE_EXECUTE refers to * searching a directory, although I can't find a statement of that in * the RFC. */ #define NFSV4ACE_ALLFILESMASK (NFSV4ACE_READATTRIBUTES | NFSV4ACE_READACL) #define NFSV4ACE_OWNERMASK (NFSV4ACE_WRITEATTRIBUTES | NFSV4ACE_WRITEACL) #define NFSV4ACE_DIRREADMASK NFSV4ACE_LISTDIRECTORY #define NFSV4ACE_DIREXECUTEMASK NFSV4ACE_EXECUTE #define NFSV4ACE_DIRWRITEMASK (NFSV4ACE_ADDFILE | \ NFSV4ACE_ADDSUBDIRECTORY | NFSV4ACE_DELETECHILD) #define NFSV4ACE_READMASK NFSV4ACE_READDATA #define NFSV4ACE_WRITEMASK (NFSV4ACE_WRITEDATA | NFSV4ACE_APPENDDATA) #define NFSV4ACE_EXECUTEMASK NFSV4ACE_EXECUTE #define NFSV4ACE_ALLFILEBITS (NFSV4ACE_READMASK | NFSV4ACE_WRITEMASK | \ NFSV4ACE_EXECUTEMASK | NFSV4ACE_SYNCHRONIZE) #define NFSV4ACE_ALLDIRBITS (NFSV4ACE_DIRREADMASK | \ NFSV4ACE_DIRWRITEMASK | NFSV4ACE_DIREXECUTEMASK) #define NFSV4ACE_AUDITMASK 0x0 /* * These GENERIC masks are not used and are no longer believed to be useful. */ #define NFSV4ACE_GENERICREAD 0x00120081 #define NFSV4ACE_GENERICWRITE 0x00160106 #define NFSV4ACE_GENERICEXECUTE 0x001200a0 #define NFSSTATEID_PUTALLZERO 0 #define NFSSTATEID_PUTALLONE 1 #define NFSSTATEID_PUTSTATEID 2 +#define NFSSTATEID_PUTSEQIDZERO 3 /* * Bits for share access and deny. */ #define NFSV4OPEN_ACCESSREAD 0x00000001 #define NFSV4OPEN_ACCESSWRITE 0x00000002 #define NFSV4OPEN_ACCESSBOTH 0x00000003 #define NFSV4OPEN_DENYNONE 0x00000000 #define NFSV4OPEN_DENYREAD 0x00000001 #define NFSV4OPEN_DENYWRITE 0x00000002 #define NFSV4OPEN_DENYBOTH 0x00000003 /* * Open result flags * (The first two are in the spec. The rest are used internally.) */ #define NFSV4OPEN_RESULTCONFIRM 0x00000002 #define NFSV4OPEN_LOCKTYPEPOSIX 0x00000004 #define NFSV4OPEN_RFLAGS \ (NFSV4OPEN_RESULTCONFIRM | NFSV4OPEN_LOCKTYPEPOSIX) #define NFSV4OPEN_RECALL 0x00010000 #define NFSV4OPEN_READDELEGATE 0x00020000 #define NFSV4OPEN_WRITEDELEGATE 0x00040000 /* * NFS V4 File Handle types */ #define NFSV4FHTYPE_PERSISTENT 0x0 #define NFSV4FHTYPE_NOEXPIREWITHOPEN 0x1 #define NFSV4FHTYPE_VOLATILEANY 0x2 #define NFSV4FHTYPE_VOLATILEMIGRATE 0x4 #define NFSV4FHTYPE_VOLATILERENAME 0x8 /* * Maximum size of V4 opaque strings. */ #define NFSV4_OPAQUELIMIT 1024 /* * These are the same for V3 and V4. */ #define NFSACCESS_READ 0x01 #define NFSACCESS_LOOKUP 0x02 #define NFSACCESS_MODIFY 0x04 #define NFSACCESS_EXTEND 0x08 #define NFSACCESS_DELETE 0x10 #define NFSACCESS_EXECUTE 0x20 #define NFSWRITE_UNSTABLE 0 #define NFSWRITE_DATASYNC 1 #define NFSWRITE_FILESYNC 2 #define NFSCREATE_UNCHECKED 0 #define NFSCREATE_GUARDED 1 #define NFSCREATE_EXCLUSIVE 2 +#define NFSCREATE_EXCLUSIVE41 3 #define NFSV3FSINFO_LINK 0x01 #define NFSV3FSINFO_SYMLINK 0x02 #define NFSV3FSINFO_HOMOGENEOUS 0x08 #define NFSV3FSINFO_CANSETTIME 0x10 + +/* Flags for Exchange ID */ +#define NFSV4EXCH_SUPPMOVEDREFER 0x00000001 +#define NFSV4EXCH_SUPPMOVEDMIGR 0x00000002 +#define NFSV4EXCH_BINDPRINCSTATEID 0x00000100 +#define NFSV4EXCH_USENONPNFS 0x00010000 +#define NFSV4EXCH_USEPNFSMDS 0x00020000 +#define NFSV4EXCH_USEPNFSDS 0x00040000 +#define NFSV4EXCH_MASKPNFS 0x00070000 +#define NFSV4EXCH_UPDCONFIRMEDRECA 0x40000000 +#define NFSV4EXCH_CONFIRMEDR 0x80000000 + +/* State Protects */ +#define NFSV4EXCH_SP4NONE 0 +#define NFSV4EXCH_SP4MACHCRED 1 +#define NFSV4EXCH_SP4SSV 2 + +/* Flags for Create Session */ +#define NFSV4CRSESS_PERSIST 0x00000001 +#define NFSV4CRSESS_CONNBACKCHAN 0x00000002 +#define NFSV4CRSESS_CONNRDMA 0x00000004 + +/* Flags for Sequence */ +#define NFSV4SEQ_CBPATHDOWN 0x00000001 +#define NFSV4SEQ_CBGSSCONTEXPIRING 0x00000002 +#define NFSV4SEQ_CBGSSCONTEXPIRED 0x00000004 +#define NFSV4SEQ_EXPIREDALLSTATEREVOKED 0x00000008 +#define NFSV4SEQ_EXPIREDSOMESTATEREVOKED 0x00000010 +#define NFSV4SEQ_ADMINSTATEREVOKED 0x00000020 +#define NFSV4SEQ_RECALLABLESTATEREVOKED 0x00000040 +#define NFSV4SEQ_LEASEMOVED 0x00000080 +#define NFSV4SEQ_RESTARTRECLAIMNEEDED 0x00000100 +#define NFSV4SEQ_CBPATHDOWNSESSION 0x00000200 +#define NFSV4SEQ_BACKCHANNELFAULT 0x00000400 +#define NFSV4SEQ_DEVIDCHANGED 0x00000800 +#define NFSV4SEQ_DEVIDDELETED 0x00001000 + +/* Flags for Layout. */ +#define NFSLAYOUTRETURN_FILE 1 +#define NFSLAYOUTRETURN_FSID 2 +#define NFSLAYOUTRETURN_ALL 3 + +#define NFSLAYOUT_NFSV4_1_FILES 0x1 +#define NFSLAYOUT_OSD2_OBJECTS 0x2 +#define NFSLAYOUT_BLOCK_VOLUME 0x3 + +#define NFSLAYOUTIOMODE_READ 1 +#define NFSLAYOUTIOMODE_RW 2 +#define NFSLAYOUTIOMODE_ANY 3 + +/* Flags for Get Device Info. */ +#define NFSDEVICEIDNOTIFY_CHANGEBIT 0x1 +#define NFSDEVICEIDNOTIFY_DELETEBIT 0x2 + +/* Flags for File Layout. */ +#define NFSFLAYUTIL_DENSE 0x1 +#define NFSFLAYUTIL_COMMIT_THRU_MDS 0x2 /* Conversion macros */ #define vtonfsv2_mode(t,m) \ txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : \ MAKEIMODE((t), (m))) #define vtonfsv34_mode(m) txdr_unsigned((m) & 07777) #define nfstov_mode(a) (fxdr_unsigned(u_int16_t, (a))&07777) #define vtonfsv2_type(a) (((u_int32_t)(a)) >= 9 ? txdr_unsigned(NFNON) : \ txdr_unsigned(newnfsv2_type[((u_int32_t)(a))])) #define vtonfsv34_type(a) (((u_int32_t)(a)) >= 9 ? txdr_unsigned(NFNON) : \ txdr_unsigned(nfsv34_type[((u_int32_t)(a))])) #define nfsv2tov_type(a) newnv2tov_type[fxdr_unsigned(u_int32_t,(a))&0x7] #define nfsv34tov_type(a) nv34tov_type[fxdr_unsigned(u_int32_t,(a))&0x7] #define vtonfs_dtype(a) (((u_int32_t)(a)) >= 9 ? IFTODT(VTTOIF(VNON)) : \ IFTODT(VTTOIF(a))) /* File types */ typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5, NFSOCK=6, NFFIFO=7, NFATTRDIR=8, NFNAMEDATTR=9 } nfstype; /* Structs for common parts of the rpc's */ struct nfsv2_time { u_int32_t nfsv2_sec; u_int32_t nfsv2_usec; }; typedef struct nfsv2_time nfstime2; struct nfsv3_time { u_int32_t nfsv3_sec; u_int32_t nfsv3_nsec; }; typedef struct nfsv3_time nfstime3; struct nfsv4_time { u_int32_t nfsv4_highsec; u_int32_t nfsv4_sec; u_int32_t nfsv4_nsec; }; typedef struct nfsv4_time nfstime4; /* * Quads are defined as arrays of 2 longs to ensure dense packing for the * protocol and to facilitate xdr conversion. */ struct nfs_uquad { u_int32_t nfsuquad[2]; }; typedef struct nfs_uquad nfsuint64; /* * Used to convert between two u_longs and a u_quad_t. */ union nfs_quadconvert { u_int32_t lval[2]; u_quad_t qval; }; typedef union nfs_quadconvert nfsquad_t; /* * NFS Version 3 special file number. */ struct nfsv3_spec { u_int32_t specdata1; u_int32_t specdata2; }; typedef struct nfsv3_spec nfsv3spec; /* * File attributes and setable attributes. These structures cover both * NFS version 2 and the version 3 protocol. Note that the union is only * used so that one pointer can refer to both variants. These structures * go out on the wire and must be densely packed, so no quad data types * are used. (all fields are longs or u_longs or structures of same) * NB: You can't do sizeof(struct nfs_fattr), you must use the * NFSX_FATTR(v3) macro. */ struct nfs_fattr { u_int32_t fa_type; u_int32_t fa_mode; u_int32_t fa_nlink; u_int32_t fa_uid; u_int32_t fa_gid; union { struct { u_int32_t nfsv2fa_size; u_int32_t nfsv2fa_blocksize; u_int32_t nfsv2fa_rdev; u_int32_t nfsv2fa_blocks; u_int32_t nfsv2fa_fsid; u_int32_t nfsv2fa_fileid; nfstime2 nfsv2fa_atime; nfstime2 nfsv2fa_mtime; nfstime2 nfsv2fa_ctime; } fa_nfsv2; struct { nfsuint64 nfsv3fa_size; nfsuint64 nfsv3fa_used; nfsv3spec nfsv3fa_rdev; nfsuint64 nfsv3fa_fsid; nfsuint64 nfsv3fa_fileid; nfstime3 nfsv3fa_atime; nfstime3 nfsv3fa_mtime; nfstime3 nfsv3fa_ctime; } fa_nfsv3; } fa_un; }; /* and some ugly defines for accessing union components */ #define fa2_size fa_un.fa_nfsv2.nfsv2fa_size #define fa2_blocksize fa_un.fa_nfsv2.nfsv2fa_blocksize #define fa2_rdev fa_un.fa_nfsv2.nfsv2fa_rdev #define fa2_blocks fa_un.fa_nfsv2.nfsv2fa_blocks #define fa2_fsid fa_un.fa_nfsv2.nfsv2fa_fsid #define fa2_fileid fa_un.fa_nfsv2.nfsv2fa_fileid #define fa2_atime fa_un.fa_nfsv2.nfsv2fa_atime #define fa2_mtime fa_un.fa_nfsv2.nfsv2fa_mtime #define fa2_ctime fa_un.fa_nfsv2.nfsv2fa_ctime #define fa3_size fa_un.fa_nfsv3.nfsv3fa_size #define fa3_used fa_un.fa_nfsv3.nfsv3fa_used #define fa3_rdev fa_un.fa_nfsv3.nfsv3fa_rdev #define fa3_fsid fa_un.fa_nfsv3.nfsv3fa_fsid #define fa3_fileid fa_un.fa_nfsv3.nfsv3fa_fileid #define fa3_atime fa_un.fa_nfsv3.nfsv3fa_atime #define fa3_mtime fa_un.fa_nfsv3.nfsv3fa_mtime #define fa3_ctime fa_un.fa_nfsv3.nfsv3fa_ctime struct nfsv2_sattr { u_int32_t sa_mode; u_int32_t sa_uid; u_int32_t sa_gid; u_int32_t sa_size; nfstime2 sa_atime; nfstime2 sa_mtime; }; /* * NFS Version 3 sattr structure for the new node creation case. */ struct nfsv3_sattr { u_int32_t sa_modetrue; u_int32_t sa_mode; u_int32_t sa_uidfalse; u_int32_t sa_gidfalse; u_int32_t sa_sizefalse; u_int32_t sa_atimetype; nfstime3 sa_atime; u_int32_t sa_mtimetype; nfstime3 sa_mtime; }; /* * The attribute bits used for V4. * NFSATTRBIT_xxx defines the attribute# (and its bit position) * NFSATTRBM_xxx is a 32bit mask with the correct bit set within the * appropriate 32bit word. * NFSATTRBIT_MAX is one greater than the largest NFSATTRBIT_xxx */ #define NFSATTRBIT_SUPPORTEDATTRS 0 #define NFSATTRBIT_TYPE 1 #define NFSATTRBIT_FHEXPIRETYPE 2 #define NFSATTRBIT_CHANGE 3 #define NFSATTRBIT_SIZE 4 #define NFSATTRBIT_LINKSUPPORT 5 #define NFSATTRBIT_SYMLINKSUPPORT 6 #define NFSATTRBIT_NAMEDATTR 7 #define NFSATTRBIT_FSID 8 #define NFSATTRBIT_UNIQUEHANDLES 9 #define NFSATTRBIT_LEASETIME 10 #define NFSATTRBIT_RDATTRERROR 11 #define NFSATTRBIT_ACL 12 #define NFSATTRBIT_ACLSUPPORT 13 #define NFSATTRBIT_ARCHIVE 14 #define NFSATTRBIT_CANSETTIME 15 #define NFSATTRBIT_CASEINSENSITIVE 16 #define NFSATTRBIT_CASEPRESERVING 17 #define NFSATTRBIT_CHOWNRESTRICTED 18 #define NFSATTRBIT_FILEHANDLE 19 #define NFSATTRBIT_FILEID 20 #define NFSATTRBIT_FILESAVAIL 21 #define NFSATTRBIT_FILESFREE 22 #define NFSATTRBIT_FILESTOTAL 23 #define NFSATTRBIT_FSLOCATIONS 24 #define NFSATTRBIT_HIDDEN 25 #define NFSATTRBIT_HOMOGENEOUS 26 #define NFSATTRBIT_MAXFILESIZE 27 #define NFSATTRBIT_MAXLINK 28 #define NFSATTRBIT_MAXNAME 29 #define NFSATTRBIT_MAXREAD 30 #define NFSATTRBIT_MAXWRITE 31 #define NFSATTRBIT_MIMETYPE 32 #define NFSATTRBIT_MODE 33 #define NFSATTRBIT_NOTRUNC 34 #define NFSATTRBIT_NUMLINKS 35 #define NFSATTRBIT_OWNER 36 #define NFSATTRBIT_OWNERGROUP 37 #define NFSATTRBIT_QUOTAHARD 38 #define NFSATTRBIT_QUOTASOFT 39 #define NFSATTRBIT_QUOTAUSED 40 #define NFSATTRBIT_RAWDEV 41 #define NFSATTRBIT_SPACEAVAIL 42 #define NFSATTRBIT_SPACEFREE 43 #define NFSATTRBIT_SPACETOTAL 44 #define NFSATTRBIT_SPACEUSED 45 #define NFSATTRBIT_SYSTEM 46 #define NFSATTRBIT_TIMEACCESS 47 #define NFSATTRBIT_TIMEACCESSSET 48 #define NFSATTRBIT_TIMEBACKUP 49 #define NFSATTRBIT_TIMECREATE 50 #define NFSATTRBIT_TIMEDELTA 51 #define NFSATTRBIT_TIMEMETADATA 52 #define NFSATTRBIT_TIMEMODIFY 53 #define NFSATTRBIT_TIMEMODIFYSET 54 #define NFSATTRBIT_MOUNTEDONFILEID 55 #define NFSATTRBM_SUPPORTEDATTRS 0x00000001 #define NFSATTRBM_TYPE 0x00000002 #define NFSATTRBM_FHEXPIRETYPE 0x00000004 #define NFSATTRBM_CHANGE 0x00000008 #define NFSATTRBM_SIZE 0x00000010 #define NFSATTRBM_LINKSUPPORT 0x00000020 #define NFSATTRBM_SYMLINKSUPPORT 0x00000040 #define NFSATTRBM_NAMEDATTR 0x00000080 #define NFSATTRBM_FSID 0x00000100 #define NFSATTRBM_UNIQUEHANDLES 0x00000200 #define NFSATTRBM_LEASETIME 0x00000400 #define NFSATTRBM_RDATTRERROR 0x00000800 #define NFSATTRBM_ACL 0x00001000 #define NFSATTRBM_ACLSUPPORT 0x00002000 #define NFSATTRBM_ARCHIVE 0x00004000 #define NFSATTRBM_CANSETTIME 0x00008000 #define NFSATTRBM_CASEINSENSITIVE 0x00010000 #define NFSATTRBM_CASEPRESERVING 0x00020000 #define NFSATTRBM_CHOWNRESTRICTED 0x00040000 #define NFSATTRBM_FILEHANDLE 0x00080000 #define NFSATTRBM_FILEID 0x00100000 #define NFSATTRBM_FILESAVAIL 0x00200000 #define NFSATTRBM_FILESFREE 0x00400000 #define NFSATTRBM_FILESTOTAL 0x00800000 #define NFSATTRBM_FSLOCATIONS 0x01000000 #define NFSATTRBM_HIDDEN 0x02000000 #define NFSATTRBM_HOMOGENEOUS 0x04000000 #define NFSATTRBM_MAXFILESIZE 0x08000000 #define NFSATTRBM_MAXLINK 0x10000000 #define NFSATTRBM_MAXNAME 0x20000000 #define NFSATTRBM_MAXREAD 0x40000000 #define NFSATTRBM_MAXWRITE 0x80000000 #define NFSATTRBM_MIMETYPE 0x00000001 #define NFSATTRBM_MODE 0x00000002 #define NFSATTRBM_NOTRUNC 0x00000004 #define NFSATTRBM_NUMLINKS 0x00000008 #define NFSATTRBM_OWNER 0x00000010 #define NFSATTRBM_OWNERGROUP 0x00000020 #define NFSATTRBM_QUOTAHARD 0x00000040 #define NFSATTRBM_QUOTASOFT 0x00000080 #define NFSATTRBM_QUOTAUSED 0x00000100 #define NFSATTRBM_RAWDEV 0x00000200 #define NFSATTRBM_SPACEAVAIL 0x00000400 #define NFSATTRBM_SPACEFREE 0x00000800 #define NFSATTRBM_SPACETOTAL 0x00001000 #define NFSATTRBM_SPACEUSED 0x00002000 #define NFSATTRBM_SYSTEM 0x00004000 #define NFSATTRBM_TIMEACCESS 0x00008000 #define NFSATTRBM_TIMEACCESSSET 0x00010000 #define NFSATTRBM_TIMEBACKUP 0x00020000 #define NFSATTRBM_TIMECREATE 0x00040000 #define NFSATTRBM_TIMEDELTA 0x00080000 #define NFSATTRBM_TIMEMETADATA 0x00100000 #define NFSATTRBM_TIMEMODIFY 0x00200000 #define NFSATTRBM_TIMEMODIFYSET 0x00400000 #define NFSATTRBM_MOUNTEDONFILEID 0x00800000 #define NFSATTRBIT_MAX 56 /* * Sets of attributes that are supported, by words in the bitmap. */ /* * NFSATTRBIT_SUPPORTED - SUPP0 - bits 0<->31 * SUPP1 - bits 32<->63 */ #define NFSATTRBIT_SUPP0 \ (NFSATTRBM_SUPPORTEDATTRS | \ NFSATTRBM_TYPE | \ NFSATTRBM_FHEXPIRETYPE | \ NFSATTRBM_CHANGE | \ NFSATTRBM_SIZE | \ NFSATTRBM_LINKSUPPORT | \ NFSATTRBM_SYMLINKSUPPORT | \ NFSATTRBM_NAMEDATTR | \ NFSATTRBM_FSID | \ NFSATTRBM_UNIQUEHANDLES | \ NFSATTRBM_LEASETIME | \ NFSATTRBM_RDATTRERROR | \ NFSATTRBM_ACL | \ NFSATTRBM_ACLSUPPORT | \ NFSATTRBM_CANSETTIME | \ NFSATTRBM_CASEINSENSITIVE | \ NFSATTRBM_CASEPRESERVING | \ NFSATTRBM_CHOWNRESTRICTED | \ NFSATTRBM_FILEHANDLE | \ NFSATTRBM_FILEID | \ NFSATTRBM_FILESAVAIL | \ NFSATTRBM_FILESFREE | \ NFSATTRBM_FILESTOTAL | \ NFSATTRBM_FSLOCATIONS | \ NFSATTRBM_HOMOGENEOUS | \ NFSATTRBM_MAXFILESIZE | \ NFSATTRBM_MAXLINK | \ NFSATTRBM_MAXNAME | \ NFSATTRBM_MAXREAD | \ NFSATTRBM_MAXWRITE) /* * NFSATTRBIT_S1 - subset of SUPP1 - OR of the following bits: */ #define NFSATTRBIT_S1 \ (NFSATTRBM_MODE | \ NFSATTRBM_NOTRUNC | \ NFSATTRBM_NUMLINKS | \ NFSATTRBM_OWNER | \ NFSATTRBM_OWNERGROUP | \ NFSATTRBM_RAWDEV | \ NFSATTRBM_SPACEAVAIL | \ NFSATTRBM_SPACEFREE | \ NFSATTRBM_SPACETOTAL | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMEDELTA | \ NFSATTRBM_TIMEMETADATA | \ NFSATTRBM_TIMEMODIFY | \ NFSATTRBM_MOUNTEDONFILEID) #ifdef QUOTA /* * If QUOTA OR in NFSATTRBIT_QUOTAHARD, NFSATTRBIT_QUOTASOFT and * NFSATTRBIT_QUOTAUSED. */ #define NFSATTRBIT_SUPP1 (NFSATTRBIT_S1 | \ NFSATTRBM_QUOTAHARD | \ NFSATTRBM_QUOTASOFT | \ NFSATTRBM_QUOTAUSED) #else #define NFSATTRBIT_SUPP1 NFSATTRBIT_S1 #endif /* * NFSATTRBIT_SUPPSETONLY is the OR of NFSATTRBIT_TIMEACCESSSET and * NFSATTRBIT_TIMEMODIFYSET. */ #define NFSATTRBIT_SUPPSETONLY (NFSATTRBM_TIMEACCESSSET | \ NFSATTRBM_TIMEMODIFYSET) /* * NFSATTRBIT_SETABLE - SETABLE0 - bits 0<->31 * SETABLE1 - bits 32<->63 */ #define NFSATTRBIT_SETABLE0 \ (NFSATTRBM_SIZE | \ NFSATTRBM_ACL) #define NFSATTRBIT_SETABLE1 \ (NFSATTRBM_MODE | \ NFSATTRBM_OWNER | \ NFSATTRBM_OWNERGROUP | \ NFSATTRBM_TIMEACCESSSET | \ NFSATTRBM_TIMEMODIFYSET) /* * Set of attributes that the getattr vnode op needs. * OR of the following bits. * NFSATTRBIT_GETATTR0 - bits 0<->31 */ #define NFSATTRBIT_GETATTR0 \ (NFSATTRBM_SUPPORTEDATTRS | \ NFSATTRBM_TYPE | \ NFSATTRBM_CHANGE | \ NFSATTRBM_SIZE | \ NFSATTRBM_FSID | \ NFSATTRBM_FILEID | \ NFSATTRBM_MAXREAD) /* * NFSATTRBIT_GETATTR1 - bits 32<->63 */ #define NFSATTRBIT_GETATTR1 \ (NFSATTRBM_MODE | \ NFSATTRBM_NUMLINKS | \ NFSATTRBM_OWNER | \ NFSATTRBM_OWNERGROUP | \ NFSATTRBM_RAWDEV | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMEMETADATA | \ NFSATTRBM_TIMEMODIFY) /* * Subset of the above that the Write RPC gets. * OR of the following bits. * NFSATTRBIT_WRITEGETATTR0 - bits 0<->31 */ #define NFSATTRBIT_WRITEGETATTR0 \ (NFSATTRBM_SUPPORTEDATTRS | \ NFSATTRBM_TYPE | \ NFSATTRBM_CHANGE | \ NFSATTRBM_SIZE | \ NFSATTRBM_FSID | \ NFSATTRBM_FILEID | \ NFSATTRBM_MAXREAD) /* * NFSATTRBIT_WRITEGETATTR1 - bits 32<->63 */ #define NFSATTRBIT_WRITEGETATTR1 \ (NFSATTRBM_MODE | \ NFSATTRBM_NUMLINKS | \ NFSATTRBM_RAWDEV | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMEMETADATA | \ NFSATTRBM_TIMEMODIFY) /* * Set of attributes that the wccattr operation op needs. * OR of the following bits. * NFSATTRBIT_WCCATTR0 - bits 0<->31 */ #define NFSATTRBIT_WCCATTR0 0 /* * NFSATTRBIT_WCCATTR1 - bits 32<->63 */ #define NFSATTRBIT_WCCATTR1 \ (NFSATTRBM_TIMEMODIFY) /* * NFSATTRBIT_CBGETATTR0 - bits 0<->31 */ #define NFSATTRBIT_CBGETATTR0 (NFSATTRBM_CHANGE | NFSATTRBM_SIZE) /* * NFSATTRBIT_CBGETATTR1 - bits 32<->63 */ #define NFSATTRBIT_CBGETATTR1 0x0 /* * Sets of attributes that require a VFS_STATFS() call to get the * values of. * NFSATTRBIT_STATFS0 - bits 0<->31 */ #define NFSATTRBIT_STATFS0 \ (NFSATTRBM_LINKSUPPORT | \ NFSATTRBM_SYMLINKSUPPORT | \ NFSATTRBM_CANSETTIME | \ NFSATTRBM_FILESAVAIL | \ NFSATTRBM_FILESFREE | \ NFSATTRBM_FILESTOTAL | \ NFSATTRBM_HOMOGENEOUS | \ NFSATTRBM_MAXFILESIZE | \ NFSATTRBM_MAXNAME | \ NFSATTRBM_MAXREAD | \ NFSATTRBM_MAXWRITE) /* * NFSATTRBIT_STATFS1 - bits 32<->63 */ #define NFSATTRBIT_STATFS1 \ (NFSATTRBM_QUOTAHARD | \ NFSATTRBM_QUOTASOFT | \ NFSATTRBM_QUOTAUSED | \ NFSATTRBM_SPACEAVAIL | \ NFSATTRBM_SPACEFREE | \ NFSATTRBM_SPACETOTAL | \ NFSATTRBM_SPACEUSED | \ NFSATTRBM_TIMEDELTA) /* * These are the bits that are needed by the nfs_statfs() call. * (The regular getattr bits are or'd in so the vnode gets the correct * type, etc.) * NFSGETATTRBIT_STATFS0 - bits 0<->31 */ #define NFSGETATTRBIT_STATFS0 (NFSATTRBIT_GETATTR0 | \ NFSATTRBM_LINKSUPPORT | \ NFSATTRBM_SYMLINKSUPPORT | \ NFSATTRBM_CANSETTIME | \ NFSATTRBM_FILESFREE | \ NFSATTRBM_FILESTOTAL | \ NFSATTRBM_HOMOGENEOUS | \ NFSATTRBM_MAXFILESIZE | \ NFSATTRBM_MAXNAME | \ NFSATTRBM_MAXREAD | \ NFSATTRBM_MAXWRITE) /* * NFSGETATTRBIT_STATFS1 - bits 32<->63 */ #define NFSGETATTRBIT_STATFS1 (NFSATTRBIT_GETATTR1 | \ NFSATTRBM_SPACEAVAIL | \ NFSATTRBM_SPACEFREE | \ NFSATTRBM_SPACETOTAL | \ NFSATTRBM_TIMEDELTA) /* * Set of attributes for the equivalent of an nfsv3 pathconf rpc. * NFSGETATTRBIT_PATHCONF0 - bits 0<->31 */ #define NFSGETATTRBIT_PATHCONF0 (NFSATTRBIT_GETATTR0 | \ NFSATTRBM_CASEINSENSITIVE | \ NFSATTRBM_CASEPRESERVING | \ NFSATTRBM_CHOWNRESTRICTED | \ NFSATTRBM_MAXLINK | \ NFSATTRBM_MAXNAME) /* * NFSGETATTRBIT_PATHCONF1 - bits 32<->63 */ #define NFSGETATTRBIT_PATHCONF1 (NFSATTRBIT_GETATTR1 | \ NFSATTRBM_NOTRUNC) /* * Sets of attributes required by readdir and readdirplus. * NFSATTRBIT_READDIRPLUS0 (NFSATTRBIT_GETATTR0 | NFSATTRBIT_FILEHANDLE | * NFSATTRBIT_RDATTRERROR) */ #define NFSATTRBIT_READDIRPLUS0 (NFSATTRBIT_GETATTR0 | NFSATTRBM_FILEHANDLE | \ NFSATTRBM_RDATTRERROR) #define NFSATTRBIT_READDIRPLUS1 NFSATTRBIT_GETATTR1 /* * Set of attributes supported by Referral vnodes. */ #define NFSATTRBIT_REFERRAL0 (NFSATTRBM_TYPE | NFSATTRBM_FSID | \ NFSATTRBM_RDATTRERROR | NFSATTRBM_FSLOCATIONS) #define NFSATTRBIT_REFERRAL1 NFSATTRBM_MOUNTEDONFILEID /* * Structure for data handled by the statfs rpc. Since some fields are * u_int64_t, this cannot be used for copying data on/off the wire, due * to alignment concerns. */ struct nfsstatfs { union { struct { u_int32_t nfsv2sf_tsize; u_int32_t nfsv2sf_bsize; u_int32_t nfsv2sf_blocks; u_int32_t nfsv2sf_bfree; u_int32_t nfsv2sf_bavail; } sf_nfsv2; struct { u_int64_t nfsv3sf_tbytes; u_int64_t nfsv3sf_fbytes; u_int64_t nfsv3sf_abytes; u_int64_t nfsv3sf_tfiles; u_int64_t nfsv3sf_ffiles; u_int64_t nfsv3sf_afiles; u_int32_t nfsv3sf_invarsec; } sf_nfsv3; } sf_un; }; #define sf_tsize sf_un.sf_nfsv2.nfsv2sf_tsize #define sf_bsize sf_un.sf_nfsv2.nfsv2sf_bsize #define sf_blocks sf_un.sf_nfsv2.nfsv2sf_blocks #define sf_bfree sf_un.sf_nfsv2.nfsv2sf_bfree #define sf_bavail sf_un.sf_nfsv2.nfsv2sf_bavail #define sf_tbytes sf_un.sf_nfsv3.nfsv3sf_tbytes #define sf_fbytes sf_un.sf_nfsv3.nfsv3sf_fbytes #define sf_abytes sf_un.sf_nfsv3.nfsv3sf_abytes #define sf_tfiles sf_un.sf_nfsv3.nfsv3sf_tfiles #define sf_ffiles sf_un.sf_nfsv3.nfsv3sf_ffiles #define sf_afiles sf_un.sf_nfsv3.nfsv3sf_afiles #define sf_invarsec sf_un.sf_nfsv3.nfsv3sf_invarsec /* * Now defined using u_int64_t for the 64 bit field(s). * (Cannot be used to move data on/off the wire, due to alignment concerns.) */ struct nfsfsinfo { u_int32_t fs_rtmax; u_int32_t fs_rtpref; u_int32_t fs_rtmult; u_int32_t fs_wtmax; u_int32_t fs_wtpref; u_int32_t fs_wtmult; u_int32_t fs_dtpref; u_int64_t fs_maxfilesize; struct timespec fs_timedelta; u_int32_t fs_properties; }; /* * Bits for fs_properties */ #define NFSV3_FSFLINK 0x1 #define NFSV3_FSFSYMLINK 0x2 #define NFSV3_FSFHOMOGENEOUS 0x4 #define NFSV3_FSFCANSETTIME 0x8 /* * Yikes, overload fs_rtmult as fs_maxname for V4. */ #define fs_maxname fs_rtmult struct nfsv3_pathconf { u_int32_t pc_linkmax; u_int32_t pc_namemax; u_int32_t pc_notrunc; u_int32_t pc_chownrestricted; u_int32_t pc_caseinsensitive; u_int32_t pc_casepreserving; }; /* * NFS V4 data structures. */ struct nfsv4stateid { u_int32_t seqid; u_int32_t other[NFSX_STATEIDOTHER / NFSX_UNSIGNED]; }; typedef struct nfsv4stateid nfsv4stateid_t; #endif /* _NFS_NFSPROTO_H_ */ Index: head/sys/fs/nfsclient/nfs_clbio.c =================================================================== --- head/sys/fs/nfsclient/nfs_clbio.c (revision 244041) +++ head/sys/fs/nfsclient/nfs_clbio.c (revision 244042) @@ -1,1862 +1,1864 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int newnfs_directio_allow_mmap; extern struct nfsstats newnfsstats; extern struct mtx ncl_iod_mutex; extern int ncl_numasync; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; extern int newnfs_directio_enable; extern int nfs_keep_dirty_on_error; int ncl_pbuf_freecnt = -1; /* start out unlimited */ static struct buf *nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct thread *td); static int nfs_directio_write(struct vnode *vp, struct uio *uiop, struct ucred *cred, int ioflag); /* * Vnode op for VM getpages. */ int ncl_getpages(struct vop_getpages_args *ap) { int i, error, nextoff, size, toff, count, npages; struct uio uio; struct iovec iov; vm_offset_t kva; struct buf *bp; struct vnode *vp; struct thread *td; struct ucred *cred; struct nfsmount *nmp; vm_object_t object; vm_page_t *pages; struct nfsnode *np; vp = ap->a_vp; np = VTONFS(vp); td = curthread; /* XXX */ cred = curthread->td_ucred; /* XXX */ nmp = VFSTONFS(vp->v_mount); pages = ap->a_m; count = ap->a_count; if ((object = vp->v_object) == NULL) { ncl_printf("nfs_getpages: called with non-merged cache vnode??\n"); return (VM_PAGER_ERROR); } if (newnfs_directio_enable && !newnfs_directio_allow_mmap) { mtx_lock(&np->n_mtx); if ((np->n_flag & NNONCACHE) && (vp->v_type == VREG)) { mtx_unlock(&np->n_mtx); ncl_printf("nfs_getpages: called on non-cacheable vnode??\n"); return (VM_PAGER_ERROR); } else mtx_unlock(&np->n_mtx); } mtx_lock(&nmp->nm_mtx); if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { mtx_unlock(&nmp->nm_mtx); /* We'll never get here for v4, because we always have fsinfo */ (void)ncl_fsinfo(nmp, vp, cred, td); } else mtx_unlock(&nmp->nm_mtx); npages = btoc(count); /* * If the requested page is partially valid, just return it and * allow the pager to zero-out the blanks. Partially valid pages * can only occur at the file EOF. */ VM_OBJECT_LOCK(object); if (pages[ap->a_reqpage]->valid != 0) { for (i = 0; i < npages; ++i) { if (i != ap->a_reqpage) { vm_page_lock(pages[i]); vm_page_free(pages[i]); vm_page_unlock(pages[i]); } } VM_OBJECT_UNLOCK(object); return (0); } VM_OBJECT_UNLOCK(object); /* * We use only the kva address for the buffer, but this is extremely * convienient and fast. */ bp = getpbuf(&ncl_pbuf_freecnt); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, pages, npages); PCPU_INC(cnt.v_vnodein); PCPU_ADD(cnt.v_vnodepgsin, npages); iov.iov_base = (caddr_t) kva; iov.iov_len = count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(pages[0]->pindex); uio.uio_resid = count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; error = ncl_readrpc(vp, &uio, cred); pmap_qremove(kva, npages); relpbuf(bp, &ncl_pbuf_freecnt); if (error && (uio.uio_resid == count)) { ncl_printf("nfs_getpages: error %d\n", error); VM_OBJECT_LOCK(object); for (i = 0; i < npages; ++i) { if (i != ap->a_reqpage) { vm_page_lock(pages[i]); vm_page_free(pages[i]); vm_page_unlock(pages[i]); } } VM_OBJECT_UNLOCK(object); return (VM_PAGER_ERROR); } /* * Calculate the number of bytes read and validate only that number * of bytes. Note that due to pending writes, size may be 0. This * does not mean that the remaining data is invalid! */ size = count - uio.uio_resid; VM_OBJECT_LOCK(object); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; m = pages[i]; if (nextoff <= size) { /* * Read operation filled an entire page */ m->valid = VM_PAGE_BITS_ALL; KASSERT(m->dirty == 0, ("nfs_getpages: page %p is dirty", m)); } else if (size > toff) { /* * Read operation filled a partial page. */ m->valid = 0; vm_page_set_valid_range(m, 0, size - toff); KASSERT(m->dirty == 0, ("nfs_getpages: page %p is dirty", m)); } else { /* * Read operation was short. If no error * occured we may have hit a zero-fill * section. We leave valid set to 0, and page * is freed by vm_page_readahead_finish() if * its index is not equal to requested, or * page is zeroed and set valid by * vm_pager_get_pages() for requested page. */ ; } if (i != ap->a_reqpage) vm_page_readahead_finish(m); } VM_OBJECT_UNLOCK(object); return (0); } /* * Vnode op for VM putpages. */ int ncl_putpages(struct vop_putpages_args *ap) { struct uio uio; struct iovec iov; vm_offset_t kva; struct buf *bp; int iomode, must_commit, i, error, npages, count; off_t offset; int *rtvals; struct vnode *vp; struct thread *td; struct ucred *cred; struct nfsmount *nmp; struct nfsnode *np; vm_page_t *pages; vp = ap->a_vp; np = VTONFS(vp); td = curthread; /* XXX */ /* Set the cred to n_writecred for the write rpcs. */ if (np->n_writecred != NULL) cred = crhold(np->n_writecred); else cred = crhold(curthread->td_ucred); /* XXX */ nmp = VFSTONFS(vp->v_mount); pages = ap->a_m; count = ap->a_count; rtvals = ap->a_rtvals; npages = btoc(count); offset = IDX_TO_OFF(pages[0]->pindex); mtx_lock(&nmp->nm_mtx); if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { mtx_unlock(&nmp->nm_mtx); (void)ncl_fsinfo(nmp, vp, cred, td); } else mtx_unlock(&nmp->nm_mtx); mtx_lock(&np->n_mtx); if (newnfs_directio_enable && !newnfs_directio_allow_mmap && (np->n_flag & NNONCACHE) && (vp->v_type == VREG)) { mtx_unlock(&np->n_mtx); ncl_printf("ncl_putpages: called on noncache-able vnode??\n"); mtx_lock(&np->n_mtx); } for (i = 0; i < npages; i++) rtvals[i] = VM_PAGER_ERROR; /* * When putting pages, do not extend file past EOF. */ if (offset + count > np->n_size) { count = np->n_size - offset; if (count < 0) count = 0; } mtx_unlock(&np->n_mtx); /* * We use only the kva address for the buffer, but this is extremely * convienient and fast. */ bp = getpbuf(&ncl_pbuf_freecnt); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, pages, npages); PCPU_INC(cnt.v_vnodeout); PCPU_ADD(cnt.v_vnodepgsout, count); iov.iov_base = (caddr_t) kva; iov.iov_len = count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = offset; uio.uio_resid = count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_td = td; if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0) iomode = NFSWRITE_UNSTABLE; else iomode = NFSWRITE_FILESYNC; error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit, 0); crfree(cred); pmap_qremove(kva, npages); relpbuf(bp, &ncl_pbuf_freecnt); if (error == 0 || !nfs_keep_dirty_on_error) { vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid); if (must_commit) ncl_clearcommit(vp->v_mount); } return rtvals[0]; } /* * For nfs, cache consistency can only be maintained approximately. * Although RFC1094 does not specify the criteria, the following is * believed to be compatible with the reference port. * For nfs: * If the file's modify time on the server has changed since the * last read rpc or you have written to the file, * you may have lost data cache consistency with the * server, so flush all of the file's data out of the cache. * Then force a getattr rpc to ensure that you have up to date * attributes. * NB: This implies that cache data can be read when up to * NFS_ATTRTIMEO seconds out of date. If you find that you need current * attributes this could be forced by setting n_attrstamp to 0 before * the VOP_GETATTR() call. */ static inline int nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred) { int error = 0; struct vattr vattr; struct nfsnode *np = VTONFS(vp); int old_lock; /* * Grab the exclusive lock before checking whether the cache is * consistent. * XXX - We can make this cheaper later (by acquiring cheaper locks). * But for now, this suffices. */ old_lock = ncl_upgrade_vnlock(vp); if (vp->v_iflag & VI_DOOMED) { ncl_downgrade_vnlock(vp, old_lock); return (EBADF); } mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); if (vp->v_type != VREG) { if (vp->v_type != VDIR) panic("nfs: bioread, not dir"); ncl_invaldir(vp); error = ncl_vinvalbuf(vp, V_SAVE, td, 1); if (error) goto out; } np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); error = VOP_GETATTR(vp, &vattr, cred); if (error) goto out; mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; mtx_unlock(&np->n_mtx); } else { mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, cred); if (error) return (error); mtx_lock(&np->n_mtx); if ((np->n_flag & NSIZECHANGED) || (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) { mtx_unlock(&np->n_mtx); if (vp->v_type == VDIR) ncl_invaldir(vp); error = ncl_vinvalbuf(vp, V_SAVE, td, 1); if (error) goto out; mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; np->n_flag &= ~NSIZECHANGED; } mtx_unlock(&np->n_mtx); } out: ncl_downgrade_vnlock(vp, old_lock); return error; } /* * Vnode op for read using bio */ int ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) { struct nfsnode *np = VTONFS(vp); int biosize, i; struct buf *bp, *rabp; struct thread *td; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn, rabn; int bcount; int seqcount; int nra, error = 0, n = 0, on = 0; off_t tmp_off; KASSERT(uio->uio_rw == UIO_READ, ("ncl_read mode")); if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) /* XXX VDIR cookies can be negative */ return (EINVAL); td = uio->uio_td; mtx_lock(&nmp->nm_mtx); if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { mtx_unlock(&nmp->nm_mtx); (void)ncl_fsinfo(nmp, vp, cred, td); mtx_lock(&nmp->nm_mtx); } if (nmp->nm_rsize == 0 || nmp->nm_readdirsize == 0) (void) newnfs_iosize(nmp); tmp_off = uio->uio_offset + uio->uio_resid; if (vp->v_type != VDIR && (tmp_off > nmp->nm_maxfilesize || tmp_off < uio->uio_offset)) { mtx_unlock(&nmp->nm_mtx); return (EFBIG); } mtx_unlock(&nmp->nm_mtx); if (newnfs_directio_enable && (ioflag & IO_DIRECT) && (vp->v_type == VREG)) /* No caching/ no readaheads. Just read data into the user buffer */ return ncl_readrpc(vp, uio, cred); biosize = vp->v_bufobj.bo_bsize; seqcount = (int)((off_t)(ioflag >> IO_SEQSHIFT) * biosize / BKVASIZE); error = nfs_bioread_check_cons(vp, td, cred); if (error) return error; do { u_quad_t nsize; mtx_lock(&np->n_mtx); nsize = np->n_size; mtx_unlock(&np->n_mtx); switch (vp->v_type) { case VREG: NFSINCRGLOBAL(newnfsstats.biocache_reads); lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); /* * Start the read ahead(s), as required. */ if (nmp->nm_readahead > 0) { for (nra = 0; nra < nmp->nm_readahead && nra < seqcount && (off_t)(lbn + 1 + nra) * biosize < nsize; nra++) { rabn = lbn + 1 + nra; if (incore(&vp->v_bufobj, rabn) == NULL) { rabp = nfs_getcacheblk(vp, rabn, biosize, td); if (!rabp) { error = newnfs_sigintr(nmp, td); return (error ? error : EINTR); } if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= B_ASYNC; rabp->b_iocmd = BIO_READ; vfs_busy_pages(rabp, 0); if (ncl_asyncio(nmp, rabp, cred, td)) { rabp->b_flags |= B_INVAL; rabp->b_ioflags |= BIO_ERROR; vfs_unbusy_pages(rabp); brelse(rabp); break; } } else { brelse(rabp); } } } } /* Note that bcount is *not* DEV_BSIZE aligned. */ bcount = biosize; if ((off_t)lbn * biosize >= nsize) { bcount = 0; } else if ((off_t)(lbn + 1) * biosize > nsize) { bcount = nsize - (off_t)lbn * biosize; } bp = nfs_getcacheblk(vp, lbn, bcount, td); if (!bp) { error = newnfs_sigintr(nmp, td); return (error ? error : EINTR); } /* * If B_CACHE is not set, we must issue the read. If this * fails, we return an error. */ if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); error = ncl_doio(vp, bp, cred, td, 0); if (error) { brelse(bp); return (error); } } /* * on is the offset into the current bp. Figure out how many * bytes we can copy out of the bp. Note that bcount is * NOT DEV_BSIZE aligned. * * Then figure out how many bytes we can copy into the uio. */ n = 0; if (on < bcount) n = MIN((unsigned)(bcount - on), uio->uio_resid); break; case VLNK: NFSINCRGLOBAL(newnfsstats.biocache_readlinks); bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, td); if (!bp) { error = newnfs_sigintr(nmp, td); return (error ? error : EINTR); } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); error = ncl_doio(vp, bp, cred, td, 0); if (error) { bp->b_ioflags |= BIO_ERROR; brelse(bp); return (error); } } n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); on = 0; break; case VDIR: NFSINCRGLOBAL(newnfsstats.biocache_readdirs); if (np->n_direofoffset && uio->uio_offset >= np->n_direofoffset) { return (0); } lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ; on = uio->uio_offset & (NFS_DIRBLKSIZ - 1); bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, td); if (!bp) { error = newnfs_sigintr(nmp, td); return (error ? error : EINTR); } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); error = ncl_doio(vp, bp, cred, td, 0); if (error) { brelse(bp); } while (error == NFSERR_BAD_COOKIE) { ncl_invaldir(vp); error = ncl_vinvalbuf(vp, 0, td, 1); /* * Yuck! The directory has been modified on the * server. The only way to get the block is by * reading from the beginning to get all the * offset cookies. * * Leave the last bp intact unless there is an error. * Loop back up to the while if the error is another * NFSERR_BAD_COOKIE (double yuch!). */ for (i = 0; i <= lbn && !error; i++) { if (np->n_direofoffset && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset) return (0); bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, td); if (!bp) { error = newnfs_sigintr(nmp, td); return (error ? error : EINTR); } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); error = ncl_doio(vp, bp, cred, td, 0); /* * no error + B_INVAL == directory EOF, * use the block. */ if (error == 0 && (bp->b_flags & B_INVAL)) break; } /* * An error will throw away the block and the * for loop will break out. If no error and this * is not the block we want, we throw away the * block and go for the next one via the for loop. */ if (error || i < lbn) brelse(bp); } } /* * The above while is repeated if we hit another cookie * error. If we hit an error and it wasn't a cookie error, * we give up. */ if (error) return (error); } /* * If not eof and read aheads are enabled, start one. * (You need the current block first, so that you have the * directory offset cookie of the next block.) */ if (nmp->nm_readahead > 0 && (bp->b_flags & B_INVAL) == 0 && (np->n_direofoffset == 0 || (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) && incore(&vp->v_bufobj, lbn + 1) == NULL) { rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, td); if (rabp) { if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= B_ASYNC; rabp->b_iocmd = BIO_READ; vfs_busy_pages(rabp, 0); if (ncl_asyncio(nmp, rabp, cred, td)) { rabp->b_flags |= B_INVAL; rabp->b_ioflags |= BIO_ERROR; vfs_unbusy_pages(rabp); brelse(rabp); } } else { brelse(rabp); } } } /* * Unlike VREG files, whos buffer size ( bp->b_bcount ) is * chopped for the EOF condition, we cannot tell how large * NFS directories are going to be until we hit EOF. So * an NFS directory buffer is *not* chopped to its EOF. Now, * it just so happens that b_resid will effectively chop it * to EOF. *BUT* this information is lost if the buffer goes * away and is reconstituted into a B_CACHE state ( due to * being VMIO ) later. So we keep track of the directory eof * in np->n_direofoffset and chop it off as an extra step * right here. */ n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on); if (np->n_direofoffset && n > np->n_direofoffset - uio->uio_offset) n = np->n_direofoffset - uio->uio_offset; break; default: ncl_printf(" ncl_bioread: type %x unexpected\n", vp->v_type); bp = NULL; break; }; if (n > 0) { error = vn_io_fault_uiomove(bp->b_data + on, (int)n, uio); } if (vp->v_type == VLNK) n = 0; if (bp != NULL) brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n > 0); return (error); } /* * The NFS write path cannot handle iovecs with len > 1. So we need to * break up iovecs accordingly (restricting them to wsize). * For the SYNC case, we can do this with 1 copy (user buffer -> mbuf). * For the ASYNC case, 2 copies are needed. The first a copy from the * user buffer to a staging buffer and then a second copy from the staging * buffer to mbufs. This can be optimized by copying from the user buffer * directly into mbufs and passing the chain down, but that requires a * fair amount of re-working of the relevant codepaths (and can be done * later). */ static int nfs_directio_write(vp, uiop, cred, ioflag) struct vnode *vp; struct uio *uiop; struct ucred *cred; int ioflag; { int error; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct thread *td = uiop->uio_td; int size; int wsize; mtx_lock(&nmp->nm_mtx); wsize = nmp->nm_wsize; mtx_unlock(&nmp->nm_mtx); if (ioflag & IO_SYNC) { int iomode, must_commit; struct uio uio; struct iovec iov; do_sync: while (uiop->uio_resid > 0) { size = MIN(uiop->uio_resid, wsize); size = MIN(uiop->uio_iov->iov_len, size); iov.iov_base = uiop->uio_iov->iov_base; iov.iov_len = size; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = uiop->uio_offset; uio.uio_resid = size; uio.uio_segflg = UIO_USERSPACE; uio.uio_rw = UIO_WRITE; uio.uio_td = td; iomode = NFSWRITE_FILESYNC; error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit, 0); KASSERT((must_commit == 0), ("ncl_directio_write: Did not commit write")); if (error) return (error); uiop->uio_offset += size; uiop->uio_resid -= size; if (uiop->uio_iov->iov_len <= size) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + size; uiop->uio_iov->iov_len -= size; } } } else { struct uio *t_uio; struct iovec *t_iov; struct buf *bp; /* * Break up the write into blocksize chunks and hand these * over to nfsiod's for write back. * Unfortunately, this incurs a copy of the data. Since * the user could modify the buffer before the write is * initiated. * * The obvious optimization here is that one of the 2 copies * in the async write path can be eliminated by copying the * data here directly into mbufs and passing the mbuf chain * down. But that will require a fair amount of re-working * of the code and can be done if there's enough interest * in NFS directio access. */ while (uiop->uio_resid > 0) { size = MIN(uiop->uio_resid, wsize); size = MIN(uiop->uio_iov->iov_len, size); bp = getpbuf(&ncl_pbuf_freecnt); t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK); t_iov = malloc(sizeof(struct iovec), M_NFSDIRECTIO, M_WAITOK); t_iov->iov_base = malloc(size, M_NFSDIRECTIO, M_WAITOK); t_iov->iov_len = size; t_uio->uio_iov = t_iov; t_uio->uio_iovcnt = 1; t_uio->uio_offset = uiop->uio_offset; t_uio->uio_resid = size; t_uio->uio_segflg = UIO_SYSSPACE; t_uio->uio_rw = UIO_WRITE; t_uio->uio_td = td; KASSERT(uiop->uio_segflg == UIO_USERSPACE || uiop->uio_segflg == UIO_SYSSPACE, ("nfs_directio_write: Bad uio_segflg")); if (uiop->uio_segflg == UIO_USERSPACE) { error = copyin(uiop->uio_iov->iov_base, t_iov->iov_base, size); if (error != 0) goto err_free; } else /* * UIO_SYSSPACE may never happen, but handle * it just in case it does. */ bcopy(uiop->uio_iov->iov_base, t_iov->iov_base, size); bp->b_flags |= B_DIRECT; bp->b_iocmd = BIO_WRITE; if (cred != NOCRED) { crhold(cred); bp->b_wcred = cred; } else bp->b_wcred = NOCRED; bp->b_caller1 = (void *)t_uio; bp->b_vp = vp; error = ncl_asyncio(nmp, bp, NOCRED, td); err_free: if (error) { free(t_iov->iov_base, M_NFSDIRECTIO); free(t_iov, M_NFSDIRECTIO); free(t_uio, M_NFSDIRECTIO); bp->b_vp = NULL; relpbuf(bp, &ncl_pbuf_freecnt); if (error == EINTR) return (error); goto do_sync; } uiop->uio_offset += size; uiop->uio_resid -= size; if (uiop->uio_iov->iov_len <= size) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + size; uiop->uio_iov->iov_len -= size; } } } return (0); } /* * Vnode op for write using bio */ int ncl_write(struct vop_write_args *ap) { int biosize; struct uio *uio = ap->a_uio; struct thread *td = uio->uio_td; struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct ucred *cred = ap->a_cred; int ioflag = ap->a_ioflag; struct buf *bp; struct vattr vattr; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn; int bcount; int bp_cached, n, on, error = 0, error1; size_t orig_resid, local_resid; off_t orig_size, tmp_off; KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("ncl_write proc")); if (vp->v_type != VREG) return (EIO); mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; mtx_unlock(&np->n_mtx); return (np->n_error); } else mtx_unlock(&np->n_mtx); mtx_lock(&nmp->nm_mtx); if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { mtx_unlock(&nmp->nm_mtx); (void)ncl_fsinfo(nmp, vp, cred, td); mtx_lock(&nmp->nm_mtx); } if (nmp->nm_wsize == 0) (void) newnfs_iosize(nmp); mtx_unlock(&nmp->nm_mtx); /* * Synchronously flush pending buffers if we are in synchronous * mode or if we are appending. */ if (ioflag & (IO_APPEND | IO_SYNC)) { mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); #ifdef notyet /* Needs matching nonblock semantics elsewhere, too. */ /* * Require non-blocking, synchronous writes to * dirty files to inform the program it needs * to fsync(2) explicitly. */ if (ioflag & IO_NDELAY) return (EAGAIN); #endif flush_and_restart: np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); error = ncl_vinvalbuf(vp, V_SAVE, td, 1); if (error) return (error); } else mtx_unlock(&np->n_mtx); } orig_resid = uio->uio_resid; mtx_lock(&np->n_mtx); orig_size = np->n_size; mtx_unlock(&np->n_mtx); /* * If IO_APPEND then load uio_offset. We restart here if we cannot * get the append lock. */ if (ioflag & IO_APPEND) { np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); error = VOP_GETATTR(vp, &vattr, cred); if (error) return (error); mtx_lock(&np->n_mtx); uio->uio_offset = np->n_size; mtx_unlock(&np->n_mtx); } if (uio->uio_offset < 0) return (EINVAL); tmp_off = uio->uio_offset + uio->uio_resid; if (tmp_off > nmp->nm_maxfilesize || tmp_off < uio->uio_offset) return (EFBIG); if (uio->uio_resid == 0) return (0); if (newnfs_directio_enable && (ioflag & IO_DIRECT) && vp->v_type == VREG) return nfs_directio_write(vp, uio, cred, ioflag); /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, i don't think it matters */ if (vn_rlimit_fsize(vp, uio, td)) return (EFBIG); biosize = vp->v_bufobj.bo_bsize; /* * Find all of this file's B_NEEDCOMMIT buffers. If our writes * would exceed the local maximum per-file write commit size when * combined with those, we must decide whether to flush, * go synchronous, or return error. We don't bother checking * IO_UNIT -- we just make all writes atomic anyway, as there's * no point optimizing for something that really won't ever happen. */ if (!(ioflag & IO_SYNC)) { int nflag; mtx_lock(&np->n_mtx); nflag = np->n_flag; mtx_unlock(&np->n_mtx); int needrestart = 0; if (nmp->nm_wcommitsize < uio->uio_resid) { /* * If this request could not possibly be completed * without exceeding the maximum outstanding write * commit size, see if we can convert it into a * synchronous write operation. */ if (ioflag & IO_NDELAY) return (EAGAIN); ioflag |= IO_SYNC; if (nflag & NMODIFIED) needrestart = 1; } else if (nflag & NMODIFIED) { int wouldcommit = 0; BO_LOCK(&vp->v_bufobj); if (vp->v_bufobj.bo_dirty.bv_cnt != 0) { TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) { if (bp->b_flags & B_NEEDCOMMIT) wouldcommit += bp->b_bcount; } } BO_UNLOCK(&vp->v_bufobj); /* * Since we're not operating synchronously and * bypassing the buffer cache, we are in a commit * and holding all of these buffers whether * transmitted or not. If not limited, this * will lead to the buffer cache deadlocking, * as no one else can flush our uncommitted buffers. */ wouldcommit += uio->uio_resid; /* * If we would initially exceed the maximum * outstanding write commit size, flush and restart. */ if (wouldcommit > nmp->nm_wcommitsize) needrestart = 1; } if (needrestart) goto flush_and_restart; } do { NFSINCRGLOBAL(newnfsstats.biocache_writes); lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize-1); n = MIN((unsigned)(biosize - on), uio->uio_resid); again: /* * Handle direct append and file extension cases, calculate * unaligned buffer size. */ mtx_lock(&np->n_mtx); if (uio->uio_offset == np->n_size && n) { mtx_unlock(&np->n_mtx); /* * Get the buffer (in its pre-append state to maintain * B_CACHE if it was previously set). Resize the * nfsnode after we have locked the buffer to prevent * readers from reading garbage. */ bcount = on; bp = nfs_getcacheblk(vp, lbn, bcount, td); if (bp != NULL) { long save; mtx_lock(&np->n_mtx); np->n_size = uio->uio_offset + n; np->n_flag |= NMODIFIED; vnode_pager_setsize(vp, np->n_size); mtx_unlock(&np->n_mtx); save = bp->b_flags & B_CACHE; bcount += n; allocbuf(bp, bcount); bp->b_flags |= save; } } else { /* * Obtain the locked cache block first, and then * adjust the file's size as appropriate. */ bcount = on + n; if ((off_t)lbn * biosize + bcount < np->n_size) { if ((off_t)(lbn + 1) * biosize < np->n_size) bcount = biosize; else bcount = np->n_size - (off_t)lbn * biosize; } mtx_unlock(&np->n_mtx); bp = nfs_getcacheblk(vp, lbn, bcount, td); mtx_lock(&np->n_mtx); if (uio->uio_offset + n > np->n_size) { np->n_size = uio->uio_offset + n; np->n_flag |= NMODIFIED; vnode_pager_setsize(vp, np->n_size); } mtx_unlock(&np->n_mtx); } if (!bp) { error = newnfs_sigintr(nmp, td); if (!error) error = EINTR; break; } /* * Issue a READ if B_CACHE is not set. In special-append * mode, B_CACHE is based on the buffer prior to the write * op and is typically set, avoiding the read. If a read * is required in special append mode, the server will * probably send us a short-read since we extended the file * on our end, resulting in b_resid == 0 and, thusly, * B_CACHE getting set. * * We can also avoid issuing the read if the write covers * the entire buffer. We have to make sure the buffer state * is reasonable in this case since we will not be initiating * I/O. See the comments in kern/vfs_bio.c's getblk() for * more information. * * B_CACHE may also be set due to the buffer being cached * normally. */ bp_cached = 1; if (on == 0 && n == bcount) { if ((bp->b_flags & B_CACHE) == 0) bp_cached = 0; bp->b_flags |= B_CACHE; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); error = ncl_doio(vp, bp, cred, td, 0); if (error) { brelse(bp); break; } } if (bp->b_wcred == NOCRED) bp->b_wcred = crhold(cred); mtx_lock(&np->n_mtx); np->n_flag |= NMODIFIED; mtx_unlock(&np->n_mtx); /* * If dirtyend exceeds file size, chop it down. This should * not normally occur but there is an append race where it * might occur XXX, so we log it. * * If the chopping creates a reverse-indexed or degenerate * situation with dirtyoff/end, we 0 both of them. */ if (bp->b_dirtyend > bcount) { ncl_printf("NFS append race @%lx:%d\n", (long)bp->b_blkno * DEV_BSIZE, bp->b_dirtyend - bcount); bp->b_dirtyend = bcount; } if (bp->b_dirtyoff >= bp->b_dirtyend) bp->b_dirtyoff = bp->b_dirtyend = 0; /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. * * While it is possible to merge discontiguous writes due to * our having a B_CACHE buffer ( and thus valid read data * for the hole), we don't because it could lead to * significant cache coherency problems with multiple clients, * especially if locking is implemented later on. * * As an optimization we could theoretically maintain * a linked list of discontinuous areas, but we would still * have to commit them separately so there isn't much * advantage to it except perhaps a bit of asynchronization. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { if (bwrite(bp) == EINTR) { error = EINTR; break; } goto again; } local_resid = uio->uio_resid; error = vn_io_fault_uiomove((char *)bp->b_data + on, n, uio); if (error != 0 && !bp_cached) { /* * This block has no other content then what * possibly was written by the faulty uiomove. * Release it, forgetting the data pages, to * prevent the leak of uninitialized data to * usermode. */ bp->b_ioflags |= BIO_ERROR; brelse(bp); uio->uio_offset -= local_resid - uio->uio_resid; uio->uio_resid = local_resid; break; } /* * Since this block is being modified, it must be written * again and not just committed. Since write clustering does * not work for the stage 1 data write, only the stage 2 * commit rpc, we have to clear B_CLUSTEROK as well. */ bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); /* * Get the partial update on the progress made from * uiomove, if an error occured. */ if (error != 0) n = local_resid - uio->uio_resid; /* * Only update dirtyoff/dirtyend if not a degenerate * condition. */ if (n > 0) { if (bp->b_dirtyend > 0) { bp->b_dirtyoff = min(on, bp->b_dirtyoff); bp->b_dirtyend = max((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } vfs_bio_set_valid(bp, on, n); } /* * If IO_SYNC do bwrite(). * * IO_INVAL appears to be unused. The idea appears to be * to turn off caching in this case. Very odd. XXX */ if ((ioflag & IO_SYNC)) { if (ioflag & IO_INVAL) bp->b_flags |= B_NOCACHE; error1 = bwrite(bp); if (error1 != 0) { if (error == 0) error = error1; break; } } else if ((n + on) == biosize) { bp->b_flags |= B_ASYNC; (void) ncl_writebp(bp, 0, NULL); } else { bdwrite(bp); } if (error != 0) break; } while (uio->uio_resid > 0 && n > 0); if (error != 0) { if (ioflag & IO_UNIT) { VATTR_NULL(&vattr); vattr.va_size = orig_size; /* IO_SYNC is handled implicitely */ (void)VOP_SETATTR(vp, &vattr, cred); uio->uio_offset -= orig_resid - uio->uio_resid; uio->uio_resid = orig_resid; } } return (error); } /* * Get an nfs cache block. * * Allocate a new one if the block isn't currently in the cache * and return the block marked busy. If the calling process is * interrupted by a signal for an interruptible mount point, return * NULL. * * The caller must carefully deal with the possible B_INVAL state of * the buffer. ncl_doio() clears B_INVAL (and ncl_asyncio() clears it * indirectly), so synchronous reads can be issued without worrying about * the B_INVAL state. We have to be a little more careful when dealing * with writes (see comments in nfs_write()) when extending a file past * its EOF. */ static struct buf * nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct thread *td) { struct buf *bp; struct mount *mp; struct nfsmount *nmp; mp = vp->v_mount; nmp = VFSTONFS(mp); if (nmp->nm_flag & NFSMNT_INT) { sigset_t oldset; newnfs_set_sigmask(td, &oldset); bp = getblk(vp, bn, size, NFS_PCATCH, 0, 0); newnfs_restore_sigmask(td, &oldset); while (bp == NULL) { if (newnfs_sigintr(nmp, td)) return (NULL); bp = getblk(vp, bn, size, 0, 2 * hz, 0); } } else { bp = getblk(vp, bn, size, 0, 0, 0); } if (vp->v_type == VREG) bp->b_blkno = bn * (vp->v_bufobj.bo_bsize / DEV_BSIZE); return (bp); } /* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int ncl_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg) { struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, slpflag, slptimeo; int old_lock = 0; ASSERT_VOP_LOCKED(vp, "ncl_vinvalbuf"); if ((nmp->nm_flag & NFSMNT_INT) == 0) intrflg = 0; if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)) intrflg = 1; if (intrflg) { slpflag = NFS_PCATCH; slptimeo = 2 * hz; } else { slpflag = 0; slptimeo = 0; } old_lock = ncl_upgrade_vnlock(vp); if (vp->v_iflag & VI_DOOMED) { /* * Since vgonel() uses the generic vinvalbuf() to flush * dirty buffers and it does not call this function, it * is safe to just return OK when VI_DOOMED is set. */ ncl_downgrade_vnlock(vp, old_lock); return (0); } /* * Now, flush as required. */ if ((flags & V_SAVE) && (vp->v_bufobj.bo_object != NULL)) { VM_OBJECT_LOCK(vp->v_bufobj.bo_object); vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object); /* * If the page clean was interrupted, fail the invalidation. * Not doing so, we run the risk of losing dirty pages in the * vinvalbuf() call below. */ if (intrflg && (error = newnfs_sigintr(nmp, td))) goto out; } error = vinvalbuf(vp, flags, slpflag, 0); while (error) { if (intrflg && (error = newnfs_sigintr(nmp, td))) goto out; error = vinvalbuf(vp, flags, 0, slptimeo); } + if (NFSHASPNFS(nmp)) + nfscl_layoutcommit(vp, td); mtx_lock(&np->n_mtx); if (np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; mtx_unlock(&np->n_mtx); out: ncl_downgrade_vnlock(vp, old_lock); return error; } /* * Initiate asynchronous I/O. Return an error if no nfsiods are available. * This is mainly to avoid queueing async I/O requests when the nfsiods * are all hung on a dead server. * * Note: ncl_asyncio() does not clear (BIO_ERROR|B_INVAL) but when the bp * is eventually dequeued by the async daemon, ncl_doio() *will*. */ int ncl_asyncio(struct nfsmount *nmp, struct buf *bp, struct ucred *cred, struct thread *td) { int iod; int gotiod; int slpflag = 0; int slptimeo = 0; int error, error2; /* * Commits are usually short and sweet so lets save some cpu and * leave the async daemons for more important rpc's (such as reads * and writes). */ mtx_lock(&ncl_iod_mutex); if (bp->b_iocmd == BIO_WRITE && (bp->b_flags & B_NEEDCOMMIT) && (nmp->nm_bufqiods > ncl_numasync / 2)) { mtx_unlock(&ncl_iod_mutex); return(EIO); } again: if (nmp->nm_flag & NFSMNT_INT) slpflag = NFS_PCATCH; gotiod = FALSE; /* * Find a free iod to process this request. */ for (iod = 0; iod < ncl_numasync; iod++) if (ncl_iodwant[iod] == NFSIOD_AVAILABLE) { gotiod = TRUE; break; } /* * Try to create one if none are free. */ if (!gotiod) ncl_nfsiodnew(); else { /* * Found one, so wake it up and tell it which * mount to process. */ NFS_DPF(ASYNCIO, ("ncl_asyncio: waking iod %d for mount %p\n", iod, nmp)); ncl_iodwant[iod] = NFSIOD_NOT_AVAILABLE; ncl_iodmount[iod] = nmp; nmp->nm_bufqiods++; wakeup(&ncl_iodwant[iod]); } /* * If none are free, we may already have an iod working on this mount * point. If so, it will process our request. */ if (!gotiod) { if (nmp->nm_bufqiods > 0) { NFS_DPF(ASYNCIO, ("ncl_asyncio: %d iods are already processing mount %p\n", nmp->nm_bufqiods, nmp)); gotiod = TRUE; } } /* * If we have an iod which can process the request, then queue * the buffer. */ if (gotiod) { /* * Ensure that the queue never grows too large. We still want * to asynchronize so we block rather then return EIO. */ while (nmp->nm_bufqlen >= 2*ncl_numasync) { NFS_DPF(ASYNCIO, ("ncl_asyncio: waiting for mount %p queue to drain\n", nmp)); nmp->nm_bufqwant = TRUE; error = newnfs_msleep(td, &nmp->nm_bufq, &ncl_iod_mutex, slpflag | PRIBIO, "nfsaio", slptimeo); if (error) { error2 = newnfs_sigintr(nmp, td); if (error2) { mtx_unlock(&ncl_iod_mutex); return (error2); } if (slpflag == NFS_PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } /* * We might have lost our iod while sleeping, * so check and loop if nescessary. */ goto again; } /* We might have lost our nfsiod */ if (nmp->nm_bufqiods == 0) { NFS_DPF(ASYNCIO, ("ncl_asyncio: no iods after mount %p queue was drained, looping\n", nmp)); goto again; } if (bp->b_iocmd == BIO_READ) { if (bp->b_rcred == NOCRED && cred != NOCRED) bp->b_rcred = crhold(cred); } else { if (bp->b_wcred == NOCRED && cred != NOCRED) bp->b_wcred = crhold(cred); } if (bp->b_flags & B_REMFREE) bremfreef(bp); BUF_KERNPROC(bp); TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist); nmp->nm_bufqlen++; if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) { mtx_lock(&(VTONFS(bp->b_vp))->n_mtx); VTONFS(bp->b_vp)->n_flag |= NMODIFIED; VTONFS(bp->b_vp)->n_directio_asyncwr++; mtx_unlock(&(VTONFS(bp->b_vp))->n_mtx); } mtx_unlock(&ncl_iod_mutex); return (0); } mtx_unlock(&ncl_iod_mutex); /* * All the iods are busy on other mounts, so return EIO to * force the caller to process the i/o synchronously. */ NFS_DPF(ASYNCIO, ("ncl_asyncio: no iods available, i/o is synchronous\n")); return (EIO); } void ncl_doio_directwrite(struct buf *bp) { int iomode, must_commit; struct uio *uiop = (struct uio *)bp->b_caller1; char *iov_base = uiop->uio_iov->iov_base; iomode = NFSWRITE_FILESYNC; uiop->uio_td = NULL; /* NULL since we're in nfsiod */ ncl_writerpc(bp->b_vp, uiop, bp->b_wcred, &iomode, &must_commit, 0); KASSERT((must_commit == 0), ("ncl_doio_directwrite: Did not commit write")); free(iov_base, M_NFSDIRECTIO); free(uiop->uio_iov, M_NFSDIRECTIO); free(uiop, M_NFSDIRECTIO); if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) { struct nfsnode *np = VTONFS(bp->b_vp); mtx_lock(&np->n_mtx); np->n_directio_asyncwr--; if (np->n_directio_asyncwr == 0) { np->n_flag &= ~NMODIFIED; if ((np->n_flag & NFSYNCWAIT)) { np->n_flag &= ~NFSYNCWAIT; wakeup((caddr_t)&np->n_directio_asyncwr); } } mtx_unlock(&np->n_mtx); } bp->b_vp = NULL; relpbuf(bp, &ncl_pbuf_freecnt); } /* * Do an I/O operation to/from a cache block. This may be called * synchronously or from an nfsiod. */ int ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td, int called_from_strategy) { struct uio *uiop; struct nfsnode *np; struct nfsmount *nmp; int error = 0, iomode, must_commit = 0; struct uio uio; struct iovec io; struct proc *p = td ? td->td_proc : NULL; uint8_t iocmd; np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = td; /* * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We * do this here so we do not have to do it in all the code that * calls us. */ bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; KASSERT(!(bp->b_flags & B_DONE), ("ncl_doio: bp %p already marked done", bp)); iocmd = bp->b_iocmd; if (iocmd == BIO_READ) { io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; switch (vp->v_type) { case VREG: uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; NFSINCRGLOBAL(newnfsstats.read_bios); error = ncl_readrpc(vp, uiop, cr); if (!error) { if (uiop->uio_resid) { /* * If we had a short read with no error, we must have * hit a file hole. We should zero-fill the remainder. * This can also occur if the server hits the file EOF. * * Holes used to be able to occur due to pending * writes, but that is not possible any longer. */ int nread = bp->b_bcount - uiop->uio_resid; ssize_t left = uiop->uio_resid; if (left > 0) bzero((char *)bp->b_data + nread, left); uiop->uio_resid = 0; } } /* ASSERT_VOP_LOCKED(vp, "ncl_doio"); */ if (p && (vp->v_vflag & VV_TEXT)) { mtx_lock(&np->n_mtx); if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &np->n_vattr.na_mtime)) { mtx_unlock(&np->n_mtx); PROC_LOCK(p); killproc(p, "text file modification"); PROC_UNLOCK(p); } else mtx_unlock(&np->n_mtx); } break; case VLNK: uiop->uio_offset = (off_t)0; NFSINCRGLOBAL(newnfsstats.readlink_bios); error = ncl_readlinkrpc(vp, uiop, cr); break; case VDIR: NFSINCRGLOBAL(newnfsstats.readdir_bios); uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ; if ((nmp->nm_flag & NFSMNT_RDIRPLUS) != 0) { error = ncl_readdirplusrpc(vp, uiop, cr, td); if (error == NFSERR_NOTSUPP) nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0) error = ncl_readdirrpc(vp, uiop, cr, td); /* * end-of-directory sets B_INVAL but does not generate an * error. */ if (error == 0 && uiop->uio_resid == bp->b_bcount) bp->b_flags |= B_INVAL; break; default: ncl_printf("ncl_doio: type %x unexpected\n", vp->v_type); break; }; if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; } } else { /* * If we only need to commit, try to commit */ if (bp->b_flags & B_NEEDCOMMIT) { int retv; off_t off; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; retv = ncl_commit(vp, off, bp->b_dirtyend-bp->b_dirtyoff, bp->b_wcred, td); if (retv == 0) { bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); bp->b_resid = 0; bufdone(bp); return (0); } if (retv == NFSERR_STALEWRITEVERF) { ncl_clearcommit(vp->v_mount); } } /* * Setup for actual write */ mtx_lock(&np->n_mtx); if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size) bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE; mtx_unlock(&np->n_mtx); if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; NFSINCRGLOBAL(newnfsstats.write_bios); if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC) iomode = NFSWRITE_UNSTABLE; else iomode = NFSWRITE_FILESYNC; error = ncl_writerpc(vp, uiop, cr, &iomode, &must_commit, called_from_strategy); /* * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try * to cluster the buffers needing commit. This will allow * the system to submit a single commit rpc for the whole * cluster. We can do this even if the buffer is not 100% * dirty (relative to the NFS blocksize), so we optimize the * append-to-file-case. * * (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be * cleared because write clustering only works for commit * rpc's, not for the data portion of the write). */ if (!error && iomode == NFSWRITE_UNSTABLE) { bp->b_flags |= B_NEEDCOMMIT; if (bp->b_dirtyoff == 0 && bp->b_dirtyend == bp->b_bcount) bp->b_flags |= B_CLUSTEROK; } else { bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); } /* * For an interrupted write, the buffer is still valid * and the write hasn't been pushed to the server yet, * so we can't set BIO_ERROR and report the interruption * by setting B_EINTR. For the B_ASYNC case, B_EINTR * is not relevant, so the rpc attempt is essentially * a noop. For the case of a V3 write rpc not being * committed to stable storage, the block is still * dirty and requires either a commit rpc or another * write rpc with iomode == NFSV3WRITE_FILESYNC before * the block is reused. This is indicated by setting * the B_DELWRI and B_NEEDCOMMIT flags. * * EIO is returned by ncl_writerpc() to indicate a recoverable * write error and is handled as above, except that * B_EINTR isn't set. One cause of this is a stale stateid * error for the RPC that indicates recovery is required, * when called with called_from_strategy != 0. * * If the buffer is marked B_PAGING, it does not reside on * the vp's paging queues so we cannot call bdirty(). The * bp in this case is not an NFS cache block so we should * be safe. XXX * * The logic below breaks up errors into recoverable and * unrecoverable. For the former, we clear B_INVAL|B_NOCACHE * and keep the buffer around for potential write retries. * For the latter (eg ESTALE), we toss the buffer away (B_INVAL) * and save the error in the nfsnode. This is less than ideal * but necessary. Keeping such buffers around could potentially * cause buffer exhaustion eventually (they can never be written * out, so will get constantly be re-dirtied). It also causes * all sorts of vfs panics. For non-recoverable write errors, * also invalidate the attrcache, so we'll be forced to go over * the wire for this object, returning an error to user on next * call (most of the time). */ if (error == EINTR || error == EIO || error == ETIMEDOUT || (!error && (bp->b_flags & B_NEEDCOMMIT))) { int s; s = splbio(); bp->b_flags &= ~(B_INVAL|B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) { bdirty(bp); bp->b_flags &= ~B_DONE; } if ((error == EINTR || error == ETIMEDOUT) && (bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; splx(s); } else { if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_flags |= B_INVAL; bp->b_error = np->n_error = error; mtx_lock(&np->n_mtx); np->n_flag |= NWRITEERR; np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); mtx_unlock(&np->n_mtx); } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; bufdone(bp); return (0); } } bp->b_resid = uiop->uio_resid; if (must_commit) ncl_clearcommit(vp->v_mount); bufdone(bp); return (error); } /* * Used to aid in handling ftruncate() operations on the NFS client side. * Truncation creates a number of special problems for NFS. We have to * throw away VM pages and buffer cache buffers that are beyond EOF, and * we have to properly handle VM pages or (potentially dirty) buffers * that straddle the truncation point. */ int ncl_meta_setsize(struct vnode *vp, struct ucred *cred, struct thread *td, u_quad_t nsize) { struct nfsnode *np = VTONFS(vp); u_quad_t tsize; int biosize = vp->v_bufobj.bo_bsize; int error = 0; mtx_lock(&np->n_mtx); tsize = np->n_size; np->n_size = nsize; mtx_unlock(&np->n_mtx); if (nsize < tsize) { struct buf *bp; daddr_t lbn; int bufsize; /* * vtruncbuf() doesn't get the buffer overlapping the * truncation point. We may have a B_DELWRI and/or B_CACHE * buffer that now needs to be truncated. */ error = vtruncbuf(vp, cred, nsize, biosize); lbn = nsize / biosize; bufsize = nsize & (biosize - 1); bp = nfs_getcacheblk(vp, lbn, bufsize, td); if (!bp) return EINTR; if (bp->b_dirtyoff > bp->b_bcount) bp->b_dirtyoff = bp->b_bcount; if (bp->b_dirtyend > bp->b_bcount) bp->b_dirtyend = bp->b_bcount; bp->b_flags |= B_RELBUF; /* don't leave garbage around */ brelse(bp); } else { vnode_pager_setsize(vp, nsize); } return(error); } Index: head/sys/fs/nfsclient/nfs_clcomsubs.c =================================================================== --- head/sys/fs/nfsclient/nfs_clcomsubs.c (revision 244041) +++ head/sys/fs/nfsclient/nfs_clcomsubs.c (revision 244042) @@ -1,514 +1,574 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #ifndef APPLEKEXT #include extern struct nfsstats newnfsstats; -extern struct nfsv4_opflag nfsv4_opflag[NFSV4OP_NOPS]; +extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; extern int ncl_mbuf_mlen; extern enum vtype newnv2tov_type[8]; extern enum vtype nv34tov_type[8]; +extern int nfs_bigreply[NFSV41_NPROCS]; NFSCLSTATEMUTEX; #endif /* !APPLEKEXT */ static nfsuint64 nfs_nullcookie = {{ 0, 0 }}; static struct { int op; int opcnt; const u_char *tag; int taglen; -} nfsv4_opmap[NFS_NPROCS] = { +} nfsv4_opmap[NFSV41_NPROCS] = { { 0, 1, "Null", 4 }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_SETATTR, 2, "Setattr", 7, }, { NFSV4OP_LOOKUP, 3, "Lookup", 6, }, { NFSV4OP_ACCESS, 2, "Access", 6, }, { NFSV4OP_READLINK, 2, "Readlink", 8, }, { NFSV4OP_READ, 1, "Read", 4, }, { NFSV4OP_WRITE, 2, "Write", 5, }, { NFSV4OP_OPEN, 3, "Open", 4, }, { NFSV4OP_CREATE, 3, "Create", 6, }, { NFSV4OP_CREATE, 1, "Create", 6, }, { NFSV4OP_CREATE, 3, "Create", 6, }, { NFSV4OP_REMOVE, 1, "Remove", 6, }, { NFSV4OP_REMOVE, 1, "Remove", 6, }, { NFSV4OP_SAVEFH, 5, "Rename", 6, }, { NFSV4OP_SAVEFH, 4, "Link", 4, }, { NFSV4OP_READDIR, 2, "Readdir", 7, }, { NFSV4OP_READDIR, 2, "Readdir", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_COMMIT, 2, "Commit", 6, }, { NFSV4OP_LOOKUPP, 3, "Lookupp", 7, }, { NFSV4OP_SETCLIENTID, 1, "SetClientID", 11, }, { NFSV4OP_SETCLIENTIDCFRM, 1, "SetClientIDConfirm", 18, }, { NFSV4OP_LOCK, 1, "Lock", 4, }, { NFSV4OP_LOCKU, 1, "LockU", 5, }, { NFSV4OP_OPEN, 2, "Open", 4, }, { NFSV4OP_CLOSE, 1, "Close", 5, }, { NFSV4OP_OPENCONFIRM, 1, "Openconfirm", 11, }, { NFSV4OP_LOCKT, 1, "LockT", 5, }, { NFSV4OP_OPENDOWNGRADE, 1, "Opendowngrade", 13, }, { NFSV4OP_RENEW, 1, "Renew", 5, }, { NFSV4OP_PUTROOTFH, 1, "Dirpath", 7, }, { NFSV4OP_RELEASELCKOWN, 1, "Rellckown", 9, }, { NFSV4OP_DELEGRETURN, 1, "Delegret", 8, }, { NFSV4OP_DELEGRETURN, 3, "DelegRemove", 11, }, { NFSV4OP_DELEGRETURN, 7, "DelegRename1", 12, }, { NFSV4OP_DELEGRETURN, 9, "DelegRename2", 12, }, { NFSV4OP_GETATTR, 1, "Getacl", 6, }, { NFSV4OP_SETATTR, 1, "Setacl", 6, }, + { NFSV4OP_EXCHANGEID, 1, "ExchangeID", 10, }, + { NFSV4OP_CREATESESSION, 1, "CreateSession", 13, }, + { NFSV4OP_DESTROYSESSION, 1, "DestroySession", 14, }, + { NFSV4OP_DESTROYCLIENTID, 1, "DestroyClient", 13, }, + { NFSV4OP_FREESTATEID, 1, "FreeStateID", 11, }, + { NFSV4OP_LAYOUTGET, 1, "LayoutGet", 9, }, + { NFSV4OP_GETDEVINFO, 1, "GetDeviceInfo", 13, }, + { NFSV4OP_LAYOUTCOMMIT, 1, "LayoutCommit", 12, }, + { NFSV4OP_LAYOUTRETURN, 1, "LayoutReturn", 12, }, + { NFSV4OP_RECLAIMCOMPL, 1, "ReclaimComplete", 15, }, + { NFSV4OP_WRITE, 1, "WriteDS", 7, }, + { NFSV4OP_READ, 1, "ReadDS", 6, }, + { NFSV4OP_COMMIT, 1, "CommitDS", 8, }, }; - /* * NFS RPCS that have large request message size. */ -static int nfs_bigrequest[NFS_NPROCS] = { +static int nfs_bigrequest[NFSV41_NPROCS] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0 }; /* * Start building a request. Mostly just put the first file handle in * place. */ APPLESTATIC void nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, - u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp) + u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp, struct nfsclsession *sep) { struct mbuf *mb; u_int32_t *tl; int opcnt; nfsattrbit_t attrbits; /* * First, fill in some of the fields of nd. */ - if (NFSHASNFSV4(nmp)) + nd->nd_slotseq = NULL; + if (NFSHASNFSV4(nmp)) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; - else if (NFSHASNFSV3(nmp)) + if (NFSHASNFSV4N(nmp)) + nd->nd_flag |= ND_NFSV41; + } else if (NFSHASNFSV3(nmp)) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else nd->nd_flag = ND_NFSV2 | ND_NFSCL; nd->nd_procnum = procnum; nd->nd_repstat = 0; /* * Get the first mbuf for the request. */ if (nfs_bigrequest[procnum]) NFSMCLGET(mb, M_WAITOK); else NFSMGET(mb); mbuf_setlen(mb, 0); nd->nd_mreq = nd->nd_mb = mb; nd->nd_bpos = NFSMTOD(mb, caddr_t); /* * And fill the first file handle into the request. */ if (nd->nd_flag & ND_NFSV4) { opcnt = nfsv4_opmap[procnum].opcnt + nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh; + if ((nd->nd_flag & ND_NFSV41) != 0) { + opcnt += nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq; + if (procnum == NFSPROC_RENEW) + /* + * For the special case of Renew, just do a + * Sequence Op. + */ + opcnt = 1; + else if (procnum == NFSPROC_WRITEDS || + procnum == NFSPROC_COMMITDS) + /* + * For the special case of a Writeor Commit to + * a DS, the opcnt == 3, for Sequence, PutFH, + * Write/Commit. + */ + opcnt = 3; + } /* * What should the tag really be? */ (void) nfsm_strtom(nd, nfsv4_opmap[procnum].tag, nfsv4_opmap[procnum].taglen); - NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); - *tl++ = txdr_unsigned(NFSV4_MINORVERSION); + NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + if ((nd->nd_flag & ND_NFSV41) != 0) + *tl++ = txdr_unsigned(NFSV41_MINORVERSION); + else + *tl++ = txdr_unsigned(NFSV4_MINORVERSION); if (opcntpp != NULL) *opcntpp = tl; - *tl++ = txdr_unsigned(opcnt); + *tl = txdr_unsigned(opcnt); + if ((nd->nd_flag & ND_NFSV41) != 0 && + nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq > 0) { + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_SEQUENCE); + if (sep == NULL) + nfsv4_setsequence(nd, NFSMNT_MDSSESSION(nmp), + nfs_bigreply[procnum]); + else + nfsv4_setsequence(nd, sep, + nfs_bigreply[procnum]); + } if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh > 0) { + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, nfhp, fhlen, 0); - if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh==2){ + if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh + == 2 && procnum != NFSPROC_WRITEDS && + procnum != NFSPROC_COMMITDS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; } + } + if (procnum != NFSPROC_RENEW || + (nd->nd_flag & ND_NFSV41) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(nfsv4_opmap[procnum].op); } - *tl = txdr_unsigned(nfsv4_opmap[procnum].op); } else { (void) nfsm_fhtom(nd, nfhp, fhlen, 0); } - NFSINCRGLOBAL(newnfsstats.rpccnt[procnum]); + if (procnum < NFSV4_NPROCS) + NFSINCRGLOBAL(newnfsstats.rpccnt[procnum]); } #ifndef APPLE /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can ony handle iovcnt == 1 */ APPLESTATIC void nfsm_uiombuf(struct nfsrv_descript *nd, struct uio *uiop, int siz) { char *uiocp; struct mbuf *mp, *mp2; int xfer, left, mlen; int uiosiz, clflg, rem; char *cp, *tcp; KASSERT(uiop->uio_iovcnt == 1, ("nfsm_uiotombuf: iovcnt != 1")); if (siz > ncl_mbuf_mlen) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = NFSM_RNDUP(siz) - siz; mp = mp2 = nd->nd_mb; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { if (clflg) NFSMCLGET(mp, M_WAITOK); else NFSMGET(mp); mbuf_setlen(mp, 0); mbuf_setnext(mp2, mp); mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) NFSBCOPY(uiocp, NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); else copyin(CAST_USER_ADDR_T(uiocp), NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); mbuf_setlen(mp, mbuf_len(mp) + xfer); left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } tcp = (char *)uiop->uio_iov->iov_base; tcp += uiosiz; uiop->uio_iov->iov_base = (void *)tcp; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { NFSMGET(mp); mbuf_setlen(mp, 0); mbuf_setnext(mp2, mp); } cp = NFSMTOD(mp, caddr_t) + mbuf_len(mp); for (left = 0; left < rem; left++) *cp++ = '\0'; mbuf_setlen(mp, mbuf_len(mp) + rem); nd->nd_bpos = cp; } else nd->nd_bpos = NFSMTOD(mp, caddr_t) + mbuf_len(mp); nd->nd_mb = mp; } #endif /* !APPLE */ /* * Load vnode attributes from the xdr file attributes. * Returns EBADRPC if they can't be parsed, 0 otherwise. */ APPLESTATIC int nfsm_loadattr(struct nfsrv_descript *nd, struct nfsvattr *nap) { struct nfs_fattr *fp; int error = 0; if (nd->nd_flag & ND_NFSV4) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(fp, struct nfs_fattr *, NFSX_V3FATTR); nap->na_type = nfsv34tov_type(fp->fa_type); nap->na_mode = fxdr_unsigned(u_short, fp->fa_mode); nap->na_rdev = makedev(fxdr_unsigned(u_char, fp->fa3_rdev.specdata1), fxdr_unsigned(u_char, fp->fa3_rdev.specdata2)); nap->na_nlink = fxdr_unsigned(u_short, fp->fa_nlink); nap->na_uid = fxdr_unsigned(uid_t, fp->fa_uid); nap->na_gid = fxdr_unsigned(gid_t, fp->fa_gid); nap->na_size = fxdr_hyper(&fp->fa3_size); nap->na_blocksize = NFS_FABLKSIZE; nap->na_bytes = fxdr_hyper(&fp->fa3_used); nap->na_fileid = fxdr_hyper(&fp->fa3_fileid); fxdr_nfsv3time(&fp->fa3_atime, &nap->na_atime); fxdr_nfsv3time(&fp->fa3_ctime, &nap->na_ctime); fxdr_nfsv3time(&fp->fa3_mtime, &nap->na_mtime); nap->na_flags = 0; nap->na_filerev = 0; } else { NFSM_DISSECT(fp, struct nfs_fattr *, NFSX_V2FATTR); nap->na_type = nfsv2tov_type(fp->fa_type); nap->na_mode = fxdr_unsigned(u_short, fp->fa_mode); if (nap->na_type == VNON || nap->na_type == VREG) nap->na_type = IFTOVT(nap->na_mode); nap->na_rdev = fxdr_unsigned(dev_t, fp->fa2_rdev); /* * Really ugly NFSv2 kludge. */ if (nap->na_type == VCHR && nap->na_rdev == ((dev_t)-1)) nap->na_type = VFIFO; nap->na_nlink = fxdr_unsigned(u_short, fp->fa_nlink); nap->na_uid = fxdr_unsigned(uid_t, fp->fa_uid); nap->na_gid = fxdr_unsigned(gid_t, fp->fa_gid); nap->na_size = fxdr_unsigned(u_int32_t, fp->fa2_size); nap->na_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); nap->na_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) * NFS_FABLKSIZE; nap->na_fileid = fxdr_unsigned(uint64_t, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &nap->na_atime); fxdr_nfsv2time(&fp->fa2_mtime, &nap->na_mtime); nap->na_flags = 0; nap->na_ctime.tv_sec = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_sec); nap->na_ctime.tv_nsec = 0; nap->na_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); nap->na_filerev = 0; } nfsmout: return (error); } /* * This function finds the directory cookie that corresponds to the * logical byte offset given. */ APPLESTATIC nfsuint64 * nfscl_getcookie(struct nfsnode *np, off_t off, int add) { struct nfsdmap *dp, *dp2; int pos; pos = off / NFS_DIRBLKSIZ; if (pos == 0) { KASSERT(!add, ("nfs getcookie add at 0")); return (&nfs_nullcookie); } pos--; dp = LIST_FIRST(&np->n_cookies); if (!dp) { if (add) { MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return (NULL); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (LIST_NEXT(dp, ndm_list) != NULL) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return (NULL); dp = LIST_NEXT(dp, ndm_list); } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return (NULL); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return (NULL); } return (&dp->ndm_cookies[pos]); } /* * Gets a file handle out of an nfs reply sent to the client and returns * the file handle and the file's attributes. * For V4, it assumes that Getfh and Getattr Op's results are here. */ APPLESTATIC int nfscl_mtofh(struct nfsrv_descript *nd, struct nfsfh **nfhpp, struct nfsvattr *nap, int *attrflagp) { u_int32_t *tl; int error = 0, flag = 1; *nfhpp = NULL; *attrflagp = 0; /* * First get the file handle and vnode. */ if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); flag = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); } if (flag) { error = nfsm_getfh(nd, nfhpp); if (error) return (error); } /* * Now, get the attributes. */ if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (flag) { flag = fxdr_unsigned(int, *tl); } else if (fxdr_unsigned(int, *tl)) { error = nfsm_advance(nd, NFSX_V3FATTR, -1); if (error) return (error); } } if (flag) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = 1; } nfsmout: return (error); } /* * Put a state Id in the mbuf list. */ APPLESTATIC void nfsm_stateidtom(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, int flag) { nfsv4stateid_t *st; NFSM_BUILD(st, nfsv4stateid_t *, NFSX_STATEID); if (flag == NFSSTATEID_PUTALLZERO) { st->seqid = 0; st->other[0] = 0; st->other[1] = 0; st->other[2] = 0; } else if (flag == NFSSTATEID_PUTALLONE) { st->seqid = 0xffffffff; st->other[0] = 0xffffffff; st->other[1] = 0xffffffff; st->other[2] = 0xffffffff; + } else if (flag == NFSSTATEID_PUTSEQIDZERO) { + st->seqid = 0; + st->other[0] = stateidp->other[0]; + st->other[1] = stateidp->other[1]; + st->other[2] = stateidp->other[2]; } else { st->seqid = stateidp->seqid; st->other[0] = stateidp->other[0]; st->other[1] = stateidp->other[1]; st->other[2] = stateidp->other[2]; } } /* * Initialize the owner/delegation sleep lock. */ APPLESTATIC void nfscl_lockinit(struct nfsv4lock *lckp) { lckp->nfslock_usecnt = 0; lckp->nfslock_lock = 0; } /* * Get an exclusive lock. (Not needed for OpenBSD4, since there is only one * thread for each posix process in the kernel.) */ APPLESTATIC void nfscl_lockexcl(struct nfsv4lock *lckp, void *mutex) { int igotlock; do { igotlock = nfsv4_lock(lckp, 1, NULL, mutex, NULL); } while (!igotlock); } /* * Release an exclusive lock. */ APPLESTATIC void nfscl_lockunlock(struct nfsv4lock *lckp) { nfsv4_unlock(lckp, 0); } /* * Called to derefernce a lock on a stateid (delegation or open owner). */ APPLESTATIC void nfscl_lockderef(struct nfsv4lock *lckp) { NFSLOCKCLSTATE(); lckp->nfslock_usecnt--; if (lckp->nfslock_usecnt == 0 && (lckp->nfslock_lock & NFSV4LOCK_WANTED)) { lckp->nfslock_lock &= ~NFSV4LOCK_WANTED; wakeup((caddr_t)lckp); } NFSUNLOCKCLSTATE(); } Index: head/sys/fs/nfsclient/nfs_clkdtrace.c =================================================================== --- head/sys/fs/nfsclient/nfs_clkdtrace.c (revision 244041) +++ head/sys/fs/nfsclient/nfs_clkdtrace.c (revision 244042) @@ -1,600 +1,600 @@ /*- * Copyright (c) 2009 Robert N. M. Watson * All rights reserved. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include /* * dtnfscl is a DTrace provider that tracks the intent to perform RPCs * in the NFS client, as well as acess to and maintenance of the access and * attribute caches. This is not quite the same as RPCs, because NFS may * issue multiple RPC transactions in the event that authentication fails, * there's a jukebox error, or none at all if the access or attribute cache * hits. However, it cleanly represents the logical layer between RPC * transmission and vnode/vfs operations, providing access to state linking * the two. */ static int dtnfsclient_unload(void); static void dtnfsclient_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); static void dtnfsclient_provide(void *, dtrace_probedesc_t *); static void dtnfsclient_destroy(void *, dtrace_id_t, void *); static void dtnfsclient_enable(void *, dtrace_id_t, void *); static void dtnfsclient_disable(void *, dtrace_id_t, void *); static void dtnfsclient_load(void *); static dtrace_pattr_t dtnfsclient_attr = { { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, }; /* * Description of NFSv4, NFSv3 and (optional) NFSv2 probes for a procedure. */ struct dtnfsclient_rpc { char *nr_v4_name; char *nr_v3_name; /* Or NULL if none. */ char *nr_v2_name; /* Or NULL if none. */ /* * IDs for the start and done cases, for NFSv2, NFSv3 and NFSv4. */ uint32_t nr_v2_id_start, nr_v2_id_done; uint32_t nr_v3_id_start, nr_v3_id_done; uint32_t nr_v4_id_start, nr_v4_id_done; }; /* * This table is indexed by NFSv3 procedure number, but also used for NFSv2 * procedure names and NFSv4 operations. */ -static struct dtnfsclient_rpc dtnfsclient_rpcs[NFS_NPROCS + 1] = { +static struct dtnfsclient_rpc dtnfsclient_rpcs[NFSV41_NPROCS + 1] = { { "null", "null", "null" }, { "getattr", "getattr", "getattr" }, { "setattr", "setattr", "setattr" }, { "lookup", "lookup", "lookup" }, { "access", "access", "noop" }, { "readlink", "readlink", "readlink" }, { "read", "read", "read" }, { "write", "write", "write" }, { "create", "create", "create" }, { "mkdir", "mkdir", "mkdir" }, { "symlink", "symlink", "symlink" }, { "mknod", "mknod" }, { "remove", "remove", "remove" }, { "rmdir", "rmdir", "rmdir" }, { "rename", "rename", "rename" }, { "link", "link", "link" }, { "readdir", "readdir", "readdir" }, { "readdirplus", "readdirplus" }, { "fsstat", "fsstat", "statfs" }, { "fsinfo", "fsinfo" }, { "pathconf", "pathconf" }, { "commit", "commit" }, { "lookupp" }, { "setclientid" }, { "setclientidcfrm" }, { "lock" }, { "locku" }, { "open" }, { "close" }, { "openconfirm" }, { "lockt" }, { "opendowngrade" }, { "renew" }, { "putrootfh" }, { "releaselckown" }, { "delegreturn" }, { "retdelegremove" }, { "retdelegrename1" }, { "retdelegrename2" }, { "getacl" }, { "setacl" }, { "noop", "noop", "noop" } }; /* * Module name strings. */ static char *dtnfsclient_accesscache_str = "accesscache"; static char *dtnfsclient_attrcache_str = "attrcache"; static char *dtnfsclient_nfs2_str = "nfs2"; static char *dtnfsclient_nfs3_str = "nfs3"; static char *dtnfsclient_nfs4_str = "nfs4"; /* * Function name strings. */ static char *dtnfsclient_flush_str = "flush"; static char *dtnfsclient_load_str = "load"; static char *dtnfsclient_get_str = "get"; /* * Name strings. */ static char *dtnfsclient_done_str = "done"; static char *dtnfsclient_hit_str = "hit"; static char *dtnfsclient_miss_str = "miss"; static char *dtnfsclient_start_str = "start"; static dtrace_pops_t dtnfsclient_pops = { dtnfsclient_provide, NULL, dtnfsclient_enable, dtnfsclient_disable, NULL, NULL, dtnfsclient_getargdesc, NULL, NULL, dtnfsclient_destroy }; static dtrace_provider_id_t dtnfsclient_id; /* * Most probes are generated from the above RPC table, but for access and * attribute caches, we have specific IDs we recognize and handle specially * in various spots. */ extern uint32_t nfscl_accesscache_flush_done_id; extern uint32_t nfscl_accesscache_get_hit_id; extern uint32_t nfscl_accesscache_get_miss_id; extern uint32_t nfscl_accesscache_load_done_id; extern uint32_t nfscl_attrcache_flush_done_id; extern uint32_t nfscl_attrcache_get_hit_id; extern uint32_t nfscl_attrcache_get_miss_id; extern uint32_t nfscl_attrcache_load_done_id; /* * When tracing on a procedure is enabled, the DTrace ID for an RPC event is * stored in one of these two NFS client-allocated arrays; 0 indicates that * the event is not being traced so probes should not be called. * - * For simplicity, we allocate both v2, v3 and v4 arrays as NFS_NPROCS + 1, and - * the v2, v3 arrays are simply sparse. + * For simplicity, we allocate both v2, v3 and v4 arrays as NFSV41_NPROCS + 1, + * and the v2, v3 arrays are simply sparse. */ -extern uint32_t nfscl_nfs2_start_probes[NFS_NPROCS + 1]; -extern uint32_t nfscl_nfs2_done_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs2_start_probes[NFSV41_NPROCS + 1]; +extern uint32_t nfscl_nfs2_done_probes[NFSV41_NPROCS + 1]; -extern uint32_t nfscl_nfs3_start_probes[NFS_NPROCS + 1]; -extern uint32_t nfscl_nfs3_done_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs3_start_probes[NFSV41_NPROCS + 1]; +extern uint32_t nfscl_nfs3_done_probes[NFSV41_NPROCS + 1]; -extern uint32_t nfscl_nfs4_start_probes[NFS_NPROCS + 1]; -extern uint32_t nfscl_nfs4_done_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs4_start_probes[NFSV41_NPROCS + 1]; +extern uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; /* * Look up a DTrace probe ID to see if it's associated with a "done" event -- * if so, we will return a fourth argument type of "int". */ static int dtnfs234_isdoneprobe(dtrace_id_t id) { int i; - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v4_id_done == id || dtnfsclient_rpcs[i].nr_v3_id_done == id || dtnfsclient_rpcs[i].nr_v2_id_done == id) return (1); } return (0); } static void dtnfsclient_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) { const char *p = NULL; if (id == nfscl_accesscache_flush_done_id || id == nfscl_attrcache_flush_done_id || id == nfscl_attrcache_get_miss_id) { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } else if (id == nfscl_accesscache_get_hit_id || id == nfscl_accesscache_get_miss_id) { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; case 1: p = "uid_t"; break; case 2: p = "uint32_t"; break; default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } else if (id == nfscl_accesscache_load_done_id) { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; case 1: p = "uid_t"; break; case 2: p = "uint32_t"; break; case 3: p = "int"; break; default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } else if (id == nfscl_attrcache_get_hit_id) { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; case 1: p = "struct vattr *"; break; default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } else if (id == nfscl_attrcache_load_done_id) { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; case 1: p = "struct vattr *"; break; case 2: p = "int"; break; default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } else { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; case 1: p = "struct mbuf *"; break; case 2: p = "struct ucred *"; break; case 3: p = "int"; break; case 4: if (dtnfs234_isdoneprobe(id)) { p = "int"; break; } /* FALLSTHROUGH */ default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } if (p != NULL) strlcpy(desc->dtargd_native, p, sizeof(desc->dtargd_native)); } static void dtnfsclient_provide(void *arg, dtrace_probedesc_t *desc) { int i; if (desc != NULL) return; /* * Register access cache probes. */ if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_flush_str, dtnfsclient_done_str) == 0) { nfscl_accesscache_flush_done_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_flush_str, dtnfsclient_done_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_get_str, dtnfsclient_hit_str) == 0) { nfscl_accesscache_get_hit_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_get_str, dtnfsclient_hit_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_get_str, dtnfsclient_miss_str) == 0) { nfscl_accesscache_get_miss_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_get_str, dtnfsclient_miss_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_load_str, dtnfsclient_done_str) == 0) { nfscl_accesscache_load_done_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_load_str, dtnfsclient_done_str, 0, NULL); } /* * Register attribute cache probes. */ if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_flush_str, dtnfsclient_done_str) == 0) { nfscl_attrcache_flush_done_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_flush_str, dtnfsclient_done_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_get_str, dtnfsclient_hit_str) == 0) { nfscl_attrcache_get_hit_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_get_str, dtnfsclient_hit_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_get_str, dtnfsclient_miss_str) == 0) { nfscl_attrcache_get_miss_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_get_str, dtnfsclient_miss_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_load_str, dtnfsclient_done_str) == 0) { nfscl_attrcache_load_done_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_load_str, dtnfsclient_done_str, 0, NULL); } /* * Register NFSv2 RPC procedures; note sparseness check for each slot * in the NFSv3, NFSv4 procnum-indexed array. */ - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v2_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs2_str, dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_start_str) == 0) { dtnfsclient_rpcs[i].nr_v2_id_start = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs2_str, dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_start_str, 0, &nfscl_nfs2_start_probes[i]); } if (dtnfsclient_rpcs[i].nr_v2_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs2_str, dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_done_str) == 0) { dtnfsclient_rpcs[i].nr_v2_id_done = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs2_str, dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_done_str, 0, &nfscl_nfs2_done_probes[i]); } } /* * Register NFSv3 RPC procedures; note sparseness check for each slot * in the NFSv4 procnum-indexed array. */ - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v3_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs3_str, dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_start_str) == 0) { dtnfsclient_rpcs[i].nr_v3_id_start = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs3_str, dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_start_str, 0, &nfscl_nfs3_start_probes[i]); } if (dtnfsclient_rpcs[i].nr_v3_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs3_str, dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_done_str) == 0) { dtnfsclient_rpcs[i].nr_v3_id_done = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs3_str, dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_done_str, 0, &nfscl_nfs3_done_probes[i]); } } /* * Register NFSv4 RPC procedures. */ - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs4_str, dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_start_str) == 0) { dtnfsclient_rpcs[i].nr_v4_id_start = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs4_str, dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_start_str, 0, &nfscl_nfs4_start_probes[i]); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs4_str, dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_done_str) == 0) { dtnfsclient_rpcs[i].nr_v4_id_done = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs4_str, dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_done_str, 0, &nfscl_nfs4_done_probes[i]); } } } static void dtnfsclient_destroy(void *arg, dtrace_id_t id, void *parg) { } static void dtnfsclient_enable(void *arg, dtrace_id_t id, void *parg) { uint32_t *p = parg; void *f = dtrace_probe; if (id == nfscl_accesscache_flush_done_id) dtrace_nfscl_accesscache_flush_done_probe = f; else if (id == nfscl_accesscache_get_hit_id) dtrace_nfscl_accesscache_get_hit_probe = f; else if (id == nfscl_accesscache_get_miss_id) dtrace_nfscl_accesscache_get_miss_probe = f; else if (id == nfscl_accesscache_load_done_id) dtrace_nfscl_accesscache_load_done_probe = f; else if (id == nfscl_attrcache_flush_done_id) dtrace_nfscl_attrcache_flush_done_probe = f; else if (id == nfscl_attrcache_get_hit_id) dtrace_nfscl_attrcache_get_hit_probe = f; else if (id == nfscl_attrcache_get_miss_id) dtrace_nfscl_attrcache_get_miss_probe = f; else if (id == nfscl_attrcache_load_done_id) dtrace_nfscl_attrcache_load_done_probe = f; else *p = id; } static void dtnfsclient_disable(void *arg, dtrace_id_t id, void *parg) { uint32_t *p = parg; if (id == nfscl_accesscache_flush_done_id) dtrace_nfscl_accesscache_flush_done_probe = NULL; else if (id == nfscl_accesscache_get_hit_id) dtrace_nfscl_accesscache_get_hit_probe = NULL; else if (id == nfscl_accesscache_get_miss_id) dtrace_nfscl_accesscache_get_miss_probe = NULL; else if (id == nfscl_accesscache_load_done_id) dtrace_nfscl_accesscache_load_done_probe = NULL; else if (id == nfscl_attrcache_flush_done_id) dtrace_nfscl_attrcache_flush_done_probe = NULL; else if (id == nfscl_attrcache_get_hit_id) dtrace_nfscl_attrcache_get_hit_probe = NULL; else if (id == nfscl_attrcache_get_miss_id) dtrace_nfscl_attrcache_get_miss_probe = NULL; else if (id == nfscl_attrcache_load_done_id) dtrace_nfscl_attrcache_load_done_probe = NULL; else *p = 0; } static void dtnfsclient_load(void *dummy) { if (dtrace_register("nfscl", &dtnfsclient_attr, DTRACE_PRIV_USER, NULL, &dtnfsclient_pops, NULL, &dtnfsclient_id) != 0) return; dtrace_nfscl_nfs234_start_probe = (dtrace_nfsclient_nfs23_start_probe_func_t)dtrace_probe; dtrace_nfscl_nfs234_done_probe = (dtrace_nfsclient_nfs23_done_probe_func_t)dtrace_probe; } static int dtnfsclient_unload() { dtrace_nfscl_nfs234_start_probe = NULL; dtrace_nfscl_nfs234_done_probe = NULL; return (dtrace_unregister(dtnfsclient_id)); } static int dtnfsclient_modevent(module_t mod __unused, int type, void *data __unused) { int error = 0; switch (type) { case MOD_LOAD: break; case MOD_UNLOAD: break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } SYSINIT(dtnfsclient_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, dtnfsclient_load, NULL); SYSUNINIT(dtnfsclient_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, dtnfsclient_unload, NULL); DEV_MODULE(dtnfscl, dtnfsclient_modevent, NULL); MODULE_VERSION(dtnfscl, 1); MODULE_DEPEND(dtnfscl, dtrace, 1, 1, 1); MODULE_DEPEND(dtnfscl, opensolaris, 1, 1, 1); MODULE_DEPEND(dtnfscl, nfscl, 1, 1, 1); MODULE_DEPEND(dtnfscl, nfscommon, 1, 1, 1); Index: head/sys/fs/nfsclient/nfs_clkrpc.c =================================================================== --- head/sys/fs/nfsclient/nfs_clkrpc.c (revision 244041) +++ head/sys/fs/nfsclient/nfs_clkrpc.c (revision 244042) @@ -1,289 +1,294 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_kgssapi.h" #include #include #include #include NFSDLOCKMUTEX; -SVCPOOL *nfscbd_pool; +extern SVCPOOL *nfscbd_pool; static int nfs_cbproc(struct nfsrv_descript *, u_int32_t); extern u_long sb_max_adj; extern int nfs_numnfscbd; +extern int nfscl_debuglevel; /* * NFS client system calls for handling callbacks. */ /* * Handles server to client callbacks. */ static void nfscb_program(struct svc_req *rqst, SVCXPRT *xprt) { struct nfsrv_descript nd; int cacherep, credflavor; memset(&nd, 0, sizeof(nd)); if (rqst->rq_proc != NFSPROC_NULL && rqst->rq_proc != NFSV4PROC_CBCOMPOUND) { svcerr_noproc(rqst); svc_freereq(rqst); return; } nd.nd_procnum = rqst->rq_proc; nd.nd_flag = (ND_NFSCB | ND_NFSV4); /* * Note: we want rq_addr, not svc_getrpccaller for nd_nam2 - * NFS_SRVMAXDATA uses a NULL value for nd_nam2 to detect TCP * mounts. */ nd.nd_mrep = rqst->rq_args; rqst->rq_args = NULL; newnfs_realign(&nd.nd_mrep); nd.nd_md = nd.nd_mrep; nd.nd_dpos = mtod(nd.nd_md, caddr_t); nd.nd_nam = svc_getrpccaller(rqst); nd.nd_nam2 = rqst->rq_addr; nd.nd_mreq = NULL; nd.nd_cred = NULL; + NFSCL_DEBUG(1, "cbproc=%d\n",nd.nd_procnum); if (nd.nd_procnum != NFSPROC_NULL) { if (!svc_getcred(rqst, &nd.nd_cred, &credflavor)) { svcerr_weakauth(rqst); svc_freereq(rqst); m_freem(nd.nd_mrep); return; } /* For now, I don't care what credential flavor was used. */ #ifdef notyet #ifdef MAC mac_cred_associate_nfsd(nd.nd_cred); #endif #endif cacherep = nfs_cbproc(&nd, rqst->rq_xid); } else { NFSMGET(nd.nd_mreq); nd.nd_mreq->m_len = 0; cacherep = RC_REPLY; } if (nd.nd_mrep != NULL) m_freem(nd.nd_mrep); if (nd.nd_cred != NULL) crfree(nd.nd_cred); if (cacherep == RC_DROPIT) { if (nd.nd_mreq != NULL) m_freem(nd.nd_mreq); svc_freereq(rqst); return; } if (nd.nd_mreq == NULL) { svcerr_decode(rqst); svc_freereq(rqst); return; } if (nd.nd_repstat & NFSERR_AUTHERR) { svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR); if (nd.nd_mreq != NULL) m_freem(nd.nd_mreq); - } else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq)) { + } else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq)) svcerr_systemerr(rqst); - } + else + NFSCL_DEBUG(1, "cbrep sent\n"); svc_freereq(rqst); } /* * Check the cache and, optionally, do the RPC. * Return the appropriate cache response. */ static int nfs_cbproc(struct nfsrv_descript *nd, u_int32_t xid) { struct thread *td = curthread; int cacherep; if (nd->nd_nam2 == NULL) nd->nd_flag |= ND_STREAMSOCK; nfscl_docb(nd, td); if (nd->nd_repstat == NFSERR_DONTREPLY) cacherep = RC_DROPIT; else cacherep = RC_REPLY; return (cacherep); } /* * Adds a socket to the list for servicing by nfscbds. */ int nfscbd_addsock(struct file *fp) { int siz; struct socket *so; int error; SVCXPRT *xprt; so = fp->f_data; siz = sb_max_adj; error = soreserve(so, siz, siz); if (error) return (error); /* * Steal the socket from userland so that it doesn't close * unexpectedly. */ if (so->so_type == SOCK_DGRAM) xprt = svc_dg_create(nfscbd_pool, so, 0, 0); else xprt = svc_vc_create(nfscbd_pool, so, 0, 0); if (xprt) { fp->f_ops = &badfileops; fp->f_data = NULL; svc_reg(xprt, NFS_CALLBCKPROG, NFSV4_CBVERS, nfscb_program, NULL); SVC_RELEASE(xprt); } return (0); } /* * Called by nfssvc() for nfscbds. Just loops around servicing rpc requests * until it is killed by a signal. * * For now, only support callbacks via RPCSEC_GSS if there is a KerberosV * keytab entry with a host based entry in it on the client. (I'm not even * sure that getting Acceptor credentials for a user principal with a * credentials cache is possible, but even if it is, major changes to the * kgssapi would be required.) * I don't believe that this is a serious limitation since, as of 2009, most * NFSv4 servers supporting callbacks are using AUTH_SYS for callbacks even * when the client is using RPCSEC_GSS. (This BSD server uses AUTH_SYS * for callbacks unless nfsrv_gsscallbackson is set non-zero.) */ int nfscbd_nfsd(struct thread *td, struct nfsd_nfscbd_args *args) { char principal[128]; int error; if (args != NULL) { error = copyinstr(args->principal, principal, sizeof(principal), NULL); if (error) return (error); } else { principal[0] = '\0'; } /* * Only the first nfsd actually does any work. The RPC code * adds threads to it as needed. Any extra processes offered * by nfsd just exit. If nfsd is new enough, it will call us * once with a structure that specifies how many threads to * use. */ NFSD_LOCK(); if (nfs_numnfscbd == 0) { nfs_numnfscbd++; NFSD_UNLOCK(); if (principal[0] != '\0') rpc_gss_set_svc_name_call(principal, "kerberosv5", GSS_C_INDEFINITE, NFS_CALLBCKPROG, NFSV4_CBVERS); nfscbd_pool->sp_minthreads = 4; nfscbd_pool->sp_maxthreads = 4; svc_run(nfscbd_pool); rpc_gss_clear_svc_name_call(NFS_CALLBCKPROG, NFSV4_CBVERS); NFSD_LOCK(); nfs_numnfscbd--; nfsrvd_cbinit(1); } NFSD_UNLOCK(); return (0); } /* * Initialize the data structures for the server. * Handshake with any new nfsds starting up to avoid any chance of * corruption. */ void nfsrvd_cbinit(int terminating) { NFSD_LOCK_ASSERT(); if (terminating) { + /* Wait for any xprt registrations to complete. */ + while (nfs_numnfscbd > 0) + msleep(&nfs_numnfscbd, NFSDLOCKMUTEXPTR, PZERO, + "nfscbdt", 0); NFSD_UNLOCK(); svcpool_destroy(nfscbd_pool); nfscbd_pool = NULL; - NFSD_LOCK(); - } - - NFSD_UNLOCK(); + } else + NFSD_UNLOCK(); nfscbd_pool = svcpool_create("nfscbd", NULL); nfscbd_pool->sp_rcache = NULL; nfscbd_pool->sp_assign = NULL; nfscbd_pool->sp_done = NULL; NFSD_LOCK(); } Index: head/sys/fs/nfsclient/nfs_clport.c =================================================================== --- head/sys/fs/nfsclient/nfs_clport.c (revision 244041) +++ head/sys/fs/nfsclient/nfs_clport.c (revision 244042) @@ -1,1334 +1,1337 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_kdtrace.h" #include /* * generally, I don't like #includes inside .h files, but it seems to * be the easiest way to handle the port. */ #include #include #include #include #include #ifdef KDTRACE_HOOKS dtrace_nfsclient_attrcache_flush_probe_func_t dtrace_nfscl_attrcache_flush_done_probe; uint32_t nfscl_attrcache_flush_done_id; dtrace_nfsclient_attrcache_get_hit_probe_func_t dtrace_nfscl_attrcache_get_hit_probe; uint32_t nfscl_attrcache_get_hit_id; dtrace_nfsclient_attrcache_get_miss_probe_func_t dtrace_nfscl_attrcache_get_miss_probe; uint32_t nfscl_attrcache_get_miss_id; dtrace_nfsclient_attrcache_load_probe_func_t dtrace_nfscl_attrcache_load_done_probe; uint32_t nfscl_attrcache_load_done_id; #endif /* !KDTRACE_HOOKS */ extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern struct vop_vector newnfs_vnodeops; extern struct vop_vector newnfs_fifoops; extern uma_zone_t newnfsnode_zone; extern struct buf_ops buf_ops_newnfs; extern int ncl_pbuf_freecnt; extern short nfsv4_cbport; extern int nfscl_enablecallb; extern int nfs_numnfscbd; extern int nfscl_inited; struct mtx nfs_clstate_mutex; struct mtx ncl_iod_mutex; NFSDLOCKMUTEX; extern void (*ncl_call_invalcaches)(struct vnode *); /* * Comparison function for vfs_hash functions. */ int newnfs_vncmpf(struct vnode *vp, void *arg) { struct nfsfh *nfhp = (struct nfsfh *)arg; struct nfsnode *np = VTONFS(vp); if (np->n_fhp->nfh_len != nfhp->nfh_len || NFSBCMP(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len)) return (1); return (0); } /* * Look up a vnode/nfsnode by file handle. * Callers must check for mount points!! * In all cases, a pointer to a * nfsnode structure is returned. * This variant takes a "struct nfsfh *" as second argument and uses * that structure up, either by hanging off the nfsnode or FREEing it. */ int nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp, struct componentname *cnp, struct thread *td, struct nfsnode **npp, void *stuff, int lkflags) { struct nfsnode *np, *dnp; struct vnode *vp, *nvp; struct nfsv4node *newd, *oldd; int error; u_int hash; struct nfsmount *nmp; nmp = VFSTONFS(mntp); dnp = VTONFS(dvp); *npp = NULL; hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT); error = vfs_hash_get(mntp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); if (error == 0 && nvp != NULL) { /* * I believe there is a slight chance that vgonel() could * get called on this vnode between when NFSVOPLOCK() drops * the VI_LOCK() and vget() acquires it again, so that it * hasn't yet had v_usecount incremented. If this were to * happen, the VI_DOOMED flag would be set, so check for * that here. Since we now have the v_usecount incremented, * we should be ok until we vrele() it, if the VI_DOOMED * flag isn't set now. */ VI_LOCK(nvp); if ((nvp->v_iflag & VI_DOOMED)) { VI_UNLOCK(nvp); vrele(nvp); error = ENOENT; } else { VI_UNLOCK(nvp); } } if (error) { FREE((caddr_t)nfhp, M_NFSFH); return (error); } if (nvp != NULL) { np = VTONFS(nvp); /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ oldd = newd = NULL; if ((nmp->nm_flag & NFSMNT_NFSV4) && np->n_v4 != NULL && nvp->v_type == VREG && (np->n_v4->n4_namelen != cnp->cn_namelen || NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { MALLOC(newd, struct nfsv4node *, sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + + cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); NFSLOCKNODE(np); if (newd != NULL && np->n_v4 != NULL && nvp->v_type == VREG && (np->n_v4->n4_namelen != cnp->cn_namelen || NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { oldd = np->n_v4; np->n_v4 = newd; newd = NULL; np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = cnp->cn_namelen; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen); } NFSUNLOCKNODE(np); } if (newd != NULL) FREE((caddr_t)newd, M_NFSV4NODE); if (oldd != NULL) FREE((caddr_t)oldd, M_NFSV4NODE); *npp = np; FREE((caddr_t)nfhp, M_NFSFH); return (0); } np = uma_zalloc(newnfsnode_zone, M_WAITOK | M_ZERO); error = getnewvnode("newnfs", mntp, &newnfs_vnodeops, &nvp); if (error) { uma_zfree(newnfsnode_zone, np); FREE((caddr_t)nfhp, M_NFSFH); return (error); } vp = nvp; KASSERT(vp->v_bufobj.bo_bsize != 0, ("nfscl_nget: bo_bsize == 0")); vp->v_bufobj.bo_ops = &buf_ops_newnfs; vp->v_data = np; np->n_vnode = vp; /* * Initialize the mutex even if the vnode is going to be a loser. * This simplifies the logic in reclaim, which can then unconditionally * destroy the mutex (in the case of the loser, or if hash_insert * happened to return an error no special casing is needed). */ mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK); /* * Are we getting the root? If so, make sure the vnode flags * are correct */ if ((nfhp->nfh_len == nmp->nm_fhsize) && !bcmp(nfhp->nfh_fh, nmp->nm_fh, nfhp->nfh_len)) { if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; } np->n_fhp = nfhp; /* * For NFSv4, we have to attach the directory file handle and * file name, so that Open Ops can be done later. */ if (nmp->nm_flag & NFSMNT_NFSV4) { MALLOC(np->n_v4, struct nfsv4node *, sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = cnp->cn_namelen; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen); } else { np->n_v4 = NULL; } /* * NFS supports recursive and shared locking. */ lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL); VN_LOCK_AREC(vp); VN_LOCK_ASHARE(vp); error = insmntque(vp, mntp); if (error != 0) { *npp = NULL; mtx_destroy(&np->n_mtx); FREE((caddr_t)nfhp, M_NFSFH); if (np->n_v4 != NULL) FREE((caddr_t)np->n_v4, M_NFSV4NODE); uma_zfree(newnfsnode_zone, np); return (error); } error = vfs_hash_insert(vp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); /* vfs_hash_insert() vput()'s the losing vnode */ return (0); } *npp = np; return (0); } /* * Anothe variant of nfs_nget(). This one is only used by reopen. It * takes almost the same args as nfs_nget(), but only succeeds if an entry * exists in the cache. (Since files should already be "open" with a * vnode ref cnt on the node when reopen calls this, it should always * succeed.) * Also, don't get a vnode lock, since it may already be locked by some * other process that is handling it. This is ok, since all other threads * on the client are blocked by the nfsc_lock being exclusively held by the * caller of this function. */ int nfscl_ngetreopen(struct mount *mntp, u_int8_t *fhp, int fhsize, struct thread *td, struct nfsnode **npp) { struct vnode *nvp; u_int hash; struct nfsfh *nfhp; int error; *npp = NULL; /* For forced dismounts, just return error. */ if ((mntp->mnt_kern_flag & MNTK_UNMOUNTF)) return (EINTR); MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + fhsize, M_NFSFH, M_WAITOK); bcopy(fhp, &nfhp->nfh_fh[0], fhsize); nfhp->nfh_len = fhsize; hash = fnv_32_buf(fhp, fhsize, FNV1_32_INIT); /* * First, try to get the vnode locked, but don't block for the lock. */ error = vfs_hash_get(mntp, hash, (LK_EXCLUSIVE | LK_NOWAIT), td, &nvp, newnfs_vncmpf, nfhp); if (error == 0 && nvp != NULL) { NFSVOPUNLOCK(nvp, 0); } else if (error == EBUSY) { /* * The LK_EXCLOTHER lock type tells nfs_lock1() to not try * and lock the vnode, but just get a v_usecount on it. * LK_NOWAIT is set so that when vget() returns ENOENT, * vfs_hash_get() fails instead of looping. * If this succeeds, it is safe so long as a vflush() with * FORCECLOSE has not been done. Since the Renew thread is * stopped and the MNTK_UNMOUNTF flag is set before doing * a vflush() with FORCECLOSE, we should be ok here. */ if ((mntp->mnt_kern_flag & MNTK_UNMOUNTF)) error = EINTR; else error = vfs_hash_get(mntp, hash, (LK_EXCLOTHER | LK_NOWAIT), td, &nvp, newnfs_vncmpf, nfhp); } FREE(nfhp, M_NFSFH); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); return (0); } return (EINVAL); } /* * Load the attribute cache (that lives in the nfsnode entry) with * the attributes of the second argument and * Iff vaper not NULL * copy the attributes to *vaper * Similar to nfs_loadattrcache(), except the attributes are passed in * instead of being parsed out of the mbuf list. */ int nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, void *stuff, int writeattr, int dontshrink) { struct vnode *vp = *vpp; struct vattr *vap, *nvap = &nap->na_vattr, *vaper = nvaper; struct nfsnode *np; struct nfsmount *nmp; struct timespec mtime_save; /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); NFSLOCKNODE(np); if (vp->v_type != nvap->va_type) { vp->v_type = nvap->va_type; if (vp->v_type == VFIFO) vp->v_op = &newnfs_fifoops; np->n_mtime = nvap->va_mtime; } nmp = VFSTONFS(vp->v_mount); vap = &np->n_vattr.na_vattr; mtime_save = vap->va_mtime; if (writeattr) { np->n_vattr.na_filerev = nap->na_filerev; np->n_vattr.na_size = nap->na_size; np->n_vattr.na_mtime = nap->na_mtime; np->n_vattr.na_ctime = nap->na_ctime; np->n_vattr.na_fsid = nap->na_fsid; np->n_vattr.na_mode = nap->na_mode; } else { NFSBCOPY((caddr_t)nap, (caddr_t)&np->n_vattr, sizeof (struct nfsvattr)); } /* * For NFSv4, if the node's fsid is not equal to the mount point's * fsid, return the low order 32bits of the node's fsid. This * allows getcwd(3) to work. There is a chance that the fsid might * be the same as a local fs, but since this is in an NFS mount * point, I don't think that will cause any problems? */ if (NFSHASNFSV4(nmp) && NFSHASHASSETFSID(nmp) && (nmp->nm_fsid[0] != np->n_vattr.na_filesid[0] || nmp->nm_fsid[1] != np->n_vattr.na_filesid[1])) { /* * va_fsid needs to be set to some value derived from * np->n_vattr.na_filesid that is not equal * vp->v_mount->mnt_stat.f_fsid[0], so that it changes * from the value used for the top level server volume * in the mounted subtree. */ if (vp->v_mount->mnt_stat.f_fsid.val[0] != (uint32_t)np->n_vattr.na_filesid[0]) vap->va_fsid = (uint32_t)np->n_vattr.na_filesid[0]; else vap->va_fsid = (uint32_t)hash32_buf( np->n_vattr.na_filesid, 2 * sizeof(uint64_t), 0); } else vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; np->n_attrstamp = time_second; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (dontshrink && vap->va_size < np->n_size) { /* * We've been told not to shrink the file; * zero np->n_attrstamp to indicate that * the attributes are stale. */ vap->va_size = np->n_size; np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } else if (np->n_flag & NMODIFIED) { /* * We've modified the file: Use the larger * of our size, and the server's size. */ if (vap->va_size < np->n_size) { vap->va_size = np->n_size; } else { np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; } } else { np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; } vnode_pager_setsize(vp, np->n_size); } else { np->n_size = vap->va_size; } } /* * The following checks are added to prevent a race between (say) * a READDIR+ and a WRITE. * READDIR+, WRITE requests sent out. * READDIR+ resp, WRITE resp received on client. * However, the WRITE resp was handled before the READDIR+ resp * causing the post op attrs from the write to be loaded first * and the attrs from the READDIR+ to be loaded later. If this * happens, we have stale attrs loaded into the attrcache. * We detect this by for the mtime moving back. We invalidate the * attrcache when this happens. */ if (timespeccmp(&mtime_save, &vap->va_mtime, >)) { /* Size changed or mtime went backwards */ np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } if (vaper != NULL) { NFSBCOPY((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } #ifdef KDTRACE_HOOKS if (np->n_attrstamp != 0) KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, 0); #endif NFSUNLOCKNODE(np); return (0); } /* * Fill in the client id name. For these bytes: * 1 - they must be unique * 2 - they should be persistent across client reboots * 1 is more critical than 2 * Use the mount point's unique id plus either the uuid or, if that * isn't set, random junk. */ void nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen) { int uuidlen; /* * First, put in the 64bit mount point identifier. */ if (idlen >= sizeof (u_int64_t)) { NFSBCOPY((caddr_t)&clval, cp, sizeof (u_int64_t)); cp += sizeof (u_int64_t); idlen -= sizeof (u_int64_t); } /* * If uuid is non-zero length, use it. */ uuidlen = strlen(uuid); if (uuidlen > 0 && idlen >= uuidlen) { NFSBCOPY(uuid, cp, uuidlen); cp += uuidlen; idlen -= uuidlen; } /* * This only normally happens if the uuid isn't set. */ while (idlen > 0) { *cp++ = (u_int8_t)(arc4random() % 256); idlen--; } } /* * Fill in a lock owner name. For now, pid + the process's creation time. */ void nfscl_filllockowner(void *id, u_int8_t *cp, int flags) { union { u_int32_t lval; u_int8_t cval[4]; } tl; struct proc *p; if (id == NULL) { printf("NULL id\n"); bzero(cp, NFSV4CL_LOCKNAMELEN); return; } if ((flags & F_POSIX) != 0) { p = (struct proc *)id; tl.lval = p->p_pid; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp++ = tl.cval[3]; tl.lval = p->p_stats->p_start.tv_sec; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp++ = tl.cval[3]; tl.lval = p->p_stats->p_start.tv_usec; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp = tl.cval[3]; } else if ((flags & F_FLOCK) != 0) { bcopy(&id, cp, sizeof(id)); bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id)); } else { printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n"); bzero(cp, NFSV4CL_LOCKNAMELEN); } } /* * Find the parent process for the thread passed in as an argument. * If none exists, return NULL, otherwise return a thread for the parent. * (Can be any of the threads, since it is only used for td->td_proc.) */ NFSPROC_T * nfscl_getparent(struct thread *td) { struct proc *p; struct thread *ptd; if (td == NULL) return (NULL); p = td->td_proc; if (p->p_pid == 0) return (NULL); p = p->p_pptr; if (p == NULL) return (NULL); ptd = TAILQ_FIRST(&p->p_threads); return (ptd); } /* * Start up the renew kernel thread. */ static void start_nfscl(void *arg) { struct nfsclclient *clp; struct thread *td; clp = (struct nfsclclient *)arg; td = TAILQ_FIRST(&clp->nfsc_renewthread->p_threads); nfscl_renewthread(clp, td); kproc_exit(0); } void nfscl_start_renewthread(struct nfsclclient *clp) { kproc_create(start_nfscl, (void *)clp, &clp->nfsc_renewthread, 0, 0, "nfscl"); } /* * Handle wcc_data. * For NFSv4, it assumes that nfsv4_wccattr() was used to set up the getattr * as the first Op after PutFH. * (For NFSv4, the postop attributes are after the Op, so they can't be * parsed here. A separate call to nfscl_postop_attr() is required.) */ int nfscl_wcc_data(struct nfsrv_descript *nd, struct vnode *vp, struct nfsvattr *nap, int *flagp, int *wccflagp, void *stuff) { u_int32_t *tl; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; int error = 0; if (wccflagp != NULL) *wccflagp = 0; if (nd->nd_flag & ND_NFSV3) { *flagp = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); if (wccflagp != NULL) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) && np->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); mtx_unlock(&np->n_mtx); } } error = nfscl_postop_attr(nd, nap, flagp, stuff); } else if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); if (error) return (error); /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*++tl) nd->nd_flag |= ND_NOMOREDATA; if (wccflagp != NULL && nfsva.na_vattr.va_mtime.tv_sec != 0) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == nfsva.na_vattr.va_mtime.tv_sec && np->n_mtime.tv_nsec == nfsva.na_vattr.va_mtime.tv_sec); mtx_unlock(&np->n_mtx); } } nfsmout: return (error); } /* * Get postop attributes. */ int nfscl_postop_attr(struct nfsrv_descript *nd, struct nfsvattr *nap, int *retp, void *stuff) { u_int32_t *tl; int error = 0; *retp = 0; if (nd->nd_flag & ND_NOMOREDATA) return (error); if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); *retp = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV4) { /* * For NFSv4, the postop attr are at the end, so no point * in looking if nd_repstat != 0. */ if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) /* should never happen since nd_repstat != 0 */ nd->nd_flag |= ND_NOMOREDATA; else *retp = 1; } } else if (!nd->nd_repstat) { /* For NFSv2, the attributes are here iff nd_repstat == 0 */ *retp = 1; } if (*retp) { error = nfsm_loadattr(nd, nap); if (error) *retp = 0; } nfsmout: return (error); } /* * Fill in the setable attributes. The full argument indicates whether * to fill in them all or just mode and time. */ void nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, struct vnode *vp, int flags, u_int32_t rdev) { u_int32_t *tl; struct nfsv2_sattr *sp; nfsattrbit_t attrbits; struct timeval curtime; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_BUILD(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (mode_t)VNOVAL) sp->sa_mode = newnfs_xdrneg1; else sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = newnfs_xdrneg1; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = newnfs_xdrneg1; else sp->sa_gid = txdr_unsigned(vap->va_gid); if (flags & NFSSATTR_SIZE0) sp->sa_size = 0; else if (flags & NFSSATTR_SIZENEG1) sp->sa_size = newnfs_xdrneg1; else if (flags & NFSSATTR_SIZERDEV) sp->sa_size = txdr_unsigned(rdev); else sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); break; case ND_NFSV3: getmicrotime(&curtime); if (vap->va_mode != (mode_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_mode); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_uid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_gid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(vap->va_size, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if (vap->va_atime.tv_sec != curtime.tv_sec) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if (vap->va_mtime.tv_sec != curtime.tv_sec) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } break; case ND_NFSV4: NFSZERO_ATTRBIT(&attrbits); if (vap->va_mode != (mode_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE); if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP); if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); if (vap->va_atime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); if (vap->va_mtime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET); (void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); break; }; } /* * nfscl_request() - mostly a wrapper for newnfs_request(). */ int nfscl_request(struct nfsrv_descript *nd, struct vnode *vp, NFSPROC_T *p, struct ucred *cred, void *stuff) { int ret, vers; struct nfsmount *nmp; nmp = VFSTONFS(vp->v_mount); if (nd->nd_flag & ND_NFSV4) vers = NFS_VER4; else if (nd->nd_flag & ND_NFSV3) vers = NFS_VER3; else vers = NFS_VER2; ret = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, - NFS_PROG, vers, NULL, 1, NULL); + NFS_PROG, vers, NULL, 1, NULL, NULL); return (ret); } /* * fill in this bsden's variant of statfs using nfsstatfs. */ void nfscl_loadsbinfo(struct nfsmount *nmp, struct nfsstatfs *sfp, void *statfs) { struct statfs *sbp = (struct statfs *)statfs; if (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) { sbp->f_bsize = NFS_FABLKSIZE; sbp->f_blocks = sfp->sf_tbytes / NFS_FABLKSIZE; sbp->f_bfree = sfp->sf_fbytes / NFS_FABLKSIZE; /* * Although sf_abytes is uint64_t and f_bavail is int64_t, * the value after dividing by NFS_FABLKSIZE is small * enough that it will fit in 63bits, so it is ok to * assign it to f_bavail without fear that it will become * negative. */ sbp->f_bavail = sfp->sf_abytes / NFS_FABLKSIZE; sbp->f_files = sfp->sf_tfiles; /* Since f_ffree is int64_t, clip it to 63bits. */ if (sfp->sf_ffiles > INT64_MAX) sbp->f_ffree = INT64_MAX; else sbp->f_ffree = sfp->sf_ffiles; } else if ((nmp->nm_flag & NFSMNT_NFSV4) == 0) { /* * The type casts to (int32_t) ensure that this code is * compatible with the old NFS client, in that it will * propagate bit31 to the high order bits. This may or may * not be correct for NFSv2, but since it is a legacy * environment, I'd rather retain backwards compatibility. */ sbp->f_bsize = (int32_t)sfp->sf_bsize; sbp->f_blocks = (int32_t)sfp->sf_blocks; sbp->f_bfree = (int32_t)sfp->sf_bfree; sbp->f_bavail = (int32_t)sfp->sf_bavail; sbp->f_files = 0; sbp->f_ffree = 0; } } /* * Use the fsinfo stuff to update the mount point. */ void nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp) { if ((nmp->nm_wsize == 0 || fsp->fs_wtpref < nmp->nm_wsize) && fsp->fs_wtpref >= NFS_FABLKSIZE) nmp->nm_wsize = (fsp->fs_wtpref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); if (fsp->fs_wtmax < nmp->nm_wsize && fsp->fs_wtmax > 0) { nmp->nm_wsize = fsp->fs_wtmax & ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize == 0) nmp->nm_wsize = fsp->fs_wtmax; } if (nmp->nm_wsize < NFS_FABLKSIZE) nmp->nm_wsize = NFS_FABLKSIZE; if ((nmp->nm_rsize == 0 || fsp->fs_rtpref < nmp->nm_rsize) && fsp->fs_rtpref >= NFS_FABLKSIZE) nmp->nm_rsize = (fsp->fs_rtpref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); if (fsp->fs_rtmax < nmp->nm_rsize && fsp->fs_rtmax > 0) { nmp->nm_rsize = fsp->fs_rtmax & ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize == 0) nmp->nm_rsize = fsp->fs_rtmax; } if (nmp->nm_rsize < NFS_FABLKSIZE) nmp->nm_rsize = NFS_FABLKSIZE; if ((nmp->nm_readdirsize == 0 || fsp->fs_dtpref < nmp->nm_readdirsize) && fsp->fs_dtpref >= NFS_DIRBLKSIZ) nmp->nm_readdirsize = (fsp->fs_dtpref + NFS_DIRBLKSIZ - 1) & ~(NFS_DIRBLKSIZ - 1); if (fsp->fs_rtmax < nmp->nm_readdirsize && fsp->fs_rtmax > 0) { nmp->nm_readdirsize = fsp->fs_rtmax & ~(NFS_DIRBLKSIZ - 1); if (nmp->nm_readdirsize == 0) nmp->nm_readdirsize = fsp->fs_rtmax; } if (nmp->nm_readdirsize < NFS_DIRBLKSIZ) nmp->nm_readdirsize = NFS_DIRBLKSIZ; if (fsp->fs_maxfilesize > 0 && fsp->fs_maxfilesize < nmp->nm_maxfilesize) nmp->nm_maxfilesize = fsp->fs_maxfilesize; nmp->nm_mountp->mnt_stat.f_iosize = newnfs_iosize(nmp); nmp->nm_state |= NFSSTA_GOTFSINFO; } /* * Get a pointer to my IP addrress and return it. * Return NULL if you can't find one. */ u_int8_t * nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) { struct sockaddr_in sad, *sin; struct rtentry *rt; u_int8_t *retp = NULL; static struct in_addr laddr; *isinet6p = 0; /* * Loop up a route for the destination address. */ if (nmp->nm_nam->sa_family == AF_INET) { bzero(&sad, sizeof (sad)); sin = (struct sockaddr_in *)nmp->nm_nam; sad.sin_family = AF_INET; sad.sin_len = sizeof (struct sockaddr_in); sad.sin_addr.s_addr = sin->sin_addr.s_addr; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); rt = rtalloc1_fib((struct sockaddr *)&sad, 0, 0UL, curthread->td_proc->p_fibnum); if (rt != NULL) { if (rt->rt_ifp != NULL && rt->rt_ifa != NULL && ((rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) && rt->rt_ifa->ifa_addr->sa_family == AF_INET) { sin = (struct sockaddr_in *) rt->rt_ifa->ifa_addr; laddr.s_addr = sin->sin_addr.s_addr; retp = (u_int8_t *)&laddr; } RTFREE_LOCKED(rt); } CURVNET_RESTORE(); #ifdef INET6 } else if (nmp->nm_nam->sa_family == AF_INET6) { struct sockaddr_in6 sad6, *sin6; static struct in6_addr laddr6; bzero(&sad6, sizeof (sad6)); sin6 = (struct sockaddr_in6 *)nmp->nm_nam; sad6.sin6_family = AF_INET6; sad6.sin6_len = sizeof (struct sockaddr_in6); sad6.sin6_addr = sin6->sin6_addr; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); rt = rtalloc1_fib((struct sockaddr *)&sad6, 0, 0UL, curthread->td_proc->p_fibnum); if (rt != NULL) { if (rt->rt_ifp != NULL && rt->rt_ifa != NULL && ((rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) && rt->rt_ifa->ifa_addr->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *) rt->rt_ifa->ifa_addr; laddr6 = sin6->sin6_addr; retp = (u_int8_t *)&laddr6; *isinet6p = 1; } RTFREE_LOCKED(rt); } CURVNET_RESTORE(); #endif } return (retp); } /* * Copy NFS uid, gids from the cred structure. */ void newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr) { int i; KASSERT(cr->cr_ngroups >= 0, ("newnfs_copyincred: negative cr_ngroups")); nfscr->nfsc_uid = cr->cr_uid; nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1); for (i = 0; i < nfscr->nfsc_ngroups; i++) nfscr->nfsc_groups[i] = cr->cr_groups[i]; } /* * Do any client specific initialization. */ void nfscl_init(void) { static int inited = 0; if (inited) return; inited = 1; nfscl_inited = 1; ncl_pbuf_freecnt = nswbuf / 2 + 1; } /* * Check each of the attributes to be set, to ensure they aren't already * the correct value. Disable setting ones already correct. */ int nfscl_checksattr(struct vattr *vap, struct nfsvattr *nvap) { if (vap->va_mode != (mode_t)VNOVAL) { if (vap->va_mode == nvap->na_mode) vap->va_mode = (mode_t)VNOVAL; } if (vap->va_uid != (uid_t)VNOVAL) { if (vap->va_uid == nvap->na_uid) vap->va_uid = (uid_t)VNOVAL; } if (vap->va_gid != (gid_t)VNOVAL) { if (vap->va_gid == nvap->na_gid) vap->va_gid = (gid_t)VNOVAL; } if (vap->va_size != VNOVAL) { if (vap->va_size == nvap->na_size) vap->va_size = VNOVAL; } /* * We are normally called with only a partially initialized * VAP. Since the NFSv3 spec says that server may use the * file attributes to store the verifier, the spec requires * us to do a SETATTR RPC. FreeBSD servers store the verifier * in atime, but we can't really assume that all servers will * so we ensure that our SETATTR sets both atime and mtime. */ if (vap->va_mtime.tv_sec == VNOVAL) vfs_timestamp(&vap->va_mtime); if (vap->va_atime.tv_sec == VNOVAL) vap->va_atime = vap->va_mtime; return (1); } /* * Map nfsv4 errors to errno.h errors. * The uid and gid arguments are only used for NFSERR_BADOWNER and that * error should only be returned for the Open, Create and Setattr Ops. * As such, most calls can just pass in 0 for those arguments. */ APPLESTATIC int nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid) { struct proc *p; if (error < 10000) return (error); if (td != NULL) p = td->td_proc; else p = NULL; switch (error) { case NFSERR_BADOWNER: tprintf(p, LOG_INFO, "No name and/or group mapping for uid,gid:(%d,%d)\n", uid, gid); return (EPERM); + case NFSERR_BADNAME: + case NFSERR_BADCHAR: + printf("nfsv4 char/name not handled by server\n"); + return (ENOENT); case NFSERR_STALECLIENTID: case NFSERR_STALESTATEID: case NFSERR_EXPIRED: case NFSERR_BADSTATEID: + case NFSERR_BADSESSION: printf("nfsv4 recover err returned %d\n", error); return (EIO); case NFSERR_BADHANDLE: case NFSERR_SERVERFAULT: case NFSERR_BADTYPE: case NFSERR_FHEXPIRED: case NFSERR_RESOURCE: case NFSERR_MOVED: case NFSERR_NOFILEHANDLE: case NFSERR_MINORVERMISMATCH: case NFSERR_OLDSTATEID: case NFSERR_BADSEQID: case NFSERR_LEASEMOVED: case NFSERR_RECLAIMBAD: case NFSERR_BADXDR: - case NFSERR_BADCHAR: - case NFSERR_BADNAME: case NFSERR_OPILLEGAL: printf("nfsv4 client/server protocol prob err=%d\n", error); return (EIO); default: tprintf(p, LOG_INFO, "nfsv4 err=%d\n", error); return (EIO); }; } /* * Check to see if the process for this owner exists. Return 1 if it doesn't * and 0 otherwise. */ int nfscl_procdoesntexist(u_int8_t *own) { union { u_int32_t lval; u_int8_t cval[4]; } tl; struct proc *p; pid_t pid; int ret = 0; tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; pid = tl.lval; p = pfind_locked(pid); if (p == NULL) return (1); if (p->p_stats == NULL) { PROC_UNLOCK(p); return (0); } tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; if (tl.lval != p->p_stats->p_start.tv_sec) { ret = 1; } else { tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own; if (tl.lval != p->p_stats->p_start.tv_usec) ret = 1; } PROC_UNLOCK(p); return (ret); } /* * - nfs pseudo system call for the client */ /* * MPSAFE */ static int nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfscbd_args nfscbdarg; struct nfsd_nfscbd_args nfscbdarg2; int error; struct nameidata nd; struct nfscl_dumpmntopts dumpmntopts; char *buf; if (uap->flag & NFSSVC_CBADDSOCK) { error = copyin(uap->argp, (caddr_t)&nfscbdarg, sizeof(nfscbdarg)); if (error) return (error); /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. */ if ((error = fget(td, nfscbdarg.sock, CAP_SOCK_ALL, &fp)) != 0) { return (error); } if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); return (EPERM); } error = nfscbd_addsock(fp); fdrop(fp, td); if (!error && nfscl_enablecallb == 0) { nfsv4_cbport = nfscbdarg.port; nfscl_enablecallb = 1; } } else if (uap->flag & NFSSVC_NFSCBD) { if (uap->argp == NULL) return (EINVAL); error = copyin(uap->argp, (caddr_t)&nfscbdarg2, sizeof(nfscbdarg2)); if (error) return (error); error = nfscbd_nfsd(td, &nfscbdarg2); } else if (uap->flag & NFSSVC_DUMPMNTOPTS) { error = copyin(uap->argp, &dumpmntopts, sizeof(dumpmntopts)); if (error == 0 && (dumpmntopts.ndmnt_blen < 256 || dumpmntopts.ndmnt_blen > 1024)) error = EINVAL; if (error == 0) error = nfsrv_lookupfilename(&nd, dumpmntopts.ndmnt_fname, td); if (error == 0 && strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); error = EINVAL; } if (error == 0) { buf = malloc(dumpmntopts.ndmnt_blen, M_TEMP, M_WAITOK); nfscl_retopts(VFSTONFS(nd.ni_vp->v_mount), buf, dumpmntopts.ndmnt_blen); vput(nd.ni_vp); error = copyout(buf, dumpmntopts.ndmnt_buf, dumpmntopts.ndmnt_blen); free(buf, M_TEMP); } } else { error = EINVAL; } return (error); } extern int (*nfsd_call_nfscl)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfscl_modevent(module_t mod, int type, void *data) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) return (0); newnfs_portinit(); mtx_init(&nfs_clstate_mutex, "nfs_clstate_mutex", NULL, MTX_DEF); mtx_init(&ncl_iod_mutex, "ncl_iod_mutex", NULL, MTX_DEF); nfscl_init(); NFSD_LOCK(); nfsrvd_cbinit(0); NFSD_UNLOCK(); ncl_call_invalcaches = ncl_invalcaches; nfsd_call_nfscl = nfssvc_nfscl; loaded = 1; break; case MOD_UNLOAD: if (nfs_numnfscbd != 0) { error = EBUSY; break; } /* * XXX: Unloading of nfscl module is unsupported. */ #if 0 ncl_call_invalcaches = NULL; nfsd_call_nfscl = NULL; /* and get rid of the mutexes */ mtx_destroy(&nfs_clstate_mutex); mtx_destroy(&ncl_iod_mutex); loaded = 0; break; #else /* FALLTHROUGH */ #endif default: error = EOPNOTSUPP; break; } return error; } static moduledata_t nfscl_mod = { "nfscl", nfscl_modevent, NULL, }; DECLARE_MODULE(nfscl, nfscl_mod, SI_SUB_VFS, SI_ORDER_FIRST); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfscl, 1); MODULE_DEPEND(nfscl, nfscommon, 1, 1, 1); MODULE_DEPEND(nfscl, krpc, 1, 1, 1); MODULE_DEPEND(nfscl, nfssvc, 1, 1, 1); MODULE_DEPEND(nfscl, nfslock, 1, 1, 1); Index: head/sys/fs/nfsclient/nfs_clrpcops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clrpcops.c (revision 244041) +++ head/sys/fs/nfsclient/nfs_clrpcops.c (revision 244042) @@ -1,4232 +1,5877 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Rpc op calls, generally called from the vnode op calls or through the * buffer cache, for NFS v2, 3 and 4. * These do not normally make any changes to vnode arguments or use * structures that might change between the VFS variants. The returned * arguments are all at the end, after the NFSPROC_T *p one. */ #ifndef APPLEKEXT #include "opt_inet6.h" #include /* * Global variables */ extern int nfs_numnfscbd; extern struct timeval nfsboottime; extern u_int32_t newnfs_false, newnfs_true; extern nfstype nfsv34_type[9]; extern int nfsrv_useacl; extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; extern int nfscl_debuglevel; NFSCLSTATEMUTEX; int nfstest_outofseq = 0; int nfscl_assumeposixlocks = 1; int nfscl_enablecallb = 0; short nfsv4_cbport = NFSV4_CBPORT; int nfstest_openallsetattr = 0; #endif /* !APPLEKEXT */ #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) +/* + * nfscl_getsameserver() can return one of three values: + * NFSDSP_USETHISSESSION - Use this session for the DS. + * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new + * session. + * NFSDSP_NOTFOUND - No matching server was found. + */ +enum nfsclds_state { + NFSDSP_USETHISSESSION = 0, + NFSDSP_SEQTHISSESSION = 1, + NFSDSP_NOTFOUND = 2, +}; + static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *, struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *, nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *, nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *, int *); static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *, struct nfscllockowner *, u_int64_t, u_int64_t, u_int32_t, struct ucred *, NFSPROC_T *, int); static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *, struct acl *, nfsv4stateid_t *, void *); +static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int, + uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **, + struct ucred *, NFSPROC_T *); +static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_storage *, + struct nfsclds **, NFSPROC_T *); +static void nfscl_initsessionslots(struct nfsclsession *); +static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *, + nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *, + struct nfsclflayout *, uint64_t, uint64_t, struct ucred *, NFSPROC_T *); +static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *, + struct nfsclds *, uint64_t, int, struct nfsfh *, struct ucred *, + NFSPROC_T *); +static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *, + nfsv4stateid_t *, struct nfsclds *, uint64_t, int, + struct nfsfh *, int, struct ucred *, NFSPROC_T *); +static enum nfsclds_state nfscl_getsameserver(struct nfsmount *, + struct nfsclds *, struct nfsclds **); +#ifdef notyet +static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *, + struct nfsfh *, struct ucred *, NFSPROC_T *, void *); +#endif /* * nfs null call from vfs. */ APPLESTATIC int nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p) { int error; struct nfsrv_descript nfsd, *nd = &nfsd; NFSCL_REQSTART(nd, NFSPROC_NULL, vp); error = nfscl_request(nd, vp, p, cred, NULL); if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs access rpc op. * For nfs version 3 and 4, use the access rpc to check accessibility. If file * modes are changed on the server, accesses might still fail later. */ APPLESTATIC int nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp) { int error; u_int32_t mode, rmode; if (acmode & VREAD) mode = NFSACCESS_READ; else mode = 0; if (vnode_vtype(vp) == VDIR) { if (acmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE); if (acmode & VEXEC) mode |= NFSACCESS_LOOKUP; } else { if (acmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (acmode & VEXEC) mode |= NFSACCESS_EXECUTE; } /* * Now, just call nfsrpc_accessrpc() to do the actual RPC. */ error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode, NULL); /* * The NFS V3 spec does not clarify whether or not * the returned access bits can be a superset of * the ones requested, so... */ if (!error && (rmode & mode) != mode) error = EACCES; return (error); } /* * The actual rpc, separated out for Darwin. */ APPLESTATIC int nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep, void *stuff) { u_int32_t *tl; u_int32_t supported, rmode; int error; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; *attrflagp = 0; supported = mode; NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(mode); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); supported = fxdr_unsigned(u_int32_t, *tl++); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); } rmode = fxdr_unsigned(u_int32_t, *tl); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); /* * It's not obvious what should be done about * unsupported access modes. For now, be paranoid * and clear the unsupported ones. */ rmode &= supported; *rmodep = rmode; } else error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs open rpc */ APPLESTATIC int nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p) { struct nfsclopen *op; struct nfscldeleg *dp; struct nfsfh *nfhp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); u_int32_t mode, clidrev; int ret, newone, error, expireret = 0, retrycnt; /* * For NFSv4, Open Ops are only done on Regular Files. */ if (vnode_vtype(vp) != VREG) return (0); mode = 0; if (amode & FREAD) mode |= NFSV4OPEN_ACCESSREAD; if (amode & FWRITE) mode |= NFSV4OPEN_ACCESSWRITE; nfhp = np->n_fhp; retrycnt = 0; #ifdef notdef { char name[100]; int namel; namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99; bcopy(NFS4NODENAME(np->n_v4), name, namel); name[namel] = '\0'; printf("rpcopen p=0x%x name=%s",p->p_pid,name); if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]); else printf(" fhl=0\n"); } #endif do { dp = NULL; error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1, cred, p, NULL, &op, &newone, &ret, 1); if (error) { return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; if (ret == NFSCLOPEN_DOOPEN) { if (np->n_v4 != NULL) { error = nfsrpc_openrpc(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &dp, 0, 0x0, cred, p, 0, 0); if (dp != NULL) { #ifdef APPLE OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag); #else NFSLOCKNODE(np); np->n_flag &= ~NDELEGMOD; /* * Invalidate the attribute cache, so that * attributes that pre-date the issue of a * delegation are not cached, since the * cached attributes will remain valid while * the delegation is held. */ NFSINVALATTRCACHE(np); NFSUNLOCKNODE(np); #endif (void) nfscl_deleg(nmp->nm_mountp, op->nfso_own->nfsow_clp, nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp); } } else { error = EIO; } newnfs_copyincred(cred, &op->nfso_cred); } else if (ret == NFSCLOPEN_SETCRED) /* * This is a new local open on a delegation. It needs * to have credentials so that an open can be done * against the server during recovery. */ newnfs_copyincred(cred, &op->nfso_cred); /* * nfso_opencnt is the count of how many VOP_OPEN()s have * been done on this Open successfully and a VOP_CLOSE() * is expected for each of these. * If error is non-zero, don't increment it, since the Open * hasn't succeeded yet. */ if (!error) op->nfso_opencnt++; nfscl_openrelease(op, error, newone); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || - error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY) { + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } /* * the actual open rpc */ APPLESTATIC int nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op, u_int8_t *name, int namelen, struct nfscldeleg **dpp, int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p, int syscred, int recursed) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscldeleg *dp, *ndp = NULL; struct nfsvattr nfsva; u_int32_t rflags, deleg; nfsattrbit_t attrbits; int error, ret, acesize, limitby; dp = *dpp; *dpp = NULL; - nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL); + nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); - *tl++ = op->nfso_own->nfsow_clp->nfsc_clientid.lval[0]; - *tl = op->nfso_own->nfsow_clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE); if (reclaim) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(delegtype); } else { if (dp != NULL) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = dp->nfsdl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; } else { *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); } (void) nfsm_strtom(nd, name, namelen); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); (void) nfsrv_putattrbit(nd, &attrbits); if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(u_int32_t, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) op->nfso_own->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); MALLOC(ndp, struct nfscldeleg *, sizeof (struct nfscldeleg) + newfhlen, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&ndp->nfsdl_owner); LIST_INIT(&ndp->nfsdl_lock); ndp->nfsdl_clp = op->nfso_own->nfsow_clp; ndp->nfsdl_fhlen = newfhlen; NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen); newnfs_copyincred(cred, &ndp->nfsdl_cred); nfscl_lockinit(&ndp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); ndp->nfsdl_stateid.seqid = *tl++; ndp->nfsdl_stateid.other[0] = *tl++; ndp->nfsdl_stateid.other[1] = *tl++; ndp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { ndp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: ndp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: ndp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); ndp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; }; } else { ndp->nfsdl_flags = NFSCLDL_READ; } if (ret) ndp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret, &acesize, p); if (error) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) goto nfsmout; if (ndp != NULL) { ndp->nfsdl_change = nfsva.na_filerev; ndp->nfsdl_modtime = nfsva.na_mtime; ndp->nfsdl_flags |= NFSCLDL_MODTIMESET; } if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) { do { ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op, cred, p); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_open"); } while (ret == NFSERR_DELAY); error = ret; } if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) || nfscl_assumeposixlocks) op->nfso_posixlock = 1; else op->nfso_posixlock = 0; /* * If the server is handing out delegations, but we didn't * get one because an OpenConfirm was required, try the * Open again, to get a delegation. This is a harmless no-op, * from a server's point of view. */ if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) && (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) && !error && dp == NULL && ndp == NULL && !recursed) { do { ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, &ndp, 0, 0x0, cred, p, syscred, 1); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_open2"); } while (ret == NFSERR_DELAY); if (ret) { if (ndp != NULL) FREE((caddr_t)ndp, M_NFSCLDELEG); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) error = ret; } } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; - if (error == NFSERR_STALECLIENTID) + if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: if (!error) *dpp = ndp; else if (ndp != NULL) FREE((caddr_t)ndp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * open downgrade rpc */ APPLESTATIC int nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); - *tl++ = op->nfso_stateid.seqid; + if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp)))) + *tl++ = 0; + else + *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; } if (nd->nd_repstat && error == 0) error = nd->nd_repstat; - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * V4 Close operation. */ APPLESTATIC int nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p) { struct nfsclclient *clp; int error; if (vnode_vtype(vp) != VREG) return (0); if (doclose) error = nfscl_doclose(vp, &clp, p); else error = nfscl_getclose(vp, &clp); if (error) return (error); nfscl_clientrelease(clp); return (0); } /* * Close the open. */ APPLESTATIC void nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscllockowner *lp, *nlp; struct nfscllock *lop, *nlop; struct ucred *tcred; u_int64_t off = 0, len = 0; u_int32_t type = NFSV4LOCKT_READ; int error, do_unlock, trycnt; tcred = newnfs_getcred(); newnfs_copycred(&op->nfso_cred, tcred); /* * (Theoretically this could be done in the same * compound as the close, but having multiple * sequenced Ops in the same compound might be * too scary for some servers.) */ if (op->nfso_posixlock) { off = 0; len = NFS64BITSSET; type = NFSV4LOCKT_READ; } /* * Since this function is only called from VOP_INACTIVE(), no * other thread will be manipulating this Open. As such, the * lock lists are not being changed by other threads, so it should * be safe to do this without locking. */ LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { do_unlock = 1; LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) { if (op->nfso_posixlock == 0) { off = lop->nfslo_first; len = lop->nfslo_end - lop->nfslo_first; if (lop->nfslo_type == F_WRLCK) type = NFSV4LOCKT_WRITE; else type = NFSV4LOCKT_READ; } if (do_unlock) { trycnt = 0; do { error = nfsrpc_locku(nd, nmp, lp, off, len, type, tcred, p, 0); if ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfs_close"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0 && trycnt++ < 5); if (op->nfso_posixlock) do_unlock = 0; } nfscl_freelock(lop, 0); } /* * Do a ReleaseLockOwner. * The lock owner name nfsl_owner may be used by other opens for * other files but the lock_owner4 name that nfsrpc_rellockown() * puts on the wire has the file handle for this file appended * to it, so it can be done now. */ (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh, lp->nfsl_open->nfso_fhlen, tcred, p); } /* * There could be other Opens for different files on the same * OpenOwner, so locking is required. */ NFSLOCKCLSTATE(); nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR); NFSUNLOCKCLSTATE(); do { error = nfscl_tryclose(op, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfs_close"); } while (error == NFSERR_GRACE); NFSLOCKCLSTATE(); nfscl_lockunlock(&op->nfso_own->nfsow_rwlock); LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) nfscl_freelockowner(lp, 0); nfscl_freeopen(op, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(tcred); } /* * The actual Close RPC. */ APPLESTATIC int nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error; nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh, - op->nfso_fhlen, NULL); + op->nfso_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); - *tl++ = op->nfso_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl = op->nfso_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (nd->nd_repstat == 0) NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); error = nd->nd_repstat; - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * V4 Open Confirm RPC. */ APPLESTATIC int nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; + struct nfsmount *nmp; int error; - nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, VFSTONFS(vnode_mount(vp)), - nfhp, fhlen, NULL); + nmp = VFSTONFS(vnode_mount(vp)); + if (NFSHASNFSV4N(nmp)) + return (0); /* No confirmation for NFSv4.1. */ + nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; *tl = txdr_unsigned(op->nfso_own->nfsow_seqid); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; } error = nd->nd_repstat; - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs() * when a mount has just occurred and when the server replies NFSERR_EXPIRED. */ APPLESTATIC int -nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, +nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; nfsattrbit_t attrbits; u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9]; u_short port; int error, isinet6 = 0, callblen; nfsquad_t confirm; u_int32_t lease; static u_int32_t rev = 0; + struct nfsclds *dsp, *ndsp, *tdsp; if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); - nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL); + clp->nfsc_rev = rev++; + if (NFSHASNFSV4N(nmp)) { + error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, + NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p); + NFSCL_DEBUG(1, "aft exch=%d\n", error); + if (error == 0) { + error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, + &nmp->nm_sockreq, + dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p); + if (error == 0) { + NFSLOCKMNT(nmp); + TAILQ_FOREACH_SAFE(tdsp, &nmp->nm_sess, + nfsclds_list, ndsp) + nfscl_freenfsclds(tdsp); + TAILQ_INIT(&nmp->nm_sess); + TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, + nfsclds_list); + NFSUNLOCKMNT(nmp); + } else + nfscl_freenfsclds(dsp); + NFSCL_DEBUG(1, "aft createsess=%d\n", error); + } + if (error == 0 && reclaim == 0) { + error = nfsrpc_reclaimcomplete(nmp, cred, p); + NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error); + if (error == NFSERR_COMPLETEALREADY || + error == NFSERR_NOTSUPP) + /* Ignore this error. */ + error = 0; + } + return (error); + } + + /* + * Allocate a single session structure for NFSv4.0, because some of + * the fields are used by NFSv4.0 although it doesn't do a session. + */ + dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO); + mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); + mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); + NFSLOCKMNT(nmp); + TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); + NFSUNLOCKMNT(nmp); + + nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); - *tl = txdr_unsigned(rev++); + *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); /* * set up the callback address */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_CALLBCKPROG); callblen = strlen(nfsv4_callbackaddr); if (callblen == 0) cp = nfscl_getmyip(nmp, &isinet6); if (nfscl_enablecallb && nfs_numnfscbd > 0 && (callblen > 0 || cp != NULL)) { port = htons(nfsv4_cbport); cp2 = (u_int8_t *)&port; #ifdef INET6 if ((callblen > 0 && strchr(nfsv4_callbackaddr, ':')) || isinet6) { char ip6buf[INET6_ADDRSTRLEN], *ip6add; (void) nfsm_strtom(nd, "tcp6", 4); if (callblen == 0) { ip6_sprintf(ip6buf, (struct in6_addr *)cp); ip6add = ip6buf; } else { ip6add = nfsv4_callbackaddr; } snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", ip6add, cp2[0], cp2[1]); } else #endif { (void) nfsm_strtom(nd, "tcp", 3); if (callblen == 0) snprintf(addr, INET6_ADDRSTRLEN + 9, "%d.%d.%d.%d.%d.%d", cp[0], cp[1], cp[2], cp[3], cp2[0], cp2[1]); else snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", nfsv4_callbackaddr, cp2[0], cp2[1]); } (void) nfsm_strtom(nd, addr, strlen(addr)); } else { (void) nfsm_strtom(nd, "tcp", 3); (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(clp->nfsc_cbident); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); - clp->nfsc_clientid.lval[0] = *tl++; - clp->nfsc_clientid.lval[1] = *tl++; + NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0] = *tl++; + NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; /* * and confirm it. */ - nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL); + nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL, + NULL); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); - *tl++ = clp->nfsc_clientid.lval[0]; - *tl++ = clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, - cred, NFS_PROG, NFS_VER4, NULL, 1, NULL); + cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; if (nd->nd_repstat == 0) { nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh, - nmp->nm_fhsize, NULL); + nmp->nm_fhsize, NULL, NULL); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, - cred, NFS_PROG, NFS_VER4, NULL, 1, NULL); + cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred); if (error) goto nfsmout; clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; } } } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call. */ APPLESTATIC int nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsm_loadattr(nd, nap); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call with non-vnode arguemnts. */ APPLESTATIC int nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred, - struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp) + struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp, + uint32_t *leasep) { struct nfsrv_descript nfsd, *nd = &nfsd; int error, vers = NFS_VER2; nfsattrbit_t attrbits; - nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL); + nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL); if (nd->nd_flag & ND_NFSV4) { vers = NFS_VER4; NFSGETATTR_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); } else if (nd->nd_flag & ND_NFSV3) { vers = NFS_VER3; } if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, vers, NULL, 1, xidp); + NFS_PROG, vers, NULL, 1, xidp, NULL); if (error) return (error); - if (!nd->nd_repstat) - error = nfsm_loadattr(nd, nap); - else + if (nd->nd_repstat == 0) { + if ((nd->nd_flag & ND_NFSV4) != 0) + error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, + NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL, + NULL, NULL); + else + error = nfsm_loadattr(nd, nap); + } else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do an nfs setattr operation. */ APPLESTATIC int nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp, void *stuff) { int error, expireret = 0, openerr, retrycnt; u_int32_t clidrev = 0, mode; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsfh *nfhp; nfsv4stateid_t stateid; void *lckp; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; retrycnt = 0; do { lckp = NULL; openerr = 1; if (NFSHASNFSV4(nmp)) { nfhp = VTONFS(vp)->n_fhp; error = nfscl_getstateid(vp, nfhp->nfh_fh, - nfhp->nfh_len, mode, cred, p, &stateid, &lckp); + nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp); if (error && vnode_vtype(vp) == VREG && (mode == NFSV4OPEN_ACCESSWRITE || nfstest_openallsetattr)) { /* * No Open stateid, so try and open the file * now. */ if (mode == NFSV4OPEN_ACCESSWRITE) openerr = nfsrpc_open(vp, FWRITE, cred, p); else openerr = nfsrpc_open(vp, FREAD, cred, p); if (!openerr) (void) nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, - mode, cred, p, &stateid, &lckp); + mode, 0, cred, p, &stateid, &lckp); } } if (vap != NULL) error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p, rnap, attrflagp, stuff); else error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid, stuff); - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (!openerr) (void) nfsrpc_close(vp, 0, p); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || - error == NFSERR_OLDSTATEID) { + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_setattr"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } static int nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp); if (nd->nd_flag & ND_NFSV4) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); vap->va_type = vnode_vtype(vp); nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } else if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff); if ((nd->nd_flag & ND_NFSV4) && !error) error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error) error = nfscl_postop_attr(nd, rnap, attrflagp, stuff); mbuf_freem(nd->nd_mrep); if (nd->nd_repstat && !error) error = nd->nd_repstat; return (error); } /* * nfs lookup rpc */ APPLESTATIC int nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; struct nfsnode *np; struct nfsfh *nfhp; nfsattrbit_t attrbits; int error = 0, lookupp = 0; *attrflagp = 0; *dattrflagp = 0; if (vnode_vtype(dvp) != VDIR) return (ENOTDIR); nmp = VFSTONFS(vnode_mount(dvp)); if (len > NFS_MAXNAMLEN) return (ENAMETOOLONG); if (NFSHASNFSV4(nmp) && len == 1 && name[0] == '.') { /* * Just return the current dir's fh. */ np = VTONFS(dvp); MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + np->n_fhp->nfh_len, M_NFSFH, M_WAITOK); nfhp->nfh_len = np->n_fhp->nfh_len; NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len); *nfhpp = nfhp; return (0); } if (NFSHASNFSV4(nmp) && len == 2 && name[0] == '.' && name[1] == '.') { lookupp = 1; NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp); } else { NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp); (void) nfsm_strtom(nd, name, len); } if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, dvp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat) { /* * When an NFSv4 Lookupp returns ENOENT, it means that * the lookup is at the root of an fs, so return this dir. */ if (nd->nd_repstat == NFSERR_NOENT && lookupp) { np = VTONFS(dvp); MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + np->n_fhp->nfh_len, M_NFSFH, M_WAITOK); nfhp->nfh_len = np->n_fhp->nfh_len; NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len); *nfhpp = nfhp; mbuf_freem(nd->nd_mrep); return (0); } if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff); goto nfsmout; } if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) { nd->nd_flag |= ND_NOMOREDATA; goto nfsmout; } } error = nfsm_getfh(nd, nfhpp); if (error) goto nfsmout; error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if ((nd->nd_flag & ND_NFSV3) && !error) error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff); nfsmout: mbuf_freem(nd->nd_mrep); if (!error && nd->nd_repstat) error = nd->nd_repstat; return (error); } /* * Do a readlink rpc. */ APPLESTATIC int nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsnode *np = VTONFS(vp); nfsattrbit_t attrbits; int error, len, cangetattr = 1; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READLINK, vp); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (!nd->nd_repstat && !error) { NFSM_STRSIZ(len, NFS_MAXPATHLEN); /* * This seems weird to me, but must have been added to * FreeBSD for some reason. The only thing I can think of * is that there was/is some server that replies with * more link data than it should? */ if (len == NFS_MAXPATHLEN) { NFSLOCKNODE(np); if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) { len = np->n_size; cangetattr = 0; } NFSUNLOCKNODE(np); } error = nfsm_mbufuio(nd, uiop, len); if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Read operation. */ APPLESTATIC int nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { int error, expireret = 0, retrycnt; u_int32_t clidrev = 0; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); struct ucred *newcred; struct nfsfh *nfhp = NULL; nfsv4stateid_t stateid; void *lckp; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; newcred = cred; if (NFSHASNFSV4(nmp)) { nfhp = np->n_fhp; newcred = NFSNEWCRED(cred); } retrycnt = 0; do { lckp = NULL; if (NFSHASNFSV4(nmp)) (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, - NFSV4OPEN_ACCESSREAD, newcred, p, &stateid, &lckp); + NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid, + &lckp); error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap, attrflagp, stuff); - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || - error == NFSERR_OLDSTATEID) { + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_read"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; if (NFSHASNFSV4(nmp)) NFSFREECRED(newcred); return (error); } /* * The actual read RPC. */ static int nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred, nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; int error = 0, len, retlen, tsiz, eof = 0; struct nfsrv_descript nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsrv_descript *nd = &nfsd; int rsize; off_t tmp_off; *attrflagp = 0; tsiz = uio_uio_resid(uiop); tmp_off = uiop->uio_offset + tsiz; NFSLOCKMNT(nmp); if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) { NFSUNLOCKMNT(nmp); return (EFBIG); } rsize = nmp->nm_rsize; NFSUNLOCKMNT(nmp); nd->nd_mrep = NULL; while (tsiz > 0) { *attrflagp = 0; len = (tsiz > rsize) ? rsize : tsiz; NFSCL_REQSTART(nd, NFSPROC_READ, vp); if (nd->nd_flag & ND_NFSV4) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3); if (nd->nd_flag & ND_NFSV2) { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } else { txdr_hyper(uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } /* * Since I can't do a Getattr for NFSv4 for Write, there * doesn't seem any point in doing one here, either. * (See the comment in nfsrpc_writerpc() for more info.) */ error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = 1; } if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); } NFSM_STRSIZ(retlen, rsize); error = nfsm_mbufuio(nd, uiop, retlen); if (error) goto nfsmout; mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; tsiz -= retlen; if (!(nd->nd_flag & ND_NFSV2)) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } return (0); nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } /* * nfs write operation * When called_from_strategy != 0, it should return EIO for an error that * indicates recovery is in progress, so that the buffer will be left * dirty and be written back to the server later. If it loops around, * the recovery thread could get stuck waiting for the buffer and recovery * will then deadlock. */ APPLESTATIC int nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff, int called_from_strategy) { int error, expireret = 0, retrycnt, nostateid; u_int32_t clidrev = 0; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); struct ucred *newcred; struct nfsfh *nfhp = NULL; nfsv4stateid_t stateid; void *lckp; *must_commit = 0; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; newcred = cred; if (NFSHASNFSV4(nmp)) { newcred = NFSNEWCRED(cred); nfhp = np->n_fhp; } retrycnt = 0; do { lckp = NULL; nostateid = 0; if (NFSHASNFSV4(nmp)) { (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, - NFSV4OPEN_ACCESSWRITE, newcred, p, &stateid, &lckp); + NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid, + &lckp); if (stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { nostateid = 1; NFSCL_DEBUG(1, "stateid0 in write\n"); } } /* * If there is no stateid for NFSv4, it means this is an * extraneous write after close. Basically a poorly * implemented buffer cache. Just don't do the write. */ if (nostateid) error = 0; else error = nfsrpc_writerpc(vp, uiop, iomode, must_commit, newcred, &stateid, p, nap, attrflagp, stuff); - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || - error == NFSERR_OLDSTATEID) { + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_write"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_DELAY || - ((error == NFSERR_STALESTATEID || + ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error != 0 && (retrycnt >= 4 || - ((error == NFSERR_STALESTATEID || + ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0))) error = EIO; if (NFSHASNFSV4(nmp)) NFSFREECRED(newcred); return (error); } /* * The actual write RPC. */ static int nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC; int wccflag = 0, wsize; int32_t backup; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; nfsattrbit_t attrbits; off_t tmp_off; KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); *attrflagp = 0; tsiz = uio_uio_resid(uiop); tmp_off = uiop->uio_offset + tsiz; NFSLOCKMNT(nmp); if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) { NFSUNLOCKMNT(nmp); return (EFBIG); } wsize = nmp->nm_wsize; NFSUNLOCKMNT(nmp); nd->nd_mrep = NULL; /* NFSv2 sometimes does a write with */ nd->nd_repstat = 0; /* uio_resid == 0, so the while is not done */ while (tsiz > 0) { *attrflagp = 0; len = (tsiz > wsize) ? wsize : tsiz; NFSCL_REQSTART(nd, NFSPROC_WRITE, vp); if (nd->nd_flag & ND_NFSV4) { nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else { u_int32_t x; NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* * Not sure why someone changed this, since the * RFC clearly states that "beginoffset" and * "totalcount" are ignored, but it wouldn't * surprise me if there's a busted server out there. */ /* Set both "begin" and "current" to non-garbage. */ x = txdr_unsigned((u_int32_t)uiop->uio_offset); *tl++ = x; /* "begin offset" */ *tl++ = x; /* "current offset" */ x = txdr_unsigned(len); *tl++ = x; /* total to this offset */ *tl = x; /* size of this write */ } nfsm_uiombuf(nd, uiop, len); /* * Although it is tempting to do a normal Getattr Op in the * NFSv4 compound, the result can be a nearly hung client * system if the Getattr asks for Owner and/or OwnerGroup. * It occurs when the client can't map either the Owner or * Owner_group name in the Getattr reply to a uid/gid. When * there is a cache miss, the kernel does an upcall to the * nfsuserd. Then, it can try and read the local /etc/passwd * or /etc/group file. It can then block in getnewbuf(), * waiting for dirty writes to be pushed to the NFS server. * The only reason this doesn't result in a complete * deadlock, is that the upcall times out and allows * the write to complete. However, progress is so slow * that it might just as well be deadlocked. * As such, we get the rest of the attributes, but not * Owner or Owner_group. * nb: nfscl_loadattrcache() needs to be told that these * partial attributes from a write rpc are being * passed in, via a argument flag. */ if (nd->nd_flag & ND_NFSV4) { NFSWRITEGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat) { /* * In case the rpc gets retried, roll * the uio fileds changed by nfsm_uiombuf() * back. */ uiop->uio_offset -= len; uio_uio_resid_add(uiop, len); uio_iov_base_add(uiop, -len); uio_iov_len_add(uiop, len); } if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { error = nfscl_wcc_data(nd, vp, nap, attrflagp, &wccflag, stuff); if (error) goto nfsmout; } if (!nd->nd_repstat) { if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { backup = len - rlen; uio_iov_base_add(uiop, -(backup)); uio_iov_len_add(uiop, backup); uiop->uio_offset -= backup; uio_uio_resid_add(uiop, backup); len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest committment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; NFSLOCKMNT(nmp); if (!NFSHASWRITEVERF(nmp)) { NFSBCOPY((caddr_t)tl, (caddr_t)&nmp->nm_verf[0], NFSX_VERF); NFSSETWRITEVERF(nmp); } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); } NFSUNLOCKMNT(nmp); } if (nd->nd_flag & ND_NFSV4) NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = NFS_LATTR_NOSHRINK; } } else { error = nd->nd_repstat; } if (error) goto nfsmout; NFSWRITERPC_SETTIME(wccflag, np, (nd->nd_flag & ND_NFSV4)); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; tsiz -= len; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat && !error) error = nd->nd_repstat; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ APPLESTATIC int nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap, u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; int error = 0; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp); if (nd->nd_flag & ND_NFSV4) { if (vtyp == VBLK || vtyp == VCHR) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = vtonfsv34_type(vtyp); *tl++ = txdr_unsigned(NFSMAJOR(rdev)); *tl = txdr_unsigned(NFSMINOR(rdev)); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vtyp); } } (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vtyp); } if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) nfscl_fillsattr(nd, vap, dvp, 0, 0); if ((nd->nd_flag & ND_NFSV3) && (vtyp == VCHR || vtyp == VBLK)) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSMAJOR(rdev)); *tl = txdr_unsigned(NFSMINOR(rdev)); } if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } if (nd->nd_flag & ND_NFSV2) nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; } error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV3) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!error && nd->nd_repstat) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs file create call * Mostly just call the approriate routine. (I separated out v4, so that * error recovery wouldn't be as difficult.) */ APPLESTATIC int nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { int error = 0, newone, expireret = 0, retrycnt, unlocked; struct nfsclowner *owp; struct nfscldeleg *dp; struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp)); u_int32_t clidrev; if (NFSHASNFSV4(nmp)) { retrycnt = 0; do { dp = NULL; error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone, NULL, 1); if (error) return (error); if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; error = nfsrpc_createv4(dvp, name, namelen, vap, cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff, &unlocked); /* * There is no need to invalidate cached attributes here, * since new post-delegation issue attributes are always * returned by nfsrpc_createv4() and these will update the * attribute cache. */ if (dp != NULL) (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp); nfscl_ownerrelease(owp, error, newone, unlocked); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || - error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY) { + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; } else { error = nfsrpc_createv23(dvp, name, namelen, vap, cverf, fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff); } return (error); } /* * The create rpc for v2 and 3. */ static int nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; int error = 0; struct nfsrv_descript nfsd, *nd = &nfsd; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp); (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0); } error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV3) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } static int nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff, int *unlockedp) { u_int32_t *tl; int error = 0, deleg, newone, ret, acesize, limitby; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsclopen *op; struct nfscldeleg *dp = NULL; struct nfsnode *np; struct nfsfh *nfhp; nfsattrbit_t attrbits; nfsv4stateid_t stateid; u_int32_t rflags; + struct nfsmount *nmp; + nmp = VFSTONFS(dvp->v_mount); *unlockedp = 0; *nfhpp = NULL; *dpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp); /* * For V4, this is actually an Open op. */ NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(owp->nfsow_seqid); *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD); *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE); - *tl++ = owp->nfsow_clp->nfsc_clientid.lval[0]; - *tl = owp->nfsow_clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CREATE); if (fmode & O_EXCL) { - *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); - NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); - *tl++ = cverf.lval[0]; - *tl = cverf.lval[1]; + if (NFSHASNFSV4N(nmp)) { + if (NFSHASSESSPERSIST(nmp)) { + /* Use GUARDED for persistent sessions. */ + *tl = txdr_unsigned(NFSCREATE_GUARDED); + nfscl_fillsattr(nd, vap, dvp, 0, 0); + } else { + /* Otherwise, use EXCLUSIVE4_1. */ + *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); + NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); + *tl++ = cverf.lval[0]; + *tl = cverf.lval[1]; + nfscl_fillsattr(nd, vap, dvp, 0, 0); + } + } else { + /* NFSv4.0 */ + *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); + NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); + *tl++ = cverf.lval[0]; + *tl = cverf.lval[1]; + } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); (void) nfsm_strtom(nd, name, namelen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (error) goto nfsmout; NFSCL_INCRSEQID(owp->nfsow_seqid, nd); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); stateid.seqid = *tl++; stateid.other[0] = *tl++; stateid.other[1] = *tl++; stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); (void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) owp->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); MALLOC(dp, struct nfscldeleg *, sizeof (struct nfscldeleg) + NFSX_V4FHMAX, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&dp->nfsdl_owner); LIST_INIT(&dp->nfsdl_lock); dp->nfsdl_clp = owp->nfsow_clp; newnfs_copyincred(cred, &dp->nfsdl_cred); nfscl_lockinit(&dp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); dp->nfsdl_stateid.seqid = *tl++; dp->nfsdl_stateid.other[0] = *tl++; dp->nfsdl_stateid.other[1] = *tl++; dp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { dp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: dp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: dp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); dp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; }; } else { dp->nfsdl_flags = NFSCLDL_READ; } if (ret) dp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret, &acesize, p); if (error) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; if (dp != NULL && *attrflagp) { dp->nfsdl_change = nnap->na_filerev; dp->nfsdl_modtime = nnap->na_mtime; dp->nfsdl_flags |= NFSCLDL_MODTIMESET; } /* * We can now complete the Open state. */ nfhp = *nfhpp; if (dp != NULL) { dp->nfsdl_fhlen = nfhp->nfh_len; NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len); } /* * Get an Open structure that will be * attached to the OpenOwner, acquired already. */ error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, NULL, &op, &newone, NULL, 0); if (error) goto nfsmout; op->nfso_stateid = stateid; newnfs_copyincred(cred, &op->nfso_cred); if ((rflags & NFSV4OPEN_RESULTCONFIRM)) { do { ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh, nfhp->nfh_len, op, cred, p); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_create"); } while (ret == NFSERR_DELAY); error = ret; } /* * If the server is handing out delegations, but we didn't * get one because an OpenConfirm was required, try the * Open again, to get a delegation. This is a harmless no-op, * from a server's point of view. */ if ((rflags & NFSV4OPEN_RESULTCONFIRM) && (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) && !error && dp == NULL) { np = VTONFS(dvp); do { ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op, name, namelen, &dp, 0, 0x0, cred, p, 0, 1); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_crt2"); } while (ret == NFSERR_DELAY); if (ret) { if (dp != NULL) FREE((caddr_t)dp, M_NFSCLDELEG); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) error = ret; } } nfscl_openrelease(op, error, newone); *unlockedp = 1; } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; - if (error == NFSERR_STALECLIENTID) + if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(owp->nfsow_clp); nfsmout: if (!error) *dpp = dp; else if (dp != NULL) FREE((caddr_t)dp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * Nfs remove rpc */ APPLESTATIC int nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsnode *np; struct nfsmount *nmp; nfsv4stateid_t dstateid; int error, ret = 0, i; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); nmp = VFSTONFS(vnode_mount(dvp)); tryagain: if (NFSHASNFSV4(nmp) && ret == 0) { ret = nfscl_removedeleg(vp, p, &dstateid); if (ret == 1) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); - *tl++ = dstateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = dstateid.seqid; *tl++ = dstateid.other[0]; *tl++ = dstateid.other[1]; *tl++ = dstateid.other[2]; *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(dvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_REMOVE); } } else { ret = 0; } if (ret == 0) NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp); (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { /* For NFSv4, parse out any Delereturn replies. */ if (ret > 0 && nd->nd_repstat != 0 && (nd->nd_flag & ND_NOMOREDATA)) { /* * If the Delegreturn failed, try again without * it. The server will Recall, as required. */ mbuf_freem(nd->nd_mrep); goto tryagain; } for (i = 0; i < (ret * 2); i++) { if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } } error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do an nfs rename rpc. */ APPLESTATIC int nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen, vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap, int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; struct nfsnode *np; nfsattrbit_t attrbits; nfsv4stateid_t fdstateid, tdstateid; int error = 0, ret = 0, gottd = 0, gotfd = 0, i; *fattrflagp = 0; *tattrflagp = 0; nmp = VFSTONFS(vnode_mount(fdvp)); if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); tryagain: if (NFSHASNFSV4(nmp) && ret == 0) { ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp, &tdstateid, &gottd, p); if (gotfd && gottd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp); } else if (gotfd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp); } else if (gottd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp); } if (gotfd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = fdstateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = fdstateid.seqid; *tl++ = fdstateid.other[0]; *tl++ = fdstateid.other[1]; *tl = fdstateid.other[2]; if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(tvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_DELEGRETURN); } } if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = tdstateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = tdstateid.seqid; *tl++ = tdstateid.other[0]; *tl++ = tdstateid.other[1]; *tl = tdstateid.other[2]; } if (ret > 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(fdvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_SAVEFH); } } else { ret = 0; } if (ret == 0) NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh, VTONFS(tdvp)->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_RENAME); } (void) nfsm_strtom(nd, fnameptr, fnamelen); if (!(nd->nd_flag & ND_NFSV4)) (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh, VTONFS(tdvp)->n_fhp->nfh_len, 0); (void) nfsm_strtom(nd, tnameptr, tnamelen); error = nfscl_request(nd, fdvp, p, cred, fstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { /* For NFSv4, parse out any Delereturn replies. */ if (ret > 0 && nd->nd_repstat != 0 && (nd->nd_flag & ND_NOMOREDATA)) { /* * If the Delegreturn failed, try again without * it. The server will Recall, as required. */ mbuf_freem(nd->nd_mrep); goto tryagain; } for (i = 0; i < (ret * 2); i++) { if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) { if (i == 0 && ret > 1) { /* * If the Delegreturn failed, try again * without it. The server will Recall, as * required. * If ret > 1, the first iteration of this * loop is the second DelegReturn result. */ mbuf_freem(nd->nd_mrep); goto tryagain; } else { nd->nd_flag |= ND_NOMOREDATA; } } } } /* Now, the first wcc attribute reply. */ if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, fstuff); /* and the second wcc attribute reply. */ if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } if (!error) error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp, NULL, tstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs hard link create rpc */ APPLESTATIC int nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error = 0; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_LINK, vp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); } (void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh, VTONFS(dvp)->n_fhp->nfh_len, 0); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LINK); } (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, vp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, dstuff); if (!error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* * First, parse out the PutFH and Getattr result. */ NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (!(*(tl + 1))) NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; /* * Get the pre-op attributes. */ error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs symbolic link create rpc */ APPLESTATIC int nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target, struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; int slen, error = 0; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; nmp = VFSTONFS(vnode_mount(dvp)); slen = strlen(target); if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFLNK); (void) nfsm_strtom(nd, target, slen); } (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) nfscl_fillsattr(nd, vap, dvp, 0, 0); if (!(nd->nd_flag & ND_NFSV4)) (void) nfsm_strtom(nd, target, slen); if (nd->nd_flag & ND_NFSV2) nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if ((nd->nd_flag & ND_NFSV3) && !error) { if (!nd->nd_repstat) error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (!error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. */ if (error == EEXIST) error = 0; return (error); } /* * nfs make dir rpc */ APPLESTATIC int nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error = 0; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFDIR); } (void) nfsm_strtom(nd, name, namelen); nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!nd->nd_repstat && !error) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); } if (!error) error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); } if ((nd->nd_flag & ND_NFSV3) && !error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); /* * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry. */ if (error == EEXIST) error = 0; return (error); } /* * nfs remove directory call */ APPLESTATIC int nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp); (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * Readdir rpc. * Always returns with either uio_resid unchanged, if you are at the * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks * filled in. * I felt this would allow caching of directory blocks more easily * than returning a pertially filled block. * Directory offset cookies: * Oh my, what to do with them... * I can think of three ways to deal with them: * 1 - have the layer above these RPCs maintain a map between logical * directory byte offsets and the NFS directory offset cookies * 2 - pass the opaque directory offset cookies up into userland * and let the libc functions deal with them, via the system call * 3 - return them to userland in the "struct dirent", so future versions * of libc can use them and do whatever is necessary to amke things work * above these rpc calls, in the meantime * For now, I do #3 by "hiding" the directory offset cookies after the * d_name field in struct dirent. This is space inside d_reclen that * will be ignored by anything that doesn't know about them. * The directory offset cookies are filled in as the last 8 bytes of * each directory entry, after d_name. Someday, the userland libc * functions may be able to use these. In the meantime, it satisfies * OpenBSD's requirements for cookies being returned. * If expects the directory offset cookie for the read to be in uio_offset * and returns the one for the next entry after this directory block in * there, as well. */ APPLESTATIC int nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int *eofp, void *stuff) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; nfsquad_t cookie, ncookie; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *dnp = VTONFS(vp); struct nfsvattr nfsva; struct nfsrv_descript nfsd, *nd = &nfsd; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0; long dotfileid, dotdotfileid = 0; u_int32_t fakefileno = 0xffffffff, rderr; char *cp; nfsattrbit_t attrbits, dattrbits; u_int32_t *tl2 = NULL; size_t tresid; KASSERT(uiop->uio_iovcnt == 1 && (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0, ("nfs readdirrpc bad uio")); /* * There is no point in reading a lot more than uio_resid, however * adding one additional DIRBLKSIZ makes sense. Since uio_resid * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this * will never make readsize > nm_readdirsize. */ readsize = nmp->nm_readdirsize; if (readsize > uio_uio_resid(uiop)) readsize = uio_uio_resid(uiop) + DIRBLKSIZ; *attrflagp = 0; if (eofp) *eofp = 0; tresid = uio_uio_resid(uiop); cookie.lval[0] = cookiep->nfsuquad[0]; cookie.lval[1] = cookiep->nfsuquad[1]; nd->nd_mrep = NULL; /* * For NFSv4, first create the "." and ".." entries. */ if (NFSHASNFSV4(nmp)) { reqsize = 6 * NFSX_UNSIGNED; NFSGETATTR_ATTRBIT(&dattrbits); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE); if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, NFSATTRBIT_MOUNTEDONFILEID)) { NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); gotmnton = 1; } else { /* * Must fake it. Use the fileno, except when the * fsid is != to that of the directory. For that * case, generate a fake fileno that is not the same. */ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID); gotmnton = 0; } /* * Joy, oh joy. For V4 we get to hand craft '.' and '..'. */ if (uiop->uio_offset == 0) { #if defined(__FreeBSD_version) && __FreeBSD_version >= 800000 error = VOP_GETATTR(vp, &nfsva.na_vattr, cred); #else error = VOP_GETATTR(vp, &nfsva.na_vattr, cred, p); #endif if (error) return (error); dotfileid = nfsva.na_fileid; NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 2)); if (len > 0 && len <= NFSX_V4FHMAX) error = nfsm_advance(nd, NFSM_RNDUP(len), -1); else error = EPERM; if (!error) { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dotdotfileid = nfsva.na_mntonfileno; else dotdotfileid = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dotdotfileid = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dotdotfileid = fakefileno; } } } else if (nd->nd_repstat == NFSERR_NOENT) { /* * Lookupp returns NFSERR_NOENT when we are * at the root, so just use the current dir. */ nd->nd_repstat = 0; dotdotfileid = dotfileid; } else { error = nd->nd_repstat; } mbuf_freem(nd->nd_mrep); if (error) return (error); nd->nd_mrep = NULL; dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); dp->d_type = DT_DIR; dp->d_fileno = dotfileid; dp->d_namlen = 1; dp->d_name[0] = '.'; dp->d_name[1] = '\0'; dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[4]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); dp->d_type = DT_DIR; dp->d_fileno = dotdotfileid; dp->d_namlen = 2; dp->d_name[0] = '.'; dp->d_name[1] = '.'; dp->d_name[2] = '\0'; dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[4]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); } NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR); } else { reqsize = 5 * NFSX_UNSIGNED; } /* * Loop around doing readdir rpc's of size readsize. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READDIR, vp); if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = cookie.lval[1]; *tl = txdr_unsigned(readsize); } else { NFSM_BUILD(tl, u_int32_t *, reqsize); *tl++ = cookie.lval[0]; *tl++ = cookie.lval[1]; if (cookie.qval == 0) { *tl++ = 0; *tl++ = 0; } else { NFSLOCKNODE(dnp); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; NFSUNLOCKNODE(dnp); } if (nd->nd_flag & ND_NFSV4) { *tl++ = txdr_unsigned(readsize); *tl = txdr_unsigned(readsize); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &dattrbits); } else { *tl = txdr_unsigned(readsize); } } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!(nd->nd_flag & ND_NFSV2)) { if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (!nd->nd_repstat && !error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); NFSLOCKNODE(dnp); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; NFSUNLOCKNODE(dnp); } } if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); if (!more_dirs) tryformoredirs = 0; /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; len = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); nfsva.na_fileid = fxdr_hyper(tl); tl += 2; len = fxdr_unsigned(int, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_fileid = fxdr_unsigned(long, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; goto nfsmout; } tlen = NFSM_RNDUP(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > left) { dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; blksiz = 0; } if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uio_uio_resid_add(uiop, -(DIRHDSIZ)); uiop->uio_offset += DIRHDSIZ; uio_iov_base_add(uiop, DIRHDSIZ); uio_iov_len_add(uiop, -(DIRHDSIZ)); error = nfsm_mbufuio(nd, uiop, len); if (error) goto nfsmout; cp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); tlen -= len; *cp = '\0'; /* null terminate */ cp += tlen; /* points to cookie storage */ tl2 = (u_int32_t *)cp; uio_iov_base_add(uiop, (tlen + NFSX_HYPER)); uio_iov_len_add(uiop, -(tlen + NFSX_HYPER)); uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER)); uiop->uio_offset += (tlen + NFSX_HYPER); } else { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV4) { rderr = 0; nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, &rderr, p, cred); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; } else { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); ncookie.lval[0] = 0; ncookie.lval[1] = *tl++; } if (bigenough) { if (nd->nd_flag & ND_NFSV4) { if (rderr) { dp->d_fileno = 0; } else { if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dp->d_fileno = nfsva.na_mntonfileno; else dp->d_fileno = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dp->d_fileno = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dp->d_fileno = fakefileno; } dp->d_type = vtonfs_dtype(nfsva.na_type); } } else { dp->d_fileno = nfsva.na_fileid; } *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] = ncookie.lval[0]; *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] = ncookie.lval[1]; } more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); if (tryformoredirs) more_dirs = !eof; if (nd->nd_flag & ND_NFSV4) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } } mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; } /* * If returning no data, assume end of file. * If not bigenough, return not end of file, since you aren't * returning all the data * Otherwise, return the eof flag from the server. */ if (eofp) { if (tresid == ((size_t)(uio_uio_resid(uiop)))) *eofp = 1; else if (!bigenough) *eofp = 0; else *eofp = eof; } /* * Add extra empty records to any remaining DIRBLKSIZ chunks. */ while (uio_uio_resid(uiop) > 0 && ((size_t)(uio_uio_resid(uiop))) != tresid) { dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); dp->d_type = DT_UNKNOWN; dp->d_fileno = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; tl = (u_int32_t *)&dp->d_name[4]; *tl++ = cookie.lval[0]; *tl = cookie.lval[1]; dp->d_reclen = DIRBLKSIZ; uio_iov_base_add(uiop, DIRBLKSIZ); uio_iov_len_add(uiop, -(DIRBLKSIZ)); uio_uio_resid_add(uiop, -(DIRBLKSIZ)); uiop->uio_offset += DIRBLKSIZ; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } #ifndef APPLE /* * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir(). * (Also used for NFS V4 when mount flag set.) * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.) */ APPLESTATIC int nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int *eofp, void *stuff) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; vnode_t newvp = NULLVP; struct nfsrv_descript nfsd, *nd = &nfsd; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *dnp = VTONFS(vp), *np; struct nfsvattr nfsva; struct nfsfh *nfhp; nfsquad_t cookie, ncookie; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0; int isdotdot = 0, unlocknewvp = 0; long dotfileid, dotdotfileid = 0, fileno = 0; char *cp; nfsattrbit_t attrbits, dattrbits; size_t tresid; u_int32_t *tl2 = NULL, fakefileno = 0xffffffff, rderr; struct timespec dctime; KASSERT(uiop->uio_iovcnt == 1 && (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0, ("nfs readdirplusrpc bad uio")); timespecclear(&dctime); *attrflagp = 0; if (eofp != NULL) *eofp = 0; ndp->ni_dvp = vp; nd->nd_mrep = NULL; cookie.lval[0] = cookiep->nfsuquad[0]; cookie.lval[1] = cookiep->nfsuquad[1]; tresid = uio_uio_resid(uiop); /* * For NFSv4, first create the "." and ".." entries. */ if (NFSHASNFSV4(nmp)) { NFSGETATTR_ATTRBIT(&dattrbits); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID); if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, NFSATTRBIT_MOUNTEDONFILEID)) { NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); gotmnton = 1; } else { /* * Must fake it. Use the fileno, except when the * fsid is != to that of the directory. For that * case, generate a fake fileno that is not the same. */ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID); gotmnton = 0; } /* * Joy, oh joy. For V4 we get to hand craft '.' and '..'. */ if (uiop->uio_offset == 0) { #if defined(__FreeBSD_version) && __FreeBSD_version >= 800000 error = VOP_GETATTR(vp, &nfsva.na_vattr, cred); #else error = VOP_GETATTR(vp, &nfsva.na_vattr, cred, p); #endif if (error) return (error); dctime = nfsva.na_ctime; dotfileid = nfsva.na_fileid; NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 2)); if (len > 0 && len <= NFSX_V4FHMAX) error = nfsm_advance(nd, NFSM_RNDUP(len), -1); else error = EPERM; if (!error) { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dotdotfileid = nfsva.na_mntonfileno; else dotdotfileid = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dotdotfileid = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dotdotfileid = fakefileno; } } } else if (nd->nd_repstat == NFSERR_NOENT) { /* * Lookupp returns NFSERR_NOENT when we are * at the root, so just use the current dir. */ nd->nd_repstat = 0; dotdotfileid = dotfileid; } else { error = nd->nd_repstat; } mbuf_freem(nd->nd_mrep); if (error) return (error); nd->nd_mrep = NULL; dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_DIR; dp->d_fileno = dotfileid; dp->d_namlen = 1; dp->d_name[0] = '.'; dp->d_name[1] = '\0'; dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[4]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_DIR; dp->d_fileno = dotdotfileid; dp->d_namlen = 2; dp->d_name[0] = '.'; dp->d_name[1] = '.'; dp->d_name[2] = '\0'; dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[4]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); } NFSREADDIRPLUS_ATTRBIT(&attrbits); if (gotmnton) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); } /* * Loop around doing readdir rpc's of size nm_readdirsize. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp); NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED); *tl++ = cookie.lval[0]; *tl++ = cookie.lval[1]; if (cookie.qval == 0) { *tl++ = 0; *tl++ = 0; } else { NFSLOCKNODE(dnp); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; NFSUNLOCKNODE(dnp); } *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_readdirsize); if (nd->nd_flag & ND_NFSV4) { (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &dattrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0) dctime = nap->na_ctime; NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); NFSLOCKNODE(dnp); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; NFSUNLOCKNODE(dnp); more_dirs = fxdr_unsigned(int, *tl); if (!more_dirs) tryformoredirs = 0; /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); if (nd->nd_flag & ND_NFSV4) { ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; } else { fileno = fxdr_unsigned(long, *++tl); tl++; } len = fxdr_unsigned(int, *tl); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; goto nfsmout; } tlen = NFSM_RNDUP(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ + NFSX_HYPER) > left) { dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; blksiz = 0; } if ((tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uio_uio_resid_add(uiop, -(DIRHDSIZ)); uiop->uio_offset += DIRHDSIZ; uio_iov_base_add(uiop, DIRHDSIZ); uio_iov_len_add(uiop, -(DIRHDSIZ)); cnp->cn_nameptr = uio_iov_base(uiop); cnp->cn_namelen = len; NFSCNHASHZERO(cnp); error = nfsm_mbufuio(nd, uiop, len); if (error) goto nfsmout; cp = uio_iov_base(uiop); tlen -= len; *cp = '\0'; cp += tlen; /* points to cookie storage */ tl2 = (u_int32_t *)cp; if (len == 2 && cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.') isdotdot = 1; else isdotdot = 0; uio_iov_base_add(uiop, (tlen + NFSX_HYPER)); uio_iov_len_add(uiop, -(tlen + NFSX_HYPER)); uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER)); uiop->uio_offset += (tlen + NFSX_HYPER); } else { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; } nfhp = NULL; if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; attrflag = fxdr_unsigned(int, *tl); if (attrflag) { error = nfsm_loadattr(nd, &nfsva); if (error) goto nfsmout; } NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED); if (*tl) { error = nfsm_getfh(nd, &nfhp); if (error) goto nfsmout; } if (!attrflag && nfhp != NULL) { FREE((caddr_t)nfhp, M_NFSFH); nfhp = NULL; } } else { rderr = 0; nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, &rderr, p, cred); if (error) goto nfsmout; } if (bigenough) { if (nd->nd_flag & ND_NFSV4) { if (rderr) { dp->d_fileno = 0; } else if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dp->d_fileno = nfsva.na_mntonfileno; else dp->d_fileno = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dp->d_fileno = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dp->d_fileno = fakefileno; } } else { dp->d_fileno = fileno; } *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] = ncookie.lval[0]; *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] = ncookie.lval[1]; if (nfhp != NULL) { if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len, dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) { VREF(vp); newvp = vp; unlocknewvp = 0; FREE((caddr_t)nfhp, M_NFSFH); np = dnp; } else if (isdotdot != 0) { /* * Skip doing a nfscl_nget() call for "..". * There's a race between acquiring the nfs * node here and lookups that look for the * directory being read (in the parent). * It would try to get a lock on ".." here, * owning the lock on the directory being * read. Lookup will hold the lock on ".." * and try to acquire the lock on the * directory being read. * If the directory is unlocked/relocked, * then there is a LOR with the buflock * vp is relocked. */ free(nfhp, M_NFSFH); } else { error = nfscl_nget(vnode_mount(vp), vp, nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE); if (!error) { newvp = NFSTOV(np); unlocknewvp = 1; } } nfhp = NULL; if (newvp != NULLVP) { error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 0); if (error) { if (unlocknewvp) vput(newvp); else vrele(newvp); goto nfsmout; } dp->d_type = vtonfs_dtype(np->n_vattr.na_type); ndp->ni_vp = newvp; NFSCNHASH(cnp, HASHINIT); if (cnp->cn_namelen <= NCHNAMLEN && (newvp->v_type != VDIR || dctime.tv_sec != 0)) { cache_enter_time(ndp->ni_dvp, ndp->ni_vp, cnp, &nfsva.na_ctime, newvp->v_type != VDIR ? NULL : &dctime); } if (unlocknewvp) vput(newvp); else vrele(newvp); newvp = NULLVP; } } } else if (nfhp != NULL) { FREE((caddr_t)nfhp, M_NFSFH); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); if (tryformoredirs) more_dirs = !eof; if (nd->nd_flag & ND_NFSV4) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } } mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; } /* * If returning no data, assume end of file. * If not bigenough, return not end of file, since you aren't * returning all the data * Otherwise, return the eof flag from the server. */ if (eofp != NULL) { if (tresid == uio_uio_resid(uiop)) *eofp = 1; else if (!bigenough) *eofp = 0; else *eofp = eof; } /* * Add extra empty records to any remaining DIRBLKSIZ chunks. */ while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_UNKNOWN; dp->d_fileno = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; tl = (u_int32_t *)&dp->d_name[4]; *tl++ = cookie.lval[0]; *tl = cookie.lval[1]; dp->d_reclen = DIRBLKSIZ; uio_iov_base_add(uiop, DIRBLKSIZ); uio_iov_len_add(uiop, -(DIRBLKSIZ)); uio_uio_resid_add(uiop, -(DIRBLKSIZ)); uiop->uio_offset += DIRBLKSIZ; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } #endif /* !APPLE */ /* * Nfs commit rpc */ APPLESTATIC int nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred, - NFSPROC_T *p, u_char *verfp, struct nfsvattr *nap, int *attrflagp, - void *stuff) + NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp); NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff); if (!error && !nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); - NFSBCOPY((caddr_t)tl, verfp, NFSX_VERF); + NFSLOCKMNT(nmp); + if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) { + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + nd->nd_repstat = NFSERR_STALEWRITEVERF; + } + NFSUNLOCKMNT(nmp); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } nfsmout: if (!error && nd->nd_repstat) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * NFS byte range lock rpc. * (Mostly just calls one of the three lower level RPC routines.) */ APPLESTATIC int nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfscllockowner *lp; struct nfsclclient *clp; struct nfsfh *nfhp; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); u_int64_t off, len; off_t start, end; u_int32_t clidrev = 0; int error = 0, newone = 0, expireret = 0, retrycnt, donelocally; int callcnt, dorpc; /* * Convert the flock structure into a start and end and do POSIX * bounds checking. */ switch (fl->l_whence) { case SEEK_SET: case SEEK_CUR: /* * Caller is responsible for adding any necessary offset * when SEEK_CUR is used. */ start = fl->l_start; off = fl->l_start; break; case SEEK_END: start = size + fl->l_start; off = size + fl->l_start; break; default: return (EINVAL); }; if (start < 0) return (EINVAL); if (fl->l_len != 0) { end = start + fl->l_len - 1; if (end < start) return (EINVAL); } len = fl->l_len; if (len == 0) len = NFS64BITSSET; retrycnt = 0; do { nd->nd_repstat = 0; if (op == F_GETLK) { - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags); if (!error) { clidrev = clp->nfsc_clientidrev; error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred, p, id, flags); } else if (error == -1) { error = 0; } nfscl_clientrelease(clp); } else if (op == F_UNLCK && fl->l_type == F_UNLCK) { /* * We must loop around for all lockowner cases. */ callcnt = 0; - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); do { error = nfscl_relbytelock(vp, off, len, cred, p, callcnt, clp, id, flags, &lp, &dorpc); /* * If it returns a NULL lp, we're done. */ if (lp == NULL) { if (callcnt == 0) nfscl_clientrelease(clp); else nfscl_releasealllocks(clp, vp, p, id, flags); return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; /* * If the server doesn't support Posix lock semantics, * only allow locks on the entire file, since it won't * handle overlapping byte ranges. * There might still be a problem when a lock * upgrade/downgrade (read<->write) occurs, since the * server "might" expect an unlock first? */ if (dorpc && (lp->nfsl_open->nfso_posixlock || (off == 0 && len == NFS64BITSSET))) { /* * Since the lock records will go away, we must * wait for grace and delay here. */ do { error = nfsrpc_locku(nd, nmp, lp, off, len, NFSV4LOCKT_READ, cred, p, 0); if ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfs_advlock"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0); } callcnt++; } while (error == 0 && nd->nd_repstat == 0); nfscl_releasealllocks(clp, vp, p, id, flags); } else if (op == F_SETLK) { error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p, NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally); if (error || donelocally) { return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; nfhp = VTONFS(vp)->n_fhp; if (!lp->nfsl_open->nfso_posixlock && (off != 0 || len != NFS64BITSSET)) { error = EINVAL; } else { error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, lp, newone, reclaim, off, len, fl->l_type, cred, p, 0); } if (!error) error = nd->nd_repstat; nfscl_lockrelease(lp, error, newone); } else { error = EINVAL; } if (!error) error = nd->nd_repstat; if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || - error == NFSERR_STALECLIENTID || error == NFSERR_DELAY) { + error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || + error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_advlock"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID || + error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } /* * The lower level routine for the LockT case. */ APPLESTATIC int nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { u_int32_t *tl; int error, type, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsnode *np; + struct nfsmount *nmp; + nmp = VFSTONFS(vp->v_mount); NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (fl->l_type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); else *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; - *tl++ = clp->nfsc_clientid.lval[0]; - *tl = clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; nfscl_filllockowner(id, own, flags); np = VTONFS(vp); NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN], np->n_fhp->nfh_len); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); if (nd->nd_repstat == 0) { fl->l_type = F_UNLCK; } else if (nd->nd_repstat == NFSERR_DENIED) { nd->nd_repstat = 0; fl->l_whence = SEEK_SET; NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); fl->l_start = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; if (len == NFS64BITSSET) fl->l_len = 0; else fl->l_len = len; type = fxdr_unsigned(int, *tl++); if (type == NFSV4LOCKT_WRITE) fl->l_type = F_WRLCK; else fl->l_type = F_RDLCK; /* * XXX For now, I have no idea what to do with the * conflicting lock_owner, so I'll just set the pid == 0 * and skip over the lock_owner. */ fl->l_pid = (pid_t)0; tl += 2; size = fxdr_unsigned(int, *tl); if (size < 0 || size > NFSV4_OPAQUELIMIT) error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); - } else if (nd->nd_repstat == NFSERR_STALECLIENTID) + } else if (nd->nd_repstat == NFSERR_STALECLIENTID || + nd->nd_repstat == NFSERR_BADSESSION) nfscl_initiate_recovery(clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Lower level function that performs the LockU RPC. */ static int nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfscllockowner *lp, u_int64_t off, u_int64_t len, u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error; nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh, - lp->nfsl_open->nfso_fhlen, NULL); + lp->nfsl_open->nfso_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(type); *tl = txdr_unsigned(lp->nfsl_seqid); if (nfstest_outofseq && (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); tl++; - *tl++ = lp->nfsl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); lp->nfsl_stateid.seqid = *tl++; lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; - } else if (nd->nd_repstat == NFSERR_STALESTATEID) + } else if (nd->nd_repstat == NFSERR_STALESTATEID || + nd->nd_repstat == NFSERR_BADSESSION) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * The actual Lock RPC. */ APPLESTATIC int nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone, int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; - nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL); + nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); else *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); *tl++ = txdr_unsigned(reclaim); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; if (newone) { *tl = newnfs_true; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 2 * NFSX_UNSIGNED + NFSX_HYPER); *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid); - *tl++ = lp->nfsl_open->nfso_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = lp->nfsl_open->nfso_stateid.seqid; *tl++ = lp->nfsl_open->nfso_stateid.other[0]; *tl++ = lp->nfsl_open->nfso_stateid.other[1]; *tl++ = lp->nfsl_open->nfso_stateid.other[2]; *tl++ = txdr_unsigned(lp->nfsl_seqid); - *tl++ = lp->nfsl_open->nfso_own->nfsow_clp->nfsc_clientid.lval[0]; - *tl = lp->nfsl_open->nfso_own->nfsow_clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); } else { *tl = newnfs_false; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); - *tl++ = lp->nfsl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; *tl = txdr_unsigned(lp->nfsl_seqid); if (nfstest_outofseq && (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); } if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (newone) NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); lp->nfsl_stateid.seqid = *tl++; lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; } else if (nd->nd_repstat == NFSERR_DENIED) { NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); size = fxdr_unsigned(int, *(tl + 7)); if (size < 0 || size > NFSV4_OPAQUELIMIT) error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); - } else if (nd->nd_repstat == NFSERR_STALESTATEID) + } else if (nd->nd_repstat == NFSERR_STALESTATEID || + nd->nd_repstat == NFSERR_BADSESSION) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs statfs rpc * (always called with the vp for the mount point) */ APPLESTATIC int nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl = NULL; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; nfsattrbit_t attrbits; int error; *attrflagp = 0; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4(nmp)) { /* * For V4, you actually do a getattr. */ NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); NFSSTATFS_GETATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p, cred); if (!error) { nmp->nm_fsid[0] = nap->na_filesid[0]; nmp->nm_fsid[1] = nap->na_filesid[1]; NFSSETHASSETFSID(nmp); *attrflagp = 1; } } else { error = nd->nd_repstat; } if (error) goto nfsmout; } else { NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } if (nd->nd_repstat) { error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATFS(nd->nd_flag & ND_NFSV3)); } if (NFSHASNFSV3(nmp)) { sbp->sf_tbytes = fxdr_hyper(tl); tl += 2; sbp->sf_fbytes = fxdr_hyper(tl); tl += 2; sbp->sf_abytes = fxdr_hyper(tl); tl += 2; sbp->sf_tfiles = fxdr_hyper(tl); tl += 2; sbp->sf_ffiles = fxdr_hyper(tl); tl += 2; sbp->sf_afiles = fxdr_hyper(tl); tl += 2; sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl); } else if (NFSHASNFSV4(nmp) == 0) { sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl); } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs pathconf rpc */ APPLESTATIC int nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; u_int32_t *tl; nfsattrbit_t attrbits; int error; *attrflagp = 0; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4(nmp)) { /* * For V4, you actually do a getattr. */ NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); NFSPATHCONF_GETATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (!error) *attrflagp = 1; } else { error = nd->nd_repstat; } } else { NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF); pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++); pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++); pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++); pc->pc_chownrestricted = fxdr_unsigned(u_int32_t, *tl++); pc->pc_caseinsensitive = fxdr_unsigned(u_int32_t, *tl++); pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl); } } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs version 3 fsinfo rpc call */ APPLESTATIC int nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO); fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_maxfilesize = fxdr_hyper(tl); tl += 2; fxdr_nfsv3time(tl, &fsp->fs_timedelta); tl += 2; fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl); } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Renew RPC. */ APPLESTATIC int -nfsrpc_renew(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) +nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred, + NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsmount *nmp; int error; + struct nfssockreq *nrp; nmp = clp->nfsc_nmp; if (nmp == NULL) return (0); - nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL); - NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); - *tl++ = clp->nfsc_clientid.lval[0]; - *tl = clp->nfsc_clientid.lval[1]; + nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, + &dsp->nfsclds_sess); + if (!NFSHASNFSV4N(nmp)) { + /* NFSv4.1 just uses a Sequence Op and not a Renew. */ + NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; + } + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. */ + nrp = &nmp->nm_sockreq; nd->nd_flag |= ND_USEGSSNAME; - error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Releaselockowner RPC. */ APPLESTATIC int nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp, uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; u_int32_t *tl; int error; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; - nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL); - NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); - *tl++ = nmp->nm_clp->nfsc_clientid.lval[0]; - *tl = nmp->nm_clp->nfsc_clientid.lval[1]; - NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); - NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen); - (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); + if (NFSHASNFSV4N(nmp)) { + /* For NFSv4.1, do a FreeStateID. */ + nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL, + NULL); + nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID); + } else { + nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL, + NULL); + NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; + NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); + NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen); + (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); + } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Compound to get the mount pt FH. */ APPLESTATIC int nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; u_char *cp, *cp2; int error, cnt, len, setnil; u_int32_t *opcntp; - nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp); + nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL); cp = dirpath; cnt = 0; do { setnil = 0; while (*cp == '/') cp++; cp2 = cp; while (*cp2 != '\0' && *cp2 != '/') cp2++; if (*cp2 == '/') { setnil = 1; *cp2 = '\0'; } if (cp2 != cp) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LOOKUP); nfsm_strtom(nd, cp, strlen(cp)); cnt++; } if (setnil) *cp2++ = '/'; cp = cp2; } while (*cp != '\0'); - *opcntp = txdr_unsigned(2 + cnt); + if (NFSHASNFSV4N(nmp)) + /* Has a Sequence Op done by nfscl_reqstart(). */ + *opcntp = txdr_unsigned(3 + cnt); + else + *opcntp = txdr_unsigned(2 + cnt); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETFH); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED); tl += (2 + 2 * cnt); if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { nd->nd_repstat = NFSERR_BADXDR; } else { nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len); if (nd->nd_repstat == 0) nmp->nm_fhsize = len; } } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Delegreturn RPC. */ APPLESTATIC int nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred, struct nfsmount *nmp, NFSPROC_T *p, int syscred) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh, - dp->nfsdl_fhlen, NULL); + dp->nfsdl_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = dp->nfsdl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getacl call. */ APPLESTATIC int nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_GETACL, vp); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs setacl call. */ APPLESTATIC int nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { int error; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff); return (error); } /* * nfs setacl call. */ static int nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_SETACL, vp); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); /* Don't care about the pre/postop attributes */ mbuf_freem(nd->nd_mrep); return (nd->nd_repstat); } + +/* + * Do the NFSv4.1 Exchange ID. + */ +int +nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, + struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl, v41flags; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + struct nfsclds *dsp; + struct timespec verstime; + int error, len; + + *dspp = NULL; + nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* Client owner */ + *tl = txdr_unsigned(clp->nfsc_rev); + (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); + + NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(exchflags); + *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); + + /* Set the implementation id4 */ + *tl = txdr_unsigned(1); + (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); + (void) nfsm_strtom(nd, version, strlen(version)); + NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); + verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ + verstime.tv_nsec = 0; + txdr_nfsv4time(&verstime, tl); + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error, + (int)nd->nd_repstat); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER); + len = fxdr_unsigned(int, *(tl + 7)); + if (len < 0 || len > NFSV4_OPAQUELIMIT) { + error = NFSERR_BADXDR; + goto nfsmout; + } + dsp = malloc(sizeof(struct nfsclds) + len, M_NFSCLDS, + M_WAITOK | M_ZERO); + dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew; + dsp->nfsclds_servownlen = len; + dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++; + dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++; + dsp->nfsclds_sess.nfsess_sequenceid = + fxdr_unsigned(uint32_t, *tl++); + v41flags = fxdr_unsigned(uint32_t, *tl); + if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 && + NFSHASPNFSOPT(nmp)) { + NFSCL_DEBUG(1, "set PNFS\n"); + NFSLOCKMNT(nmp); + nmp->nm_state |= NFSSTA_PNFS; + NFSUNLOCKMNT(nmp); + dsp->nfsclds_flags |= NFSCLDS_MDS; + } + if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0) + dsp->nfsclds_flags |= NFSCLDS_DS; + if (len > 0) + nd->nd_repstat = nfsrv_mtostr(nd, + dsp->nfsclds_serverown, len); + if (nd->nd_repstat == 0) { + mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); + mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", + NULL, MTX_DEF); + nfscl_initsessionslots(&dsp->nfsclds_sess); + *dspp = dsp; + } else + free(dsp, M_NFSCLDS); + } + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 Create Session. + */ +int +nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, + struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, + NFSPROC_T *p) +{ + uint32_t crflags, *tl; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + int error, irdcnt; + + nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); + *tl++ = sep->nfsess_clientid.lval[0]; + *tl++ = sep->nfsess_clientid.lval[1]; + *tl++ = txdr_unsigned(sequenceid); + crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST); + if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0) + crflags |= NFSV4CRSESS_CONNBACKCHAN; + *tl = txdr_unsigned(crflags); + + /* Fill in fore channel attributes. */ + NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); + *tl++ = 0; /* Header pad size */ + *tl++ = txdr_unsigned(100000); /* Max request size */ + *tl++ = txdr_unsigned(100000); /* Max response size */ + *tl++ = txdr_unsigned(4096); /* Max response size cached */ + *tl++ = txdr_unsigned(20); /* Max operations */ + *tl++ = txdr_unsigned(64); /* Max slots */ + *tl = 0; /* No rdma ird */ + + /* Fill in back channel attributes. */ + NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); + *tl++ = 0; /* Header pad size */ + *tl++ = txdr_unsigned(10000); /* Max request size */ + *tl++ = txdr_unsigned(10000); /* Max response size */ + *tl++ = txdr_unsigned(4096); /* Max response size cached */ + *tl++ = txdr_unsigned(4); /* Max operations */ + *tl++ = txdr_unsigned(NFSV4_CBSLOTS); /* Max slots */ + *tl = 0; /* No rdma ird */ + + NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */ + + /* Allow AUTH_SYS callbacks as uid, gid == 0. */ + *tl++ = txdr_unsigned(1); /* Auth_sys only */ + *tl++ = txdr_unsigned(AUTH_SYS); /* AUTH_SYS type */ + *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */ + *tl++ = 0; /* Null machine name */ + *tl++ = 0; /* Uid == 0 */ + *tl++ = 0; /* Gid == 0 */ + *tl = 0; /* No additional gids */ + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, + NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + + 2 * NFSX_UNSIGNED); + bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); + crflags = fxdr_unsigned(uint32_t, *tl); + if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) { + NFSLOCKMNT(nmp); + nmp->nm_state |= NFSSTA_SESSPERSIST; + NFSUNLOCKMNT(nmp); + } + + /* Get the fore channel slot count. */ + NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); + tl += 3; /* Skip the other counts. */ + sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); + tl++; + sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); + NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots); + irdcnt = fxdr_unsigned(int, *tl); + if (irdcnt > 0) + NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED); + + /* and the back channel slot count. */ + NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); + tl += 5; + sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl); + NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots); + } + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 Destroy Session. + */ +int +nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + int error; + + nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); + bcopy(NFSMNT_MDSSESSION(nmp)->nfsess_sessionid, tl, NFSX_V4SESSIONID); + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 Destroy Client. + */ +int +nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + int error; + + nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 LayoutGet. + */ +int +nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode, + uint64_t offset, uint64_t len, uint64_t minlen, int layoutlen, + nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp, + struct ucred *cred, NFSPROC_T *p, void *stuff) +{ + uint32_t *tl; + struct nfsrv_descript nfsd, *nd = &nfsd; + struct nfsfh *nfhp; + struct nfsclflayout *flp, *prevflp, *tflp; + int cnt, error, gotiomode, fhcnt, nfhlen, i, j; + uint8_t *cp; + uint64_t retlen; + + flp = NULL; + gotiomode = -1; + nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER + + NFSX_STATEID); + *tl++ = newnfs_false; /* Don't signal availability. */ + *tl++ = txdr_unsigned(NFSLAYOUT_NFSV4_1_FILES); + *tl++ = txdr_unsigned(iomode); + txdr_hyper(offset, tl); + tl += 2; + txdr_hyper(len, tl); + tl += 2; + txdr_hyper(minlen, tl); + tl += 2; + *tl++ = txdr_unsigned(stateidp->seqid); + NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid); + *tl++ = stateidp->other[0]; + *tl++ = stateidp->other[1]; + *tl++ = stateidp->other[2]; + *tl = txdr_unsigned(layoutlen); + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID); + if (*tl++ != 0) + *retonclosep = 1; + else + *retonclosep = 0; + stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); + NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep, + (int)stateidp->seqid); + stateidp->other[0] = *tl++; + stateidp->other[1] = *tl++; + stateidp->other[2] = *tl++; + cnt = fxdr_unsigned(int, *tl); + NFSCL_DEBUG(4, "layg cnt=%d\n", cnt); + if (cnt <= 0 || cnt > 10000) { + /* Don't accept more than 10000 layouts in reply. */ + error = NFSERR_BADXDR; + goto nfsmout; + } + for (i = 0; i < cnt; i++) { + /* Dissect all the way to the file handle cnt. */ + NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_HYPER + + 6 * NFSX_UNSIGNED + NFSX_V4DEVICEID); + fhcnt = fxdr_unsigned(int, *(tl + 11 + + NFSX_V4DEVICEID / NFSX_UNSIGNED)); + NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt); + if (fhcnt < 0 || fhcnt > 100) { + /* Don't accept more than 100 file handles. */ + error = NFSERR_BADXDR; + goto nfsmout; + } + if (fhcnt > 1) + flp = malloc(sizeof(*flp) + (fhcnt - 1) * + sizeof(struct nfsfh *), + M_NFSFLAYOUT, M_WAITOK); + else + flp = malloc(sizeof(*flp), + M_NFSFLAYOUT, M_WAITOK); + flp->nfsfl_flags = 0; + flp->nfsfl_fhcnt = 0; + flp->nfsfl_devp = NULL; + flp->nfsfl_off = fxdr_hyper(tl); tl += 2; + retlen = fxdr_hyper(tl); tl += 2; + if (flp->nfsfl_off + retlen < flp->nfsfl_off) + flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off; + else + flp->nfsfl_end = flp->nfsfl_off + retlen; + flp->nfsfl_iomode = fxdr_unsigned(int, *tl++); + if (gotiomode == -1) + gotiomode = flp->nfsfl_iomode; + NFSCL_DEBUG(4, "layg reqiom=%d retiom=%d\n", iomode, + (int)flp->nfsfl_iomode); + if (fxdr_unsigned(int, *tl++) != + NFSLAYOUT_NFSV4_1_FILES) { + printf("NFSv4.1: got non-files layout\n"); + error = NFSERR_BADXDR; + goto nfsmout; + } + NFSBCOPY(++tl, flp->nfsfl_dev, NFSX_V4DEVICEID); + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++); + NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util); + flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++); + flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2; + if (fxdr_unsigned(int, *tl) != fhcnt) { + printf("EEK! bad fhcnt\n"); + error = NFSERR_BADXDR; + goto nfsmout; + } + for (j = 0; j < fhcnt; j++) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + nfhlen = fxdr_unsigned(int, *tl); + if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) { + error = NFSERR_BADXDR; + goto nfsmout; + } + nfhp = malloc(sizeof(*nfhp) + nfhlen - 1, + M_NFSFH, M_WAITOK); + flp->nfsfl_fh[j] = nfhp; + flp->nfsfl_fhcnt++; + nfhp->nfh_len = nfhlen; + NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen)); + NFSBCOPY(cp, nfhp->nfh_fh, nfhlen); + } + if (flp->nfsfl_iomode == gotiomode) { + /* Keep the list in increasing offset order. */ + tflp = LIST_FIRST(flhp); + prevflp = NULL; + while (tflp != NULL && + tflp->nfsfl_off < flp->nfsfl_off) { + prevflp = tflp; + tflp = LIST_NEXT(tflp, nfsfl_list); + } + if (prevflp == NULL) + LIST_INSERT_HEAD(flhp, flp, nfsfl_list); + else + LIST_INSERT_AFTER(prevflp, flp, + nfsfl_list); + } else { + printf("nfscl_layoutget(): got wrong iomode\n"); + nfscl_freeflayout(flp); + } + flp = NULL; + } + } + if (nd->nd_repstat != 0 && error == 0) + error = nd->nd_repstat; +nfsmout: + if (error != 0 && flp != NULL) + nfscl_freeflayout(flp); + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 Get Device Info. + */ +int +nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, + uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred, + NFSPROC_T *p) +{ + uint32_t cnt, *tl; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + struct sockaddr_storage ss; + struct nfsclds *dsp = NULL, **dspp; + struct nfscldevinfo *ndi; + int addrcnt, bitcnt, error, i, isudp, j, pos, safilled, stripecnt; + uint8_t stripeindex; + + *ndip = NULL; + ndi = NULL; + nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED); + NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID); + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + *tl++ = txdr_unsigned(layouttype); + *tl++ = txdr_unsigned(100000); + if (notifybitsp != NULL && *notifybitsp != 0) { + *tl = txdr_unsigned(1); /* One word of bits. */ + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(*notifybitsp); + } else + *tl = txdr_unsigned(0); + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED); + if (layouttype != fxdr_unsigned(int, *tl++)) + printf("EEK! devinfo layout type not same!\n"); + stripecnt = fxdr_unsigned(int, *++tl); + NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt); + if (stripecnt < 1 || stripecnt > 4096) { + printf("NFS devinfo stripecnt %d: out of range\n", + stripecnt); + error = NFSERR_BADXDR; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED); + addrcnt = fxdr_unsigned(int, *(tl + stripecnt)); + NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt); + if (addrcnt < 1 || addrcnt > 128) { + printf("NFS devinfo addrcnt %d: out of range\n", + addrcnt); + error = NFSERR_BADXDR; + goto nfsmout; + } + + /* + * Now we know how many stripe indices and addresses, so + * we can allocate the structure the correct size. + */ + i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *) + + 1; + NFSCL_DEBUG(4, "stripeindices=%d\n", i); + ndi = malloc(sizeof(*ndi) + (addrcnt + i) * + sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO); + NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID); + ndi->nfsdi_refcnt = 0; + ndi->nfsdi_stripecnt = stripecnt; + ndi->nfsdi_addrcnt = addrcnt; + /* Fill in the stripe indices. */ + for (i = 0; i < stripecnt; i++) { + stripeindex = fxdr_unsigned(uint8_t, *tl++); + NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex); + if (stripeindex >= addrcnt) { + printf("NFS devinfo stripeindex %d: too big\n", + (int)stripeindex); + error = NFSERR_BADXDR; + goto nfsmout; + } + nfsfldi_setstripeindex(ndi, i, stripeindex); + } + + /* Now, dissect the server address(es). */ + safilled = 0; + for (i = 0; i < addrcnt; i++) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + cnt = fxdr_unsigned(uint32_t, *tl); + if (cnt == 0) { + printf("NFS devinfo 0 len addrlist\n"); + error = NFSERR_BADXDR; + goto nfsmout; + } + dspp = nfsfldi_addr(ndi, i); + pos = arc4random() % cnt; /* Choose one. */ + safilled = 0; + for (j = 0; j < cnt; j++) { + error = nfsv4_getipaddr(nd, &ss, &isudp); + if (error != 0 && error != EPERM) { + error = NFSERR_BADXDR; + goto nfsmout; + } + if (error == 0 && isudp == 0) { + /* + * The algorithm is: + * - use "pos" entry if it is of the + * same af_family or none of them + * is of the same af_family + * else + * - use the first one of the same + * af_family. + */ + if ((safilled == 0 && ss.ss_family == + nmp->nm_nam->sa_family) || + (j == pos && + (safilled == 0 || ss.ss_family == + nmp->nm_nam->sa_family)) || + (safilled == 1 && ss.ss_family == + nmp->nm_nam->sa_family)) { + error = nfsrpc_fillsa(nmp, &ss, + &dsp, p); + if (error == 0) { + *dspp = dsp; + if (ss.ss_family == + nmp->nm_nam->sa_family) + safilled = 2; + else + safilled = 1; + } + } + } + } + if (safilled == 0) + break; + } + + /* And the notify bits. */ + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (safilled != 0) { + bitcnt = fxdr_unsigned(int, *tl); + if (bitcnt > 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (notifybitsp != NULL) + *notifybitsp = + fxdr_unsigned(uint32_t, *tl); + } + *ndip = ndi; + } else + error = EPERM; + } + if (nd->nd_repstat != 0) + error = nd->nd_repstat; +nfsmout: + if (error != 0 && ndi != NULL) + nfscl_freedevinfo(ndi); + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 LayoutCommit. + */ +int +nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, + uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp, + int layouttype, int layoutupdatecnt, uint8_t *layp, struct ucred *cred, + NFSPROC_T *p, void *stuff) +{ + uint32_t *tl; + struct nfsrv_descript nfsd, *nd = &nfsd; + int error, outcnt, i; + uint8_t *cp; + + nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER + + NFSX_STATEID); + txdr_hyper(off, tl); + tl += 2; + txdr_hyper(len, tl); + tl += 2; + if (reclaim != 0) + *tl++ = newnfs_true; + else + *tl++ = newnfs_false; + *tl++ = txdr_unsigned(stateidp->seqid); + *tl++ = stateidp->other[0]; + *tl++ = stateidp->other[1]; + *tl++ = stateidp->other[2]; + *tl++ = newnfs_true; + if (lastbyte < off) + lastbyte = off; + else if (lastbyte >= (off + len)) + lastbyte = off + len - 1; + txdr_hyper(lastbyte, tl); + tl += 2; + *tl++ = newnfs_false; + *tl++ = txdr_unsigned(layouttype); + *tl = txdr_unsigned(layoutupdatecnt); + if (layoutupdatecnt > 0) { + KASSERT(layouttype != NFSLAYOUT_NFSV4_1_FILES, + ("Must be nil for Files Layout")); + outcnt = NFSM_RNDUP(layoutupdatecnt); + NFSM_BUILD(cp, uint8_t *, outcnt); + NFSBCOPY(layp, cp, layoutupdatecnt); + cp += layoutupdatecnt; + for (i = 0; i < (outcnt - layoutupdatecnt); i++) + *cp++ = 0x0; + } + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 LayoutReturn. + */ +int +nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, + int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset, + uint64_t len, nfsv4stateid_t *stateidp, int layoutcnt, uint32_t *layp, + struct ucred *cred, NFSPROC_T *p, void *stuff) +{ + uint32_t *tl; + struct nfsrv_descript nfsd, *nd = &nfsd; + int error, outcnt, i; + uint8_t *cp; + + nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); + if (reclaim != 0) + *tl++ = newnfs_true; + else + *tl++ = newnfs_false; + *tl++ = txdr_unsigned(layouttype); + *tl++ = txdr_unsigned(iomode); + *tl = txdr_unsigned(layoutreturn); + if (layoutreturn == NFSLAYOUTRETURN_FILE) { + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + + NFSX_UNSIGNED); + txdr_hyper(offset, tl); + tl += 2; + txdr_hyper(len, tl); + tl += 2; + NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid); + *tl++ = txdr_unsigned(stateidp->seqid); + *tl++ = stateidp->other[0]; + *tl++ = stateidp->other[1]; + *tl++ = stateidp->other[2]; + *tl = txdr_unsigned(layoutcnt); + if (layoutcnt > 0) { + outcnt = NFSM_RNDUP(layoutcnt); + NFSM_BUILD(cp, uint8_t *, outcnt); + NFSBCOPY(layp, cp, layoutcnt); + cp += layoutcnt; + for (i = 0; i < (outcnt - layoutcnt); i++) + *cp++ = 0x0; + } + } + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (*tl != 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); + stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); + stateidp->other[0] = *tl++; + stateidp->other[1] = *tl++; + stateidp->other[2] = *tl; + } + } else + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Acquire a layout and devinfo, if possible. The caller must have acquired + * a reference count on the nfsclclient structure before calling this. + * Return the layout in lypp with a reference count on it, if successful. + */ +static int +nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp, + int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off, + struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p) +{ + struct nfscllayout *lyp; + struct nfsclflayout *flp, *tflp; + struct nfscldevinfo *dip; + struct nfsclflayouthead flh; + int error = 0, islocked, layoutlen, recalled, retonclose; + nfsv4stateid_t stateid; + + *lypp = NULL; + /* + * If lyp is returned non-NULL, there will be a refcnt (shared lock) + * on it, iff flp != NULL or a lock (exclusive lock) on it iff + * flp == NULL. + */ + lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len, + off, &flp, &recalled); + islocked = 0; + if (lyp == NULL || flp == NULL) { + if (recalled != 0) + return (EIO); + LIST_INIT(&flh); + layoutlen = NFSMNT_MDSSESSION(nmp)->nfsess_maxcache - + (NFSX_STATEID + 3 * NFSX_UNSIGNED); + if (lyp == NULL) { + stateid.seqid = 0; + stateid.other[0] = stateidp->other[0]; + stateid.other[1] = stateidp->other[1]; + stateid.other[2] = stateidp->other[2]; + error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, + nfhp->nfh_len, iomode, (uint64_t)0, INT64_MAX, + (uint64_t)0, layoutlen, &stateid, &retonclose, + &flh, cred, p, NULL); + } else { + islocked = 1; + stateid.seqid = lyp->nfsly_stateid.seqid; + stateid.other[0] = lyp->nfsly_stateid.other[0]; + stateid.other[1] = lyp->nfsly_stateid.other[1]; + stateid.other[2] = lyp->nfsly_stateid.other[2]; + error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, + nfhp->nfh_len, iomode, off, INT64_MAX, + (uint64_t)0, layoutlen, &stateid, &retonclose, + &flh, cred, p, NULL); + } + if (error == 0) + LIST_FOREACH(tflp, &flh, nfsfl_list) { + error = nfscl_adddevinfo(nmp, NULL, tflp); + if (error != 0) { + error = nfsrpc_getdeviceinfo(nmp, + tflp->nfsfl_dev, + NFSLAYOUT_NFSV4_1_FILES, + notifybitsp, &dip, cred, p); + if (error != 0) + break; + error = nfscl_adddevinfo(nmp, dip, + tflp); + if (error != 0) + printf( + "getlayout: cannot add\n"); + } + } + if (error == 0) { + /* + * nfscl_layout() always returns with the nfsly_lock + * set to a refcnt (shared lock). + */ + error = nfscl_layout(nmp, vp, nfhp->nfh_fh, + nfhp->nfh_len, &stateid, retonclose, &flh, &lyp, + cred, p); + if (error == 0) + *lypp = lyp; + } else if (islocked != 0) + nfsv4_unlock(&lyp->nfsly_lock, 0); + } else + *lypp = lyp; + return (error); +} + +/* + * Do a TCP connection plus exchange id and create session. + * If successful, a "struct nfsclds" is linked into the list for the + * mount point and a pointer to it is returned. + */ +static int +nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_storage *ssp, + struct nfsclds **dspp, NFSPROC_T *p) +{ + struct sockaddr_in *msad, *sad, *ssd; + struct sockaddr_in6 *msad6, *sad6, *ssd6; + struct nfsclclient *clp; + struct nfssockreq *nrp; + struct nfsclds *dsp, *tdsp; + int error; + enum nfsclds_state retv; + uint32_t sequenceid; + + KASSERT(nmp->nm_sockreq.nr_cred != NULL, + ("nfsrpc_fillsa: NULL nr_cred")); + NFSLOCKCLSTATE(); + clp = nmp->nm_clp; + NFSUNLOCKCLSTATE(); + if (clp == NULL) + return (EPERM); + if (ssp->ss_family == AF_INET) { + ssd = (struct sockaddr_in *)ssp; + NFSLOCKMNT(nmp); + + /* + * Check to see if we already have a session for this + * address that is usable for a DS. + * Note that the MDS's address is in a different place + * than the sessions already acquired for DS's. + */ + msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam; + tdsp = TAILQ_FIRST(&nmp->nm_sess); + while (tdsp != NULL) { + if (msad != NULL && msad->sin_family == AF_INET && + ssd->sin_addr.s_addr == msad->sin_addr.s_addr && + ssd->sin_port == msad->sin_port && + (tdsp->nfsclds_flags & NFSCLDS_DS) != 0) { + *dspp = tdsp; + NFSUNLOCKMNT(nmp); + NFSCL_DEBUG(4, "fnd same addr\n"); + return (0); + } + tdsp = TAILQ_NEXT(tdsp, nfsclds_list); + if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) + msad = (struct sockaddr_in *) + tdsp->nfsclds_sockp->nr_nam; + else + msad = NULL; + } + NFSUNLOCKMNT(nmp); + + /* No IP address match, so look for new/trunked one. */ + sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO); + sad->sin_len = sizeof(*sad); + sad->sin_family = AF_INET; + sad->sin_port = ssd->sin_port; + sad->sin_addr.s_addr = ssd->sin_addr.s_addr; + nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); + nrp->nr_nam = (struct sockaddr *)sad; + } else if (ssp->ss_family == AF_INET6) { + ssd6 = (struct sockaddr_in6 *)ssp; + NFSLOCKMNT(nmp); + + /* + * Check to see if we already have a session for this + * address that is usable for a DS. + * Note that the MDS's address is in a different place + * than the sessions already acquired for DS's. + */ + msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam; + tdsp = TAILQ_FIRST(&nmp->nm_sess); + while (tdsp != NULL) { + if (msad6 != NULL && msad6->sin6_family == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&ssd6->sin6_addr, + &msad6->sin6_addr) && + ssd6->sin6_port == msad6->sin6_port && + (tdsp->nfsclds_flags & NFSCLDS_DS) != 0) { + *dspp = tdsp; + NFSUNLOCKMNT(nmp); + return (0); + } + tdsp = TAILQ_NEXT(tdsp, nfsclds_list); + if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) + msad6 = (struct sockaddr_in6 *) + tdsp->nfsclds_sockp->nr_nam; + else + msad6 = NULL; + } + NFSUNLOCKMNT(nmp); + + /* No IP address match, so look for new/trunked one. */ + sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO); + sad6->sin6_len = sizeof(*sad6); + sad6->sin6_family = AF_INET6; + sad6->sin6_port = ssd6->sin6_port; + NFSBCOPY(&ssd6->sin6_addr, &sad6->sin6_addr, + sizeof(struct in6_addr)); + nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); + nrp->nr_nam = (struct sockaddr *)sad6; + } else + return (EPERM); + + nrp->nr_sotype = SOCK_STREAM; + mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF); + nrp->nr_prog = NFS_PROG; + nrp->nr_vers = NFS_VER4; + + /* + * Use the credentials that were used for the mount, which are + * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc. + * Ref. counting the credentials with crhold() is probably not + * necessary, since nm_sockreq.nr_cred won't be crfree()'d until + * unmount, but I did it anyhow. + */ + nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred); + error = newnfs_connect(nmp, nrp, NULL, p, 0); + NFSCL_DEBUG(3, "DS connect=%d\n", error); + + /* Now, do the exchangeid and create session. */ + if (error == 0) + error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS, + &dsp, nrp->nr_cred, p); + NFSCL_DEBUG(3, "DS exchangeid=%d\n", error); + if (error == 0) { + dsp->nfsclds_sockp = nrp; + NFSLOCKMNT(nmp); + retv = nfscl_getsameserver(nmp, dsp, &tdsp); + NFSCL_DEBUG(3, "getsame ret=%d\n", retv); + if (retv == NFSDSP_USETHISSESSION) { + NFSUNLOCKMNT(nmp); + /* + * If there is already a session for this server, + * use it. + */ + (void)newnfs_disconnect(nrp); + nfscl_freenfsclds(dsp); + *dspp = tdsp; + return (0); + } + if (retv == NFSDSP_SEQTHISSESSION) + sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid; + else + sequenceid = dsp->nfsclds_sess.nfsess_sequenceid; + NFSUNLOCKMNT(nmp); + error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, + nrp, sequenceid, 0, nrp->nr_cred, p); + NFSCL_DEBUG(3, "DS createsess=%d\n", error); + } else { + NFSFREECRED(nrp->nr_cred); + NFSFREEMUTEX(&nrp->nr_mtx); + free(nrp->nr_nam, M_SONAME); + free(nrp, M_NFSSOCKREQ); + } + if (error == 0) { + NFSCL_DEBUG(3, "add DS session\n"); + /* + * Put it at the end of the list. That way the list + * is ordered by when the entry was added. This matters + * since the one done first is the one that should be + * used for sequencid'ing any subsequent create sessions. + */ + NFSLOCKMNT(nmp); + TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list); + NFSUNLOCKMNT(nmp); + *dspp = dsp; + } else if (dsp != NULL) + nfscl_freenfsclds(dsp); + return (error); +} + +/* + * Do the NFSv4.1 Reclaim Complete. + */ +int +nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + int error; + + nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = newnfs_false; + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Initialize the slot tables for a session. + */ +static void +nfscl_initsessionslots(struct nfsclsession *sep) +{ + int i; + + for (i = 0; i < NFSV4_CBSLOTS; i++) { + if (sep->nfsess_cbslots[i].nfssl_reply != NULL) + m_freem(sep->nfsess_cbslots[i].nfssl_reply); + NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot)); + } + for (i = 0; i < 64; i++) + sep->nfsess_slotseq[i] = 0; + sep->nfsess_slots = 0; +} + +/* + * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS). + */ +int +nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, + uint32_t rwaccess, struct ucred *cred, NFSPROC_T *p) +{ + struct nfsnode *np = VTONFS(vp); + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfscllayout *layp; + struct nfscldevinfo *dip; + struct nfsclflayout *rflp; + nfsv4stateid_t stateid; + struct ucred *newcred; + uint64_t lastbyte, len, off, oresid, xfer; + int eof, error, iolaymode, recalled; + void *lckp; + + if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || + (np->n_flag & NNOLAYOUT) != 0) + return (EIO); + /* Now, get a reference cnt on the clientid for this mount. */ + if (nfscl_getref(nmp) == 0) + return (EIO); + + /* Find an appropriate stateid. */ + newcred = NFSNEWCRED(cred); + error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, + rwaccess, 1, newcred, p, &stateid, &lckp); + if (error != 0) { + NFSFREECRED(newcred); + nfscl_relref(nmp); + return (error); + } + /* Search for a layout for this file. */ + off = uiop->uio_offset; + layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh, + np->n_fhp->nfh_len, off, &rflp, &recalled); + if (layp == NULL || rflp == NULL) { + if (recalled != 0) { + NFSFREECRED(newcred); + nfscl_relref(nmp); + return (EIO); + } + if (layp != NULL) { + nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0); + layp = NULL; + } + /* Try and get a Layout, if it is supported. */ + if (rwaccess == NFSV4OPEN_ACCESSWRITE || + (np->n_flag & NWRITEOPENED) != 0) + iolaymode = NFSLAYOUTIOMODE_RW; + else + iolaymode = NFSLAYOUTIOMODE_READ; + error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode, + NULL, &stateid, off, &layp, newcred, p); + if (error != 0) { + NFSLOCKNODE(np); + np->n_flag |= NNOLAYOUT; + NFSUNLOCKNODE(np); + if (lckp != NULL) + nfscl_lockderef(lckp); + NFSFREECRED(newcred); + if (layp != NULL) + nfscl_rellayout(layp, 0); + nfscl_relref(nmp); + return (error); + } + } + + /* + * Loop around finding a layout that works for the first part of + * this I/O operation, and then call the function that actually + * does the RPC. + */ + eof = 0; + len = (uint64_t)uiop->uio_resid; + while (len > 0 && error == 0 && eof == 0) { + off = uiop->uio_offset; + error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp); + if (error == 0) { + oresid = xfer = (uint64_t)uiop->uio_resid; + if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off)) + xfer = rflp->nfsfl_end - rflp->nfsfl_off; + dip = nfscl_getdevinfo(nmp->nm_clp, rflp->nfsfl_dev, + rflp->nfsfl_devp); + if (dip != NULL) { + error = nfscl_doflayoutio(vp, uiop, iomode, + must_commit, &eof, &stateid, rwaccess, dip, + layp, rflp, off, xfer, newcred, p); + nfscl_reldevinfo(dip); + lastbyte = off + xfer - 1; + if (error == 0) { + NFSLOCKCLSTATE(); + if (lastbyte > layp->nfsly_lastbyte) + layp->nfsly_lastbyte = lastbyte; + NFSUNLOCKCLSTATE(); + } + } else + error = EIO; + if (error == 0) + len -= (oresid - (uint64_t)uiop->uio_resid); + } + } + if (lckp != NULL) + nfscl_lockderef(lckp); + NFSFREECRED(newcred); + nfscl_rellayout(layp, 0); + nfscl_relref(nmp); + return (error); +} + +/* + * Find a file layout that will handle the first bytes of the requested + * range and return the information from it needed to to the I/O operation. + */ +int +nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess, + struct nfsclflayout **retflpp) +{ + struct nfsclflayout *flp, *nflp, *rflp; + uint32_t rw; + + rflp = NULL; + rw = rwaccess; + /* For reading, do the Read list first and then the Write list. */ + do { + if (rw == NFSV4OPEN_ACCESSREAD) + flp = LIST_FIRST(&lyp->nfsly_flayread); + else + flp = LIST_FIRST(&lyp->nfsly_flayrw); + while (flp != NULL) { + nflp = LIST_NEXT(flp, nfsfl_list); + if (flp->nfsfl_off > off) + break; + if (flp->nfsfl_end > off && + (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end)) + rflp = flp; + flp = nflp; + } + if (rw == NFSV4OPEN_ACCESSREAD) + rw = NFSV4OPEN_ACCESSWRITE; + else + rw = 0; + } while (rw != 0); + if (rflp != NULL) { + /* This one covers the most bytes starting at off. */ + *retflpp = rflp; + return (0); + } + return (EIO); +} + +/* + * Do I/O using an NFSv4.1 file layout. + */ +static int +nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, + int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp, + struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off, + uint64_t len, struct ucred *cred, NFSPROC_T *p) +{ + uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer; + int commit_thru_mds, error = 0, stripe_index, stripe_pos; + struct nfsnode *np; + struct nfsfh *fhp; + struct nfsclds **dspp; + + np = VTONFS(vp); + rel_off = off - flp->nfsfl_patoff; + stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff; + stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) % + dp->nfsdi_stripecnt; + transfer = stripe_unit_size - (rel_off % stripe_unit_size); + + /* Loop around, doing I/O for each stripe unit. */ + while (len > 0 && error == 0) { + stripe_index = nfsfldi_stripeindex(dp, stripe_pos); + dspp = nfsfldi_addr(dp, stripe_index); + if (len > transfer) + xfer = transfer; + else + xfer = len; + if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) { + /* Dense layout. */ + if (stripe_pos >= flp->nfsfl_fhcnt) + return (EIO); + fhp = flp->nfsfl_fh[stripe_pos]; + io_off = (rel_off / (stripe_unit_size * + dp->nfsdi_stripecnt)) * stripe_unit_size + + rel_off % stripe_unit_size; + } else { + /* Sparse layout. */ + if (flp->nfsfl_fhcnt > 1) { + if (stripe_index >= flp->nfsfl_fhcnt) + return (EIO); + fhp = flp->nfsfl_fh[stripe_index]; + } else if (flp->nfsfl_fhcnt == 1) + fhp = flp->nfsfl_fh[0]; + else + fhp = np->n_fhp; + io_off = off; + } + if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) + commit_thru_mds = 1; + else + commit_thru_mds = 0; + if (rwflag == FREAD) + error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, + io_off, xfer, fhp, cred, p); + else { + error = nfsrpc_writeds(vp, uiop, iomode, must_commit, + stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds, + cred, p); + if (error == 0) { + NFSLOCKCLSTATE(); + lyp->nfsly_flags |= NFSLY_WRITTEN; + NFSUNLOCKCLSTATE(); + } + } + if (error == 0) { + transfer = stripe_unit_size; + stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt; + len -= xfer; + off += xfer; + } + } + return (error); +} + +/* + * The actual read RPC done to a DS. + */ +static int +nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp, + struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + int error, retlen; + struct nfsrv_descript nfsd; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfsrv_descript *nd = &nfsd; + struct nfssockreq *nrp; + + nd->nd_mrep = NULL; + nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len, + NULL, &dsp->nfsclds_sess); + nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); + txdr_hyper(io_off, tl); + *(tl + 2) = txdr_unsigned(len); + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. */ + nrp = &nmp->nm_sockreq; + error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); + if (error != 0) + return (error); + if (nd->nd_repstat != 0) { + error = nd->nd_repstat; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + *eofp = fxdr_unsigned(int, *tl); + NFSM_STRSIZ(retlen, len); + error = nfsm_mbufuio(nd, uiop, retlen); +nfsmout: + if (nd->nd_mrep != NULL) + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * The actual write RPC done to a DS. + */ +static int +nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, + nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len, + struct nfsfh *fhp, int commit_thru_mds, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + int error, rlen, commit, committed = NFSWRITE_FILESYNC; + int32_t backup; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + struct nfssockreq *nrp; + + KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); + nd->nd_mrep = NULL; + nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len, + NULL, &dsp->nfsclds_sess); + nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); + txdr_hyper(io_off, tl); + tl += 2; + *tl++ = txdr_unsigned(*iomode); + *tl = txdr_unsigned(len); + nfsm_uiombuf(nd, uiop, len); + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. */ + nrp = &nmp->nm_sockreq; + error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); + if (error != 0) + return (error); + if (nd->nd_repstat != 0) { + /* + * In case the rpc gets retried, roll + * the uio fileds changed by nfsm_uiombuf() + * back. + */ + uiop->uio_offset -= len; + uio_uio_resid_add(uiop, len); + uio_iov_base_add(uiop, -len); + uio_iov_len_add(uiop, len); + error = nd->nd_repstat; + } else { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); + rlen = fxdr_unsigned(int, *tl++); + if (rlen == 0) { + error = NFSERR_IO; + goto nfsmout; + } else if (rlen < len) { + backup = len - rlen; + uio_iov_base_add(uiop, -(backup)); + uio_iov_len_add(uiop, backup); + uiop->uio_offset -= backup; + uio_uio_resid_add(uiop, backup); + len = rlen; + } + commit = fxdr_unsigned(int, *tl++); + + /* + * Return the lowest committment level + * obtained by any of the RPCs. + */ + if (committed == NFSWRITE_FILESYNC) + committed = commit; + else if (committed == NFSWRITE_DATASYNC && + commit == NFSWRITE_UNSTABLE) + committed = commit; + if (commit_thru_mds != 0) { + NFSLOCKMNT(nmp); + if (!NFSHASWRITEVERF(nmp)) { + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + NFSSETWRITEVERF(nmp); + } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { + *must_commit = 1; + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + } + NFSUNLOCKMNT(nmp); + } else { + NFSLOCKDS(dsp); + if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) { + NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); + dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF; + } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { + *must_commit = 1; + NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); + } + NFSUNLOCKDS(dsp); + } + } +nfsmout: + if (nd->nd_mrep != NULL) + mbuf_freem(nd->nd_mrep); + *iomode = committed; + if (nd->nd_repstat != 0 && error == 0) + error = nd->nd_repstat; + return (error); +} + +/* + * Free up the nfsclds structure. + */ +void +nfscl_freenfsclds(struct nfsclds *dsp) +{ + int i; + + if (dsp == NULL) + return; + if (dsp->nfsclds_sockp != NULL) { + NFSFREECRED(dsp->nfsclds_sockp->nr_cred); + NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx); + free(dsp->nfsclds_sockp->nr_nam, M_SONAME); + free(dsp->nfsclds_sockp, M_NFSSOCKREQ); + } + NFSFREEMUTEX(&dsp->nfsclds_mtx); + NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx); + for (i = 0; i < NFSV4_CBSLOTS; i++) { + if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL) + m_freem( + dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply); + } + free(dsp, M_NFSCLDS); +} + +static enum nfsclds_state +nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp, + struct nfsclds **retdspp) +{ + struct nfsclds *dsp, *cur_dsp; + + /* + * Search the list of nfsclds structures for one with the same + * server. + */ + cur_dsp = NULL; + TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { + if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen && + dsp->nfsclds_servownlen != 0 && + !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown, + dsp->nfsclds_servownlen)) { + NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n", + TAILQ_FIRST(&nmp->nm_sess), dsp, + dsp->nfsclds_flags); + /* Server major id matches. */ + if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) { + *retdspp = dsp; + return (NFSDSP_USETHISSESSION); + } + + /* + * Note the first match, so it can be used for + * sequence'ing new sessions. + */ + if (cur_dsp == NULL) + cur_dsp = dsp; + } + } + if (cur_dsp != NULL) { + *retdspp = cur_dsp; + return (NFSDSP_SEQTHISSESSION); + } + return (NFSDSP_NOTFOUND); +} + +#ifdef notyet +/* + * NFS commit rpc to a DS. + */ +static int +nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, + struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p, void *stuff) +{ + uint32_t *tl; + struct nfsrv_descript nfsd, *nd = &nfsd; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfssockreq *nrp; + int error; + + nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len, + NULL, &dsp->nfsclds_sess); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); + txdr_hyper(offset, tl); + tl += 2; + *tl = txdr_unsigned(cnt); + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. */ + nrp = &nmp->nm_sockreq; + error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); + if (error) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); + NFSLOCKDS(dsp); + if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { + NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); + error = NFSERR_STALEWRITEVERF; + } + NFSUNLOCKDS(dsp); + } +nfsmout: + if (error == 0 && nd->nd_repstat != 0) + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} +#endif + Index: head/sys/fs/nfsclient/nfs_clstate.c =================================================================== --- head/sys/fs/nfsclient/nfs_clstate.c (revision 244041) +++ head/sys/fs/nfsclient/nfs_clstate.c (revision 244042) @@ -1,4232 +1,5208 @@ /*- * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions implement the client side state handling for NFSv4. * NFSv4 state handling: * - A lockowner is used to determine lock contention, so it * corresponds directly to a Posix pid. (1 to 1 mapping) * - The correct granularity of an OpenOwner is not nearly so * obvious. An OpenOwner does the following: * - provides a serial sequencing of Open/Close/Lock-with-new-lockowner * - is used to check for Open/Share contention (not applicable to * this client, since all Opens are Deny_None) * As such, I considered both extreme. * 1 OpenOwner per ClientID - Simple to manage, but fully serializes * all Open, Close and Lock (with a new lockowner) Ops. * 1 OpenOwner for each Open - This one results in an OpenConfirm for * every Open, for most servers. * So, I chose to use the same mapping as I did for LockOwnwers. * The main concern here is that you can end up with multiple Opens * for the same File Handle, but on different OpenOwners (opens * inherited from parents, grandparents...) and you do not know * which of these the vnodeop close applies to. This is handled by * delaying the Close Op(s) until all of the Opens have been closed. * (It is not yet obvious if this is the correct granularity.) * - How the code handles serialization: * - For the ClientId, it uses an exclusive lock while getting its * SetClientId and during recovery. Otherwise, it uses a shared * lock via a reference count. * - For the rest of the data structures, it uses an SMP mutex * (once the nfs client is SMP safe) and doesn't sleep while * manipulating the linked lists. * - The serialization of Open/Close/Lock/LockU falls out in the * "wash", since OpenOwners and LockOwners are both mapped from * Posix pid. In other words, there is only one Posix pid using * any given owner, so that owner is serialized. (If you change * the granularity of the OpenOwner, then code must be added to * serialize Ops on the OpenOwner.) * - When to get rid of OpenOwners and LockOwners. * - The function nfscl_cleanup_common() is executed after a process exits. * It goes through the client list looking for all Open and Lock Owners. * When one is found, it is marked "defunct" or in the case of * an OpenOwner without any Opens, freed. * The renew thread scans for defunct Owners and gets rid of them, * if it can. The LockOwners will also be deleted when the * associated Open is closed. * - If the LockU or Close Op(s) fail during close in a way * that could be recovered upon retry, they are relinked to the * ClientId's defunct open list and retried by the renew thread * until they succeed or an unmount/recovery occurs. * (Since we are done with them, they do not need to be recovered.) */ #ifndef APPLEKEXT #include /* * Global variables */ extern struct nfsstats newnfsstats; extern struct nfsreqhead nfsd_reqq; +extern u_int32_t newnfs_false, newnfs_true; +extern int nfscl_debuglevel; NFSREQSPINLOCK; NFSCLSTATEMUTEX; int nfscl_inited = 0; struct nfsclhead nfsclhead; /* Head of clientid list */ int nfscl_deleghighwater = NFSCLDELEGHIGHWATER; +int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER; #endif /* !APPLEKEXT */ static int nfscl_delegcnt = 0; +static int nfscl_layoutcnt = 0; static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t, struct nfscllockowner **, struct nfsclopen **); static void nfscl_clrelease(struct nfsclclient *); static void nfscl_cleanclient(struct nfsclclient *); static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *, struct ucred *, NFSPROC_T *); static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *, struct nfsmount *, struct ucred *, NFSPROC_T *); static void nfscl_recover(struct nfsclclient *, struct ucred *, NFSPROC_T *); static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *, struct nfscllock *, int); static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **, struct nfscllock **, int); static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *); static u_int32_t nfscl_nextcbident(void); -static mount_t nfscl_getmnt(u_int32_t); +static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **); +static struct nfsclclient *nfscl_getclnt(u_int32_t); +static struct nfsclclient *nfscl_getclntsess(uint8_t *); static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *, int); +static void nfscl_retoncloselayout(struct nfsclclient *, uint8_t *, int); +static void nfscl_reldevinfo_locked(struct nfscldevinfo *); +static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *, + int); +static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *); static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *, u_int8_t *, struct nfscllock **); static void nfscl_freealllocks(struct nfscllockownerhead *, int); static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int, struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **); static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *, struct nfsclowner **, struct nfsclowner **, struct nfsclopen **, struct nfsclopen **, u_int8_t *, u_int8_t *, int, int *); static int nfscl_moveopen(vnode_t , struct nfsclclient *, struct nfsmount *, struct nfsclopen *, struct nfsclowner *, struct nfscldeleg *, struct ucred *, NFSPROC_T *); static void nfscl_totalrecall(struct nfsclclient *); static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *, struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *); static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int, u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int, struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *); static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short, struct ucred *, NFSPROC_T *); static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t, struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *); static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *); static int nfscl_errmap(struct nfsrv_descript *); static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *); static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *, struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int); static void nfscl_freeopenowner(struct nfsclowner *, int); static void nfscl_cleandeleg(struct nfscldeleg *); static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *, struct nfsmount *, NFSPROC_T *); static void nfscl_emptylockowner(struct nfscllockowner *, struct nfscllockownerfhhead *); +static void nfscl_mergeflayouts(struct nfsclflayouthead *, + struct nfsclflayouthead *); +static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t, + uint64_t, uint32_t, struct nfsclrecalllayout *); +static int nfscl_seq(uint32_t, uint32_t); +static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *, + struct ucred *, NFSPROC_T *); +static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *, + struct ucred *, NFSPROC_T *); static short nfscberr_null[] = { 0, 0, }; static short nfscberr_getattr[] = { NFSERR_RESOURCE, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, 0, }; static short nfscberr_recall[] = { NFSERR_RESOURCE, NFSERR_BADHANDLE, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, 0, }; static short *nfscl_cberrmap[] = { nfscberr_null, nfscberr_null, nfscberr_null, nfscberr_getattr, nfscberr_recall }; #define NETFAMILY(clp) \ (((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) ? AF_INET6 : AF_INET) /* * Called for an open operation. * If the nfhp argument is NULL, just get an openowner. */ APPLESTATIC int nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg, struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp, struct nfsclopen **opp, int *newonep, int *retp, int lockit) { struct nfsclclient *clp; struct nfsclowner *owp, *nowp; struct nfsclopen *op = NULL, *nop = NULL; struct nfscldeleg *dp; struct nfsclownerhead *ohp; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int ret; if (newonep != NULL) *newonep = 0; if (opp != NULL) *opp = NULL; if (owpp != NULL) *owpp = NULL; /* * Might need one or both of these, so MALLOC them now, to * avoid a tsleep() in MALLOC later. */ MALLOC(nowp, struct nfsclowner *, sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); if (nfhp != NULL) MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) + fhlen - 1, M_NFSCLOPEN, M_WAITOK); - ret = nfscl_getcl(vp, cred, p, &clp); + ret = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (ret != 0) { FREE((caddr_t)nowp, M_NFSCLOWNER); if (nop != NULL) FREE((caddr_t)nop, M_NFSCLOPEN); return (ret); } /* * Get the Open iff it already exists. * If none found, add the new one or return error, depending upon * "create". */ nfscl_filllockowner(p->td_proc, own, F_POSIX); NFSLOCKCLSTATE(); dp = NULL; /* First check the delegation list */ if (nfhp != NULL && usedeleg) { LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) { if (dp->nfsdl_fhlen == fhlen && !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) { if (!(amode & NFSV4OPEN_ACCESSWRITE) || (dp->nfsdl_flags & NFSCLDL_WRITE)) break; dp = NULL; break; } } } if (dp != NULL) ohp = &dp->nfsdl_owner; else ohp = &clp->nfsc_owner; /* Now, search for an openowner */ LIST_FOREACH(owp, ohp, nfsow_list) { if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN)) break; } /* * Create a new open, as required. */ nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen, newonep); /* * Serialize modifications to the open owner for multiple threads * within the same process using a read/write sleep lock. */ if (lockit) nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR); NFSUNLOCKCLSTATE(); if (nowp != NULL) FREE((caddr_t)nowp, M_NFSCLOWNER); if (nop != NULL) FREE((caddr_t)nop, M_NFSCLOPEN); if (owpp != NULL) *owpp = owp; if (opp != NULL) *opp = op; if (retp != NULL) { if (nfhp != NULL && dp != NULL && nop == NULL) /* new local open on delegation */ *retp = NFSCLOPEN_SETCRED; else *retp = NFSCLOPEN_OK; } /* * Now, check the mode on the open and return the appropriate * value. */ if (op != NULL && (amode & ~(op->nfso_mode))) { op->nfso_mode |= amode; if (retp != NULL && dp == NULL) *retp = NFSCLOPEN_DOOPEN; } return (0); } /* * Create a new open, as required. */ static void nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp, struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp, struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen, int *newonep) { struct nfsclowner *owp = *owpp, *nowp; struct nfsclopen *op, *nop; if (nowpp != NULL) nowp = *nowpp; else nowp = NULL; if (nopp != NULL) nop = *nopp; else nop = NULL; if (owp == NULL && nowp != NULL) { NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN); LIST_INIT(&nowp->nfsow_open); nowp->nfsow_clp = clp; nowp->nfsow_seqid = 0; nowp->nfsow_defunct = 0; nfscl_lockinit(&nowp->nfsow_rwlock); if (dp != NULL) { newnfsstats.cllocalopenowners++; LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list); } else { newnfsstats.clopenowners++; LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list); } owp = *owpp = nowp; *nowpp = NULL; if (newonep != NULL) *newonep = 1; } /* If an fhp has been specified, create an Open as well. */ if (fhp != NULL) { /* and look for the correct open, based upon FH */ LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, fhp, fhlen)) break; } if (op == NULL && nop != NULL) { nop->nfso_own = owp; nop->nfso_mode = 0; nop->nfso_opencnt = 0; nop->nfso_posixlock = 1; nop->nfso_fhlen = fhlen; NFSBCOPY(fhp, nop->nfso_fh, fhlen); LIST_INIT(&nop->nfso_lock); nop->nfso_stateid.seqid = 0; nop->nfso_stateid.other[0] = 0; nop->nfso_stateid.other[1] = 0; nop->nfso_stateid.other[2] = 0; if (dp != NULL) { TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; newnfsstats.cllocalopens++; } else { newnfsstats.clopens++; } LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list); *opp = nop; *nopp = NULL; if (newonep != NULL) *newonep = 1; } else { *opp = op; } } } /* * Called to find/add a delegation to a client. */ APPLESTATIC int nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp, int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp) { struct nfscldeleg *dp = *dpp, *tdp; /* * First, if we have received a Read delegation for a file on a * read/write file system, just return it, because they aren't * useful, imho. */ if (mp != NULL && dp != NULL && !NFSMNT_RDONLY(mp) && (dp->nfsdl_flags & NFSCLDL_READ)) { (void) nfscl_trydelegreturn(dp, cred, VFSTONFS(mp), p); FREE((caddr_t)dp, M_NFSCLDELEG); *dpp = NULL; return (0); } /* Look for the correct deleg, based upon FH */ NFSLOCKCLSTATE(); tdp = nfscl_finddeleg(clp, nfhp, fhlen); if (tdp == NULL) { if (dp == NULL) { NFSUNLOCKCLSTATE(); return (NFSERR_BADSTATEID); } *dpp = NULL; TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp, nfsdl_hash); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; newnfsstats.cldelegates++; nfscl_delegcnt++; } else { /* * Delegation already exists, what do we do if a new one?? */ if (dp != NULL) { printf("Deleg already exists!\n"); FREE((caddr_t)dp, M_NFSCLDELEG); *dpp = NULL; } else { *dpp = tdp; } } NFSUNLOCKCLSTATE(); return (0); } /* * Find a delegation for this file handle. Return NULL upon failure. */ static struct nfscldeleg * nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen) { struct nfscldeleg *dp; LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) { if (dp->nfsdl_fhlen == fhlen && !NFSBCMP(dp->nfsdl_fh, fhp, fhlen)) break; } return (dp); } /* * Get a stateid for an I/O operation. First, look for an open and iff * found, return either a lockowner stateid or the open stateid. * If no Open is found, just return error and the special stateid of all zeros. */ APPLESTATIC int nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, - struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp, + int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp, void **lckpp) { struct nfsclclient *clp; struct nfsclowner *owp; struct nfsclopen *op = NULL; struct nfscllockowner *lp; struct nfscldeleg *dp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int error, done; *lckpp = NULL; /* * Initially, just set the special stateid of all zeros. + * (Don't do this for a DS, since the special stateid can't be used.) */ - stateidp->seqid = 0; - stateidp->other[0] = 0; - stateidp->other[1] = 0; - stateidp->other[2] = 0; + if (fords == 0) { + stateidp->seqid = 0; + stateidp->other[0] = 0; + stateidp->other[1] = 0; + stateidp->other[2] = 0; + } if (vnode_vtype(vp) != VREG) return (EISDIR); np = VTONFS(vp); NFSLOCKCLSTATE(); clp = nfscl_findcl(VFSTONFS(vnode_mount(vp))); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (EACCES); } /* * Wait for recovery to complete. */ while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG)) (void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR, PZERO, "nfsrecvr", NULL); /* * First, look for a delegation. */ LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) { if (dp->nfsdl_fhlen == fhlen && !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) { if (!(mode & NFSV4OPEN_ACCESSWRITE) || (dp->nfsdl_flags & NFSCLDL_WRITE)) { stateidp->seqid = dp->nfsdl_stateid.seqid; stateidp->other[0] = dp->nfsdl_stateid.other[0]; stateidp->other[1] = dp->nfsdl_stateid.other[1]; stateidp->other[2] = dp->nfsdl_stateid.other[2]; if (!(np->n_flag & NDELEGRECALL)) { TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; dp->nfsdl_rwlock.nfslock_usecnt++; *lckpp = (void *)&dp->nfsdl_rwlock; } NFSUNLOCKCLSTATE(); return (0); } break; } } if (p != NULL) { /* * If p != NULL, we want to search the parentage tree * for a matching OpenOwner and use that. */ nfscl_filllockowner(p->td_proc, own, F_POSIX); lp = NULL; error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own, mode, &lp, &op); - if (error == 0 && lp != NULL) { + if (error == 0 && lp != NULL && fords == 0) { + /* Don't return a lock stateid for a DS. */ stateidp->seqid = lp->nfsl_stateid.seqid; stateidp->other[0] = lp->nfsl_stateid.other[0]; stateidp->other[1] = lp->nfsl_stateid.other[1]; stateidp->other[2] = lp->nfsl_stateid.other[2]; NFSUNLOCKCLSTATE(); return (0); } } if (op == NULL) { /* If not found, just look for any OpenOwner that will work. */ done = 0; owp = LIST_FIRST(&clp->nfsc_owner); while (!done && owp != NULL) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, nfhp, fhlen) && (mode & op->nfso_mode) == mode) { done = 1; break; } } if (!done) owp = LIST_NEXT(owp, nfsow_list); } if (!done) { NFSUNLOCKCLSTATE(); return (ENOENT); } /* * For read aheads or write behinds, use the open cred. * A read ahead or write behind is indicated by p == NULL. */ if (p == NULL) newnfs_copycred(&op->nfso_cred, cred); } /* * No lock stateid, so return the open stateid. */ stateidp->seqid = op->nfso_stateid.seqid; stateidp->other[0] = op->nfso_stateid.other[0]; stateidp->other[1] = op->nfso_stateid.other[1]; stateidp->other[2] = op->nfso_stateid.other[2]; NFSUNLOCKCLSTATE(); return (0); } /* * Search for a matching file, mode and, optionally, lockowner. */ static int nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen, u_int8_t *openown, u_int8_t *lockown, u_int32_t mode, struct nfscllockowner **lpp, struct nfsclopen **opp) { struct nfsclowner *owp; struct nfsclopen *op, *rop, *rop2; struct nfscllockowner *lp; int keep_looping; if (lpp != NULL) *lpp = NULL; /* * rop will be set to the open to be returned. There are three * variants of this, all for an open of the correct file: * 1 - A match of lockown. * 2 - A match of the openown, when no lockown match exists. * 3 - A match for any open, if no openown or lockown match exists. * Looking for #2 over #3 probably isn't necessary, but since * RFC3530 is vague w.r.t. the relationship between openowners and * lockowners, I think this is the safer way to go. */ rop = NULL; rop2 = NULL; keep_looping = 1; /* Search the client list */ owp = LIST_FIRST(ohp); while (owp != NULL && keep_looping != 0) { /* and look for the correct open */ op = LIST_FIRST(&owp->nfsow_open); while (op != NULL && keep_looping != 0) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, nfhp, fhlen) && (op->nfso_mode & mode) == mode) { if (lpp != NULL) { /* Now look for a matching lockowner. */ LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, lockown, NFSV4CL_LOCKNAMELEN)) { *lpp = lp; rop = op; keep_looping = 0; break; } } } if (rop == NULL && !NFSBCMP(owp->nfsow_owner, openown, NFSV4CL_LOCKNAMELEN)) { rop = op; if (lpp == NULL) keep_looping = 0; } if (rop2 == NULL) rop2 = op; } op = LIST_NEXT(op, nfso_list); } owp = LIST_NEXT(owp, nfsow_list); } if (rop == NULL) rop = rop2; if (rop == NULL) return (EBADF); *opp = rop; return (0); } /* * Release use of an open owner. Called when open operations are done * with the open owner. */ APPLESTATIC void nfscl_ownerrelease(struct nfsclowner *owp, __unused int error, __unused int candelete, int unlocked) { if (owp == NULL) return; NFSLOCKCLSTATE(); if (!unlocked) nfscl_lockunlock(&owp->nfsow_rwlock); nfscl_clrelease(owp->nfsow_clp); NFSUNLOCKCLSTATE(); } /* * Release use of an open structure under an open owner. */ APPLESTATIC void nfscl_openrelease(struct nfsclopen *op, int error, int candelete) { struct nfsclclient *clp; struct nfsclowner *owp; if (op == NULL) return; NFSLOCKCLSTATE(); owp = op->nfso_own; nfscl_lockunlock(&owp->nfsow_rwlock); clp = owp->nfsow_clp; if (error && candelete && op->nfso_opencnt == 0) nfscl_freeopen(op, 0); nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } /* * Called to get a clientid structure. It will optionally lock the * client data structures to do the SetClientId/SetClientId_confirm, * but will release that lock and return the clientid with a refernce * count on it. * If the "cred" argument is NULL, a new clientid should not be created. * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot * be done. + * The start_renewthread argument tells nfscl_getcl() to start a renew + * thread if this creates a new clp. * It always clpp with a reference count on it, unless returning an error. */ APPLESTATIC int -nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, - struct nfsclclient **clpp) +nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p, + int start_renewthread, struct nfsclclient **clpp) { struct nfsclclient *clp; struct nfsclclient *newclp = NULL; - struct mount *mp; struct nfsmount *nmp; char uuid[HOSTUUIDLEN]; int igotlock = 0, error, trystalecnt, clidinusedelay, i; u_int16_t idlen = 0; - mp = vnode_mount(vp); nmp = VFSTONFS(mp); if (cred != NULL) { getcredhostuuid(cred, uuid, sizeof uuid); idlen = strlen(uuid); if (idlen > 0) idlen += sizeof (u_int64_t); else idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */ MALLOC(newclp, struct nfsclclient *, sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT, - M_WAITOK); + M_WAITOK | M_ZERO); } NFSLOCKCLSTATE(); /* * If a forced dismount is already in progress, don't * allocate a new clientid and get out now. For the case where * clp != NULL, this is a harmless optimization. */ if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { NFSUNLOCKCLSTATE(); if (newclp != NULL) free(newclp, M_NFSCLCLIENT); return (EBADF); } clp = nmp->nm_clp; if (clp == NULL) { if (newclp == NULL) { NFSUNLOCKCLSTATE(); return (EACCES); } clp = newclp; - NFSBZERO((caddr_t)clp, sizeof(struct nfsclclient) + idlen - 1); clp->nfsc_idlen = idlen; LIST_INIT(&clp->nfsc_owner); TAILQ_INIT(&clp->nfsc_deleg); + TAILQ_INIT(&clp->nfsc_layout); + LIST_INIT(&clp->nfsc_devinfo); for (i = 0; i < NFSCLDELEGHASHSIZE; i++) LIST_INIT(&clp->nfsc_deleghash[i]); + for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) + LIST_INIT(&clp->nfsc_layouthash[i]); clp->nfsc_flags = NFSCLFLAGS_INITED; clp->nfsc_clientidrev = 1; clp->nfsc_cbident = nfscl_nextcbident(); nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id, clp->nfsc_idlen); LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list); nmp->nm_clp = clp; clp->nfsc_nmp = nmp; NFSUNLOCKCLSTATE(); - nfscl_start_renewthread(clp); + if (start_renewthread != 0) + nfscl_start_renewthread(clp); } else { NFSUNLOCKCLSTATE(); if (newclp != NULL) - FREE((caddr_t)newclp, M_NFSCLCLIENT); + free(newclp, M_NFSCLCLIENT); } NFSLOCKCLSTATE(); while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock && (mp->mnt_kern_flag & MNTK_UNMOUNTF) == 0) igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, mp); if (!igotlock) nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp); if (igotlock == 0 && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { /* * Both nfsv4_lock() and nfsv4_getref() know to check * for MNTK_UNMOUNTF and return without sleeping to * wait for the exclusive lock to be released, since it * might be held by nfscl_umount() and we need to get out * now for that case and not wait until nfscl_umount() * releases it. */ NFSUNLOCKCLSTATE(); return (EBADF); } NFSUNLOCKCLSTATE(); /* * If it needs a clientid, do the setclientid now. */ if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) { if (!igotlock) panic("nfscl_clget"); if (p == NULL || cred == NULL) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (EACCES); } /* * If RFC3530 Sec. 14.2.33 is taken literally, * NFSERR_CLIDINUSE will be returned persistently for the * case where a new mount of the same file system is using * a different principal. In practice, NFSERR_CLIDINUSE is * only returned when there is outstanding unexpired state * on the clientid. As such, try for twice the lease * interval, if we know what that is. Otherwise, make a * wild ass guess. * The case of returning NFSERR_STALECLIENTID is far less * likely, but might occur if there is a significant delay * between doing the SetClientID and SetClientIDConfirm Ops, * such that the server throws away the clientid before * receiving the SetClientIDConfirm. */ if (clp->nfsc_renew > 0) clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2; else clidinusedelay = 120; trystalecnt = 3; do { - error = nfsrpc_setclient(VFSTONFS(vnode_mount(vp)), - clp, cred, p); + error = nfsrpc_setclient(nmp, clp, 0, cred, p); if (error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || + error == NFSERR_BADSESSION || error == NFSERR_CLIDINUSE) { (void) nfs_catnap(PZERO, error, "nfs_setcl"); } } while (((error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) || (error == NFSERR_CLIDINUSE && --clidinusedelay > 0)); if (error) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (error); } clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; } if (igotlock) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 1); NFSUNLOCKCLSTATE(); } *clpp = clp; return (0); } /* * Get a reference to a clientid and return it, if valid. */ APPLESTATIC struct nfsclclient * nfscl_findcl(struct nfsmount *nmp) { struct nfsclclient *clp; clp = nmp->nm_clp; if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) return (NULL); return (clp); } /* * Release the clientid structure. It may be locked or reference counted. */ static void nfscl_clrelease(struct nfsclclient *clp) { if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK) nfsv4_unlock(&clp->nfsc_lock, 0); else nfsv4_relref(&clp->nfsc_lock); } /* * External call for nfscl_clrelease. */ APPLESTATIC void nfscl_clientrelease(struct nfsclclient *clp) { NFSLOCKCLSTATE(); if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK) nfsv4_unlock(&clp->nfsc_lock, 0); else nfsv4_relref(&clp->nfsc_lock); NFSUNLOCKCLSTATE(); } /* * Called when wanting to lock a byte region. */ APPLESTATIC int nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp, int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp, struct nfscllockowner **lpp, int *newonep, int *donelocallyp) { struct nfscllockowner *lp; struct nfsclopen *op; struct nfsclclient *clp; struct nfscllockowner *nlp; struct nfscllock *nlop, *otherlop; struct nfscldeleg *dp = NULL, *ldp = NULL; struct nfscllockownerhead *lhp = NULL; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN]; u_int8_t *openownp; int error = 0, ret, donelocally = 0; u_int32_t mode; /* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */ mode = 0; np = VTONFS(vp); *lpp = NULL; lp = NULL; *newonep = 0; *donelocallyp = 0; /* * Might need these, so MALLOC them now, to * avoid a tsleep() in MALLOC later. */ MALLOC(nlp, struct nfscllockowner *, sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK); MALLOC(otherlop, struct nfscllock *, sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); MALLOC(nlop, struct nfscllock *, sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); nlop->nfslo_type = type; nlop->nfslo_first = off; if (len == NFS64BITSSET) { nlop->nfslo_end = NFS64BITSSET; } else { nlop->nfslo_end = off + len; if (nlop->nfslo_end <= nlop->nfslo_first) error = NFSERR_INVAL; } if (!error) { if (recovery) clp = rclp; else - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); } if (error) { FREE((caddr_t)nlp, M_NFSCLLOCKOWNER); FREE((caddr_t)otherlop, M_NFSCLLOCK); FREE((caddr_t)nlop, M_NFSCLLOCK); return (error); } op = NULL; if (recovery) { ownp = rownp; openownp = ropenownp; } else { nfscl_filllockowner(id, own, flags); ownp = own; nfscl_filllockowner(p->td_proc, openown, F_POSIX); openownp = openown; } if (!recovery) { NFSLOCKCLSTATE(); /* * First, search for a delegation. If one exists for this file, * the lock can be done locally against it, so long as there * isn't a local lock conflict. */ ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); /* Just sanity check for correct type of delegation */ if (dp != NULL && ((dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 || (type == F_WRLCK && (dp->nfsdl_flags & NFSCLDL_WRITE) == 0))) dp = NULL; } if (dp != NULL) { /* Now, find an open and maybe a lockowner. */ ret = nfscl_getopen(&dp->nfsdl_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op); if (ret) ret = nfscl_getopen(&clp->nfsc_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op); if (!ret) { lhp = &dp->nfsdl_lock; TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; donelocally = 1; } else { dp = NULL; } } if (!donelocally) { /* * Get the related Open and maybe lockowner. */ error = nfscl_getopen(&clp->nfsc_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, &lp, &op); if (!error) lhp = &op->nfso_lock; } if (!error && !recovery) error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, nlop, ownp, ldp, NULL); if (error) { if (!recovery) { nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } FREE((caddr_t)nlp, M_NFSCLLOCKOWNER); FREE((caddr_t)otherlop, M_NFSCLLOCK); FREE((caddr_t)nlop, M_NFSCLLOCK); return (error); } /* * Ok, see if a lockowner exists and create one, as required. */ if (lp == NULL) LIST_FOREACH(lp, lhp, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN)) break; } if (lp == NULL) { NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN); if (recovery) NFSBCOPY(ropenownp, nlp->nfsl_openowner, NFSV4CL_LOCKNAMELEN); else NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner, NFSV4CL_LOCKNAMELEN); nlp->nfsl_seqid = 0; nlp->nfsl_lockflags = flags; nlp->nfsl_inprog = NULL; nfscl_lockinit(&nlp->nfsl_rwlock); LIST_INIT(&nlp->nfsl_lock); if (donelocally) { nlp->nfsl_open = NULL; newnfsstats.cllocallockowners++; } else { nlp->nfsl_open = op; newnfsstats.cllockowners++; } LIST_INSERT_HEAD(lhp, nlp, nfsl_list); lp = nlp; nlp = NULL; *newonep = 1; } /* * Now, update the byte ranges for locks. */ ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally); if (!ret) donelocally = 1; if (donelocally) { *donelocallyp = 1; if (!recovery) nfscl_clrelease(clp); } else { /* * Serial modifications on the lock owner for multiple threads * for the same process using a read/write lock. */ if (!recovery) nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR); } if (!recovery) NFSUNLOCKCLSTATE(); if (nlp) FREE((caddr_t)nlp, M_NFSCLLOCKOWNER); if (nlop) FREE((caddr_t)nlop, M_NFSCLLOCK); if (otherlop) FREE((caddr_t)otherlop, M_NFSCLLOCK); *lpp = lp; return (0); } /* * Called to unlock a byte range, for LockU. */ APPLESTATIC int nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len, __unused struct ucred *cred, NFSPROC_T *p, int callcnt, struct nfsclclient *clp, void *id, int flags, struct nfscllockowner **lpp, int *dorpcp) { struct nfscllockowner *lp; struct nfsclowner *owp; struct nfsclopen *op; struct nfscllock *nlop, *other_lop = NULL; struct nfscldeleg *dp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int ret = 0, fnd; np = VTONFS(vp); *lpp = NULL; *dorpcp = 0; /* * Might need these, so MALLOC them now, to * avoid a tsleep() in MALLOC later. */ MALLOC(nlop, struct nfscllock *, sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); nlop->nfslo_type = F_UNLCK; nlop->nfslo_first = off; if (len == NFS64BITSSET) { nlop->nfslo_end = NFS64BITSSET; } else { nlop->nfslo_end = off + len; if (nlop->nfslo_end <= nlop->nfslo_first) { FREE((caddr_t)nlop, M_NFSCLLOCK); return (NFSERR_INVAL); } } if (callcnt == 0) { MALLOC(other_lop, struct nfscllock *, sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); *other_lop = *nlop; } nfscl_filllockowner(id, own, flags); dp = NULL; NFSLOCKCLSTATE(); if (callcnt == 0) dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); /* * First, unlock any local regions on a delegation. */ if (dp != NULL) { /* Look for this lockowner. */ LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) break; } if (lp != NULL) /* Use other_lop, so nlop is still available */ (void)nfscl_updatelock(lp, &other_lop, NULL, 1); } /* * Now, find a matching open/lockowner that hasn't already been done, * as marked by nfsl_inprog. */ lp = NULL; fnd = 0; LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == np->n_fhp->nfh_len && !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) { LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lp->nfsl_inprog == NULL && !NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { fnd = 1; break; } } if (fnd) break; } } if (fnd) break; } if (lp != NULL) { ret = nfscl_updatelock(lp, &nlop, NULL, 0); if (ret) *dorpcp = 1; /* * Serial modifications on the lock owner for multiple * threads for the same process using a read/write lock. */ lp->nfsl_inprog = p; nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR); *lpp = lp; } NFSUNLOCKCLSTATE(); if (nlop) FREE((caddr_t)nlop, M_NFSCLLOCK); if (other_lop) FREE((caddr_t)other_lop, M_NFSCLLOCK); return (0); } /* * Release all lockowners marked in progess for this process and file. */ APPLESTATIC void nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p, void *id, int flags) { struct nfsclowner *owp; struct nfsclopen *op; struct nfscllockowner *lp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; np = VTONFS(vp); nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == np->n_fhp->nfh_len && !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) { LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lp->nfsl_inprog == p && !NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { lp->nfsl_inprog = NULL; nfscl_lockunlock(&lp->nfsl_rwlock); } } } } } nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } /* * Called to find out if any bytes within the byte range specified are * write locked by the calling process. Used to determine if flushing * is required before a LockU. * If in doubt, return 1, so the flush will occur. */ APPLESTATIC int nfscl_checkwritelocked(vnode_t vp, struct flock *fl, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfsclowner *owp; struct nfscllockowner *lp; struct nfsclopen *op; struct nfsclclient *clp; struct nfscllock *lop; struct nfscldeleg *dp; struct nfsnode *np; u_int64_t off, end; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int error = 0; np = VTONFS(vp); switch (fl->l_whence) { case SEEK_SET: case SEEK_CUR: /* * Caller is responsible for adding any necessary offset * when SEEK_CUR is used. */ off = fl->l_start; break; case SEEK_END: off = np->n_size + fl->l_start; break; default: return (1); }; if (fl->l_len != 0) { end = off + fl->l_len; if (end < off) return (1); } else { end = NFS64BITSSET; } - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (1); nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); /* * First check the delegation locks. */ dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) break; } if (lp != NULL) { LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (lop->nfslo_first >= end) break; if (lop->nfslo_end <= off) continue; if (lop->nfslo_type == F_WRLCK) { nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); return (1); } } } } /* * Now, check state against the server. */ LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == np->n_fhp->nfh_len && !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) { LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) break; } if (lp != NULL) { LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (lop->nfslo_first >= end) break; if (lop->nfslo_end <= off) continue; if (lop->nfslo_type == F_WRLCK) { nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); return (1); } } } } } } nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); return (0); } /* * Release a byte range lock owner structure. */ APPLESTATIC void nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete) { struct nfsclclient *clp; if (lp == NULL) return; NFSLOCKCLSTATE(); clp = lp->nfsl_open->nfso_own->nfsow_clp; if (error != 0 && candelete && (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0) nfscl_freelockowner(lp, 0); else nfscl_lockunlock(&lp->nfsl_rwlock); nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } /* * Free up an open structure and any associated byte range lock structures. */ APPLESTATIC void nfscl_freeopen(struct nfsclopen *op, int local) { LIST_REMOVE(op, nfso_list); nfscl_freealllocks(&op->nfso_lock, local); FREE((caddr_t)op, M_NFSCLOPEN); if (local) newnfsstats.cllocalopens--; else newnfsstats.clopens--; } /* * Free up all lock owners and associated locks. */ static void nfscl_freealllocks(struct nfscllockownerhead *lhp, int local) { struct nfscllockowner *lp, *nlp; LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) { if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED)) panic("nfscllckw"); nfscl_freelockowner(lp, local); } } /* * Called for an Open when NFSERR_EXPIRED is received from the server. * If there are no byte range locks nor a Share Deny lost, try to do a * fresh Open. Otherwise, free the open. */ static int nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op, struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) { struct nfscllockowner *lp; struct nfscldeleg *dp; int mustdelete = 0, error; /* * Look for any byte range lock(s). */ LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { mustdelete = 1; break; } } /* * If no byte range lock(s) nor a Share deny, try to re-open. */ if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) { newnfs_copycred(&op->nfso_cred, cred); dp = NULL; error = nfsrpc_reopen(nmp, op->nfso_fh, op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p); if (error) { mustdelete = 1; if (dp != NULL) { FREE((caddr_t)dp, M_NFSCLDELEG); dp = NULL; } } if (dp != NULL) nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh, op->nfso_fhlen, cred, p, &dp); } /* * If a byte range lock or Share deny or couldn't re-open, free it. */ if (mustdelete) nfscl_freeopen(op, 0); return (mustdelete); } /* * Free up an open owner structure. */ static void nfscl_freeopenowner(struct nfsclowner *owp, int local) { LIST_REMOVE(owp, nfsow_list); FREE((caddr_t)owp, M_NFSCLOWNER); if (local) newnfsstats.cllocalopenowners--; else newnfsstats.clopenowners--; } /* * Free up a byte range lock owner structure. */ APPLESTATIC void nfscl_freelockowner(struct nfscllockowner *lp, int local) { struct nfscllock *lop, *nlop; LIST_REMOVE(lp, nfsl_list); LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) { nfscl_freelock(lop, local); } FREE((caddr_t)lp, M_NFSCLLOCKOWNER); if (local) newnfsstats.cllocallockowners--; else newnfsstats.cllockowners--; } /* * Free up a byte range lock structure. */ APPLESTATIC void nfscl_freelock(struct nfscllock *lop, int local) { LIST_REMOVE(lop, nfslo_list); FREE((caddr_t)lop, M_NFSCLLOCK); if (local) newnfsstats.cllocallocks--; else newnfsstats.cllocks--; } /* * Clean out the state related to a delegation. */ static void nfscl_cleandeleg(struct nfscldeleg *dp) { struct nfsclowner *owp, *nowp; struct nfsclopen *op; LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { if (LIST_NEXT(op, nfso_list) != NULL) panic("nfscleandel"); nfscl_freeopen(op, 1); } nfscl_freeopenowner(owp, 1); } nfscl_freealllocks(&dp->nfsdl_lock, 1); } /* * Free a delegation. */ static void nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp) { TAILQ_REMOVE(hdp, dp, nfsdl_list); LIST_REMOVE(dp, nfsdl_hash); FREE((caddr_t)dp, M_NFSCLDELEG); newnfsstats.cldelegates--; nfscl_delegcnt--; } /* * Free up all state related to this client structure. */ static void nfscl_cleanclient(struct nfsclclient *clp) { struct nfsclowner *owp, *nowp; struct nfsclopen *op, *nop; /* Now, all the OpenOwners, etc. */ LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) { nfscl_freeopen(op, 0); } nfscl_freeopenowner(owp, 0); } } /* * Called when an NFSERR_EXPIRED is received from the server. */ static void nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) { struct nfsclowner *owp, *nowp, *towp; struct nfsclopen *op, *nop, *top; struct nfscldeleg *dp, *ndp; int ret, printed = 0; /* * First, merge locally issued Opens into the list for the server. */ dp = TAILQ_FIRST(&clp->nfsc_deleg); while (dp != NULL) { ndp = TAILQ_NEXT(dp, nfsdl_list); owp = LIST_FIRST(&dp->nfsdl_owner); while (owp != NULL) { nowp = LIST_NEXT(owp, nfsow_list); op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { if (LIST_NEXT(op, nfso_list) != NULL) panic("nfsclexp"); LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) { if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) break; } if (towp != NULL) { /* Merge opens in */ LIST_FOREACH(top, &towp->nfsow_open, nfso_list) { if (top->nfso_fhlen == op->nfso_fhlen && !NFSBCMP(top->nfso_fh, op->nfso_fh, op->nfso_fhlen)) { top->nfso_mode |= op->nfso_mode; top->nfso_opencnt += op->nfso_opencnt; break; } } if (top == NULL) { /* Just add the open to the owner list */ LIST_REMOVE(op, nfso_list); op->nfso_own = towp; LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list); newnfsstats.cllocalopens--; newnfsstats.clopens++; } } else { /* Just add the openowner to the client list */ LIST_REMOVE(owp, nfsow_list); owp->nfsow_clp = clp; LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list); newnfsstats.cllocalopenowners--; newnfsstats.clopenowners++; newnfsstats.cllocalopens--; newnfsstats.clopens++; } } owp = nowp; } if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) { printed = 1; printf("nfsv4 expired locks lost\n"); } nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); dp = ndp; } if (!TAILQ_EMPTY(&clp->nfsc_deleg)) panic("nfsclexp"); /* * Now, try and reopen against the server. */ LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { owp->nfsow_seqid = 0; LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) { ret = nfscl_expireopen(clp, op, nmp, cred, p); if (ret && !printed) { printed = 1; printf("nfsv4 expired locks lost\n"); } } if (LIST_EMPTY(&owp->nfsow_open)) nfscl_freeopenowner(owp, 0); } } /* * This function must be called after the process represented by "own" has * exited. Must be called with CLSTATE lock held. */ static void nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own) { struct nfsclowner *owp, *nowp; struct nfscllockowner *lp, *nlp; struct nfscldeleg *dp; /* First, get rid of local locks on delegations. */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED)) panic("nfscllckw"); nfscl_freelockowner(lp, 1); } } } owp = LIST_FIRST(&clp->nfsc_owner); while (owp != NULL) { nowp = LIST_NEXT(owp, nfsow_list); if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN)) { /* * If there are children that haven't closed the * file descriptors yet, the opens will still be * here. For that case, let the renew thread clear * out the OpenOwner later. */ if (LIST_EMPTY(&owp->nfsow_open)) nfscl_freeopenowner(owp, 0); else owp->nfsow_defunct = 1; } owp = nowp; } } /* * Find open/lock owners for processes that have exited. */ static void nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp) { struct nfsclowner *owp, *nowp; struct nfsclopen *op; struct nfscllockowner *lp, *nlp; NFSPROCLISTLOCK(); NFSLOCKCLSTATE(); LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) { if (LIST_EMPTY(&lp->nfsl_lock)) nfscl_emptylockowner(lp, lhp); } } if (nfscl_procdoesntexist(owp->nfsow_owner)) nfscl_cleanup_common(clp, owp->nfsow_owner); } NFSUNLOCKCLSTATE(); NFSPROCLISTUNLOCK(); } /* * Take the empty lock owner and move it to the local lhp list if the * associated process no longer exists. */ static void nfscl_emptylockowner(struct nfscllockowner *lp, struct nfscllockownerfhhead *lhp) { struct nfscllockownerfh *lfhp, *mylfhp; struct nfscllockowner *nlp; int fnd_it; /* If not a Posix lock owner, just return. */ if ((lp->nfsl_lockflags & F_POSIX) == 0) return; fnd_it = 0; mylfhp = NULL; /* * First, search to see if this lock owner is already in the list. * If it is, then the associated process no longer exists. */ SLIST_FOREACH(lfhp, lhp, nfslfh_list) { if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen && !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh, lfhp->nfslfh_len)) mylfhp = lfhp; LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list) if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner, NFSV4CL_LOCKNAMELEN)) fnd_it = 1; } /* If not found, check if process still exists. */ if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0) return; /* Move the lock owner over to the local list. */ if (mylfhp == NULL) { mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP, M_NOWAIT); if (mylfhp == NULL) return; mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen; NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh, mylfhp->nfslfh_len); LIST_INIT(&mylfhp->nfslfh_lock); SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list); } LIST_REMOVE(lp, nfsl_list); LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list); } static int fake_global; /* Used to force visibility of MNTK_UNMOUNTF */ /* * Called from nfs umount to free up the clientid. */ APPLESTATIC void nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p) { struct nfsclclient *clp; struct ucred *cred; int igotlock; /* * For the case that matters, this is the thread that set * MNTK_UNMOUNTF, so it will see it set. The code that follows is * done to ensure that any thread executing nfscl_getcl() after * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the * mutex for NFSLOCKCLSTATE(), so it is "m" for the following * explanation, courtesy of Alan Cox. * What follows is a snippet from Alan Cox's email at: * http://docs.FreeBSD.org/cgi/ * mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw * * 1. Set MNTK_UNMOUNTF * 2. Acquire a standard FreeBSD mutex "m". * 3. Update some data structures. * 4. Release mutex "m". * * Then, other threads that acquire "m" after step 4 has occurred will * see MNTK_UNMOUNTF as set. But, other threads that beat thread X to * step 2 may or may not see MNTK_UNMOUNTF as set. */ NFSLOCKCLSTATE(); if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { fake_global++; NFSUNLOCKCLSTATE(); NFSLOCKCLSTATE(); } clp = nmp->nm_clp; if (clp != NULL) { if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0) panic("nfscl umount"); /* * First, handshake with the nfscl renew thread, to terminate * it. */ clp->nfsc_flags |= NFSCLFLAGS_UMOUNT; while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD) (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfsclumnt", hz); /* * Now, get the exclusive lock on the client state, so * that no uses of the state are still in progress. */ do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKCLSTATE(); /* * Free up all the state. It will expire on the server, but * maybe we should do a SetClientId/SetClientIdConfirm so * the server throws it away? */ LIST_REMOVE(clp, nfsc_list); nfscl_delegreturnall(clp, p); cred = newnfs_getcred(); - (void) nfsrpc_setclient(nmp, clp, cred, p); + if (NFSHASNFSV4N(nmp)) { + (void)nfsrpc_destroysession(nmp, clp, cred, p); + (void)nfsrpc_destroyclient(nmp, clp, cred, p); + } else + (void)nfsrpc_setclient(nmp, clp, 0, cred, p); nfscl_cleanclient(clp); nmp->nm_clp = NULL; NFSFREECRED(cred); - FREE((caddr_t)clp, M_NFSCLCLIENT); + free(clp, M_NFSCLCLIENT); } else NFSUNLOCKCLSTATE(); } /* * This function is called when a server replies with NFSERR_STALECLIENTID - * or NFSERR_STALESTATEID. It traverses the clientid lists, doing Opens - * and Locks with reclaim. If these fail, it deletes the corresponding state. + * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists, + * doing Opens and Locks with reclaim. If these fail, it deletes the + * corresponding state. */ static void nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { struct nfsclowner *owp, *nowp; struct nfsclopen *op, *nop; struct nfscllockowner *lp, *nlp; struct nfscllock *lop, *nlop; struct nfscldeleg *dp, *ndp, *tdp; struct nfsmount *nmp; struct ucred *tcred; struct nfsclopenhead extra_open; struct nfscldeleghead extra_deleg; struct nfsreq *rep; u_int64_t len; u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode; - int igotlock = 0, error, trycnt, firstlock, s; + int i, igotlock = 0, error, trycnt, firstlock, s; + struct nfscllayout *lyp, *nlyp; /* * First, lock the client structure, so everyone else will * block when trying to use state. */ NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG; do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKCLSTATE(); nmp = clp->nfsc_nmp; if (nmp == NULL) panic("nfscl recover"); + + /* + * For now, just get rid of all layouts. There may be a need + * to do LayoutCommit Ops with reclaim == true later. + */ + TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) + nfscl_freelayout(lyp); + TAILQ_INIT(&clp->nfsc_layout); + for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) + LIST_INIT(&clp->nfsc_layouthash[i]); + trycnt = 5; do { - error = nfsrpc_setclient(nmp, clp, cred, p); + error = nfsrpc_setclient(nmp, clp, 1, cred, p); } while ((error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { nfscl_cleanclient(clp); NFSLOCKCLSTATE(); clp->nfsc_flags &= ~(NFSCLFLAGS_HASCLIENTID | NFSCLFLAGS_RECOVER | NFSCLFLAGS_RECVRINPROG); wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return; } clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; /* * Mark requests already queued on the server, so that they don't * initiate another recovery cycle. Any requests already in the * queue that handle state information will have the old stale - * clientid/stateid and will get a NFSERR_STALESTATEID or - * NFSERR_STALECLIENTID reply from the server. This will be - * translated to NFSERR_STALEDONTRECOVER when R_DONTRECOVER is set. + * clientid/stateid and will get a NFSERR_STALESTATEID, + * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server. + * This will be translated to NFSERR_STALEDONTRECOVER when + * R_DONTRECOVER is set. */ s = splsoftclock(); NFSLOCKREQ(); TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) { if (rep->r_nmp == nmp) rep->r_flags |= R_DONTRECOVER; } NFSUNLOCKREQ(); splx(s); /* * Now, mark all delegations "need reclaim". */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM; TAILQ_INIT(&extra_deleg); LIST_INIT(&extra_open); /* * Now traverse the state lists, doing Open and Lock Reclaims. */ tcred = newnfs_getcred(); owp = LIST_FIRST(&clp->nfsc_owner); while (owp != NULL) { nowp = LIST_NEXT(owp, nfsow_list); owp->nfsow_seqid = 0; op = LIST_FIRST(&owp->nfsow_open); while (op != NULL) { nop = LIST_NEXT(op, nfso_list); if (error != NFSERR_NOGRACE) { /* Search for a delegation to reclaim with the open */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) continue; if ((dp->nfsdl_flags & NFSCLDL_WRITE)) { mode = NFSV4OPEN_ACCESSWRITE; delegtype = NFSV4OPEN_DELEGATEWRITE; } else { mode = NFSV4OPEN_ACCESSREAD; delegtype = NFSV4OPEN_DELEGATEREAD; } if ((op->nfso_mode & mode) == mode && op->nfso_fhlen == dp->nfsdl_fhlen && !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen)) break; } ndp = dp; if (dp == NULL) delegtype = NFSV4OPEN_DELEGATENONE; newnfs_copycred(&op->nfso_cred, tcred); error = nfscl_tryopen(nmp, NULL, op->nfso_fh, op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen, op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype, tcred, p); if (!error) { /* Handle any replied delegation */ if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE) || NFSMNT_RDONLY(nmp->nm_mountp))) { if ((ndp->nfsdl_flags & NFSCLDL_WRITE)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) continue; if ((op->nfso_mode & mode) == mode && op->nfso_fhlen == dp->nfsdl_fhlen && !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen)) { dp->nfsdl_stateid = ndp->nfsdl_stateid; dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit; dp->nfsdl_ace = ndp->nfsdl_ace; dp->nfsdl_change = ndp->nfsdl_change; dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM; if ((ndp->nfsdl_flags & NFSCLDL_RECALL)) dp->nfsdl_flags |= NFSCLDL_RECALL; FREE((caddr_t)ndp, M_NFSCLDELEG); ndp = NULL; break; } } } if (ndp != NULL) TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list); /* and reclaim all byte range locks */ lp = LIST_FIRST(&op->nfso_lock); while (lp != NULL) { nlp = LIST_NEXT(lp, nfsl_list); lp->nfsl_seqid = 0; firstlock = 1; lop = LIST_FIRST(&lp->nfsl_lock); while (lop != NULL) { nlop = LIST_NEXT(lop, nfslo_list); if (lop->nfslo_end == NFS64BITSSET) len = NFS64BITSSET; else len = lop->nfslo_end - lop->nfslo_first; if (error != NFSERR_NOGRACE) error = nfscl_trylock(nmp, NULL, op->nfso_fh, op->nfso_fhlen, lp, firstlock, 1, lop->nfslo_first, len, lop->nfslo_type, tcred, p); if (error != 0) nfscl_freelock(lop, 0); else firstlock = 0; lop = nlop; } /* If no locks, but a lockowner, just delete it. */ if (LIST_EMPTY(&lp->nfsl_lock)) nfscl_freelockowner(lp, 0); lp = nlp; } } else { nfscl_freeopen(op, 0); } } op = nop; } owp = nowp; } /* * Now, try and get any delegations not yet reclaimed by cobbling * to-gether an appropriate open. */ nowp = NULL; dp = TAILQ_FIRST(&clp->nfsc_deleg); while (dp != NULL) { ndp = TAILQ_NEXT(dp, nfsdl_list); if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) { if (nowp == NULL) { MALLOC(nowp, struct nfsclowner *, sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); /* * Name must be as long an largest possible * NFSV4CL_LOCKNAMELEN. 12 for now. */ NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN); LIST_INIT(&nowp->nfsow_open); nowp->nfsow_clp = clp; nowp->nfsow_seqid = 0; nowp->nfsow_defunct = 0; nfscl_lockinit(&nowp->nfsow_rwlock); } nop = NULL; if (error != NFSERR_NOGRACE) { MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) + dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK); nop->nfso_own = nowp; if ((dp->nfsdl_flags & NFSCLDL_WRITE)) { nop->nfso_mode = NFSV4OPEN_ACCESSWRITE; delegtype = NFSV4OPEN_DELEGATEWRITE; } else { nop->nfso_mode = NFSV4OPEN_ACCESSREAD; delegtype = NFSV4OPEN_DELEGATEREAD; } nop->nfso_opencnt = 0; nop->nfso_posixlock = 1; nop->nfso_fhlen = dp->nfsdl_fhlen; NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen); LIST_INIT(&nop->nfso_lock); nop->nfso_stateid.seqid = 0; nop->nfso_stateid.other[0] = 0; nop->nfso_stateid.other[1] = 0; nop->nfso_stateid.other[2] = 0; newnfs_copycred(&dp->nfsdl_cred, tcred); newnfs_copyincred(tcred, &nop->nfso_cred); tdp = NULL; error = nfscl_tryopen(nmp, NULL, nop->nfso_fh, nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen, nop->nfso_mode, nop, NULL, 0, &tdp, 1, delegtype, tcred, p); if (tdp != NULL) { if ((tdp->nfsdl_flags & NFSCLDL_WRITE)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; if ((nop->nfso_mode & mode) == mode && nop->nfso_fhlen == tdp->nfsdl_fhlen && !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh, nop->nfso_fhlen)) { dp->nfsdl_stateid = tdp->nfsdl_stateid; dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit; dp->nfsdl_ace = tdp->nfsdl_ace; dp->nfsdl_change = tdp->nfsdl_change; dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM; if ((tdp->nfsdl_flags & NFSCLDL_RECALL)) dp->nfsdl_flags |= NFSCLDL_RECALL; FREE((caddr_t)tdp, M_NFSCLDELEG); } else { TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list); } } } if (error) { if (nop != NULL) FREE((caddr_t)nop, M_NFSCLOPEN); /* * Couldn't reclaim it, so throw the state * away. Ouch!! */ nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } else { LIST_INSERT_HEAD(&extra_open, nop, nfso_list); } } dp = ndp; } /* * Now, get rid of extra Opens and Delegations. */ LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) { do { newnfs_copycred(&op->nfso_cred, tcred); error = nfscl_tryclose(op, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfsexcls"); } while (error == NFSERR_GRACE); LIST_REMOVE(op, nfso_list); FREE((caddr_t)op, M_NFSCLOPEN); } if (nowp != NULL) FREE((caddr_t)nowp, M_NFSCLOWNER); TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) { do { newnfs_copycred(&dp->nfsdl_cred, tcred); error = nfscl_trydelegreturn(dp, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfsexdlg"); } while (error == NFSERR_GRACE); TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list); FREE((caddr_t)dp, M_NFSCLDELEG); } + /* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */ + if (NFSHASNFSV4N(nmp)) + (void)nfsrpc_reclaimcomplete(nmp, cred, p); + NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG; wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(tcred); } /* * This function is called when a server replies with NFSERR_EXPIRED. * It deletes all state for the client and does a fresh SetClientId/confirm. * XXX Someday it should post a signal to the process(es) that hold the * state, so they know that lock state has been lost. */ APPLESTATIC int nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p) { struct nfsmount *nmp; struct ucred *cred; int igotlock = 0, error, trycnt; /* * If the clientid has gone away or a new SetClientid has already * been done, just return ok. */ if (clp == NULL || clidrev != clp->nfsc_clientidrev) return (0); /* * First, lock the client structure, so everyone else will * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so * that only one thread does the work. */ NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT; do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT)); if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) { if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (0); } clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG; NFSUNLOCKCLSTATE(); nmp = clp->nfsc_nmp; if (nmp == NULL) panic("nfscl expired"); cred = newnfs_getcred(); trycnt = 5; do { - error = nfsrpc_setclient(nmp, clp, cred, p); + error = nfsrpc_setclient(nmp, clp, 0, cred, p); } while ((error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { /* * Clear out any state. */ nfscl_cleanclient(clp); NFSLOCKCLSTATE(); clp->nfsc_flags &= ~(NFSCLFLAGS_HASCLIENTID | NFSCLFLAGS_RECOVER); } else { /* * Expire the state for the client. */ nfscl_expireclient(clp, nmp, cred, p); NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; } clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG); wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(cred); return (error); } /* * This function inserts a lock in the list after insert_lop. */ static void nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop, struct nfscllock *insert_lop, int local) { if ((struct nfscllockowner *)insert_lop == lp) LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list); else LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list); if (local) newnfsstats.cllocallocks++; else newnfsstats.cllocks++; } /* * This function updates the locking for a lock owner and given file. It * maintains a list of lock ranges ordered on increasing file offset that * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style). * It always adds new_lop to the list and sometimes uses the one pointed * at by other_lopp. * Returns 1 if the locks were modified, 0 otherwise. */ static int nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp, struct nfscllock **other_lopp, int local) { struct nfscllock *new_lop = *new_lopp; struct nfscllock *lop, *tlop, *ilop; struct nfscllock *other_lop; int unlock = 0, modified = 0; u_int64_t tmp; /* * Work down the list until the lock is merged. */ if (new_lop->nfslo_type == F_UNLCK) unlock = 1; ilop = (struct nfscllock *)lp; lop = LIST_FIRST(&lp->nfsl_lock); while (lop != NULL) { /* * Only check locks for this file that aren't before the start of * new lock's range. */ if (lop->nfslo_end >= new_lop->nfslo_first) { if (new_lop->nfslo_end < lop->nfslo_first) { /* * If the new lock ends before the start of the * current lock's range, no merge, just insert * the new lock. */ break; } if (new_lop->nfslo_type == lop->nfslo_type || (new_lop->nfslo_first <= lop->nfslo_first && new_lop->nfslo_end >= lop->nfslo_end)) { /* * This lock can be absorbed by the new lock/unlock. * This happens when it covers the entire range * of the old lock or is contiguous * with the old lock and is of the same type or an * unlock. */ if (new_lop->nfslo_type != lop->nfslo_type || new_lop->nfslo_first != lop->nfslo_first || new_lop->nfslo_end != lop->nfslo_end) modified = 1; if (lop->nfslo_first < new_lop->nfslo_first) new_lop->nfslo_first = lop->nfslo_first; if (lop->nfslo_end > new_lop->nfslo_end) new_lop->nfslo_end = lop->nfslo_end; tlop = lop; lop = LIST_NEXT(lop, nfslo_list); nfscl_freelock(tlop, local); continue; } /* * All these cases are for contiguous locks that are not the * same type, so they can't be merged. */ if (new_lop->nfslo_first <= lop->nfslo_first) { /* * This case is where the new lock overlaps with the * first part of the old lock. Move the start of the * old lock to just past the end of the new lock. The * new lock will be inserted in front of the old, since * ilop hasn't been updated. (We are done now.) */ if (lop->nfslo_first != new_lop->nfslo_end) { lop->nfslo_first = new_lop->nfslo_end; modified = 1; } break; } if (new_lop->nfslo_end >= lop->nfslo_end) { /* * This case is where the new lock overlaps with the * end of the old lock's range. Move the old lock's * end to just before the new lock's first and insert * the new lock after the old lock. * Might not be done yet, since the new lock could * overlap further locks with higher ranges. */ if (lop->nfslo_end != new_lop->nfslo_first) { lop->nfslo_end = new_lop->nfslo_first; modified = 1; } ilop = lop; lop = LIST_NEXT(lop, nfslo_list); continue; } /* * The final case is where the new lock's range is in the * middle of the current lock's and splits the current lock * up. Use *other_lopp to handle the second part of the * split old lock range. (We are done now.) * For unlock, we use new_lop as other_lop and tmp, since * other_lop and new_lop are the same for this case. * We noted the unlock case above, so we don't need * new_lop->nfslo_type any longer. */ tmp = new_lop->nfslo_first; if (unlock) { other_lop = new_lop; *new_lopp = NULL; } else { other_lop = *other_lopp; *other_lopp = NULL; } other_lop->nfslo_first = new_lop->nfslo_end; other_lop->nfslo_end = lop->nfslo_end; other_lop->nfslo_type = lop->nfslo_type; lop->nfslo_end = tmp; nfscl_insertlock(lp, other_lop, lop, local); ilop = lop; modified = 1; break; } ilop = lop; lop = LIST_NEXT(lop, nfslo_list); if (lop == NULL) break; } /* * Insert the new lock in the list at the appropriate place. */ if (!unlock) { nfscl_insertlock(lp, new_lop, ilop, local); *new_lopp = NULL; modified = 1; } return (modified); } /* * This function must be run as a kernel thread. * It does Renew Ops and recovery, when required. */ APPLESTATIC void nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) { struct nfsclowner *owp, *nowp; struct nfsclopen *op; struct nfscllockowner *lp, *nlp; struct nfscldeleghead dh; struct nfscldeleg *dp, *ndp; struct ucred *cred; u_int32_t clidrev; int error, cbpathdown, islept, igotlock, ret, clearok; uint32_t recover_done_time = 0; struct timespec mytime; static time_t prevsec = 0; struct nfscllockownerfh *lfhp, *nlfhp; struct nfscllockownerfhhead lfh; + struct nfscllayout *lyp, *nlyp; + struct nfscldevinfo *dip, *ndip; + struct nfscllayouthead rlh; + struct nfsclrecalllayout *recallp; + struct nfsclds *dsp; cred = newnfs_getcred(); NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD; NFSUNLOCKCLSTATE(); for(;;) { newnfs_setroot(cred); cbpathdown = 0; if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) { /* * Only allow one recover within 1/2 of the lease * duration (nfsc_renew). */ if (recover_done_time < NFSD_MONOSEC) { recover_done_time = NFSD_MONOSEC + clp->nfsc_renew; nfscl_recover(clp, cred, p); } else { NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; NFSUNLOCKCLSTATE(); } } if (clp->nfsc_expire <= NFSD_MONOSEC && (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) { clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clidrev = clp->nfsc_clientidrev; - error = nfsrpc_renew(clp, cred, p); + error = nfsrpc_renew(clp, + TAILQ_FIRST(&clp->nfsc_nmp->nm_sess), cred, p); if (error == NFSERR_CBPATHDOWN) cbpathdown = 1; - else if (error == NFSERR_STALECLIENTID) { + else if (error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION) { NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECOVER; NFSUNLOCKCLSTATE(); } else if (error == NFSERR_EXPIRED) (void) nfscl_hasexpired(clp, clidrev, p); } + /* Do renews for any DS sessions. */ +checkdsrenew: + NFSLOCKMNT(clp->nfsc_nmp); + /* Skip first entry, since the MDS is handled above. */ + dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess); + if (dsp != NULL) + dsp = TAILQ_NEXT(dsp, nfsclds_list); + while (dsp != NULL) { + if (dsp->nfsclds_expire <= NFSD_MONOSEC) { + dsp->nfsclds_expire = NFSD_MONOSEC + + clp->nfsc_renew; + NFSUNLOCKMNT(clp->nfsc_nmp); + (void)nfsrpc_renew(clp, dsp, cred, p); + goto checkdsrenew; + } + dsp = TAILQ_NEXT(dsp, nfsclds_list); + } + NFSUNLOCKMNT(clp->nfsc_nmp); + TAILQ_INIT(&dh); NFSLOCKCLSTATE(); if (cbpathdown) /* It's a Total Recall! */ nfscl_totalrecall(clp); /* * Now, handle defunct owners. */ LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { if (LIST_EMPTY(&owp->nfsow_open)) { if (owp->nfsow_defunct != 0) nfscl_freeopenowner(owp, 0); } } /* * Do the recall on any delegations. To avoid trouble, always * come back up here after having slept. */ igotlock = 0; tryagain: dp = TAILQ_FIRST(&clp->nfsc_deleg); while (dp != NULL) { ndp = TAILQ_NEXT(dp, nfsdl_list); if ((dp->nfsdl_flags & NFSCLDL_RECALL)) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); goto tryagain; } while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) goto tryagain; } NFSUNLOCKCLSTATE(); newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp, NULL, cred, p, 1); if (!ret) { nfscl_cleandeleg(dp); TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); LIST_REMOVE(dp, nfsdl_hash); TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list); nfscl_delegcnt--; newnfsstats.cldelegates--; } NFSLOCKCLSTATE(); } dp = ndp; } /* * Clear out old delegations, if we are above the high water * mark. Only clear out ones with no state related to them. * The tailq list is in LRU order. */ dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead); while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) { ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list); if (dp->nfsdl_rwlock.nfslock_usecnt == 0 && dp->nfsdl_rwlock.nfslock_lock == 0 && dp->nfsdl_timestamp < NFSD_MONOSEC && (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED | NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) { clearok = 1; LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { clearok = 0; break; } } if (clearok) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { clearok = 0; break; } } } if (clearok) { TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); LIST_REMOVE(dp, nfsdl_hash); TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list); nfscl_delegcnt--; newnfsstats.cldelegates--; } } dp = ndp; } if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); + + /* + * Do the recall on any layouts. To avoid trouble, always + * come back up here after having slept. + */ + TAILQ_INIT(&rlh); +tryagain2: + TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) { + if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) { + /* + * Wait for outstanding I/O ops to be done. + */ + if (lyp->nfsly_lock.nfslock_usecnt > 0 || + (lyp->nfsly_lock.nfslock_lock & + NFSV4LOCK_LOCK) != 0) { + lyp->nfsly_lock.nfslock_lock |= + NFSV4LOCK_WANTED; + (void)nfsmsleep(&lyp->nfsly_lock, + NFSCLSTATEMUTEXPTR, PZERO, "nfslyp", + NULL); + goto tryagain2; + } + /* Move the layout to the recall list. */ + TAILQ_REMOVE(&clp->nfsc_layout, lyp, + nfsly_list); + LIST_REMOVE(lyp, nfsly_hash); + TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list); + + /* Handle any layout commits. */ + if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) && + (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) { + lyp->nfsly_flags &= ~NFSLY_WRITTEN; + NFSUNLOCKCLSTATE(); + NFSCL_DEBUG(3, "do layoutcommit\n"); + nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, + cred, p); + NFSLOCKCLSTATE(); + goto tryagain2; + } + } + } + + /* Now, look for stale layouts. */ + lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead); + while (lyp != NULL) { + nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list); + if (lyp->nfsly_timestamp < NFSD_MONOSEC && + (lyp->nfsly_flags & NFSLY_RECALL) == 0 && + lyp->nfsly_lock.nfslock_usecnt == 0 && + lyp->nfsly_lock.nfslock_lock == 0) { + NFSCL_DEBUG(4, "ret stale lay=%d\n", + nfscl_layoutcnt); + recallp = malloc(sizeof(*recallp), + M_NFSLAYRECALL, M_NOWAIT); + if (recallp == NULL) + break; + (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, + lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX, + lyp->nfsly_stateid.seqid, recallp); + } + lyp = nlyp; + } + + /* + * Free up any unreferenced device info structures. + */ + LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) { + if (dip->nfsdi_layoutrefs == 0 && + dip->nfsdi_refcnt == 0) { + NFSCL_DEBUG(4, "freeing devinfo\n"); + LIST_REMOVE(dip, nfsdi_list); + nfscl_freedevinfo(dip); + } + } NFSUNLOCKCLSTATE(); + /* Do layout return(s), as required. */ + TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) { + TAILQ_REMOVE(&rlh, lyp, nfsly_list); + NFSCL_DEBUG(4, "ret layout\n"); + nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p); + nfscl_freelayout(lyp); + } + /* * Delegreturn any delegations cleaned out or recalled. */ TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) { newnfs_copycred(&dp->nfsdl_cred, cred); (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p); TAILQ_REMOVE(&dh, dp, nfsdl_list); FREE((caddr_t)dp, M_NFSCLDELEG); } SLIST_INIT(&lfh); /* * Call nfscl_cleanupkext() once per second to check for * open/lock owners where the process has exited. */ NFSGETNANOTIME(&mytime); if (prevsec != mytime.tv_sec) { prevsec = mytime.tv_sec; nfscl_cleanupkext(clp, &lfh); } /* * Do a ReleaseLockOwner for all lock owners where the * associated process no longer exists, as found by * nfscl_cleanupkext(). */ newnfs_setroot(cred); SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) { LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list, nlp) { (void)nfsrpc_rellockown(clp->nfsc_nmp, lp, lfhp->nfslfh_fh, lfhp->nfslfh_len, cred, p); nfscl_freelockowner(lp, 0); } free(lfhp, M_TEMP); } SLIST_INIT(&lfh); NFSLOCKCLSTATE(); if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0) (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl", hz); if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) { clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD; NFSUNLOCKCLSTATE(); NFSFREECRED(cred); wakeup((caddr_t)clp); return; } NFSUNLOCKCLSTATE(); } } /* - * Initiate state recovery. Called when NFSERR_STALECLIENTID or - * NFSERR_STALESTATEID is received. + * Initiate state recovery. Called when NFSERR_STALECLIENTID, + * NFSERR_STALESTATEID or NFSERR_BADSESSION is received. */ APPLESTATIC void nfscl_initiate_recovery(struct nfsclclient *clp) { if (clp == NULL) return; NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECOVER; NFSUNLOCKCLSTATE(); wakeup((caddr_t)clp); } /* * Dump out the state stuff for debugging. */ APPLESTATIC void nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens, int lockowner, int locks) { struct nfsclclient *clp; struct nfsclowner *owp; struct nfsclopen *op; struct nfscllockowner *lp; struct nfscllock *lop; struct nfscldeleg *dp; clp = nmp->nm_clp; if (clp == NULL) { printf("nfscl dumpstate NULL clp\n"); return; } NFSLOCKCLSTATE(); TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (openowner && !LIST_EMPTY(&owp->nfsow_open)) printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n", owp->nfsow_owner[0], owp->nfsow_owner[1], owp->nfsow_owner[2], owp->nfsow_owner[3], owp->nfsow_seqid); LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (opens) printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n", op->nfso_stateid.other[0], op->nfso_stateid.other[1], op->nfso_stateid.other[2], op->nfso_opencnt, op->nfso_fh[12]); LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lockowner) printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n", lp->nfsl_owner[0], lp->nfsl_owner[1], lp->nfsl_owner[2], lp->nfsl_owner[3], lp->nfsl_seqid, lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1], lp->nfsl_stateid.other[2]); LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (locks) #ifdef __FreeBSD__ printf("lck typ=%d fst=%ju end=%ju\n", lop->nfslo_type, (intmax_t)lop->nfslo_first, (intmax_t)lop->nfslo_end); #else printf("lck typ=%d fst=%qd end=%qd\n", lop->nfslo_type, lop->nfslo_first, lop->nfslo_end); #endif } } } } } LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { if (openowner && !LIST_EMPTY(&owp->nfsow_open)) printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n", owp->nfsow_owner[0], owp->nfsow_owner[1], owp->nfsow_owner[2], owp->nfsow_owner[3], owp->nfsow_seqid); LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (opens) printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n", op->nfso_stateid.other[0], op->nfso_stateid.other[1], op->nfso_stateid.other[2], op->nfso_opencnt, op->nfso_fh[12]); LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lockowner) printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n", lp->nfsl_owner[0], lp->nfsl_owner[1], lp->nfsl_owner[2], lp->nfsl_owner[3], lp->nfsl_seqid, lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1], lp->nfsl_stateid.other[2]); LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (locks) #ifdef __FreeBSD__ printf("lck typ=%d fst=%ju end=%ju\n", lop->nfslo_type, (intmax_t)lop->nfslo_first, (intmax_t)lop->nfslo_end); #else printf("lck typ=%d fst=%qd end=%qd\n", lop->nfslo_type, lop->nfslo_first, lop->nfslo_end); #endif } } } } NFSUNLOCKCLSTATE(); } /* * Check for duplicate open owners and opens. * (Only used as a diagnostic aid.) */ APPLESTATIC void nfscl_dupopen(vnode_t vp, int dupopens) { struct nfsclclient *clp; struct nfsclowner *owp, *owp2; struct nfsclopen *op, *op2; struct nfsfh *nfhp; clp = VFSTONFS(vnode_mount(vp))->nm_clp; if (clp == NULL) { printf("nfscl dupopen NULL clp\n"); return; } nfhp = VTONFS(vp)->n_fhp; NFSLOCKCLSTATE(); /* * First, search for duplicate owners. * These should never happen! */ LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { if (owp != owp2 && !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner, NFSV4CL_LOCKNAMELEN)) { NFSUNLOCKCLSTATE(); printf("DUP OWNER\n"); nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0); return; } } } /* * Now, search for duplicate stateids. * These shouldn't happen, either. */ LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) { LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op != op2 && (op->nfso_stateid.other[0] != 0 || op->nfso_stateid.other[1] != 0 || op->nfso_stateid.other[2] != 0) && op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] && op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] && op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) { NFSUNLOCKCLSTATE(); printf("DUP STATEID\n"); nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0); return; } } } } } /* * Now search for duplicate opens. * Duplicate opens for the same owner * should never occur. Other duplicates are * possible and are checked for if "dupopens" * is true. */ LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) { if (nfhp->nfh_len == op2->nfso_fhlen && !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) { LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op != op2 && nfhp->nfh_len == op->nfso_fhlen && !NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) && (!NFSBCMP(op->nfso_own->nfsow_owner, op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) || dupopens)) { if (!NFSBCMP(op->nfso_own->nfsow_owner, op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) { NFSUNLOCKCLSTATE(); printf("BADDUP OPEN\n"); } else { NFSUNLOCKCLSTATE(); printf("DUP OPEN\n"); } nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0); return; } } } } } } NFSUNLOCKCLSTATE(); } /* * During close, find an open that needs to be dereferenced and * dereference it. If there are no more opens for this file, * log a message to that effect. * Opens aren't actually Close'd until VOP_INACTIVE() is performed * on the file's vnode. * This is the safe way, since it is difficult to identify * which open the close is for and I/O can be performed after the * close(2) system call when a file is mmap'd. * If it returns 0 for success, there will be a referenced * clp returned via clpp. */ APPLESTATIC int nfscl_getclose(vnode_t vp, struct nfsclclient **clpp) { struct nfsclclient *clp; struct nfsclowner *owp; struct nfsclopen *op; struct nfscldeleg *dp; struct nfsfh *nfhp; int error, notdecr; - error = nfscl_getcl(vp, NULL, NULL, &clp); + error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp); if (error) return (error); *clpp = clp; nfhp = VTONFS(vp)->n_fhp; notdecr = 1; NFSLOCKCLSTATE(); /* * First, look for one under a delegation that was locally issued * and just decrement the opencnt for it. Since all my Opens against * the server are DENY_NONE, I don't see a problem with hanging * onto them. (It is much easier to use one of the extant Opens * that I already have on the server when a Delegation is recalled * than to do fresh Opens.) Someday, I might need to rethink this, but. */ dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); if (dp != NULL) { LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { /* * Since a delegation is for a file, there * should never be more than one open for * each openowner. */ if (LIST_NEXT(op, nfso_list) != NULL) panic("nfscdeleg opens"); if (notdecr && op->nfso_opencnt > 0) { notdecr = 0; op->nfso_opencnt--; break; } } } } /* Now process the opens against the server. */ LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == nfhp->nfh_len && !NFSBCMP(op->nfso_fh, nfhp->nfh_fh, nfhp->nfh_len)) { /* Found an open, decrement cnt if possible */ if (notdecr && op->nfso_opencnt > 0) { notdecr = 0; op->nfso_opencnt--; } /* * There are more opens, so just return. */ if (op->nfso_opencnt > 0) { NFSUNLOCKCLSTATE(); return (0); } } } } NFSUNLOCKCLSTATE(); if (notdecr) printf("nfscl: never fnd open\n"); return (0); } APPLESTATIC int nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p) { struct nfsclclient *clp; struct nfsclowner *owp, *nowp; struct nfsclopen *op; struct nfscldeleg *dp; struct nfsfh *nfhp; int error; - error = nfscl_getcl(vp, NULL, NULL, &clp); + error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp); if (error) return (error); *clpp = clp; nfhp = VTONFS(vp)->n_fhp; NFSLOCKCLSTATE(); /* * First get rid of the local Open structures, which should be no * longer in use. */ dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); if (dp != NULL) { LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { KASSERT((op->nfso_opencnt == 0), ("nfscl: bad open cnt on deleg")); nfscl_freeopen(op, 1); } nfscl_freeopenowner(owp, 1); } } + /* Return any layouts marked return on close. */ + nfscl_retoncloselayout(clp, nfhp->nfh_fh, nfhp->nfh_len); + /* Now process the opens against the server. */ lookformore: LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { op = LIST_FIRST(&owp->nfsow_open); while (op != NULL) { if (op->nfso_fhlen == nfhp->nfh_len && !NFSBCMP(op->nfso_fh, nfhp->nfh_fh, nfhp->nfh_len)) { /* Found an open, close it. */ KASSERT((op->nfso_opencnt == 0), ("nfscl: bad open cnt on server")); NFSUNLOCKCLSTATE(); nfsrpc_doclose(VFSTONFS(vnode_mount(vp)), op, p); NFSLOCKCLSTATE(); goto lookformore; } op = LIST_NEXT(op, nfso_list); } } NFSUNLOCKCLSTATE(); return (0); } /* * Return all delegations on this client. * (Must be called with client sleep lock.) */ static void nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p) { struct nfscldeleg *dp, *ndp; struct ucred *cred; cred = newnfs_getcred(); TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) { nfscl_cleandeleg(dp); (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p); nfscl_freedeleg(&clp->nfsc_deleg, dp); } NFSFREECRED(cred); } /* * Do a callback RPC. */ APPLESTATIC void nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) { - int i, op; + int clist, gotseq_ok, i, j, k, op, rcalls; u_int32_t *tl; struct nfsclclient *clp; struct nfscldeleg *dp = NULL; - int numops, taglen = -1, error = 0, trunc, ret = 0; + int numops, taglen = -1, error = 0, trunc; u_int32_t minorvers, retops = 0, *retopsp = NULL, *repp, cbident; u_char tag[NFSV4_SMALLSTR + 1], *tagstr; vnode_t vp = NULL; struct nfsnode *np; struct vattr va; struct nfsfh *nfhp; mount_t mp; nfsattrbit_t attrbits, rattrbits; nfsv4stateid_t stateid; + uint32_t seqid, slotid = 0, highslot, cachethis; + uint8_t sessionid[NFSX_V4SESSIONID]; + struct mbuf *rep; + struct nfscllayout *lyp; + uint64_t filesid[2], len, off; + int changed, gotone, laytype, recalltype; + uint32_t iomode; + struct nfsclrecalllayout *recallp = NULL; + gotseq_ok = 0; nfsrvd_rephead(nd); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); taglen = fxdr_unsigned(int, *tl); if (taglen < 0) { error = EBADRPC; goto nfsmout; } if (taglen <= NFSV4_SMALLSTR) tagstr = tag; else tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, tagstr, taglen); if (error) { if (taglen > NFSV4_SMALLSTR) free(tagstr, M_TEMP); taglen = -1; goto nfsmout; } (void) nfsm_strtom(nd, tag, taglen); if (taglen > NFSV4_SMALLSTR) { free(tagstr, M_TEMP); } NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); minorvers = fxdr_unsigned(u_int32_t, *tl++); - if (minorvers != NFSV4_MINORVERSION) + if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION) nd->nd_repstat = NFSERR_MINORVERMISMATCH; cbident = fxdr_unsigned(u_int32_t, *tl++); if (nd->nd_repstat) numops = 0; else numops = fxdr_unsigned(int, *tl); /* * Loop around doing the sub ops. */ for (i = 0; i < numops; i++) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED); *repp++ = *tl; op = fxdr_unsigned(int, *tl); - if (op < NFSV4OP_CBGETATTR || op > NFSV4OP_CBRECALL) { + if (op < NFSV4OP_CBGETATTR || + (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) || + (op > NFSV4OP_CBNOTIFYDEVID && + minorvers == NFSV41_MINORVERSION)) { nd->nd_repstat = NFSERR_OPILLEGAL; *repp = nfscl_errmap(nd); retops++; break; } nd->nd_procnum = op; - newnfsstats.cbrpccnt[nd->nd_procnum]++; + if (op < NFSV4OP_CBNOPS) + newnfsstats.cbrpccnt[nd->nd_procnum]++; switch (op) { case NFSV4OP_CBGETATTR: - clp = NULL; + NFSCL_DEBUG(4, "cbgetattr\n"); + mp = NULL; + vp = NULL; error = nfsm_getfh(nd, &nfhp); if (!error) error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); + if (error == 0 && i == 0 && + minorvers != NFSV4_MINORVERSION) + error = NFSERR_OPNOTINSESS; if (!error) { - mp = nfscl_getmnt(cbident); + mp = nfscl_getmnt(minorvers, sessionid, cbident, + &clp); if (mp == NULL) error = NFSERR_SERVERFAULT; } if (!error) { - dp = NULL; - NFSLOCKCLSTATE(); - clp = nfscl_findcl(VFSTONFS(mp)); - if (clp != NULL) - dp = nfscl_finddeleg(clp, nfhp->nfh_fh, - nfhp->nfh_len); - NFSUNLOCKCLSTATE(); - if (dp == NULL) - error = NFSERR_SERVERFAULT; - } - if (!error) { - ret = nfscl_ngetreopen(mp, nfhp->nfh_fh, + error = nfscl_ngetreopen(mp, nfhp->nfh_fh, nfhp->nfh_len, p, &np); - if (!ret) + if (!error) vp = NFSTOV(np); } - if (nfhp != NULL) - FREE((caddr_t)nfhp, M_NFSFH); if (!error) { NFSZERO_ATTRBIT(&rattrbits); - if (NFSISSET_ATTRBIT(&attrbits, - NFSATTRBIT_SIZE)) { - if (!ret) - va.va_size = np->n_size; - else - va.va_size = dp->nfsdl_size; - NFSSETBIT_ATTRBIT(&rattrbits, - NFSATTRBIT_SIZE); - } - if (NFSISSET_ATTRBIT(&attrbits, - NFSATTRBIT_CHANGE)) { - va.va_filerev = dp->nfsdl_change; - if (ret || (np->n_flag & NDELEGMOD)) - va.va_filerev++; - NFSSETBIT_ATTRBIT(&rattrbits, - NFSATTRBIT_CHANGE); - } + NFSLOCKCLSTATE(); + dp = nfscl_finddeleg(clp, nfhp->nfh_fh, + nfhp->nfh_len); + if (dp != NULL) { + if (NFSISSET_ATTRBIT(&attrbits, + NFSATTRBIT_SIZE)) { + if (vp != NULL) + va.va_size = np->n_size; + else + va.va_size = + dp->nfsdl_size; + NFSSETBIT_ATTRBIT(&rattrbits, + NFSATTRBIT_SIZE); + } + if (NFSISSET_ATTRBIT(&attrbits, + NFSATTRBIT_CHANGE)) { + va.va_filerev = + dp->nfsdl_change; + if (vp == NULL || + (np->n_flag & NDELEGMOD)) + va.va_filerev++; + NFSSETBIT_ATTRBIT(&rattrbits, + NFSATTRBIT_CHANGE); + } + } else + error = NFSERR_SERVERFAULT; + NFSUNLOCKCLSTATE(); + } + if (vp != NULL) + vrele(vp); + if (mp != NULL) + vfs_unbusy(mp); + if (nfhp != NULL) + FREE((caddr_t)nfhp, M_NFSFH); + if (!error) (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va, NULL, 0, &rattrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); - if (!ret) - vrele(vp); - } break; case NFSV4OP_CBRECALL: - clp = NULL; + NFSCL_DEBUG(4, "cbrecall\n"); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stateid.seqid = *tl++; NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); trunc = fxdr_unsigned(int, *tl); error = nfsm_getfh(nd, &nfhp); + if (error == 0 && i == 0 && + minorvers != NFSV4_MINORVERSION) + error = NFSERR_OPNOTINSESS; if (!error) { - mp = nfscl_getmnt(cbident); - if (mp == NULL) - error = NFSERR_SERVERFAULT; - } - if (!error) { NFSLOCKCLSTATE(); - clp = nfscl_findcl(VFSTONFS(mp)); + if (minorvers == NFSV4_MINORVERSION) + clp = nfscl_getclnt(cbident); + else + clp = nfscl_getclntsess(sessionid); if (clp != NULL) { dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0) { dp->nfsdl_flags |= NFSCLDL_RECALL; wakeup((caddr_t)clp); } } else { error = NFSERR_SERVERFAULT; } NFSUNLOCKCLSTATE(); } if (nfhp != NULL) FREE((caddr_t)nfhp, M_NFSFH); break; + case NFSV4OP_CBLAYOUTRECALL: + NFSCL_DEBUG(4, "cblayrec\n"); + nfhp = NULL; + NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); + laytype = fxdr_unsigned(int, *tl++); + iomode = fxdr_unsigned(uint32_t, *tl++); + if (newnfs_true == *tl++) + changed = 1; + else + changed = 0; + recalltype = fxdr_unsigned(int, *tl); + recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, + M_WAITOK); + if (laytype != NFSLAYOUT_NFSV4_1_FILES) + error = NFSERR_NOMATCHLAYOUT; + else if (recalltype == NFSLAYOUTRETURN_FILE) { + error = nfsm_getfh(nd, &nfhp); + NFSCL_DEBUG(4, "retfile getfh=%d\n", error); + if (error != 0) + goto nfsmout; + NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER + + NFSX_STATEID); + off = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); tl += 2; + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + if (minorvers == NFSV4_MINORVERSION) + error = NFSERR_NOTSUPP; + else if (i == 0) + error = NFSERR_OPNOTINSESS; + if (error == 0) { + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + NFSCL_DEBUG(4, "cbly clp=%p\n", clp); + if (clp != NULL) { + lyp = nfscl_findlayout(clp, + nfhp->nfh_fh, + nfhp->nfh_len); + NFSCL_DEBUG(4, "cblyp=%p\n", + lyp); + if (lyp != NULL && + (lyp->nfsly_flags & + NFSLY_FILES) != 0 && + !NFSBCMP(stateid.other, + lyp->nfsly_stateid.other, + NFSX_STATEIDOTHER)) { + error = + nfscl_layoutrecall( + recalltype, + lyp, iomode, off, + len, stateid.seqid, + recallp); + recallp = NULL; + wakeup(clp); + NFSCL_DEBUG(4, + "aft layrcal=%d\n", + error); + } else + error = + NFSERR_NOMATCHLAYOUT; + } else + error = NFSERR_NOMATCHLAYOUT; + NFSUNLOCKCLSTATE(); + } + free(nfhp, M_NFSFH); + } else if (recalltype == NFSLAYOUTRETURN_FSID) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER); + filesid[0] = fxdr_hyper(tl); tl += 2; + filesid[1] = fxdr_hyper(tl); tl += 2; + gotone = 0; + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + if (clp != NULL) { + TAILQ_FOREACH(lyp, &clp->nfsc_layout, + nfsly_list) { + if (lyp->nfsly_filesid[0] == + filesid[0] && + lyp->nfsly_filesid[1] == + filesid[1]) { + error = + nfscl_layoutrecall( + recalltype, + lyp, iomode, 0, + UINT64_MAX, + lyp->nfsly_stateid.seqid, + recallp); + recallp = NULL; + gotone = 1; + } + } + if (gotone != 0) + wakeup(clp); + else + error = NFSERR_NOMATCHLAYOUT; + } else + error = NFSERR_NOMATCHLAYOUT; + NFSUNLOCKCLSTATE(); + } else if (recalltype == NFSLAYOUTRETURN_ALL) { + gotone = 0; + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + if (clp != NULL) { + TAILQ_FOREACH(lyp, &clp->nfsc_layout, + nfsly_list) { + error = nfscl_layoutrecall( + recalltype, lyp, iomode, 0, + UINT64_MAX, + lyp->nfsly_stateid.seqid, + recallp); + recallp = NULL; + gotone = 1; + } + if (gotone != 0) + wakeup(clp); + else + error = NFSERR_NOMATCHLAYOUT; + } else + error = NFSERR_NOMATCHLAYOUT; + NFSUNLOCKCLSTATE(); + } else + error = NFSERR_NOMATCHLAYOUT; + if (recallp != NULL) { + free(recallp, M_NFSLAYRECALL); + recallp = NULL; + } + break; + case NFSV4OP_CBSEQUENCE: + NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + + 5 * NFSX_UNSIGNED); + bcopy(tl, sessionid, NFSX_V4SESSIONID); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + seqid = fxdr_unsigned(uint32_t, *tl++); + slotid = fxdr_unsigned(uint32_t, *tl++); + highslot = fxdr_unsigned(uint32_t, *tl++); + cachethis = *tl++; + /* Throw away the referring call stuff. */ + clist = fxdr_unsigned(int, *tl); + for (j = 0; j < clist; j++) { + NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + + NFSX_UNSIGNED); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + rcalls = fxdr_unsigned(int, *tl); + for (k = 0; k < rcalls; k++) { + NFSM_DISSECT(tl, uint32_t *, + 2 * NFSX_UNSIGNED); + } + } + NFSLOCKCLSTATE(); + if (i == 0) { + clp = nfscl_getclntsess(sessionid); + if (clp == NULL) + error = NFSERR_SERVERFAULT; + } else + error = NFSERR_SEQUENCEPOS; + if (error == 0) + error = nfsv4_seqsession(seqid, slotid, + highslot, + NFSMNT_MDSSESSION(clp->nfsc_nmp)-> + nfsess_cbslots, &rep, + NFSMNT_MDSSESSION(clp->nfsc_nmp)-> + nfsess_backslots); + NFSUNLOCKCLSTATE(); + if (error == 0) { + gotseq_ok = 1; + if (rep != NULL) { + NFSCL_DEBUG(4, "Got cbretry\n"); + m_freem(nd->nd_mreq); + nd->nd_mreq = rep; + rep = NULL; + goto out; + } + NFSM_BUILD(tl, uint32_t *, + NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); + bcopy(sessionid, tl, NFSX_V4SESSIONID); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + *tl++ = txdr_unsigned(seqid); + *tl++ = txdr_unsigned(slotid); + *tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1); + *tl = txdr_unsigned(NFSV4_CBSLOTS - 1); + } + break; + default: + if (i == 0 && minorvers == NFSV41_MINORVERSION) + error = NFSERR_OPNOTINSESS; + else { + NFSCL_DEBUG(1, "unsupp callback %d\n", op); + error = NFSERR_NOTSUPP; + } + break; }; if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) { nd->nd_repstat = NFSERR_BADXDR; } else { nd->nd_repstat = error; } error = 0; } retops++; if (nd->nd_repstat) { *repp = nfscl_errmap(nd); break; } else *repp = 0; /* NFS4_OK */ } nfsmout: + if (recallp != NULL) + free(recallp, M_NFSLAYRECALL); if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) nd->nd_repstat = NFSERR_BADXDR; else printf("nfsv4 comperr1=%d\n", error); } if (taglen == -1) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = 0; } else { *retopsp = txdr_unsigned(retops); } *nd->nd_errp = nfscl_errmap(nd); +out: + if (gotseq_ok != 0) { + rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK); + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + if (clp != NULL) { + nfsv4_seqsess_cacherep(slotid, + NFSMNT_MDSSESSION(clp->nfsc_nmp)->nfsess_cbslots, + rep); + NFSUNLOCKCLSTATE(); + } else { + NFSUNLOCKCLSTATE(); + m_freem(rep); + } + } } /* * Generate the next cbident value. Basically just increment a static value * and then check that it isn't already in the list, if it has wrapped around. */ static u_int32_t nfscl_nextcbident(void) { struct nfsclclient *clp; int matched; static u_int32_t nextcbident = 0; static int haswrapped = 0; nextcbident++; if (nextcbident == 0) haswrapped = 1; if (haswrapped) { /* * Search the clientid list for one already using this cbident. */ do { matched = 0; NFSLOCKCLSTATE(); LIST_FOREACH(clp, &nfsclhead, nfsc_list) { if (clp->nfsc_cbident == nextcbident) { matched = 1; break; } } NFSUNLOCKCLSTATE(); if (matched == 1) nextcbident++; } while (matched); } return (nextcbident); } /* - * Get the mount point related to a given cbident. + * Get the mount point related to a given cbident or session and busy it. */ static mount_t -nfscl_getmnt(u_int32_t cbident) +nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident, + struct nfsclclient **clpp) { struct nfsclclient *clp; - struct nfsmount *nmp; + mount_t mp; + int error; + *clpp = NULL; NFSLOCKCLSTATE(); LIST_FOREACH(clp, &nfsclhead, nfsc_list) { - if (clp->nfsc_cbident == cbident) + if (minorvers == NFSV4_MINORVERSION) { + if (clp->nfsc_cbident == cbident) + break; + } else if (!NFSBCMP(NFSMNT_MDSSESSION(clp->nfsc_nmp)-> + nfsess_sessionid, sessionid, NFSX_V4SESSIONID)) break; } if (clp == NULL) { NFSUNLOCKCLSTATE(); return (NULL); } - nmp = clp->nfsc_nmp; + mp = clp->nfsc_nmp->nm_mountp; + vfs_ref(mp); NFSUNLOCKCLSTATE(); - return (nmp->nm_mountp); + error = vfs_busy(mp, 0); + vfs_rel(mp); + if (error != 0) + return (NULL); + *clpp = clp; + return (mp); } /* + * Get the clientid pointer related to a given cbident. + */ +static struct nfsclclient * +nfscl_getclnt(u_int32_t cbident) +{ + struct nfsclclient *clp; + + LIST_FOREACH(clp, &nfsclhead, nfsc_list) + if (clp->nfsc_cbident == cbident) + break; + return (clp); +} + +/* + * Get the clientid pointer related to a given sessionid. + */ +static struct nfsclclient * +nfscl_getclntsess(uint8_t *sessionid) +{ + struct nfsclclient *clp; + + LIST_FOREACH(clp, &nfsclhead, nfsc_list) + if (!NFSBCMP(NFSMNT_MDSSESSION(clp->nfsc_nmp)->nfsess_sessionid, + sessionid, NFSX_V4SESSIONID)) + break; + return (clp); +} + +/* * Search for a lock conflict locally on the client. A conflict occurs if * - not same owner and overlapping byte range and at least one of them is * a write lock or this is an unlock. */ static int nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen, struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp, struct nfscllock **lopp) { struct nfsclowner *owp; struct nfsclopen *op; int ret; if (dp != NULL) { ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp); if (ret) return (ret); } LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, fhp, fhlen)) { ret = nfscl_checkconflict(&op->nfso_lock, nlop, own, lopp); if (ret) return (ret); } } } return (0); } static int nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop, u_int8_t *own, struct nfscllock **lopp) { struct nfscllockowner *lp; struct nfscllock *lop; LIST_FOREACH(lp, lhp, nfsl_list) { if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (lop->nfslo_first >= nlop->nfslo_end) break; if (lop->nfslo_end <= nlop->nfslo_first) continue; if (lop->nfslo_type == F_WRLCK || nlop->nfslo_type == F_WRLCK || nlop->nfslo_type == F_UNLCK) { if (lopp != NULL) *lopp = lop; return (NFSERR_DENIED); } } } } return (0); } /* * Check for a local conflicting lock. */ APPLESTATIC int nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags) { struct nfscllock *lop, nlck; struct nfscldeleg *dp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int error; nlck.nfslo_type = fl->l_type; nlck.nfslo_first = off; if (len == NFS64BITSSET) { nlck.nfslo_end = NFS64BITSSET; } else { nlck.nfslo_end = off + len; if (nlck.nfslo_end <= nlck.nfslo_first) return (NFSERR_INVAL); } np = VTONFS(vp); nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, &nlck, own, dp, &lop); if (error != 0) { fl->l_whence = SEEK_SET; fl->l_start = lop->nfslo_first; if (lop->nfslo_end == NFS64BITSSET) fl->l_len = 0; else fl->l_len = lop->nfslo_end - lop->nfslo_first; fl->l_pid = (pid_t)0; fl->l_type = lop->nfslo_type; error = -1; /* no RPC required */ } else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) || fl->l_type == F_RDLCK)) { /* * The delegation ensures that there isn't a conflicting * lock on the server, so return -1 to indicate an RPC * isn't required. */ fl->l_type = F_UNLCK; error = -1; } NFSUNLOCKCLSTATE(); return (error); } /* * Handle Recall of a delegation. * The clp must be exclusive locked when this is called. */ static int nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p, int called_from_renewthread) { struct nfsclowner *owp, *lowp, *nowp; struct nfsclopen *op, *lop; struct nfscllockowner *lp; struct nfscllock *lckp; struct nfsnode *np; int error = 0, ret, gotvp = 0; if (vp == NULL) { /* * First, get a vnode for the file. This is needed to do RPCs. */ ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh, dp->nfsdl_fhlen, p, &np); if (ret) { /* * File isn't open, so nothing to move over to the * server. */ return (0); } vp = NFSTOV(np); gotvp = 1; } else { np = VTONFS(vp); } dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET; /* * Ok, if it's a write delegation, flush data to the server, so * that close/open consistency is retained. */ ret = 0; NFSLOCKNODE(np); if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) { np->n_flag |= NDELEGRECALL; NFSUNLOCKNODE(np); ret = ncl_flush(vp, MNT_WAIT, cred, p, 1, called_from_renewthread); NFSLOCKNODE(np); np->n_flag &= ~NDELEGRECALL; } NFSINVALATTRCACHE(np); NFSUNLOCKNODE(np); if (ret == EIO && called_from_renewthread != 0) { /* * If the flush failed with EIO for the renew thread, * return now, so that the dirty buffer will be flushed * later. */ if (gotvp != 0) vrele(vp); return (ret); } /* * Now, for each openowner with opens issued locally, move them * over to state against the server. */ LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) { lop = LIST_FIRST(&lowp->nfsow_open); if (lop != NULL) { if (LIST_NEXT(lop, nfso_list) != NULL) panic("nfsdlg mult opens"); /* * Look for the same openowner against the server. */ LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { if (!NFSBCMP(lowp->nfsow_owner, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) { newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_moveopen(vp, clp, nmp, lop, owp, dp, cred, p); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) { + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret) { nfscl_freeopen(lop, 1); if (!error) error = ret; } break; } } /* * If no openowner found, create one and get an open * for it. */ if (owp == NULL) { MALLOC(nowp, struct nfsclowner *, sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); nfscl_newopen(clp, NULL, &owp, &nowp, &op, NULL, lowp->nfsow_owner, dp->nfsdl_fh, dp->nfsdl_fhlen, NULL); newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_moveopen(vp, clp, nmp, lop, owp, dp, cred, p); if (ret) { nfscl_freeopenowner(owp, 0); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) { + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret) { nfscl_freeopen(lop, 1); if (!error) error = ret; } } } } } /* * Now, get byte range locks for any locks done locally. */ LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) { newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p); if (ret == NFSERR_STALESTATEID || ret == NFSERR_STALEDONTRECOVER || - ret == NFSERR_STALECLIENTID) { + ret == NFSERR_STALECLIENTID || + ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret && !error) error = ret; } } if (gotvp) vrele(vp); return (error); } /* * Move a locally issued open over to an owner on the state list. * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and * returns with it unlocked. */ static int nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp, struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp, struct ucred *cred, NFSPROC_T *p) { struct nfsclopen *op, *nop; struct nfscldeleg *ndp; struct nfsnode *np; int error = 0, newone; /* * First, look for an appropriate open, If found, just increment the * opencnt in it. */ LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode && op->nfso_fhlen == lop->nfso_fhlen && !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) { op->nfso_opencnt += lop->nfso_opencnt; nfscl_freeopen(lop, 1); return (0); } } /* No appropriate open, so we have to do one against the server. */ np = VTONFS(vp); MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) + lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK); newone = 0; nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner, lop->nfso_fh, lop->nfso_fhlen, &newone); ndp = dp; error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p); if (error) { if (newone) nfscl_freeopen(op, 0); } else { if (newone) newnfs_copyincred(cred, &op->nfso_cred); op->nfso_mode |= lop->nfso_mode; op->nfso_opencnt += lop->nfso_opencnt; nfscl_freeopen(lop, 1); } if (nop != NULL) FREE((caddr_t)nop, M_NFSCLOPEN); if (ndp != NULL) { /* * What should I do with the returned delegation, since the * delegation is being recalled? For now, just printf and * through it away. */ printf("Moveopen returned deleg\n"); FREE((caddr_t)ndp, M_NFSCLDELEG); } return (error); } /* * Recall all delegations on this client. */ static void nfscl_totalrecall(struct nfsclclient *clp) { struct nfscldeleg *dp; TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0) dp->nfsdl_flags |= NFSCLDL_RECALL; } } /* * Relock byte ranges. Called for delegation recall and state expiry. */ static int nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp, struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred, NFSPROC_T *p) { struct nfscllockowner *nlp; struct nfsfh *nfhp; u_int64_t off, len; u_int32_t clidrev = 0; int error, newone, donelocally; off = lop->nfslo_first; len = lop->nfslo_end - lop->nfslo_first; error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p, clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone, &donelocally); if (error || donelocally) return (error); if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; nfhp = VTONFS(vp)->n_fhp; error = nfscl_trylock(nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, nlp, newone, 0, off, len, lop->nfslo_type, cred, p); if (error) nfscl_freelockowner(nlp, 0); return (error); } /* * Called to re-open a file. Basically get a vnode for the file handle * and then call nfsrpc_openrpc() to do the rest. */ static int nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p) { struct nfsnode *np; vnode_t vp; int error; error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np); if (error) return (error); vp = NFSTOV(np); if (np->n_v4 != NULL) { error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, fhp, fhlen, mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0, cred, p); } else { error = EINVAL; } vrele(vp); return (error); } /* * Try an open against the server. Just call nfsrpc_openrpc(), retrying while * NFSERR_DELAY. Also, try system credentials, if the passed in credentials * fail. */ static int nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op, u_int8_t *name, int namelen, struct nfscldeleg **ndpp, int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p) { int error; do { error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p, 0, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstryop"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ newnfs_setroot(cred); do { error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p, 1, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstryop"); } while (error == NFSERR_DELAY); } return (error); } /* * Try a byte range lock. Just loop on nfsrpc_lock() while it returns * NFSERR_DELAY. Also, retry with system credentials, if the provided * cred don't work. */ static int nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, struct nfscllockowner *nlp, int newone, int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; do { error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone, reclaim, off, len, type, cred, p, 0); if (!error && nd->nd_repstat == NFSERR_DELAY) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfstrylck"); } while (!error && nd->nd_repstat == NFSERR_DELAY); if (!error) error = nd->nd_repstat; if (error == EAUTH || error == EACCES) { /* Try again using root credentials */ newnfs_setroot(cred); do { error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone, reclaim, off, len, type, cred, p, 1); if (!error && nd->nd_repstat == NFSERR_DELAY) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfstrylck"); } while (!error && nd->nd_repstat == NFSERR_DELAY); if (!error) error = nd->nd_repstat; } return (error); } /* * Try a delegreturn against the server. Just call nfsrpc_delegreturn(), * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in * credentials fail. */ static int nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred, struct nfsmount *nmp, NFSPROC_T *p) { int error; do { error = nfsrpc_delegreturn(dp, cred, nmp, p, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrydp"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ newnfs_setroot(cred); do { error = nfsrpc_delegreturn(dp, cred, nmp, p, 1); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrydp"); } while (error == NFSERR_DELAY); } return (error); } /* * Try a close against the server. Just call nfsrpc_closerpc(), * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in * credentials fail. */ APPLESTATIC int nfscl_tryclose(struct nfsclopen *op, struct ucred *cred, struct nfsmount *nmp, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; do { error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrycl"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ newnfs_setroot(cred); do { error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrycl"); } while (error == NFSERR_DELAY); } return (error); } /* * Decide if a delegation on a file permits close without flushing writes * to the server. This might be a big performance win in some environments. * (Not useful until the client does caching on local stable storage.) */ APPLESTATIC int nfscl_mustflush(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np; struct nfsmount *nmp; np = VTONFS(vp); nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (1); NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (1); } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == NFSCLDL_WRITE && (dp->nfsdl_sizelimit >= np->n_size || !NFSHASSTRICT3530(nmp))) { NFSUNLOCKCLSTATE(); return (0); } NFSUNLOCKCLSTATE(); return (1); } /* * See if a (write) delegation exists for this file. */ APPLESTATIC int nfscl_nodeleg(vnode_t vp, int writedeleg) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np; struct nfsmount *nmp; np = VTONFS(vp); nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (1); NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (1); } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 && (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) == NFSCLDL_WRITE)) { NFSUNLOCKCLSTATE(); return (0); } NFSUNLOCKCLSTATE(); return (1); } /* * Look for an associated delegation that should be DelegReturned. */ APPLESTATIC int nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsclowner *owp; struct nfscllockowner *lp; struct nfsmount *nmp; struct ucred *cred; struct nfsnode *np; int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept; nmp = VFSTONFS(vnode_mount(vp)); np = VTONFS(vp); NFSLOCKCLSTATE(); /* * Loop around waiting for: * - outstanding I/O operations on delegations to complete * - for a delegation on vp that has state, lock the client and * do a recall * - return delegation with no state */ while (1) { clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (retcnt); } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } needsrecall = 0; LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { needsrecall = 1; break; } } if (!needsrecall) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { needsrecall = 1; break; } } } if (needsrecall && !triedrecall) { dp->nfsdl_flags |= NFSCLDL_DELEGRET; islept = 0; while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) break; } if (islept) continue; NFSUNLOCKCLSTATE(); cred = newnfs_getcred(); newnfs_copycred(&dp->nfsdl_cred, cred); (void) nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0); NFSFREECRED(cred); triedrecall = 1; NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; continue; } *stp = dp->nfsdl_stateid; retcnt = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (retcnt); } } /* * Look for associated delegation(s) that should be DelegReturned. */ APPLESTATIC int nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp, nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsclowner *owp; struct nfscllockowner *lp; struct nfsmount *nmp; struct ucred *cred; struct nfsnode *np; int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept; nmp = VFSTONFS(vnode_mount(fvp)); *gotfdp = 0; *gottdp = 0; NFSLOCKCLSTATE(); /* * Loop around waiting for: * - outstanding I/O operations on delegations to complete * - for a delegation on fvp that has state, lock the client and * do a recall * - return delegation(s) with no state. */ while (1) { clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (retcnt); } np = VTONFS(fvp); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && *gotfdp == 0) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } needsrecall = 0; LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { needsrecall = 1; break; } } if (!needsrecall) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { needsrecall = 1; break; } } } if (needsrecall && !triedrecall) { dp->nfsdl_flags |= NFSCLDL_DELEGRET; islept = 0; while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) break; } if (islept) continue; NFSUNLOCKCLSTATE(); cred = newnfs_getcred(); newnfs_copycred(&dp->nfsdl_cred, cred); (void) nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0); NFSFREECRED(cred); triedrecall = 1; NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; continue; } *fstp = dp->nfsdl_stateid; retcnt++; *gotfdp = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } if (tvp != NULL) { np = VTONFS(tvp); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && *gottdp == 0) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { NFSUNLOCKCLSTATE(); return (retcnt); } } LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { NFSUNLOCKCLSTATE(); return (retcnt); } } *tstp = dp->nfsdl_stateid; retcnt++; *gottdp = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } } NFSUNLOCKCLSTATE(); return (retcnt); } } /* * Get a reference on the clientid associated with the mount point. * Return 1 if success, 0 otherwise. */ APPLESTATIC int nfscl_getref(struct nfsmount *nmp) { struct nfsclclient *clp; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (0); } nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL); NFSUNLOCKCLSTATE(); return (1); } /* * Release a reference on a clientid acquired with the above call. */ APPLESTATIC void nfscl_relref(struct nfsmount *nmp) { struct nfsclclient *clp; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } nfsv4_relref(&clp->nfsc_lock); NFSUNLOCKCLSTATE(); } /* * Save the size attribute in the delegation, since the nfsnode * is going away. */ APPLESTATIC void nfscl_reclaimnode(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) dp->nfsdl_size = np->n_size; NFSUNLOCKCLSTATE(); } /* * Get the saved size attribute in the delegation, since it is a * newly allocated nfsnode. */ APPLESTATIC void nfscl_newnode(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) np->n_size = dp->nfsdl_size; NFSUNLOCKCLSTATE(); } /* * If there is a valid write delegation for this file, set the modtime * to the local clock time. */ APPLESTATIC void nfscl_delegmodtime(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) { NFSGETNANOTIME(&dp->nfsdl_modtime); dp->nfsdl_flags |= NFSCLDL_MODTIMESET; } NFSUNLOCKCLSTATE(); } /* * If there is a valid write delegation for this file with a modtime set, * put that modtime in mtime. */ APPLESTATIC void nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) == (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) *mtime = dp->nfsdl_modtime; NFSUNLOCKCLSTATE(); } static int nfscl_errmap(struct nfsrv_descript *nd) { short *defaulterrp, *errp; if (!nd->nd_repstat) return (0); if (nd->nd_procnum == NFSPROC_NOOP) return (txdr_unsigned(nd->nd_repstat & 0xffff)); if (nd->nd_repstat == EBADRPC) return (txdr_unsigned(NFSERR_BADXDR)); if (nd->nd_repstat == NFSERR_MINORVERMISMATCH || nd->nd_repstat == NFSERR_OPILLEGAL) return (txdr_unsigned(nd->nd_repstat)); - errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum]; + if (nd->nd_procnum < NFSV4OP_CBNOPS) + errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum]; + else + return (txdr_unsigned(nd->nd_repstat)); while (*++errp) if (*errp == (short)nd->nd_repstat) return (txdr_unsigned(nd->nd_repstat)); return (txdr_unsigned(*defaulterrp)); +} + +/* + * Called to find/add a layout to a client. + * This function returns the layout with a refcnt (shared lock) upon + * success (returns 0) or with no lock/refcnt on the layout when an + * error is returned. + * If a layout is passed in via lypp, it is locked (exclusively locked). + */ +APPLESTATIC int +nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, + nfsv4stateid_t *stateidp, int retonclose, + struct nfsclflayouthead *fhlp, struct nfscllayout **lypp, + struct ucred *cred, NFSPROC_T *p) +{ + struct nfsclclient *clp; + struct nfscllayout *lyp, *tlyp; + struct nfsclflayout *flp; + struct nfsnode *np = VTONFS(vp); + mount_t mp; + int layout_passed_in; + + mp = nmp->nm_mountp; + layout_passed_in = 1; + tlyp = NULL; + lyp = *lypp; + if (lyp == NULL) { + layout_passed_in = 0; + tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT, + M_WAITOK | M_ZERO); + } + + NFSLOCKCLSTATE(); + clp = nmp->nm_clp; + if (clp == NULL) { + if (layout_passed_in != 0) + nfsv4_unlock(&lyp->nfsly_lock, 0); + NFSUNLOCKCLSTATE(); + if (tlyp != NULL) + free(tlyp, M_NFSLAYOUT); + return (EPERM); + } + if (lyp == NULL) { + /* + * Although no lyp was passed in, another thread might have + * allocated one. If one is found, just increment it's ref + * count and return it. + */ + lyp = nfscl_findlayout(clp, fhp, fhlen); + if (lyp == NULL) { + lyp = tlyp; + tlyp = NULL; + lyp->nfsly_stateid.seqid = stateidp->seqid; + lyp->nfsly_stateid.other[0] = stateidp->other[0]; + lyp->nfsly_stateid.other[1] = stateidp->other[1]; + lyp->nfsly_stateid.other[2] = stateidp->other[2]; + lyp->nfsly_lastbyte = 0; + LIST_INIT(&lyp->nfsly_flayread); + LIST_INIT(&lyp->nfsly_flayrw); + LIST_INIT(&lyp->nfsly_recall); + lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0]; + lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1]; + lyp->nfsly_clp = clp; + lyp->nfsly_flags = (retonclose != 0) ? + (NFSLY_FILES | NFSLY_RETONCLOSE) : NFSLY_FILES; + lyp->nfsly_fhlen = fhlen; + NFSBCOPY(fhp, lyp->nfsly_fh, fhlen); + TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); + LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp, + nfsly_hash); + lyp->nfsly_timestamp = NFSD_MONOSEC + 120; + nfscl_layoutcnt++; + } else { + if (retonclose != 0) + lyp->nfsly_flags |= NFSLY_RETONCLOSE; + TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); + TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); + lyp->nfsly_timestamp = NFSD_MONOSEC + 120; + } + nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); + if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + NFSUNLOCKCLSTATE(); + if (tlyp != NULL) + free(tlyp, M_NFSLAYOUT); + return (EPERM); + } + *lypp = lyp; + } else + lyp->nfsly_stateid.seqid = stateidp->seqid; + + /* Merge the new list of File Layouts into the list. */ + flp = LIST_FIRST(fhlp); + if (flp != NULL) { + if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ) + nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp); + else + nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp); + } + if (layout_passed_in != 0) + nfsv4_unlock(&lyp->nfsly_lock, 1); + NFSUNLOCKCLSTATE(); + if (tlyp != NULL) + free(tlyp, M_NFSLAYOUT); + return (0); +} + +/* + * Search for a layout by MDS file handle. + * If one is found, it is returned with a refcnt (shared lock) iff + * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is + * returned NULL. + */ +struct nfscllayout * +nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen, + uint64_t off, struct nfsclflayout **retflpp, int *recalledp) +{ + struct nfscllayout *lyp; + mount_t mp; + int error, igotlock; + + mp = clp->nfsc_nmp->nm_mountp; + *recalledp = 0; + *retflpp = NULL; + NFSLOCKCLSTATE(); + lyp = nfscl_findlayout(clp, fhp, fhlen); + if (lyp != NULL) { + if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) { + TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); + TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); + lyp->nfsly_timestamp = NFSD_MONOSEC + 120; + error = nfscl_findlayoutforio(lyp, off, + NFSV4OPEN_ACCESSREAD, retflpp); + if (error == 0) + nfsv4_getref(&lyp->nfsly_lock, NULL, + NFSCLSTATEMUTEXPTR, mp); + else { + do { + igotlock = nfsv4_lock(&lyp->nfsly_lock, + 1, NULL, NFSCLSTATEMUTEXPTR, mp); + } while (igotlock == 0 && + (mp->mnt_kern_flag & MNTK_UNMOUNTF) == 0); + *retflpp = NULL; + } + if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + lyp = NULL; + *recalledp = 1; + } + } else { + lyp = NULL; + *recalledp = 1; + } + } + NFSUNLOCKCLSTATE(); + return (lyp); +} + +/* + * Search for a layout by MDS file handle. If one is found that is marked + * "return on close", delete it, since it should now be forgotten. + */ +static void +nfscl_retoncloselayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen) +{ + struct nfscllayout *lyp; + +tryagain: + lyp = nfscl_findlayout(clp, fhp, fhlen); + if (lyp != NULL && (lyp->nfsly_flags & NFSLY_RETONCLOSE) != 0) { + /* + * Wait for outstanding I/O ops to be done. + */ + if (lyp->nfsly_lock.nfslock_usecnt != 0 || + lyp->nfsly_lock.nfslock_lock != 0) { + lyp->nfsly_lock.nfslock_lock |= NFSV4LOCK_WANTED; + (void)mtx_sleep(&lyp->nfsly_lock, + NFSCLSTATEMUTEXPTR, PZERO, "nfslyc", 0); + goto tryagain; + } + nfscl_freelayout(lyp); + } +} + +/* + * Dereference a layout. + */ +void +nfscl_rellayout(struct nfscllayout *lyp, int exclocked) +{ + + NFSLOCKCLSTATE(); + if (exclocked != 0) + nfsv4_unlock(&lyp->nfsly_lock, 0); + else + nfsv4_relref(&lyp->nfsly_lock); + NFSUNLOCKCLSTATE(); +} + +/* + * Search for a devinfo by deviceid. If one is found, return it after + * acquiring a reference count on it. + */ +struct nfscldevinfo * +nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid, + struct nfscldevinfo *dip) +{ + + NFSLOCKCLSTATE(); + if (dip == NULL) + dip = nfscl_finddevinfo(clp, deviceid); + if (dip != NULL) + dip->nfsdi_refcnt++; + NFSUNLOCKCLSTATE(); + return (dip); +} + +/* + * Dereference a devinfo structure. + */ +static void +nfscl_reldevinfo_locked(struct nfscldevinfo *dip) +{ + + dip->nfsdi_refcnt--; + if (dip->nfsdi_refcnt == 0) + wakeup(&dip->nfsdi_refcnt); +} + +/* + * Dereference a devinfo structure. + */ +void +nfscl_reldevinfo(struct nfscldevinfo *dip) +{ + + NFSLOCKCLSTATE(); + nfscl_reldevinfo_locked(dip); + NFSUNLOCKCLSTATE(); +} + +/* + * Find a layout for this file handle. Return NULL upon failure. + */ +static struct nfscllayout * +nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen) +{ + struct nfscllayout *lyp; + + LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash) + if (lyp->nfsly_fhlen == fhlen && + !NFSBCMP(lyp->nfsly_fh, fhp, fhlen)) + break; + return (lyp); +} + +/* + * Find a devinfo for this deviceid. Return NULL upon failure. + */ +static struct nfscldevinfo * +nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid) +{ + struct nfscldevinfo *dip; + + LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list) + if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID) + == 0) + break; + return (dip); +} + +/* + * Merge the new file layout list into the main one, maintaining it in + * increasing offset order. + */ +static void +nfscl_mergeflayouts(struct nfsclflayouthead *fhlp, + struct nfsclflayouthead *newfhlp) +{ + struct nfsclflayout *flp, *nflp, *prevflp, *tflp; + + flp = LIST_FIRST(fhlp); + prevflp = NULL; + LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) { + while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) { + prevflp = flp; + flp = LIST_NEXT(flp, nfsfl_list); + } + if (prevflp == NULL) + LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list); + else + LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list); + prevflp = nflp; + } +} + +/* + * Add this nfscldevinfo to the client, if it doesn't already exist. + * This function consumes the structure pointed at by dip, if not NULL. + */ +APPLESTATIC int +nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, + struct nfsclflayout *flp) +{ + struct nfsclclient *clp; + struct nfscldevinfo *tdip; + + NFSLOCKCLSTATE(); + clp = nmp->nm_clp; + if (clp == NULL) { + NFSUNLOCKCLSTATE(); + if (dip != NULL) + free(dip, M_NFSDEVINFO); + return (ENODEV); + } + tdip = nfscl_finddevinfo(clp, flp->nfsfl_dev); + if (tdip != NULL) { + tdip->nfsdi_layoutrefs++; + flp->nfsfl_devp = tdip; + nfscl_reldevinfo_locked(tdip); + NFSUNLOCKCLSTATE(); + if (dip != NULL) + free(dip, M_NFSDEVINFO); + return (0); + } + if (dip != NULL) { + LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list); + dip->nfsdi_layoutrefs = 1; + flp->nfsfl_devp = dip; + } + NFSUNLOCKCLSTATE(); + if (dip == NULL) + return (ENODEV); + return (0); +} + +/* + * Free up a layout structure and associated file layout structure(s). + */ +APPLESTATIC void +nfscl_freelayout(struct nfscllayout *layp) +{ + struct nfsclflayout *flp, *nflp; + struct nfsclrecalllayout *rp, *nrp; + + LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) { + LIST_REMOVE(flp, nfsfl_list); + nfscl_freeflayout(flp); + } + LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) { + LIST_REMOVE(flp, nfsfl_list); + nfscl_freeflayout(flp); + } + LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) { + LIST_REMOVE(rp, nfsrecly_list); + free(rp, M_NFSLAYRECALL); + } + nfscl_layoutcnt--; + free(layp, M_NFSLAYOUT); +} + +/* + * Free up a file layout structure. + */ +APPLESTATIC void +nfscl_freeflayout(struct nfsclflayout *flp) +{ + int i; + + for (i = 0; i < flp->nfsfl_fhcnt; i++) + free(flp->nfsfl_fh[i], M_NFSFH); + if (flp->nfsfl_devp != NULL) + flp->nfsfl_devp->nfsdi_layoutrefs--; + free(flp, M_NFSFLAYOUT); +} + +/* + * Free up a file layout devinfo structure. + */ +APPLESTATIC void +nfscl_freedevinfo(struct nfscldevinfo *dip) +{ + + free(dip, M_NFSDEVINFO); +} + +/* + * Mark any layouts that match as recalled. + */ +static int +nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode, + uint64_t off, uint64_t len, uint32_t stateseqid, + struct nfsclrecalllayout *recallp) +{ + struct nfsclrecalllayout *rp, *orp; + + recallp->nfsrecly_recalltype = recalltype; + recallp->nfsrecly_iomode = iomode; + recallp->nfsrecly_stateseqid = stateseqid; + recallp->nfsrecly_off = off; + recallp->nfsrecly_len = len; + /* + * Order the list as file returns first, followed by fsid and any + * returns, both in increasing stateseqid order. + * Note that the seqids wrap around, so 1 is after 0xffffffff. + * (I'm not sure this is correct because I find RFC5661 confusing + * on this, but hopefully it will work ok.) + */ + orp = NULL; + LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) { + orp = rp; + if ((recalltype == NFSLAYOUTRETURN_FILE && + (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE || + nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) || + (recalltype != NFSLAYOUTRETURN_FILE && + rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE && + nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) { + LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list); + break; + } + } + if (rp == NULL) { + if (orp == NULL) + LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp, + nfsrecly_list); + else + LIST_INSERT_AFTER(orp, recallp, nfsrecly_list); + } + lyp->nfsly_flags |= NFSLY_RECALL; + return (0); +} + +/* + * Compare the two seqids for ordering. The trick is that the seqids can + * wrap around from 0xffffffff->0, so check for the cases where one + * has wrapped around. + * Return 1 if seqid1 comes before seqid2, 0 otherwise. + */ +static int +nfscl_seq(uint32_t seqid1, uint32_t seqid2) +{ + + if (seqid2 > seqid1 && (seqid2 - seqid1) >= 0x7fffffff) + /* seqid2 has wrapped around. */ + return (0); + if (seqid1 > seqid2 && (seqid1 - seqid2) >= 0x7fffffff) + /* seqid1 has wrapped around. */ + return (1); + if (seqid1 <= seqid2) + return (1); + return (0); +} + +/* + * Do a layout return for each of the recalls. + */ +static void +nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp, + struct ucred *cred, NFSPROC_T *p) +{ + struct nfsclrecalllayout *rp; + nfsv4stateid_t stateid; + + NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER); + LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) { + stateid.seqid = rp->nfsrecly_stateseqid; + (void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh, + lyp->nfsly_fhlen, 0, NFSLAYOUT_NFSV4_1_FILES, + rp->nfsrecly_iomode, rp->nfsrecly_recalltype, + rp->nfsrecly_off, rp->nfsrecly_len, + &stateid, 0, NULL, cred, p, NULL); + } +} + +/* + * Do the layout commit for a file layout. + */ +static void +nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp, + struct ucred *cred, NFSPROC_T *p) +{ + int error; + + error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh, lyp->nfsly_fhlen, + 0, 0, 0, lyp->nfsly_lastbyte, &lyp->nfsly_stateid, + NFSLAYOUT_NFSV4_1_FILES, 0, NULL, cred, p, NULL); + if (error == NFSERR_NOTSUPP) { + /* If the server doesn't want it, don't bother doing it. */ + NFSLOCKMNT(nmp); + nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT; + NFSUNLOCKMNT(nmp); + } +} + +/* + * Commit all layouts for a file (vnode). + */ +int +nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p) +{ + struct nfsclclient *clp; + struct nfscllayout *lyp; + struct nfsnode *np = VTONFS(vp); + mount_t mp; + struct nfsmount *nmp; + + mp = vnode_mount(vp); + nmp = VFSTONFS(mp); + if (NFSHASNOLAYOUTCOMMIT(nmp)) + return (0); + NFSLOCKCLSTATE(); + clp = nmp->nm_clp; + if (clp == NULL) { + NFSUNLOCKCLSTATE(); + return (EPERM); + } + lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); + if (lyp == NULL) { + NFSUNLOCKCLSTATE(); + return (EPERM); + } + nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); + if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + NFSUNLOCKCLSTATE(); + return (EPERM); + } +tryagain: + if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) { + lyp->nfsly_flags &= ~NFSLY_WRITTEN; + NFSUNLOCKCLSTATE(); + NFSCL_DEBUG(4, "do layoutcommit2\n"); + nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p); + NFSLOCKCLSTATE(); + goto tryagain; + } + nfsv4_relref(&lyp->nfsly_lock); + NFSUNLOCKCLSTATE(); + return (0); } Index: head/sys/fs/nfsclient/nfs_clvfsops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clvfsops.c (revision 244041) +++ head/sys/fs/nfsclient/nfs_clvfsops.c (revision 244042) @@ -1,1731 +1,1799 @@ /*- * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(nfscl, "NFSv4 client"); extern int nfscl_ticks; extern struct timeval nfsboottime; extern struct nfsstats newnfsstats; extern int nfsrv_useacl; +extern int nfscl_debuglevel; +NFSCLSTATEMUTEX; MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header"); MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct"); SYSCTL_DECL(_vfs_nfs); static int nfs_ip_paranoia = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0, ""); static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY, downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, ""); /* how long between console messages "nfs server foo not responding" */ static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY, downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); static int nfs_mountroot(struct mount *); static void nfs_sec_name(char *, int *); static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *, struct ucred *, struct thread *); static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, u_char *, int, u_char *, int, u_char *, int, struct vnode **, struct ucred *, - struct thread *, int, int); + struct thread *, int, int, int); static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *, struct sockaddr_storage *, int *, off_t *, struct timeval *); static vfs_mount_t nfs_mount; static vfs_cmount_t nfs_cmount; static vfs_unmount_t nfs_unmount; static vfs_root_t nfs_root; static vfs_statfs_t nfs_statfs; static vfs_sync_t nfs_sync; static vfs_sysctl_t nfs_sysctl; /* * nfs vfs operations. */ static struct vfsops nfs_vfsops = { .vfs_init = ncl_init, .vfs_mount = nfs_mount, .vfs_cmount = nfs_cmount, .vfs_root = nfs_root, .vfs_statfs = nfs_statfs, .vfs_sync = nfs_sync, .vfs_uninit = ncl_uninit, .vfs_unmount = nfs_unmount, .vfs_sysctl = nfs_sysctl, }; VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfs, 1); MODULE_DEPEND(nfs, nfscommon, 1, 1, 1); MODULE_DEPEND(nfs, krpc, 1, 1, 1); MODULE_DEPEND(nfs, nfssvc, 1, 1, 1); MODULE_DEPEND(nfs, nfslock, 1, 1, 1); /* * This structure is now defined in sys/nfs/nfs_diskless.c so that it * can be shared by both NFS clients. It is declared here so that it * will be defined for kernels built without NFS_ROOT, although it * isn't used in that case. */ #if !defined(NFS_ROOT) && !defined(NFSCLIENT) struct nfs_diskless nfs_diskless = { { { 0 } } }; struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; int nfs_diskless_valid = 0; #endif SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, &nfs_diskless_valid, 0, "Has the diskless struct been filled correctly"); SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, nfsv3_diskless.root_hostnam, 0, "Path to nfs root"); SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr), "%Ssockaddr_in", "Diskless root nfs address"); void newnfsargs_ntoh(struct nfs_args *); static int nfs_mountdiskless(char *, struct sockaddr_in *, struct nfs_args *, struct thread *, struct vnode **, struct mount *); static void nfs_convert_diskless(void); static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs); int newnfs_iosize(struct nfsmount *nmp) { int iosize, maxio; /* First, set the upper limit for iosize */ if (nmp->nm_flag & NFSMNT_NFSV4) { maxio = NFS_MAXBSIZE; } else if (nmp->nm_flag & NFSMNT_NFSV3) { if (nmp->nm_sotype == SOCK_DGRAM) maxio = NFS_MAXDGRAMDATA; else maxio = NFS_MAXBSIZE; } else { maxio = NFS_V2MAXDATA; } if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0) nmp->nm_rsize = maxio; if (nmp->nm_rsize > MAXBSIZE) nmp->nm_rsize = MAXBSIZE; if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0) nmp->nm_readdirsize = maxio; if (nmp->nm_readdirsize > nmp->nm_rsize) nmp->nm_readdirsize = nmp->nm_rsize; if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0) nmp->nm_wsize = maxio; if (nmp->nm_wsize > MAXBSIZE) nmp->nm_wsize = MAXBSIZE; /* * Calculate the size used for io buffers. Use the larger * of the two sizes to minimise nfs requests but make sure * that it is at least one VM page to avoid wasting buffer * space. */ iosize = imax(nmp->nm_rsize, nmp->nm_wsize); iosize = imax(iosize, PAGE_SIZE); nmp->nm_mountp->mnt_stat.f_iosize = iosize; return (iosize); } static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) { args->version = NFS_ARGSVERSION; args->addr = oargs->addr; args->addrlen = oargs->addrlen; args->sotype = oargs->sotype; args->proto = oargs->proto; args->fh = oargs->fh; args->fhsize = oargs->fhsize; args->flags = oargs->flags; args->wsize = oargs->wsize; args->rsize = oargs->rsize; args->readdirsize = oargs->readdirsize; args->timeo = oargs->timeo; args->retrans = oargs->retrans; args->readahead = oargs->readahead; args->hostname = oargs->hostname; } static void nfs_convert_diskless(void) { bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, sizeof(struct ifaliasreq)); bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, sizeof(struct sockaddr_in)); nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args); if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) { nfsv3_diskless.root_fhsize = NFSX_MYFH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH); } else { nfsv3_diskless.root_fhsize = NFSX_V2FH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); } bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, sizeof(struct sockaddr_in)); bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN); nfsv3_diskless.root_time = nfs_diskless.root_time; bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam, MAXHOSTNAMELEN); nfs_diskless_valid = 3; } /* * nfs statfs call */ static int nfs_statfs(struct mount *mp, struct statfs *sbp) { struct vnode *vp; struct thread *td; struct nfsmount *nmp = VFSTONFS(mp); struct nfsvattr nfsva; struct nfsfsinfo fs; struct nfsstatfs sb; int error = 0, attrflag, gotfsinfo = 0, ret; struct nfsnode *np; td = curthread; error = vfs_busy(mp, MBF_NOWAIT); if (error) return (error); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) { vfs_unbusy(mp); return (error); } vp = NFSTOV(np); mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); if (!error) gotfsinfo = 1; } else mtx_unlock(&nmp->nm_mtx); if (!error) error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); + if (error != 0) + NFSCL_DEBUG(2, "statfs=%d\n", error); if (attrflag == 0) { ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, - td->td_ucred, td, &nfsva, NULL); + td->td_ucred, td, &nfsva, NULL, NULL); if (ret) { /* * Just set default values to get things going. */ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; } } (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { mtx_lock(&nmp->nm_mtx); if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4)) nfscl_loadfsinfo(nmp, &fs); nfscl_loadsbinfo(nmp, &sb, sbp); sbp->f_iosize = newnfs_iosize(nmp); mtx_unlock(&nmp->nm_mtx); if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } vput(vp); vfs_unbusy(mp); return (error); } /* * nfs version 3 fsinfo rpc call */ int ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, struct thread *td) { struct nfsfsinfo fs; struct nfsvattr nfsva; int error, attrflag; error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL); if (!error) { if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); mtx_lock(&nmp->nm_mtx); nfscl_loadfsinfo(nmp, &fs); mtx_unlock(&nmp->nm_mtx); } return (error); } /* * Mount a remote root fs via. nfs. This depends on the info in the * nfs_diskless structure that has been filled in properly by some primary * bootstrap. * It goes something like this: * - do enough of "ifconfig" by calling ifioctl() so that the system * can talk to the server * - If nfs_diskless.mygateway is filled in, use that address as * a default gateway. * - build the rootfs mount point and call mountnfs() to do the rest. * * It is assumed to be safe to read, modify, and write the nfsv3_diskless * structure, as well as other global NFS client variables here, as * nfs_mountroot() will be called once in the boot before any other NFS * client activity occurs. */ static int nfs_mountroot(struct mount *mp) { struct thread *td = curthread; struct nfsv3_diskless *nd = &nfsv3_diskless; struct socket *so; struct vnode *vp; struct ifreq ir; int error; u_long l; char buf[128]; char *cp; #if defined(BOOTP_NFSROOT) && defined(BOOTP) bootpc_init(); /* use bootp to get nfs_diskless filled in */ #elif defined(NFS_ROOT) nfs_setup_diskless(); #endif if (nfs_diskless_valid == 0) return (-1); if (nfs_diskless_valid == 1) nfs_convert_diskless(); /* * XXX splnet, so networks will receive... */ splnet(); /* * Do enough of ifconfig(8) so that the critical net interface can * talk to the server. */ error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0, td->td_ucred, td); if (error) panic("nfs_mountroot: socreate(%04x): %d", nd->myif.ifra_addr.sa_family, error); #if 0 /* XXX Bad idea */ /* * We might not have been told the right interface, so we pass * over the first ten interfaces of the same kind, until we get * one of them configured. */ for (i = strlen(nd->myif.ifra_name) - 1; nd->myif.ifra_name[i] >= '0' && nd->myif.ifra_name[i] <= '9'; nd->myif.ifra_name[i] ++) { error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if(!error) break; } #endif error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if (error) panic("nfs_mountroot: SIOCAIFADDR: %d", error); if ((cp = getenv("boot.netif.mtu")) != NULL) { ir.ifr_mtu = strtol(cp, NULL, 10); bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ); freeenv(cp); error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td); if (error) printf("nfs_mountroot: SIOCSIFMTU: %d", error); } soclose(so); /* * If the gateway field is filled in, set it as the default route. * Note that pxeboot will set a default route of 0 if the route * is not set by the DHCP server. Check also for a value of 0 * to avoid panicking inappropriately in that situation. */ if (nd->mygateway.sin_len != 0 && nd->mygateway.sin_addr.s_addr != 0) { struct sockaddr_in mask, sin; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ CURVNET_SET(TD_TO_VNET(td)); error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } /* * Create the rootfs mount point. */ nd->root_args.fh = nd->root_fh; nd->root_args.fhsize = nd->root_fhsize; l = ntohl(nd->root_saddr.sin_addr.s_addr); snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", (l >> 24) & 0xff, (l >> 16) & 0xff, (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam); printf("NFS ROOT: %s\n", buf); nd->root_args.hostname = buf; if ((error = nfs_mountdiskless(buf, &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) { return (error); } /* * This is not really an nfs issue, but it is much easier to * set hostname here and then let the "/etc/rc.xxx" files * mount the right /var based upon its preset value. */ mtx_lock(&prison0.pr_mtx); strlcpy(prison0.pr_hostname, nd->my_hostnam, sizeof(prison0.pr_hostname)); mtx_unlock(&prison0.pr_mtx); inittodr(ntohl(nd->root_time)); return (0); } /* * Internal version of mount system call for diskless setup. */ static int nfs_mountdiskless(char *path, struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, struct vnode **vpp, struct mount *mp) { struct sockaddr *nam; int dirlen, error; char *dirpath; /* * Find the directory path in "path", which also has the server's * name/ip address in it. */ dirpath = strchr(path, ':'); if (dirpath != NULL) dirlen = strlen(++dirpath); else dirlen = 0; nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen, NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO, - NFS_DEFAULT_NEGNAMETIMEO)) != 0) { + NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) { printf("nfs_mountroot: mount %s on /: %d\n", path, error); return (error); } return (0); } static void nfs_sec_name(char *sec, int *flagsp) { if (!strcmp(sec, "krb5")) *flagsp |= NFSMNT_KERB; else if (!strcmp(sec, "krb5i")) *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY); else if (!strcmp(sec, "krb5p")) *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY); } static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *hostname, struct ucred *cred, struct thread *td) { int s; int adjsock; char *p; s = splnet(); /* * Set read-only flag if requested; otherwise, clear it if this is * an update. If this is not an update, then either the read-only * flag is already clear, or this is a root mount and it was set * intentionally at some previous point. */ if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); } else if (mp->mnt_flag & MNT_UPDATE) { MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); } /* * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes * no sense in that context. Also, set up appropriate retransmit * and soft timeout behavior. */ if (argp->sotype == SOCK_STREAM) { nmp->nm_flag &= ~NFSMNT_NOCONN; nmp->nm_timeo = NFS_MAXTIMEO; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_retry = INT_MAX; else nmp->nm_retry = NFS_RETRANS_TCP; } /* Also clear RDIRPLUS if NFSv2, it crashes some servers */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { argp->flags &= ~NFSMNT_RDIRPLUS; nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } /* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */ if ((argp->flags & NFSMNT_NFSV4) != 0) { argp->flags &= ~NFSMNT_RESVPORT; nmp->nm_flag &= ~NFSMNT_RESVPORT; } /* Re-bind if rsrvd port requested and wasn't on one */ adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) && (argp->flags & NFSMNT_RESVPORT); /* Also re-bind if we're switching to/from a connected UDP socket */ adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) != (argp->flags & NFSMNT_NOCONN)); /* Update flags atomically. Don't change the lock bits. */ nmp->nm_flag = argp->flags | nmp->nm_flag; splx(s); if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; if (nmp->nm_timeo < NFS_MINTIMEO) nmp->nm_timeo = NFS_MINTIMEO; else if (nmp->nm_timeo > NFS_MAXTIMEO) nmp->nm_timeo = NFS_MAXTIMEO; } if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { nmp->nm_retry = argp->retrans; if (nmp->nm_retry > NFS_MAXREXMIT) nmp->nm_retry = NFS_MAXREXMIT; } if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { nmp->nm_wsize = argp->wsize; /* Round down to multiple of blocksize */ nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize <= 0) nmp->nm_wsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { nmp->nm_rsize = argp->rsize; /* Round down to multiple of blocksize */ nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize <= 0) nmp->nm_rsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { nmp->nm_readdirsize = argp->readdirsize; } if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) nmp->nm_acregmin = argp->acregmin; else nmp->nm_acregmin = NFS_MINATTRTIMO; if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) nmp->nm_acregmax = argp->acregmax; else nmp->nm_acregmax = NFS_MAXATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) nmp->nm_acdirmin = argp->acdirmin; else nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) nmp->nm_acdirmax = argp->acdirmax; else nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; if (nmp->nm_acdirmin > nmp->nm_acdirmax) nmp->nm_acdirmin = nmp->nm_acdirmax; if (nmp->nm_acregmin > nmp->nm_acregmax) nmp->nm_acregmin = nmp->nm_acregmax; if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { if (argp->readahead <= NFS_MAXRAHEAD) nmp->nm_readahead = argp->readahead; else nmp->nm_readahead = NFS_MAXRAHEAD; } if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { if (argp->wcommitsize < nmp->nm_wsize) nmp->nm_wcommitsize = nmp->nm_wsize; else nmp->nm_wcommitsize = argp->wcommitsize; } adjsock |= ((nmp->nm_sotype != argp->sotype) || (nmp->nm_soproto != argp->proto)); if (nmp->nm_client != NULL && adjsock) { int haslock = 0, error = 0; if (nmp->nm_sotype == SOCK_STREAM) { error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock); if (!error) haslock = 1; } if (!error) { newnfs_disconnect(&nmp->nm_sockreq); if (haslock) newnfs_sndunlock(&nmp->nm_sockreq.nr_lock); nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; if (nmp->nm_sotype == SOCK_DGRAM) while (newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)) { printf("newnfs_args: retrying connect\n"); (void) nfs_catnap(PSOCK, 0, "newnfscon"); } } } else { nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; } if (hostname != NULL) { strlcpy(nmp->nm_hostname, hostname, sizeof(nmp->nm_hostname)); p = strchr(nmp->nm_hostname, ':'); if (p != NULL) *p = '\0'; } } static const char *nfs_opts[] = { "from", "nfs_args", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union", "noclusterr", "noclusterw", "multilabel", "acls", "force", "update", "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport", "readahead", "hostname", "timeout", "addr", "fh", "nfsv3", "sec", - "principal", "nfsv4", "gssname", "allgssname", "dirpath", - "nametimeo", "negnametimeo", "nocto", "wcommitsize", + "principal", "nfsv4", "gssname", "allgssname", "dirpath", "minorversion", + "nametimeo", "negnametimeo", "nocto", "pnfs", "wcommitsize", NULL }; /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the sockargs() call because sockargs() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_mount(struct mount *mp) { struct nfs_args args = { .version = NFS_ARGSVERSION, .addr = NULL, .addrlen = sizeof (struct sockaddr_in), .sotype = SOCK_STREAM, .proto = 0, .fh = NULL, .fhsize = 0, .flags = NFSMNT_RESVPORT, .wsize = NFS_WSIZE, .rsize = NFS_RSIZE, .readdirsize = NFS_READDIRSIZE, .timeo = 10, .retrans = NFS_RETRANS, .readahead = NFS_DEFRAHEAD, .wcommitsize = 0, /* was: NQ_DEFLEASE */ .hostname = NULL, .acregmin = NFS_MINATTRTIMO, .acregmax = NFS_MAXATTRTIMO, .acdirmin = NFS_MINDIRATTRTIMO, .acdirmax = NFS_MAXDIRATTRTIMO, }; int error = 0, ret, len; struct sockaddr *nam = NULL; struct vnode *vp; struct thread *td; char hst[MNAMELEN]; u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100]; char *opt, *name, *secname; int nametimeo = NFS_DEFAULT_NAMETIMEO; int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO; + int minvers = 0; int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen; size_t hstlen; has_nfs_args_opt = 0; if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { error = EINVAL; goto out; } td = curthread; if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) { error = nfs_mountroot(mp); goto out; } nfscl_init(); /* * The old mount_nfs program passed the struct nfs_args * from userspace to kernel. The new mount_nfs program * passes string options via nmount() from userspace to kernel * and we populate the struct nfs_args in the kernel. */ if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) { error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof(args)); if (error != 0) goto out; if (args.version != NFS_ARGSVERSION) { error = EPROGMISMATCH; goto out; } has_nfs_args_opt = 1; } /* Handle the new style options. */ if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0) args.flags |= NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0) args.flags |= NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0) args.flags |= NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0) args.flags |= NFSMNT_INT; if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0) args.flags |= NFSMNT_RDIRPLUS; if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0) args.flags |= NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0) args.flags &= ~NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0) args.flags |= NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0) args.flags &= ~NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0) args.sotype = SOCK_STREAM; if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0) args.flags |= NFSMNT_NFSV3; if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) { args.flags |= NFSMNT_NFSV4; args.sotype = SOCK_STREAM; } if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0) args.flags |= NFSMNT_ALLGSSNAME; if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0) args.flags |= NFSMNT_NOCTO; + if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0) + args.flags |= NFSMNT_PNFS; if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readdirsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readdirsize); if (ret != 1 || args.readdirsize <= 0) { vfs_mount_error(mp, "illegal readdirsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READDIRSIZE; } if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readahead"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readahead); if (ret != 1 || args.readahead <= 0) { vfs_mount_error(mp, "illegal readahead: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READAHEAD; } if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal wsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.wsize); if (ret != 1 || args.wsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WSIZE; } if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal rsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.rsize); if (ret != 1 || args.rsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RSIZE; } if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal retrans"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.retrans); if (ret != 1 || args.retrans <= 0) { vfs_mount_error(mp, "illegal retrans: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RETRANS; } if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal acregmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMIN; } if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmax); if (ret != 1 || args.acregmax < 0) { vfs_mount_error(mp, "illegal acregmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmin); if (ret != 1 || args.acdirmin < 0) { vfs_mount_error(mp, "illegal acdirmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMIN; } if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmax); if (ret != 1 || args.acdirmax < 0) { vfs_mount_error(mp, "illegal acdirmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMAX; } if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.wcommitsize); if (ret != 1 || args.wcommitsize < 0) { vfs_mount_error(mp, "illegal wcommitsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WCOMMITSIZE; } if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeout: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &nametimeo); if (ret != 1 || nametimeo < 0) { vfs_mount_error(mp, "illegal nametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &negnametimeo); if (ret != 1 || negnametimeo < 0) { vfs_mount_error(mp, "illegal negnametimeo: %s", opt); error = EINVAL; goto out; } } + if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) == + 0) { + ret = sscanf(opt, "%d", &minvers); + if (ret != 1 || minvers < 0 || minvers > 1 || + (args.flags & NFSMNT_NFSV4) == 0) { + vfs_mount_error(mp, "illegal minorversion: %s", opt); + error = EINVAL; + goto out; + } + } if (vfs_getopt(mp->mnt_optnew, "sec", (void **) &secname, NULL) == 0) nfs_sec_name(secname, &args.flags); if (mp->mnt_flag & MNT_UPDATE) { struct nfsmount *nmp = VFSTONFS(mp); if (nmp == NULL) { error = EIO; goto out; } /* * If a change from TCP->UDP is done and there are thread(s) * that have I/O RPC(s) in progress with a tranfer size * greater than NFS_MAXDGRAMDATA, those thread(s) will be * hung, retrying the RPC(s) forever. Usually these threads * will be seen doing an uninterruptible sleep on wait channel * "newnfsreq" (truncated to "newnfsre" by procstat). */ if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM) tprintf(td->td_proc, LOG_WARNING, "Warning: mount -u that changes TCP->UDP can result in hung threads\n"); /* * When doing an update, we can't change version, * security, switch lockd strategies or change cookie * translation */ args.flags = (args.flags & ~(NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td); goto out; } /* * Make the nfs_ip_paranoia sysctl serve as the default connection * or no-connection mode for those protocols that support * no-connection mode (the flag will be cleared later for protocols * that do not support no-connection mode). This will allow a client * to receive replies from a different IP then the request was * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), * not 0. */ if (nfs_ip_paranoia == 0) args.flags |= NFSMNT_NOCONN; if (has_nfs_args_opt != 0) { /* * In the 'nfs_args' case, the pointers in the args * structure are in userland - we copy them in here. */ if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error != 0) goto out; error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen); if (error != 0) goto out; bzero(&hst[hstlen], MNAMELEN - hstlen); args.hostname = hst; /* sockargs() call must be after above copyin() calls */ error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); if (error != 0) goto out; } else { if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh, &args.fhsize) == 0) { if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } bcopy(args.fh, nfh, args.fhsize); } else { args.fhsize = 0; } (void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname, &len); if (args.hostname == NULL) { vfs_mount_error(mp, "Invalid hostname"); error = EINVAL; goto out; } bcopy(args.hostname, hst, MNAMELEN); hst[MNAMELEN - 1] = '\0'; } if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0) strlcpy(srvkrbname, name, sizeof (srvkrbname)); else snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst); srvkrbnamelen = strlen(srvkrbname); if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0) strlcpy(krbname, name, sizeof (krbname)); else krbname[0] = '\0'; krbnamelen = strlen(krbname); if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0) strlcpy(dirpath, name, sizeof (dirpath)); else dirpath[0] = '\0'; dirlen = strlen(dirpath); if (has_nfs_args_opt == 0) { if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr, &args.addrlen) == 0) { if (args.addrlen > SOCK_MAXADDRLEN) { error = ENAMETOOLONG; goto out; } nam = malloc(args.addrlen, M_SONAME, M_WAITOK); bcopy(args.addr, nam, args.addrlen); nam->sa_len = args.addrlen; } else { vfs_mount_error(mp, "No server address"); error = EINVAL; goto out; } } args.fh = nfh; error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath, dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td, - nametimeo, negnametimeo); + nametimeo, negnametimeo, minvers); out: if (!error) { MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF; MNT_IUNLOCK(mp); } return (error); } /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the sockargs() call because sockargs() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_cmount(struct mntarg *ma, void *data, uint64_t flags) { int error; struct nfs_args args; error = copyin(data, &args, sizeof (struct nfs_args)); if (error) return error; ma = mount_arg(ma, "nfs_args", &args, sizeof args); error = kernel_mount(ma, flags); return (error); } /* * Common code for mount and mountroot */ static int mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen, u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp, - struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo) + struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo, + int minvers) { struct nfsmount *nmp; struct nfsnode *np; int error, trycnt, ret; struct nfsvattr nfsva; + struct nfsclclient *clp; + struct nfsclds *dsp, *tdsp; + uint32_t lease; static u_int64_t clval = 0; + NFSCL_DEBUG(3, "in mnt\n"); + clp = NULL; if (mp->mnt_flag & MNT_UPDATE) { nmp = VFSTONFS(mp); printf("%s: MNT_UPDATE is no longer handled here\n", __func__); FREE(nam, M_SONAME); return (0); } else { MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) + krbnamelen + dirlen + srvkrbnamelen + 2, M_NEWNFSMNT, M_WAITOK | M_ZERO); TAILQ_INIT(&nmp->nm_bufq); if (clval == 0) clval = (u_int64_t)nfsboottime.tv_sec; nmp->nm_clval = clval++; nmp->nm_krbnamelen = krbnamelen; nmp->nm_dirpathlen = dirlen; nmp->nm_srvkrbnamelen = srvkrbnamelen; if (td->td_ucred->cr_uid != (uid_t)0) { /* * nm_uid is used to get KerberosV credentials for * the nfsv4 state handling operations if there is * no host based principal set. Use the uid of * this user if not root, since they are doing the * mount. I don't think setting this for root will * work, since root normally does not have user * credentials in a credentials cache. */ nmp->nm_uid = td->td_ucred->cr_uid; } else { /* * Just set to -1, so it won't be used. */ nmp->nm_uid = (uid_t)-1; } /* Copy and null terminate all the names */ if (nmp->nm_krbnamelen > 0) { bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen); nmp->nm_name[nmp->nm_krbnamelen] = '\0'; } if (nmp->nm_dirpathlen > 0) { bcopy(dirpath, NFSMNT_DIRPATH(nmp), nmp->nm_dirpathlen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + 1] = '\0'; } if (nmp->nm_srvkrbnamelen > 0) { bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp), nmp->nm_srvkrbnamelen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + nmp->nm_srvkrbnamelen + 2] = '\0'; } nmp->nm_sockreq.nr_cred = crhold(cred); mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF); mp->mnt_data = nmp; nmp->nm_getinfo = nfs_getnlminfo; nmp->nm_vinvalbuf = ncl_vinvalbuf; } vfs_getnewfsid(mp); nmp->nm_mountp = mp; mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK); /* * Since nfs_decode_args() might optionally set them, these * need to be set to defaults before the call, so that the * optional settings aren't overwritten. */ nmp->nm_nametimeo = nametimeo; nmp->nm_negnametimeo = negnametimeo; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; nmp->nm_readahead = NFS_DEFRAHEAD; if (desiredvnodes >= 11000) nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000); else nmp->nm_wcommitsize = hibufspace / 10; + if ((argp->flags & NFSMNT_NFSV4) != 0) + nmp->nm_minorvers = minvers; + else + nmp->nm_minorvers = 0; nfs_decode_args(mp, nmp, argp, hst, cred, td); /* * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too * high, depending on whether we end up with negative offsets in * the client or server somewhere. 2GB-1 may be safer. * * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum * that we can handle until we find out otherwise. */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) nmp->nm_maxfilesize = 0xffffffffLL; else nmp->nm_maxfilesize = OFF_MAX; if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { nmp->nm_wsize = NFS_WSIZE; nmp->nm_rsize = NFS_RSIZE; nmp->nm_readdirsize = NFS_READDIRSIZE; } nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_tprintf_delay = nfs_tprintf_delay; if (nmp->nm_tprintf_delay < 0) nmp->nm_tprintf_delay = 0; nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; nmp->nm_fhsize = argp->fhsize; if (nmp->nm_fhsize > 0) bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); nmp->nm_nam = nam; /* Set up the sockets and per-host congestion */ nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; nmp->nm_sockreq.nr_prog = NFS_PROG; if ((argp->flags & NFSMNT_NFSV4)) nmp->nm_sockreq.nr_vers = NFS_VER4; else if ((argp->flags & NFSMNT_NFSV3)) nmp->nm_sockreq.nr_vers = NFS_VER3; else nmp->nm_sockreq.nr_vers = NFS_VER2; if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0))) goto bad; + /* For NFSv4.1, get the clientid now. */ + if (nmp->nm_minorvers > 0) { + NFSCL_DEBUG(3, "at getcl\n"); + error = nfscl_getcl(mp, cred, td, 0, &clp); + NFSCL_DEBUG(3, "aft getcl=%d\n", error); + if (error != 0) + goto bad; + } - /* - * A reference count is needed on the nfsnode representing the - * remote root. If this object is not persistent, then backward - * traversals of the mount point (i.e. "..") will not work if - * the nfsnode gets flushed out of the cache. Ufs does not have - * this problem, because one can identify root inodes by their - * number == ROOTINO (2). - */ if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && nmp->nm_dirpathlen > 0) { + NFSCL_DEBUG(3, "in dirp\n"); /* * If the fhsize on the mount point == 0 for V4, the mount * path needs to be looked up. */ trycnt = 3; do { error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, td); + NFSCL_DEBUG(3, "aft dirp=%d\n", error); if (error) (void) nfs_catnap(PZERO, error, "nfsgetdirp"); } while (error && --trycnt > 0); if (error) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); goto bad; } } + + /* + * A reference count is needed on the nfsnode representing the + * remote root. If this object is not persistent, then backward + * traversals of the mount point (i.e. "..") will not work if + * the nfsnode gets flushed out of the cache. Ufs does not have + * this problem, because one can identify root inodes by their + * number == ROOTINO (2). + */ if (nmp->nm_fhsize > 0) { /* * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set * non-zero for the root vnode. f_iosize will be set correctly * by nfs_statfs() before any I/O occurs. */ mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ; error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) goto bad; *vpp = NFSTOV(np); /* * Get file attributes and transfer parameters for the * mountpoint. This has the side effect of filling in * (*vpp)->v_type with the correct value. */ ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, - cred, td, &nfsva, NULL); + cred, td, &nfsva, NULL, &lease); if (ret) { /* * Just set default values to get things going. */ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; + lease = 60; } (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1); + if (nmp->nm_minorvers > 0) { + NFSCL_DEBUG(3, "lease=%d\n", (int)lease); + NFSLOCKCLSTATE(); + clp->nfsc_renew = NFSCL_RENEW(lease); + clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; + clp->nfsc_clientidrev++; + if (clp->nfsc_clientidrev == 0) + clp->nfsc_clientidrev++; + NFSUNLOCKCLSTATE(); + /* + * Mount will succeed, so the renew thread can be + * started now. + */ + nfscl_start_renewthread(clp); + nfscl_clientrelease(clp); + } if (argp->flags & NFSMNT_NFSV3) ncl_fsinfo(nmp, *vpp, cred, td); /* Mark if the mount point supports NFSv4 ACLs. */ if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 && ret == 0 && NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } /* * Lose the lock but keep the ref. */ NFSVOPUNLOCK(*vpp, 0); return (0); } error = EIO; bad: + if (clp != NULL) + nfscl_clientrelease(clp); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); + if (nmp->nm_clp != NULL) { + NFSLOCKCLSTATE(); + LIST_REMOVE(nmp->nm_clp, nfsc_list); + NFSUNLOCKCLSTATE(); + free(nmp->nm_clp, M_NFSCLCLIENT); + } + TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) + nfscl_freenfsclds(dsp); FREE(nmp, M_NEWNFSMNT); FREE(nam, M_SONAME); return (error); } /* * unmount system call */ static int nfs_unmount(struct mount *mp, int mntflags) { struct thread *td; struct nfsmount *nmp; int error, flags = 0, trycnt = 0; + struct nfsclds *dsp, *tdsp; td = curthread; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; nmp = VFSTONFS(mp); /* * Goes something like this.. * - Call vflush() to clear out vnodes for this filesystem * - Close the socket * - Free up the data structures */ /* In the forced case, cancel any outstanding requests. */ if (mntflags & MNT_FORCE) { error = newnfs_nmcancelreqs(nmp); if (error) goto out; /* For a forced close, get rid of the renew thread now */ nfscl_umount(nmp, td); } /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ do { error = vflush(mp, 1, flags, td); if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30) (void) nfs_catnap(PSOCK, error, "newndm"); } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30); if (error) goto out; /* * We are now committed to the unmount. */ if ((mntflags & MNT_FORCE) == 0) nfscl_umount(nmp, td); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); FREE(nmp->nm_nam, M_SONAME); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); + TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) + nfscl_freenfsclds(dsp); FREE(nmp, M_NEWNFSMNT); out: return (error); } /* * Return root of a filesystem */ static int nfs_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; struct nfsmount *nmp; struct nfsnode *np; int error; nmp = VFSTONFS(mp); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags); if (error) return error; vp = NFSTOV(np); /* * Get transfer parameters and attributes for root vnode once. */ mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread); } else mtx_unlock(&nmp->nm_mtx); if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Flush out the buffer cache */ /* ARGSUSED */ static int nfs_sync(struct mount *mp, int waitfor) { struct vnode *vp, *mvp; struct thread *td; int error, allerror = 0; td = curthread; MNT_ILOCK(mp); /* * If a forced dismount is in progress, return from here so that * the umount(2) syscall doesn't get stuck in VFS_SYNC() before * calling VFS_UNMOUNT(). */ if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { MNT_IUNLOCK(mp); return (EBADF); } MNT_IUNLOCK(mp); /* * Force stale buffer cache information to be flushed. */ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* XXX Racy bv_cnt check. */ if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 || waitfor == MNT_LAZY) { VI_UNLOCK(vp); continue; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } error = VOP_FSYNC(vp, waitfor, td); if (error) allerror = error; NFSVOPUNLOCK(vp, 0); vrele(vp); } return (allerror); } static int nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req) { struct nfsmount *nmp = VFSTONFS(mp); struct vfsquery vq; int error; bzero(&vq, sizeof(vq)); switch (op) { #if 0 case VFS_CTL_NOLOCKS: val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0; if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &val, sizeof(val)); if (error) return (error); } if (req->newptr != NULL) { error = SYSCTL_IN(req, &val, sizeof(val)); if (error) return (error); if (val) nmp->nm_flag |= NFSMNT_NOLOCKS; else nmp->nm_flag &= ~NFSMNT_NOLOCKS; } break; #endif case VFS_CTL_QUERY: mtx_lock(&nmp->nm_mtx); if (nmp->nm_state & NFSSTA_TIMEO) vq.vq_flags |= VQ_NOTRESP; mtx_unlock(&nmp->nm_mtx); #if 0 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) vq.vq_flags |= VQ_NOTRESPLOCK; #endif error = SYSCTL_OUT(req, &vq, sizeof(vq)); break; case VFS_CTL_TIMEO: if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); } if (req->newptr != NULL) { error = vfs_suser(mp, req->td); if (error) return (error); error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; } break; default: return (ENOTSUP); } return (0); } /* * Extract the information needed by the nlm from the nfs vnode. */ static void nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp, struct sockaddr_storage *sp, int *is_v3p, off_t *sizep, struct timeval *timeop) { struct nfsmount *nmp; struct nfsnode *np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); if (fhlenp != NULL) *fhlenp = (size_t)np->n_fhp->nfh_len; if (fhp != NULL) bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len); if (sp != NULL) bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp))); if (is_v3p != NULL) *is_v3p = NFS_ISV3(vp); if (sizep != NULL) *sizep = np->n_size; if (timeop != NULL) { timeop->tv_sec = nmp->nm_timeo / NFS_HZ; timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ); } } /* * This function prints out an option name, based on the conditional * argument. */ static __inline void nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf, size_t *blen) { int len; if (testval != 0 && *blen > strlen(opt)) { len = snprintf(*buf, *blen, "%s", opt); if (len != strlen(opt)) printf("EEK!!\n"); *buf += len; *blen -= len; } } /* * This function printf out an options integer value. */ static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf, size_t *blen) { int len; if (*blen > strlen(opt) + 1) { /* Could result in truncated output string. */ len = snprintf(*buf, *blen, "%s=%d", opt, optval); if (len < *blen) { *buf += len; *blen -= len; } } } /* * Load the option flags and values into the buffer. */ void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen) { char *buf; size_t blen; buf = buffer; blen = buflen; nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0, "nfsv2", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == 0, ",lockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == NFSMNT_NOLOCKD, ",nolockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen); } Index: head/sys/fs/nfsclient/nfs_clvnops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clvnops.c (revision 244041) +++ head/sys/fs/nfsclient/nfs_clvnops.c (revision 244042) @@ -1,3496 +1,3514 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from nfs_vnops.c 8.16 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); /* * vnode op calls for Sun NFS version 2, 3 and 4 */ #include "opt_kdtrace.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_nfsclient_accesscache_flush_probe_func_t dtrace_nfscl_accesscache_flush_done_probe; uint32_t nfscl_accesscache_flush_done_id; dtrace_nfsclient_accesscache_get_probe_func_t dtrace_nfscl_accesscache_get_hit_probe, dtrace_nfscl_accesscache_get_miss_probe; uint32_t nfscl_accesscache_get_hit_id; uint32_t nfscl_accesscache_get_miss_id; dtrace_nfsclient_accesscache_load_probe_func_t dtrace_nfscl_accesscache_load_done_probe; uint32_t nfscl_accesscache_load_done_id; #endif /* !KDTRACE_HOOKS */ /* Defs */ #define TRUE 1 #define FALSE 0 extern struct nfsstats newnfsstats; extern int nfsrv_useacl; +extern int nfscl_debuglevel; MALLOC_DECLARE(M_NEWNFSREQ); /* * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. */ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static vop_read_t nfsfifo_read; static vop_write_t nfsfifo_write; static vop_close_t nfsfifo_close; static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *, struct thread *); static vop_lookup_t nfs_lookup; static vop_create_t nfs_create; static vop_mknod_t nfs_mknod; static vop_open_t nfs_open; static vop_pathconf_t nfs_pathconf; static vop_close_t nfs_close; static vop_access_t nfs_access; static vop_getattr_t nfs_getattr; static vop_setattr_t nfs_setattr; static vop_read_t nfs_read; static vop_fsync_t nfs_fsync; static vop_remove_t nfs_remove; static vop_link_t nfs_link; static vop_rename_t nfs_rename; static vop_mkdir_t nfs_mkdir; static vop_rmdir_t nfs_rmdir; static vop_symlink_t nfs_symlink; static vop_readdir_t nfs_readdir; static vop_strategy_t nfs_strategy; static vop_lock1_t nfs_lock1; static int nfs_lookitup(struct vnode *, char *, int, struct ucred *, struct thread *, struct nfsnode **); static int nfs_sillyrename(struct vnode *, struct vnode *, struct componentname *); static vop_access_t nfsspec_access; static vop_readlink_t nfs_readlink; static vop_print_t nfs_print; static vop_advlock_t nfs_advlock; static vop_advlockasync_t nfs_advlockasync; static vop_getacl_t nfs_getacl; static vop_setacl_t nfs_setacl; /* * Global vfs data structures for nfs */ struct vop_vector newnfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = nfs_access, .vop_advlock = nfs_advlock, .vop_advlockasync = nfs_advlockasync, .vop_close = nfs_close, .vop_create = nfs_create, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_getpages = ncl_getpages, .vop_putpages = ncl_putpages, .vop_inactive = ncl_inactive, .vop_link = nfs_link, .vop_lock1 = nfs_lock1, .vop_lookup = nfs_lookup, .vop_mkdir = nfs_mkdir, .vop_mknod = nfs_mknod, .vop_open = nfs_open, .vop_pathconf = nfs_pathconf, .vop_print = nfs_print, .vop_read = nfs_read, .vop_readdir = nfs_readdir, .vop_readlink = nfs_readlink, .vop_reclaim = ncl_reclaim, .vop_remove = nfs_remove, .vop_rename = nfs_rename, .vop_rmdir = nfs_rmdir, .vop_setattr = nfs_setattr, .vop_strategy = nfs_strategy, .vop_symlink = nfs_symlink, .vop_write = ncl_write, .vop_getacl = nfs_getacl, .vop_setacl = nfs_setacl, }; struct vop_vector newnfs_fifoops = { .vop_default = &fifo_specops, .vop_access = nfsspec_access, .vop_close = nfsfifo_close, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_inactive = ncl_inactive, .vop_print = nfs_print, .vop_read = nfsfifo_read, .vop_reclaim = ncl_reclaim, .vop_setattr = nfs_setattr, .vop_write = nfsfifo_write, }; static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap); static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, int namelen, struct ucred *cred, struct thread *td); static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td); static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, struct sillyrename *sp); /* * Global variables */ #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) SYSCTL_DECL(_vfs_nfs); static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); static int nfs_prime_access_cache = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, &nfs_prime_access_cache, 0, "Prime NFS ACCESS cache when fetching attributes"); static int newnfs_commit_on_close = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW, &newnfs_commit_on_close, 0, "write+commit on close, else only write"); static int nfs_clean_pages_on_close = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); int newnfs_directio_enable = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, &newnfs_directio_enable, 0, "Enable NFS directio"); int nfs_keep_dirty_on_error; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW, &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned"); /* * This sysctl allows other processes to mmap a file that has been opened * O_DIRECT by a process. In general, having processes mmap the file while * Direct IO is in progress can lead to Data Inconsistencies. But, we allow * this by default to prevent DoS attacks - to prevent a malicious user from * opening up files O_DIRECT preventing other users from mmap'ing these * files. "Protected" environments where stricter consistency guarantees are * required can disable this knob. The process that opened the file O_DIRECT * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not * meaningful. */ int newnfs_directio_allow_mmap = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); #if 0 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, &newnfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count"); SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, &newnfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count"); #endif #define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ | NFSACCESS_DELETE | NFSACCESS_LOOKUP) /* * SMP Locking Note : * The list of locks after the description of the lock is the ordering * of other locks acquired with the lock held. * np->n_mtx : Protects the fields in the nfsnode. VM Object Lock VI_MTX (acquired indirectly) * nmp->nm_mtx : Protects the fields in the nfsmount. rep->r_mtx * ncl_iod_mutex : Global lock, protects shared nfsiod state. * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. nmp->nm_mtx rep->r_mtx * rep->r_mtx : Protects the fields in an nfsreq. */ static int nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, struct ucred *cred, u_int32_t *retmode) { int error = 0, attrflag, i, lrupos; u_int32_t rmode; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, &rmode, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { lrupos = 0; mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { if (np->n_accesscache[i].uid == cred->cr_uid) { np->n_accesscache[i].mode = rmode; np->n_accesscache[i].stamp = time_second; break; } if (i > 0 && np->n_accesscache[i].stamp < np->n_accesscache[lrupos].stamp) lrupos = i; } if (i == NFS_ACCESSCACHESIZE) { np->n_accesscache[lrupos].uid = cred->cr_uid; np->n_accesscache[lrupos].mode = rmode; np->n_accesscache[lrupos].stamp = time_second; } mtx_unlock(&np->n_mtx); if (retmode != NULL) *retmode = rmode; KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } #ifdef KDTRACE_HOOKS if (error != 0) KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, error); #endif return (error); } /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. */ static int nfs_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; int error = 0, i, gotahit; u_int32_t mode, wmode, rmode; int v34 = NFS_ISV34(vp); struct nfsnode *np = VTONFS(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } /* * For nfs v3 or v4, check to see if we have done this recently, and if * so return our cached result instead of making an ACCESS call. * If not, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about. */ if (v34) { if (ap->a_accmode & VREAD) mode = NFSACCESS_READ; else mode = 0; if (vp->v_type != VDIR) { if (ap->a_accmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (ap->a_accmode & VAPPEND) mode |= NFSACCESS_EXTEND; if (ap->a_accmode & VEXEC) mode |= NFSACCESS_EXECUTE; if (ap->a_accmode & VDELETE) mode |= NFSACCESS_DELETE; } else { if (ap->a_accmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (ap->a_accmode & VAPPEND) mode |= NFSACCESS_EXTEND; if (ap->a_accmode & VEXEC) mode |= NFSACCESS_LOOKUP; if (ap->a_accmode & VDELETE) mode |= NFSACCESS_DELETE; if (ap->a_accmode & VDELETE_CHILD) mode |= NFSACCESS_MODIFY; } /* XXX safety belt, only make blanket request if caching */ if (nfsaccess_cache_timeout > 0) { wmode = NFSACCESS_READ | NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_EXECUTE | NFSACCESS_DELETE | NFSACCESS_LOOKUP; } else { wmode = mode; } /* * Does our cached result allow us to give a definite yes to * this request? */ gotahit = 0; mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { if (time_second < (np->n_accesscache[i].stamp + nfsaccess_cache_timeout) && (np->n_accesscache[i].mode & mode) == mode) { NFSINCRGLOBAL(newnfsstats.accesscache_hits); gotahit = 1; } break; } } mtx_unlock(&np->n_mtx); #ifdef KDTRACE_HOOKS if (gotahit != 0) KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, ap->a_cred->cr_uid, mode); else KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, ap->a_cred->cr_uid, mode); #endif if (gotahit == 0) { /* * Either a no, or a don't know. Go to the wire. */ NFSINCRGLOBAL(newnfsstats.accesscache_misses); error = nfs34_access_otw(vp, wmode, ap->a_td, ap->a_cred, &rmode); if (!error && (rmode & mode) != mode) error = EACCES; } return (error); } else { if ((error = nfsspec_access(ap)) != 0) { return (error); } /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. * After calling nfsspec_access, we should have the correct * file size cached. */ mtx_lock(&np->n_mtx); if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; mtx_unlock(&np->n_mtx); aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = ap->a_td; if (vp->v_type == VREG) error = ncl_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = ncl_readdirrpc(vp, &auio, ap->a_cred, ap->a_td); free(bp, M_TEMP); } else if (vp->v_type == VLNK) error = ncl_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } else mtx_unlock(&np->n_mtx); return (error); } } /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. */ /* ARGSUSED */ static int nfs_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; int error; int fmode = ap->a_mode; struct ucred *cred; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) return (EOPNOTSUPP); /* * For NFSv4, we need to do the Open Op before cache validation, * so that we conform to RFC3530 Sec. 9.3.1. */ if (NFS_ISV4(vp)) { error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td); if (error) { error = nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); return (error); } } /* * Now, if this Open will be doing reading, re-validate/flush the * cache, so that Close/Open coherency is maintained. */ mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == EINTR || error == EIO) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); if (vp->v_type == VDIR) np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; if (NFS_ISV4(vp)) np->n_change = vattr.va_filerev; } else { mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) || NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { if (vp->v_type == VDIR) np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == EINTR || error == EIO) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; if (NFS_ISV4(vp)) np->n_change = vattr.va_filerev; } } /* * If the object has >= 1 O_DIRECT active opens, we disable caching. */ if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { if (np->n_directio_opens == 0) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_flag |= NNONCACHE; } np->n_directio_opens++; } + /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ + if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) + np->n_flag |= NWRITEOPENED; + /* * If this is an open for writing, capture a reference to the * credentials, so they can be used by ncl_putpages(). Using * these write credentials is preferable to the credentials of * whatever thread happens to be doing the VOP_PUTPAGES() since * the write RPCs are less likely to fail with EACCES. */ if ((fmode & FWRITE) != 0) { cred = np->n_writecred; np->n_writecred = crhold(ap->a_cred); } else cred = NULL; mtx_unlock(&np->n_mtx); + if (cred != NULL) crfree(cred); vnode_create_vobject(vp, vattr.va_size, ap->a_td); return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to ncl_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NFS Version 4 - flush the dirty buffers and commit them, if * nfscl_mustflush() says this is necessary. * It is necessary if there is no write delegation held, * in order to satisfy open/close coherency. * If the file isn't cached on local stable storage, * it may be necessary in order to detect "out of space" * errors from the server, if the write delegation * issued by the server doesn't allow the file to grow. */ /* ARGSUSED */ static int nfs_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; struct ucred *cred; int error = 0, ret, localcred = 0; int fmode = ap->a_fflag; if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)) return (0); /* * During shutdown, a_cred isn't valid, so just use root. */ if (ap->a_cred == NOCRED) { cred = newnfs_getcred(); localcred = 1; } else { cred = ap->a_cred; } if (vp->v_type == VREG) { /* * Examine and clean dirty pages, regardless of NMODIFIED. * This closes a major hole in close-to-open consistency. * We want to push out all dirty pages (and buffers) on * close, regardless of whether they were dirtied by * mmap'ed writes or via write(). */ if (nfs_clean_pages_on_close && vp->v_object) { VM_OBJECT_LOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_UNLOCK(vp->v_object); } mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); if (NFS_ISV3(vp)) { /* * Under NFSv3 we have dirty buffers to dispose of. We * must flush them to the NFS server. We have the option * of waiting all the way through the commit rpc or just * waiting for the initial write. The default is to only * wait through the initial write so the data is in the * server's cache, which is roughly similar to the state * a standard disk subsystem leaves the file in on close(). * * We cannot clear the NMODIFIED bit in np->n_flag due to * potential races with other processes, and certainly * cannot clear it if we don't commit. * These races occur when there is no longer the old * traditional vnode locking implemented for Vnode Ops. */ int cm = newnfs_commit_on_close ? 1 : 0; error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0); /* np->n_flag &= ~NMODIFIED; */ } else if (NFS_ISV4(vp)) { if (nfscl_mustflush(vp) != 0) { int cm = newnfs_commit_on_close ? 1 : 0; error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0); /* * as above w.r.t races when clearing * NMODIFIED. * np->n_flag &= ~NMODIFIED; */ } } else error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); mtx_lock(&np->n_mtx); } /* * Invalidate the attribute cache in all cases. * An open is going to fetch fresh attrs any way, other procs * on this node that have file open will be forced to do an * otw attr fetch, but this is safe. * --> A user found that their RPC count dropped by 20% when * this was commented out and I can't see any requirement * for it, so I've disabled it when negative lookups are * enabled. (What does this have to do with negative lookup * caching? Well nothing, except it was reported by the * same user that needed negative lookup caching and I wanted * there to be a way to disable it to see if it * is the cause of some caching/coherency issue that might * crop up.) */ if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) { np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } mtx_unlock(&np->n_mtx); } if (NFS_ISV4(vp)) { /* * Get attributes so "change" is up to date. */ if (error == 0 && nfscl_mustflush(vp) != 0) { ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva, NULL); if (!ret) { np->n_change = nfsva.na_filerev; (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 0); } } /* * and do the close. */ ret = nfsrpc_close(vp, 0, ap->a_td); if (!error && ret) error = ret; if (error) error = nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); } if (newnfs_directio_enable) KASSERT((np->n_directio_asyncwr == 0), ("nfs_close: dirty unflushed (%d) directio buffers\n", np->n_directio_asyncwr)); if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { mtx_lock(&np->n_mtx); KASSERT((np->n_directio_opens > 0), ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); np->n_directio_opens--; if (np->n_directio_opens == 0) np->n_flag &= ~NNONCACHE; mtx_unlock(&np->n_mtx); } if (localcred) NFSFREECRED(cred); return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = curthread; /* XXX */ struct nfsnode *np = VTONFS(vp); int error = 0; struct nfsvattr nfsva; struct vattr *vap = ap->a_vap; struct vattr vattr; /* * Update local times for special files. */ mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; mtx_unlock(&np->n_mtx); /* * First look in the cache. */ if (ncl_getattrcache(vp, &vattr) == 0) { vap->va_type = vattr.va_type; vap->va_mode = vattr.va_mode; vap->va_nlink = vattr.va_nlink; vap->va_uid = vattr.va_uid; vap->va_gid = vattr.va_gid; vap->va_fsid = vattr.va_fsid; vap->va_fileid = vattr.va_fileid; vap->va_size = vattr.va_size; vap->va_blocksize = vattr.va_blocksize; vap->va_atime = vattr.va_atime; vap->va_mtime = vattr.va_mtime; vap->va_ctime = vattr.va_ctime; vap->va_gen = vattr.va_gen; vap->va_flags = vattr.va_flags; vap->va_rdev = vattr.va_rdev; vap->va_bytes = vattr.va_bytes; vap->va_filerev = vattr.va_filerev; /* * Get the local modify time for the case of a write * delegation. */ nfscl_deleggetmodtime(vp, &vap->va_mtime); return (0); } if (NFS_ISV34(vp) && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) { NFSINCRGLOBAL(newnfsstats.accesscache_misses); nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); if (ncl_getattrcache(vp, ap->a_vap) == 0) { nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); return (0); } } error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL); if (!error) error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0); if (!error) { /* * Get the local modify time for the case of a write * delegation. */ nfscl_deleggetmodtime(vp, &vap->va_mtime); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * nfs setattr call. */ static int nfs_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct thread *td = curthread; /* XXX */ struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Setting of flags and marking of atimes are not supported. */ if (vap->va_flags != VNOVAL) return (EOPNOTSUPP); /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * We run vnode_pager_setsize() early (why?), * we must set np->n_size now to avoid vinvalbuf * V_SAVE races that might setsize a lower * value. */ mtx_lock(&np->n_mtx); tsize = np->n_size; mtx_unlock(&np->n_mtx); error = ncl_meta_setsize(vp, ap->a_cred, td, vap->va_size); mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { tsize = np->n_size; mtx_unlock(&np->n_mtx); if (vap->va_size == 0) error = ncl_vinvalbuf(vp, 0, td, 1); else error = ncl_vinvalbuf(vp, V_SAVE, td, 1); if (error) { vnode_pager_setsize(vp, tsize); return (error); } /* * Call nfscl_delegmodtime() to set the modify time * locally, as required. */ nfscl_delegmodtime(vp); } else mtx_unlock(&np->n_mtx); /* * np->n_size has already been set to vap->va_size * in ncl_meta_setsize(). We must set it again since * nfs_loadattrcache() could be called through * ncl_meta_setsize() and could modify np->n_size. */ mtx_lock(&np->n_mtx); np->n_vattr.na_size = np->n_size = vap->va_size; mtx_unlock(&np->n_mtx); }; } else { mtx_lock(&np->n_mtx); if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG) { mtx_unlock(&np->n_mtx); if ((error = ncl_vinvalbuf(vp, V_SAVE, td, 1)) != 0 && (error == EINTR || error == EIO)) return (error); } else mtx_unlock(&np->n_mtx); } error = nfs_setattrrpc(vp, vap, ap->a_cred, td); if (error && vap->va_size != VNOVAL) { mtx_lock(&np->n_mtx); np->n_size = np->n_vattr.na_size = tsize; vnode_pager_setsize(vp, tsize); mtx_unlock(&np->n_mtx); } return (error); } /* * Do an nfs setattr rpc. */ static int nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct nfsnode *np = VTONFS(vp); int error, ret, attrflag, i; struct nfsvattr nfsva; if (NFS_ISV34(vp)) { mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) np->n_accesscache[i].stamp = 0; np->n_flag |= NDELEGMOD; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); } error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid); return (error); } /* * nfs lookup call, one step at a time... * First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(struct vop_lookup_args *ap) { struct componentname *cnp = ap->a_cnp; struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct mount *mp = dvp->v_mount; int flags = cnp->cn_flags; struct vnode *newvp; struct nfsmount *nmp; struct nfsnode *np, *newnp; int error = 0, attrflag, dattrflag, ltype, ncticks; struct thread *td = cnp->cn_thread; struct nfsfh *nfhp; struct nfsvattr dnfsva, nfsva; struct vattr vattr; struct timespec nctime; *vpp = NULLVP; if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); if (dvp->v_type != VDIR) return (ENOTDIR); nmp = VFSTONFS(mp); np = VTONFS(dvp); /* For NFSv4, wait until any remove is done. */ mtx_lock(&np->n_mtx); while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) { np->n_flag |= NREMOVEWANT; (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0); } mtx_unlock(&np->n_mtx); if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) return (error); error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks); if (error > 0 && error != ENOENT) return (error); if (error == -1) { /* * Lookups of "." are special and always return the * current directory. cache_lookup() already handles * associated locking bookkeeping, etc. */ if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { /* XXX: Is this really correct? */ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } /* * We only accept a positive hit in the cache if the * change time of the file matches our cached copy. * Otherwise, we discard the cache entry and fallback * to doing a lookup RPC. We also only trust cache * entries for less than nm_nametimeo seconds. * * To better handle stale file handles and attributes, * clear the attribute cache of this node if it is a * leaf component, part of an open() call, and not * locally modified before fetching the attributes. * This should allow stale file handles to be detected * here where we can fall back to a LOOKUP RPC to * recover rather than having nfs_open() detect the * stale file handle and failing open(2) with ESTALE. */ newvp = *vpp; newnp = VTONFS(newvp); if (!(nmp->nm_flag & NFSMNT_NOCTO) && (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && !(newnp->n_flag & NMODIFIED)) { mtx_lock(&newnp->n_mtx); newnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); mtx_unlock(&newnp->n_mtx); } if (nfscl_nodeleg(newvp, 0) == 0 || ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) && VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_ctime, &nctime, ==))) { NFSINCRGLOBAL(newnfsstats.lookupcache_hits); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); if (dvp != newvp) vput(newvp); else vrele(newvp); *vpp = NULLVP; } else if (error == ENOENT) { if (dvp->v_iflag & VI_DOOMED) return (ENOENT); /* * We only accept a negative hit in the cache if the * modification time of the parent directory matches * the cached copy in the name cache entry. * Otherwise, we discard all of the negative cache * entries for this directory. We also only trust * negative cache entries for up to nm_negnametimeo * seconds. */ if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) && VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_mtime, &nctime, ==)) { NFSINCRGLOBAL(newnfsstats.lookupcache_hits); return (ENOENT); } cache_purge_negative(dvp); } error = 0; newvp = NULLVP; NFSINCRGLOBAL(newnfsstats.lookupcache_misses); error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (error) { if (newvp != NULLVP) { vput(newvp); *vpp = NULLVP; } if (error != ENOENT) { if (NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* The requested file was not found. */ if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN)) { /* * XXX: UFS does a full VOP_ACCESS(dvp, * VWRITE) here instead of just checking * MNT_RDONLY. */ if (mp->mnt_flag & MNT_RDONLY) return (EROFS); cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE && dattrflag) { /* * Cache the modification time of the parent * directory from the post-op attributes in * the name cache entry. The negative cache * entry will be ignored once the directory * has changed. Don't bother adding the entry * if the directory has already changed. */ mtx_lock(&np->n_mtx); if (timespeccmp(&np->n_vattr.na_mtime, &dnfsva.na_mtime, ==)) { mtx_unlock(&np->n_mtx); cache_enter_time(dvp, NULL, cnp, &dnfsva.na_mtime, NULL); } else mtx_unlock(&np->n_mtx); } return (ENOENT); } /* * Handle RENAME case... */ if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { FREE((caddr_t)nfhp, M_NFSFH); return (EISDIR); } error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, LK_EXCLUSIVE); if (error) return (error); newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); *vpp = newvp; cnp->cn_flags |= SAVENAME; return (0); } if (flags & ISDOTDOT) { ltype = NFSVOPISLOCKED(dvp); error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); NFSVOPUNLOCK(dvp, 0); error = vfs_busy(mp, 0); NFSVOPLOCK(dvp, ltype | LK_RETRY); vfs_rel(mp); if (error == 0 && (dvp->v_iflag & VI_DOOMED)) { vfs_unbusy(mp); error = ENOENT; } if (error != 0) return (error); } NFSVOPUNLOCK(dvp, 0); error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, cnp->cn_lkflags); if (error == 0) newvp = NFSTOV(np); vfs_unbusy(mp); if (newvp != dvp) NFSVOPLOCK(dvp, ltype | LK_RETRY); if (dvp->v_iflag & VI_DOOMED) { if (error == 0) { if (newvp == dvp) vrele(newvp); else vput(newvp); } error = ENOENT; } if (error != 0) return (error); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { FREE((caddr_t)nfhp, M_NFSFH); VREF(dvp); newvp = dvp; if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else { error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, cnp->cn_lkflags); if (error) return (error); newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && !(np->n_flag & NMODIFIED)) { /* * Flush the attribute cache when opening a * leaf node to ensure that fresh attributes * are fetched in nfs_open() since we did not * fetch attributes from the LOOKUP reply. */ mtx_lock(&np->n_mtx); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); mtx_unlock(&np->n_mtx); } } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0)) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime); *vpp = newvp; return (0); } /* * nfs read call. * Just call ncl_bioread() to do the work. */ static int nfs_read(struct vop_read_args *ap) { struct vnode *vp = ap->a_vp; switch (vp->v_type) { case VREG: return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); case VDIR: return (EISDIR); default: return (EOPNOTSUPP); } } /* * nfs readlink call */ static int nfs_readlink(struct vop_readlink_args *ap) { struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EINVAL); return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred)); } /* * Do a readlink rpc. * Called by ncl_doio() from below the buffer cache. */ int ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int error, ret, attrflag; struct nfsvattr nfsva; error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs read rpc call * Ditto above */ int ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int error, ret, attrflag; struct nfsvattr nfsva; + struct nfsmount *nmp; - error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag, - NULL); + nmp = VFSTONFS(vnode_mount(vp)); + error = EIO; + attrflag = 0; + if (NFSHASPNFS(nmp)) + error = nfscl_doiods(vp, uiop, NULL, NULL, + NFSV4OPEN_ACCESSREAD, cred, uiop->uio_td); + NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); + if (error != 0) + error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, + &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs write call */ int ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *iomode, int *must_commit, int called_from_strategy) { struct nfsvattr nfsva; - int error = 0, attrflag, ret; + int error, attrflag, ret; + struct nfsmount *nmp; - error = nfsrpc_write(vp, uiop, iomode, must_commit, cred, - uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy); + nmp = VFSTONFS(vnode_mount(vp)); + error = EIO; + attrflag = 0; + if (NFSHASPNFS(nmp)) + error = nfscl_doiods(vp, uiop, iomode, must_commit, + NFSV4OPEN_ACCESSWRITE, cred, uiop->uio_td); + NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); + if (error != 0) + error = nfsrpc_write(vp, uiop, iomode, must_commit, cred, + uiop->uio_td, &nfsva, &attrflag, NULL, + called_from_strategy); if (attrflag) { if (VTONFS(vp)->n_flag & ND_NFSV4) ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1, 1); else ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (DOINGASYNC(vp)) *iomode = NFSWRITE_FILESYNC; if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct nfsvattr nfsva, dnfsva; struct vnode *newvp = NULL; struct nfsnode *np = NULL, *dnp; struct nfsfh *nfhp; struct vattr vattr; int error = 0, attrflag, dattrflag; u_int32_t rdev; if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = vap->va_rdev; else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; else return (EOPNOTSUPP); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) return (error); error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (!error) { if (!nfhp) (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp) error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (!error) { newvp = NFSTOV(np); if (attrflag != 0) { error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); if (error != 0) vput(newvp); } } if (!error) { *vpp = newvp; } else if (NFS_ISV4(dvp)) { error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. */ /* ARGSUSED */ static int nfs_mknod(struct vop_mknod_args *ap) { return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); } static struct mtx nfs_cverf_mtx; MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", MTX_DEF); static nfsquad_t nfs_get_cverf(void) { static nfsquad_t cverf; nfsquad_t ret; static int cverf_initialized = 0; mtx_lock(&nfs_cverf_mtx); if (cverf_initialized == 0) { cverf.lval[0] = arc4random(); cverf.lval[1] = arc4random(); cverf_initialized = 1; } else cverf.qval++; ret = cverf; mtx_unlock(&nfs_cverf_mtx); return (ret); } /* * nfs file create call */ static int nfs_create(struct vop_create_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; struct nfsmount *nmp; struct nfsvattr dnfsva, nfsva; struct nfsfh *nfhp; nfsquad_t cverf; int error = 0, attrflag, dattrflag, fmode = 0; struct vattr vattr; /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) return (error); if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; dnp = VTONFS(dvp); nmp = VFSTONFS(vnode_mount(dvp)); again: /* For NFSv4, wait until any remove is done. */ mtx_lock(&dnp->n_mtx); while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) { dnp->n_flag |= NREMOVEWANT; (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0); } mtx_unlock(&dnp->n_mtx); cverf = nfs_get_cverf(); error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (!error) { if (nfhp == NULL) (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp != NULL) error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (!error) { newvp = NFSTOV(np); if (attrflag == 0) error = nfsrpc_getattr(newvp, cnp->cn_cred, cnp->cn_thread, &nfsva, NULL); if (error == 0) error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } if (error) { if (newvp != NULL) { vput(newvp); newvp = NULL; } if (NFS_ISV34(dvp) && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) { if (nfscl_checksattr(vap, &nfsva)) { /* * We are normally called with only a partially * initialized VAP. Since the NFSv3 spec says that * the server may use the file attributes to * store the verifier, the spec requires us to do a * SETATTR RPC. FreeBSD servers store the verifier in * atime, but we can't really assume that all servers * will so we ensure that our SETATTR sets both atime * and mtime. */ if (vap->va_mtime.tv_sec == VNOVAL) vfs_timestamp(&vap->va_mtime); if (vap->va_atime.tv_sec == VNOVAL) vap->va_atime = vap->va_mtime; error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, cnp->cn_thread, &nfsva, &attrflag, NULL); if (error && (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL)) { /* try again without setting uid/gid */ vap->va_uid = (uid_t)VNOVAL; vap->va_gid = (uid_t)VNOVAL; error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, cnp->cn_thread, &nfsva, &attrflag, NULL); } if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); if (error != 0) vput(newvp); } } if (!error) { if ((cnp->cn_flags & MAKEENTRY) && attrflag) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL); *ap->a_vpp = newvp; } else if (NFS_ISV4(dvp)) { error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. * - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(struct vop_remove_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name")); KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount")); if (vp->v_type == VDIR) error = EPERM; else if (vrefcnt(vp) == 1 || (np->n_sillyrename && VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1); /* Do the rpc */ if (error != EINTR && error != EIO) error = nfs_removerpc(dvp, vp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); mtx_lock(&np->n_mtx); np->n_attrstamp = 0; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); return (error); } /* * nfs file remove rpc called from nfs_inactive */ int ncl_removeit(struct sillyrename *sp, struct vnode *vp) { /* * Make sure that the directory vnode is still valid. * XXX we should lock sp->s_dvp here. */ if (sp->s_dvp->v_type == VBAD) return (0); return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen, sp->s_cred, NULL)); } /* * Nfs remove rpc, called from nfs_remove() and ncl_removeit(). */ static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, int namelen, struct ucred *cred, struct thread *td) { struct nfsvattr dnfsva; struct nfsnode *dnp = VTONFS(dvp); int error = 0, dattrflag; mtx_lock(&dnp->n_mtx); dnp->n_flag |= NREMOVEINPROG; mtx_unlock(&dnp->n_mtx); error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva, &dattrflag, NULL); mtx_lock(&dnp->n_mtx); if ((dnp->n_flag & NREMOVEWANT)) { dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG); mtx_unlock(&dnp->n_mtx); wakeup((caddr_t)dnp); } else { dnp->n_flag &= ~NREMOVEINPROG; mtx_unlock(&dnp->n_mtx); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); if (error && NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs file rename call */ static int nfs_rename(struct vop_rename_args *ap) { struct vnode *fvp = ap->a_fvp; struct vnode *tvp = ap->a_tvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tdvp = ap->a_tdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct nfsnode *fnp = VTONFS(ap->a_fvp); struct nfsnode *tdnp = VTONFS(ap->a_tdvp); struct nfsv4node *newv4 = NULL; int error; KASSERT((tcnp->cn_flags & HASBUF) != 0 && (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name")); /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } if (fvp == tvp) { ncl_printf("nfs_rename: fvp == tvp (can't happen)\n"); error = 0; goto out; } if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0) goto out; /* * We have to flush B_DELWRI data prior to renaming * the file. If we don't, the delayed-write buffers * can be flushed out later after the file has gone stale * under NFSV3. NFSV2 does not have this problem because * ( as far as I can tell ) it flushes dirty buffers more * often. * * Skip the rename operation if the fsync fails, this can happen * due to the server's volume being full, when we pushed out data * that was written back to our cache earlier. Not checking for * this condition can result in potential (silent) data loss. */ error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); NFSVOPUNLOCK(fvp, 0); if (!error && tvp) error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); if (error) goto out; /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_thread); if (error == 0 && NFS_ISV4(tdvp)) { /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ MALLOC(newv4, struct nfsv4node *, sizeof (struct nfsv4node) + tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); mtx_lock(&tdnp->n_mtx); mtx_lock(&fnp->n_mtx); if (fnp->n_v4 != NULL && fvp->v_type == VREG && (fnp->n_v4->n4_namelen != tcnp->cn_namelen || NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen) || tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen || NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, tdnp->n_fhp->nfh_len))) { #ifdef notdef { char nnn[100]; int nnnl; nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99; bcopy(tcnp->cn_nameptr, nnn, nnnl); nnn[nnnl] = '\0'; printf("ren replace=%s\n",nnn); } #endif FREE((caddr_t)fnp->n_v4, M_NFSV4NODE); fnp->n_v4 = newv4; newv4 = NULL; fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len; fnp->n_v4->n4_namelen = tcnp->cn_namelen; NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, tdnp->n_fhp->nfh_len); NFSBCOPY(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen); } mtx_unlock(&tdnp->n_mtx); mtx_unlock(&fnp->n_mtx); if (newv4 != NULL) FREE((caddr_t)newv4, M_NFSV4NODE); } if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, struct sillyrename *sp) { return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). */ static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td) { struct nfsvattr fnfsva, tnfsva; struct nfsnode *fdnp = VTONFS(fdvp); struct nfsnode *tdnp = VTONFS(tdvp); int error = 0, fattrflag, tattrflag; error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp, tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag, &tattrflag, NULL, NULL); mtx_lock(&fdnp->n_mtx); fdnp->n_flag |= NMODIFIED; if (fattrflag != 0) { mtx_unlock(&fdnp->n_mtx); (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1); } else { fdnp->n_attrstamp = 0; mtx_unlock(&fdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); } mtx_lock(&tdnp->n_mtx); tdnp->n_flag |= NMODIFIED; if (tattrflag != 0) { mtx_unlock(&tdnp->n_mtx); (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1); } else { tdnp->n_attrstamp = 0; mtx_unlock(&tdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); } if (error && NFS_ISV4(fdvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs hard link create call */ static int nfs_link(struct vop_link_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *np, *tdnp; struct nfsvattr nfsva, dnfsva; int error = 0, attrflag, dattrflag; if (vp->v_mount != tdvp->v_mount) { return (EXDEV); } /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag, &dattrflag, NULL); tdnp = VTONFS(tdvp); mtx_lock(&tdnp->n_mtx); tdnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&tdnp->n_mtx); (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1); } else { tdnp->n_attrstamp = 0; mtx_unlock(&tdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); } if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); else { np = VTONFS(vp); mtx_lock(&np->n_mtx); np->n_attrstamp = 0; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL); } if (error && NFS_ISV4(vp)) error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(struct vop_symlink_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsvattr nfsva, dnfsva; struct nfsfh *nfhp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; int error = 0, attrflag, dattrflag, ret; vap->va_type = VLNK; error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen, ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp) { ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); if (!ret) newvp = NFSTOV(np); else if (!error) error = ret; } if (newvp != NULL) { if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (!error) { /* * If we do not have an error and we could not extract the * newvp from the response due to the request being NFSv2, we * have to do a lookup in order to obtain a newvp to return. */ error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) newvp = NFSTOV(np); } if (error) { if (newvp) vput(newvp); if (NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } else { *ap->a_vpp = newvp; } dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL); } return (error); } /* * nfs make dir call */ static int nfs_mkdir(struct vop_mkdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; struct vattr vattr; struct nfsfh *nfhp; struct nfsvattr nfsva, dnfsva; int error = 0, attrflag, dattrflag, ret; if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) return (error); vap->va_type = VDIR; error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } if (nfhp) { ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); if (!ret) { newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (!error) error = ret; } if (!error && newvp == NULL) { error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vput(newvp); if (NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } else { /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && dattrflag != 0) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, &dnfsva.na_ctime); *ap->a_vpp = newvp; } return (error); } /* * nfs remove directory call */ static int nfs_rmdir(struct vop_rmdir_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *dnp; struct nfsvattr dnfsva; int error, dattrflag; if (dvp == vp) return (EINVAL); error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL); dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } cache_purge(dvp); cache_purge(vp); if (error && NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, (gid_t)0); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(struct vop_readdir_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct uio *uio = ap->a_uio; ssize_t tresid; int error = 0; struct vattr vattr; if (vp->v_type != VDIR) return(EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { mtx_lock(&np->n_mtx); if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { mtx_unlock(&np->n_mtx); NFSINCRGLOBAL(newnfsstats.direofcache_hits); return (0); } else mtx_unlock(&np->n_mtx); } } /* * Call ncl_bioread() to do the real work. */ tresid = uio->uio_resid; error = ncl_bioread(vp, uio, 0, ap->a_cred); if (!error && uio->uio_resid == tresid) NFSINCRGLOBAL(newnfsstats.direofcache_misses); return (error); } /* * Readdir rpc call. * Called from below the buffer cache by ncl_doio(). */ int ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; nfsuint64 *cookiep, cookie; struct nfsnode *dnp = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, eof, attrflag; KASSERT(uiop->uio_iovcnt == 1 && (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, ("nfs readdirrpc bad uio")); /* * If there is no cookie, assume directory was stale. */ ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; ncl_dircookie_unlock(dnp); } else { ncl_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) (void)ncl_fsinfo(nmp, vp, cred, td); error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, &attrflag, &eof, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { /* * We are now either at the end of the directory or have filled * the block. */ if (eof) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) ncl_printf("EEK! readdirrpc resid > 0\n"); ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; ncl_dircookie_unlock(dnp); } } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). */ int ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; nfsuint64 *cookiep, cookie; struct nfsnode *dnp = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, attrflag, eof; KASSERT(uiop->uio_iovcnt == 1 && (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, ("nfs readdirplusrpc bad uio")); /* * If there is no cookie, assume directory was stale. */ ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; ncl_dircookie_unlock(dnp); } else { ncl_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) (void)ncl_fsinfo(nmp, vp, cred, td); error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva, &attrflag, &eof, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { /* * We are now either at end of the directory or have filled the * the block. */ if (eof) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) ncl_printf("EEK! readdirplusrpc resid > 0\n"); ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; ncl_dircookie_unlock(dnp); } } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... */ static int nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { struct sillyrename *sp; struct nfsnode *np; int error; short pid; unsigned int lticks; cache_purge(dvp); np = VTONFS(vp); KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NEWNFSREQ, M_WAITOK); sp->s_cred = crhold(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* * Fudge together a funny name. * Changing the format of the funny name to accomodate more * sillynames per directory. * The name is now changed to .nfs...4, where ticks is * CPU ticks since boot. */ pid = cnp->cn_thread->td_proc->p_pid; lticks = (unsigned int)ticks; for ( ; ; ) { sp->s_namlen = sprintf(sp->s_name, ".nfs.%08x.%04x4.4", lticks, pid); if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, NULL)) break; lticks++; } error = nfs_renameit(dvp, vp, cnp, sp); if (error) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NEWNFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. * npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred, struct thread *td, struct nfsnode **npp) { struct vnode *newvp = NULL, *vp; struct nfsnode *np, *dnp = VTONFS(dvp); struct nfsfh *nfhp, *onfhp; struct nfsvattr nfsva, dnfsva; struct componentname cn; int error = 0, attrflag, dattrflag; u_int hash; error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (npp && !error) { if (*npp != NULL) { np = *npp; vp = NFSTOV(np); /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ if (np->n_v4 != NULL && nfsva.na_type == VREG && (np->n_v4->n4_namelen != len || NFSBCMP(name, NFS4NODENAME(np->n_v4), len) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { #ifdef notdef { char nnn[100]; int nnnl; nnnl = (len < 100) ? len : 99; bcopy(name, nnn, nnnl); nnn[nnnl] = '\0'; printf("replace=%s\n",nnn); } #endif FREE((caddr_t)np->n_v4, M_NFSV4NODE); MALLOC(np->n_v4, struct nfsv4node *, sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + len - 1, M_NFSV4NODE, M_WAITOK); np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = len; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(name, NFS4NODENAME(np->n_v4), len); } hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT); onfhp = np->n_fhp; /* * Rehash node for new file handle. */ vfs_hash_rehash(vp, hash); np->n_fhp = nfhp; if (onfhp != NULL) FREE((caddr_t)onfhp, M_NFSFH); newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) { FREE((caddr_t)nfhp, M_NFSFH); VREF(dvp); newvp = dvp; } else { cn.cn_nameptr = name; cn.cn_namelen = len; error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td, &np, NULL, LK_EXCLUSIVE); if (error) return (error); newvp = NFSTOV(np); } if (!attrflag && *npp == NULL) { if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } if (npp && *npp == NULL) { if (error) { if (newvp) { if (newvp == dvp) vrele(newvp); else vput(newvp); } } else *npp = np; } if (error && NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * Nfs Version 3 and 4 commit rpc */ int ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error, attrflag; - u_char verf[NFSX_VERF]; mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { mtx_unlock(&nmp->nm_mtx); return (0); } mtx_unlock(&nmp->nm_mtx); - error = nfsrpc_commit(vp, offset, cnt, cred, td, verf, &nfsva, + error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, &attrflag, NULL); - if (!error) { - mtx_lock(&nmp->nm_mtx); - if (NFSBCMP((caddr_t)nmp->nm_verf, verf, NFSX_VERF)) { - NFSBCOPY(verf, (caddr_t)nmp->nm_verf, NFSX_VERF); - error = NFSERR_STALEWRITEVERF; - } - mtx_unlock(&nmp->nm_mtx); - if (!error && attrflag) - (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, - 0, 1); - } else if (NFS_ISV4(vp)) { + if (attrflag != 0) + (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, + 0, 1); + if (error != 0 && NFS_ISV4(vp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); - } return (error); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling ncl_asyncio(), otherwise just all ncl_doio() to do the * request. */ static int nfs_strategy(struct vop_strategy_args *ap) { struct buf *bp = ap->a_bp; struct ucred *cr; KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); BUF_ASSERT_HELD(bp); if (bp->b_iocmd == BIO_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting * queue the request, wake it up and wait for completion * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || ncl_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread)) (void) ncl_doio(ap->a_vp, bp, cr, curthread, 1); return (0); } /* * fsync vnode op. Just call ncl_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(struct vop_fsync_args *ap) { if (ap->a_vp->v_type != VREG) { /* * For NFS, metadata is changed synchronously on the server, * so there is nothing to flush. Also, ncl_flush() clears * the NMODIFIED flag and that shouldn't be done here for * directories. */ return (0); } return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. * If the called_from_renewthread argument is TRUE, it has been called * from the NFSv4 renew thread and, as such, cannot block indefinitely * waiting for a buffer write to complete. */ int ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td, int commit, int called_from_renewthread) { struct nfsnode *np = VTONFS(vp); struct buf *bp; int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1, trycnt = 0; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; struct bufobj *bo; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (called_from_renewthread != 0) slptimeo = hz; if (nmp->nm_flag & NFSMNT_INT) slpflag = NFS_PCATCH; if (!commit) passone = 0; bo = &vp->v_bufobj; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but has not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, ncl_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV34(vp) && commit) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); /* * Count up how many buffers waiting for a commit. */ bveccount = 0; BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (!BUF_ISLOCKED(bp) && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. */ if (bveccount > NFS_COMMITBVECSIZ) { /* * Release the vnode interlock to avoid a lock * order reversal. */ BO_UNLOCK(bo); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); BO_LOCK(bo); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bvecpos >= bvecsize) break; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { nbp = TAILQ_NEXT(bp, b_bobufs); continue; } if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) { BUF_UNLOCK(bp); nbp = TAILQ_NEXT(bp, b_bobufs); continue; } BO_UNLOCK(bo); bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. * * NOTE: we are not clearing B_DONE here, so we have * to do it later on in this routine if we intend to * initiate I/O on the bp. * * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; vfs_busy_pages(bp, 1); BO_LOCK(bo); /* * bp is protected by being locked, but nbp is not * and vfs_busy_pages() may sleep. We have to * recalculate nbp. */ nbp = TAILQ_NEXT(bp, b_bobufs); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } BO_UNLOCK(bo); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = ncl_commit(vp, off, (int)(endoff - off), wcred, td); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = ncl_commit(vp, off, (int)size, bp->b_wcred, td); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) ncl_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); if (retv) { /* * Error, leave B_DELWRI intact */ vfs_unbusy_pages(bp); brelse(bp); } else { /* * Success, remove B_DELWRI ( bundirty() ). * * b_dirtyoff/b_dirtyend seem to be NFS * specific. We should probably move that * into bundirty(). XXX */ bufobj_wref(bo); bp->b_flags |= B_ASYNC; bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_dirtyoff = bp->b_dirtyend = 0; bufdone(bp); } } } /* * Start/do any write(s) that are required. */ loop: BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { if (waitfor != MNT_WAIT || passone) continue; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_MTX(bo), "nfsfsync", slpflag, slptimeo); if (error == 0) { BUF_UNLOCK(bp); goto loop; } if (error == ENOLCK) { error = 0; goto loop; } if (called_from_renewthread != 0) { /* * Return EIO so the flush will be retried * later. */ error = EIO; goto done; } if (newnfs_sigintr(nmp, td)) { error = EINTR; goto done; } if (slpflag & PCATCH) { slpflag = 0; slptimeo = 2 * hz; } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { BUF_UNLOCK(bp); continue; } BO_UNLOCK(bo); bremfree(bp); if (passone || !commit) bp->b_flags |= B_ASYNC; else bp->b_flags |= B_ASYNC; bwrite(bp); if (newnfs_sigintr(nmp, td)) { error = EINTR; goto done; } goto loop; } if (passone) { passone = 0; BO_UNLOCK(bo); goto again; } if (waitfor == MNT_WAIT) { while (bo->bo_numoutput) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); if (called_from_renewthread != 0) { /* * Return EIO so that the flush will be * retried later. */ error = EIO; goto done; } error = newnfs_sigintr(nmp, td); if (error) goto done; if (slpflag & PCATCH) { slpflag = 0; slptimeo = 2 * hz; } BO_LOCK(bo); } } if (bo->bo_dirty.bv_cnt != 0 && commit) { BO_UNLOCK(bo); goto loop; } /* * Wait for all the async IO requests to drain */ BO_UNLOCK(bo); mtx_lock(&np->n_mtx); while (np->n_directio_asyncwr > 0) { np->n_flag |= NFSYNCWAIT; error = newnfs_msleep(td, &np->n_directio_asyncwr, &np->n_mtx, slpflag | (PRIBIO + 1), "nfsfsync", 0); if (error) { if (newnfs_sigintr(nmp, td)) { mtx_unlock(&np->n_mtx); error = EINTR; goto done; } } } mtx_unlock(&np->n_mtx); } else BO_UNLOCK(bo); + if (NFSHASPNFS(nmp)) + nfscl_layoutcommit(vp, td); mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } if (commit && bo->bo_dirty.bv_cnt == 0 && bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; mtx_unlock(&np->n_mtx); done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); if (error == 0 && commit != 0 && waitfor == MNT_WAIT && (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 || np->n_directio_asyncwr != 0) && trycnt++ < 5) { /* try, try again... */ passone = 1; wcred = NULL; bvec = NULL; bvecsize = 0; printf("try%d\n", trycnt); goto again; } return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlock(struct vop_advlock_args *ap) { struct vnode *vp = ap->a_vp; struct ucred *cred; struct nfsnode *np = VTONFS(ap->a_vp); struct proc *p = (struct proc *)ap->a_id; struct thread *td = curthread; /* XXX */ struct vattr va; int ret, error = EOPNOTSUPP; u_quad_t size; if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) { if (vp->v_type != VREG) return (EINVAL); if ((ap->a_flags & F_POSIX) != 0) cred = p->p_ucred; else cred = td->td_ucred; NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { NFSVOPUNLOCK(vp, 0); return (EBADF); } /* * If this is unlocking a write locked region, flush and * commit them before unlocking. This is required by * RFC3530 Sec. 9.3.2. */ if (ap->a_op == F_UNLCK && nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id, ap->a_flags)) (void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0); /* * Loop around doing the lock op, while a blocking lock * must wait for the lock op to succeed. */ do { ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && ap->a_op == F_SETLK) { NFSVOPUNLOCK(vp, 0); error = nfs_catnap(PZERO | PCATCH, ret, "ncladvl"); if (error) return (EINTR); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { NFSVOPUNLOCK(vp, 0); return (EBADF); } } } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && ap->a_op == F_SETLK); if (ret == NFSERR_DENIED) { NFSVOPUNLOCK(vp, 0); return (EAGAIN); } else if (ret == EINVAL || ret == EBADF || ret == EINTR) { NFSVOPUNLOCK(vp, 0); return (ret); } else if (ret != 0) { NFSVOPUNLOCK(vp, 0); return (EACCES); } /* * Now, if we just got a lock, invalidate data in the buffer * cache, as required, so that the coherency conforms with * RFC3530 Sec. 9.3.2. */ if (ap->a_op == F_SETLK) { if ((np->n_flag & NMODIFIED) == 0) { np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); ret = VOP_GETATTR(vp, &va, cred); } if ((np->n_flag & NMODIFIED) || ret || np->n_change != va.va_filerev) { (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); ret = VOP_GETATTR(vp, &va, cred); if (!ret) { np->n_mtime = va.va_mtime; np->n_change = va.va_filerev; } } } NFSVOPUNLOCK(vp, 0); return (0); } else if (!NFS_ISV4(vp)) { error = NFSVOPLOCK(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; NFSVOPUNLOCK(vp, 0); error = lf_advlock(ap, &(vp->v_lockf), size); } else { if (nfs_advlock_p != NULL) error = nfs_advlock_p(ap); else { NFSVOPUNLOCK(vp, 0); error = ENOLCK; } } } return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlockasync(struct vop_advlockasync_args *ap) { struct vnode *vp = ap->a_vp; u_quad_t size; int error; if (NFS_ISV4(vp)) return (EOPNOTSUPP); error = NFSVOPLOCK(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; NFSVOPUNLOCK(vp, 0); error = lf_advlockasync(ap, &(vp->v_lockf), size); } else { NFSVOPUNLOCK(vp, 0); error = EOPNOTSUPP; } return (error); } /* * Print out the contents of an nfsnode. */ static int nfs_print(struct vop_print_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); ncl_printf("\tfileid %ld fsid 0x%x", np->n_vattr.na_fileid, np->n_vattr.na_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * This is the "real" nfs::bwrite(struct buf*). * We set B_CACHE if this is a VMIO buffer. */ int ncl_writebp(struct buf *bp, int force __unused, struct thread *td) { int s; int oldflags = bp->b_flags; #if 0 int retv = 1; off_t off; #endif BUF_ASSERT_HELD(bp); if (bp->b_flags & B_INVAL) { brelse(bp); return(0); } bp->b_flags |= B_CACHE; /* * Undirty the bp. We will redirty it later if the I/O fails. */ s = splbio(); bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_iocmd = BIO_WRITE; bufobj_wref(bp->b_bufobj); curthread->td_ru.ru_oublock++; splx(s); /* * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ vfs_busy_pages(bp, 1); BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); if( (oldflags & B_ASYNC) == 0) { int rtval = bufwait(bp); if (oldflags & B_DELWRI) { s = splbio(); reassignbuf(bp); splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(struct vop_access_args *ap) { struct vattr *vap; struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; accmode_t accmode = ap->a_accmode; struct vattr vattr; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } vap = &vattr; error = VOP_GETATTR(vp, vap, cred); if (error) goto out; error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, accmode, cred, NULL); out: return error; } /* * Read wrapper for fifos. */ static int nfsfifo_read(struct vop_read_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); int error; /* * Set access flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NACC; getnanotime(&np->n_atim); mtx_unlock(&np->n_mtx); error = fifo_specops.vop_read(ap); return error; } /* * Write wrapper for fifos. */ static int nfsfifo_write(struct vop_write_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NUPD; getnanotime(&np->n_mtim); mtx_unlock(&np->n_mtx); return(fifo_specops.vop_write(ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; struct timespec ts; mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) { getnanotime(&ts); if (np->n_flag & NACC) np->n_atim = ts; if (np->n_flag & NUPD) np->n_mtim = ts; np->n_flag |= NCHG; if (vrefcnt(vp) == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; mtx_unlock(&np->n_mtx); (void)VOP_SETATTR(vp, &vattr, ap->a_cred); goto out; } } mtx_unlock(&np->n_mtx); out: return (fifo_specops.vop_close(ap)); } /* * Just call ncl_writebp() with the force argument set to 1. * * NOTE: B_DONE may or may not be set in a_bp on call. */ static int nfs_bwrite(struct buf *bp) { return (ncl_writebp(bp, 1, curthread)); } struct buf_ops buf_ops_newnfs = { .bop_name = "buf_ops_nfs", .bop_write = nfs_bwrite, .bop_strategy = bufstrategy, .bop_sync = bufsync, .bop_bdflush = bufbdflush, }; /* * Cloned from vop_stdlock(), and then the ugly hack added. */ static int nfs_lock1(struct vop_lock1_args *ap) { struct vnode *vp = ap->a_vp; int error = 0; /* * Since vfs_hash_get() calls vget() and it will no longer work * for FreeBSD8 with flags == 0, I can only think of this horrible * hack to work around it. I call vfs_hash_get() with LK_EXCLOTHER * and then handle it here. All I want for this case is a v_usecount * on the vnode to use for recovery, while another thread might * hold a lock on the vnode. I have the other threads blocked, so * there isn't any race problem. */ if ((ap->a_flags & LK_TYPE_MASK) == LK_EXCLOTHER) { if ((ap->a_flags & LK_INTERLOCK) == 0) panic("ncllock1"); if ((vp->v_iflag & VI_DOOMED)) error = ENOENT; VI_UNLOCK(vp); return (error); } return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp), LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file, ap->a_line)); } static int nfs_getacl(struct vop_getacl_args *ap) { int error; if (ap->a_type != ACL_TYPE_NFS4) return (EOPNOTSUPP); error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, NULL); if (error > NFSERR_STALE) { (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); error = EPERM; } return (error); } static int nfs_setacl(struct vop_setacl_args *ap) { int error; if (ap->a_type != ACL_TYPE_NFS4) return (EOPNOTSUPP); error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, NULL); if (error > NFSERR_STALE) { (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); error = EPERM; } return (error); } /* * Return POSIX pathconf information applicable to nfs filesystems. */ static int nfs_pathconf(struct vop_pathconf_args *ap) { struct nfsv3_pathconf pc; struct nfsvattr nfsva; struct vnode *vp = ap->a_vp; struct thread *td = curthread; int attrflag, error; if (NFS_ISV4(vp) || (NFS_ISV3(vp) && (ap->a_name == _PC_LINK_MAX || ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || ap->a_name == _PC_NO_TRUNC))) { /* * Since only the above 4 a_names are returned by the NFSv3 * Pathconf RPC, there is no point in doing it for others. */ error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva, &attrflag, NULL); if (attrflag != 0) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (error != 0) return (error); } else { /* * For NFSv2 (or NFSv3 when not one of the above 4 a_names), * just fake them. */ pc.pc_linkmax = LINK_MAX; pc.pc_namemax = NFS_MAXNAMLEN; pc.pc_notrunc = 1; pc.pc_chownrestricted = 1; pc.pc_caseinsensitive = 0; pc.pc_casepreserving = 1; error = 0; } switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = pc.pc_linkmax; break; case _PC_NAME_MAX: *ap->a_retval = pc.pc_namemax; break; case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; break; case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = pc.pc_chownrestricted; break; case _PC_NO_TRUNC: *ap->a_retval = pc.pc_notrunc; break; case _PC_ACL_EXTENDED: *ap->a_retval = 0; break; case _PC_ACL_NFS4: if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) *ap->a_retval = 1; else *ap->a_retval = 0; break; case _PC_ACL_PATH_MAX: if (NFS_ISV4(vp)) *ap->a_retval = ACL_MAX_ENTRIES; else *ap->a_retval = 3; break; case _PC_MAC_PRESENT: *ap->a_retval = 0; break; case _PC_ASYNC_IO: /* _PC_ASYNC_IO should have been handled by upper layers. */ KASSERT(0, ("_PC_ASYNC_IO should not get here")); error = EINVAL; break; case _PC_PRIO_IO: *ap->a_retval = 0; break; case _PC_SYNC_IO: *ap->a_retval = 0; break; case _PC_ALLOC_SIZE_MIN: *ap->a_retval = vp->v_mount->mnt_stat.f_bsize; break; case _PC_FILESIZEBITS: if (NFS_ISV34(vp)) *ap->a_retval = 64; else *ap->a_retval = 32; break; case _PC_REC_INCR_XFER_SIZE: *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_MAX_XFER_SIZE: *ap->a_retval = -1; /* means ``unlimited'' */ break; case _PC_REC_MIN_XFER_SIZE: *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_XFER_ALIGN: *ap->a_retval = PAGE_SIZE; break; case _PC_SYMLINK_MAX: *ap->a_retval = NFS_MAXPATHLEN; break; default: error = EINVAL; break; } return (error); } Index: head/sys/fs/nfsclient/nfsmount.h =================================================================== --- head/sys/fs/nfsclient/nfsmount.h (revision 244041) +++ head/sys/fs/nfsclient/nfsmount.h (revision 244042) @@ -1,120 +1,128 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFSCLIENT_NFSMOUNT_H_ #define _NFSCLIENT_NFSMOUNT_H_ #include /* * Mount structure. * One allocated on every NFS mount. * Holds NFS specific information for mount. */ struct nfsmount { struct nfsmount_common nm_com; /* Common fields for nlm */ int nm_numgrps; /* Max. size of groupslist */ u_char nm_fh[NFSX_FHMAX]; /* File handle of root dir */ int nm_fhsize; /* Size of root file handle */ struct nfssockreq nm_sockreq; /* Socket Info */ int nm_timeouts; /* Request timeouts */ int nm_rsize; /* Max size of read rpc */ int nm_wsize; /* Max size of write rpc */ int nm_readdirsize; /* Size of a readdir rpc */ int nm_readahead; /* Num. of blocks to readahead */ int nm_wcommitsize; /* Max size of commit for write */ int nm_acdirmin; /* Directory attr cache min lifetime */ int nm_acdirmax; /* Directory attr cache max lifetime */ int nm_acregmin; /* Reg file attr cache min lifetime */ int nm_acregmax; /* Reg file attr cache max lifetime */ u_char nm_verf[NFSX_VERF]; /* write verifier */ TAILQ_HEAD(, buf) nm_bufq; /* async io buffer queue */ short nm_bufqlen; /* number of buffers in queue */ short nm_bufqwant; /* process wants to add to the queue */ int nm_bufqiods; /* number of iods processing queue */ u_int64_t nm_maxfilesize; /* maximum file size */ int nm_tprintf_initial_delay; /* initial delay */ int nm_tprintf_delay; /* interval for messages */ int nm_nametimeo; /* timeout for +ve entries (sec) */ int nm_negnametimeo; /* timeout for -ve entries (sec) */ /* Newnfs additions */ + TAILQ_HEAD(, nfsclds) nm_sess; /* Session(s) for NFSv4.1. */ struct nfsclclient *nm_clp; uid_t nm_uid; /* Uid for SetClientID etc. */ u_int64_t nm_clval; /* identifies which clientid */ u_int64_t nm_fsid[2]; /* NFSv4 fsid */ + int nm_minorvers; /* Minor version # for NFSv4 */ u_int16_t nm_krbnamelen; /* Krb5 host principal, if any */ u_int16_t nm_dirpathlen; /* and mount dirpath, for V4 */ u_int16_t nm_srvkrbnamelen; /* and the server's target name */ u_char nm_name[1]; /* malloc'd actual len of krbname + dirpath */ }; #define nm_nam nm_sockreq.nr_nam #define nm_sotype nm_sockreq.nr_sotype #define nm_so nm_sockreq.nr_so #define nm_soflags nm_sockreq.nr_soflags #define nm_soproto nm_sockreq.nr_soproto #define nm_client nm_sockreq.nr_client #define nm_krbname nm_name #define nm_mtx nm_com.nmcom_mtx #define nm_flag nm_com.nmcom_flag #define nm_state nm_com.nmcom_state #define nm_mountp nm_com.nmcom_mountp #define nm_timeo nm_com.nmcom_timeo #define nm_retry nm_com.nmcom_retry #define nm_hostname nm_com.nmcom_hostname #define nm_getinfo nm_com.nmcom_getinfo #define nm_vinvalbuf nm_com.nmcom_vinvalbuf #define NFSMNT_DIRPATH(m) (&((m)->nm_name[(m)->nm_krbnamelen + 1])) #define NFSMNT_SRVKRBNAME(m) \ (&((m)->nm_name[(m)->nm_krbnamelen + (m)->nm_dirpathlen + 2])) #if defined(_KERNEL) /* * Convert mount ptr to nfsmount ptr. */ #define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data)) + +/* + * Get a pointer to the MDS session, which is always the first element + * in the list. + */ +#define NFSMNT_MDSSESSION(m) (&(TAILQ_FIRST(&((m)->nm_sess))->nfsclds_sess)) #ifndef NFS_DEFAULT_NAMETIMEO #define NFS_DEFAULT_NAMETIMEO 60 #endif #ifndef NFS_DEFAULT_NEGNAMETIMEO #define NFS_DEFAULT_NEGNAMETIMEO 60 #endif #endif /* _KERNEL */ #endif /* _NFSCLIENT_NFSMOUNT_H_ */ Index: head/sys/fs/nfsclient/nfsnode.h =================================================================== --- head/sys/fs/nfsclient/nfsnode.h (revision 244041) +++ head/sys/fs/nfsclient/nfsnode.h (revision 244042) @@ -1,191 +1,193 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFSCLIENT_NFSNODE_H_ #define _NFSCLIENT_NFSNODE_H_ #include /* * Silly rename structure that hangs off the nfsnode until the name * can be removed by nfs_inactive() */ struct sillyrename { struct task s_task; struct ucred *s_cred; struct vnode *s_dvp; long s_namlen; char s_name[32]; }; /* * This structure is used to save the logical directory offset to * NFS cookie mappings. * The mappings are stored in a list headed * by n_cookies, as required. * There is one mapping for each NFS_DIRBLKSIZ bytes of directory information * stored in increasing logical offset byte order. */ #define NFSNUMCOOKIES 31 struct nfsdmap { LIST_ENTRY(nfsdmap) ndm_list; int ndm_eocookie; union { nfsuint64 ndmu3_cookies[NFSNUMCOOKIES]; uint64_t ndmu4_cookies[NFSNUMCOOKIES]; } ndm_un1; }; #define ndm_cookies ndm_un1.ndmu3_cookies #define ndm4_cookies ndm_un1.ndmu4_cookies struct nfs_accesscache { u_int32_t mode; /* ACCESS mode cache */ uid_t uid; /* credentials having mode */ time_t stamp; /* mode cache timestamp */ }; /* * The nfsnode is the nfs equivalent to ufs's inode. Any similarity * is purely coincidental. * There is a unique nfsnode allocated for each active file, * each current directory, each mounted-on file, text file, and the root. * An nfsnode is 'named' by its file handle. (nget/nfs_node.c) * If this structure exceeds 256 bytes (it is currently 256 using 4.4BSD-Lite * type definitions), file handles of > 32 bytes should probably be split out * into a separate MALLOC()'d data structure. (Reduce the size of nfsfh_t by * changing the definition in nfsproto.h of NFS_SMALLFH.) * NB: Hopefully the current order of the fields is such that everything will * be well aligned and, therefore, tightly packed. */ struct nfsnode { struct mtx n_mtx; /* Protects all of these members */ u_quad_t n_size; /* Current size of file */ u_quad_t n_brev; /* Modify rev when cached */ u_quad_t n_lrev; /* Modify rev for lease */ struct nfsvattr n_vattr; /* Vnode attribute cache */ time_t n_attrstamp; /* Attr. cache timestamp */ struct nfs_accesscache n_accesscache[NFS_ACCESSCACHESIZE]; struct timespec n_mtime; /* Prev modify time. */ struct nfsfh *n_fhp; /* NFS File Handle */ struct vnode *n_vnode; /* associated vnode */ struct vnode *n_dvp; /* parent vnode */ struct lockf *n_lockf; /* Locking record of file */ int n_error; /* Save write error value */ union { struct timespec nf_atim; /* Special file times */ nfsuint64 nd_cookieverf; /* Cookie verifier (dir only) */ u_char nd4_cookieverf[NFSX_VERF]; } n_un1; union { struct timespec nf_mtim; off_t nd_direof; /* Dir. EOF offset cache */ } n_un2; union { struct sillyrename *nf_silly; /* Ptr to silly rename struct */ LIST_HEAD(, nfsdmap) nd_cook; /* cookies */ } n_un3; short n_fhsize; /* size in bytes, of fh */ u_int32_t n_flag; /* Flag for locking.. */ int n_directio_opens; int n_directio_asyncwr; u_int64_t n_change; /* old Change attribute */ struct nfsv4node *n_v4; /* extra V4 stuff */ struct ucred *n_writecred; /* Cred. for putpages */ }; #define n_atim n_un1.nf_atim #define n_mtim n_un2.nf_mtim #define n_sillyrename n_un3.nf_silly #define n_cookieverf n_un1.nd_cookieverf #define n4_cookieverf n_un1.nd4_cookieverf #define n_direofoffset n_un2.nd_direof #define n_cookies n_un3.nd_cook /* * Flags for n_flag */ #define NDIRCOOKIELK 0x00000001 /* Lock to serialize access to directory cookies */ #define NFSYNCWAIT 0x00000002 /* fsync waiting for all directio async writes to drain */ #define NMODIFIED 0x00000004 /* Might have a modified buffer in bio */ #define NWRITEERR 0x00000008 /* Flag write errors so close will know */ #define NCREATED 0x00000010 /* Opened by nfs_create() */ #define NTRUNCATE 0x00000020 /* Opened by nfs_setattr() */ #define NSIZECHANGED 0x00000040 /* File size has changed: need cache inval */ #define NNONCACHE 0x00000080 /* Node marked as noncacheable */ #define NACC 0x00000100 /* Special file accessed */ #define NUPD 0x00000200 /* Special file updated */ #define NCHG 0x00000400 /* Special file times changed */ #define NDELEGMOD 0x00000800 /* Modified delegation */ #define NDELEGRECALL 0x00001000 /* Recall in progress */ #define NREMOVEINPROG 0x00002000 /* Remove in progress */ #define NREMOVEWANT 0x00004000 /* Want notification that remove is done */ #define NLOCK 0x00008000 /* Sleep lock the node */ #define NLOCKWANT 0x00010000 /* Want the sleep lock */ +#define NNOLAYOUT 0x00020000 /* Can't get a layout for this file */ +#define NWRITEOPENED 0x00040000 /* Has been opened for writing */ /* * Convert between nfsnode pointers and vnode pointers */ #define VTONFS(vp) ((struct nfsnode *)(vp)->v_data) #define NFSTOV(np) ((struct vnode *)(np)->n_vnode) #define NFS_TIMESPEC_COMPARE(T1, T2) (((T1)->tv_sec != (T2)->tv_sec) || ((T1)->tv_nsec != (T2)->tv_nsec)) #if defined(_KERNEL) /* * Prototypes for NFS vnode operations */ int ncl_getpages(struct vop_getpages_args *); int ncl_putpages(struct vop_putpages_args *); int ncl_write(struct vop_write_args *); int ncl_inactive(struct vop_inactive_args *); int ncl_reclaim(struct vop_reclaim_args *); /* other stuff */ int ncl_removeit(struct sillyrename *, struct vnode *); int ncl_nget(struct mount *, u_int8_t *, int, struct nfsnode **, int); nfsuint64 *ncl_getcookie(struct nfsnode *, off_t, int); void ncl_invaldir(struct vnode *); int ncl_upgrade_vnlock(struct vnode *); void ncl_downgrade_vnlock(struct vnode *, int); void ncl_printf(const char *, ...); void ncl_dircookie_lock(struct nfsnode *); void ncl_dircookie_unlock(struct nfsnode *); #endif /* _KERNEL */ #endif /* _NFSCLIENT_NFSNODE_H_ */ Index: head/sys/fs/nfsserver/nfs_nfsdstate.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdstate.c (revision 244041) +++ head/sys/fs/nfsserver/nfs_nfsdstate.c (revision 244042) @@ -1,5293 +1,5294 @@ /*- * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include struct nfsrv_stablefirst nfsrv_stablefirst; int nfsrv_issuedelegs = 0; int nfsrv_dolocallocks = 0; struct nfsv4lock nfsv4rootfs_lock; extern int newnfs_numnfsd; extern struct nfsstats newnfsstats; extern int nfsrv_lease; extern struct timeval nfsboottime; extern u_int32_t newnfs_true, newnfs_false; NFSV4ROOTLOCKMUTEX; NFSSTATESPINLOCK; /* * Hash lists for nfs V4. * (Some would put them in the .h file, but I don't like declaring storage * in a .h) */ struct nfsclienthashhead nfsclienthash[NFSCLIENTHASHSIZE]; struct nfslockhashhead nfslockhash[NFSLOCKHASHSIZE]; #endif /* !APPLEKEXT */ static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0; static time_t nfsrvboottime; static int nfsrv_writedelegifpos = 1; static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0; static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER; static int nfsrv_nogsscallback = 0; /* local functions */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp); static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p); static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freenfslock(struct nfslock *lop); static void nfsrv_freenfslockfile(struct nfslockfile *lfp); static void nfsrv_freedeleg(struct nfsstate *); static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, u_int32_t flags, struct nfsstate **stpp); static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp); static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile **new_lfpp, fhandle_t *nfhp, NFSPROC_T *p); static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit); static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp); static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp); static int nfsrv_getipnumber(u_char *cp); static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid); static int nfsrv_checkgrace(u_int32_t flags); static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p); static u_int32_t nfsrv_nextclientindex(void); static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp); static void nfsrv_markstable(struct nfsclient *clp); static int nfsrv_checkstable(struct nfsclient *clp); static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct vnode *vp, NFSPROC_T *p); static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp); static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p); static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp); static time_t nfsrv_leaseexpiry(void); static void nfsrv_delaydelegtimeout(struct nfsstate *stp); static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op); static int nfsrv_nootherstate(struct nfsstate *stp); static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p); static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p); static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end); static void nfsrv_locklf(struct nfslockfile *lfp); static void nfsrv_unlocklf(struct nfslockfile *lfp); /* * Scan the client list for a match and either return the current one, * create a new entry or return an error. * If returning a non-error, the clp structure must either be linked into * the client list or free'd. */ APPLESTATIC int nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p) { struct nfsclient *clp = NULL, *new_clp = *new_clpp; int i, error = 0; struct nfsstate *stp, *tstp; struct sockaddr_in *sad, *rad; int zapit = 0, gotit, hasstate = 0, igotlock; static u_int64_t confirm_index = 0; /* * Check for state resource limit exceeded. */ if (nfsrv_openpluslock > NFSRV_V4STATELIMIT) { error = NFSERR_RESOURCE; goto out; } if (nfsrv_issuedelegs == 0 || ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0)) /* * Don't do callbacks when delegations are disabled or * for AUTH_GSS unless enabled via nfsrv_nogsscallback. * If establishing a callback connection is attempted * when a firewall is blocking the callback path, the * server may wait too long for the connect attempt to * succeed during the Open. Some clients, such as Linux, * may timeout and give up on the Open before the server * replies. Also, since AUTH_GSS callbacks are not * yet interoperability tested, they might cause the * server to crap out, if they get past the Init call to * the client. */ new_clp->lc_program = 0; /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. */ gotit = i = 0; while (i < NFSCLIENTHASHSIZE && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (new_clp->lc_idlen == clp->lc_idlen && !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } i++; } if (!gotit || (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) { /* * Get rid of the old one. */ if (i != NFSCLIENTHASHSIZE) { LIST_REMOVE(clp, lc_hash); nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); zapit = 1; } /* * Add it after assigning a client id to it. */ new_clp->lc_flags |= LCL_NEEDSCONFIRM; confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = (u_int32_t)nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); LIST_INIT(&new_clp->lc_open); LIST_INIT(&new_clp->lc_deleg); LIST_INIT(&new_clp->lc_olddeleg); for (i = 0; i < NFSSTATEHASHSIZE; i++) LIST_INIT(&new_clp->lc_stateid[i]); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); newnfsstats.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); if (zapit) nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* * Now, handle the cases where the id is already issued. */ if (nfsrv_notsamecredname(nd, clp)) { /* * Check to see if there is expired state that should go away. */ if (clp->lc_expiry < NFSD_MONOSEC && (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); } /* * If there is outstanding state, then reply NFSERR_CLIDINUSE per * RFC3530 Sec. 8.1.2 last para. */ if (!LIST_EMPTY(&clp->lc_deleg)) { hasstate = 1; } else if (LIST_EMPTY(&clp->lc_open)) { hasstate = 0; } else { hasstate = 0; /* Look for an Open on the OpenOwner */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { if (!LIST_EMPTY(&stp->ls_open)) { hasstate = 1; break; } } } if (hasstate) { /* * If the uid doesn't match, return NFSERR_CLIDINUSE after * filling out the correct ipaddr and portnum. */ sad = NFSSOCKADDR(new_clp->lc_req.nr_nam, struct sockaddr_in *); rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); sad->sin_addr.s_addr = rad->sin_addr.s_addr; sad->sin_port = rad->sin_port; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIDINUSE; goto out; } } if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) { /* * If the verifier has changed, the client has rebooted * and a new client id is issued. The old state info * can be thrown away once the SETCLIENTID_CONFIRM occurs. */ LIST_REMOVE(clp, lc_hash); new_clp->lc_flags |= LCL_NEEDSCONFIRM; confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); /* * Save the state until confirmed. */ LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < NFSSTATEHASHSIZE; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); newnfsstats.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsd clp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* * id and verifier match, so update the net address info * and get rid of any existing callback authentication * handle, so a new one will be acquired. */ LIST_REMOVE(clp, lc_hash); new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN); new_clp->lc_expiry = nfsrv_leaseexpiry(); confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = clp->lc_clientid.lval[0]; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = clp->lc_clientid.lval[1]; new_clp->lc_delegtime = clp->lc_delegtime; new_clp->lc_stateindex = clp->lc_stateindex; new_clp->lc_statemaxindex = clp->lc_statemaxindex; new_clp->lc_cbref = 0; LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < NFSSTATEHASHSIZE; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); newnfsstats.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsd clp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; out: NFSEXITCODE2(error, nd); return (error); } /* * Check to see if the client id exists and optionally confirm it. */ APPLESTATIC int nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp, nfsquad_t confirm, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int i; struct nfsclienthashhead *hp; int error = 0, igotlock, doneok; if (clpp) *clpp = NULL; if (nfsrvboottime != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } /* * If called with opflags == CLOPS_RENEW, the State Lock is * already held. Otherwise, we need to get either that or, * for the case of Confirm, lock out the nfsd threads. */ if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSLOCKSTATE(); } hp = NFSCLIENTHASH(clientid); LIST_FOREACH(clp, hp, lc_hash) { if (clp->lc_clientid.lval[1] == clientid.lval[1]) break; } if (clp == LIST_END(hp)) { if (opflags & CLOPS_CONFIRM) error = NFSERR_STALECLIENTID; else error = NFSERR_EXPIRED; } else if (clp->lc_flags & LCL_ADMINREVOKED) { /* * If marked admin revoked, just return the error. */ error = NFSERR_ADMINREVOKED; } if (error) { if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } goto out; } /* * Perform any operations specified by the opflags. */ if (opflags & CLOPS_CONFIRM) { if (clp->lc_confirm.qval != confirm.qval) error = NFSERR_STALECLIENTID; else if (nfsrv_notsamecredname(nd, clp)) error = NFSERR_CLIDINUSE; if (!error) { if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) == LCL_NEEDSCONFIRM) { /* * Hang onto the delegations (as old delegations) * for an Open with CLAIM_DELEGATE_PREV unless in * grace, but get rid of the rest of the state. */ nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_olddeleg); if (nfsrv_checkgrace(0)) { /* In grace, so just delete delegations */ nfsrv_freedeleglist(&clp->lc_deleg); } else { LIST_FOREACH(stp, &clp->lc_deleg, ls_list) stp->ls_flags |= NFSLCK_OLDDELEG; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg, ls_list); } } clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN); if (clp->lc_program) clp->lc_flags |= LCL_NEEDSCBNULL; } } else if (clp->lc_flags & LCL_NEEDSCONFIRM) { error = NFSERR_EXPIRED; } /* * If called by the Renew Op, we must check the principal. */ if (!error && (opflags & CLOPS_RENEWOP)) { if (nfsrv_notsamecredname(nd, clp)) { doneok = 0; for (i = 0; i < NFSSTATEHASHSIZE && doneok == 0; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if ((stp->ls_flags & NFSLCK_OPEN) && stp->ls_uid == nd->nd_cred->cr_uid) { doneok = 1; break; } } } if (!doneok) error = NFSERR_ACCES; } if (!error && (clp->lc_flags & LCL_CBDOWN)) error = NFSERR_CBPATHDOWN; } if ((!error || error == NFSERR_CBPATHDOWN) && (opflags & CLOPS_RENEW)) { clp->lc_expiry = nfsrv_leaseexpiry(); } if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } if (clpp) *clpp = clp; out: NFSEXITCODE2(error, nd); return (error); } /* * Called from the new nfssvc syscall to admin revoke a clientid. * Returns 0 for success, error otherwise. */ APPLESTATIC int nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p) { struct nfsclient *clp = NULL; int i, error = 0; int gotit, igotlock; /* * First, lock out the nfsd so that state won't change while the * revocation record is being written to the stable storage restart * file. */ NFSLOCKV4ROOTMUTEX(); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. */ gotit = i = 0; while (i < NFSCLIENTHASHSIZE && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (revokep->nclid_idlen == clp->lc_idlen && !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } i++; } if (!gotit) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); error = EPERM; goto out; } /* * Now, write out the revocation record */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); /* * and clear out the state, marking the clientid revoked. */ clp->lc_flags &= ~LCL_CALLBACKSON; clp->lc_flags |= LCL_ADMINREVOKED; nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); out: NFSEXITCODE(error); return (error); } /* * Dump out stats for all clients. Called from nfssvc(2), that is used * newnfsstats. */ APPLESTATIC void nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt) { struct nfsclient *clp; int i = 0, cnt = 0; /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired while dumping the clients. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); /* * Rattle through the client lists until done. */ while (i < NFSCLIENTHASHSIZE && cnt < maxcnt) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) { nfsrv_dumpaclient(clp, &dumpp[cnt]); cnt++; clp = LIST_NEXT(clp, lc_hash); } i++; } if (cnt < maxcnt) dumpp[cnt].ndcl_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd. */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp) { struct nfsstate *stp, *openstp, *lckownstp; struct nfslock *lop; struct sockaddr *sad; struct sockaddr_in *rad; struct sockaddr_in6 *rad6; dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0; dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0; dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0; dumpp->ndcl_flags = clp->lc_flags; dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen; NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen); sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *); dumpp->ndcl_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr; } /* * Now, scan the state lists and total up the opens and locks. */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { dumpp->ndcl_nopenowners++; LIST_FOREACH(openstp, &stp->ls_open, ls_list) { dumpp->ndcl_nopens++; LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) { dumpp->ndcl_nlockowners++; LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) { dumpp->ndcl_nlocks++; } } } } /* * and the delegation lists. */ LIST_FOREACH(stp, &clp->lc_deleg, ls_list) { dumpp->ndcl_ndelegs++; } LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { dumpp->ndcl_nolddelegs++; } } /* * Dump out lock stats for a file. */ APPLESTATIC void nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt, NFSPROC_T *p) { struct nfsstate *stp; struct nfslock *lop; int cnt = 0; struct nfslockfile *lfp; struct sockaddr *sad; struct sockaddr_in *rad; struct sockaddr_in6 *rad6; int ret; fhandle_t nfh; ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock on it cannot be acquired while dumping the locks. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); if (!ret) ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0); if (ret) { ldumpp[0].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); return; } /* * For each open share on file, dump it out. */ stp = LIST_FIRST(&lfp->lf_open); while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_openowner->ls_ownerlen; NFSBCOPY(stp->ls_openowner->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_openowner->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *); ldumpp[cnt].ndlck_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr; } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * and all locks. */ lop = LIST_FIRST(&lfp->lf_lock); while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) { stp = lop->lo_stp; ldumpp[cnt].ndlck_flags = lop->lo_flags; ldumpp[cnt].ndlck_first = lop->lo_first; ldumpp[cnt].ndlck_end = lop->lo_end; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen; NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *); ldumpp[cnt].ndlck_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr; } lop = LIST_NEXT(lop, lo_lckfile); cnt++; } /* * and the delegations. */ stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = 0; ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *); ldumpp[cnt].ndlck_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr; } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * If list isn't full, mark end of list by setting the client name * to zero length. */ if (cnt < maxcnt) ldumpp[cnt].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Server timer routine. It can scan any linked list, so long * as it holds the spin/mutex lock and there is no exclusive lock on * nfsv4rootfs_lock. * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok * to do this from a callout, since the spin locks work. For * Darwin, I'm not sure what will work correctly yet.) * Should be called once per second. */ APPLESTATIC void nfsrv_servertimer(void) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int got_ref, i; /* * Make sure nfsboottime is set. This is used by V3 as well * as V4. Note that nfsboottime is not nfsrvboottime, which is * only used by the V4 server for leases. */ if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); /* * If server hasn't started yet, just return. */ NFSLOCKSTATE(); if (nfsrv_stablefirst.nsf_eograce == 0) { NFSUNLOCKSTATE(); return; } if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) { if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) && NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); NFSUNLOCKSTATE(); return; } /* * Try and get a reference count on the nfsv4rootfs_lock so that * no nfsd thread can acquire an exclusive lock on it before this * call is done. If it is already exclusively locked, just return. */ NFSLOCKV4ROOTMUTEX(); got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); if (got_ref == 0) { NFSUNLOCKSTATE(); return; } /* * For each client... */ for (i = 0; i < NFSCLIENTHASHSIZE; i++) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i])) { nclp = LIST_NEXT(clp, lc_hash); if (!(clp->lc_flags & LCL_EXPIREIT)) { if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC && ((LIST_EMPTY(&clp->lc_deleg) && LIST_EMPTY(&clp->lc_open)) || nfsrv_clients > nfsrv_clienthighwater)) || (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC || (clp->lc_expiry < NFSD_MONOSEC && (nfsrv_openpluslock * 10 / 9) > NFSRV_V4STATELIMIT)) { /* * Lease has expired several nfsrv_lease times ago: * PLUS * - no state is associated with it * OR * - above high water mark for number of clients * (nfsrv_clienthighwater should be large enough * that this only occurs when clients fail to * use the same nfs_client_id4.id. Maybe somewhat * higher that the maximum number of clients that * will mount this server?) * OR * Lease has expired a very long time ago * OR * Lease has expired PLUS the number of opens + locks * has exceeded 90% of capacity * * --> Mark for expiry. The actual expiry will be done * by an nfsd sometime soon. */ clp->lc_flags |= LCL_EXPIREIT; nfsrv_stablefirst.nsf_flags |= (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT); } else { /* * If there are no opens, increment no open tick cnt * If time exceeds NFSNOOPEN, mark it to be thrown away * otherwise, if there is an open, reset no open time * Hopefully, this will avoid excessive re-creation * of open owners and subsequent open confirms. */ stp = LIST_FIRST(&clp->lc_open); while (stp != LIST_END(&clp->lc_open)) { nstp = LIST_NEXT(stp, ls_list); if (LIST_EMPTY(&stp->ls_open)) { stp->ls_noopens++; if (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > NFSRV_V4STATELIMIT) nfsrv_stablefirst.nsf_flags |= NFSNSF_NOOPENS; } else { stp->ls_noopens = 0; } stp = nstp; } } } clp = nclp; } } NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * The following set of functions free up the various data structures. */ /* * Clear out all open/lock state related to this nfsclient. * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that * there are no other active nfsd threads. */ APPLESTATIC void nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) nfsrv_freeopenowner(stp, 1, p); } /* * Free a client that has been cleaned. It should also already have been * removed from the lists. * (Just to be safe w.r.t. newnfs_disconnect(), call this function when * softclock interrupts are enabled.) */ APPLESTATIC void nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p) { #ifdef notyet if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) == (LCL_GSS | LCL_CALLBACKSON) && (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) && clp->lc_handlelen > 0) { clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE; clp->lc_hand.nfsh_flag |= NFSG_DESTROYED; (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL, NULL, 0, NULL, NULL, NULL, p); } #endif newnfs_disconnect(&clp->lc_req); NFSSOCKADDRFREE(clp->lc_req.nr_nam); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free((caddr_t)clp, M_NFSDCLIENT); NFSLOCKSTATE(); newnfsstats.srvclients--; nfsrv_openpluslock--; nfsrv_clients--; NFSUNLOCKSTATE(); } /* * Free a list of delegation state structures. * (This function will also free all nfslockfile structures that no * longer have associated state.) */ APPLESTATIC void nfsrv_freedeleglist(struct nfsstatehead *sthp) { struct nfsstate *stp, *nstp; LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) { nfsrv_freedeleg(stp); } LIST_INIT(sthp); } /* * Free up a delegation. */ static void nfsrv_freedeleg(struct nfsstate *stp) { struct nfslockfile *lfp; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); lfp = stp->ls_lfp; if (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && nfsv4_testlock(&lfp->lf_locallock_lck) == 0) nfsrv_freenfslockfile(lfp); FREE((caddr_t)stp, M_NFSDSTATE); newnfsstats.srvdelegates--; nfsrv_openpluslock--; nfsrv_delegatecnt--; } /* * This function frees an open owner and all associated opens. */ static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; LIST_REMOVE(stp, ls_list); /* * Now, free all associated opens. */ nstp = LIST_FIRST(&stp->ls_open); while (nstp != LIST_END(&stp->ls_open)) { tstp = nstp; nstp = LIST_NEXT(nstp, ls_list); (void) nfsrv_freeopen(tstp, NULL, cansleep, p); } if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); FREE((caddr_t)stp, M_NFSDSTATE); newnfsstats.srvopenowners--; nfsrv_openpluslock--; } /* * This function frees an open (nfsstate open structure) with all associated * lock_owners and locks. It also frees the nfslockfile structure iff there * are no other opens on the file. * Returns 1 if it free'd the nfslockfile, 0 otherwise. */ static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; struct nfslockfile *lfp; int ret; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); lfp = stp->ls_lfp; /* * Now, free all lockowners associated with this open. */ LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp) nfsrv_freelockowner(tstp, vp, cansleep, p); /* * The nfslockfile is freed here if there are no locks * associated with the open. * If there are locks associated with the open, the * nfslockfile structure can be freed via nfsrv_freelockowner(). * Acquire the state mutex to avoid races with calls to * nfsrv_getlockfile(). */ if (cansleep != 0) NFSLOCKSTATE(); if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) { nfsrv_freenfslockfile(lfp); ret = 1; } else ret = 0; if (cansleep != 0) NFSUNLOCKSTATE(); FREE((caddr_t)stp, M_NFSDSTATE); newnfsstats.srvopens--; nfsrv_openpluslock--; return (ret); } /* * Frees a lockowner and all associated locks. */ static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); nfsrv_freeallnfslocks(stp, vp, cansleep, p); if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); FREE((caddr_t)stp, M_NFSDSTATE); newnfsstats.srvlockowners--; nfsrv_openpluslock--; } /* * Free all the nfs locks on a lockowner. */ static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfslock *lop, *nlop; struct nfsrollback *rlp, *nrlp; struct nfslockfile *lfp = NULL; int gottvp = 0; vnode_t tvp = NULL; uint64_t first, end; lop = LIST_FIRST(&stp->ls_lock); while (lop != LIST_END(&stp->ls_lock)) { nlop = LIST_NEXT(lop, lo_lckowner); /* * Since all locks should be for the same file, lfp should * not change. */ if (lfp == NULL) lfp = lop->lo_lfp; else if (lfp != lop->lo_lfp) panic("allnfslocks"); /* * If vp is NULL and cansleep != 0, a vnode must be acquired * from the file handle. This only occurs when called from * nfsrv_cleanclient(). */ if (gottvp == 0) { if (nfsrv_dolocallocks == 0) tvp = NULL; else if (vp == NULL && cansleep != 0) tvp = nfsvno_getvp(&lfp->lf_fh); else tvp = vp; gottvp = 1; } if (tvp != NULL) { if (cansleep == 0) panic("allnfs2"); first = lop->lo_first; end = lop->lo_end; nfsrv_freenfslock(lop); nfsrv_localunlock(tvp, lfp, first, end, p); LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } else nfsrv_freenfslock(lop); lop = nlop; } if (vp == NULL && tvp != NULL) vput(tvp); } /* * Free an nfslock structure. */ static void nfsrv_freenfslock(struct nfslock *lop) { if (lop->lo_lckfile.le_prev != NULL) { LIST_REMOVE(lop, lo_lckfile); newnfsstats.srvlocks--; nfsrv_openpluslock--; } LIST_REMOVE(lop, lo_lckowner); FREE((caddr_t)lop, M_NFSDLOCK); } /* * This function frees an nfslockfile structure. */ static void nfsrv_freenfslockfile(struct nfslockfile *lfp) { LIST_REMOVE(lfp, lf_hash); FREE((caddr_t)lfp, M_NFSDLOCKFILE); } /* * This function looks up an nfsstate structure via stateid. */ static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags, struct nfsstate **stpp) { struct nfsstate *stp; struct nfsstatehead *hp; int error = 0; *stpp = NULL; hp = NFSSTATEHASH(clp, *stateidp); LIST_FOREACH(stp, hp, ls_hash) { if (!NFSBCMP(stp->ls_stateid.other, stateidp->other, NFSX_STATEIDOTHER)) break; } /* * If no state id in list, return NFSERR_BADSTATEID. */ if (stp == LIST_END(hp)) { error = NFSERR_BADSTATEID; goto out; } *stpp = stp; out: NFSEXITCODE(error); return (error); } /* * This function gets an nfsstate structure via owner string. */ static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp) { struct nfsstate *stp; *stpp = NULL; LIST_FOREACH(stp, hp, ls_list) { if (new_stp->ls_ownerlen == stp->ls_ownerlen && !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) { *stpp = stp; return; } } } /* * Lock control function called to update lock status. * Returns 0 upon success, -1 if there is no lock and the flags indicate * that one isn't to be created and an NFSERR_xxx for other errors. * The structures new_stp and new_lop are passed in as pointers that should * be set to NULL if the structure is used and shouldn't be free'd. * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are * never used and can safely be allocated on the stack. For all other * cases, *new_stpp and *new_lopp should be malloc'd before the call, * in case they are used. */ APPLESTATIC int nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp, struct nfslock **new_lopp, struct nfslockconflict *cfp, nfsquad_t clientid, nfsv4stateid_t *stateidp, __unused struct nfsexstuff *exp, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfslock *lop; struct nfsstate *new_stp = *new_stpp; struct nfslock *new_lop = *new_lopp; struct nfsstate *tstp, *mystp, *nstp; int specialid = 0; struct nfslockfile *lfp; struct nfslock *other_lop = NULL; struct nfsstate *stp, *lckstp = NULL; struct nfsclient *clp = NULL; u_int32_t bits; int error = 0, haslock = 0, ret, reterr; int getlckret, delegation = 0, filestruct_locked; fhandle_t nfh; uint64_t first, end; uint32_t lock_flags; if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Note the special cases of "all 1s" or "all 0s" stateids and * let reads with all 1s go ahead. */ if (new_stp->ls_stateid.seqid == 0x0 && new_stp->ls_stateid.other[0] == 0x0 && new_stp->ls_stateid.other[1] == 0x0 && new_stp->ls_stateid.other[2] == 0x0) specialid = 1; else if (new_stp->ls_stateid.seqid == 0xffffffff && new_stp->ls_stateid.other[0] == 0xffffffff && new_stp->ls_stateid.other[1] == 0xffffffff && new_stp->ls_stateid.other[2] == 0xffffffff) specialid = 2; } /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, specialid); if (error) goto out; /* * Check for state resource limit exceeded. */ if ((new_stp->ls_flags & NFSLCK_LOCK) && nfsrv_openpluslock > NFSRV_V4STATELIMIT) { error = NFSERR_RESOURCE; goto out; } /* * For the lock case, get another nfslock structure, * just in case we need it. * Malloc now, before we start sifting through the linked lists, * in case we have to wait for memory. */ tryagain: if (new_stp->ls_flags & NFSLCK_LOCK) MALLOC(other_lop, struct nfslock *, sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); filestruct_locked = 0; reterr = 0; lfp = NULL; /* * Get the lockfile structure for CFH now, so we can do a sanity * check against the stateid, before incrementing the seqid#, since * we want to return NFSERR_BADSTATEID on failure and the seqid# * shouldn't be incremented for this case. * If nfsrv_getlockfile() returns -1, it means "not found", which * will be handled later. * If we are doing Lock/LockU and local locking is enabled, sleep * lock the nfslockfile structure. */ getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p); NFSLOCKSTATE(); if (getlckret == 0) { if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 && nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) { getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 1); if (getlckret == 0) filestruct_locked = 1; } else getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 0); } if (getlckret != 0 && getlckret != -1) reterr = getlckret; if (filestruct_locked != 0) { LIST_INIT(&lfp->lf_rollback); if ((new_stp->ls_flags & NFSLCK_LOCK)) { /* * For local locking, do the advisory locking now, so * that any conflict can be detected. A failure later * can be rolled back locally. If an error is returned, * struct nfslockfile has been unlocked and any local * locking rolled back. */ NFSUNLOCKSTATE(); reterr = nfsrv_locallock(vp, lfp, (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)), new_lop->lo_first, new_lop->lo_end, cfp, p); NFSLOCKSTATE(); } } if (specialid == 0) { if (new_stp->ls_flags & NFSLCK_TEST) { /* * RFC 3530 does not list LockT as an op that renews a * lease, but the concensus seems to be that it is ok * for a server to do so. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, (nfsquad_t)((u_quad_t)0), NULL, p); /* * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid * error returns for LockT, just go ahead and test for a lock, * since there are no locks for this client, but other locks * can conflict. (ie. same client will always be false) */ if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED) error = 0; lckstp = new_stp; } else { error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, (nfsquad_t)((u_quad_t)0), NULL, p); if (error == 0) /* * Look up the stateid */ error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * do some sanity checks for an unconfirmed open or a * stateid that refers to the wrong file, for an open stateid */ if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) && ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) || (getlckret == 0 && stp->ls_lfp != lfp))) error = NFSERR_BADSTATEID; if (error == 0 && (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) && getlckret == 0 && stp->ls_lfp != lfp) error = NFSERR_BADSTATEID; /* * If the lockowner stateid doesn't refer to the same file, * I believe that is considered ok, since some clients will * only create a single lockowner and use that for all locks * on all files. * For now, log it as a diagnostic, instead of considering it * a BadStateid. */ if (error == 0 && (stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 && getlckret == 0 && stp->ls_lfp != lfp) { #ifdef DIAGNOSTIC printf("Got a lock statid for different file open\n"); #endif /* error = NFSERR_BADSTATEID; */ } if (error == 0) { if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) { /* * If haslock set, we've already checked the seqid. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); else error = NFSERR_BADSTATEID; } if (!error) nfsrv_getowner(&stp->ls_open, new_stp, &lckstp); if (lckstp) /* * I believe this should be an error, but it * isn't obvious what NFSERR_xxx would be * appropriate, so I'll use NFSERR_INVAL for now. */ error = NFSERR_INVAL; else lckstp = new_stp; } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) { /* * If haslock set, ditto above. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = NFSERR_BADSTATEID; else error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp, new_stp->ls_op); } lckstp = stp; } else { lckstp = stp; } } /* * If the seqid part of the stateid isn't the same, return * NFSERR_OLDSTATEID for cases other than I/O Ops. * For I/O Ops, only return NFSERR_OLDSTATEID if * nfsrv_returnoldstateid is set. (The concensus on the email * list was that most clients would prefer to not receive * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that * is what will happen, so I use the nfsrv_returnoldstateid to * allow for either server configuration.) */ if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid && (!(new_stp->ls_flags & NFSLCK_CHECK) || nfsrv_returnoldstateid)) error = NFSERR_OLDSTATEID; } } /* * Now we can check for grace. */ if (!error) error = nfsrv_checkgrace(new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If we successfully Reclaimed state, note that. */ if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error) nfsrv_markstable(clp); /* * At this point, either error == NFSERR_BADSTATEID or the * seqid# has been updated, so we can return any error. * If error == 0, there may be an error in: * nd_repstat - Set by the calling function. * reterr - Set above, if getting the nfslockfile structure * or acquiring the local lock failed. * (If both of these are set, nd_repstat should probably be * returned, since that error was detected before this * function call.) */ if (error != 0 || nd->nd_repstat != 0 || reterr != 0) { if (error == 0) { if (nd->nd_repstat != 0) error = nd->nd_repstat; else error = reterr; } if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Check the nfsrv_getlockfile return. * Returned -1 if no structure found. */ if (getlckret == -1) { error = NFSERR_EXPIRED; /* * Called from lockt, so no lock is OK. */ if (new_stp->ls_flags & NFSLCK_TEST) { error = 0; } else if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Called to check for a lock, OK if the stateid is all * 1s or all 0s, but there should be an nfsstate * otherwise. * (ie. If there is no open, I'll assume no share * deny bits.) */ if (specialid) error = 0; else error = NFSERR_BADSTATEID; } NFSUNLOCKSTATE(); goto out; } /* * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict. * For NFSLCK_CHECK, allow a read if write access is granted, * but check for a deny. For NFSLCK_LOCK, require correct access, * which implies a conflicting deny can't exist. */ if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) { /* * Four kinds of state id: * - specialid (all 0s or all 1s), only for NFSLCK_CHECK * - stateid for an open * - stateid for a delegation * - stateid for a lock owner */ if (!specialid) { if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { delegation = 1; mystp = stp; nfsrv_delaydelegtimeout(stp); } else if (stp->ls_flags & NFSLCK_OPEN) { mystp = stp; } else { mystp = stp->ls_openstp; } /* * If locking or checking, require correct access * bit set. */ if (((new_stp->ls_flags & NFSLCK_LOCK) && !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) & mystp->ls_flags & NFSLCK_ACCESSBITS)) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) == (NFSLCK_CHECK | NFSLCK_READACCESS) && !(mystp->ls_flags & NFSLCK_READACCESS)) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) == (NFSLCK_CHECK | NFSLCK_WRITEACCESS) && !(mystp->ls_flags & NFSLCK_WRITEACCESS))) { if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); error = NFSERR_OPENMODE; goto out; } } else mystp = NULL; if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) { /* * Check for a conflicting deny bit. */ LIST_FOREACH(tstp, &lfp->lf_open, ls_file) { if (tstp != mystp) { bits = tstp->ls_flags; bits >>= NFSLCK_SHIFT; if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) { ret = nfsrv_clientconflict(tstp->ls_clp, &haslock, vp, p); if (ret == 1) { /* * nfsrv_clientconflict unlocks state * when it returns non-zero. */ lckstp = NULL; goto tryagain; } if (ret == 0) NFSUNLOCKSTATE(); if (ret == 2) error = NFSERR_PERM; else error = NFSERR_OPENMODE; goto out; } } } /* We're outta here */ NFSUNLOCKSTATE(); goto out; } } /* * For setattr, just get rid of all the Delegations for other clients. */ if (new_stp->ls_flags & NFSLCK_SETATTR) { ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p); if (ret) { /* * nfsrv_cleandeleg() unlocks state when it * returns non-zero. */ if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } if (!(new_stp->ls_flags & NFSLCK_CHECK) || (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg))) { NFSUNLOCKSTATE(); goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * I currently believe the conflict algorithm to be: * For Lock Ops (Lock/LockT/LockU) * - there is a conflict iff a different client has a write delegation * For Reading (Read Op) * - there is a conflict iff a different client has a write delegation * (the specialids are always a different client) * For Writing (Write/Setattr of size) * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (I don't understand why this isn't allowed, but that seems to be * the current concensus?) */ tstp = LIST_FIRST(&lfp->lf_deleg); while (tstp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(tstp, ls_file); if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))|| ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_READ))) && clp != tstp->ls_clp && (tstp->ls_flags & NFSLCK_DELEGWRITE)) || ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_WRITE) && (clp != tstp->ls_clp || (tstp->ls_flags & NFSLCK_DELEGREAD)))) { if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } ret = nfsrv_delegconflict(tstp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict unlocks state when it * returns non-zero, which it always does. */ if (other_lop) { FREE((caddr_t)other_lop, M_NFSDLOCK); other_lop = NULL; } if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } /* Never gets here. */ } tstp = nstp; } /* * Handle the unlock case by calling nfsrv_updatelock(). * (Should I have done some access checking above for unlock? For now, * just let it happen.) */ if (new_stp->ls_flags & NFSLCK_UNLOCK) { first = new_lop->lo_first; end = new_lop->lo_end; nfsrv_updatelock(stp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(stp->ls_stateid.seqid); stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; if (filestruct_locked != 0) { NFSUNLOCKSTATE(); /* Update the local locks. */ nfsrv_localunlock(vp, lfp, first, end, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Search for a conflicting lock. A lock conflicts if: * - the lock range overlaps and * - at least one lock is a write lock and * - it is not owned by the same lock owner */ if (!delegation) { LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { if (new_lop->lo_end > lop->lo_first && new_lop->lo_first < lop->lo_end && (new_lop->lo_flags == NFSLCK_WRITE || lop->lo_flags == NFSLCK_WRITE) && lckstp != lop->lo_stp && (clp != lop->lo_stp->ls_clp || lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen || NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner, lckstp->ls_ownerlen))) { if (other_lop) { FREE((caddr_t)other_lop, M_NFSDLOCK); other_lop = NULL; } ret = nfsrv_clientconflict(lop->lo_stp->ls_clp,&haslock,vp,p); if (ret == 1) { if (filestruct_locked != 0) { /* Roll back local locks. */ nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); } /* * nfsrv_clientconflict() unlocks state when it * returns non-zero. */ lckstp = NULL; goto tryagain; } /* * Found a conflicting lock, so record the conflict and * return the error. */ if (cfp != NULL && ret == 0) { cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0]; cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1]; cfp->cl_first = lop->lo_first; cfp->cl_end = lop->lo_end; cfp->cl_flags = lop->lo_flags; cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen; NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner, cfp->cl_ownerlen); } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else if (new_stp->ls_flags & NFSLCK_CHECK) error = NFSERR_LOCKED; else error = NFSERR_DENIED; if (filestruct_locked != 0 && ret == 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } if (ret == 0) NFSUNLOCKSTATE(); goto out; } } } /* * We only get here if there was no lock that conflicted. */ if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) { NFSUNLOCKSTATE(); goto out; } /* * We only get here when we are creating or modifying a lock. * There are two variants: * - exist_lock_owner where lock_owner exists * - open_to_lock_owner with new lock_owner */ first = new_lop->lo_first; end = new_lop->lo_end; lock_flags = new_lop->lo_flags; if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) { nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(lckstp->ls_stateid.seqid); stateidp->other[0] = lckstp->ls_stateid.other[0]; stateidp->other[1] = lckstp->ls_stateid.other[1]; stateidp->other[2] = lckstp->ls_stateid.other[2]; } else { /* * The new open_to_lock_owner case. * Link the new nfsstate into the lists. */ new_stp->ls_seq = new_stp->ls_opentolockseq; nfsrvd_refcache(new_stp->ls_op); stateidp->seqid = new_stp->ls_stateid.seqid = 1; stateidp->other[0] = new_stp->ls_stateid.other[0] = clp->lc_clientid.lval[0]; stateidp->other[1] = new_stp->ls_stateid.other[1] = clp->lc_clientid.lval[1]; stateidp->other[2] = new_stp->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_stp->ls_clp = clp; LIST_INIT(&new_stp->ls_lock); new_stp->ls_openstp = stp; new_stp->ls_lfp = lfp; nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp, lfp); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid), new_stp, ls_hash); LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list); *new_lopp = NULL; *new_stpp = NULL; newnfsstats.srvlockowners++; nfsrv_openpluslock++; } if (filestruct_locked != 0) { NFSUNLOCKSTATE(); nfsrv_locallock_commit(lfp, lock_flags, first, end); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); out: if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (other_lop) FREE((caddr_t)other_lop, M_NFSDLOCK); NFSEXITCODE2(error, nd); return (error); } /* * Check for state errors for Open. * repstat is passed back out as an error if more critical errors * are not detected. */ APPLESTATIC int nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd, NFSPROC_T *p, int repstat) { struct nfsstate *stp, *nstp; struct nfsclient *clp; struct nfsstate *ownerstp; struct nfslockfile *lfp, *new_lfp; int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; /* * Check for state resource limit exceeded. * Technically this should be SMP protected, but the worst * case error is "out by one or two" on the count when it * returns NFSERR_RESOURCE and the limit is just a rather * arbitrary high water mark, so no harm is done. */ if (nfsrv_openpluslock > NFSRV_V4STATELIMIT) { error = NFSERR_RESOURCE; goto out; } tryagain: MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); if (vp) getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, &new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the nfsclient structure. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, (nfsquad_t)((u_quad_t)0), NULL, p); /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (!error && ownerstp) { error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp, new_stp->ls_op); /* * If the OpenOwner hasn't been confirmed, assume the * old one was a replay and this one is ok. * See: RFC3530 Sec. 14.2.18. */ if (error == NFSERR_BADSEQID && (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM)) error = 0; } /* * Check for grace. */ if (!error) error = nfsrv_checkgrace(new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If none of the above errors occurred, let repstat be * returned. */ if (repstat && !error) error = repstat; if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } free((caddr_t)new_lfp, M_NFSDLOCKFILE); goto out; } /* * If vp == NULL, the file doesn't exist yet, so return ok. * (This always happens on the first pass, so haslock must be 0.) */ if (vp == NULL) { NFSUNLOCKSTATE(); FREE((caddr_t)new_lfp, M_NFSDLOCKFILE); goto out; } /* * Get the structure for the underlying file. */ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) FREE((caddr_t)new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && stateidp->seqid == stp->ls_stateid.seqid && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } } /* * Check for access/deny bit conflicts. I check for the same * owner as well, in case the client didn't bother. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) && (((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks * state when it returns non-zero. */ goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) * I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current concensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) * Don't check for a Reclaim, since that will be dealt with * by nfsrv_openctrl(). */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ if (ret == -1) goto tryagain; error = ret; goto out; } } stp = nstp; } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE2(error, nd); return (error); } /* * Open control function to create/update open state for an open. */ APPLESTATIC int nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp, nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp, NFSPROC_T *p, u_quad_t filerev) { struct nfsstate *new_stp = *new_stpp; struct nfsstate *stp, *nstp; struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg; struct nfslockfile *lfp, *new_lfp; struct nfsclient *clp; int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1; int readonly = 0, cbret = 1, getfhret = 0; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). * (Paranoia, should have been detected by nfsrv_opencheck().) * If an error does show up, return NFSERR_EXPIRED, since the * the seqid# has already been incremented. */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) { printf("Nfsd: openctrl unexpected restart err=%d\n", error); error = NFSERR_EXPIRED; goto out; } tryagain: MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); MALLOC(new_open, struct nfsstate *, sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, &new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the client structure. Since the linked lists could be changed * by other nfsd processes if this process does a tsleep(), one of * two things must be done. * 1 - don't tsleep() * or * 2 - get the nfsv4_lock() { indicated by haslock == 1 } * before using the lists, since this lock stops the other * nfsd. This should only be used for rare cases, since it * essentially single threads the nfsd. * At this time, it is only done for cases where the stable * storage file must be written prior to completion of state * expiration. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, (nfsquad_t)((u_quad_t)0), NULL, p); if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) && clp->lc_program) { /* * This happens on the first open for a client * that supports callbacks. */ NFSUNLOCKSTATE(); /* * Although nfsrv_docallback() will sleep, clp won't * go away, since they are only removed when the * nfsv4_lock() has blocked the nfsd threads. The * fields in clp can change, but having multiple * threads do this Null callback RPC should be * harmless. */ cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL, NULL, 0, NULL, NULL, NULL, p); NFSLOCKSTATE(); clp->lc_flags &= ~LCL_NEEDSCBNULL; if (!cbret) clp->lc_flags |= LCL_CALLBACKSON; } /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (error) { NFSUNLOCKSTATE(); printf("Nfsd: openctrl unexpected state err=%d\n", error); free((caddr_t)new_lfp, M_NFSDLOCKFILE); free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } if (new_stp->ls_flags & NFSLCK_RECLAIM) nfsrv_markstable(clp); /* * Get the structure for the underlying file. */ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) FREE((caddr_t)new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected getlockfile err=%d\n", error); free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && stateidp->seqid == stp->ls_stateid.seqid && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected expiry\n"); free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } /* * Don't issue a Delegation, since one already exists and * delay delegation timeout, as required. */ delegate = 0; nfsrv_delaydelegtimeout(stp); } /* * Check for access/deny bit conflicts. I also check for the * same owner, since the client might not have bothered to check. * Also, note an open for the same file and owner, if found, * which is all we do here for Delegate_Cur, since conflict * checking is already done. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (ownerstp && stp->ls_openowner == ownerstp) openstp = stp; if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) { /* * If another client has the file open, the only * delegation that can be issued is a Read delegation * and only if it is a Read open with Deny none. */ if (clp != stp->ls_clp) { if ((stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) writedeleg = 0; else delegate = 0; } if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks state * when it returns non-zero. */ free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); openstp = NULL; goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); printf("nfsd openctrl unexpected client cnfl\n"); goto out; } } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) * I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current concensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD)) writedeleg = 0; else delegate = 0; if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { if (new_stp->ls_flags & NFSLCK_RECLAIM) { delegate = 2; } else { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ printf("Nfsd openctrl unexpected deleg cnfl\n"); free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); if (ret == -1) { openstp = NULL; goto tryagain; } error = ret; goto out; } } } stp = nstp; } } /* * We only get here if there was no open that conflicted. * If an open for the owner exists, or in the access/deny bits. * Otherwise it is a new open. If the open_owner hasn't been * confirmed, replace the open with the new one needing confirmation, * otherwise add the open. */ if (new_stp->ls_flags & NFSLCK_DELEGPREV) { /* * Handle NFSLCK_DELEGPREV by searching the old delegations for * a match. If found, just move the old delegation to the current * delegation list and issue open. If not found, return * NFSERR_EXPIRED. */ LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { if (stp->ls_lfp == lfp) { /* Found it */ if (stp->ls_clp != clp) panic("olddeleg clp"); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_hash); stp->ls_flags &= ~NFSLCK_OLDDELEG; stp->ls_stateid.seqid = delegstateidp->seqid = 0; stp->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; stp->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; stp->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); stp->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, stp->ls_stateid), stp, ls_hash); if (stp->ls_flags & NFSLCK_DELEGWRITE) *rflagsp |= NFSV4OPEN_WRITEDELEGATE; else *rflagsp |= NFSV4OPEN_READDELEGATE; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; /* * Now, do the associated open. */ new_open->ls_stateid.seqid = 0; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)| NFSLCK_OPEN; if (stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; newnfsstats.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; newnfsstats.srvopens++; nfsrv_openpluslock++; break; } } if (stp == LIST_END(&clp->lc_olddeleg)) error = NFSERR_EXPIRED; } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { /* * Scan to see that no delegation for this client and file * doesn't already exist. * There also shouldn't yet be an Open for this file and * openowner. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_clp == clp) break; } if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) { /* * This is the Claim_Previous case with a delegation * type != Delegate_None. */ /* * First, add the delegation. (Although we must issue the * delegation, we can also ask for an immediate return.) */ new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (new_stp->ls_flags & NFSLCK_DELEGWRITE) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; if (delegate == 2 || nfsrv_issuedelegs == 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) || !NFSVNO_DELEGOK(vp)) *rflagsp |= NFSV4OPEN_RECALL; newnfsstats.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; /* * Now, do the associated open. */ new_open->ls_stateid.seqid = 0; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) | NFSLCK_OPEN; if (new_stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; newnfsstats.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; newnfsstats.srvopens++; nfsrv_openpluslock++; } else { error = NFSERR_RECLAIMCONFLICT; } } else if (ownerstp) { if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) { /* Replace the open */ if (ownerstp->ls_op) nfsrvd_derefcache(ownerstp->ls_op); ownerstp->ls_op = new_stp->ls_op; nfsrvd_refcache(ownerstp->ls_op); ownerstp->ls_seq = new_stp->ls_seq; *rflagsp |= NFSV4OPEN_RESULTCONFIRM; stp = LIST_FIRST(&ownerstp->ls_open); stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; stp->ls_stateid.seqid = 0; stp->ls_uid = new_stp->ls_uid; if (lfp != stp->ls_lfp) { LIST_REMOVE(stp, ls_file); LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file); stp->ls_lfp = lfp; } openstp = stp; } else if (openstp) { openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS); openstp->ls_stateid.seqid++; /* * This is where we can choose to issue a delegation. */ if (delegate && nfsrv_issuedelegs && writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) == LCL_CALLBACKSON && !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) && NFSVNO_DELEGOK(vp)) { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; newnfsstats.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } else { new_open->ls_stateid.seqid = 0; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)| NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; new_open->ls_openowner = ownerstp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; newnfsstats.srvopens++; nfsrv_openpluslock++; /* * This is where we can choose to issue a delegation. */ if (delegate && nfsrv_issuedelegs && (writedeleg || readonly) && (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) == LCL_CALLBACKSON && !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) && NFSVNO_DELEGOK(vp)) { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly)) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; newnfsstats.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } } else { /* * New owner case. Start the open_owner sequence with a * Needs confirmation (unless a reclaim) and hang the * new open off it. */ new_open->ls_stateid.seqid = 0; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; LIST_INIT(&new_open->ls_open); new_open->ls_openowner = new_stp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); if (new_stp->ls_flags & NFSLCK_RECLAIM) { new_stp->ls_flags = 0; } else { *rflagsp |= NFSV4OPEN_RESULTCONFIRM; new_stp->ls_flags = NFSLCK_NEEDSCONFIRM; } nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; *new_stpp = NULL; newnfsstats.srvopens++; nfsrv_openpluslock++; newnfsstats.srvopenowners++; nfsrv_openpluslock++; } if (!error) { stateidp->seqid = openstp->ls_stateid.seqid; stateidp->other[0] = openstp->ls_stateid.other[0]; stateidp->other[1] = openstp->ls_stateid.other[1]; stateidp->other[2] = openstp->ls_stateid.other[2]; } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (new_open) FREE((caddr_t)new_open, M_NFSDSTATE); if (new_deleg) FREE((caddr_t)new_deleg, M_NFSDSTATE); out: NFSEXITCODE2(error, nd); return (error); } /* * Open update. Does the confirm, downgrade and close. */ APPLESTATIC int nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfsstate *stp, *ownerstp; struct nfsclient *clp; struct nfslockfile *lfp; u_int32_t bits; int error = 0, gotstate = 0, len = 0; u_char client[NFSV4_OPAQUELIMIT]; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, (nfsquad_t)((u_quad_t)0), NULL, p); if (!error) error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * Sanity check the open. */ if (!error && (!(stp->ls_flags & NFSLCK_OPEN) || (!(new_stp->ls_flags & NFSLCK_CONFIRM) && (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) || ((new_stp->ls_flags & NFSLCK_CONFIRM) && (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))))) error = NFSERR_BADSTATEID; if (!error) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid && !(new_stp->ls_flags & NFSLCK_CONFIRM)) error = NFSERR_OLDSTATEID; if (!error && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) error = NFSERR_ISDIR; else error = NFSERR_INVAL; } if (error) { /* * If a client tries to confirm an Open with a bad * seqid# and there are no byte range locks or other Opens * on the openowner, just throw it away, so the next use of the * openowner will start a fresh seq#. */ if (error == NFSERR_BADSEQID && (new_stp->ls_flags & NFSLCK_CONFIRM) && nfsrv_nootherstate(stp)) nfsrv_freeopenowner(stp->ls_openowner, 0, p); NFSUNLOCKSTATE(); goto out; } /* * Set the return stateid. */ stateidp->seqid = stp->ls_stateid.seqid + 1; stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; /* * Now, handle the three cases. */ if (new_stp->ls_flags & NFSLCK_CONFIRM) { /* * If the open doesn't need confirmation, it seems to me that * there is a client error, but I'll just log it and keep going? */ if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) printf("Nfsv4d: stray open confirm\n"); stp->ls_openowner->ls_flags = 0; stp->ls_stateid.seqid++; if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, client, len); gotstate = 1; } NFSUNLOCKSTATE(); } else if (new_stp->ls_flags & NFSLCK_CLOSE) { ownerstp = stp->ls_openowner; lfp = stp->ls_lfp; if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) { /* Get the lf lock */ nfsrv_locklf(lfp); NFSUNLOCKSTATE(); if (nfsrv_freeopen(stp, vp, 1, p) == 0) { NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); } } else { (void) nfsrv_freeopen(stp, NULL, 0, p); NFSUNLOCKSTATE(); } } else { /* * Update the share bits, making sure that the new set are a * subset of the old ones. */ bits = (new_stp->ls_flags & NFSLCK_SHAREBITS); if (~(stp->ls_flags) & bits) { NFSUNLOCKSTATE(); error = NFSERR_INVAL; goto out; } stp->ls_flags = (bits | NFSLCK_OPEN); stp->ls_stateid.seqid++; NFSUNLOCKSTATE(); } /* * If the client just confirmed its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(client, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: NFSEXITCODE2(error, nd); return (error); } /* * Delegation update. Does the purge and return. */ APPLESTATIC int nfsrv_delegupdate(nfsquad_t clientid, nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred, NFSPROC_T *p) { struct nfsstate *stp; struct nfsclient *clp; int error = 0; fhandle_t fh; /* * Do a sanity check against the file handle for DelegReturn. */ if (vp) { error = nfsvno_getfh(vp, &fh, p); if (error) goto out; } /* * Check for restart conditions (client and server). */ if (op == NFSV4OP_DELEGRETURN) error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN, stateidp, 0); else error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE, stateidp, 0); NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ if (!error) error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, (nfsquad_t)((u_quad_t)0), NULL, p); if (error) { if (error == NFSERR_CBPATHDOWN) error = 0; if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN) error = NFSERR_STALESTATEID; } if (!error && op == NFSV4OP_DELEGRETURN) { error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp); if (!error && stp->ls_stateid.seqid != stateidp->seqid) error = NFSERR_OLDSTATEID; } /* * NFSERR_EXPIRED means that the state has gone away, * so Delegations have been purged. Just return ok. */ if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) { NFSUNLOCKSTATE(); error = 0; goto out; } if (error) { NFSUNLOCKSTATE(); goto out; } if (op == NFSV4OP_DELEGRETURN) { if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh, sizeof (fhandle_t))) { NFSUNLOCKSTATE(); error = NFSERR_BADSTATEID; goto out; } nfsrv_freedeleg(stp); } else { nfsrv_freedeleglist(&clp->lc_olddeleg); } NFSUNLOCKSTATE(); error = 0; out: NFSEXITCODE(error); return (error); } /* * Release lock owner. */ APPLESTATIC int nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid, NFSPROC_T *p) { struct nfsstate *stp, *nstp, *openstp, *ownstp; struct nfsclient *clp; int error = 0; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; NFSLOCKSTATE(); /* * Get the lock owner by name. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, (nfsquad_t)((u_quad_t)0), NULL, p); if (error) { NFSUNLOCKSTATE(); goto out; } LIST_FOREACH(ownstp, &clp->lc_open, ls_list) { LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) { stp = LIST_FIRST(&openstp->ls_open); while (stp != LIST_END(&openstp->ls_open)) { nstp = LIST_NEXT(stp, ls_list); /* * If the owner matches, check for locks and * then free or return an error. */ if (stp->ls_ownerlen == new_stp->ls_ownerlen && !NFSBCMP(stp->ls_owner, new_stp->ls_owner, stp->ls_ownerlen)){ if (LIST_EMPTY(&stp->ls_lock)) { nfsrv_freelockowner(stp, NULL, 0, p); } else { NFSUNLOCKSTATE(); error = NFSERR_LOCKSHELD; goto out; } } stp = nstp; } } } NFSUNLOCKSTATE(); out: NFSEXITCODE(error); return (error); } /* * Get the file handle for a lock structure. */ static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile **new_lfpp, fhandle_t *nfhp, NFSPROC_T *p) { fhandle_t *fhp = NULL; struct nfslockfile *new_lfp; int error; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { new_lfp = *new_lfpp; fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfh"); } error = nfsvno_getfh(vp, fhp, p); NFSEXITCODE(error); return (error); } /* * Get an nfs lock structure. Allocate one, as required, and return a * pointer to it. * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock. */ static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit) { struct nfslockfile *lfp; fhandle_t *fhp = NULL, *tfhp; struct nfslockhashhead *hp; struct nfslockfile *new_lfp = NULL; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { new_lfp = *new_lfpp; fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfile"); } hp = NFSLOCKHASH(fhp); LIST_FOREACH(lfp, hp, lf_hash) { tfhp = &lfp->lf_fh; if (NFSVNO_CMPFH(fhp, tfhp)) { if (lockit) nfsrv_locklf(lfp); *lfpp = lfp; return (0); } } if (!(flags & NFSLCK_OPEN)) return (-1); /* * No match, so chain the new one into the list. */ LIST_INIT(&new_lfp->lf_open); LIST_INIT(&new_lfp->lf_lock); LIST_INIT(&new_lfp->lf_deleg); LIST_INIT(&new_lfp->lf_locallock); LIST_INIT(&new_lfp->lf_rollback); new_lfp->lf_locallock_lck.nfslock_usecnt = 0; new_lfp->lf_locallock_lck.nfslock_lock = 0; new_lfp->lf_usecount = 0; LIST_INSERT_HEAD(hp, new_lfp, lf_hash); *lfpp = new_lfp; *new_lfpp = NULL; return (0); } /* * This function adds a nfslock lock structure to the list for the associated * nfsstate and nfslockfile structures. It will be inserted after the * entry pointed at by insert_lop. */ static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp) { struct nfslock *lop, *nlop; new_lop->lo_stp = stp; new_lop->lo_lfp = lfp; if (stp != NULL) { /* Insert in increasing lo_first order */ lop = LIST_FIRST(&lfp->lf_lock); if (lop == LIST_END(&lfp->lf_lock) || new_lop->lo_first <= lop->lo_first) { LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile); } else { nlop = LIST_NEXT(lop, lo_lckfile); while (nlop != LIST_END(&lfp->lf_lock) && nlop->lo_first < new_lop->lo_first) { lop = nlop; nlop = LIST_NEXT(lop, lo_lckfile); } LIST_INSERT_AFTER(lop, new_lop, lo_lckfile); } } else { new_lop->lo_lckfile.le_prev = NULL; /* list not used */ } /* * Insert after insert_lop, which is overloaded as stp or lfp for * an empty list. */ if (stp == NULL && (struct nfslockfile *)insert_lop == lfp) LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner); else if ((struct nfsstate *)insert_lop == stp) LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner); else LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner); if (stp != NULL) { newnfsstats.srvlocks++; nfsrv_openpluslock++; } } /* * This function updates the locking for a lock owner and given file. It * maintains a list of lock ranges ordered on increasing file offset that * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style). * It always adds new_lop to the list and sometimes uses the one pointed * at by other_lopp. */ static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp) { struct nfslock *new_lop = *new_lopp; struct nfslock *lop, *tlop, *ilop; struct nfslock *other_lop = *other_lopp; int unlock = 0, myfile = 0; u_int64_t tmp; /* * Work down the list until the lock is merged. */ if (new_lop->lo_flags & NFSLCK_UNLOCK) unlock = 1; if (stp != NULL) { ilop = (struct nfslock *)stp; lop = LIST_FIRST(&stp->ls_lock); } else { ilop = (struct nfslock *)lfp; lop = LIST_FIRST(&lfp->lf_locallock); } while (lop != NULL) { /* * Only check locks for this file that aren't before the start of * new lock's range. */ if (lop->lo_lfp == lfp) { myfile = 1; if (lop->lo_end >= new_lop->lo_first) { if (new_lop->lo_end < lop->lo_first) { /* * If the new lock ends before the start of the * current lock's range, no merge, just insert * the new lock. */ break; } if (new_lop->lo_flags == lop->lo_flags || (new_lop->lo_first <= lop->lo_first && new_lop->lo_end >= lop->lo_end)) { /* * This lock can be absorbed by the new lock/unlock. * This happens when it covers the entire range * of the old lock or is contiguous * with the old lock and is of the same type or an * unlock. */ if (lop->lo_first < new_lop->lo_first) new_lop->lo_first = lop->lo_first; if (lop->lo_end > new_lop->lo_end) new_lop->lo_end = lop->lo_end; tlop = lop; lop = LIST_NEXT(lop, lo_lckowner); nfsrv_freenfslock(tlop); continue; } /* * All these cases are for contiguous locks that are not the * same type, so they can't be merged. */ if (new_lop->lo_first <= lop->lo_first) { /* * This case is where the new lock overlaps with the * first part of the old lock. Move the start of the * old lock to just past the end of the new lock. The * new lock will be inserted in front of the old, since * ilop hasn't been updated. (We are done now.) */ lop->lo_first = new_lop->lo_end; break; } if (new_lop->lo_end >= lop->lo_end) { /* * This case is where the new lock overlaps with the * end of the old lock's range. Move the old lock's * end to just before the new lock's first and insert * the new lock after the old lock. * Might not be done yet, since the new lock could * overlap further locks with higher ranges. */ lop->lo_end = new_lop->lo_first; ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); continue; } /* * The final case is where the new lock's range is in the * middle of the current lock's and splits the current lock * up. Use *other_lopp to handle the second part of the * split old lock range. (We are done now.) * For unlock, we use new_lop as other_lop and tmp, since * other_lop and new_lop are the same for this case. * We noted the unlock case above, so we don't need * new_lop->lo_flags any longer. */ tmp = new_lop->lo_first; if (other_lop == NULL) { if (!unlock) panic("nfsd srv update unlock"); other_lop = new_lop; *new_lopp = NULL; } other_lop->lo_first = new_lop->lo_end; other_lop->lo_end = lop->lo_end; other_lop->lo_flags = lop->lo_flags; other_lop->lo_stp = stp; other_lop->lo_lfp = lfp; lop->lo_end = tmp; nfsrv_insertlock(other_lop, lop, stp, lfp); *other_lopp = NULL; ilop = lop; break; } } ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); if (myfile && (lop == NULL || lop->lo_lfp != lfp)) break; } /* * Insert the new lock in the list at the appropriate place. */ if (!unlock) { nfsrv_insertlock(new_lop, ilop, stp, lfp); *new_lopp = NULL; } } /* * This function handles sequencing of locks, etc. * It returns an error that indicates what the caller should do. */ static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op) { int error = 0; if (op != nd->nd_rp) panic("nfsrvstate checkseqid"); if (!(op->rc_flag & RC_INPROG)) panic("nfsrvstate not inprog"); if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) { printf("refcnt=%d\n", stp->ls_op->rc_refcnt); panic("nfsrvstate op refcnt"); } if ((stp->ls_seq + 1) == seqid) { if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); stp->ls_op = op; nfsrvd_refcache(op); stp->ls_seq = seqid; goto out; } else if (stp->ls_seq == seqid && stp->ls_op && op->rc_xid == stp->ls_op->rc_xid && op->rc_refcnt == 0 && op->rc_reqlen == stp->ls_op->rc_reqlen && op->rc_cksum == stp->ls_op->rc_cksum) { if (stp->ls_op->rc_flag & RC_INPROG) { error = NFSERR_DONTREPLY; goto out; } nd->nd_rp = stp->ls_op; nd->nd_rp->rc_flag |= RC_INPROG; nfsrvd_delcache(op); error = NFSERR_REPLYFROMCACHE; goto out; } error = NFSERR_BADSEQID; out: NFSEXITCODE2(error, nd); return (error); } /* * Get the client ip address for callbacks. If the strings can't be parsed, * just set lc_program to 0 to indicate no callbacks are possible. * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set * the address to the client's transport address. This won't be used * for callbacks, but can be printed out by newnfsstats for info.) * Return error if the xdr can't be parsed, 0 otherwise. */ APPLESTATIC int nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp) { u_int32_t *tl; u_char *cp, *cp2; int i, j; struct sockaddr_in *rad, *sad; u_char protocol[5], addr[24]; int error = 0, cantparse = 0; union { u_long ival; u_char cval[4]; } ip; union { u_short sval; u_char cval[2]; } port; rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); rad->sin_family = AF_INET; rad->sin_len = sizeof (struct sockaddr_in); rad->sin_addr.s_addr = 0; rad->sin_port = 0; clp->lc_req.nr_client = NULL; clp->lc_req.nr_lock = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i >= 3 && i <= 4) { error = nfsrv_mtostr(nd, protocol, i); if (error) goto nfsmout; if (!strcmp(protocol, "tcp")) { clp->lc_flags |= LCL_TCPCALLBACK; clp->lc_req.nr_sotype = SOCK_STREAM; clp->lc_req.nr_soproto = IPPROTO_TCP; } else if (!strcmp(protocol, "udp")) { clp->lc_req.nr_sotype = SOCK_DGRAM; clp->lc_req.nr_soproto = IPPROTO_UDP; } else { cantparse = 1; } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (i == 0) { cantparse = 1; } else if (!cantparse && i <= 23 && i >= 11) { error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; /* * Parse out the address fields. We expect 6 decimal numbers * separated by '.'s. */ cp = addr; i = 0; while (*cp && i < 6) { cp2 = cp; while (*cp2 && *cp2 != '.') cp2++; if (*cp2) *cp2++ = '\0'; else if (i != 5) { cantparse = 1; break; } j = nfsrv_getipnumber(cp); if (j >= 0) { if (i < 4) ip.cval[3 - i] = j; else port.cval[5 - i] = j; } else { cantparse = 1; break; } cp = cp2; i++; } if (!cantparse) { if (ip.ival != 0x0) { rad->sin_addr.s_addr = htonl(ip.ival); rad->sin_port = htons(port.sval); } else { cantparse = 1; } } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } if (cantparse) { sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); rad->sin_addr.s_addr = sad->sin_addr.s_addr; rad->sin_port = 0x0; clp->lc_program = 0; } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Turn a string of up to three decimal digits into a number. Return -1 upon * error. */ static int nfsrv_getipnumber(u_char *cp) { int i = 0, j = 0; while (*cp) { if (j > 2 || *cp < '0' || *cp > '9') return (-1); i *= 10; i += (*cp - '0'); cp++; j++; } if (i < 256) return (i); return (-1); } /* * This function checks for restart conditions. */ static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid) { int ret = 0; /* * First check for a server restart. Open, LockT, ReleaseLockOwner * and DelegPurge have a clientid, the rest a stateid. */ if (flags & (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) { if (clientid.lval[0] != nfsrvboottime) { ret = NFSERR_STALECLIENTID; goto out; } } else if (stateidp->other[0] != nfsrvboottime && specialid == 0) { ret = NFSERR_STALESTATEID; goto out; } /* * Read, Write, Setattr and LockT can return NFSERR_GRACE and do * not use a lock/open owner seqid#, so the check can be done now. * (The others will be checked, as required, later.) */ if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST))) goto out; NFSLOCKSTATE(); ret = nfsrv_checkgrace(flags); NFSUNLOCKSTATE(); out: NFSEXITCODE(ret); return (ret); } /* * Check for grace. */ static int nfsrv_checkgrace(u_int32_t flags) { int error = 0; if (nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) { if (flags & NFSLCK_RECLAIM) { error = NFSERR_NOGRACE; goto out; } } else { if (!(flags & NFSLCK_RECLAIM)) { error = NFSERR_GRACE; goto out; } /* * If grace is almost over and we are still getting Reclaims, * extend grace a bit. */ if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; } out: NFSEXITCODE(error); return (error); } /* * Do a server callback. */ static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p) { mbuf_t m; u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct ucred *cred; int error = 0; u_int32_t callback; cred = newnfs_getcred(); NFSLOCKSTATE(); /* mostly for lc_cbref++ */ if (clp->lc_flags & LCL_NEEDSCONFIRM) { NFSUNLOCKSTATE(); panic("docallb"); } clp->lc_cbref++; /* * Fill the callback program# and version into the request * structure for newnfs_connect() to use. */ clp->lc_req.nr_prog = clp->lc_program; clp->lc_req.nr_vers = NFSV4_CBVERS; /* * First, fill in some of the fields of nd and cr. */ nd->nd_flag = ND_NFSV4; if (clp->lc_flags & LCL_GSS) nd->nd_flag |= ND_KERBV; nd->nd_repstat = 0; cred->cr_uid = clp->lc_uid; cred->cr_gid = clp->lc_gid; callback = clp->lc_callback; NFSUNLOCKSTATE(); cred->cr_ngroups = 1; /* * Get the first mbuf for the request. */ MGET(m, M_WAITOK, MT_DATA); mbuf_setlen(m, 0); nd->nd_mreq = nd->nd_mb = m; nd->nd_bpos = NFSMTOD(m, caddr_t); /* * and build the callback request. */ if (procnum == NFSV4OP_CBGETATTR) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; (void) nfsm_strtom(nd, "CB Getattr", 10); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(NFSV4OP_CBGETATTR); (void) nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); (void) nfsrv_putattrbit(nd, attrbitp); } else if (procnum == NFSV4OP_CBRECALL) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; (void) nfsm_strtom(nd, "CB Recall", 9); NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(NFSV4_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(1); *tl++ = txdr_unsigned(NFSV4OP_CBRECALL); *tl++ = txdr_unsigned(stateidp->seqid); NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (trunc) *tl = newnfs_true; else *tl = newnfs_false; (void) nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); } else { nd->nd_procnum = NFSV4PROC_CBNULL; } /* * Call newnfs_connect(), as required, and then newnfs_request(). */ (void) newnfs_sndlock(&clp->lc_req.nr_lock); if (clp->lc_req.nr_client == NULL) { if (nd->nd_procnum == NFSV4PROC_CBNULL) error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 1); else error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 3); } newnfs_sndunlock(&clp->lc_req.nr_lock); if (!error) { error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, - NULL, cred, clp->lc_program, NFSV4_CBVERS, NULL, 1, NULL); + NULL, cred, clp->lc_program, NFSV4_CBVERS, NULL, 1, NULL, + NULL); } NFSFREECRED(cred); /* * If error is set here, the Callback path isn't working * properly, so twiddle the appropriate LCL_ flags. * (nd_repstat != 0 indicates the Callback path is working, * but the callback failed on the client.) */ if (error) { /* * Mark the callback pathway down, which disabled issuing * of delegations and gets Renew to return NFSERR_CBPATHDOWN. */ NFSLOCKSTATE(); clp->lc_flags |= LCL_CBDOWN; NFSUNLOCKSTATE(); } else { /* * Callback worked. If the callback path was down, disable * callbacks, so no more delegations will be issued. (This * is done on the assumption that the callback pathway is * flakey.) */ NFSLOCKSTATE(); if (clp->lc_flags & LCL_CBDOWN) clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON); NFSUNLOCKSTATE(); if (nd->nd_repstat) error = nd->nd_repstat; else if (procnum == NFSV4OP_CBGETATTR) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, NULL); mbuf_freem(nd->nd_mrep); } NFSLOCKSTATE(); clp->lc_cbref--; if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) { clp->lc_flags &= ~LCL_WAKEUPWANTED; wakeup(clp); } NFSUNLOCKSTATE(); NFSEXITCODE(error); return (error); } /* * Return the next index# for a clientid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around, * it should be rebooted. * At an average rate of one new client per second, it will wrap around in * approximately 136 years. (I think the server will have been shut * down or rebooted before then.) */ static u_int32_t nfsrv_nextclientindex(void) { static u_int32_t client_index = 0; client_index++; if (client_index != 0) return (client_index); printf("%s: out of clientids\n", __func__); return (client_index); } /* * Return the next index# for a stateid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around * (will a BSD server stay up that long?), find * new start and end values. */ static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp) { struct nfsstate *stp; int i; u_int32_t canuse, min_index, max_index; if (!(clp->lc_flags & LCL_INDEXNOTOK)) { clp->lc_stateindex++; if (clp->lc_stateindex != clp->lc_statemaxindex) return (clp->lc_stateindex); } /* * Yuck, we've hit the end. * Look for a new min and max. */ min_index = 0; max_index = 0xffffffff; for (i = 0; i < NFSSTATEHASHSIZE; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if (stp->ls_stateid.other[2] > 0x80000000) { if (stp->ls_stateid.other[2] < max_index) max_index = stp->ls_stateid.other[2]; } else { if (stp->ls_stateid.other[2] > min_index) min_index = stp->ls_stateid.other[2]; } } } /* * Yikes, highly unlikely, but I'll handle it anyhow. */ if (min_index == 0x80000000 && max_index == 0x80000001) { canuse = 0; /* * Loop around until we find an unused entry. Return that * and set LCL_INDEXNOTOK, so the search will continue next time. * (This is one of those rare cases where a goto is the * cleanest way to code the loop.) */ tryagain: for (i = 0; i < NFSSTATEHASHSIZE; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if (stp->ls_stateid.other[2] == canuse) { canuse++; goto tryagain; } } } clp->lc_flags |= LCL_INDEXNOTOK; return (canuse); } /* * Ok to start again from min + 1. */ clp->lc_stateindex = min_index + 1; clp->lc_statemaxindex = max_index; clp->lc_flags &= ~LCL_INDEXNOTOK; return (clp->lc_stateindex); } /* * The following functions handle the stable storage file that deals with * the edge conditions described in RFC3530 Sec. 8.6.3. * The file is as follows: * - a single record at the beginning that has the lease time of the * previous server instance (before the last reboot) and the nfsrvboottime * values for the previous server boots. * These previous boot times are used to ensure that the current * nfsrvboottime does not, somehow, get set to a previous one. * (This is important so that Stale ClientIDs and StateIDs can * be recognized.) * The number of previous nfsvrboottime values preceeds the list. * - followed by some number of appended records with: * - client id string * - flag that indicates it is a record revoking state via lease * expiration or similar * OR has successfully acquired state. * These structures vary in length, with the client string at the end, up * to NFSV4_OPAQUELIMIT in size. * * At the end of the grace period, the file is truncated, the first * record is rewritten with updated information and any acquired state * records for successful reclaims of state are written. * * Subsequent records are appended when the first state is issued to * a client and when state is revoked for a client. * * When reading the file in, state issued records that come later in * the file override older ones, since the append log is in cronological order. * If, for some reason, the file can't be read, the grace period is * immediately terminated and all reclaims get NFSERR_NOGRACE. */ /* * Read in the stable storage file. Called by nfssvc() before the nfsd * processes start servicing requests. */ APPLESTATIC void nfsrv_setupstable(NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfsrv_stable *sp, *nsp; struct nfst_rec *tsp; int error, i, tryagain; off_t off = 0; ssize_t aresid, len; struct timeval curtime; /* * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without * a reboot, so state has not been lost. */ if (sf->nsf_flags & NFSNSF_UPDATEDONE) return; /* * Set Grace over just until the file reads successfully. */ NFSGETTIME(&curtime); nfsrvboottime = curtime.tv_sec; LIST_INIT(&sf->nsf_head); sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; if (sf->nsf_fp == NULL) return; error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid || sf->nsf_numboots == 0 || sf->nsf_numboots > NFSNSF_MAXNUMBOOTS) return; /* * Now, read in the boottimes. */ sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) * sizeof (time_t), M_TEMP, M_WAITOK); off = sizeof (struct nfsf_rec); error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid) { free((caddr_t)sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } /* * Make sure this nfsrvboottime is different from all recorded * previous ones. */ do { tryagain = 0; for (i = 0; i < sf->nsf_numboots; i++) { if (nfsrvboottime == sf->nsf_bootvals[i]) { nfsrvboottime++; tryagain = 1; break; } } } while (tryagain); sf->nsf_flags |= NFSNSF_OK; off += (sf->nsf_numboots * sizeof (time_t)); /* * Read through the file, building a list of records for grace * checking. * Each record is between sizeof (struct nfst_rec) and * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1 * and is actually sizeof (struct nfst_rec) + nst_len - 1. */ tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK); do { error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid; if (error || (len > 0 && (len < sizeof (struct nfst_rec) || len < (sizeof (struct nfst_rec) + tsp->len - 1)))) { /* * Yuck, the file has been corrupted, so just return * after clearing out any restart state, so the grace period * is over. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { LIST_REMOVE(sp, nst_list); free((caddr_t)sp, M_TEMP); } free((caddr_t)tsp, M_TEMP); sf->nsf_flags &= ~NFSNSF_OK; free((caddr_t)sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } if (len > 0) { off += sizeof (struct nfst_rec) + tsp->len - 1; /* * Search the list for a matching client. */ LIST_FOREACH(sp, &sf->nsf_head, nst_list) { if (tsp->len == sp->nst_len && !NFSBCMP(tsp->client, sp->nst_client, tsp->len)) break; } if (sp == LIST_END(&sf->nsf_head)) { sp = (struct nfsrv_stable *)malloc(tsp->len + sizeof (struct nfsrv_stable) - 1, M_TEMP, M_WAITOK); NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec, sizeof (struct nfst_rec) + tsp->len - 1); LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list); } else { if (tsp->flag == NFSNST_REVOKE) sp->nst_flag |= NFSNST_REVOKE; else /* * A subsequent timestamp indicates the client * did a setclientid/confirm and any previous * revoke is no longer relevant. */ sp->nst_flag &= ~NFSNST_REVOKE; } } } while (len > 0); free((caddr_t)tsp, M_TEMP); sf->nsf_flags = NFSNSF_OK; sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease + NFSRV_LEASEDELTA; } /* * Update the stable storage file, now that the grace period is over. */ APPLESTATIC void nfsrv_updatestable(NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfsrv_stable *sp, *nsp; int i; struct nfsvattr nva; vnode_t vp; #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000) mount_t mp = NULL; #endif int error; if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE)) return; sf->nsf_flags |= NFSNSF_UPDATEDONE; /* * Ok, we need to rewrite the stable storage file. * - truncate to 0 length * - write the new first structure * - loop through the data structures, writing out any that * have timestamps older than the old boot */ if (sf->nsf_bootvals) { sf->nsf_numboots++; for (i = sf->nsf_numboots - 2; i >= 0; i--) sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i]; } else { sf->nsf_numboots = 1; sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t), M_TEMP, M_WAITOK); } sf->nsf_bootvals[0] = nfsrvboottime; sf->nsf_lease = nfsrv_lease; NFSVNO_ATTRINIT(&nva); NFSVNO_SETATTRVAL(&nva, size, 0); vp = NFSFPVNODE(sf->nsf_fp); vn_start_write(vp, &mp, V_WAIT); if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p, NULL); NFSVOPUNLOCK(vp, 0); } else error = EPERM; vn_finished_write(mp); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0, UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), (off_t)(sizeof (struct nfsf_rec)), UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); free((caddr_t)sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); return; } sf->nsf_flags |= NFSNSF_OK; /* * Loop through the list and write out timestamp records for * any clients that successfully reclaimed state. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { if (sp->nst_flag & NFSNST_GOTSTATE) { nfsrv_writestable(sp->nst_client, sp->nst_len, NFSNST_NEWSTATE, p); sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE; } LIST_REMOVE(sp, nst_list); free((caddr_t)sp, M_TEMP); } nfsrv_backupstable(); } /* * Append a record to the stable storage file. */ APPLESTATIC void nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfst_rec *sp; int error; if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL) return; sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + len - 1, M_TEMP, M_WAITOK); sp->len = len; NFSBCOPY(client, sp->client, len); sp->flag = flag; error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp), (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0, UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p); free((caddr_t)sp, M_TEMP); if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); } } /* * This function is called during the grace period to mark a client * that successfully reclaimed state. */ static void nfsrv_markstable(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First find the client structure. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } if (sp == LIST_END(&nfsrv_stablefirst.nsf_head)) return; /* * Now, just mark it and set the nfsclient back pointer. */ sp->nst_flag |= NFSNST_GOTSTATE; sp->nst_clp = clp; } /* * This function is called for a reclaim, to see if it gets grace. * It returns 0 if a reclaim is allowed, 1 otherwise. */ static int nfsrv_checkstable(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First, find the entry for the client. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } /* * If not in the list, state was revoked or no state was issued * since the previous reboot, a reclaim is denied. */ if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) || (sp->nst_flag & NFSNST_REVOKE) || !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK)) return (1); return (0); } /* * Test for and try to clear out a conflicting client. This is called by * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients * a found. * The trick here is that it can't revoke a conflicting client with an * expired lease unless it holds the v4root lock, so... * If no v4root lock, get the lock and return 1 to indicate "try again". * Return 0 to indicate the conflict can't be revoked and 1 to indicate * the revocation worked and the conflicting client is "bye, bye", so it * can be tried again. * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK(). * Unlocks State before a non-zero value is returned. */ static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp, NFSPROC_T *p) { int gotlock, lktype; /* * If lease hasn't expired, we can't fix it. */ if (clp->lc_expiry >= NFSD_MONOSEC || !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) return (0); if (*haslockp == 0) { NFSUNLOCKSTATE(); lktype = NFSVOPISLOCKED(vp); NFSVOPUNLOCK(vp, 0); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; NFSVOPLOCK(vp, lktype | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) != 0) return (2); else return (1); } NFSUNLOCKSTATE(); /* * Ok, we can expire the conflicting client. */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); nfsrv_zapclient(clp, p); return (1); } /* * Resolve a delegation conflict. * Returns 0 to indicate the conflict was resolved without sleeping. * Return -1 to indicate that the caller should check for conflicts again. * Return > 0 for an error that should be returned, normally NFSERR_DELAY. * * Also, manipulate the nfsv4root_lock, as required. It isn't changed * for a return of 0, since there was no sleep and it could be required * later. It is released for a return of NFSERR_DELAY, since the caller * will return that error. It is released when a sleep was done waiting * for the delegation to be returned or expire (so that other nfsds can * handle ops). Then, it must be acquired for the write to stable storage. * (This function is somewhat similar to nfsrv_clientconflict(), but * the semantics differ in a couple of subtle ways. The return of 0 * indicates the conflict was resolved without sleeping here, not * that the conflict can't be resolved and the handling of nfsv4root_lock * differs, as noted above.) * Unlocks State before returning a non-zero value. */ static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp) { struct nfsclient *clp = stp->ls_clp; int gotlock, error, lktype, retrycnt, zapped_clp; nfsv4stateid_t tstateid; fhandle_t tfh; /* * If the conflict is with an old delegation... */ if (stp->ls_flags & NFSLCK_OLDDELEG) { /* * You can delete it, if it has expired. */ if (clp->lc_delegtime < NFSD_MONOSEC) { nfsrv_freedeleg(stp); NFSUNLOCKSTATE(); error = -1; goto out; } NFSUNLOCKSTATE(); /* * During this delay, the old delegation could expire or it * could be recovered by the client via an Open with * CLAIM_DELEGATE_PREV. * Release the nfsv4root_lock, if held. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * It's a current delegation, so: * - check to see if the delegation has expired * - if so, get the v4root lock and then expire it */ if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) { /* * - do a recall callback, since not yet done * For now, never allow truncate to be set. To use * truncate safely, it must be guaranteed that the * Remove, Rename or Setattr with size of 0 will * succeed and that would require major changes to * the VFS/Vnode OPs. * Set the expiry time large enough so that it won't expire * until after the callback, then set it correctly, once * the callback is done. (The delegation will now time * out whether or not the Recall worked ok. The timeout * will be extended when ops are done on the delegation * stateid, up to the timelimit.) */ stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_flags |= NFSLCK_DELEGRECALL; /* * Loop NFSRV_CBRETRYCNT times while the CBRecall replies * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done * in order to try and avoid a race that could happen * when a CBRecall request passed the Open reply with * the delegation in it when transitting the network. * Since nfsrv_docallback will sleep, don't use stp after * the call. */ NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid, sizeof (tstateid)); NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh, sizeof (tfh)); NFSUNLOCKSTATE(); if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } retrycnt = 0; do { error = nfsrv_docallback(clp, NFSV4OP_CBRECALL, &tstateid, 0, &tfh, NULL, NULL, p); retrycnt++; } while ((error == NFSERR_BADSTATEID || error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT); error = NFSERR_DELAY; goto out; } if (clp->lc_expiry >= NFSD_MONOSEC && stp->ls_delegtime >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); /* * A recall has been done, but it has not yet expired. * So, RETURN_DELAY. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * If we don't yet have the lock, just get it and then return, * since we need that before deleting expired state, such as * this delegation. * When getting the lock, unlock the vnode, so other nfsds that * are in progress, won't get stuck waiting for the vnode lock. */ if (*haslockp == 0) { NFSUNLOCKSTATE(); lktype = NFSVOPISLOCKED(vp); NFSVOPUNLOCK(vp, 0); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; NFSVOPLOCK(vp, lktype | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) != 0) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_PERM; goto out; } error = -1; goto out; } NFSUNLOCKSTATE(); /* * Ok, we can delete the expired delegation. * First, write the Revoke record to stable storage and then * clear out the conflict. * Since all other nfsd threads are now blocked, we can safely * sleep without the state changing. */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); if (clp->lc_expiry < NFSD_MONOSEC) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); zapped_clp = 1; } else { nfsrv_freedeleg(stp); zapped_clp = 0; } if (zapped_clp) nfsrv_zapclient(clp, p); error = -1; out: NFSEXITCODE(error); return (error); } /* * Check for a remove allowed, if remove is set to 1 and get rid of * delegations. */ APPLESTATIC int nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p) { struct nfsstate *stp; struct nfslockfile *lfp; int error, haslock = 0; fhandle_t nfh; /* * First, get the lock file structure. * (A return of -1 means no associated state, so remove ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); tryagain: NFSLOCKSTATE(); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (error == -1) error = 0; goto out; } /* * Now, we must Recall any delegations. */ error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p); if (error) { /* * nfsrv_cleandeleg() unlocks state for non-zero * return. */ if (error == -1) goto tryagain; if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Now, look for a conflicting open share. */ if (remove) { LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (stp->ls_flags & NFSLCK_WRITEDENY) { error = NFSERR_FILEOPEN; break; } } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE(error); return (error); } /* * Clear out all delegations for the file referred to by lfp. * May return NFSERR_DELAY, if there will be a delay waiting for * delegations to expire. * Returns -1 to indicate it slept while recalling a delegation. * This function has the side effect of deleting the nfslockfile structure, * if it no longer has associated state and didn't have to sleep. * Unlocks State before a non-zero value is returned. */ static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; int ret = 0; stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp) { ret = nfsrv_delegconflict(stp, haslockp, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ goto out; } } stp = nstp; } out: NFSEXITCODE(ret); return (ret); } /* * There are certain operations that, when being done outside of NFSv4, * require that any NFSv4 delegation for the file be recalled. * This function is to be called for those cases: * VOP_RENAME() - When a delegation is being recalled for any reason, * the client may have to do Opens against the server, using the file's * final component name. If the file has been renamed on the server, * that component name will be incorrect and the Open will fail. * VOP_REMOVE() - Theoretically, a client could Open a file after it has * been removed on the server, if there is a delegation issued to * that client for the file. I say "theoretically" since clients * normally do an Access Op before the Open and that Access Op will * fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so * they will detect the file's removal in the same manner. (There is * one case where RFC3530 allows a client to do an Open without first * doing an Access Op, which is passage of a check against the ACE * returned with a Write delegation, but current practice is to ignore * the ACE and always do an Access Op.) * Since the functions can only be called with an unlocked vnode, this * can't be done at this time. * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range * locks locally in the client, which are not visible to the server. To * deal with this, issuing of delegations for a vnode must be disabled * and all delegations for the vnode recalled. This is done via the * second function, using the VV_DISABLEDELEG vflag on the vnode. */ APPLESTATIC void nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p) { struct timespec mytime; int32_t starttime; int error; /* * First, check to see if the server is currently running and it has * been called for a regular file when issuing delegations. */ if (newnfs_numnfsd == 0 || vp->v_type != VREG || nfsrv_issuedelegs == 0) return; KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp)); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired by another thread. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); /* * Now, call nfsrv_checkremove() in a loop while it returns * NFSERR_DELAY. Return upon any other error or when timed out. */ NFSGETNANOTIME(&mytime); starttime = (u_int32_t)mytime.tv_sec; do { if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(vp, 0, p); NFSVOPUNLOCK(vp, 0); } else error = EPERM; if (error == NFSERR_DELAY) { NFSGETNANOTIME(&mytime); if (((u_int32_t)mytime.tv_sec - starttime) > NFS_REMOVETIMEO && ((u_int32_t)mytime.tv_sec - starttime) < 100000) break; /* Sleep for a short period of time */ (void) nfs_catnap(PZERO, 0, "nfsremove"); } } while (error == NFSERR_DELAY); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } APPLESTATIC void nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p) { #ifdef VV_DISABLEDELEG /* * First, flag issuance of delegations disabled. */ atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG); #endif /* * Then call nfsd_recalldelegation() to get rid of all extant * delegations. */ nfsd_recalldelegation(vp, p); } /* * Check for conflicting locks, etc. and then get rid of delegations. * (At one point I thought that I should get rid of delegations for any * Setattr, since it could potentially disallow the I/O op (read or write) * allowed by the delegation. However, Setattr Ops that aren't changing * the size get a stateid of all 0s, so you can't tell if it is a delegation * for the same client or a different one, so I decided to only get rid * of delegations for other clients when the size is being changed.) * In general, a Setattr can disable NFS I/O Ops that are outstanding, such * as Write backs, even if there is no delegation, so it really isn't any * different?) */ APPLESTATIC int nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct nfsexstuff *exp, NFSPROC_T *p) { struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; int error = 0; nfsquad_t clientid; if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) { stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_first = nvap->na_size; } else { stp->ls_flags = 0; lop->lo_first = 0; } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL)) stp->ls_flags |= NFSLCK_SETATTR; if (stp->ls_flags == 0) goto out; lop->lo_end = NFS64BITSSET; lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = stateidp->seqid; clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0]; clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1]; stp->ls_stateid.other[2] = stateidp->other[2]; error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, stateidp, exp, nd, p); out: NFSEXITCODE2(error, nd); return (error); } /* * Check for a write delegation and do a CBGETATTR if there is one, updating * the attributes, as required. * Should I return an error if I can't get the attributes? (For now, I'll * just return ok. */ APPLESTATIC int nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred, NFSPROC_T *p) { struct nfsstate *stp; struct nfslockfile *lfp; struct nfsclient *clp; struct nfsvattr nva; fhandle_t nfh; int error = 0; nfsattrbit_t cbbits; u_quad_t delegfilerev; NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits); if (!NFSNONZERO_ATTRBIT(&cbbits)) goto out; /* * Get the lock file structure. * (A return of -1 means no associated state, so return ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); NFSLOCKSTATE(); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (error == -1) error = 0; goto out; } /* * Now, look for a write delegation. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_flags & NFSLCK_DELEGWRITE) break; } if (stp == LIST_END(&lfp->lf_deleg)) { NFSUNLOCKSTATE(); goto out; } clp = stp->ls_clp; delegfilerev = stp->ls_filerev; /* * If the Write delegation was issued as a part of this Compound RPC * or if we have an Implied Clientid (used in a previous Op in this * compound) and it is the client the delegation was issued to, * just return ok. * I also assume that it is from the same client iff the network * host IP address is the same as the callback address. (Not * exactly correct by the RFC, but avoids a lot of Getattr * callbacks.) */ if (nd->nd_compref == stp->ls_compref || ((nd->nd_flag & ND_IMPLIEDCLID) && clp->lc_clientid.qval == nd->nd_clientid.qval) || nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) { NFSUNLOCKSTATE(); goto out; } /* * We are now done with the delegation state structure, * so the statelock can be released and we can now tsleep(). */ /* * Now, we must do the CB Getattr callback, to see if Change or Size * has changed. */ if (clp->lc_expiry >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); NFSVNO_ATTRINIT(&nva); nva.na_filerev = NFS64BITSSET; error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL, 0, &nfh, &nva, &cbbits, p); if (!error) { if ((nva.na_filerev != NFS64BITSSET && nva.na_filerev > delegfilerev) || (NFSVNO_ISSETSIZE(&nva) && nva.na_size != nvap->na_size)) { nfsvno_updfilerev(vp, nvap, cred, p); if (NFSVNO_ISSETSIZE(&nva)) nvap->na_size = nva.na_size; } } } else { NFSUNLOCKSTATE(); } error = 0; out: NFSEXITCODE2(error, nd); return (error); } /* * This function looks for openowners that haven't had any opens for * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS * is set. */ APPLESTATIC void nfsrv_throwawayopens(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int i; NFSLOCKSTATE(); nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS; /* * For each client... */ for (i = 0; i < NFSCLIENTHASHSIZE; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) { if (LIST_EMPTY(&stp->ls_open) && (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > NFSRV_V4STATELIMIT)) nfsrv_freeopenowner(stp, 0, p); } } } NFSUNLOCKSTATE(); } /* * This function checks to see if the credentials are the same. * Returns 1 for not same, 0 otherwise. */ static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp) { if (nd->nd_flag & ND_GSS) { if (!(clp->lc_flags & LCL_GSS)) return (1); if (clp->lc_flags & LCL_NAME) { if (nd->nd_princlen != clp->lc_namelen || NFSBCMP(nd->nd_principal, clp->lc_name, clp->lc_namelen)) return (1); else return (0); } if (nd->nd_cred->cr_uid == clp->lc_uid) return (0); else return (1); } else if (clp->lc_flags & LCL_GSS) return (1); /* * For AUTH_SYS, allow the same uid or root. (This is underspecified * in RFC3530, which talks about principals, but doesn't say anything * about uids for AUTH_SYS.) */ if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0) return (0); else return (1); } /* * Calculate the lease expiry time. */ static time_t nfsrv_leaseexpiry(void) { struct timeval curtime; NFSGETTIME(&curtime); if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC) return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA)); return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA); } /* * Delay the delegation timeout as far as ls_delegtimelimit, as required. */ static void nfsrv_delaydelegtimeout(struct nfsstate *stp) { if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) return; if ((stp->ls_delegtime + 15) > NFSD_MONOSEC && stp->ls_delegtime < stp->ls_delegtimelimit) { stp->ls_delegtime += nfsrv_lease; if (stp->ls_delegtime > stp->ls_delegtimelimit) stp->ls_delegtime = stp->ls_delegtimelimit; } } /* * This function checks to see if there is any other state associated * with the openowner for this Open. * It returns 1 if there is no other state, 0 otherwise. */ static int nfsrv_nootherstate(struct nfsstate *stp) { struct nfsstate *tstp; LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) { if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock)) return (0); } return (1); } /* * Create a list of lock deltas (changes to local byte range locking * that can be rolled back using the list) and apply the changes via * nfsvno_advlock(). Optionally, lock the list. It is expected that either * the rollback or update function will be called after this. * It returns an error (and rolls back, as required), if any nfsvno_advlock() * call fails. If it returns an error, it will unlock the list. */ static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfslock *lop, *nlop; int error = 0; /* Loop through the list of locks. */ lop = LIST_FIRST(&lfp->lf_locallock); while (first < end && lop != NULL) { nlop = LIST_NEXT(lop, lo_lckowner); if (first >= lop->lo_end) { /* not there yet */ lop = nlop; } else if (first < lop->lo_first) { /* new one starts before entry in list */ if (end <= lop->lo_first) { /* no overlap between old and new */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, lop->lo_first, cfp, p); if (error != 0) break; first = lop->lo_first; } } else { /* new one overlaps this entry in list */ if (end <= lop->lo_end) { /* overlaps all of new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, lop->lo_end, cfp, p); if (error != 0) break; first = lop->lo_end; lop = nlop; } } } if (first < end && error == 0) /* handle fragment past end of list */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); NFSEXITCODE(error); return (error); } /* * Local lock unlock. Unlock all byte ranges that are no longer locked * by NFSv4. To do this, unlock any subranges of first-->end that * do not overlap with the byte ranges of any lock in the lfp->lf_lock * list. This list has all locks for the file held by other * tuples. The list is ordered by increasing * lo_first value, but may have entries that overlap each other, for * the case of read locks. */ static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p) { struct nfslock *lop; uint64_t first, end, prevfirst; first = init_first; end = init_end; while (first < init_end) { /* Loop through all nfs locks, adjusting first and end */ prevfirst = 0; LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { KASSERT(prevfirst <= lop->lo_first, ("nfsv4 locks out of order")); KASSERT(lop->lo_first < lop->lo_end, ("nfsv4 bogus lock")); prevfirst = lop->lo_first; if (first >= lop->lo_first && first < lop->lo_end) /* * Overlaps with initial part, so trim * off that initial part by moving first past * it. */ first = lop->lo_end; else if (end > lop->lo_first && lop->lo_first > first) { /* * This lock defines the end of the * segment to unlock, so set end to the * start of it and break out of the loop. */ end = lop->lo_first; break; } if (first >= end) /* * There is no segment left to do, so * break out of this loop and then exit * the outer while() since first will be set * to end, which must equal init_end here. */ break; } if (first < end) { /* Unlock this segment */ (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK, NFSLCK_READ, first, end, NULL, p); nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK, first, end); } /* * Now move past this segment and look for any further * segment in the range, if there is one. */ first = end; end = init_end; } } /* * Do the local lock operation and update the rollback list, as required. * Perform the rollback and return the error if nfsvno_advlock() fails. */ static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfsrollback *rlp; int error = 0, ltype, oldltype; if (flags & NFSLCK_WRITE) ltype = F_WRLCK; else if (flags & NFSLCK_READ) ltype = F_RDLCK; else ltype = F_UNLCK; if (oldflags & NFSLCK_WRITE) oldltype = F_WRLCK; else if (oldflags & NFSLCK_READ) oldltype = F_RDLCK; else oldltype = F_UNLCK; if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK)) /* nothing to do */ goto out; error = nfsvno_advlock(vp, ltype, first, end, p); if (error != 0) { if (cfp != NULL) { cfp->cl_clientid.lval[0] = 0; cfp->cl_clientid.lval[1] = 0; cfp->cl_first = 0; cfp->cl_end = NFS64BITSSET; cfp->cl_flags = NFSLCK_WRITE; cfp->cl_ownerlen = 5; NFSBCOPY("LOCAL", cfp->cl_owner, 5); } nfsrv_locallock_rollback(vp, lfp, p); } else if (ltype != F_UNLCK) { rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK, M_WAITOK); rlp->rlck_first = first; rlp->rlck_end = end; rlp->rlck_type = oldltype; LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list); } out: NFSEXITCODE(error); return (error); } /* * Roll back local lock changes and free up the rollback list. */ static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p) { struct nfsrollback *rlp, *nrlp; LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) { (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first, rlp->rlck_end, p); free(rlp, M_NFSDROLLBACK); } LIST_INIT(&lfp->lf_rollback); } /* * Update local lock list and delete rollback list (ie now committed to the * local locks). Most of the work is done by the internal function. */ static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end) { struct nfsrollback *rlp, *nrlp; struct nfslock *new_lop, *other_lop; new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); if (flags & (NFSLCK_READ | NFSLCK_WRITE)) other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); else other_lop = NULL; new_lop->lo_flags = flags; new_lop->lo_first = first; new_lop->lo_end = end; nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp); if (new_lop != NULL) free(new_lop, M_NFSDLOCK); if (other_lop != NULL) free(other_lop, M_NFSDLOCK); /* and get rid of the rollback list */ LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } /* * Lock the struct nfslockfile for local lock updating. */ static void nfsrv_locklf(struct nfslockfile *lfp) { int gotlock; /* lf_usecount ensures *lfp won't be free'd */ lfp->lf_usecount++; do { gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL, NFSSTATEMUTEXPTR, NULL); } while (gotlock == 0); lfp->lf_usecount--; } /* * Unlock the struct nfslockfile after local lock updating. */ static void nfsrv_unlocklf(struct nfslockfile *lfp) { nfsv4_unlock(&lfp->lf_locallock_lck, 0); } /* * Clear out all state for the NFSv4 server. * Must be called by a thread that can sleep when no nfsds are running. */ void nfsrv_throwawayallstate(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfslockfile *lfp, *nlfp; int i; /* * For each client, clean out the state and then free the structure. */ for (i = 0; i < NFSCLIENTHASHSIZE; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); free(clp, M_NFSDCLIENT); } } /* * Also, free up any remaining lock file structures. */ for (i = 0; i < NFSLOCKHASHSIZE; i++) { LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) { printf("nfsd unload: fnd a lock file struct\n"); nfsrv_freenfslockfile(lfp); } } } Index: head/sys/nfsclient/nfsargs.h =================================================================== --- head/sys/nfsclient/nfsargs.h (revision 244041) +++ head/sys/nfsclient/nfsargs.h (revision 244042) @@ -1,102 +1,103 @@ /*- * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs.h 8.4 (Berkeley) 5/1/95 * $FreeBSD$ */ #ifndef _NFSCLIENT_NFSARGS_H_ #define _NFSCLIENT_NFSARGS_H_ /* * Arguments to mount NFS */ #define NFS_ARGSVERSION 3 /* change when nfs_args changes */ struct nfs_args { int version; /* args structure version number */ struct sockaddr *addr; /* file server address */ int addrlen; /* length of address */ int sotype; /* Socket type */ int proto; /* and Protocol */ u_char *fh; /* File handle to be mounted */ int fhsize; /* Size, in bytes, of fh */ int flags; /* flags */ int wsize; /* write size in bytes */ int rsize; /* read size in bytes */ int readdirsize; /* readdir size in bytes */ int timeo; /* initial timeout in .1 secs */ int retrans; /* times to retry send */ int maxgrouplist; /* Max. size of group list */ int readahead; /* # of blocks to readahead */ int wcommitsize; /* Max. write commit size in bytes */ int deadthresh; /* Retrans threshold */ char *hostname; /* server's name */ int acregmin; /* cache attrs for reg files min time */ int acregmax; /* cache attrs for reg files max time */ int acdirmin; /* cache attrs for dirs min time */ int acdirmax; /* cache attrs for dirs max time */ }; /* * NFS mount option flags */ #define NFSMNT_SOFT 0x00000001 /* soft mount (hard is default) */ #define NFSMNT_WSIZE 0x00000002 /* set write size */ #define NFSMNT_RSIZE 0x00000004 /* set read size */ #define NFSMNT_TIMEO 0x00000008 /* set initial timeout */ #define NFSMNT_RETRANS 0x00000010 /* set number of request retries */ #define NFSMNT_MAXGRPS 0x00000020 /* set maximum grouplist size */ #define NFSMNT_INT 0x00000040 /* allow interrupts on hard mount */ #define NFSMNT_NOCONN 0x00000080 /* Don't Connect the socket */ /* 0x100 free, was NFSMNT_NQNFS */ #define NFSMNT_NFSV3 0x00000200 /* Use NFS Version 3 protocol */ #define NFSMNT_KERB 0x00000400 /* Use RPCSEC_GSS/Krb5 */ #define NFSMNT_DUMBTIMR 0x00000800 /* Don't estimate rtt dynamically */ #define NFSMNT_WCOMMITSIZE 0x00001000 /* set max write commit size */ #define NFSMNT_READAHEAD 0x00002000 /* set read ahead */ #define NFSMNT_DEADTHRESH 0x00004000 /* set dead server retry thresh */ #define NFSMNT_RESVPORT 0x00008000 /* Allocate a reserved port */ #define NFSMNT_RDIRPLUS 0x00010000 /* Use Readdirplus for V3 */ #define NFSMNT_READDIRSIZE 0x00020000 /* Set readdir size */ #define NFSMNT_ACREGMIN 0x00040000 #define NFSMNT_ACREGMAX 0x00080000 #define NFSMNT_ACDIRMIN 0x00100000 #define NFSMNT_ACDIRMAX 0x00200000 #define NFSMNT_NOLOCKD 0x00400000 /* Locks are local */ #define NFSMNT_NFSV4 0x00800000 /* Use NFS Version 4 protocol */ #define NFSMNT_HASWRITEVERF 0x01000000 /* NFSv4 Write verifier */ #define NFSMNT_INTEGRITY 0x02000000 /* Use integrity with RPCSEC_GSS */ #define NFSMNT_PRIVACY 0x04000000 /* Use privacy with RPCSEC_GSS */ #define NFSMNT_ALLGSSNAME 0x08000000 /* Use principal for all accesses */ #define NFSMNT_STRICT3530 0x10000000 /* Adhere strictly to RFC3530 */ #define NFSMNT_NOCTO 0x20000000 /* Don't flush attrcache on open */ +#define NFSMNT_PNFS 0x40000000 /* Enable pNFS support */ #endif