Index: head/sys/nfsclient/nfs.h =================================================================== --- head/sys/nfsclient/nfs.h (revision 148161) +++ head/sys/nfsclient/nfs.h (revision 148162) @@ -1,319 +1,320 @@ /*- * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)nfs.h 8.4 (Berkeley) 5/1/95 * $FreeBSD$ */ #ifndef _NFSCLIENT_NFS_H_ #define _NFSCLIENT_NFS_H_ #ifdef _KERNEL #include "opt_nfs.h" #endif #include /* * Tunable constants for nfs */ #define NFS_TICKINTVL 10 /* Desired time for a tick (msec) */ #define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */ #define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ #define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFS_RETRANS 10 /* Num of retrans for soft mounts */ #define NFS_MAXGRPS 16 /* Max. size of groups list */ #ifndef NFS_MINATTRTIMO #define NFS_MINATTRTIMO 3 /* VREG attrib cache timeout in sec */ #endif #ifndef NFS_MAXATTRTIMO #define NFS_MAXATTRTIMO 60 #endif #ifndef NFS_MINDIRATTRTIMO #define NFS_MINDIRATTRTIMO 30 /* VDIR attrib cache timeout in sec */ #endif #ifndef NFS_MAXDIRATTRTIMO #define NFS_MAXDIRATTRTIMO 60 #endif #define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ #define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ #define NFS_READDIRSIZE 8192 /* Def. readdir size */ #define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ #define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */ #define NFS_MAXASYNCDAEMON 64 /* Max. 
number async_daemons runnable */ #define NFS_DIRBLKSIZ 4096 /* Must be a multiple of DIRBLKSIZ */ #ifdef _KERNEL #define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */ #endif #define NFS_MAXDEADTHRESH 9 /* How long till we say 'server not responding' */ /* * Oddballs */ #define NFS_CMPFH(n, f, s) \ ((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s))) #define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3) #define NFSSTA_HASWRITEVERF 0x00040000 /* Has write verifier for V3 */ #define NFSSTA_GOTFSINFO 0x00100000 /* Got the V3 fsinfo */ #define NFSSTA_SNDLOCK 0x01000000 /* Send socket lock */ #define NFSSTA_WANTSND 0x02000000 /* Want above */ #define NFSSTA_TIMEO 0x10000000 /* Experiencing a timeout */ /* * XXX to allow amd to include nfs.h without nfsproto.h */ #ifdef NFS_NPROCS #include #endif /* * vfs.nfs sysctl(3) identifiers */ #define NFS_NFSSTATS 1 /* struct: struct nfsstats */ /* * File context information for nfsv4. Currently, there is only one * lockowner for the whole machine "0." */ struct nfs4_fctx { TAILQ_ENTRY(nfs4_fstate) next; uint32_t refcnt; struct nfs4_lowner *lop; struct nfsnode *np; char stateid[NFSX_V4STATEID]; }; #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_NFSREQ); MALLOC_DECLARE(M_NFSDIROFF); MALLOC_DECLARE(M_NFSBIGFH); MALLOC_DECLARE(M_NFSHASH); MALLOC_DECLARE(M_NFSDIRECTIO); #endif extern struct uma_zone *nfsmount_zone; extern struct callout nfs_callout; extern struct nfsstats nfsstats; extern int nfs_numasync; extern unsigned int nfs_iodmax; extern int nfs_pbuf_freecnt; extern int nfs_ticks; /* Data constants in XDR form */ extern u_int32_t nfs_true, nfs_false, nfs_xdrneg1; extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers; extern u_int32_t rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr; extern int nfsv3_procid[NFS_NPROCS]; struct uio; struct buf; struct vattr; struct nameidata; /* * Socket errors ignored for connectionless sockets?? 
* For now, ignore them all */ #define NFSIGNORE_SOERROR(s, e) \ ((e) != EINTR && (e) != EIO && \ (e) != ERESTART && (e) != EWOULDBLOCK && \ ((s) & PR_CONNREQUIRED) == 0) /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; struct mbuf *r_mreq; struct mbuf *r_mrep; struct mbuf *r_md; caddr_t r_dpos; struct nfsmount *r_nmp; struct vnode *r_vp; u_int32_t r_xid; int r_flags; /* flags on request, see below */ int r_retry; /* max retransmission count */ int r_rexmit; /* current retrans count */ int r_timer; /* tick counter on reply */ u_int32_t r_procnum; /* NFS procedure number */ int r_rtt; /* RTT for rpc */ int r_lastmsg; /* last tprintf */ struct thread *r_td; /* Proc that did I/O system call */ }; /* * Queue head for nfsreq's */ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; /* Flag values for r_flags */ #define R_TIMING 0x01 /* timing request (in mntp) */ #define R_SENT 0x02 /* request has been sent */ #define R_SOFTTERM 0x04 /* soft mnt, too many retries */ #define R_RESENDERR 0x08 /* Resend failed */ #define R_SOCKERR 0x10 /* Fatal error on socket */ #define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */ #define R_MUSTRESEND 0x40 /* Must resend request */ #define R_GETONEREP 0x80 /* Probe for one reply only */ +#define R_REXMIT_INPROG 0x100 /* Re-transmit in progress */ /* * Pointers to ops that differ from v3 to v4 */ struct nfs_rpcops { int (*nr_readrpc)(struct vnode *vp, struct uio *uiop, struct ucred *cred); int (*nr_writerpc)(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *iomode, int *must_commit); int (*nr_writebp)(struct buf *bp, int force, struct thread *td); int (*nr_readlinkrpc)(struct vnode *vp, struct uio *uiop, struct ucred *cred); void (*nr_invaldir)(struct vnode *vp); int (*nr_commit)(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct thread *td); }; /* * Defines for WebNFS */ #define WEBNFS_ESC_CHAR '%' #define WEBNFS_SPECCHAR_START 0x80 #define WEBNFS_NATIVE_CHAR 0x80 /* * .. 
* Possibly more here in the future. */ /* * Macro for converting escape characters in WebNFS pathnames. * Should really be in libkern. */ #define HEXTOC(c) \ ((c) >= 'a' ? ((c) - ('a' - 10)) : \ ((c) >= 'A' ? ((c) - ('A' - 10)) : ((c) - '0'))) #define HEXSTRTOI(p) \ ((HEXTOC(p[0]) << 4) + HEXTOC(p[1])) /* nfs_sigintr() helper, when 'rep' has all we need */ #define NFS_SIGREP(rep) nfs_sigintr((rep)->r_nmp, (rep), (rep)->r_td) #ifdef NFS_DEBUG extern int nfs_debug; #define NFS_DEBUG_ASYNCIO 1 /* asynchronous i/o */ #define NFS_DEBUG_WG 2 /* server write gathering */ #define NFS_DEBUG_RC 4 /* server request caching */ #define NFS_DPF(cat, args) \ do { \ if (nfs_debug & NFS_DEBUG_##cat) printf args; \ } while (0) #else #define NFS_DPF(cat, args) #endif vfs_init_t nfs_init; vfs_uninit_t nfs_uninit; int nfs_mountroot(struct mount *mp, struct thread *td); #ifndef NFS4_USE_RPCCLNT int nfs_send(struct socket *, struct sockaddr *, struct mbuf *, struct nfsreq *); int nfs_sndlock(struct nfsreq *); void nfs_sndunlock(struct nfsreq *); #endif /* ! 
NFS4_USE_RPCCLNT */ int nfs_vinvalbuf(struct vnode *, int, struct thread *, int); int nfs_readrpc(struct vnode *, struct uio *, struct ucred *); int nfs_writerpc(struct vnode *, struct uio *, struct ucred *, int *, int *); int nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct thread *td); int nfs_readdirrpc(struct vnode *, struct uio *, struct ucred *); int nfs_nfsiodnew(void); int nfs_asyncio(struct nfsmount *, struct buf *, struct ucred *, struct thread *); int nfs_doio(struct vnode *, struct buf *, struct ucred *, struct thread *); void nfs_doio_directwrite (struct buf *); void nfs_up(struct nfsreq *, struct nfsmount *, struct thread *, const char *, int); void nfs_down(struct nfsreq *, struct nfsmount *, struct thread *, const char *, int, int); int nfs_readlinkrpc(struct vnode *, struct uio *, struct ucred *); int nfs_sigintr(struct nfsmount *, struct nfsreq *, struct thread *); int nfs_readdirplusrpc(struct vnode *, struct uio *, struct ucred *); int nfs_request(struct vnode *, struct mbuf *, int, struct thread *, struct ucred *, struct mbuf **, struct mbuf **, caddr_t *); int nfs_loadattrcache(struct vnode **, struct mbuf **, caddr_t *, struct vattr *, int); int nfsm_mbuftouio(struct mbuf **, struct uio *, int, caddr_t *); void nfs_nhinit(void); void nfs_nhuninit(void); int nfs_nmcancelreqs(struct nfsmount *); void nfs_timer(void*); int nfs_connect(struct nfsmount *, struct nfsreq *); void nfs_disconnect(struct nfsmount *); void nfs_safedisconnect(struct nfsmount *); int nfs_getattrcache(struct vnode *, struct vattr *); int nfsm_strtmbuf(struct mbuf **, char **, const char *, long); int nfs_bioread(struct vnode *, struct uio *, int, struct ucred *); int nfsm_uiotombuf(struct uio *, struct mbuf **, int, caddr_t *); void nfs_clearcommit(struct mount *); int nfs_writebp(struct buf *, int, struct thread *); int nfs_fsinfo(struct nfsmount *, struct vnode *, struct ucred *, struct thread *); int nfs_meta_setsize (struct vnode *, 
struct ucred *, struct thread *, u_quad_t); void nfs_set_sigmask __P((struct thread *td, sigset_t *oldset)); void nfs_restore_sigmask __P((struct thread *td, sigset_t *set)); int nfs_tsleep __P((struct thread *td, void *ident, int priority, char *wmesg, int timo)); int nfs_msleep __P((struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo)); #endif /* _KERNEL */ #endif Index: head/sys/nfsclient/nfs_socket.c =================================================================== --- head/sys/nfsclient/nfs_socket.c (revision 148161) +++ head/sys/nfsclient/nfs_socket.c (revision 148162) @@ -1,1679 +1,1689 @@ /*- * Copyright (c) 1989, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 */ #include __FBSDID("$FreeBSD$"); /* * Socket operations for use by nfs */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TRUE 1 #define FALSE 0 /* * Estimate rto for an nfs rpc sent via. an unreliable datagram. * Use the mean and mean deviation of rtt for the appropriate type of rpc * for the frequent rpcs and a default for the others. * The justification for doing "other" this way is that these rpcs * happen so infrequently that timer est. would probably be stale. * Also, since many of these rpcs are * non-idempotent, a conservative timeout is desired. * getattr, lookup - A+2D * read, write - A+4D * other - nm_timeo */ #define NFS_RTO(n, t) \ ((t) == 0 ? (n)->nm_timeo : \ ((t) < 3 ? \ (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] /* * Defines which timer to use for the procnum. 
* 0 - default * 1 - getattr * 2 - lookup * 3 - read * 4 - write */ static int proct[NFS_NPROCS] = { 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, }; static int nfs_realign_test; static int nfs_realign_count; static int nfs_bufpackets = 4; static int nfs_reconnects; SYSCTL_DECL(_vfs_nfs); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, ""); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, ""); SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, ""); SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, "number of times the nfs client has had to reconnect"); /* * There is a congestion window for outstanding rpcs maintained per mount * point. The cwnd size is adjusted in roughly the way that: * Van Jacobson, Congestion avoidance and Control, In "Proceedings of * SIGCOMM '88". ACM, August 1988. * describes for TCP. The cwnd size is chopped in half on a retransmit timeout * and incremented by 1/cwnd when each rpc reply is received and a full cwnd * of rpcs is in progress. * (The sent count and cwnd are scaled for integer arith.) * Variants of "slow start" were tried and were found to be too much of a * performance hit (ave. rtt 3 times larger), * I suspect due to the large rtt that nfs rpcs have. 
*/ #define NFS_CWNDSCALE 256 #define NFS_MAXCWND (NFS_CWNDSCALE * 32) #define NFS_NBACKOFF 8 static int nfs_backoff[NFS_NBACKOFF] = { 2, 4, 8, 16, 32, 64, 128, 256, }; struct callout nfs_callout; static int nfs_msg(struct thread *, const char *, const char *, int); static int nfs_realign(struct mbuf **pm, int hsiz); static int nfs_reply(struct nfsreq *); static void nfs_softterm(struct nfsreq *rep); static int nfs_reconnect(struct nfsreq *rep); static void nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag); static void nfs_clnt_udp_soupcall(struct socket *so, void *arg, int waitflag); static void wakeup_nfsreq(struct nfsreq *req); extern struct mtx nfs_reqq_mtx; extern struct mtx nfs_reply_mtx; /* * Initialize sockets and congestion for a new NFS connection. * We do not free the sockaddr if error. */ int nfs_connect(struct nfsmount *nmp, struct nfsreq *rep) { struct socket *so; int error, rcvreserve, sndreserve; int pktscale; struct sockaddr *saddr; struct thread *td = &thread0; /* only used for socreate and sobind */ NET_ASSERT_GIANT(); if (nmp->nm_sotype == SOCK_STREAM) { mtx_lock(&nmp->nm_nfstcpstate.mtx); nmp->nm_nfstcpstate.flags |= NFS_TCP_EXPECT_RPCMARKER; nmp->nm_nfstcpstate.rpcresid = 0; mtx_unlock(&nmp->nm_nfstcpstate.mtx); } nmp->nm_so = NULL; saddr = nmp->nm_nam; error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto, nmp->nm_mountp->mnt_cred, td); if (error) goto bad; so = nmp->nm_so; nmp->nm_soflags = so->so_proto->pr_flags; /* * Some servers require that the client port be a reserved port number. 
*/ if (nmp->nm_flag & NFSMNT_RESVPORT) { struct sockopt sopt; int ip, ip2, len; struct sockaddr_in6 ssin; struct sockaddr *sa; bzero(&sopt, sizeof sopt); switch(saddr->sa_family) { case AF_INET: sopt.sopt_level = IPPROTO_IP; sopt.sopt_name = IP_PORTRANGE; ip = IP_PORTRANGE_LOW; ip2 = IP_PORTRANGE_DEFAULT; len = sizeof (struct sockaddr_in); break; #ifdef INET6 case AF_INET6: sopt.sopt_level = IPPROTO_IPV6; sopt.sopt_name = IPV6_PORTRANGE; ip = IPV6_PORTRANGE_LOW; ip2 = IPV6_PORTRANGE_DEFAULT; len = sizeof (struct sockaddr_in6); break; #endif default: goto noresvport; } sa = (struct sockaddr *)&ssin; bzero(sa, len); sa->sa_len = len; sa->sa_family = saddr->sa_family; sopt.sopt_dir = SOPT_SET; sopt.sopt_val = (void *)&ip; sopt.sopt_valsize = sizeof(ip); error = sosetopt(so, &sopt); if (error) goto bad; error = sobind(so, sa, td); if (error) goto bad; ip = ip2; error = sosetopt(so, &sopt); if (error) goto bad; noresvport: ; } /* * Protocols that do not require connections may be optionally left * unconnected for servers that reply from a port other than NFS_PORT. */ if (nmp->nm_flag & NFSMNT_NOCONN) { if (nmp->nm_soflags & PR_CONNREQUIRED) { error = ENOTCONN; goto bad; } } else { error = soconnect(so, nmp->nm_nam, td); if (error) goto bad; /* * Wait for the connection to complete. Cribbed from the * connect system call but with the wait timing out so * that interruptible mounts don't hang here for a long time. 
*/ SOCK_LOCK(so); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { (void) msleep(&so->so_timeo, SOCK_MTX(so), PSOCK, "nfscon", 2 * hz); if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0 && rep && (error = nfs_sigintr(nmp, rep, rep->r_td)) != 0) { so->so_state &= ~SS_ISCONNECTING; SOCK_UNLOCK(so); goto bad; } } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCK_UNLOCK(so); goto bad; } SOCK_UNLOCK(so); } so->so_rcv.sb_timeo = 12 * hz; so->so_snd.sb_timeo = 5 * hz; /* * Get buffer reservation size from sysctl, but impose reasonable * limits. */ pktscale = nfs_bufpackets; if (pktscale < 2) pktscale = 2; if (pktscale > 64) pktscale = 64; if (nmp->nm_sotype == SOCK_DGRAM) { sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + NFS_MAXPKTHDR) * pktscale; } else if (nmp->nm_sotype == SOCK_SEQPACKET) { sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + NFS_MAXPKTHDR) * pktscale; } else { if (nmp->nm_sotype != SOCK_STREAM) panic("nfscon sotype"); if (so->so_proto->pr_flags & PR_CONNREQUIRED) { struct sockopt sopt; int val; bzero(&sopt, sizeof sopt); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_KEEPALIVE; sopt.sopt_val = &val; sopt.sopt_valsize = sizeof val; val = 1; sosetopt(so, &sopt); } if (so->so_proto->pr_protocol == IPPROTO_TCP) { struct sockopt sopt; int val; bzero(&sopt, sizeof sopt); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; sopt.sopt_val = &val; sopt.sopt_valsize = sizeof val; val = 1; sosetopt(so, &sopt); } sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_int32_t)) * pktscale; rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_int32_t)) * pktscale; } error = soreserve(so, sndreserve, rcvreserve); if (error) goto bad; SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_flags |= SB_NOINTR; so->so_upcallarg = (caddr_t)nmp; if 
(so->so_type == SOCK_STREAM) so->so_upcall = nfs_clnt_tcp_soupcall; else so->so_upcall = nfs_clnt_udp_soupcall; so->so_rcv.sb_flags |= SB_UPCALL; SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_flags |= SB_NOINTR; SOCKBUF_UNLOCK(&so->so_snd); /* Initialize other non-zero congestion variables */ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = (NFS_TIMEO << 3); nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = nmp->nm_sdrtt[3] = 0; nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ nmp->nm_sent = 0; nmp->nm_timeouts = 0; return (0); bad: nfs_disconnect(nmp); return (error); } /* * Reconnect routine: * Called when a connection is broken on a reliable protocol. * - clean up the old socket * - nfs_connect() again * - set R_MUSTRESEND for all outstanding requests on mount point * If this fails the mount point is DEAD! * nb: Must be called with the nfs_sndlock() set on the mount point. */ static int nfs_reconnect(struct nfsreq *rep) { struct nfsreq *rp; struct nfsmount *nmp = rep->r_nmp; int error; nfs_reconnects++; nfs_disconnect(nmp); while ((error = nfs_connect(nmp, rep)) != 0) { if (error == ERESTART) error = EINTR; if (error == EIO || error == EINTR) return (error); (void) tsleep(&lbolt, PSOCK, "nfscon", 0); } /* * Clear the FORCE_RECONNECT flag only after the connect * succeeds. To prevent races between multiple processes * waiting on the mountpoint where the connection is being * torn down. The first one to acquire the sndlock will * retry the connection. The others block on the sndlock * until the connection is established successfully, and * then re-transmit the request. */ mtx_lock(&nmp->nm_nfstcpstate.mtx); nmp->nm_nfstcpstate.flags &= ~NFS_TCP_FORCE_RECONNECT; mtx_unlock(&nmp->nm_nfstcpstate.mtx); /* * Loop through outstanding request list and fix up all requests * on old socket. 
*/ mtx_lock(&nfs_reqq_mtx); TAILQ_FOREACH(rp, &nfs_reqq, r_chain) { if (rp->r_nmp == nmp) rp->r_flags |= R_MUSTRESEND; } mtx_unlock(&nfs_reqq_mtx); return (0); } /* * NFS disconnect. Clean up and unlink. */ void nfs_disconnect(struct nfsmount *nmp) { struct socket *so; NET_ASSERT_GIANT(); if (nmp->nm_so) { so = nmp->nm_so; nmp->nm_so = NULL; SOCKBUF_LOCK(&so->so_rcv); so->so_upcallarg = NULL; so->so_upcall = NULL; so->so_rcv.sb_flags &= ~SB_UPCALL; SOCKBUF_UNLOCK(&so->so_rcv); soshutdown(so, SHUT_WR); soclose(so); } } void nfs_safedisconnect(struct nfsmount *nmp) { struct nfsreq dummyreq; bzero(&dummyreq, sizeof(dummyreq)); dummyreq.r_nmp = nmp; nfs_disconnect(nmp); } /* * This is the nfs send routine. For connection based socket types, it * must be called with an nfs_sndlock() on the socket. * - return EINTR if the RPC is terminated, 0 otherwise * - set R_MUSTRESEND if the send fails for any reason * - do any cleanup required by recoverable socket errors (?) */ int nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top, struct nfsreq *rep) { struct sockaddr *sendnam; int error, error2, soflags, flags; NET_ASSERT_GIANT(); KASSERT(rep, ("nfs_send: called with rep == NULL")); error = nfs_sigintr(rep->r_nmp, rep, rep->r_td); if (error) { m_freem(top); return (error); } if ((so = rep->r_nmp->nm_so) == NULL) { rep->r_flags |= R_MUSTRESEND; m_freem(top); return (0); } rep->r_flags &= ~R_MUSTRESEND; soflags = rep->r_nmp->nm_soflags; if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) sendnam = NULL; else sendnam = nam; if (so->so_type == SOCK_SEQPACKET) flags = MSG_EOR; else flags = 0; error = so->so_proto->pr_usrreqs->pru_sosend(so, sendnam, 0, top, 0, flags, curthread /*XXX*/); if (error == ENOBUFS && so->so_type == SOCK_DGRAM) { error = 0; rep->r_flags |= R_MUSTRESEND; } if (error) { /* * Don't report EPIPE errors on nfs sockets. * These can be due to idle tcp mounts which will be closed by * netapp, solaris, etc. if left idle too long. 
*/ if (error != EPIPE) { log(LOG_INFO, "nfs send error %d for server %s\n", error, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); } /* * Deal with errors for the client side. */ error2 = NFS_SIGREP(rep); if (error2) error = error2; else rep->r_flags |= R_MUSTRESEND; /* * Handle any recoverable (soft) socket errors here. (?) */ if (error != EINTR && error != ERESTART && error != EIO && error != EWOULDBLOCK && error != EPIPE) error = 0; } return (error); } int nfs_reply(struct nfsreq *rep) { register struct socket *so; register struct mbuf *m; int error = 0, sotype, slpflag; NET_ASSERT_GIANT(); sotype = rep->r_nmp->nm_sotype; /* * For reliable protocols, lock against other senders/receivers * in case a reconnect is necessary. */ if (sotype != SOCK_DGRAM) { error = nfs_sndlock(rep); if (error) return (error); tryagain: if (rep->r_mrep) { nfs_sndunlock(rep); return (0); } if (rep->r_flags & R_SOFTTERM) { nfs_sndunlock(rep); return (EINTR); } so = rep->r_nmp->nm_so; mtx_lock(&rep->r_nmp->nm_nfstcpstate.mtx); if (!so || (rep->r_nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT)) { mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx); error = nfs_reconnect(rep); if (error) { nfs_sndunlock(rep); return (error); } goto tryagain; } else mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx); while (rep->r_flags & R_MUSTRESEND) { m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); nfsstats.rpcretries++; error = nfs_send(so, rep->r_nmp->nm_nam, m, rep); if (error) { if (error == EINTR || error == ERESTART || (error = nfs_reconnect(rep)) != 0) { nfs_sndunlock(rep); return (error); } goto tryagain; } } nfs_sndunlock(rep); } slpflag = 0; if (rep->r_nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; mtx_lock(&nfs_reply_mtx); while ((rep->r_mrep == NULL) && (error == 0) && ((rep->r_flags & R_SOFTTERM) == 0) && ((sotype == SOCK_DGRAM) || ((rep->r_flags & R_MUSTRESEND) == 0))) error = msleep((caddr_t)rep, &nfs_reply_mtx, slpflag | (PZERO - 1), "nfsreq", 0); mtx_unlock(&nfs_reply_mtx); if (error == EINTR || 
error == ERESTART) /* NFS operations aren't restartable. Map ERESTART to EINTR */ return (EINTR); if (rep->r_flags & R_SOFTTERM) /* Request was terminated because we exceeded the retries (soft mount) */ return (ETIMEDOUT); if (sotype == SOCK_STREAM) { mtx_lock(&rep->r_nmp->nm_nfstcpstate.mtx); if (((rep->r_nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT) || (rep->r_flags & R_MUSTRESEND))) { mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx); error = nfs_sndlock(rep); if (error) return (error); goto tryagain; } else mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx); } return (error); } /* * XXX TO DO * Make nfs_realign() non-blocking. Also make nfsm_dissect() nonblocking. */ static void nfs_clnt_match_xid(struct socket *so, struct nfsmount *nmp, struct mbuf *mrep) { struct mbuf *md; caddr_t dpos; u_int32_t rxid, *tl; struct nfsreq *rep; register int32_t t1; int error; /* * Search for any mbufs that are not a multiple of 4 bytes long * or with m_data not longword aligned. * These could cause pointer alignment problems, so copy them to * well aligned mbufs. */ if (nfs_realign(&mrep, 5 * NFSX_UNSIGNED) == ENOMEM) { m_freem(mrep); nfsstats.rpcinvalid++; return; } /* * Get the xid and check that it is an rpc reply */ md = mrep; dpos = mtod(md, caddr_t); tl = nfsm_dissect_nonblock(u_int32_t *, 2*NFSX_UNSIGNED); rxid = *tl++; if (*tl != rpc_reply) { m_freem(mrep); nfsmout: nfsstats.rpcinvalid++; return; } mtx_lock(&nfs_reqq_mtx); /* * Loop through the request list to match up the reply * Iff no match, just drop the datagram */ TAILQ_FOREACH(rep, &nfs_reqq, r_chain) { if (rep->r_mrep == NULL && rxid == rep->r_xid) { /* Found it.. */ rep->r_mrep = mrep; rep->r_md = md; rep->r_dpos = dpos; /* * Update congestion window. * Do the additive increase of * one rpc/rtt. 
*/
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
				    (NFS_CWNDSCALE * NFS_CWNDSCALE +
				    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			/* A reply retires the request from the send window. */
			if (rep->r_flags & R_SENT) {
				rep->r_flags &= ~R_SENT;
				nmp->nm_sent -= NFS_CWNDSCALE;
			}
			/*
			 * Update rtt using a gain of 0.125 on the mean
			 * and a gain of 0.25 on the deviation.
			 */
			if (rep->r_flags & R_TIMING) {
				/*
				 * Since the timer resolution of
				 * NFS_HZ is so coarse, it can often
				 * result in r_rtt == 0. Since
				 * r_rtt == N means that the actual
				 * rtt is between N+dt and N+2-dt ticks,
				 * add 1.
				 */
				t1 = rep->r_rtt + 1;
				t1 -= (NFS_SRTT(rep) >> 3);
				NFS_SRTT(rep) += t1;
				if (t1 < 0)
					t1 = -t1;
				t1 -= (NFS_SDRTT(rep) >> 2);
				NFS_SDRTT(rep) += t1;
			}
			nmp->nm_timeouts = 0;
			break;
		}
	}
	/*
	 * If not matched to a request, drop it.
	 * If it's mine, wake up requestor.
	 */
	if (rep == 0) {
		nfsstats.rpcunexpected++;
		m_freem(mrep);
	} else
		wakeup_nfsreq(rep);
	mtx_unlock(&nfs_reqq_mtx);
}

/*
 * Wake up the thread sleeping on request 'req'.
 *
 * The wakeup of the requestor should be done under the mutex
 * to avoid potential missed wakeups: nfs_reply() sleeps on the request
 * address using nfs_reply_mtx as the msleep() interlock, so the same
 * mutex is taken around the wakeup() here.
 */
static void
wakeup_nfsreq(struct nfsreq *req)
{
	mtx_lock(&nfs_reply_mtx);
	wakeup((caddr_t)req);
	mtx_unlock(&nfs_reply_mtx);
}

/*
 * Flag the mount's TCP state with NFS_TCP_FORCE_RECONNECT (under the
 * nm_nfstcpstate mutex), then mark every outstanding request on this
 * mount R_MUSTRESEND and wake its waiter.
 */
static void
nfs_mark_for_reconnect(struct nfsmount *nmp)
{
	struct nfsreq *rp;

	mtx_lock(&nmp->nm_nfstcpstate.mtx);
	nmp->nm_nfstcpstate.flags |= NFS_TCP_FORCE_RECONNECT;
	mtx_unlock(&nmp->nm_nfstcpstate.mtx);
	/*
	 * Wakeup all processes that are waiting for replies
	 * on this mount point. One of them does the reconnect.
*/ mtx_lock(&nfs_reqq_mtx); TAILQ_FOREACH(rp, &nfs_reqq, r_chain) { if (rp->r_nmp == nmp) { rp->r_flags |= R_MUSTRESEND; wakeup_nfsreq(rp); } } mtx_unlock(&nfs_reqq_mtx); } static int nfstcp_readable(struct socket *so, int bytes) { int retval; SOCKBUF_LOCK(&so->so_rcv); retval = (so->so_rcv.sb_cc >= (bytes) || (so->so_state & SBS_CANTRCVMORE) || so->so_error); SOCKBUF_UNLOCK(&so->so_rcv); return (retval); } #define nfstcp_marker_readable(so) nfstcp_readable(so, sizeof(u_int32_t)) static void nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag) { struct nfsmount *nmp = (struct nfsmount *)arg; struct mbuf *mp = NULL; struct uio auio; int error; u_int32_t len; int rcvflg; /* * Don't pick any more data from the socket if we've marked the * mountpoint for reconnect. */ mtx_lock(&nmp->nm_nfstcpstate.mtx); if (nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT) { mtx_unlock(&nmp->nm_nfstcpstate.mtx); return; } else mtx_unlock(&nmp->nm_nfstcpstate.mtx); auio.uio_td = curthread; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; for ( ; ; ) { if (nmp->nm_nfstcpstate.flags & NFS_TCP_EXPECT_RPCMARKER) { if (!nfstcp_marker_readable(so)) { /* Marker is not readable */ return; } auio.uio_resid = sizeof(u_int32_t); auio.uio_iov = NULL; auio.uio_iovcnt = 0; mp = NULL; rcvflg = (MSG_DONTWAIT | MSG_SOCALLBCK); error = so->so_proto->pr_usrreqs->pru_soreceive (so, (struct sockaddr **)0, &auio, &mp, (struct mbuf **)0, &rcvflg); /* * We've already tested that the socket is readable. 2 cases * here, we either read 0 bytes (client closed connection), * or got some other error. In both cases, we tear down the * connection. 
*/ if (error || auio.uio_resid > 0) { if (error != ECONNRESET) { log(LOG_ERR, "nfs/tcp clnt: Error %d reading socket, tearing down TCP connection\n", error); } goto mark_reconnect; } if (mp == NULL) panic("nfs_clnt_tcp_soupcall: Got empty mbuf chain from sorecv\n"); bcopy(mtod(mp, u_int32_t *), &len, sizeof(len)); len = ntohl(len) & ~0x80000000; m_freem(mp); /* * This is SERIOUS! We are out of sync with the sender * and forcing a disconnect/reconnect is all I can do. */ if (len > NFS_MAXPACKET || len == 0) { log(LOG_ERR, "%s (%d) from nfs server %s\n", "impossible packet length", len, nmp->nm_mountp->mnt_stat.f_mntfromname); goto mark_reconnect; } nmp->nm_nfstcpstate.rpcresid = len; nmp->nm_nfstcpstate.flags &= ~(NFS_TCP_EXPECT_RPCMARKER); } /* * Processed RPC marker or no RPC marker to process. * Pull in and process data. */ if (nmp->nm_nfstcpstate.rpcresid > 0) { if (!nfstcp_readable(so, nmp->nm_nfstcpstate.rpcresid)) { /* All data not readable */ return; } auio.uio_resid = nmp->nm_nfstcpstate.rpcresid; auio.uio_iov = NULL; auio.uio_iovcnt = 0; mp = NULL; rcvflg = (MSG_DONTWAIT | MSG_SOCALLBCK); error = so->so_proto->pr_usrreqs->pru_soreceive (so, (struct sockaddr **)0, &auio, &mp, (struct mbuf **)0, &rcvflg); if (error || auio.uio_resid > 0) { if (error != ECONNRESET) { log(LOG_ERR, "nfs/tcp clnt: Error %d reading socket, tearing down TCP connection\n", error); } goto mark_reconnect; } if (mp == NULL) panic("nfs_clnt_tcp_soupcall: Got empty mbuf chain from sorecv\n"); nmp->nm_nfstcpstate.rpcresid = 0; nmp->nm_nfstcpstate.flags |= NFS_TCP_EXPECT_RPCMARKER; /* We got the entire RPC reply. 
Match XIDs and wake up requestor */ nfs_clnt_match_xid(so, nmp, mp); } } mark_reconnect: nfs_mark_for_reconnect(nmp); } static void nfs_clnt_udp_soupcall(struct socket *so, void *arg, int waitflag) { struct nfsmount *nmp = (struct nfsmount *)arg; struct uio auio; struct mbuf *mp = NULL; struct mbuf *control = NULL; int error, rcvflag; auio.uio_resid = 1000000; auio.uio_td = curthread; rcvflag = MSG_DONTWAIT; auio.uio_resid = 1000000000; do { mp = control = NULL; error = so->so_proto->pr_usrreqs->pru_soreceive(so, NULL, &auio, &mp, &control, &rcvflag); if (control) m_freem(control); if (mp) nfs_clnt_match_xid(so, nmp, mp); } while (mp && !error); } /* * nfs_request - goes something like this * - fill in request struct * - links it into list * - calls nfs_send() for first transmit * - calls nfs_receive() to get reply * - break down rpc header and return with nfs reply pointed to * by mrep or error * nb: always frees up mreq mbuf list */ /* XXX overloaded before */ #define NQ_TRYLATERDEL 15 /* Initial try later delay (sec) */ int nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum, struct thread *td, struct ucred *cred, struct mbuf **mrp, struct mbuf **mdp, caddr_t *dposp) { struct mbuf *mrep, *m2; struct nfsreq *rep; u_int32_t *tl; int i; struct nfsmount *nmp; struct mbuf *m, *md, *mheadend; time_t waituntil; caddr_t dpos; int s, error = 0, mrest_len, auth_len, auth_type; int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0; struct timeval now; u_int32_t xid; /* Reject requests while attempting a forced unmount. 
*/ if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) { m_freem(mrest); return (ESTALE); } nmp = VFSTONFS(vp->v_mount); if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) return nfs4_request(vp, mrest, procnum, td, cred, mrp, mdp, dposp); MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); rep->r_mrep = rep->r_md = NULL; rep->r_nmp = nmp; rep->r_vp = vp; rep->r_td = td; rep->r_procnum = procnum; getmicrouptime(&now); rep->r_lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); mrest_len = m_length(mrest, NULL); /* * Get the RPC header with authorization. */ auth_type = RPCAUTH_UNIX; if (cred->cr_ngroups < 1) panic("nfsreq nogrps"); auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + 5 * NFSX_UNSIGNED; m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, mrest, mrest_len, &mheadend, &xid); /* * For stream protocols, insert a Sun RPC Record Mark. */ if (nmp->nm_sotype == SOCK_STREAM) { M_PREPEND(m, NFSX_UNSIGNED, M_TRYWAIT); *mtod(m, u_int32_t *) = htonl(0x80000000 | (m->m_pkthdr.len - NFSX_UNSIGNED)); } rep->r_mreq = m; rep->r_xid = xid; tryagain: if (nmp->nm_flag & NFSMNT_SOFT) rep->r_retry = nmp->nm_retry; else rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ rep->r_rtt = rep->r_rexmit = 0; if (proct[procnum] > 0) rep->r_flags = R_TIMING; else rep->r_flags = 0; rep->r_mrep = NULL; /* * Do the client side RPC. */ nfsstats.rpcrequests++; /* * Chain request into list of outstanding requests. Be sure * to put it LAST so timer finds oldest requests first. */ s = splsoftclock(); mtx_lock(&nfs_reqq_mtx); if (TAILQ_EMPTY(&nfs_reqq)) callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL); TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); mtx_unlock(&nfs_reqq_mtx); /* * If backing off another request or avoiding congestion, don't * send this one now but let timer do it. If not timing a request, * do it now. 
*/ if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) { splx(s); error = nfs_sndlock(rep); if (!error) { m2 = m_copym(m, 0, M_COPYALL, M_TRYWAIT); error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep); nfs_sndunlock(rep); } mtx_lock(&nfs_reqq_mtx); if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { nmp->nm_sent += NFS_CWNDSCALE; rep->r_flags |= R_SENT; } mtx_unlock(&nfs_reqq_mtx); } else { splx(s); rep->r_rtt = -1; } /* * Wait for the reply from our send or the timer's. */ if (!error || error == EPIPE) error = nfs_reply(rep); /* * RPC done, unlink the request. */ s = splsoftclock(); mtx_lock(&nfs_reqq_mtx); + /* + * nfs_timer() may be in the process of re-transmitting this request. + * nfs_timer() drops the nfs_reqq_mtx before the pru_send() (to avoid LORs). + * Wait till nfs_timer() completes the re-transmission. When the reply + * comes back, it will be discarded (since the req struct for it no longer + * exists). + */ + while (rep->r_flags & R_REXMIT_INPROG) { + msleep((caddr_t)&rep->r_flags, &nfs_reqq_mtx, + (PZERO - 1), "nfsrxmt", 0); + } TAILQ_REMOVE(&nfs_reqq, rep, r_chain); if (TAILQ_EMPTY(&nfs_reqq)) callout_stop(&nfs_callout); /* * Decrement the outstanding request count. */ if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_SENT; /* paranoia */ nmp->nm_sent -= NFS_CWNDSCALE; } mtx_unlock(&nfs_reqq_mtx); splx(s); /* * If there was a successful reply and a tprintf msg. * tprintf a response. */ if (!error) nfs_up(rep, nmp, rep->r_td, "is alive again", NFSSTA_TIMEO); mrep = rep->r_mrep; md = rep->r_md; dpos = rep->r_dpos; if (error) { /* * If we got interrupted by a signal in nfs_reply(), there's * a very small window where the reply could've come in before * this process got scheduled in. To handle that case, we need * to free the reply if it was delivered. 
*/ if (rep->r_mrep != NULL) m_freem(rep->r_mrep); m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } if (rep->r_mrep == NULL) panic("nfs_request: rep->r_mrep shouldn't be NULL if no error\n"); /* * break down the rpc header and check if ok */ tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); if (*tl++ == rpc_msgdenied) { if (*tl == rpc_mismatch) error = EOPNOTSUPP; else error = EACCES; m_freem(mrep); m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * Just throw away any verifyer (ie: kerberos etc). */ i = fxdr_unsigned(int, *tl++); /* verf type */ i = fxdr_unsigned(int32_t, *tl); /* len */ if (i > 0) nfsm_adv(nfsm_rndup(i)); tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); /* 0 == ok */ if (*tl == 0) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); if (*tl != 0) { error = fxdr_unsigned(int, *tl); if ((nmp->nm_flag & NFSMNT_NFSV3) && error == NFSERR_TRYLATER) { m_freem(mrep); error = 0; waituntil = time_second + trylater_delay; while (time_second < waituntil) (void) tsleep(&lbolt, PSOCK, "nqnfstry", 0); trylater_delay *= nfs_backoff[trylater_cnt]; if (trylater_cnt < NFS_NBACKOFF - 1) trylater_cnt++; goto tryagain; } /* * If the File Handle was stale, invalidate the * lookup cache, just in case. */ if (error == ESTALE) cache_purge(vp); if (nmp->nm_flag & NFSMNT_NFSV3) { *mrp = mrep; *mdp = md; *dposp = dpos; error |= NFSERR_RETERR; } else m_freem(mrep); m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } *mrp = mrep; *mdp = md; *dposp = dpos; m_freem(rep->r_mreq); FREE((caddr_t)rep, M_NFSREQ); return (0); } m_freem(mrep); error = EPROTONOSUPPORT; nfsmout: m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * Nfs timer routine * Scan the nfsreq list and retranmit any requests that have timed out * To avoid retransmission attempts on STREAM sockets (in the future) make * sure to set the r_retry field to 0 (implies nm_retry == 0). 
* - * XXX - - * For now, since we don't register MPSAFE callouts for the NFS client - - * softclock() acquires Giant before calling us. That prevents req entries - * from being removed from the list (from nfs_request()). But we still - * acquire the nfs reqq mutex to make sure the state of individual req - * entries is not modified from RPC reply handling (from socket callback) - * while nfs_timer is walking the list of reqs. * The nfs reqq lock cannot be held while we do the pru_send() because of a * lock ordering violation. The NFS client socket callback acquires * inp_lock->nfsreq mutex and pru_send acquires inp_lock. So we drop the - * reqq mutex (and reacquire it after the pru_send()). This won't work - * when we move to fine grained locking for NFS. When we get to that point, - * a rewrite of nfs_timer() will be needed. + * reqq mutex (and reacquire it after the pru_send()). The req structure + * (for the rexmit) is prevented from being removed by the R_REXMIT_INPROG flag. */ void nfs_timer(void *arg) { struct nfsreq *rep; struct mbuf *m; struct socket *so; struct nfsmount *nmp; int timeo; int s, error; struct timeval now; getmicrouptime(&now); s = splnet(); mtx_lock(&nfs_reqq_mtx); TAILQ_FOREACH(rep, &nfs_reqq, r_chain) { nmp = rep->r_nmp; if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) continue; if (nfs_sigintr(nmp, rep, rep->r_td)) continue; if (nmp->nm_tprintf_initial_delay != 0 && (rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) && rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) { rep->r_lastmsg = now.tv_sec; nfs_down(rep, nmp, rep->r_td, "not responding", 0, NFSSTA_TIMEO); #if 0 if (!(nmp->nm_state & NFSSTA_MOUNTED)) { /* we're not yet completely mounted and */ /* we can't complete an RPC, so we fail */ nfsstats.rpctimeouts++; nfs_softterm(rep); continue; } #endif } if (rep->r_rtt >= 0) { rep->r_rtt++; if (nmp->nm_flag & NFSMNT_DUMBTIMR) timeo = nmp->nm_timeo; else timeo = NFS_RTO(nmp, proct[rep->r_procnum]); if (nmp->nm_timeouts > 0) timeo *= 
nfs_backoff[nmp->nm_timeouts - 1]; if (rep->r_rtt <= timeo) continue; if (nmp->nm_timeouts < NFS_NBACKOFF) nmp->nm_timeouts++; } if (rep->r_rexmit >= rep->r_retry) { /* too many */ nfsstats.rpctimeouts++; nfs_softterm(rep); continue; } if (nmp->nm_sotype != SOCK_DGRAM) { if (++rep->r_rexmit > NFS_MAXREXMIT) rep->r_rexmit = NFS_MAXREXMIT; /* * For NFS/TCP, setting R_MUSTRESEND and waking up * the requester will cause the request to be * retransmitted (in nfs_reply()), re-connecting * if necessary. */ rep->r_flags |= R_MUSTRESEND; wakeup_nfsreq(rep); continue; } if ((so = nmp->nm_so) == NULL) continue; /* * If there is enough space and the window allows.. * Resend it * Set r_rtt to -1 in case we fail to send it now. */ rep->r_rtt = -1; if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && ((nmp->nm_flag & NFSMNT_DUMBTIMR) || (rep->r_flags & R_SENT) || nmp->nm_sent < nmp->nm_cwnd) && - (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ + (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) { + /* + * Mark the request to indicate that a XMIT is in progress + * to prevent the req structure being removed in nfs_request(). + */ + rep->r_flags |= R_REXMIT_INPROG; mtx_unlock(&nfs_reqq_mtx); if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m, NULL, NULL, curthread); else error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m, nmp->nm_nam, NULL, curthread); mtx_lock(&nfs_reqq_mtx); + rep->r_flags &= ~R_REXMIT_INPROG; + wakeup((caddr_t)&rep->r_flags); if (error) { if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) so->so_error = 0; rep->r_flags |= R_RESENDERR; } else { /* * Iff first send, start timing * else turn timing off, backoff timer * and divide congestion window by 2. 
*/ rep->r_flags &= ~R_RESENDERR; if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_TIMING; if (++rep->r_rexmit > NFS_MAXREXMIT) rep->r_rexmit = NFS_MAXREXMIT; nmp->nm_cwnd >>= 1; if (nmp->nm_cwnd < NFS_CWNDSCALE) nmp->nm_cwnd = NFS_CWNDSCALE; nfsstats.rpcretries++; } else { rep->r_flags |= R_SENT; nmp->nm_sent += NFS_CWNDSCALE; } rep->r_rtt = 0; } } } mtx_unlock(&nfs_reqq_mtx); splx(s); callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL); } /* * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and * wait for all requests to complete. This is used by forced unmounts * to terminate any outstanding RPCs. */ int nfs_nmcancelreqs(nmp) struct nfsmount *nmp; { struct nfsreq *req; int i, s; s = splnet(); mtx_lock(&nfs_reqq_mtx); TAILQ_FOREACH(req, &nfs_reqq, r_chain) { if (nmp != req->r_nmp || req->r_mrep != NULL || (req->r_flags & R_SOFTTERM)) continue; nfs_softterm(req); } mtx_unlock(&nfs_reqq_mtx); splx(s); for (i = 0; i < 30; i++) { s = splnet(); mtx_lock(&nfs_reqq_mtx); TAILQ_FOREACH(req, &nfs_reqq, r_chain) { if (nmp == req->r_nmp) break; } mtx_unlock(&nfs_reqq_mtx); splx(s); if (req == NULL) return (0); tsleep(&lbolt, PSOCK, "nfscancel", 0); } return (EBUSY); } /* * Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT). * The nm_send count is decremented now to avoid deadlocks when the process in * soreceive() hasn't yet managed to send its own request. */ static void nfs_softterm(struct nfsreq *rep) { rep->r_flags |= R_SOFTTERM; if (rep->r_flags & R_SENT) { rep->r_nmp->nm_sent -= NFS_CWNDSCALE; rep->r_flags &= ~R_SENT; } /* * Request terminated, wakeup the blocked process, so that we * can return EINTR back. */ wakeup_nfsreq(rep); } /* * Any signal that can interrupt an NFS operation in an intr mount * should be added to this set. SIGSTOP and SIGKILL cannot be masked. 
*/ int nfs_sig_set[] = { SIGINT, SIGTERM, SIGHUP, SIGKILL, SIGSTOP, SIGQUIT }; /* * Check to see if one of the signals in our subset is pending on * the process (in an intr mount). */ static int nfs_sig_pending(sigset_t set) { int i; for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) if (SIGISMEMBER(set, nfs_sig_set[i])) return (1); return (0); } /* * The set/restore sigmask functions are used to (temporarily) overwrite * the process p_sigmask during an RPC call (for example). These are also * used in other places in the NFS client that might tsleep(). */ void nfs_set_sigmask(struct thread *td, sigset_t *oldset) { sigset_t newset; int i; struct proc *p; SIGFILLSET(newset); if (td == NULL) td = curthread; /* XXX */ p = td->td_proc; /* Remove the NFS set of signals from newset */ PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) { /* * But make sure we leave the ones already masked * by the process, ie. remove the signal from the * temporary signalmask only if it wasn't already * in p_sigmask. */ if (!SIGISMEMBER(td->td_sigmask, nfs_sig_set[i]) && !SIGISMEMBER(p->p_sigacts->ps_sigignore, nfs_sig_set[i])) SIGDELSET(newset, nfs_sig_set[i]); } mtx_unlock(&p->p_sigacts->ps_mtx); PROC_UNLOCK(p); kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0); } void nfs_restore_sigmask(struct thread *td, sigset_t *set) { if (td == NULL) td = curthread; /* XXX */ kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); } /* * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the * old one after msleep() returns. 
*/ int nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) { sigset_t oldset; int error; struct proc *p; if ((priority & PCATCH) == 0) return msleep(ident, mtx, priority, wmesg, timo); if (td == NULL) td = curthread; /* XXX */ nfs_set_sigmask(td, &oldset); error = msleep(ident, mtx, priority, wmesg, timo); nfs_restore_sigmask(td, &oldset); p = td->td_proc; return (error); } /* * NFS wrapper to tsleep(), that shoves a new p_sigmask and restores the * old one after tsleep() returns. */ int nfs_tsleep(struct thread *td, void *ident, int priority, char *wmesg, int timo) { sigset_t oldset; int error; struct proc *p; if ((priority & PCATCH) == 0) return tsleep(ident, priority, wmesg, timo); if (td == NULL) td = curthread; /* XXX */ nfs_set_sigmask(td, &oldset); error = tsleep(ident, priority, wmesg, timo); nfs_restore_sigmask(td, &oldset); p = td->td_proc; return (error); } /* * Test for a termination condition pending on the process. * This is used for NFSMNT_INT mounts. */ int nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td) { struct proc *p; sigset_t tmpset; if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) return nfs4_sigintr(nmp, rep, td); if (rep && (rep->r_flags & R_SOFTTERM)) return (EIO); /* Terminate all requests while attempting a forced unmount. */ if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) return (EIO); if (!(nmp->nm_flag & NFSMNT_INT)) return (0); if (td == NULL) return (0); p = td->td_proc; PROC_LOCK(p); tmpset = p->p_siglist; SIGSETNAND(tmpset, td->td_sigmask); mtx_lock(&p->p_sigacts->ps_mtx); SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); mtx_unlock(&p->p_sigacts->ps_mtx); if (SIGNOTEMPTY(p->p_siglist) && nfs_sig_pending(tmpset)) { PROC_UNLOCK(p); return (EINTR); } PROC_UNLOCK(p); return (0); } /* * Lock a socket against others. 
* Necessary for STREAM sockets to ensure you get an entire rpc request/reply * and also to avoid race conditions between the processes with nfs requests * in progress when a reconnect is necessary. */ int nfs_sndlock(struct nfsreq *rep) { int *statep = &rep->r_nmp->nm_state; struct thread *td; int error, slpflag = 0, slptimeo = 0; td = rep->r_td; if (rep->r_nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; while (*statep & NFSSTA_SNDLOCK) { error = nfs_sigintr(rep->r_nmp, rep, td); if (error) return (error); *statep |= NFSSTA_WANTSND; (void) tsleep(statep, slpflag | (PZERO - 1), "nfsndlck", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } *statep |= NFSSTA_SNDLOCK; return (0); } /* * Unlock the stream socket for others. */ void nfs_sndunlock(struct nfsreq *rep) { int *statep = &rep->r_nmp->nm_state; if ((*statep & NFSSTA_SNDLOCK) == 0) panic("nfs sndunlock"); *statep &= ~NFSSTA_SNDLOCK; if (*statep & NFSSTA_WANTSND) { *statep &= ~NFSSTA_WANTSND; wakeup(statep); } } /* * nfs_realign: * * Check for badly aligned mbuf data and realign by copying the unaligned * portion of the data into a new mbuf chain and freeing the portions * of the old chain that were replaced. * * We cannot simply realign the data within the existing mbuf chain * because the underlying buffers may contain other rpc commands and * we cannot afford to overwrite them. * * We would prefer to avoid this situation entirely. The situation does * not occur with NFS/UDP and is supposed to only occassionally occur * with TCP. Use vfs.nfs.realign_count and realign_test to check this. 
* */ static int nfs_realign(struct mbuf **pm, int hsiz) { struct mbuf *m; struct mbuf *n = NULL; int off = 0; ++nfs_realign_test; while ((m = *pm) != NULL) { if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { MGET(n, M_DONTWAIT, MT_DATA); if (n == NULL) return (ENOMEM); if (m->m_len >= MINCLSIZE) { MCLGET(n, M_DONTWAIT); if (n->m_ext.ext_buf == NULL) { m_freem(n); return (ENOMEM); } } n->m_len = 0; break; } pm = &m->m_next; } /* * If n is non-NULL, loop on m copying data, then replace the * portion of the chain that had to be realigned. */ if (n != NULL) { ++nfs_realign_count; while (m) { m_copyback(n, off, m->m_len, mtod(m, caddr_t)); off += m->m_len; m = m->m_next; } m_freem(*pm); *pm = n; } return (0); } static int nfs_msg(struct thread *td, const char *server, const char *msg, int error) { struct proc *p; p = td ? td->td_proc : NULL; if (error) { tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, msg, error); } else { tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); } return (0); } void nfs_down(rep, nmp, td, msg, error, flags) struct nfsreq *rep; struct nfsmount *nmp; struct thread *td; const char *msg; int error, flags; { if (nmp == NULL) return; if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 0); nmp->nm_state |= NFSSTA_TIMEO; } #ifdef NFSSTA_LOCKTIMEO if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 0); nmp->nm_state |= NFSSTA_LOCKTIMEO; } #endif if (rep) rep->r_flags |= R_TPRINTFMSG; nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); } void nfs_up(rep, nmp, td, msg, flags) struct nfsreq *rep; struct nfsmount *nmp; struct thread *td; const char *msg; int flags; { if (nmp == NULL) return; if ((rep == NULL) || (rep->r_flags & R_TPRINTFMSG) != 0) nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & 
NFSSTA_TIMEO)) { nmp->nm_state &= ~NFSSTA_TIMEO; vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 1); } #ifdef NFSSTA_LOCKTIMEO if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state &= ~NFSSTA_LOCKTIMEO; vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 1); } #endif } Index: head/sys/nfsclient/nfs_subs.c =================================================================== --- head/sys/nfsclient/nfs_subs.c (revision 148161) +++ head/sys/nfsclient/nfs_subs.c (revision 148162) @@ -1,1075 +1,1075 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_int32_t nfs_xdrneg1; u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted; u_int32_t nfs_true, nfs_false; /* And other global data */ static u_int32_t nfs_xid = 0; static enum vtype nv2tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; int nfs_ticks; int nfs_pbuf_freecnt = -1; /* start out unlimited */ struct nfs_reqq nfs_reqq; struct mtx nfs_reqq_mtx; struct mtx nfs_reply_mtx; struct nfs_bufq nfs_bufq; /* * and the reverse mapping from generic to Version 2 procedure numbers */ int nfsv2_procid[NFS_NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, 
NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; LIST_HEAD(nfsnodehashhead, nfsnode); /* * Create the header for an rpc request packet * The hsiz is the size of the rest of the nfs request header. * (just used to decide if a cluster is a good idea) */ struct mbuf * nfsm_reqhead(struct vnode *vp, u_long procid, int hsiz) { struct mbuf *mb; MGET(mb, M_TRYWAIT, MT_DATA); if (hsiz >= MINCLSIZE) MCLGET(mb, M_TRYWAIT); mb->m_len = 0; return (mb); } /* * Build the RPC header and fill in the authorization info. * The authorization string argument is only used when the credentials * come from outside of the kernel. * Returns the head of the mbuf list. */ struct mbuf * nfsm_rpchead(struct ucred *cr, int nmflag, int procid, int auth_type, int auth_len, struct mbuf *mrest, int mrest_len, struct mbuf **mbp, u_int32_t *xidp) { struct mbuf *mb; u_int32_t *tl; caddr_t bpos; int i; struct mbuf *mreq; int grpsiz, authsiz; authsiz = nfsm_rndup(auth_len); MGETHDR(mb, M_TRYWAIT, MT_DATA); if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { MCLGET(mb, M_TRYWAIT); } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); } else { MH_ALIGN(mb, 8 * NFSX_UNSIGNED); } mb->m_len = 0; mreq = mb; bpos = mtod(mb, caddr_t); /* * First the RPC header. */ tl = nfsm_build(u_int32_t *, 8 * NFSX_UNSIGNED); /* Get a pretty random xid to start with */ if (!nfs_xid) nfs_xid = random(); /* * Skip zero xid if it should ever happen. */ if (++nfs_xid == 0) nfs_xid++; *tl++ = *xidp = txdr_unsigned(nfs_xid); *tl++ = rpc_call; *tl++ = rpc_vers; *tl++ = txdr_unsigned(NFS_PROG); if (nmflag & NFSMNT_NFSV3) { *tl++ = txdr_unsigned(NFS_VER3); *tl++ = txdr_unsigned(procid); } else { *tl++ = txdr_unsigned(NFS_VER2); *tl++ = txdr_unsigned(nfsv2_procid[procid]); } /* * And then the authorization cred. 
*/ *tl++ = txdr_unsigned(auth_type); *tl = txdr_unsigned(authsiz); switch (auth_type) { case RPCAUTH_UNIX: tl = nfsm_build(u_int32_t *, auth_len); *tl++ = 0; /* stamp ?? */ *tl++ = 0; /* NULL hostname */ *tl++ = txdr_unsigned(cr->cr_uid); *tl++ = txdr_unsigned(cr->cr_groups[0]); grpsiz = (auth_len >> 2) - 5; *tl++ = txdr_unsigned(grpsiz); for (i = 1; i <= grpsiz; i++) *tl++ = txdr_unsigned(cr->cr_groups[i]); break; } /* * And the verifier... */ tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_NULL); *tl = 0; mb->m_next = mrest; mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; mreq->m_pkthdr.rcvif = NULL; *mbp = mb; return (mreq); } /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can ony handle iovcnt == 1 */ int nfsm_uiotombuf(struct uio *uiop, struct mbuf **mq, int siz, caddr_t *bpos) { char *uiocp; struct mbuf *mp, *mp2; int xfer, left, mlen; int uiosiz, clflg, rem; char *cp; #ifdef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfsm_uiotombuf: iovcnt != 1"); #endif if (siz > MLEN) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = nfsm_rndup(siz)-siz; mp = mp2 = *mq; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { MGET(mp, M_TRYWAIT, MT_DATA); if (clflg) MCLGET(mp, M_TRYWAIT); mp->m_len = 0; mp2->m_next = mp; mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet.. 
*/ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); mp->m_len += xfer; left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + uiosiz; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { MGET(mp, M_TRYWAIT, MT_DATA); mp->m_len = 0; mp2->m_next = mp; } cp = mtod(mp, caddr_t)+mp->m_len; for (left = 0; left < rem; left++) *cp++ = '\0'; mp->m_len += rem; *bpos = cp; } else *bpos = mtod(mp, caddr_t)+mp->m_len; *mq = mp; return (0); } /* * Copy a string into mbufs for the hard cases... */ int nfsm_strtmbuf(struct mbuf **mb, char **bpos, const char *cp, long siz) { struct mbuf *m1 = NULL, *m2; long left, xfer, len, tlen; u_int32_t *tl; int putsize; putsize = 1; m2 = *mb; left = M_TRAILINGSPACE(m2); if (left > 0) { tl = ((u_int32_t *)(*bpos)); *tl++ = txdr_unsigned(siz); putsize = 0; left -= NFSX_UNSIGNED; m2->m_len += NFSX_UNSIGNED; if (left > 0) { bcopy(cp, (caddr_t) tl, left); siz -= left; cp += left; m2->m_len += left; left = 0; } } /* Loop around adding mbufs */ while (siz > 0) { MGET(m1, M_TRYWAIT, MT_DATA); if (siz > MLEN) MCLGET(m1, M_TRYWAIT); m1->m_len = NFSMSIZ(m1); m2->m_next = m1; m2 = m1; tl = mtod(m1, u_int32_t *); tlen = 0; if (putsize) { *tl++ = txdr_unsigned(siz); m1->m_len -= NFSX_UNSIGNED; tlen = NFSX_UNSIGNED; putsize = 0; } if (siz < m1->m_len) { len = nfsm_rndup(siz); xfer = siz; if (xfer < len) *(tl+(xfer>>2)) = 0; } else { xfer = len = m1->m_len; } bcopy(cp, (caddr_t) tl, xfer); m1->m_len = len+tlen; siz -= xfer; cp += xfer; } *mb = m1; *bpos = mtod(m1, caddr_t)+m1->m_len; return (0); } /* * Called once to initialize data structures... 
*/ int nfs_init(struct vfsconf *vfsp) { int i; nfsmount_zone = uma_zcreate("NFSMOUNT", sizeof(struct nfsmount), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); rpc_vers = txdr_unsigned(RPC_VER2); rpc_call = txdr_unsigned(RPC_CALL); rpc_reply = txdr_unsigned(RPC_REPLY); rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); rpc_mismatch = txdr_unsigned(RPC_MISMATCH); rpc_autherr = txdr_unsigned(RPC_AUTHERR); rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); nfs_true = txdr_unsigned(TRUE); nfs_false = txdr_unsigned(FALSE); nfs_xdrneg1 = txdr_unsigned(-1); nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { nfs_iodwant[i] = NULL; nfs_iodmount[i] = NULL; } nfs_nhinit(); /* Init the nfsnode table */ /* * Initialize reply list and start timer */ TAILQ_INIT(&nfs_reqq); - callout_init(&nfs_callout, 0); + callout_init(&nfs_callout, CALLOUT_MPSAFE); mtx_init(&nfs_reqq_mtx, "NFS reqq lock", NULL, MTX_DEF); mtx_init(&nfs_reply_mtx, "Synch NFS reply posting", NULL, MTX_DEF); nfs_pbuf_freecnt = nswbuf / 2 + 1; return (0); } int nfs_uninit(struct vfsconf *vfsp) { int i; callout_stop(&nfs_callout); KASSERT(TAILQ_EMPTY(&nfs_reqq), ("nfs_uninit: request queue not empty")); /* * Tell all nfsiod processes to exit. Clear nfs_iodmax, and wakeup * any sleeping nfsiods so they check nfs_iodmax and exit. */ nfs_iodmax = 0; for (i = 0; i < nfs_numasync; i++) if (nfs_iodwant[i]) wakeup(&nfs_iodwant[i]); /* The last nfsiod to exit will wake us up when nfs_numasync hits 0 */ while (nfs_numasync) tsleep(&nfs_numasync, PWAIT, "ioddie", 0); nfs_nhuninit(); uma_zdestroy(nfsmount_zone); return (0); } /* * Attribute cache routines. 
* nfs_loadattrcache() - loads or updates the cache contents from attributes * that are on the mbuf list * nfs_getattrcache() - returns valid attributes if found in cache, returns * error otherwise */ /* * Load the attribute cache (that lives in the nfsnode entry) with * the values on the mbuf list and * Iff vap not NULL * copy the attributes to *vaper */ int nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp, struct vattr *vaper, int dontshrink) { struct vnode *vp = *vpp; struct vattr *vap; struct nfs_fattr *fp; struct nfsnode *np; int32_t t1; caddr_t cp2; int rdev; struct mbuf *md; enum vtype vtyp; u_short vmode; struct timespec mtime; int v3 = NFS_ISV3(vp); md = *mdp; t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; cp2 = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, M_TRYWAIT); if (cp2 == NULL) return EBADRPC; fp = (struct nfs_fattr *)cp2; if (v3) { vtyp = nfsv3tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), fxdr_unsigned(int, fp->fa3_rdev.specdata2)); fxdr_nfsv3time(&fp->fa3_mtime, &mtime); } else { vtyp = nfsv2tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); /* * XXX * * The duplicate information returned in fa_type and fa_mode * is an ambiguity in the NFS version 2 protocol. * * VREG should be taken literally as a regular file. If a * server intents to return some type information differently * in the upper bits of the mode field (e.g. for sockets, or * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we * leave the examination of the mode bits even in the VREG * case to avoid breakage for bogus servers, but we make sure * that there are actually type bits set in the upper part of * fa_mode (and failing that, trust the va_type field). * * NFSv3 cleared the issue, and requires fa_mode to not * contain any type information (while also introduing sockets * and FIFOs for fa_type). 
*/ if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) vtyp = IFTOVT(vmode); rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); fxdr_nfsv2time(&fp->fa2_mtime, &mtime); /* * Really ugly NFSv2 kludge. */ if (vtyp == VCHR && rdev == 0xffffffff) vtyp = VFIFO; } /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); if (vp->v_type != vtyp) { vp->v_type = vtyp; if (vp->v_type == VFIFO) vp->v_op = &nfs_fifoops; np->n_mtime = mtime; } vap = &np->n_vattr; vap->va_type = vtyp; vap->va_mode = (vmode & 07777); vap->va_rdev = rdev; vap->va_mtime = mtime; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; if (v3) { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_hyper(&fp->fa3_size); vap->va_blocksize = NFS_FABLKSIZE; vap->va_bytes = fxdr_hyper(&fp->fa3_used); vap->va_fileid = fxdr_unsigned(int32_t, fp->fa3_fileid.nfsuquad[1]); fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); vap->va_flags = 0; vap->va_filerev = 0; } else { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) * NFS_FABLKSIZE; vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); vap->va_flags = 0; vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_sec); vap->va_ctime.tv_nsec = 0; vap->va_gen = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_usec); vap->va_filerev = 0; } np->n_attrstamp = 
time_second; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (dontshrink && vap->va_size < np->n_size) { /* * We've been told not to shrink the file; * zero np->n_attrstamp to indicate that * the attributes are stale. */ vap->va_size = np->n_size; np->n_attrstamp = 0; } else if (np->n_flag & NMODIFIED) { /* * We've modified the file: Use the larger * of our size, and the server's size. */ if (vap->va_size < np->n_size) { vap->va_size = np->n_size; } else { np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; } } else { np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; } vnode_pager_setsize(vp, np->n_size); } else { np->n_size = vap->va_size; } } if (vaper != NULL) { bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } return (0); } #ifdef NFS_ACDEBUG #include SYSCTL_DECL(_vfs_nfs); static int nfs_acdebug; SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, ""); #endif /* * Check the time stamp * If the cache is valid, copy contents to *vap and return 0 * otherwise return an error */ int nfs_getattrcache(struct vnode *vp, struct vattr *vaper) { struct nfsnode *np; struct vattr *vap; struct nfsmount *nmp; int timeo; np = VTONFS(vp); vap = &np->n_vattr; nmp = VFSTONFS(vp->v_mount); /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */ timeo = (time_second - np->n_mtime.tv_sec) / 10; #ifdef NFS_ACDEBUG if (nfs_acdebug>1) printf("nfs_getattrcache: initial timeo = %d\n", timeo); #endif if (vap->va_type == VDIR) { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin) timeo = nmp->nm_acdirmin; else if (timeo > nmp->nm_acdirmax) timeo = nmp->nm_acdirmax; } else { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin) timeo = nmp->nm_acregmin; else if (timeo > nmp->nm_acregmax) timeo = nmp->nm_acregmax; } #ifdef NFS_ACDEBUG if (nfs_acdebug > 2) printf("acregmin %d; acregmax %d; 
acdirmin %d; acdirmax %d\n", nmp->nm_acregmin, nmp->nm_acregmax, nmp->nm_acdirmin, nmp->nm_acdirmax); if (nfs_acdebug) printf("nfs_getattrcache: age = %d; final timeo = %d\n", (time_second - np->n_attrstamp), timeo); #endif if ((time_second - np->n_attrstamp) >= timeo) { nfsstats.attrcache_misses++; return (ENOENT); } nfsstats.attrcache_hits++; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else { np->n_size = vap->va_size; } vnode_pager_setsize(vp, np->n_size); } else { np->n_size = vap->va_size; } } bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } return (0); } static nfsuint64 nfs_nullcookie = { { 0, 0 } }; /* * This function finds the directory cookie that corresponds to the * logical byte offset given. */ nfsuint64 * nfs_getcookie(struct nfsnode *np, off_t off, int add) { struct nfsdmap *dp, *dp2; int pos; pos = (uoff_t)off / NFS_DIRBLKSIZ; if (pos == 0 || off < 0) { #ifdef DIAGNOSTIC if (add) panic("nfs getcookie add at <= 0"); #endif return (&nfs_nullcookie); } pos--; dp = LIST_FIRST(&np->n_cookies); if (!dp) { if (add) { MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return (NULL); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (LIST_NEXT(dp, ndm_list)) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return (NULL); dp = LIST_NEXT(dp, ndm_list); } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return (NULL); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return (NULL); } return 
(&dp->ndm_cookies[pos]); } /* * Invalidate cached directory information, except for the actual directory * blocks (which are invalidated separately). * Done mainly to avoid the use of stale offset cookies. */ void nfs_invaldir(struct vnode *vp) { struct nfsnode *np = VTONFS(vp); #ifdef DIAGNOSTIC if (vp->v_type != VDIR) panic("nfs: invaldir not dir"); #endif np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; if (LIST_FIRST(&np->n_cookies)) LIST_FIRST(&np->n_cookies)->ndm_eocookie = 0; } /* * The write verifier has changed (probably due to a server reboot), so all * B_NEEDCOMMIT blocks will have to be written again. Since they are on the * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT * and B_CLUSTEROK flags. Once done the new write verifier can be set for the * mount point. * * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data * writes are not clusterable. */ void nfs_clearcommit(struct mount *mp) { struct vnode *vp, *nvp; struct buf *bp, *nbp; int s; GIANT_REQUIRED; s = splbio(); MNT_ILOCK(mp); MNT_VNODE_FOREACH(vp, mp, nvp) { VI_LOCK(vp); if (vp->v_iflag & VI_DOOMED) { VI_UNLOCK(vp); continue; } MNT_IUNLOCK(mp); TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { if (BUF_REFCNT(bp) == 0 && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); } VI_UNLOCK(vp); MNT_ILOCK(mp); } MNT_IUNLOCK(mp); splx(s); } /* * Helper functions for former macros. Some of these should be * moved to their callers. 
*/

/*
 * Dissect an optional file handle plus optional attributes from a reply.
 * For v3 each part is preceded by a "present" word; for v2 the handle is
 * always present.  When a handle is present the matching nfsnode is
 * obtained via nfs_nget() and its vnode stored in *v.  On return *f is
 * non-zero only if attributes were loaded for that vnode.
 */
int
nfsm_mtofh_xx(struct vnode *d, struct vnode **v, int v3, int *f,
    struct mbuf **md, caddr_t *dpos)
{
	struct nfsnode *np;
	struct vnode *vp;
	nfsfh_t *fhp;
	u_int32_t *wp;
	int fhsize;
	int attrflag;
	int error;

	if (!v3) {
		*f = 1;
	} else {
		wp = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
		if (wp == NULL)
			return (EBADRPC);
		*f = fxdr_unsigned(int, *wp);
	}

	if (*f) {
		error = nfsm_getfh_xx(&fhp, &fhsize, (v3), md, dpos);
		if (error != 0)
			return (error);
		error = nfs_nget(d->v_mount, fhp, fhsize, &np);
		if (error != 0)
			return (error);
		*v = NFSTOV(np);
	}

	if (v3) {
		wp = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
		if (wp == NULL)
			return (EBADRPC);
		attrflag = fxdr_unsigned(int, *wp);
		if (*f)
			*f = attrflag;
		else if (attrflag)
			/* No handle, but attributes follow: step over them. */
			nfsm_adv_xx(NFSX_V3FATTR, md, dpos);
	}

	if (!*f)
		return (0);

	vp = *v;
	error = nfs_loadattrcache(&vp, md, dpos, NULL, 0);
	if (error)
		return (error);
	*v = vp;
	return (0);
}

/*
 * Dissect a file handle: *s receives its size (read from the stream and
 * range-checked for v3, fixed at NFSX_V2FH otherwise) and *f a pointer
 * to the handle bytes.  Returns EBADRPC on any dissect or range failure.
 */
int
nfsm_getfh_xx(nfsfh_t **f, int *s, int v3, struct mbuf **md, caddr_t *dpos)
{
	u_int32_t *wp;

	if (!v3) {
		*s = NFSX_V2FH;
	} else {
		wp = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
		if (wp == NULL)
			return (EBADRPC);
		*s = fxdr_unsigned(int, *wp);
		if (*s <= 0 || *s > NFSX_V3FHMAX)
			return (EBADRPC);
	}

	*f = nfsm_dissect_xx(nfsm_rndup(*s), md, dpos);
	return (*f == NULL ? EBADRPC : 0);
}

/*
 * Load attributes for *v through nfs_loadattrcache(), copying them to
 * *va.  *v is rewritten only on success.
 */
int
nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md,
    caddr_t *dpos)
{
	struct vnode *vp = *v;
	int error;

	error = nfs_loadattrcache(&vp, md, dpos, va, 0);
	if (error == 0)
		*v = vp;
	return (error);
}

/*
 * Dissect a "present" word into *f and, when it is non-zero, load the
 * attributes that follow (with dontshrink set).  *f is reset to 0 if
 * that load fails.
 */
int
nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md,
    caddr_t *dpos)
{
	struct vnode *vp = *v;
	u_int32_t *wp;
	int error;

	wp = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
	if (wp == NULL)
		return (EBADRPC);

	*f = fxdr_unsigned(int, *wp);
	if (*f == 0)
		return (0);

	error = nfs_loadattrcache(&vp, md, dpos, NULL, 1);
	if (error != 0) {
		*f = 0;
		return (error);
	}
	*v = vp;
	return (0);
}

/*
 * Dissect pre-operation and post-operation attribute data.  If *f is
 * non-zero on entry it is replaced by whether words 2 and 3 of the
 * pre-operation data equal the node's cached n_mtime (0 when that data
 * is absent); otherwise it receives the post-operation "present" flag.
 */
int
nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md, caddr_t *dpos)
{
	u_int32_t *wp;
	int postflag;
	int unchanged = 0;
	int error;

	wp = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
	if (wp == NULL)
		return (EBADRPC);

	if (*wp == nfs_true) {
		wp = nfsm_dissect_xx(6 * NFSX_UNSIGNED, md, dpos);
		if (wp == NULL)
			return (EBADRPC);
		if (*f) {
			unchanged = (VTONFS(*v)->n_mtime.tv_sec ==
			    fxdr_unsigned(u_int32_t, wp[2]) &&
			    VTONFS(*v)->n_mtime.tv_nsec ==
			    fxdr_unsigned(u_int32_t, wp[3]));
		}
	}

	error = nfsm_postop_attr_xx(v, &postflag, md, dpos);
	if (error)
		return (error);

	*f = *f ? unchanged : postflag;
	return (0);
}

/*
 * Append the s-byte string a as a length word followed by zero-padded
 * data.  Fails with ENAMETOOLONG when s exceeds m.  The encoding is
 * built in place when it fits in the current mbuf's trailing space and
 * is handed to nfsm_strtmbuf() otherwise.
 */
int
nfsm_strtom_xx(const char *a, int s, int m, struct mbuf **mb, caddr_t *bpos)
{
	u_int32_t *wp;
	int len;

	if (s > m)
		return (ENAMETOOLONG);

	len = nfsm_rndup(s) + NFSX_UNSIGNED;
	if (len > M_TRAILINGSPACE(*mb))
		return (nfsm_strtmbuf(mb, bpos, a, s));

	wp = nfsm_build_xx(len, mb, bpos);
	wp[0] = txdr_unsigned(s);
	wp[(len >> 2) - 1] = 0;		/* zero the last word before the copy */
	bcopy(a, wp + 1, s);
	return (0);
}

/*
 * Append the file handle of v: length-prefixed and padded for v3, a
 * fixed NFSX_V2FH bytes otherwise.
 */
int
nfsm_fhtom_xx(struct vnode *v, int v3, struct mbuf **mb, caddr_t *bpos)
{
	u_int32_t *wp;
	caddr_t dst;
	int len;

	if (!v3) {
		dst = nfsm_build_xx(NFSX_V2FH, mb, bpos);
		bcopy(VTONFS(v)->n_fhp, dst, NFSX_V2FH);
		return (0);
	}

	len = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED;
	if (len >= M_TRAILINGSPACE(*mb))
		return (nfsm_strtmbuf(mb, bpos,
		    (const char *)VTONFS(v)->n_fhp, VTONFS(v)->n_fhsize));

	wp = nfsm_build_xx(len, mb, bpos);
	wp[0] = txdr_unsigned(VTONFS(v)->n_fhsize);
	wp[(len >> 2) - 1] = 0;		/* zero the last word before the copy */
	bcopy(VTONFS(v)->n_fhp, wp + 1, VTONFS(v)->n_fhsize);
	return (0);
}

/*
 * Append the v3 settable attributes taken from *va.  Mode is emitted
 * whenever it is set; uid, gid and size only when full is non-zero.
 * Each time field is encoded as "don't change" when unset, "set to
 * server time" when its seconds equal time_second, and as an explicit
 * client time otherwise.
 */
void
nfsm_v3attrbuild_xx(struct vattr *va, int full, struct mbuf **mb,
    caddr_t *bpos)
{
	u_int32_t *wp;

	if (va->va_mode == (mode_t)VNOVAL) {
		wp = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		wp[0] = nfs_false;
	} else {
		wp = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos);
		wp[0] = nfs_true;
		wp[1] = txdr_unsigned(va->va_mode);
	}

	if (!full || va->va_uid == (uid_t)VNOVAL) {
		wp = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		wp[0] = nfs_false;
	} else {
		wp = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos);
		wp[0] = nfs_true;
		wp[1] = txdr_unsigned(va->va_uid);
	}

	if (!full || va->va_gid == (gid_t)VNOVAL) {
		wp = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		wp[0] = nfs_false;
	} else {
		wp = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos);
		wp[0] = nfs_true;
		wp[1] = txdr_unsigned(va->va_gid);
	}

	if (!full || va->va_size == VNOVAL) {
		wp = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		wp[0] = nfs_false;
	} else {
		wp = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos);
		wp[0] = nfs_true;
		txdr_hyper(va->va_size, wp + 1);
	}

	if (va->va_atime.tv_sec == VNOVAL) {
		wp = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		wp[0] = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
	} else if (va->va_atime.tv_sec == time_second) {
		wp = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		wp[0] = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
	} else {
		wp = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos);
		wp[0] = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
		txdr_nfsv3time(&va->va_atime, wp + 1);
	}

	if (va->va_mtime.tv_sec == VNOVAL) {
		wp = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		wp[0] = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
	} else if (va->va_mtime.tv_sec == time_second) {
		wp = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		wp[0] = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
	} else {
		wp = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos);
		wp[0] = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
		txdr_nfsv3time(&va->va_mtime, wp + 1);
	}
}