Index: head/sys/nfsclient/nfs.h =================================================================== --- head/sys/nfsclient/nfs.h (revision 158904) +++ head/sys/nfsclient/nfs.h (revision 158905) @@ -1,346 +1,347 @@ /*- * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
 *	@(#)nfs.h	8.4 (Berkeley) 5/1/95
 * $FreeBSD$
 */

/* Include guard for this header. */
#ifndef _NFSCLIENT_NFS_H_
#define _NFSCLIENT_NFS_H_

/* opt_nfs.h is only included for kernel builds. */
#ifdef _KERNEL
#include "opt_nfs.h"
#endif

/*
 * NOTE(review): the header name of the following #include is missing in
 * this copy of the file; restore it from the upstream source.
 */
#include

/*
 * Tunable constants for nfs
 */
/*
 * Timeouts are counted in NFS ticks: NFS_HZ is hz divided by nfs_ticks,
 * so N * NFS_HZ is N seconds worth of ticks.
 */
#define	NFS_TICKINTVL	10		/* Desired time for a tick (msec) */
#define NFS_HZ		(hz / nfs_ticks) /* Ticks/sec */
#define	NFS_TIMEO	(1 * NFS_HZ)	/* Default timeout = 1 second */
#define	NFS_MINTIMEO	(1 * NFS_HZ)	/* Min timeout to use */
#define	NFS_MAXTIMEO	(60 * NFS_HZ)	/* Max timeout to backoff to */
#define	NFS_MINIDEMTIMEO (5 * NFS_HZ)	/* Min timeout for non-idempotent ops*/
#define	NFS_MAXREXMIT	100		/* Stop counting after this many */
#define	NFS_RETRANS	10		/* Num of retrans for UDP soft mounts */
#define	NFS_RETRANS_TCP	2		/* Num of retrans for TCP soft mounts */
#define	NFS_MAXGRPS	16		/* Max. size of groups list */
/*
 * The four attribute-cache bounds below are only defined when not already
 * defined, so a build can override any of them.
 */
#ifndef NFS_MINATTRTIMO
#define	NFS_MINATTRTIMO 3		/* VREG attrib cache timeout in sec */
#endif
#ifndef NFS_MAXATTRTIMO
#define	NFS_MAXATTRTIMO 60		/* presumably the VREG upper bound in sec; TODO confirm */
#endif
#ifndef NFS_MINDIRATTRTIMO
#define	NFS_MINDIRATTRTIMO 30		/* VDIR attrib cache timeout in sec */
#endif
#ifndef NFS_MAXDIRATTRTIMO
#define	NFS_MAXDIRATTRTIMO 60		/* presumably the VDIR upper bound in sec; TODO confirm */
#endif
#define	NFS_WSIZE	8192		/* Def. write data size <= 8192 */
#define	NFS_RSIZE	8192		/* Def. read data size <= 8192 */
#define NFS_READDIRSIZE	8192		/* Def. readdir size */
#define	NFS_DEFRAHEAD	1		/* Def. read ahead # blocks */
#define	NFS_MAXRAHEAD	4		/* Max. read ahead # blocks */
#define	NFS_MAXASYNCDAEMON	64	/* Max. number async_daemons runnable */
#define	NFS_DIRBLKSIZ	4096		/* Must be a multiple of DIRBLKSIZ */
#ifdef _KERNEL
#define	DIRBLKSIZ	512		/* XXX we used to use ufs's DIRBLKSIZ */
#endif
#define NFS_MAXDEADTHRESH	9	/* How long till we say 'server not responding' */

/*
 * Oddballs
 */
/*
 * NFS_CMPFH is true when node n holds a file handle of size s whose bytes
 * compare equal (bcmp) to the s bytes at f.
 */
#define NFS_CMPFH(n, f, s) \
	((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s)))
/* Nonzero when the mount that owns vnode v has NFSMNT_NFSV3 set in nm_flag. */
#define NFS_ISV3(v)	(VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3)
/* Same test for NFSMNT_NFSV4; this is the line added by the revision. */
+#define NFS_ISV4(v)	(VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV4)

/* NFSSTA_* state bits (presumably kept per mount; TODO confirm the field). */
#define NFSSTA_HASWRITEVERF	0x00040000  /* Has write verifier for V3 */
#define NFSSTA_GOTFSINFO	0x00100000  /* Got the V3 fsinfo */
#define	NFSSTA_SNDLOCK		0x01000000  /* Send socket lock */
#define	NFSSTA_WANTSND		0x02000000  /* Want above */
#define	NFSSTA_TIMEO		0x10000000  /* Experiencing a timeout */

/*
 * XXX to allow amd to include nfs.h without nfsproto.h
 */
/*
 * NOTE(review): the header name of the #include guarded by NFS_NPROCS is
 * missing in this copy of the file.
 */
#ifdef NFS_NPROCS
#include
#endif

/*
 * vfs.nfs sysctl(3) identifiers
 */
#define NFS_NFSSTATS	1		/* struct: struct nfsstats */

/*
 * File context information for nfsv4.	Currently, there is only one
 * lockowner for the whole machine "0."
*/
/*
 * NOTE(review): the list linkage below names struct nfs4_fstate although
 * the enclosing structure is struct nfs4_fctx; confirm the tag is intended.
 */
struct nfs4_fctx {
	TAILQ_ENTRY(nfs4_fstate) next;	/* list linkage */
	uint32_t	refcnt;		/* presumably a reference count; TODO confirm */
	struct nfs4_lowner *lop;	/* presumably the lock owner; TODO confirm */
	struct nfsnode *np;		/* presumably the owning nfsnode; TODO confirm */
	char		stateid[NFSX_V4STATEID]; /* NFSX_V4STATEID bytes of state id */
};

/* Everything from here to the closing _KERNEL #endif is kernel-only. */
#ifdef _KERNEL

/* Malloc types are only declared when MALLOC_DECLARE is available. */
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_NFSREQ);
MALLOC_DECLARE(M_NFSDIROFF);
MALLOC_DECLARE(M_NFSBIGFH);
MALLOC_DECLARE(M_NFSHASH);
MALLOC_DECLARE(M_NFSDIRECTIO);
#endif

/* Globals shared between the NFS client source files. */
extern struct uma_zone *nfsmount_zone;
extern struct callout nfs_callout;
extern struct nfsstats nfsstats;
extern struct mtx nfs_iod_mtx;		/* mutex taken around the nfsiod bookkeeping */
extern int nfs_numasync;		/* count of nfsiod kthreads */
extern unsigned int nfs_iodmax;		/* upper limit on nfsiod kthreads */
extern int nfs_pbuf_freecnt;
extern int nfs_ticks;			/* divisor used by NFS_HZ */

/* Data constants in XDR form */
extern u_int32_t nfs_true, nfs_false, nfs_xdrneg1;
extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers;
extern u_int32_t rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr;

extern int nfsv3_procid[NFS_NPROCS];

/* Forward declarations so later declarations can name these types by pointer. */
struct uio;
struct buf;
struct vattr;
struct nameidata;

/*
 * Socket errors ignored for connectionless sockets??
 * For now, ignore them all
 */
/*
 * True when e is none of EINTR, EIO, ERESTART or EWOULDBLOCK and the
 * flags in s do not include PR_CONNREQUIRED.
 */
#define	NFSIGNORE_SOERROR(s, e) \
		((e) != EINTR && (e) != EIO && \
		(e) != ERESTART && (e) != EWOULDBLOCK && \
		((s) & PR_CONNREQUIRED) == 0)

/*
 * Nfs outstanding request list element
 */
struct nfsreq {
	TAILQ_ENTRY(nfsreq) r_chain;	/* linkage on the nfs_reqq queue */
	struct mbuf	*r_mreq;	/* presumably the request mbuf chain; TODO confirm */
	struct mbuf	*r_mrep;	/* presumably the reply mbuf chain; TODO confirm */
	struct mbuf	*r_md;
	caddr_t		r_dpos;
	struct nfsmount *r_nmp;		/* mount the request belongs to */
	struct vnode	*r_vp;
	u_int32_t	r_xid;
	int		r_flags;	/* flags on request, see below */
	int		r_retry;	/* max retransmission count */
	int		r_rexmit;	/* current retrans count */
	int		r_timer;	/* tick counter on reply */
	u_int32_t	r_procnum;	/* NFS procedure number */
	int		r_rtt;		/* RTT for rpc */
	int		r_lastmsg;	/* last tprintf */
	struct thread	*r_td;		/* Proc that did I/O system call */
	struct mtx	r_mtx;		/* Protects nfsreq fields */
};

/*
 * Queue head for nfsreq's
 */
extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq;

/* Flag values for r_flags */
#define R_TIMING	0x01		/* timing request (in mntp) */
#define R_SENT		0x02		/* request has been sent */
#define	R_SOFTTERM	0x04		/* soft mnt, too many
retries */ #define R_RESENDERR 0x08 /* Resend failed */ #define R_SOCKERR 0x10 /* Fatal error on socket */ #define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */ #define R_MUSTRESEND 0x40 /* Must resend request */ #define R_GETONEREP 0x80 /* Probe for one reply only */ #define R_REXMIT_INPROG 0x100 /* Re-transmit in progress */ /* * Pointers to ops that differ from v3 to v4 */ struct nfs_rpcops { int (*nr_readrpc)(struct vnode *vp, struct uio *uiop, struct ucred *cred); int (*nr_writerpc)(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *iomode, int *must_commit); int (*nr_writebp)(struct buf *bp, int force, struct thread *td); int (*nr_readlinkrpc)(struct vnode *vp, struct uio *uiop, struct ucred *cred); void (*nr_invaldir)(struct vnode *vp); int (*nr_commit)(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct thread *td); }; /* * Defines for WebNFS */ #define WEBNFS_ESC_CHAR '%' #define WEBNFS_SPECCHAR_START 0x80 #define WEBNFS_NATIVE_CHAR 0x80 /* * .. * Possibly more here in the future. */ /* * Macro for converting escape characters in WebNFS pathnames. * Should really be in libkern. */ #define HEXTOC(c) \ ((c) >= 'a' ? ((c) - ('a' - 10)) : \ ((c) >= 'A' ? ((c) - ('A' - 10)) : ((c) - '0'))) #define HEXSTRTOI(p) \ ((HEXTOC(p[0]) << 4) + HEXTOC(p[1])) /* nfs_sigintr() helper, when 'rep' has all we need */ #define NFS_SIGREP(rep) nfs_sigintr((rep)->r_nmp, (rep), (rep)->r_td) #ifdef NFS_DEBUG extern int nfs_debug; #define NFS_DEBUG_ASYNCIO 1 /* asynchronous i/o */ #define NFS_DEBUG_WG 2 /* server write gathering */ #define NFS_DEBUG_RC 4 /* server request caching */ #define NFS_DPF(cat, args) \ do { \ if (nfs_debug & NFS_DEBUG_##cat) printf args; \ } while (0) #else #define NFS_DPF(cat, args) #endif /* * On fast networks, the estimator will try to reduce the * timeout lower than the latency of the server's disks, * which results in too many timeouts, so cap the lower * bound. 
*/ #define NFS_MINRTO (NFS_HZ >> 2) /* * Keep the RTO from increasing to unreasonably large values * when a server is not responding. */ #define NFS_MAXRTO (20 * NFS_HZ) enum nfs_rto_timer_t { NFS_DEFAULT_TIMER, NFS_GETATTR_TIMER, NFS_LOOKUP_TIMER, NFS_READ_TIMER, NFS_WRITE_TIMER, }; #define NFS_MAX_TIMER (NFS_WRITE_TIMER) #define NFS_INITRTT (NFS_HZ << 3) vfs_init_t nfs_init; vfs_uninit_t nfs_uninit; int nfs_mountroot(struct mount *mp, struct thread *td); #ifndef NFS4_USE_RPCCLNT int nfs_send(struct socket *, struct sockaddr *, struct mbuf *, struct nfsreq *); int nfs_sndlock(struct nfsreq *); void nfs_sndunlock(struct nfsreq *); #endif /* ! NFS4_USE_RPCCLNT */ int nfs_vinvalbuf(struct vnode *, int, struct thread *, int); int nfs_readrpc(struct vnode *, struct uio *, struct ucred *); int nfs_writerpc(struct vnode *, struct uio *, struct ucred *, int *, int *); int nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct thread *td); int nfs_readdirrpc(struct vnode *, struct uio *, struct ucred *); int nfs_nfsiodnew(void); int nfs_asyncio(struct nfsmount *, struct buf *, struct ucred *, struct thread *); int nfs_doio(struct vnode *, struct buf *, struct ucred *, struct thread *); void nfs_doio_directwrite (struct buf *); void nfs_up(struct nfsreq *, struct nfsmount *, struct thread *, const char *, int); void nfs_down(struct nfsreq *, struct nfsmount *, struct thread *, const char *, int, int); int nfs_readlinkrpc(struct vnode *, struct uio *, struct ucred *); int nfs_sigintr(struct nfsmount *, struct nfsreq *, struct thread *); int nfs_readdirplusrpc(struct vnode *, struct uio *, struct ucred *); int nfs_request(struct vnode *, struct mbuf *, int, struct thread *, struct ucred *, struct mbuf **, struct mbuf **, caddr_t *); int nfs_loadattrcache(struct vnode **, struct mbuf **, caddr_t *, struct vattr *, int); int nfsm_mbuftouio(struct mbuf **, struct uio *, int, caddr_t *); void nfs_nhinit(void); void nfs_nhuninit(void); int 
nfs_nmcancelreqs(struct nfsmount *); void nfs_timer(void*); int nfs_connect(struct nfsmount *, struct nfsreq *); void nfs_disconnect(struct nfsmount *); void nfs_safedisconnect(struct nfsmount *); int nfs_getattrcache(struct vnode *, struct vattr *); int nfsm_strtmbuf(struct mbuf **, char **, const char *, long); int nfs_bioread(struct vnode *, struct uio *, int, struct ucred *); int nfsm_uiotombuf(struct uio *, struct mbuf **, int, caddr_t *); void nfs_clearcommit(struct mount *); int nfs_writebp(struct buf *, int, struct thread *); int nfs_fsinfo(struct nfsmount *, struct vnode *, struct ucred *, struct thread *); int nfs_meta_setsize (struct vnode *, struct ucred *, struct thread *, u_quad_t); void nfs_set_sigmask __P((struct thread *td, sigset_t *oldset)); void nfs_restore_sigmask __P((struct thread *td, sigset_t *set)); int nfs_msleep __P((struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo)); #endif /* _KERNEL */ #endif Index: head/sys/nfsclient/nfs_nfsiod.c =================================================================== --- head/sys/nfsclient/nfs_nfsiod.c (revision 158904) +++ head/sys/nfsclient/nfs_nfsiod.c (revision 158905) @@ -1,297 +1,305 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_NFSSVC, "nfsclient_srvsock", "Nfs server structure"); static void nfssvc_iod(void *); static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; SYSCTL_DECL(_vfs_nfs); /* Maximum number of seconds a nfsiod kthread will sleep before exiting */ static unsigned int nfs_iodmaxidle = 120; SYSCTL_UINT(_vfs_nfs, OID_AUTO, iodmaxidle, CTLFLAG_RW, &nfs_iodmaxidle, 0, ""); /* Maximum number of nfsiod kthreads */ unsigned int nfs_iodmax = 20; /* Minimum number of nfsiod kthreads to keep as spares */ static unsigned int nfs_iodmin = 0; static int sysctl_iodmin(SYSCTL_HANDLER_ARGS) { int error, i; int newmin; newmin = nfs_iodmin; error = 
sysctl_handle_int(oidp, &newmin, 0, req); if (error || (req->newptr == NULL)) return (error); mtx_lock(&nfs_iod_mtx); if (newmin > nfs_iodmax) { error = EINVAL; goto out; } nfs_iodmin = newmin; if (nfs_numasync >= nfs_iodmin) goto out; /* * If the current number of nfsiod is lower * than the new minimum, create some more. */ for (i = nfs_iodmin - nfs_numasync; i > 0; i--) nfs_nfsiodnew(); out: mtx_unlock(&nfs_iod_mtx); return (0); } SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmin, CTLTYPE_UINT | CTLFLAG_RW, 0, sizeof (nfs_iodmin), sysctl_iodmin, "IU", ""); static int sysctl_iodmax(SYSCTL_HANDLER_ARGS) { int error, i; int iod, newmax; newmax = nfs_iodmax; error = sysctl_handle_int(oidp, &newmax, 0, req); if (error || (req->newptr == NULL)) return (error); if (newmax > NFS_MAXASYNCDAEMON) return (EINVAL); mtx_lock(&nfs_iod_mtx); nfs_iodmax = newmax; if (nfs_numasync <= nfs_iodmax) goto out; /* * If there are some asleep nfsiods that should * exit, wakeup() them so that they check nfs_iodmax * and exit. Those who are active will exit as * soon as they finish I/O. 
*/ iod = nfs_numasync - 1; for (i = 0; i < nfs_numasync - nfs_iodmax; i++) { if (nfs_iodwant[iod]) wakeup(&nfs_iodwant[iod]); iod--; } out: mtx_unlock(&nfs_iod_mtx); return (0); } SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmax, CTLTYPE_UINT | CTLFLAG_RW, 0, sizeof (nfs_iodmax), sysctl_iodmax, "IU", ""); int nfs_nfsiodnew(void) { int error, i; int newiod; if (nfs_numasync >= nfs_iodmax) return (-1); newiod = -1; for (i = 0; i < nfs_iodmax; i++) if (nfs_asyncdaemon[i] == 0) { nfs_asyncdaemon[i]++; newiod = i; break; } if (newiod == -1) return (-1); mtx_unlock(&nfs_iod_mtx); error = kthread_create(nfssvc_iod, nfs_asyncdaemon + i, NULL, RFHIGHPID, 0, "nfsiod %d", newiod); mtx_lock(&nfs_iod_mtx); if (error) return (-1); nfs_numasync++; return (newiod); } static void nfsiod_setup(void *dummy) { int i; int error; TUNABLE_INT_FETCH("vfs.nfs.iodmin", &nfs_iodmin); mtx_lock(&nfs_iod_mtx); /* Silently limit the start number of nfsiod's */ if (nfs_iodmin > NFS_MAXASYNCDAEMON) nfs_iodmin = NFS_MAXASYNCDAEMON; for (i = 0; i < nfs_iodmin; i++) { error = nfs_nfsiodnew(); if (error == -1) panic("nfsiod_setup: nfs_nfsiodnew failed"); } mtx_unlock(&nfs_iod_mtx); } SYSINIT(nfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL); static int nfs_defect = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, ""); /* * Asynchronous I/O daemons for client nfs. * They do read-ahead and write-behind operations on the block I/O cache. * Returns if we hit the timeout defined by the iodmaxidle sysctl. */ static void nfssvc_iod(void *instance) { struct buf *bp; struct nfsmount *nmp; int myiod, timo; int error = 0; mtx_lock(&nfs_iod_mtx); myiod = (int *)instance - nfs_asyncdaemon; /* * Main loop */ for (;;) { while (((nmp = nfs_iodmount[myiod]) == NULL || !TAILQ_FIRST(&nmp->nm_bufq)) && error == 0) { if (myiod >= nfs_iodmax) goto finish; if (nmp) nmp->nm_bufqiods--; nfs_iodwant[myiod] = curthread->td_proc; nfs_iodmount[myiod] = NULL; /* * Always keep at least nfs_iodmin kthreads. 
*/ timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz; error = msleep(&nfs_iodwant[myiod], &nfs_iod_mtx, PWAIT | PCATCH, "-", timo); } if (error) break; while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) { + int giant_locked = 0; + /* Take one off the front of the list */ TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist); nmp->nm_bufqlen--; if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) { nmp->nm_bufqwant = 0; wakeup(&nmp->nm_bufq); } mtx_unlock(&nfs_iod_mtx); + if (NFS_ISV4(bp->b_vp)) { + giant_locked = 1; + mtx_lock(&Giant); + } if (bp->b_flags & B_DIRECT) { KASSERT((bp->b_iocmd == BIO_WRITE), ("nfscvs_iod: BIO_WRITE not set")); (void)nfs_doio_directwrite(bp); } else { if (bp->b_iocmd == BIO_READ) (void) nfs_doio(bp->b_vp, bp, bp->b_rcred, NULL); else (void) nfs_doio(bp->b_vp, bp, bp->b_wcred, NULL); } + if (giant_locked) + mtx_unlock(&Giant); mtx_lock(&nfs_iod_mtx); /* * If there are more than one iod on this mount, then defect * so that the iods can be shared out fairly between the mounts */ if (nfs_defect && nmp->nm_bufqiods > 1) { NFS_DPF(ASYNCIO, ("nfssvc_iod: iod %d defecting from mount %p\n", myiod, nmp)); nfs_iodmount[myiod] = NULL; nmp->nm_bufqiods--; break; } } } finish: nfs_asyncdaemon[myiod] = 0; if (nmp) nmp->nm_bufqiods--; nfs_iodwant[myiod] = NULL; nfs_iodmount[myiod] = NULL; /* Someone may be waiting for the last nfsiod to terminate. */ if (--nfs_numasync == 0) wakeup(&nfs_numasync); mtx_unlock(&nfs_iod_mtx); if ((error == 0) || (error == EWOULDBLOCK)) kthread_exit(0); /* Abnormal termination */ kthread_exit(1); }