Index: head/sys/contrib/ipfilter/netinet/ip_compat.h =================================================================== --- head/sys/contrib/ipfilter/netinet/ip_compat.h (revision 328416) +++ head/sys/contrib/ipfilter/netinet/ip_compat.h (revision 328417) @@ -1,1502 +1,1502 @@ /* * Copyright (C) 2012 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. * * @(#)ip_compat.h 1.8 1/14/96 * $FreeBSD$ * Id: ip_compat.h,v 2.142.2.57 2007/10/10 09:51:42 darrenr Exp $ */ #ifndef __IP_COMPAT_H__ #define __IP_COMPAT_H__ #ifndef __P # ifdef __STDC__ # define __P(x) x # else # define __P(x) () # endif #endif #ifndef __STDC__ # undef const # define const #endif #if defined(_KERNEL) || defined(KERNEL) || defined(__KERNEL__) # undef KERNEL # undef _KERNEL # undef __KERNEL__ # define KERNEL # define _KERNEL # define __KERNEL__ #endif #ifndef SOLARIS # if defined(sun) && (defined(__svr4__) || defined(__SVR4)) # define SOLARIS 1 # else # define SOLARIS 0 # endif #endif #if defined(__SVR4) || defined(__svr4__) || defined(__sgi) # define index strchr # if !defined(_KERNEL) # define bzero(a,b) memset(a,0,b) # define bcmp memcmp # define bcopy(a,b,c) memmove(b,a,c) # endif #endif #ifndef LIFNAMSIZ # ifdef IF_NAMESIZE # define LIFNAMSIZ IF_NAMESIZE # else # ifdef IFNAMSIZ # define LIFNAMSIZ IFNAMSIZ # else # define LIFNAMSIZ 16 # endif # endif #endif #if defined(__sgi) || defined(bsdi) || defined(__hpux) || defined(hpux) struct ether_addr { u_char ether_addr_octet[6]; }; #endif # ifdef __STDC__ # define IPL_EXTERN(ep) ipl##ep # else # define IPL_EXTERN(ep) ipl/**/ep # endif /* * This is a workaround for troubles on FreeBSD and OpenBSD. */ # ifndef _KERNEL # define ADD_KERNEL # define _KERNEL # define KERNEL # endif # include # ifdef ADD_KERNEL # undef _KERNEL # undef KERNEL # endif #define NETBSD_GE_REV(x) (defined(__NetBSD_Version__) && \ (__NetBSD_Version__ >= (x))) #define NETBSD_GT_REV(x) (defined(__NetBSD_Version__) && \ (__NetBSD_Version__ > (x))) #define NETBSD_LT_REV(x) (defined(__NetBSD_Version__) && \ (__NetBSD_Version__ < (x))) #define FREEBSD_GE_REV(x) (defined(__FreeBSD_version) && \ (__FreeBSD_version >= (x))) #define FREEBSD_GT_REV(x) (defined(__FreeBSD_version) && \ (__FreeBSD_version > (x))) #define FREEBSD_LT_REV(x) (defined(__FreeBSD_version) && \ (__FreeBSD_version < (x))) #define BSDOS_GE_REV(x) (defined(_BSDI_VERSION) && \ (_BSDI_VERSION >= (x))) #define BSDOS_GT_REV(x) (defined(_BSDI_VERSION) && \ (_BSDI_VERSION > (x))) #define BSDOS_LT_REV(x) (defined(_BSDI_VERSION) && \ (_BSDI_VERSION < (x))) #define OPENBSD_GE_REV(x) (defined(OpenBSD) && (OpenBSD >= (x))) #define OPENBSD_GT_REV(x) (defined(OpenBSD) && (OpenBSD > (x))) #define OPENBSD_LT_REV(x) (defined(OpenBSD) && (OpenBSD < (x))) #define BSD_GE_YEAR(x) (defined(BSD) && (BSD >= (x))) #define BSD_GT_YEAR(x) (defined(BSD) && (BSD > (x))) #define BSD_LT_YEAR(x) (defined(BSD) && (BSD < (x))) /* ----------------------------------------------------------------------- */ /* F R E E B S D */ /* ----------------------------------------------------------------------- */ # define HAS_SYS_MD5_H 1 # if defined(_KERNEL) # include "opt_bpf.h" # include "opt_inet6.h" # if defined(INET6) && !defined(USE_INET6) # define USE_INET6 # endif # else # if !defined(USE_INET6) && !defined(NOINET6) # define USE_INET6 # endif # endif # if defined(_KERNEL) # include # define p_cred td_ucred # define p_uid td_ucred->cr_ruid /* * When #define'd, the 5.2.1 kernel panics when used with the ftp proxy. * There may be other, safe, kernels but this is not extensively tested yet. */ # define HAVE_M_PULLDOWN # if !defined(IPFILTER_LKM) && (__FreeBSD_version >= 300000) # include "opt_ipfilter.h" # endif # define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) # define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) # define NETBSD_PF # else # include # endif /* _KERNEL */ # include # include # include # include # define KRWLOCK_FILL_SZ 56 # define KMUTEX_FILL_SZ 56 # include # define KMUTEX_T struct mtx # define KRWLOCK_T struct rwlock #ifdef _KERNEL # define READ_ENTER(x) rw_rlock(&(x)->ipf_lk) # define WRITE_ENTER(x) rw_wlock(&(x)->ipf_lk) # define MUTEX_DOWNGRADE(x) rw_downgrade(&(x)->ipf_lk) # define MUTEX_TRY_UPGRADE(x) rw_try_upgrade(&(x)->ipf_lk) # define RWLOCK_INIT(x,y) rw_init(&(x)->ipf_lk, (y)) # define RW_DESTROY(x) rw_destroy(&(x)->ipf_lk) # define RWLOCK_EXIT(x) do { \ if (rw_wowned(&(x)->ipf_lk)) \ rw_wunlock(&(x)->ipf_lk); \ else \ rw_runlock(&(x)->ipf_lk); \ } while (0) # include # define GETKTIME(x) microtime((struct timeval *)x) # define if_addrlist if_addrhead # include # include # include # define USE_MUTEXES # define MUTEX_ENTER(x) mtx_lock(&(x)->ipf_lk) # define MUTEX_EXIT(x) mtx_unlock(&(x)->ipf_lk) # define MUTEX_INIT(x,y) mtx_init(&(x)->ipf_lk, (y), NULL,\ MTX_DEF) # define MUTEX_DESTROY(x) mtx_destroy(&(x)->ipf_lk) # define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) /* * Whilst the sx(9) locks on FreeBSD have the right semantics and interface * for what we want to use them for, despite testing showing they work - * with a WITNESS kernel, it generates LOR messages. */ # include # define ATOMIC_INC(x) { mtx_lock(&softc->ipf_rw.ipf_lk); (x)++; \ mtx_unlock(&softc->ipf_rw.ipf_lk); } # define ATOMIC_DEC(x) { mtx_lock(&softc->ipf_rw.ipf_lk); (x)--; \ mtx_unlock(&softc->ipf_rw.ipf_lk); } # define ATOMIC_INCL(x) atomic_add_long(&(x), 1) # define ATOMIC_INC64(x) ATOMIC_INC(x) # define ATOMIC_INC32(x) atomic_add_32((u_int *)&(x), 1) # define ATOMIC_DECL(x) atomic_add_long(&(x), -1) # define ATOMIC_DEC64(x) ATOMIC_DEC(x) # define ATOMIC_DEC32(x) atomic_add_32((u_int *)&(x), -1) # define SPL_X(x) ; # define SPL_NET(x) ; # define SPL_IMP(x) ; # define SPL_SCHED(x) ; # define GET_MINOR dev2unit # define MSGDSIZE(m) mbufchainlen(m) # define M_LEN(m) (m)->m_len # define M_ADJ(m,x) m_adj(m, x) # define M_COPY(x) m_copym((x), 0, M_COPYALL, M_NOWAIT) # define M_DUP(m) m_dup(m, M_NOWAIT) # define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } typedef struct mbuf mb_t; #else /* !_KERNEL */ #ifndef _NET_IF_VAR_H_ /* * Userland emulation of struct ifnet. */ struct route; struct mbuf; struct ifnet { char if_xname[IFNAMSIZ]; TAILQ_HEAD(, ifaddr) if_addrlist; int (*if_output)(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); }; #endif /* _NET_IF_VAR_H_ */ #endif /* _KERNEL */ # define IFNAME(x) ((struct ifnet *)x)->if_xname # define COPYIFNAME(v, x, b) \ (void) strncpy(b, \ ((struct ifnet *)x)->if_xname, \ LIFNAMSIZ) typedef u_long ioctlcmd_t; typedef struct uio uio_t; typedef int minor_t; typedef u_int32_t u_32_t; # define U_32_T 1 /* ----------------------------------------------------------------------- */ /* G E N E R I C */ /* ----------------------------------------------------------------------- */ /* * For BSD kernels, if bpf is in the kernel, enable ipfilter to use bpf in * filter rules. */ #if !defined(IPFILTER_BPF) # if (defined(NBPF) && (NBPF > 0)) || (defined(DEV_BPF) && (DEV_BPF > 0)) || \ (defined(NBPFILTER) && (NBPFILTER > 0)) # define IPFILTER_BPF # endif #endif /* * Userland locking primitives */ #ifndef _KERNEL #if !defined(KMUTEX_FILL_SZ) # define KMUTEX_FILL_SZ 1 #endif #if !defined(KRWLOCK_FILL_SZ) # define KRWLOCK_FILL_SZ 1 #endif #endif typedef struct { char *eMm_owner; char *eMm_heldin; u_int eMm_magic; int eMm_held; int eMm_heldat; } eMmutex_t; typedef struct { char *eMrw_owner; char *eMrw_heldin; u_int eMrw_magic; short eMrw_read; short eMrw_write; int eMrw_heldat; } eMrwlock_t; typedef union { char _fill[KMUTEX_FILL_SZ]; #ifdef KMUTEX_T struct { KMUTEX_T ipf_slk; const char *ipf_lname; } ipf_lkun_s; #endif eMmutex_t ipf_emu; } ipfmutex_t; typedef union { char _fill[KRWLOCK_FILL_SZ]; #ifdef KRWLOCK_T struct { KRWLOCK_T ipf_slk; const char *ipf_lname; int ipf_sr; int ipf_sw; u_int ipf_magic; } ipf_lkun_s; #endif eMrwlock_t ipf_emu; } ipfrwlock_t; #define ipf_lk ipf_lkun_s.ipf_slk #define ipf_lname ipf_lkun_s.ipf_lname #define ipf_isr ipf_lkun_s.ipf_sr #define ipf_isw ipf_lkun_s.ipf_sw #define ipf_magic ipf_lkun_s.ipf_magic #if !defined(__GNUC__) || \ (defined(__FreeBSD_version) && (__FreeBSD_version >= 503000)) # ifndef INLINE # define INLINE # endif #else # define INLINE __inline__ #endif #if defined(__FreeBSD_version) && defined(_KERNEL) CTASSERT(sizeof(ipfrwlock_t) == KRWLOCK_FILL_SZ); CTASSERT(sizeof(ipfmutex_t) == KMUTEX_FILL_SZ); #endif /* * In a non-kernel environment, there are a lot of macros that need to be * filled in to be null-ops or to point to some compatibility function, * somewhere in userland. */ #ifndef _KERNEL typedef struct mb_s { struct mb_s *mb_next; char *mb_data; void *mb_ifp; int mb_len; int mb_flags; u_long mb_buf[2048]; } mb_t; # undef m_next # define m_next mb_next # undef m_len # define m_len mb_len # undef m_flags # define m_flags mb_flags # undef m_data # define m_data mb_data # undef M_MCAST # define M_MCAST 0x01 # undef M_BCAST # define M_BCAST 0x02 # undef M_MBCAST # define M_MBCAST 0x04 # define MSGDSIZE(m) msgdsize(m) # define M_LEN(m) (m)->mb_len # define M_ADJ(m,x) (m)->mb_len += x # define M_COPY(m) dupmbt(m) # define M_DUP(m) dupmbt(m) # define GETKTIME(x) gettimeofday((struct timeval *)(x), NULL) # define MTOD(m, t) ((t)(m)->mb_data) # define FREE_MB_T(m) freembt(m) # define ALLOC_MB_T(m,l) (m) = allocmbt(l) # define PREP_MB_T(f, m) do { \ (m)->mb_next = *(f)->fin_mp; \ *(fin)->fin_mp = (m); \ (f)->fin_m = (m); \ } while (0) # define SLEEP(x,y) 1; # define WAKEUP(x,y) ; # define POLLWAKEUP(y) ; # define IPF_PANIC(x,y) ; # define PANIC(x,y) ; # define SPL_SCHED(x) ; # define SPL_NET(x) ; # define SPL_IMP(x) ; # define SPL_X(x) ; # define KMALLOC(a,b) (a) = (b)malloc(sizeof(*a)) # define KMALLOCS(a,b,c) (a) = (b)malloc(c) # define KFREE(x) free(x) # define KFREES(x,s) free(x) # define GETIFP(x, v) get_unit(x,v) # define GETIFMTU_4(x) 2048 # define GETIFMTU_6(x) 2048 # define COPYIN(a,b,c) bcopywrap((a), (b), (c)) # define COPYOUT(a,b,c) bcopywrap((a), (b), (c)) # define COPYDATA(m, o, l, b) bcopy(MTOD((mb_t *)m, char *) + (o), \ (b), (l)) # define COPYBACK(m, o, l, b) bcopy((b), \ MTOD((mb_t *)m, char *) + (o), \ (l)) # define UIOMOVE(a,b,c,d) ipfuiomove((caddr_t)a,b,c,d) extern void m_copydata __P((mb_t *, int, int, caddr_t)); extern int ipfuiomove __P((caddr_t, int, int, struct uio *)); extern int bcopywrap __P((void *, void *, size_t)); extern mb_t *allocmbt __P((size_t)); extern mb_t *dupmbt __P((mb_t *)); extern void freembt __P((mb_t *)); # define MUTEX_DESTROY(x) eMmutex_destroy(&(x)->ipf_emu, \ __FILE__, __LINE__) # define MUTEX_ENTER(x) eMmutex_enter(&(x)->ipf_emu, \ __FILE__, __LINE__) # define MUTEX_EXIT(x) eMmutex_exit(&(x)->ipf_emu, \ __FILE__, __LINE__) # define MUTEX_INIT(x,y) eMmutex_init(&(x)->ipf_emu, y, \ __FILE__, __LINE__) # define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) # define MUTEX_DOWNGRADE(x) eMrwlock_downgrade(&(x)->ipf_emu, \ __FILE__, __LINE__) # define MUTEX_TRY_UPGRADE(x) eMrwlock_try_upgrade(&(x)->ipf_emu, \ __FILE__, __LINE__) # define READ_ENTER(x) eMrwlock_read_enter(&(x)->ipf_emu, \ __FILE__, __LINE__) # define RWLOCK_INIT(x, y) eMrwlock_init(&(x)->ipf_emu, y) # define RWLOCK_EXIT(x) eMrwlock_exit(&(x)->ipf_emu) # define RW_DESTROY(x) eMrwlock_destroy(&(x)->ipf_emu) # define WRITE_ENTER(x) eMrwlock_write_enter(&(x)->ipf_emu, \ __FILE__, \ __LINE__) # define USE_MUTEXES 1 extern void eMmutex_destroy __P((eMmutex_t *, char *, int)); extern void eMmutex_enter __P((eMmutex_t *, char *, int)); extern void eMmutex_exit __P((eMmutex_t *, char *, int)); extern void eMmutex_init __P((eMmutex_t *, char *, char *, int)); extern void eMrwlock_destroy __P((eMrwlock_t *)); extern void eMrwlock_exit __P((eMrwlock_t *)); extern void eMrwlock_init __P((eMrwlock_t *, char *)); extern void eMrwlock_read_enter __P((eMrwlock_t *, char *, int)); extern void eMrwlock_write_enter __P((eMrwlock_t *, char *, int)); extern void eMrwlock_downgrade __P((eMrwlock_t *, char *, int)); #endif extern mb_t *allocmbt(size_t); #define MAX_IPV4HDR ((0xf << 2) + sizeof(struct icmp) + sizeof(ip_t) + 8) #ifndef IP_OFFMASK # define IP_OFFMASK 0x1fff #endif /* * On BSD's use quad_t as a guarantee for getting at least a 64bit sized * object. */ #if !defined(__amd64__) && BSD_GT_YEAR(199306) # define USE_QUAD_T # define U_QUAD_T unsigned long long # define QUAD_T long long #else /* BSD > 199306 */ # if !defined(U_QUAD_T) # define U_QUAD_T u_long # define QUAD_T long # endif #endif /* BSD > 199306 */ #ifdef USE_INET6 # if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \ defined(__osf__) || defined(linux) # include # include # if defined(_KERNEL) && !defined(__osf__) # include # endif typedef struct ip6_hdr ip6_t; # endif #endif #ifndef MAX # define MAX(a,b) (((a) > (b)) ? (a) : (b)) #endif #if defined(_KERNEL) # if defined(MENTAT) && !defined(INSTANCES) # define COPYDATA mb_copydata # define COPYBACK mb_copyback # else # define COPYDATA m_copydata # define COPYBACK m_copyback # endif # if (defined(__NetBSD_Version__) && (__NetBSD_Version__ < 105180000)) || \ defined(__FreeBSD__) || (defined(OpenBSD) && (OpenBSD < 200206)) || \ defined(_BSDI_VERSION) # include # endif # if !defined(__FreeBSD__) || FREEBSD_GE_REV(300000) # if NETBSD_GE_REV(105180000) || OPENBSD_GE_REV(200111) # include # else # include extern vm_map_t kmem_map; # endif # include # else /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ # include # endif /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ # ifdef IPFILTER_M_IPFILTER # include MALLOC_DECLARE(M_IPFILTER); # define _M_IPF M_IPFILTER # else /* IPFILTER_M_IPFILTER */ # ifdef M_PFIL # define _M_IPF M_PFIL # else # ifdef M_IPFILTER # define _M_IPF M_IPFILTER # else # define _M_IPF M_TEMP # endif /* M_IPFILTER */ # endif /* M_PFIL */ # endif /* IPFILTER_M_IPFILTER */ # if !defined(KMALLOC) -# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), _M_IPF, M_NOWAIT) +# define KMALLOC(a, b) (a) = (b)malloc(sizeof(*(a)), _M_IPF, M_NOWAIT) # endif # if !defined(KMALLOCS) -# define KMALLOCS(a, b, c) MALLOC((a), b, (c), _M_IPF, M_NOWAIT) +# define KMALLOCS(a, b, c) (a) = (b)malloc((c), _M_IPF, M_NOWAIT) # endif # if !defined(KFREE) -# define KFREE(x) FREE((x), _M_IPF) +# define KFREE(x) free((x), _M_IPF) # endif # if !defined(KFREES) -# define KFREES(x,s) FREE((x), _M_IPF) +# define KFREES(x,s) free((x), _M_IPF) # endif # define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,d) # define SLEEP(id, n) tsleep((id), PPAUSE|PCATCH, n, 0) # define WAKEUP(id,x) wakeup(id+x) # if !defined(POLLWAKEUP) # define POLLWAKEUP(x) selwakeup(softc->ipf_selwait+x) # endif # define GETIFP(n, v) ifunit(n) # define GETIFMTU_4(x) ((struct ifnet *)x)->if_mtu # define GETIFMTU_6(x) ((struct ifnet *)x)->if_mtu # if !defined(USE_MUTEXES) && !defined(SPL_NET) # define SPL_IMP(x) x = splimp() # define SPL_NET(x) x = splnet() # if !defined(SPL_SCHED) # define SPL_SCHED(x) x = splsched() # endif # define SPL_X(x) (void) splx(x) # endif /* !USE_MUTEXES */ # ifndef FREE_MB_T # define FREE_MB_T(m) m_freem(m) # endif # ifndef ALLOC_MB_T # ifdef MGETHDR # define ALLOC_MB_T(m,l) do { \ MGETHDR((m), M_NOWAIT, MT_HEADER); \ if ((m) != NULL) { \ (m)->m_len = (l); \ (m)->m_pkthdr.len = (l); \ } \ } while (0) # else # define ALLOC_MB_T(m,l) do { \ MGET((m), M_NOWAIT, MT_HEADER); \ if ((m) != NULL) { \ (m)->m_len = (l); \ (m)->m_pkthdr.len = (l); \ } \ } while (0) # endif # endif # ifndef PREP_MB_T # define PREP_MB_T(f, m) do { \ mb_t *_o = *(f)->fin_mp; \ (m)->m_next = _o; \ *(fin)->fin_mp = (m); \ if (_o->m_flags & M_PKTHDR) { \ (m)->m_pkthdr.len += \ _o->m_pkthdr.len; \ (m)->m_pkthdr.rcvif = \ _o->m_pkthdr.rcvif; \ } \ } while (0) # endif # ifndef M_DUP # ifdef M_COPYALL # define M_DUP(m) m_dup(m, 0, M_COPYALL, 0) # else # define M_DUP(m) m_dup(m) # endif # endif # ifndef MTOD # define MTOD(m,t) mtod(m,t) # endif # ifndef COPYIN # define COPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) # define COPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) # endif # ifndef KMALLOC # define KMALLOC(a,b) (a) = (b)new_kmem_alloc(sizeof(*(a)), \ KMEM_NOSLEEP) # define KMALLOCS(a,b,c) (a) = (b)new_kmem_alloc((c), KMEM_NOSLEEP) # endif # ifndef GET_MINOR # define GET_MINOR(x) dev2unit(x) # endif # define PANIC(x,y) if (x) panic y #endif /* _KERNEL */ #if !defined(IFNAME) && !defined(_KERNEL) # define IFNAME(x) get_ifname((struct ifnet *)x) #endif #ifndef COPYIFNAME # define NEED_FRGETIFNAME extern char *ipf_getifname __P((struct ifnet *, char *)); # define COPYIFNAME(v, x, b) \ ipf_getifname((struct ifnet *)x, b) #endif #ifndef ASSERT # ifdef _KERNEL # define ASSERT(x) # else # define ASSERT(x) do { if (!(x)) abort(); } while (0) # endif #endif #ifndef BCOPYIN # define BCOPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) # define BCOPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) #endif /* * Because the ctype(3) posix definition, if used "safely" in code everywhere, * would mean all normal code that walks through strings needed casts. Yuck. */ #define ISALNUM(x) isalnum((u_char)(x)) #define ISALPHA(x) isalpha((u_char)(x)) #define ISDIGIT(x) isdigit((u_char)(x)) #define ISSPACE(x) isspace((u_char)(x)) #define ISUPPER(x) isupper((u_char)(x)) #define ISXDIGIT(x) isxdigit((u_char)(x)) #define ISLOWER(x) islower((u_char)(x)) #define TOUPPER(x) toupper((u_char)(x)) #define TOLOWER(x) tolower((u_char)(x)) /* * If mutexes aren't being used, turn all the mutex functions into null-ops. */ #if !defined(USE_MUTEXES) # define USE_SPL 1 # undef RW_DESTROY # undef MUTEX_INIT # undef MUTEX_NUKE # undef MUTEX_DESTROY # define MUTEX_ENTER(x) ; # define READ_ENTER(x) ; # define WRITE_ENTER(x) ; # define MUTEX_DOWNGRADE(x) ; # define MUTEX_TRY_UPGRADE(x) ; # define RWLOCK_INIT(x, y) ; # define RWLOCK_EXIT(x) ; # define RW_DESTROY(x) ; # define MUTEX_EXIT(x) ; # define MUTEX_INIT(x,y) ; # define MUTEX_DESTROY(x) ; # define MUTEX_NUKE(x) ; #endif /* !USE_MUTEXES */ #ifndef ATOMIC_INC # define ATOMIC_INC(x) (x)++ # define ATOMIC_DEC(x) (x)-- #endif #if defined(USE_SPL) && defined(_KERNEL) # define SPL_INT(x) int x #else # define SPL_INT(x) #endif /* * If there are no atomic operations for bit sizes defined, define them to all * use a generic one that works for all sizes. */ #ifndef ATOMIC_INCL # define ATOMIC_INCL ATOMIC_INC # define ATOMIC_INC64 ATOMIC_INC # define ATOMIC_INC32 ATOMIC_INC # define ATOMIC_DECL ATOMIC_DEC # define ATOMIC_DEC64 ATOMIC_DEC # define ATOMIC_DEC32 ATOMIC_DEC #endif #ifndef HDR_T_PRIVATE typedef struct tcphdr tcphdr_t; typedef struct udphdr udphdr_t; #endif typedef struct icmp icmphdr_t; typedef struct ip ip_t; typedef struct ether_header ether_header_t; typedef struct tcpiphdr tcpiphdr_t; #ifndef FR_GROUPLEN # define FR_GROUPLEN 16 #endif #ifndef offsetof # define offsetof(t,m) (size_t)((&((t *)0L)->m)) #endif #ifndef stsizeof # define stsizeof(t,m) sizeof(((t *)0L)->m) #endif /* * This set of macros has been brought about because on Tru64 it is not * possible to easily assign or examine values in a structure that are * bit fields. */ #ifndef IP_V # define IP_V(x) (x)->ip_v #endif #ifndef IP_V_A # define IP_V_A(x,y) (x)->ip_v = (y) #endif #ifndef IP_HL # define IP_HL(x) (x)->ip_hl #endif #ifndef IP_HL_A # define IP_HL_A(x,y) (x)->ip_hl = ((y) & 0xf) #endif #ifndef TCP_X2 # define TCP_X2(x) (x)->th_x2 #endif #ifndef TCP_X2_A # define TCP_X2_A(x,y) (x)->th_x2 = (y) #endif #ifndef TCP_OFF # define TCP_OFF(x) (x)->th_off #endif #ifndef TCP_OFF_A # define TCP_OFF_A(x,y) (x)->th_off = (y) #endif #define IPMINLEN(i, h) ((i)->ip_len >= (IP_HL(i) * 4 + sizeof(struct h))) /* * XXX - This is one of those *awful* hacks which nobody likes */ #ifdef ultrix #define A_A #else #define A_A & #endif #define TCPF_ALL (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|\ TH_ECN|TH_CWR) #if BSD_GE_YEAR(199306) && !defined(m_act) # define m_act m_nextpkt #endif /* * Security Options for Intenet Protocol (IPSO) as defined in RFC 1108. * * Basic Option * * 00000001 - (Reserved 4) * 00111101 - Top Secret * 01011010 - Secret * 10010110 - Confidential * 01100110 - (Reserved 3) * 11001100 - (Reserved 2) * 10101011 - Unclassified * 11110001 - (Reserved 1) */ #define IPSO_CLASS_RES4 0x01 #define IPSO_CLASS_TOPS 0x3d #define IPSO_CLASS_SECR 0x5a #define IPSO_CLASS_CONF 0x96 #define IPSO_CLASS_RES3 0x66 #define IPSO_CLASS_RES2 0xcc #define IPSO_CLASS_UNCL 0xab #define IPSO_CLASS_RES1 0xf1 #define IPSO_AUTH_GENSER 0x80 #define IPSO_AUTH_ESI 0x40 #define IPSO_AUTH_SCI 0x20 #define IPSO_AUTH_NSA 0x10 #define IPSO_AUTH_DOE 0x08 #define IPSO_AUTH_UN 0x06 #define IPSO_AUTH_FTE 0x01 /* * IP option #defines */ #undef IPOPT_RR #define IPOPT_RR 7 #undef IPOPT_ZSU #define IPOPT_ZSU 10 /* ZSU */ #undef IPOPT_MTUP #define IPOPT_MTUP 11 /* MTUP */ #undef IPOPT_MTUR #define IPOPT_MTUR 12 /* MTUR */ #undef IPOPT_ENCODE #define IPOPT_ENCODE 15 /* ENCODE */ #undef IPOPT_TS #define IPOPT_TS 68 #undef IPOPT_TR #define IPOPT_TR 82 /* TR */ #undef IPOPT_SECURITY #define IPOPT_SECURITY 130 #undef IPOPT_LSRR #define IPOPT_LSRR 131 #undef IPOPT_E_SEC #define IPOPT_E_SEC 133 /* E-SEC */ #undef IPOPT_CIPSO #define IPOPT_CIPSO 134 /* CIPSO */ #undef IPOPT_SATID #define IPOPT_SATID 136 #ifndef IPOPT_SID # define IPOPT_SID IPOPT_SATID #endif #undef IPOPT_SSRR #define IPOPT_SSRR 137 #undef IPOPT_ADDEXT #define IPOPT_ADDEXT 147 /* ADDEXT */ #undef IPOPT_VISA #define IPOPT_VISA 142 /* VISA */ #undef IPOPT_IMITD #define IPOPT_IMITD 144 /* IMITD */ #undef IPOPT_EIP #define IPOPT_EIP 145 /* EIP */ #undef IPOPT_RTRALRT #define IPOPT_RTRALRT 148 /* RTRALRT */ #undef IPOPT_SDB #define IPOPT_SDB 149 #undef IPOPT_NSAPA #define IPOPT_NSAPA 150 #undef IPOPT_DPS #define IPOPT_DPS 151 #undef IPOPT_UMP #define IPOPT_UMP 152 #undef IPOPT_FINN #define IPOPT_FINN 205 /* FINN */ #undef IPOPT_AH #define IPOPT_AH 256+IPPROTO_AH #ifndef TCPOPT_EOL # define TCPOPT_EOL 0 #endif #ifndef TCPOPT_NOP # define TCPOPT_NOP 1 #endif #ifndef TCPOPT_MAXSEG # define TCPOPT_MAXSEG 2 #endif #ifndef TCPOLEN_MAXSEG # define TCPOLEN_MAXSEG 4 #endif #ifndef TCPOPT_WINDOW # define TCPOPT_WINDOW 3 #endif #ifndef TCPOLEN_WINDOW # define TCPOLEN_WINDOW 3 #endif #ifndef TCPOPT_SACK_PERMITTED # define TCPOPT_SACK_PERMITTED 4 #endif #ifndef TCPOLEN_SACK_PERMITTED # define TCPOLEN_SACK_PERMITTED 2 #endif #ifndef TCPOPT_SACK # define TCPOPT_SACK 5 #endif #ifndef TCPOPT_TIMESTAMP # define TCPOPT_TIMESTAMP 8 #endif #ifndef ICMP_MINLEN # define ICMP_MINLEN 8 #endif #ifndef ICMP_ECHOREPLY # define ICMP_ECHOREPLY 0 #endif #ifndef ICMP_UNREACH # define ICMP_UNREACH 3 #endif #ifndef ICMP_UNREACH_NET # define ICMP_UNREACH_NET 0 #endif #ifndef ICMP_UNREACH_HOST # define ICMP_UNREACH_HOST 1 #endif #ifndef ICMP_UNREACH_PROTOCOL # define ICMP_UNREACH_PROTOCOL 2 #endif #ifndef ICMP_UNREACH_PORT # define ICMP_UNREACH_PORT 3 #endif #ifndef ICMP_UNREACH_NEEDFRAG # define ICMP_UNREACH_NEEDFRAG 4 #endif #ifndef ICMP_UNREACH_SRCFAIL # define ICMP_UNREACH_SRCFAIL 5 #endif #ifndef ICMP_UNREACH_NET_UNKNOWN # define ICMP_UNREACH_NET_UNKNOWN 6 #endif #ifndef ICMP_UNREACH_HOST_UNKNOWN # define ICMP_UNREACH_HOST_UNKNOWN 7 #endif #ifndef ICMP_UNREACH_ISOLATED # define ICMP_UNREACH_ISOLATED 8 #endif #ifndef ICMP_UNREACH_NET_PROHIB # define ICMP_UNREACH_NET_PROHIB 9 #endif #ifndef ICMP_UNREACH_HOST_PROHIB # define ICMP_UNREACH_HOST_PROHIB 10 #endif #ifndef ICMP_UNREACH_TOSNET # define ICMP_UNREACH_TOSNET 11 #endif #ifndef ICMP_UNREACH_TOSHOST # define ICMP_UNREACH_TOSHOST 12 #endif #ifndef ICMP_UNREACH_ADMIN_PROHIBIT # define ICMP_UNREACH_ADMIN_PROHIBIT 13 #endif #ifndef ICMP_UNREACH_FILTER # define ICMP_UNREACH_FILTER 13 #endif #ifndef ICMP_UNREACH_HOST_PRECEDENCE # define ICMP_UNREACH_HOST_PRECEDENCE 14 #endif #ifndef ICMP_UNREACH_PRECEDENCE_CUTOFF # define ICMP_UNREACH_PRECEDENCE_CUTOFF 15 #endif #ifndef ICMP_SOURCEQUENCH # define ICMP_SOURCEQUENCH 4 #endif #ifndef ICMP_REDIRECT_NET # define ICMP_REDIRECT_NET 0 #endif #ifndef ICMP_REDIRECT_HOST # define ICMP_REDIRECT_HOST 1 #endif #ifndef ICMP_REDIRECT_TOSNET # define ICMP_REDIRECT_TOSNET 2 #endif #ifndef ICMP_REDIRECT_TOSHOST # define ICMP_REDIRECT_TOSHOST 3 #endif #ifndef ICMP_ALTHOSTADDR # define ICMP_ALTHOSTADDR 6 #endif #ifndef ICMP_TIMXCEED # define ICMP_TIMXCEED 11 #endif #ifndef ICMP_TIMXCEED_INTRANS # define ICMP_TIMXCEED_INTRANS 0 #endif #ifndef ICMP_TIMXCEED_REASS # define ICMP_TIMXCEED_REASS 1 #endif #ifndef ICMP_PARAMPROB # define ICMP_PARAMPROB 12 #endif #ifndef ICMP_PARAMPROB_ERRATPTR # define ICMP_PARAMPROB_ERRATPTR 0 #endif #ifndef ICMP_PARAMPROB_OPTABSENT # define ICMP_PARAMPROB_OPTABSENT 1 #endif #ifndef ICMP_PARAMPROB_LENGTH # define ICMP_PARAMPROB_LENGTH 2 #endif #ifndef ICMP_TSTAMP # define ICMP_TSTAMP 13 #endif #ifndef ICMP_TSTAMPREPLY # define ICMP_TSTAMPREPLY 14 #endif #ifndef ICMP_IREQ # define ICMP_IREQ 15 #endif #ifndef ICMP_IREQREPLY # define ICMP_IREQREPLY 16 #endif #ifndef ICMP_MASKREQ # define ICMP_MASKREQ 17 #endif #ifndef ICMP_MASKREPLY # define ICMP_MASKREPLY 18 #endif #ifndef ICMP_TRACEROUTE # define ICMP_TRACEROUTE 30 #endif #ifndef ICMP_DATACONVERR # define ICMP_DATACONVERR 31 #endif #ifndef ICMP_MOBILE_REDIRECT # define ICMP_MOBILE_REDIRECT 32 #endif #ifndef ICMP_IPV6_WHEREAREYOU # define ICMP_IPV6_WHEREAREYOU 33 #endif #ifndef ICMP_IPV6_IAMHERE # define ICMP_IPV6_IAMHERE 34 #endif #ifndef ICMP_MOBILE_REGREQUEST # define ICMP_MOBILE_REGREQUEST 35 #endif #ifndef ICMP_MOBILE_REGREPLY # define ICMP_MOBILE_REGREPLY 36 #endif #ifndef ICMP_SKIP # define ICMP_SKIP 39 #endif #ifndef ICMP_PHOTURIS # define ICMP_PHOTURIS 40 #endif #ifndef ICMP_PHOTURIS_UNKNOWN_INDEX # define ICMP_PHOTURIS_UNKNOWN_INDEX 1 #endif #ifndef ICMP_PHOTURIS_AUTH_FAILED # define ICMP_PHOTURIS_AUTH_FAILED 2 #endif #ifndef ICMP_PHOTURIS_DECRYPT_FAILED # define ICMP_PHOTURIS_DECRYPT_FAILED 3 #endif #ifndef IPVERSION # define IPVERSION 4 #endif #ifndef IPOPT_MINOFF # define IPOPT_MINOFF 4 #endif #ifndef IPOPT_COPIED # define IPOPT_COPIED(x) ((x)&0x80) #endif #ifndef IPOPT_EOL # define IPOPT_EOL 0 #endif #ifndef IPOPT_NOP # define IPOPT_NOP 1 #endif #ifndef IP_MF # define IP_MF ((u_short)0x2000) #endif #ifndef ETHERTYPE_IP # define ETHERTYPE_IP ((u_short)0x0800) #endif #ifndef TH_FIN # define TH_FIN 0x01 #endif #ifndef TH_SYN # define TH_SYN 0x02 #endif #ifndef TH_RST # define TH_RST 0x04 #endif #ifndef TH_PUSH # define TH_PUSH 0x08 #endif #ifndef TH_ACK # define TH_ACK 0x10 #endif #ifndef TH_URG # define TH_URG 0x20 #endif #undef TH_ACKMASK #define TH_ACKMASK (TH_FIN|TH_SYN|TH_RST|TH_ACK) #ifndef IPOPT_EOL # define IPOPT_EOL 0 #endif #ifndef IPOPT_NOP # define IPOPT_NOP 1 #endif #ifndef IPOPT_RR # define IPOPT_RR 7 #endif #ifndef IPOPT_TS # define IPOPT_TS 68 #endif #ifndef IPOPT_SECURITY # define IPOPT_SECURITY 130 #endif #ifndef IPOPT_LSRR # define IPOPT_LSRR 131 #endif #ifndef IPOPT_SATID # define IPOPT_SATID 136 #endif #ifndef IPOPT_SSRR # define IPOPT_SSRR 137 #endif #ifndef IPOPT_SECUR_UNCLASS # define IPOPT_SECUR_UNCLASS ((u_short)0x0000) #endif #ifndef IPOPT_SECUR_CONFID # define IPOPT_SECUR_CONFID ((u_short)0xf135) #endif #ifndef IPOPT_SECUR_EFTO # define IPOPT_SECUR_EFTO ((u_short)0x789a) #endif #ifndef IPOPT_SECUR_MMMM # define IPOPT_SECUR_MMMM ((u_short)0xbc4d) #endif #ifndef IPOPT_SECUR_RESTR # define IPOPT_SECUR_RESTR ((u_short)0xaf13) #endif #ifndef IPOPT_SECUR_SECRET # define IPOPT_SECUR_SECRET ((u_short)0xd788) #endif #ifndef IPOPT_SECUR_TOPSECRET # define IPOPT_SECUR_TOPSECRET ((u_short)0x6bc5) #endif #ifndef IPOPT_OLEN # define IPOPT_OLEN 1 #endif #ifndef IPPROTO_HOPOPTS # define IPPROTO_HOPOPTS 0 #endif #ifndef IPPROTO_IPIP # define IPPROTO_IPIP 4 #endif #ifndef IPPROTO_ENCAP # define IPPROTO_ENCAP 98 #endif #ifndef IPPROTO_IPV6 # define IPPROTO_IPV6 41 #endif #ifndef IPPROTO_ROUTING # define IPPROTO_ROUTING 43 #endif #ifndef IPPROTO_FRAGMENT # define IPPROTO_FRAGMENT 44 #endif #ifndef IPPROTO_GRE # define IPPROTO_GRE 47 /* GRE encaps RFC 1701 */ #endif #ifndef IPPROTO_ESP # define IPPROTO_ESP 50 #endif #ifndef IPPROTO_AH # define IPPROTO_AH 51 #endif #ifndef IPPROTO_ICMPV6 # define IPPROTO_ICMPV6 58 #endif #ifndef IPPROTO_NONE # define IPPROTO_NONE 59 #endif #ifndef IPPROTO_DSTOPTS # define IPPROTO_DSTOPTS 60 #endif #ifndef IPPROTO_MOBILITY # define IPPROTO_MOBILITY 135 #endif #ifndef ICMP_ROUTERADVERT # define ICMP_ROUTERADVERT 9 #endif #ifndef ICMP_ROUTERSOLICIT # define ICMP_ROUTERSOLICIT 10 #endif #ifndef ICMP6_DST_UNREACH # define ICMP6_DST_UNREACH 1 #endif #ifndef ICMP6_PACKET_TOO_BIG # define ICMP6_PACKET_TOO_BIG 2 #endif #ifndef ICMP6_TIME_EXCEEDED # define ICMP6_TIME_EXCEEDED 3 #endif #ifndef ICMP6_PARAM_PROB # define ICMP6_PARAM_PROB 4 #endif #ifndef ICMP6_ECHO_REQUEST # define ICMP6_ECHO_REQUEST 128 #endif #ifndef ICMP6_ECHO_REPLY # define ICMP6_ECHO_REPLY 129 #endif #ifndef ICMP6_MEMBERSHIP_QUERY # define ICMP6_MEMBERSHIP_QUERY 130 #endif #ifndef MLD6_LISTENER_QUERY # define MLD6_LISTENER_QUERY 130 #endif #ifndef ICMP6_MEMBERSHIP_REPORT # define ICMP6_MEMBERSHIP_REPORT 131 #endif #ifndef MLD6_LISTENER_REPORT # define MLD6_LISTENER_REPORT 131 #endif #ifndef ICMP6_MEMBERSHIP_REDUCTION # define ICMP6_MEMBERSHIP_REDUCTION 132 #endif #ifndef MLD6_LISTENER_DONE # define MLD6_LISTENER_DONE 132 #endif #ifndef ND_ROUTER_SOLICIT # define ND_ROUTER_SOLICIT 133 #endif #ifndef ND_ROUTER_ADVERT # define ND_ROUTER_ADVERT 134 #endif #ifndef ND_NEIGHBOR_SOLICIT # define ND_NEIGHBOR_SOLICIT 135 #endif #ifndef ND_NEIGHBOR_ADVERT # define ND_NEIGHBOR_ADVERT 136 #endif #ifndef ND_REDIRECT # define ND_REDIRECT 137 #endif #ifndef ICMP6_ROUTER_RENUMBERING # define ICMP6_ROUTER_RENUMBERING 138 #endif #ifndef ICMP6_WRUREQUEST # define ICMP6_WRUREQUEST 139 #endif #ifndef ICMP6_WRUREPLY # define ICMP6_WRUREPLY 140 #endif #ifndef ICMP6_FQDN_QUERY # define ICMP6_FQDN_QUERY 139 #endif #ifndef ICMP6_FQDN_REPLY # define ICMP6_FQDN_REPLY 140 #endif #ifndef ICMP6_NI_QUERY # define ICMP6_NI_QUERY 139 #endif #ifndef ICMP6_NI_REPLY # define ICMP6_NI_REPLY 140 #endif #ifndef MLD6_MTRACE_RESP # define MLD6_MTRACE_RESP 200 #endif #ifndef MLD6_MTRACE # define MLD6_MTRACE 201 #endif #ifndef ICMP6_HADISCOV_REQUEST # define ICMP6_HADISCOV_REQUEST 202 #endif #ifndef ICMP6_HADISCOV_REPLY # define ICMP6_HADISCOV_REPLY 203 #endif #ifndef ICMP6_MOBILEPREFIX_SOLICIT # define ICMP6_MOBILEPREFIX_SOLICIT 204 #endif #ifndef ICMP6_MOBILEPREFIX_ADVERT # define ICMP6_MOBILEPREFIX_ADVERT 205 #endif #ifndef ICMP6_MAXTYPE # define ICMP6_MAXTYPE 205 #endif #ifndef ICMP6_DST_UNREACH_NOROUTE # define ICMP6_DST_UNREACH_NOROUTE 0 #endif #ifndef ICMP6_DST_UNREACH_ADMIN # define ICMP6_DST_UNREACH_ADMIN 1 #endif #ifndef ICMP6_DST_UNREACH_NOTNEIGHBOR # define ICMP6_DST_UNREACH_NOTNEIGHBOR 2 #endif #ifndef ICMP6_DST_UNREACH_BEYONDSCOPE # define ICMP6_DST_UNREACH_BEYONDSCOPE 2 #endif #ifndef ICMP6_DST_UNREACH_ADDR # define ICMP6_DST_UNREACH_ADDR 3 #endif #ifndef ICMP6_DST_UNREACH_NOPORT # define ICMP6_DST_UNREACH_NOPORT 4 #endif #ifndef ICMP6_TIME_EXCEED_TRANSIT # define ICMP6_TIME_EXCEED_TRANSIT 0 #endif #ifndef ICMP6_TIME_EXCEED_REASSEMBLY # define ICMP6_TIME_EXCEED_REASSEMBLY 1 #endif #ifndef ICMP6_NI_SUCCESS # define ICMP6_NI_SUCCESS 0 #endif #ifndef ICMP6_NI_REFUSED # define ICMP6_NI_REFUSED 1 #endif #ifndef ICMP6_NI_UNKNOWN # define ICMP6_NI_UNKNOWN 2 #endif #ifndef ICMP6_ROUTER_RENUMBERING_COMMAND # define ICMP6_ROUTER_RENUMBERING_COMMAND 0 #endif #ifndef ICMP6_ROUTER_RENUMBERING_RESULT # define ICMP6_ROUTER_RENUMBERING_RESULT 1 #endif #ifndef ICMP6_ROUTER_RENUMBERING_SEQNUM_RESET # define ICMP6_ROUTER_RENUMBERING_SEQNUM_RESET 255 #endif #ifndef ICMP6_PARAMPROB_HEADER # define ICMP6_PARAMPROB_HEADER 0 #endif #ifndef ICMP6_PARAMPROB_NEXTHEADER # define ICMP6_PARAMPROB_NEXTHEADER 1 #endif #ifndef ICMP6_PARAMPROB_OPTION # define ICMP6_PARAMPROB_OPTION 2 #endif #ifndef ICMP6_NI_SUBJ_IPV6 # define ICMP6_NI_SUBJ_IPV6 0 #endif #ifndef ICMP6_NI_SUBJ_FQDN # define ICMP6_NI_SUBJ_FQDN 1 #endif #ifndef ICMP6_NI_SUBJ_IPV4 # define ICMP6_NI_SUBJ_IPV4 2 #endif #ifndef MLD_MTRACE_RESP # define MLD_MTRACE_RESP 200 #endif #ifndef MLD_MTRACE # define MLD_MTRACE 201 #endif #ifndef MLD6_MTRACE_RESP # define MLD6_MTRACE_RESP MLD_MTRACE_RESP #endif #ifndef MLD6_MTRACE # define MLD6_MTRACE MLD_MTRACE #endif #if !defined(IPV6_FLOWINFO_MASK) # if (BYTE_ORDER == BIG_ENDIAN) || defined(_BIG_ENDIAN) # define IPV6_FLOWINFO_MASK 0x0fffffff /* flow info (28 bits) */ # else # if(BYTE_ORDER == LITTLE_ENDIAN) || !defined(_BIG_ENDIAN) # define IPV6_FLOWINFO_MASK 0xffffff0f /* flow info (28 bits) */ # endif /* LITTLE_ENDIAN */ # endif #endif #if !defined(IPV6_FLOWLABEL_MASK) # if (BYTE_ORDER == BIG_ENDIAN) || defined(_BIG_ENDIAN) # define IPV6_FLOWLABEL_MASK 0x000fffff /* flow label (20 bits) */ # else # if (BYTE_ORDER == LITTLE_ENDIAN) || !defined(_BIG_ENDIAN) # define IPV6_FLOWLABEL_MASK 0xffff0f00 /* flow label (20 bits) */ # endif /* LITTLE_ENDIAN */ # endif #endif /* * ECN is a new addition to TCP - RFC 2481 */ #ifndef TH_ECN # define TH_ECN 0x40 #endif #ifndef TH_CWR # define TH_CWR 0x80 #endif #define TH_ECNALL (TH_ECN|TH_CWR) /* * TCP States */ #define IPF_TCPS_LISTEN 0 /* listening for connection */ #define IPF_TCPS_SYN_SENT 1 /* active, have sent syn */ #define IPF_TCPS_SYN_RECEIVED 2 /* have send and received syn */ #define IPF_TCPS_HALF_ESTAB 3 /* for connections not fully "up" */ /* states < IPF_TCPS_ESTABLISHED are those where connections not established */ #define IPF_TCPS_ESTABLISHED 4 /* established */ #define IPF_TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ /* states > IPF_TCPS_CLOSE_WAIT are those where user has closed */ #define IPF_TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ #define IPF_TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ #define IPF_TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ /* states > IPF_TCPS_CLOSE_WAIT && < IPF_TCPS_FIN_WAIT_2 await ACK of FIN */ #define IPF_TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ #define IPF_TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ #define IPF_TCPS_CLOSED 11 /* closed */ #define IPF_TCP_NSTATES 12 #define TCP_MSL 120 #undef ICMP_MAX_UNREACH #define ICMP_MAX_UNREACH 14 #undef ICMP_MAXTYPE #define ICMP_MAXTYPE 18 #ifndef IFNAMSIZ #define IFNAMSIZ 16 #endif #ifndef LOG_FTP # define LOG_FTP (11<<3) #endif #ifndef LOG_AUTHPRIV # define LOG_AUTHPRIV (10<<3) #endif #ifndef LOG_AUDIT # define LOG_AUDIT (13<<3) #endif #ifndef LOG_NTP # define LOG_NTP (12<<3) #endif #ifndef LOG_SECURITY # define LOG_SECURITY (13<<3) #endif #ifndef LOG_LFMT # define LOG_LFMT (14<<3) #endif #ifndef LOG_CONSOLE # define LOG_CONSOLE (14<<3) #endif /* * ICMP error replies have an IP header (20 bytes), 8 bytes of ICMP data, * another IP header and then 64 bits of data, totalling 56. Of course, * the last 64 bits is dependent on that being available. */ #define ICMPERR_ICMPHLEN 8 #define ICMPERR_IPICMPHLEN (20 + 8) #define ICMPERR_MINPKTLEN (20 + 8 + 20) #define ICMPERR_MAXPKTLEN (20 + 8 + 20 + 8) #define ICMP6ERR_MINPKTLEN (40 + 8) #define ICMP6ERR_IPICMPHLEN (40 + 8 + 40) #ifndef MIN # define MIN(a,b) (((a)<(b))?(a):(b)) #endif #ifdef RESCUE # undef IPFILTER_BPF #endif #ifdef IPF_DEBUG # define DPRINT(x) printf x #else # define DPRINT(x) #endif #ifndef AF_INET6 # define AF_INET6 26 #endif #ifdef DTRACE_PROBE # ifdef _KERNEL # define DT(_n) DTRACE_PROBE(_n) # define DT1(_n,_a,_b) DTRACE_PROBE1(_n,_a,_b) # define DT2(_n,_a,_b,_c,_d) DTRACE_PROBE2(_n,_a,_b,_c,_d) # define DT3(_n,_a,_b,_c,_d,_e,_f) \ DTRACE_PROBE3(_n,_a,_b,_c,_d,_e,_f) # define DT4(_n,_a,_b,_c,_d,_e,_f,_g,_h) \ DTRACE_PROBE4(_n,_a,_b,_c,_d,_e,_f,_g,_h) # else # define DT(_n) # define DT1(_n,_a,_b) # define DT2(_n,_a,_b,_c,_d) # define DT3(_n,_a,_b,_c,_d,_e,_f) # define DT4(_n,_a,_b,_c,_d,_e,_f,_g,_h) # endif #else # define DT(_n) # define DT1(_n,_a,_b) # define DT2(_n,_a,_b,_c,_d) # define DT3(_n,_a,_b,_c,_d,_e,_f) # define DT4(_n,_a,_b,_c,_d,_e,_f,_g,_h) #endif struct ip6_routing { u_char ip6r_nxt; /* next header */ u_char ip6r_len; /* length in units of 8 octets */ u_char ip6r_type; /* always zero */ u_char ip6r_segleft; /* segments left */ u_32_t ip6r_reserved; /* reserved field */ }; #endif /* __IP_COMPAT_H__ */ Index: head/sys/fs/nfs/nfs_commonkrpc.c =================================================================== --- head/sys/fs/nfs/nfs_commonkrpc.c (revision 328416) +++ head/sys/fs/nfs/nfs_commonkrpc.c (revision 328417) @@ -1,1344 +1,1344 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Socket operations for use by nfs */ #include "opt_kgssapi.h" #include "opt_nfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_nfsclient_nfs23_start_probe_func_t dtrace_nfscl_nfs234_start_probe; dtrace_nfsclient_nfs23_done_probe_func_t dtrace_nfscl_nfs234_done_probe; /* * Registered probes by RPC type. */ uint32_t nfscl_nfs2_start_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs2_done_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs3_start_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs3_done_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs4_start_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; #endif NFSSTATESPINLOCK; NFSREQSPINLOCK; NFSDLOCKMUTEX; NFSCLSTATEMUTEX; extern struct nfsstatsv1 nfsstatsv1; extern struct nfsreqhead nfsd_reqq; extern int nfscl_ticks; extern void (*ncl_call_invalcaches)(struct vnode *); extern int nfs_numnfscbd; extern int nfscl_debuglevel; SVCPOOL *nfscbd_pool; static int nfsrv_gsscallbackson = 0; static int nfs_bufpackets = 4; static int nfs_reconnects; static int nfs3_jukebox_delay = 10; static int nfs_skip_wcc_data_onerr = 1; SYSCTL_DECL(_vfs_nfs); SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, "Buffer reservation size 2 < x < 64"); SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, "Number of times the nfs client has had to reconnect"); SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0, "Number of seconds to delay a retry after receiving EJUKEBOX"); SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0, "Disable weak cache consistency checking when server returns an error"); static void nfs_down(struct nfsmount *, struct thread *, const char *, int, int); static void nfs_up(struct nfsmount *, struct thread *, const char *, int, int); static int nfs_msg(struct thread *, const char *, const char *, int); struct nfs_cached_auth { int ca_refs; /* refcount, including 1 from the cache */ uid_t ca_uid; /* uid that corresponds to this auth */ AUTH *ca_auth; /* RPC auth handle */ }; static int nfsv2_procid[NFS_V3NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; /* * Initialize sockets and congestion for a new NFS connection. * We do not free the sockaddr if error. */ int newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, struct ucred *cred, NFSPROC_T *p, int callback_retry_mult) { int rcvreserve, sndreserve; int pktscale, pktscalesav; struct sockaddr *saddr; struct ucred *origcred; CLIENT *client; struct netconfig *nconf; struct socket *so; int one = 1, retries, error = 0; struct thread *td = curthread; SVCXPRT *xprt; struct timeval timo; /* * We need to establish the socket using the credentials of * the mountpoint. Some parts of this process (such as * sobind() and soconnect()) will use the curent thread's * credential instead of the socket credential. To work * around this, temporarily change the current thread's * credential to that of the mountpoint. * * XXX: It would be better to explicitly pass the correct * credential to sobind() and soconnect(). */ origcred = td->td_ucred; /* * Use the credential in nr_cred, if not NULL. */ if (nrp->nr_cred != NULL) td->td_ucred = nrp->nr_cred; else td->td_ucred = cred; saddr = nrp->nr_nam; if (saddr->sa_family == AF_INET) if (nrp->nr_sotype == SOCK_DGRAM) nconf = getnetconfigent("udp"); else nconf = getnetconfigent("tcp"); else if (saddr->sa_family == AF_LOCAL) nconf = getnetconfigent("local"); else if (nrp->nr_sotype == SOCK_DGRAM) nconf = getnetconfigent("udp6"); else nconf = getnetconfigent("tcp6"); pktscale = nfs_bufpackets; if (pktscale < 2) pktscale = 2; if (pktscale > 64) pktscale = 64; pktscalesav = pktscale; /* * soreserve() can fail if sb_max is too small, so shrink pktscale * and try again if there is an error. * Print a log message suggesting increasing sb_max. * Creating a socket and doing this is necessary since, if the * reservation sizes are too large and will make soreserve() fail, * the connection will work until a large send is attempted and * then it will loop in the krpc code. */ so = NULL; saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *); error = socreate(saddr->sa_family, &so, nrp->nr_sotype, nrp->nr_soproto, td->td_ucred, td); if (error) { td->td_ucred = origcred; goto out; } do { if (error != 0 && pktscale > 2) { if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM && pktscale == pktscalesav) printf("Consider increasing kern.ipc.maxsockbuf\n"); pktscale--; } if (nrp->nr_sotype == SOCK_DGRAM) { if (nmp != NULL) { sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * pktscale; rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * pktscale; } else { sndreserve = rcvreserve = 1024 * pktscale; } } else { if (nrp->nr_sotype != SOCK_STREAM) panic("nfscon sotype"); if (nmp != NULL) { sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR + sizeof (u_int32_t)) * pktscale; rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR + sizeof (u_int32_t)) * pktscale; } else { sndreserve = rcvreserve = 1024 * pktscale; } } error = soreserve(so, sndreserve, rcvreserve); if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM && pktscale <= 2) printf("Must increase kern.ipc.maxsockbuf or reduce" " rsize, wsize\n"); } while (error != 0 && pktscale > 2); soclose(so); if (error) { td->td_ucred = origcred; goto out; } client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog, nrp->nr_vers, sndreserve, rcvreserve); CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq"); if (nmp != NULL) { if ((nmp->nm_flag & NFSMNT_INT)) CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); if ((nmp->nm_flag & NFSMNT_RESVPORT)) CLNT_CONTROL(client, CLSET_PRIVPORT, &one); if (NFSHASSOFT(nmp)) { if (nmp->nm_sotype == SOCK_DGRAM) /* * For UDP, the large timeout for a reconnect * will be set to "nm_retry * nm_timeo / 2", so * we only want to do 2 reconnect timeout * retries. */ retries = 2; else retries = nmp->nm_retry; } else retries = INT_MAX; /* cred == NULL for DS connects. */ if (NFSHASNFSV4N(nmp) && cred != NULL) { /* * Make sure the nfscbd_pool doesn't get destroyed * while doing this. */ NFSD_LOCK(); if (nfs_numnfscbd > 0) { nfs_numnfscbd++; NFSD_UNLOCK(); xprt = svc_vc_create_backchannel(nfscbd_pool); CLNT_CONTROL(client, CLSET_BACKCHANNEL, xprt); NFSD_LOCK(); nfs_numnfscbd--; if (nfs_numnfscbd == 0) wakeup(&nfs_numnfscbd); } NFSD_UNLOCK(); } } else { /* * Three cases: * - Null RPC callback to client * - Non-Null RPC callback to client, wait a little longer * - upcalls to nfsuserd and gssd (clp == NULL) */ if (callback_retry_mult == 0) { retries = NFSV4_UPCALLRETRY; CLNT_CONTROL(client, CLSET_PRIVPORT, &one); } else { retries = NFSV4_CALLBACKRETRY * callback_retry_mult; } } CLNT_CONTROL(client, CLSET_RETRIES, &retries); if (nmp != NULL) { /* * For UDP, there are 2 timeouts: * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer * that does a retransmit of an RPC request using the same * socket and xid. This is what you normally want to do, * since NFS servers depend on "same xid" for their * Duplicate Request Cache. * - timeout specified in CLNT_CALL_MBUF(), which specifies when * retransmits on the same socket should fail and a fresh * socket created. Each of these timeouts counts as one * CLSET_RETRIES as set above. * Set the initial retransmit timeout for UDP. This timeout * doesn't exist for TCP and the following call just fails, * which is ok. */ timo.tv_sec = nmp->nm_timeo / NFS_HZ; timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); } mtx_lock(&nrp->nr_mtx); if (nrp->nr_client != NULL) { mtx_unlock(&nrp->nr_mtx); /* * Someone else already connected. */ CLNT_RELEASE(client); } else { nrp->nr_client = client; /* * Protocols that do not require connections may be optionally * left unconnected for servers that reply from a port other * than NFS_PORT. */ if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) { mtx_unlock(&nrp->nr_mtx); CLNT_CONTROL(client, CLSET_CONNECT, &one); } else mtx_unlock(&nrp->nr_mtx); } /* Restore current thread's credentials. */ td->td_ucred = origcred; out: NFSEXITCODE(error); return (error); } /* * NFS disconnect. Clean up and unlink. */ void newnfs_disconnect(struct nfssockreq *nrp) { CLIENT *client; mtx_lock(&nrp->nr_mtx); if (nrp->nr_client != NULL) { client = nrp->nr_client; nrp->nr_client = NULL; mtx_unlock(&nrp->nr_mtx); rpc_gss_secpurge_call(client); CLNT_CLOSE(client); CLNT_RELEASE(client); } else { mtx_unlock(&nrp->nr_mtx); } } static AUTH * nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, char *srv_principal, gss_OID mech_oid, struct ucred *cred) { rpc_gss_service_t svc; AUTH *auth; switch (secflavour) { case RPCSEC_GSS_KRB5: case RPCSEC_GSS_KRB5I: case RPCSEC_GSS_KRB5P: if (!mech_oid) { if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid)) return (NULL); } if (secflavour == RPCSEC_GSS_KRB5) svc = rpc_gss_svc_none; else if (secflavour == RPCSEC_GSS_KRB5I) svc = rpc_gss_svc_integrity; else svc = rpc_gss_svc_privacy; if (clnt_principal == NULL) auth = rpc_gss_secfind_call(nrp->nr_client, cred, srv_principal, mech_oid, svc); else { auth = rpc_gss_seccreate_call(nrp->nr_client, cred, clnt_principal, srv_principal, "kerberosv5", svc, NULL, NULL, NULL); return (auth); } if (auth != NULL) return (auth); /* fallthrough */ case AUTH_SYS: default: return (authunix_create(cred)); } } /* * Callback from the RPC code to generate up/down notifications. */ struct nfs_feedback_arg { struct nfsmount *nf_mount; int nf_lastmsg; /* last tprintf */ int nf_tprintfmsg; struct thread *nf_td; }; static void nfs_feedback(int type, int proc, void *arg) { struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; struct nfsmount *nmp = nf->nf_mount; time_t now; switch (type) { case FEEDBACK_REXMIT2: case FEEDBACK_RECONNECT: now = NFSD_MONOSEC; if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) { nfs_down(nmp, nf->nf_td, "not responding", 0, NFSSTA_TIMEO); nf->nf_tprintfmsg = TRUE; nf->nf_lastmsg = now; } break; case FEEDBACK_OK: nfs_up(nf->nf_mount, nf->nf_td, "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); break; } } /* * newnfs_request - goes something like this * - does the rpc by calling the krpc layer * - break down rpc header and return with nfs reply * nb: always frees up nd_mreq mbuf list */ int newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp, struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers, u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep) { uint32_t retseq, retval, slotseq, *tl; time_t waituntil; int i = 0, j = 0, opcnt, set_sigset = 0, slot; int error = 0, usegssname = 0, secflavour = AUTH_SYS; int freeslot, maxslot, reterr, slotpos, timeo; u_int16_t procnum; u_int trylater_delay = 1; struct nfs_feedback_arg nf; struct timeval timo; AUTH *auth; struct rpc_callextra ext; enum clnt_stat stat; struct nfsreq *rep = NULL; char *srv_principal = NULL, *clnt_principal = NULL; sigset_t oldset; struct ucred *authcred; struct nfsclsession *sep; uint8_t sessionid[NFSX_V4SESSIONID]; sep = dssep; if (xidp != NULL) *xidp = 0; /* Reject requests while attempting a forced unmount. */ if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) { m_freem(nd->nd_mreq); return (ESTALE); } /* * Set authcred, which is used to acquire RPC credentials to * the cred argument, by default. The crhold() should not be * necessary, but will ensure that some future code change * doesn't result in the credential being free'd prematurely. */ authcred = crhold(cred); /* For client side interruptible mounts, mask off the signals. */ if (nmp != NULL && td != NULL && NFSHASINT(nmp)) { newnfs_set_sigmask(td, &oldset); set_sigset = 1; } /* * XXX if not already connected call nfs_connect now. Longer * term, change nfs_mount to call nfs_connect unconditionally * and let clnt_reconnect_create handle reconnects. */ if (nrp->nr_client == NULL) newnfs_connect(nmp, nrp, cred, td, 0); /* * For a client side mount, nmp is != NULL and clp == NULL. For * server calls (callbacks or upcalls), nmp == NULL. */ if (clp != NULL) { NFSLOCKSTATE(); if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) { secflavour = RPCSEC_GSS_KRB5; if (nd->nd_procnum != NFSPROC_NULL) { if (clp->lc_flags & LCL_GSSINTEGRITY) secflavour = RPCSEC_GSS_KRB5I; else if (clp->lc_flags & LCL_GSSPRIVACY) secflavour = RPCSEC_GSS_KRB5P; } } NFSUNLOCKSTATE(); } else if (nmp != NULL && NFSHASKERB(nmp) && nd->nd_procnum != NFSPROC_NULL) { if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0) nd->nd_flag |= ND_USEGSSNAME; if ((nd->nd_flag & ND_USEGSSNAME) != 0) { /* * If there is a client side host based credential, * use that, otherwise use the system uid, if set. * The system uid is in the nmp->nm_sockreq.nr_cred * credentials. */ if (nmp->nm_krbnamelen > 0) { usegssname = 1; clnt_principal = nmp->nm_krbname; } else if (nmp->nm_uid != (uid_t)-1) { KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } } else if (nmp->nm_krbnamelen == 0 && nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) { /* * If there is no host based principal name and * the system uid is set and this is root, use the * system uid, since root won't have user * credentials in a credentials cache file. * The system uid is in the nmp->nm_sockreq.nr_cred * credentials. */ KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } if (NFSHASINTEGRITY(nmp)) secflavour = RPCSEC_GSS_KRB5I; else if (NFSHASPRIVACY(nmp)) secflavour = RPCSEC_GSS_KRB5P; else secflavour = RPCSEC_GSS_KRB5; srv_principal = NFSMNT_SRVKRBNAME(nmp); } else if (nmp != NULL && !NFSHASKERB(nmp) && nd->nd_procnum != NFSPROC_NULL && (nd->nd_flag & ND_USEGSSNAME) != 0) { /* * Use the uid that did the mount when the RPC is doing * NFSv4 system operations, as indicated by the * ND_USEGSSNAME flag, for the AUTH_SYS case. * The credentials in nm_sockreq.nr_cred were used for the * mount. */ KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } if (nmp != NULL) { bzero(&nf, sizeof(struct nfs_feedback_arg)); nf.nf_mount = nmp; nf.nf_td = td; nf.nf_lastmsg = NFSD_MONOSEC - ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay)); } if (nd->nd_procnum == NFSPROC_NULL) auth = authnone_create(); else if (usegssname) { /* * For this case, the authenticator is held in the * nfssockreq structure, so don't release the reference count * held on it. --> Don't AUTH_DESTROY() it in this function. */ if (nrp->nr_auth == NULL) nrp->nr_auth = nfs_getauth(nrp, secflavour, clnt_principal, srv_principal, NULL, authcred); else rpc_gss_refresh_auth_call(nrp->nr_auth); auth = nrp->nr_auth; } else auth = nfs_getauth(nrp, secflavour, NULL, srv_principal, NULL, authcred); crfree(authcred); if (auth == NULL) { m_freem(nd->nd_mreq); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (EACCES); } bzero(&ext, sizeof(ext)); ext.rc_auth = auth; if (nmp != NULL) { ext.rc_feedback = nfs_feedback; ext.rc_feedback_arg = &nf; } procnum = nd->nd_procnum; if ((nd->nd_flag & ND_NFSV4) && nd->nd_procnum != NFSPROC_NULL && nd->nd_procnum != NFSV4PROC_CBCOMPOUND) procnum = NFSV4PROC_COMPOUND; if (nmp != NULL) { NFSINCRGLOBAL(nfsstatsv1.rpcrequests); /* Map the procnum to the old NFSv2 one, as required. */ if ((nd->nd_flag & ND_NFSV2) != 0) { if (nd->nd_procnum < NFS_V3NPROCS) procnum = nfsv2_procid[nd->nd_procnum]; else procnum = NFSV2PROC_NOOP; } /* * Now only used for the R_DONTRECOVER case, but until that is * supported within the krpc code, I need to keep a queue of * outstanding RPCs for nfsv4 client requests. */ if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND) - MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), + rep = malloc(sizeof(struct nfsreq), M_NFSDREQ, M_WAITOK); #ifdef KDTRACE_HOOKS if (dtrace_nfscl_nfs234_start_probe != NULL) { uint32_t probe_id; int probe_procnum; if (nd->nd_flag & ND_NFSV4) { probe_id = nfscl_nfs4_start_probes[nd->nd_procnum]; probe_procnum = nd->nd_procnum; } else if (nd->nd_flag & ND_NFSV3) { probe_id = nfscl_nfs3_start_probes[procnum]; probe_procnum = procnum; } else { probe_id = nfscl_nfs2_start_probes[nd->nd_procnum]; probe_procnum = procnum; } if (probe_id != 0) (dtrace_nfscl_nfs234_start_probe) (probe_id, vp, nd->nd_mreq, cred, probe_procnum); } #endif } freeslot = -1; /* Set to slot that needs to be free'd */ tryagain: slot = -1; /* Slot that needs a sequence# increment. */ /* * This timeout specifies when a new socket should be created, * along with new xid values. For UDP, this should be done * infrequently, since retransmits of RPC requests should normally * use the same xid. */ if (nmp == NULL) { timo.tv_usec = 0; if (clp == NULL) timo.tv_sec = NFSV4_UPCALLTIMEO; else timo.tv_sec = NFSV4_CALLBACKTIMEO; } else { if (nrp->nr_sotype != SOCK_DGRAM) { timo.tv_usec = 0; if ((nmp->nm_flag & NFSMNT_NFSV4)) timo.tv_sec = INT_MAX; else timo.tv_sec = NFS_TCPTIMEO; } else { if (NFSHASSOFT(nmp)) { /* * CLSET_RETRIES is set to 2, so this should be * half of the total timeout required. */ timeo = nmp->nm_retry * nmp->nm_timeo / 2; if (timeo < 1) timeo = 1; timo.tv_sec = timeo / NFS_HZ; timo.tv_usec = (timeo % NFS_HZ) * 1000000 / NFS_HZ; } else { /* For UDP hard mounts, use a large value. */ timo.tv_sec = NFS_MAXTIMEO / NFS_HZ; timo.tv_usec = 0; } } if (rep != NULL) { rep->r_flags = 0; rep->r_nmp = nmp; /* * Chain request into list of outstanding requests. */ NFSLOCKREQ(); TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain); NFSUNLOCKREQ(); } } nd->nd_mrep = NULL; if (clp != NULL && sep != NULL) stat = clnt_bck_call(nrp->nr_client, &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt); else stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo); if (rep != NULL) { /* * RPC done, unlink the request. */ NFSLOCKREQ(); TAILQ_REMOVE(&nfsd_reqq, rep, r_chain); NFSUNLOCKREQ(); } /* * If there was a successful reply and a tprintf msg. * tprintf a response. */ if (stat == RPC_SUCCESS) { error = 0; } else if (stat == RPC_TIMEDOUT) { NFSINCRGLOBAL(nfsstatsv1.rpctimeouts); error = ETIMEDOUT; } else if (stat == RPC_VERSMISMATCH) { NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EOPNOTSUPP; } else if (stat == RPC_PROGVERSMISMATCH) { NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EPROTONOSUPPORT; } else if (stat == RPC_INTR) { error = EINTR; } else { NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EACCES; } if (error) { m_freem(nd->nd_mreq); if (usegssname == 0) AUTH_DESTROY(auth); if (rep != NULL) - FREE((caddr_t)rep, M_NFSDREQ); + free(rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (error); } KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n")); /* * Search for any mbufs that are not a multiple of 4 bytes long * or with m_data not longword aligned. * These could cause pointer alignment problems, so copy them to * well aligned mbufs. */ newnfs_realign(&nd->nd_mrep, M_WAITOK); nd->nd_md = nd->nd_mrep; nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); nd->nd_repstat = 0; if (nd->nd_procnum != NFSPROC_NULL && nd->nd_procnum != NFSV4PROC_CBNULL) { /* If sep == NULL, set it to the default in nmp. */ if (sep == NULL && nmp != NULL) sep = nfsmnt_mdssession(nmp); /* * and now the actual NFS xdr. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl); if (nd->nd_repstat >= 10000) NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum, (int)nd->nd_repstat); /* * Get rid of the tag, return count and SEQUENCE result for * NFSv4. */ if ((nd->nd_flag & ND_NFSV4) != 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); opcnt = fxdr_unsigned(int, *tl++); i = fxdr_unsigned(int, *tl++); j = fxdr_unsigned(int, *tl); if (j >= 10000) NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j); /* * If the first op is Sequence, free up the slot. */ if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) || (clp != NULL && i == NFSV4OP_CBSEQUENCE && j != 0)) NFSCL_DEBUG(1, "failed seq=%d\n", j); if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) || (clp != NULL && i == NFSV4OP_CBSEQUENCE && j == 0) ) { if (i == NFSV4OP_SEQUENCE) NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED); else NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); mtx_lock(&sep->nfsess_mtx); if (bcmp(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID) == 0) { tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; retseq = fxdr_unsigned(uint32_t, *tl++); slot = fxdr_unsigned(int, *tl++); freeslot = slot; if (retseq != sep->nfsess_slotseq[slot]) printf("retseq diff 0x%x\n", retseq); retval = fxdr_unsigned(uint32_t, *++tl); if ((retval + 1) < sep->nfsess_foreslots ) sep->nfsess_foreslots = (retval + 1); else if ((retval + 1) > sep->nfsess_foreslots) sep->nfsess_foreslots = (retval < 64) ? (retval + 1) : 64; } mtx_unlock(&sep->nfsess_mtx); /* Grab the op and status for the next one. */ if (opcnt > 1) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); j = fxdr_unsigned(int, *tl); } } } if (nd->nd_repstat != 0) { if (nd->nd_repstat == NFSERR_BADSESSION && nmp != NULL && dssep == NULL) { /* * If this is a client side MDS RPC, mark * the MDS session defunct and initiate * recovery, as required. * The nfsess_defunct field is protected by * the NFSLOCKMNT()/nm_mtx lock and not the * nfsess_mtx lock to simplify its handling, * for the MDS session. This lock is also * sufficient for nfsess_sessionid, since it * never changes in the structure. */ NFSCL_DEBUG(1, "Got badsession\n"); NFSLOCKCLSTATE(); NFSLOCKMNT(nmp); sep = NFSMNT_MDSSESSION(nmp); if (bcmp(sep->nfsess_sessionid, nd->nd_sequence, NFSX_V4SESSIONID) == 0) { /* Initiate recovery. */ sep->nfsess_defunct = 1; NFSCL_DEBUG(1, "Marked defunct\n"); if (nmp->nm_clp != NULL) { nmp->nm_clp->nfsc_flags |= NFSCLFLAGS_RECOVER; wakeup(nmp->nm_clp); } } NFSUNLOCKCLSTATE(); /* * Sleep for up to 1sec waiting for a new * session. */ mtx_sleep(&nmp->nm_sess, &nmp->nm_mtx, PZERO, "nfsbadsess", hz); /* * Get the session again, in case a new one * has been created during the sleep. */ sep = NFSMNT_MDSSESSION(nmp); NFSUNLOCKMNT(nmp); if ((nd->nd_flag & ND_LOOPBADSESS) != 0) { reterr = nfsv4_sequencelookup(nmp, sep, &slotpos, &maxslot, &slotseq, sessionid); if (reterr == 0) { /* Fill in new session info. */ NFSCL_DEBUG(1, "Filling in new sequence\n"); tl = nd->nd_sequence; bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl = txdr_unsigned(maxslot); } if (reterr == NFSERR_BADSESSION || reterr == 0) { NFSCL_DEBUG(1, "Badsession looping\n"); m_freem(nd->nd_mrep); nd->nd_mrep = NULL; goto tryagain; } nd->nd_repstat = reterr; NFSCL_DEBUG(1, "Got err=%d\n", reterr); } } if (((nd->nd_repstat == NFSERR_DELAY || nd->nd_repstat == NFSERR_GRACE) && (nd->nd_flag & ND_NFSV4) && nd->nd_procnum != NFSPROC_DELEGRETURN && nd->nd_procnum != NFSPROC_SETATTR && nd->nd_procnum != NFSPROC_READ && nd->nd_procnum != NFSPROC_READDS && nd->nd_procnum != NFSPROC_WRITE && nd->nd_procnum != NFSPROC_WRITEDS && nd->nd_procnum != NFSPROC_OPEN && nd->nd_procnum != NFSPROC_CREATE && nd->nd_procnum != NFSPROC_OPENCONFIRM && nd->nd_procnum != NFSPROC_OPENDOWNGRADE && nd->nd_procnum != NFSPROC_CLOSE && nd->nd_procnum != NFSPROC_LOCK && nd->nd_procnum != NFSPROC_LOCKU) || (nd->nd_repstat == NFSERR_DELAY && (nd->nd_flag & ND_NFSV4) == 0) || nd->nd_repstat == NFSERR_RESOURCE) { if (trylater_delay > NFS_TRYLATERDEL) trylater_delay = NFS_TRYLATERDEL; waituntil = NFSD_MONOSEC + trylater_delay; while (NFSD_MONOSEC < waituntil) (void) nfs_catnap(PZERO, 0, "nfstry"); trylater_delay *= 2; if (slot != -1) { mtx_lock(&sep->nfsess_mtx); sep->nfsess_slotseq[slot]++; *nd->nd_slotseq = txdr_unsigned( sep->nfsess_slotseq[slot]); mtx_unlock(&sep->nfsess_mtx); } m_freem(nd->nd_mrep); nd->nd_mrep = NULL; goto tryagain; } /* * If the File Handle was stale, invalidate the * lookup cache, just in case. * (vp != NULL implies a client side call) */ if (nd->nd_repstat == ESTALE && vp != NULL) { cache_purge(vp); if (ncl_call_invalcaches != NULL) (*ncl_call_invalcaches)(vp); } } if ((nd->nd_flag & ND_NFSV4) != 0) { /* Free the slot, as required. */ if (freeslot != -1) nfsv4_freeslot(sep, freeslot); /* * If this op is Putfh, throw its results away. */ if (j >= 10000) NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j); if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) { NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); j = fxdr_unsigned(int, *tl); if (j >= 10000) NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i, j); /* * All Compounds that do an Op that must * be in sequence consist of NFSV4OP_PUTFH * followed by one of these. As such, we * can determine if the seqid# should be * incremented, here. */ if ((i == NFSV4OP_OPEN || i == NFSV4OP_OPENCONFIRM || i == NFSV4OP_OPENDOWNGRADE || i == NFSV4OP_CLOSE || i == NFSV4OP_LOCK || i == NFSV4OP_LOCKU) && (j == 0 || (j != NFSERR_STALECLIENTID && j != NFSERR_STALESTATEID && j != NFSERR_BADSTATEID && j != NFSERR_BADSEQID && j != NFSERR_BADXDR && j != NFSERR_RESOURCE && j != NFSERR_NOFILEHANDLE))) nd->nd_flag |= ND_INCRSEQID; } /* * If this op's status is non-zero, mark * that there is no more data to process. * The exception is Setattr, which always has xdr * when it has failed. */ if (j != 0 && i != NFSV4OP_SETATTR) nd->nd_flag |= ND_NOMOREDATA; /* * If R_DONTRECOVER is set, replace the stale error * reply, so that recovery isn't initiated. */ if ((nd->nd_repstat == NFSERR_STALECLIENTID || nd->nd_repstat == NFSERR_BADSESSION || nd->nd_repstat == NFSERR_STALESTATEID) && rep != NULL && (rep->r_flags & R_DONTRECOVER)) nd->nd_repstat = NFSERR_STALEDONTRECOVER; } } #ifdef KDTRACE_HOOKS if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) { uint32_t probe_id; int probe_procnum; if (nd->nd_flag & ND_NFSV4) { probe_id = nfscl_nfs4_done_probes[nd->nd_procnum]; probe_procnum = nd->nd_procnum; } else if (nd->nd_flag & ND_NFSV3) { probe_id = nfscl_nfs3_done_probes[procnum]; probe_procnum = procnum; } else { probe_id = nfscl_nfs2_done_probes[nd->nd_procnum]; probe_procnum = procnum; } if (probe_id != 0) (dtrace_nfscl_nfs234_done_probe)(probe_id, vp, nd->nd_mreq, cred, probe_procnum, 0); } #endif m_freem(nd->nd_mreq); if (usegssname == 0) AUTH_DESTROY(auth); if (rep != NULL) - FREE((caddr_t)rep, M_NFSDREQ); + free(rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (0); nfsmout: mbuf_freem(nd->nd_mrep); mbuf_freem(nd->nd_mreq); if (usegssname == 0) AUTH_DESTROY(auth); if (rep != NULL) - FREE((caddr_t)rep, M_NFSDREQ); + free(rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (error); } /* * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and * wait for all requests to complete. This is used by forced unmounts * to terminate any outstanding RPCs. */ int newnfs_nmcancelreqs(struct nfsmount *nmp) { struct nfsclds *dsp; struct __rpc_client *cl; if (nmp->nm_sockreq.nr_client != NULL) CLNT_CLOSE(nmp->nm_sockreq.nr_client); lookformore: NFSLOCKMNT(nmp); TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { NFSLOCKDS(dsp); if (dsp != TAILQ_FIRST(&nmp->nm_sess) && (dsp->nfsclds_flags & NFSCLDS_CLOSED) == 0 && dsp->nfsclds_sockp != NULL && dsp->nfsclds_sockp->nr_client != NULL) { dsp->nfsclds_flags |= NFSCLDS_CLOSED; cl = dsp->nfsclds_sockp->nr_client; NFSUNLOCKDS(dsp); NFSUNLOCKMNT(nmp); CLNT_CLOSE(cl); goto lookformore; } NFSUNLOCKDS(dsp); } NFSUNLOCKMNT(nmp); return (0); } /* * Any signal that can interrupt an NFS operation in an intr mount * should be added to this set. SIGSTOP and SIGKILL cannot be masked. */ int newnfs_sig_set[] = { SIGINT, SIGTERM, SIGHUP, SIGKILL, SIGQUIT }; /* * Check to see if one of the signals in our subset is pending on * the process (in an intr mount). */ static int nfs_sig_pending(sigset_t set) { int i; for (i = 0 ; i < nitems(newnfs_sig_set); i++) if (SIGISMEMBER(set, newnfs_sig_set[i])) return (1); return (0); } /* * The set/restore sigmask functions are used to (temporarily) overwrite * the thread td_sigmask during an RPC call (for example). These are also * used in other places in the NFS client that might tsleep(). */ void newnfs_set_sigmask(struct thread *td, sigset_t *oldset) { sigset_t newset; int i; struct proc *p; SIGFILLSET(newset); if (td == NULL) td = curthread; /* XXX */ p = td->td_proc; /* Remove the NFS set of signals from newset */ PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); for (i = 0 ; i < nitems(newnfs_sig_set); i++) { /* * But make sure we leave the ones already masked * by the process, ie. remove the signal from the * temporary signalmask only if it wasn't already * in p_sigmask. */ if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) && !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i])) SIGDELSET(newset, newnfs_sig_set[i]); } mtx_unlock(&p->p_sigacts->ps_mtx); kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, SIGPROCMASK_PROC_LOCKED); PROC_UNLOCK(p); } void newnfs_restore_sigmask(struct thread *td, sigset_t *set) { if (td == NULL) td = curthread; /* XXX */ kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); } /* * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the * old one after msleep() returns. */ int newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) { sigset_t oldset; int error; if ((priority & PCATCH) == 0) return msleep(ident, mtx, priority, wmesg, timo); if (td == NULL) td = curthread; /* XXX */ newnfs_set_sigmask(td, &oldset); error = msleep(ident, mtx, priority, wmesg, timo); newnfs_restore_sigmask(td, &oldset); return (error); } /* * Test for a termination condition pending on the process. * This is used for NFSMNT_INT mounts. */ int newnfs_sigintr(struct nfsmount *nmp, struct thread *td) { struct proc *p; sigset_t tmpset; /* Terminate all requests while attempting a forced unmount. */ if (NFSCL_FORCEDISM(nmp->nm_mountp)) return (EIO); if (!(nmp->nm_flag & NFSMNT_INT)) return (0); if (td == NULL) return (0); p = td->td_proc; PROC_LOCK(p); tmpset = p->p_siglist; SIGSETOR(tmpset, td->td_siglist); SIGSETNAND(tmpset, td->td_sigmask); mtx_lock(&p->p_sigacts->ps_mtx); SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); mtx_unlock(&p->p_sigacts->ps_mtx); if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) && nfs_sig_pending(tmpset)) { PROC_UNLOCK(p); return (EINTR); } PROC_UNLOCK(p); return (0); } static int nfs_msg(struct thread *td, const char *server, const char *msg, int error) { struct proc *p; p = td ? td->td_proc : NULL; if (error) { tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, msg, error); } else { tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); } return (0); } static void nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, int error, int flags) { if (nmp == NULL) return; mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { nmp->nm_state |= NFSSTA_TIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 0); } else mtx_unlock(&nmp->nm_mtx); mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state |= NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 0); } else mtx_unlock(&nmp->nm_mtx); nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); } static void nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, int flags, int tprintfmsg) { if (nmp == NULL) return; if (tprintfmsg) { nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); } mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { nmp->nm_state &= ~NFSSTA_TIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 1); } else mtx_unlock(&nmp->nm_mtx); mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state &= ~NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 1); } else mtx_unlock(&nmp->nm_mtx); } Index: head/sys/fs/nfs/nfs_commonsubs.c =================================================================== --- head/sys/fs/nfs/nfs_commonsubs.c (revision 328416) +++ head/sys/fs/nfs/nfs_commonsubs.c (revision 328417) @@ -1,4241 +1,4241 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #ifndef APPLEKEXT #include "opt_inet6.h" #include #include /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; /* And other global data */ nfstype nfsv34_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; enum vtype newnv2tov_type[8] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv34tov_type[8]={ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; struct timeval nfsboottime; /* Copy boottime once, so it never changes */ int nfscl_ticks; int nfsrv_useacl = 1; struct nfssockreq nfsrv_nfsuserdsock; int nfsrv_nfsuserd = 0; struct nfsreqhead nfsd_reqq; uid_t nfsrv_defaultuid = UID_NOBODY; gid_t nfsrv_defaultgid = GID_NOGROUP; int nfsrv_lease = NFSRV_LEASE; int ncl_mbuf_mlen = MLEN; int nfsd_enable_stringtouid = 0; static int nfs_enable_uidtostring = 0; NFSNAMEIDMUTEX; NFSSOCKMUTEX; extern int nfsrv_lughashsize; SYSCTL_DECL(_vfs_nfs); SYSCTL_INT(_vfs_nfs, OID_AUTO, enable_uidtostring, CTLFLAG_RW, &nfs_enable_uidtostring, 0, "Make nfs always send numeric owner_names"); /* * This array of structures indicates, for V4: * retfh - which of 3 types of calling args are used * 0 - doesn't change cfh or use a sfh * 1 - replaces cfh with a new one (unless it returns an error status) * 2 - uses cfh and sfh * needscfh - if the op wants a cfh and premtime * 0 - doesn't use a cfh * 1 - uses a cfh, but doesn't want pre-op attributes * 2 - uses a cfh and wants pre-op attributes * savereply - indicates a non-idempotent Op * 0 - not non-idempotent * 1 - non-idempotent * Ops that are ordered via seqid# are handled separately from these * non-idempotent Ops. * Define it here, since it is used by both the client and server. */ struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS] = { { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* undef */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Access */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Close */ { 0, 2, 0, 1, LK_EXCLUSIVE, 1, 1 }, /* Commit */ { 1, 2, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Create */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Delegpurge */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Delegreturn */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Getattr */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* GetFH */ { 2, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Link */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Lock */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* LockT */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* LockU */ { 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Lookup */ { 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Lookupp */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* NVerify */ { 1, 1, 0, 1, LK_EXCLUSIVE, 1, 0 }, /* Open */ { 1, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenAttr */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenConfirm */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* OpenDowngrade */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutFH */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutPubFH */ { 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* PutRootFH */ { 0, 1, 0, 0, LK_SHARED, 1, 0 }, /* Read */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* Readdir */ { 0, 1, 0, 0, LK_SHARED, 1, 1 }, /* ReadLink */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Remove */ { 2, 1, 1, 1, LK_EXCLUSIVE, 1, 1 }, /* Rename */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Renew */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* RestoreFH */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SaveFH */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SecInfo */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Setattr */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SetClientID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* SetClientIDConfirm */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Verify */ { 0, 2, 1, 1, LK_EXCLUSIVE, 1, 0 }, /* Write */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* ReleaseLockOwner */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Backchannel Ctrl */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Bind Conn to Sess */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Exchange ID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Create Session */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Destroy Session */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Free StateID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Dir Deleg */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device Info */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device List */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Layout Commit */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Layout Get */ { 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Return */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Secinfo No name */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Sequence */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Set SSV */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Test StateID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Want Delegation */ { 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 }, /* Destroy ClientID */ { 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Reclaim Complete */ }; #endif /* !APPLEKEXT */ static int ncl_mbuf_mhlen = MHLEN; static int nfsrv_usercnt = 0; static int nfsrv_dnsnamelen; static u_char *nfsrv_dnsname = NULL; static int nfsrv_usermax = 999999999; struct nfsrv_lughash { struct mtx mtx; struct nfsuserhashhead lughead; }; static struct nfsrv_lughash *nfsuserhash; static struct nfsrv_lughash *nfsusernamehash; static struct nfsrv_lughash *nfsgrouphash; static struct nfsrv_lughash *nfsgroupnamehash; /* * This static array indicates whether or not the RPC generates a large * reply. This is used by nfs_reply() to decide whether or not an mbuf * cluster should be allocated. (If a cluster is required by an RPC * marked 0 in this array, the code will still work, just not quite as * efficiently.) */ int nfs_bigreply[NFSV41_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); static void nfsv4_wanted(struct nfsv4lock *lp); static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len); static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name, NFSPROC_T *p); static void nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser); static int nfsrv_getrefstr(struct nfsrv_descript *, u_char **, u_char **, int *, int *); static void nfsrv_refstrbigenough(int, u_char **, u_char **, int *); #ifndef APPLE /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbufuio(struct nfsrv_descript *nd, struct uio *uiop, int siz) { char *mbufcp, *uiocp; int xfer, left, len; mbuf_t mp; long uiosiz, rem; int error = 0; mp = nd->nd_md; mbufcp = nd->nd_dpos; len = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - mbufcp; rem = NFSM_RNDUP(siz) - siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) { error = EBADRPC; goto out; } left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mbuf_next(mp); if (mp == NULL) { error = EBADRPC; goto out; } mbufcp = NFSMTOD(mp, caddr_t); len = mbuf_len(mp); KASSERT(len >= 0, ("len %d, corrupted mbuf?", len)); } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) NFSBCOPY(mbufcp, uiocp, xfer); else copyout(mbufcp, CAST_USER_ADDR_T(uiocp), xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base = (void *) ((char *)uiop->uio_iov->iov_base + uiosiz); uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } nd->nd_dpos = mbufcp; nd->nd_md = mp; if (rem > 0) { if (len < rem) error = nfsm_advance(nd, rem, len); else nd->nd_dpos += rem; } out: NFSEXITCODE2(error, nd); return (error); } #endif /* !APPLE */ /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macro NFSM_DISSECT for tough * cases. */ APPLESTATIC void * nfsm_dissct(struct nfsrv_descript *nd, int siz, int how) { mbuf_t mp2; int siz2, xfer; caddr_t p; int left; caddr_t retp; retp = NULL; left = NFSMTOD(nd->nd_md, caddr_t) + mbuf_len(nd->nd_md) - nd->nd_dpos; while (left == 0) { nd->nd_md = mbuf_next(nd->nd_md); if (nd->nd_md == NULL) return (retp); left = mbuf_len(nd->nd_md); nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); } if (left >= siz) { retp = nd->nd_dpos; nd->nd_dpos += siz; } else if (mbuf_next(nd->nd_md) == NULL) { return (retp); } else if (siz > ncl_mbuf_mhlen) { panic("nfs S too big"); } else { MGET(mp2, MT_DATA, how); if (mp2 == NULL) return (NULL); mbuf_setnext(mp2, mbuf_next(nd->nd_md)); mbuf_setnext(nd->nd_md, mp2); mbuf_setlen(nd->nd_md, mbuf_len(nd->nd_md) - left); nd->nd_md = mp2; retp = p = NFSMTOD(mp2, caddr_t); NFSBCOPY(nd->nd_dpos, p, left); /* Copy what was left */ siz2 = siz - left; p += left; mp2 = mbuf_next(mp2); /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (NULL); xfer = (siz2 > mbuf_len(mp2)) ? mbuf_len(mp2) : siz2; if (xfer > 0) { NFSBCOPY(NFSMTOD(mp2, caddr_t), p, xfer); NFSM_DATAP(mp2, xfer); mbuf_setlen(mp2, mbuf_len(mp2) - xfer); p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mbuf_next(mp2); } mbuf_setlen(nd->nd_md, siz); nd->nd_md = mp2; nd->nd_dpos = NFSMTOD(mp2, caddr_t); } return (retp); } /* * Advance the position in the mbuf chain. * If offs == 0, this is a no-op, but it is simpler to just return from * here than check for offs > 0 for all calls to nfsm_advance. * If left == -1, it should be calculated here. */ APPLESTATIC int nfsm_advance(struct nfsrv_descript *nd, int offs, int left) { int error = 0; if (offs == 0) goto out; /* * A negative offs should be considered a serious problem. */ if (offs < 0) panic("nfsrv_advance"); /* * If left == -1, calculate it here. */ if (left == -1) left = NFSMTOD(nd->nd_md, caddr_t) + mbuf_len(nd->nd_md) - nd->nd_dpos; /* * Loop around, advancing over the mbuf data. */ while (offs > left) { offs -= left; nd->nd_md = mbuf_next(nd->nd_md); if (nd->nd_md == NULL) { error = EBADRPC; goto out; } left = mbuf_len(nd->nd_md); nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); } nd->nd_dpos += offs; out: NFSEXITCODE(error); return (error); } /* * Copy a string into mbuf(s). * Return the number of bytes output, including XDR overheads. */ APPLESTATIC int nfsm_strtom(struct nfsrv_descript *nd, const char *cp, int siz) { mbuf_t m2; int xfer, left; mbuf_t m1; int rem, bytesize; u_int32_t *tl; char *cp2; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(siz); rem = NFSM_RNDUP(siz) - siz; bytesize = NFSX_UNSIGNED + siz + rem; m2 = nd->nd_mb; cp2 = nd->nd_bpos; left = M_TRAILINGSPACE(m2); /* * Loop around copying the string to mbuf(s). */ while (siz > 0) { if (left == 0) { if (siz > ncl_mbuf_mlen) NFSMCLGET(m1, M_WAITOK); else NFSMGET(m1); mbuf_setlen(m1, 0); mbuf_setnext(m2, m1); m2 = m1; cp2 = NFSMTOD(m2, caddr_t); left = M_TRAILINGSPACE(m2); } if (left >= siz) xfer = siz; else xfer = left; NFSBCOPY(cp, cp2, xfer); cp += xfer; mbuf_setlen(m2, mbuf_len(m2) + xfer); siz -= xfer; left -= xfer; if (siz == 0 && rem) { if (left < rem) panic("nfsm_strtom"); NFSBZERO(cp2 + xfer, rem); mbuf_setlen(m2, mbuf_len(m2) + rem); } } nd->nd_mb = m2; nd->nd_bpos = NFSMTOD(m2, caddr_t) + mbuf_len(m2); return (bytesize); } /* * Called once to initialize data structures... */ APPLESTATIC void newnfs_init(void) { static int nfs_inited = 0; if (nfs_inited) return; nfs_inited = 1; newnfs_true = txdr_unsigned(TRUE); newnfs_false = txdr_unsigned(FALSE); newnfs_xdrneg1 = txdr_unsigned(-1); nfscl_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfscl_ticks < 1) nfscl_ticks = 1; NFSSETBOOTTIME(nfsboottime); /* * Initialize reply list and start timer */ TAILQ_INIT(&nfsd_reqq); NFS_TIMERINIT; } /* * Put a file handle in an mbuf list. * If the size argument == 0, just use the default size. * set_true == 1 if there should be an newnfs_true prepended on the file handle. * Return the number of bytes output, including XDR overhead. */ APPLESTATIC int nfsm_fhtom(struct nfsrv_descript *nd, u_int8_t *fhp, int size, int set_true) { u_int32_t *tl; u_int8_t *cp; int fullsiz, bytesize = 0; if (size == 0) size = NFSX_MYFH; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: if (size > NFSX_V2FH) panic("fh size > NFSX_V2FH for NFSv2"); NFSM_BUILD(cp, u_int8_t *, NFSX_V2FH); NFSBCOPY(fhp, cp, size); if (size < NFSX_V2FH) NFSBZERO(cp + size, NFSX_V2FH - size); bytesize = NFSX_V2FH; break; case ND_NFSV3: case ND_NFSV4: fullsiz = NFSM_RNDUP(size); if (set_true) { bytesize = 2 * NFSX_UNSIGNED + fullsiz; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; } else { bytesize = NFSX_UNSIGNED + fullsiz; } (void) nfsm_strtom(nd, fhp, size); break; } return (bytesize); } /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ APPLESTATIC int nfsaddr_match(int family, union nethostaddr *haddr, NFSSOCKADDR_T nam) { struct sockaddr_in *inetaddr; switch (family) { case AF_INET: inetaddr = NFSSOCKADDR(nam, struct sockaddr_in *); if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inet.s_addr) return (1); break; #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *inetaddr6; inetaddr6 = NFSSOCKADDR(nam, struct sockaddr_in6 *); /* XXX - should test sin6_scope_id ? */ if (inetaddr6->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&inetaddr6->sin6_addr, &haddr->had_inet6)) return (1); } break; #endif } return (0); } /* * Similar to the above, but takes to NFSSOCKADDR_T args. */ APPLESTATIC int nfsaddr2_match(NFSSOCKADDR_T nam1, NFSSOCKADDR_T nam2) { struct sockaddr_in *addr1, *addr2; struct sockaddr *inaddr; inaddr = NFSSOCKADDR(nam1, struct sockaddr *); switch (inaddr->sa_family) { case AF_INET: addr1 = NFSSOCKADDR(nam1, struct sockaddr_in *); addr2 = NFSSOCKADDR(nam2, struct sockaddr_in *); if (addr2->sin_family == AF_INET && addr1->sin_addr.s_addr == addr2->sin_addr.s_addr) return (1); break; #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *inet6addr1, *inet6addr2; inet6addr1 = NFSSOCKADDR(nam1, struct sockaddr_in6 *); inet6addr2 = NFSSOCKADDR(nam2, struct sockaddr_in6 *); /* XXX - should test sin6_scope_id ? */ if (inet6addr2->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&inet6addr1->sin6_addr, &inet6addr2->sin6_addr)) return (1); } break; #endif } return (0); } /* * Trim the stuff already dissected off the mbuf list. */ APPLESTATIC void newnfs_trimleading(nd) struct nfsrv_descript *nd; { mbuf_t m, n; int offs; /* * First, free up leading mbufs. */ if (nd->nd_mrep != nd->nd_md) { m = nd->nd_mrep; while (mbuf_next(m) != nd->nd_md) { if (mbuf_next(m) == NULL) panic("nfsm trim leading"); m = mbuf_next(m); } mbuf_setnext(m, NULL); mbuf_freem(nd->nd_mrep); } m = nd->nd_md; /* * Now, adjust this mbuf, based on nd_dpos. */ offs = nd->nd_dpos - NFSMTOD(m, caddr_t); if (offs == mbuf_len(m)) { n = m; m = mbuf_next(m); if (m == NULL) panic("nfsm trim leading2"); mbuf_setnext(n, NULL); mbuf_freem(n); } else if (offs > 0) { mbuf_setlen(m, mbuf_len(m) - offs); NFSM_DATAP(m, offs); } else if (offs < 0) panic("nfsm trimleading offs"); nd->nd_mrep = m; nd->nd_md = m; nd->nd_dpos = NFSMTOD(m, caddr_t); } /* * Trim trailing data off the mbuf list being built. */ APPLESTATIC void newnfs_trimtrailing(nd, mb, bpos) struct nfsrv_descript *nd; mbuf_t mb; caddr_t bpos; { if (mbuf_next(mb)) { mbuf_freem(mbuf_next(mb)); mbuf_setnext(mb, NULL); } mbuf_setlen(mb, bpos - NFSMTOD(mb, caddr_t)); nd->nd_mb = mb; nd->nd_bpos = bpos; } /* * Dissect a file handle on the client. */ APPLESTATIC int nfsm_getfh(struct nfsrv_descript *nd, struct nfsfh **nfhpp) { u_int32_t *tl; struct nfsfh *nfhp; int error, len; *nfhpp = NULL; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { error = EBADRPC; goto nfsmout; } } else len = NFSX_V2FH; - MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + len, + nfhp = malloc(sizeof (struct nfsfh) + len, M_NFSFH, M_WAITOK); error = nfsrv_mtostr(nd, nfhp->nfh_fh, len); if (error) { - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); goto nfsmout; } nfhp->nfh_len = len; *nfhpp = nfhp; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Break down the nfsv4 acl. * If the aclp == NULL or won't fit in an acl, just discard the acl info. */ APPLESTATIC int nfsrv_dissectacl(struct nfsrv_descript *nd, NFSACL_T *aclp, int *aclerrp, int *aclsizep, __unused NFSPROC_T *p) { u_int32_t *tl; int i, aclsize; int acecnt, error = 0, aceerr = 0, acesize; *aclerrp = 0; if (aclp) aclp->acl_cnt = 0; /* * Parse out the ace entries and expect them to conform to * what can be supported by R/W/X bits. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); aclsize = NFSX_UNSIGNED; acecnt = fxdr_unsigned(int, *tl); if (acecnt > ACL_MAX_ENTRIES) aceerr = NFSERR_ATTRNOTSUPP; if (nfsrv_useacl == 0) aceerr = NFSERR_ATTRNOTSUPP; for (i = 0; i < acecnt; i++) { if (aclp && !aceerr) error = nfsrv_dissectace(nd, &aclp->acl_entry[i], &aceerr, &acesize, p); else error = nfsrv_skipace(nd, &acesize); if (error) goto nfsmout; aclsize += acesize; } if (aclp && !aceerr) aclp->acl_cnt = acecnt; if (aceerr) *aclerrp = aceerr; if (aclsizep) *aclsizep = aclsize; nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Skip over an NFSv4 ace entry. Just dissect the xdr and discard it. */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep) { u_int32_t *tl; int error, len = 0; NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 3)); error = nfsm_advance(nd, NFSM_RNDUP(len), -1); nfsmout: *acesizep = NFSM_RNDUP(len) + (4 * NFSX_UNSIGNED); NFSEXITCODE2(error, nd); return (error); } /* * Get attribute bits from an mbuf list. * Returns EBADRPC for a parsing error, 0 otherwise. * If the clearinvalid flag is set, clear the bits not supported. */ APPLESTATIC int nfsrv_getattrbits(struct nfsrv_descript *nd, nfsattrbit_t *attrbitp, int *cntp, int *retnotsupp) { u_int32_t *tl; int cnt, i, outcnt; int error = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (cnt > NFSATTRBIT_MAXWORDS) outcnt = NFSATTRBIT_MAXWORDS; else outcnt = cnt; NFSZERO_ATTRBIT(attrbitp); if (outcnt > 0) { NFSM_DISSECT(tl, u_int32_t *, outcnt * NFSX_UNSIGNED); for (i = 0; i < outcnt; i++) attrbitp->bits[i] = fxdr_unsigned(u_int32_t, *tl++); } for (i = 0; i < (cnt - outcnt); i++) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (retnotsupp != NULL && *tl != 0) *retnotsupp = NFSERR_ATTRNOTSUPP; } if (cntp) *cntp = NFSX_UNSIGNED + (cnt * NFSX_UNSIGNED); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Get the attributes for V4. * If the compare flag is true, test for any attribute changes, * otherwise return the attribute values. * These attributes cover fields in "struct vattr", "struct statfs", * "struct nfsfsinfo", the file handle and the lease duration. * The value of retcmpp is set to 1 if all attributes are the same, * and 0 otherwise. * Returns EBADRPC if it can't be parsed, 0 otherwise. */ APPLESTATIC int nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nap, struct nfsfh **nfhpp, fhandle_t *fhp, int fhsize, struct nfsv3_pathconf *pc, struct statfs *sbp, struct nfsstatfs *sfp, struct nfsfsinfo *fsp, NFSACL_T *aclp, int compare, int *retcmpp, u_int32_t *leasep, u_int32_t *rderrp, NFSPROC_T *p, struct ucred *cred) { u_int32_t *tl; int i = 0, j, k, l = 0, m, bitpos, attrsum = 0; int error, tfhsize, aceerr, attrsize, cnt, retnotsup; u_char *cp, *cp2, namestr[NFSV4_SMALLSTR + 1]; nfsattrbit_t attrbits, retattrbits, checkattrbits; struct nfsfh *tnfhp; struct nfsreferral *refp; u_quad_t tquad; nfsquad_t tnfsquad; struct timespec temptime; uid_t uid; gid_t gid; u_int32_t freenum = 0, tuint; u_int64_t uquad = 0, thyp, thyp2; #ifdef QUOTA struct dqblk dqb; uid_t savuid; #endif CTASSERT(sizeof(ino_t) == sizeof(uint64_t)); if (compare) { retnotsup = 0; error = nfsrv_getattrbits(nd, &attrbits, NULL, &retnotsup); } else { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); } if (error) goto nfsmout; if (compare) { *retcmpp = retnotsup; } else { /* * Just set default values to some of the important ones. */ if (nap != NULL) { nap->na_type = VREG; nap->na_mode = 0; nap->na_rdev = (NFSDEV_T)0; nap->na_mtime.tv_sec = 0; nap->na_mtime.tv_nsec = 0; nap->na_gen = 0; nap->na_flags = 0; nap->na_blocksize = NFS_FABLKSIZE; } if (sbp != NULL) { sbp->f_bsize = NFS_FABLKSIZE; sbp->f_blocks = 0; sbp->f_bfree = 0; sbp->f_bavail = 0; sbp->f_files = 0; sbp->f_ffree = 0; } if (fsp != NULL) { fsp->fs_rtmax = 8192; fsp->fs_rtpref = 8192; fsp->fs_maxname = NFS_MAXNAMLEN; fsp->fs_wtmax = 8192; fsp->fs_wtpref = 8192; fsp->fs_wtmult = NFS_FABLKSIZE; fsp->fs_dtpref = 8192; fsp->fs_maxfilesize = 0xffffffffffffffffull; fsp->fs_timedelta.tv_sec = 0; fsp->fs_timedelta.tv_nsec = 1; fsp->fs_properties = (NFSV3_FSFLINK | NFSV3_FSFSYMLINK | NFSV3_FSFHOMOGENEOUS | NFSV3_FSFCANSETTIME); } if (pc != NULL) { pc->pc_linkmax = NFS_LINK_MAX; pc->pc_namemax = NAME_MAX; pc->pc_notrunc = 0; pc->pc_chownrestricted = 0; pc->pc_caseinsensitive = 0; pc->pc_casepreserving = 1; } if (sfp != NULL) { sfp->sf_ffiles = UINT64_MAX; sfp->sf_tfiles = UINT64_MAX; sfp->sf_afiles = UINT64_MAX; sfp->sf_fbytes = UINT64_MAX; sfp->sf_tbytes = UINT64_MAX; sfp->sf_abytes = UINT64_MAX; } } /* * Loop around getting the attributes. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsize = fxdr_unsigned(int, *tl); for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (attrsum > attrsize) { error = NFSERR_BADXDR; goto nfsmout; } if (NFSISSET_ATTRBIT(&attrbits, bitpos)) switch (bitpos) { case NFSATTRBIT_SUPPORTEDATTRS: retnotsup = 0; if (compare || nap == NULL) error = nfsrv_getattrbits(nd, &retattrbits, &cnt, &retnotsup); else error = nfsrv_getattrbits(nd, &nap->na_suppattr, &cnt, &retnotsup); if (error) goto nfsmout; if (compare && !(*retcmpp)) { NFSSETSUPP_ATTRBIT(&checkattrbits); /* Some filesystem do not support NFSv4ACL */ if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACL); NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACLSUPPORT); } if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits) || retnotsup) *retcmpp = NFSERR_NOTSAME; } attrsum += cnt; break; case NFSATTRBIT_TYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (nap->na_type != nfsv34tov_type(*tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_type = nfsv34tov_type(*tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FHEXPIRETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (fxdr_unsigned(int, *tl) != NFSV4FHTYPE_PERSISTENT) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHANGE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (nap->na_filerev != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_filerev = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (nap->na_size != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_size = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_LINKSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFLINK) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFLINK; else fsp->fs_properties &= ~NFSV3_FSFLINK; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_SYMLINKSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFSYMLINK) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFSYMLINK; else fsp->fs_properties &= ~NFSV3_FSFSYMLINK; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NAMEDATTR: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (*tl != newnfs_false) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); thyp = fxdr_hyper(tl); tl += 2; thyp2 = fxdr_hyper(tl); if (compare) { if (*retcmpp == 0) { if (thyp != (u_int64_t) vfs_statfs(vnode_mount(vp))->f_fsid.val[0] || thyp2 != (u_int64_t) vfs_statfs(vnode_mount(vp))->f_fsid.val[1]) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_filesid[0] = thyp; nap->na_filesid[1] = thyp2; } attrsum += (4 * NFSX_UNSIGNED); break; case NFSATTRBIT_UNIQUEHANDLES: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_LEASETIME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (fxdr_unsigned(int, *tl) != nfsrv_lease && !(*retcmpp)) *retcmpp = NFSERR_NOTSAME; } else if (leasep != NULL) { *leasep = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_RDATTRERROR: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) *retcmpp = NFSERR_INVAL; } else if (rderrp != NULL) { *rderrp = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_ACL: if (compare) { if (!(*retcmpp)) { if (nfsrv_useacl && nfs_supportsnfsv4acls(vp)) { NFSACL_T *naclp; naclp = acl_alloc(M_WAITOK); error = nfsrv_dissectacl(nd, naclp, &aceerr, &cnt, p); if (error) { acl_free(naclp); goto nfsmout; } if (aceerr || aclp == NULL || nfsrv_compareacl(aclp, naclp)) *retcmpp = NFSERR_NOTSAME; acl_free(naclp); } else { error = nfsrv_dissectacl(nd, NULL, &aceerr, &cnt, p); *retcmpp = NFSERR_ATTRNOTSUPP; } } } else { if (vp != NULL && aclp != NULL) error = nfsrv_dissectacl(nd, aclp, &aceerr, &cnt, p); else error = nfsrv_dissectacl(nd, NULL, &aceerr, &cnt, p); if (error) goto nfsmout; } attrsum += cnt; break; case NFSATTRBIT_ACLSUPPORT: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) { if (nfsrv_useacl && nfs_supportsnfsv4acls(vp)) { if (fxdr_unsigned(u_int32_t, *tl) != NFSV4ACE_SUPTYPES) *retcmpp = NFSERR_NOTSAME; } else { *retcmpp = NFSERR_ATTRNOTSUPP; } } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_ARCHIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CANSETTIME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFCANSETTIME) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFCANSETTIME; else fsp->fs_properties &= ~NFSV3_FSFCANSETTIME; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_false) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_caseinsensitive = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEPRESERVING: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHOWNRESTRICTED: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_chownrestricted = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_FILEHANDLE: error = nfsm_getfh(nd, &tnfhp); if (error) goto nfsmout; tfhsize = tnfhp->nfh_len; if (compare) { if (!(*retcmpp) && !NFSRV_CMPFH(tnfhp->nfh_fh, tfhsize, fhp, fhsize)) *retcmpp = NFSERR_NOTSAME; - FREE((caddr_t)tnfhp, M_NFSFH); + free(tnfhp, M_NFSFH); } else if (nfhpp != NULL) { *nfhpp = tnfhp; } else { - FREE((caddr_t)tnfhp, M_NFSFH); + free(tnfhp, M_NFSFH); } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(tfhsize)); break; case NFSATTRBIT_FILEID: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if (nap->na_fileid != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) nap->na_fileid = thyp; attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESAVAIL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_afiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_afiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESFREE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_ffiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_ffiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FILESTOTAL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_tfiles != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_tfiles = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_FSLOCATIONS: error = nfsrv_getrefstr(nd, &cp, &cp2, &l, &m); if (error) goto nfsmout; attrsum += l; if (compare && !(*retcmpp)) { refp = nfsv4root_getreferral(vp, NULL, 0); if (refp != NULL) { if (cp == NULL || cp2 == NULL || strcmp(cp, "/") || strcmp(cp2, refp->nfr_srvlist)) *retcmpp = NFSERR_NOTSAME; } else if (m == 0) { *retcmpp = NFSERR_NOTSAME; } } if (cp != NULL) free(cp, M_NFSSTRING); if (cp2 != NULL) free(cp2, M_NFSSTRING); break; case NFSATTRBIT_HIDDEN: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HOMOGENEOUS: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_properties & NFSV3_FSFHOMOGENEOUS) { if (*tl == newnfs_false) *retcmpp = NFSERR_NOTSAME; } else { if (*tl == newnfs_true) *retcmpp = NFSERR_NOTSAME; } } } else if (fsp != NULL) { if (*tl == newnfs_true) fsp->fs_properties |= NFSV3_FSFHOMOGENEOUS; else fsp->fs_properties &= ~NFSV3_FSFHOMOGENEOUS; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXFILESIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); tnfsquad.qval = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { tquad = NFSRV_MAXFILESIZE; if (tquad != tnfsquad.qval) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_maxfilesize = tnfsquad.qval; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MAXLINK: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fxdr_unsigned(int, *tl) != NFS_LINK_MAX) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXNAME: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (fsp->fs_maxname != fxdr_unsigned(u_int32_t, *tl)) *retcmpp = NFSERR_NOTSAME; } } else { tuint = fxdr_unsigned(u_int32_t, *tl); /* * Some Linux NFSv4 servers report this * as 0 or 4billion, so I'll set it to * NFS_MAXNAMLEN. If a server actually creates * a name longer than NFS_MAXNAMLEN, it will * get an error back. */ if (tuint == 0 || tuint > NFS_MAXNAMLEN) tuint = NFS_MAXNAMLEN; if (fsp != NULL) fsp->fs_maxname = tuint; if (pc != NULL) pc->pc_namemax = tuint; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXREAD: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (fsp->fs_rtmax != fxdr_unsigned(u_int32_t, *(tl + 1)) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *++tl); fsp->fs_rtpref = fsp->fs_rtmax; fsp->fs_dtpref = fsp->fs_rtpref; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MAXWRITE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp)) { if (fsp->fs_wtmax != fxdr_unsigned(u_int32_t, *(tl + 1)) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else if (fsp != NULL) { fsp->fs_wtmax = fxdr_unsigned(int, *++tl); fsp->fs_wtpref = fsp->fs_wtmax; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_MIMETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; break; case NFSATTRBIT_MODE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (nap->na_mode != nfstov_mode(*tl)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_mode = nfstov_mode(*tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NOTRUNC: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { if (*tl != newnfs_true) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl); } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_NUMLINKS: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); tuint = fxdr_unsigned(u_int32_t, *tl); if (compare) { if (!(*retcmpp)) { if ((u_int32_t)nap->na_nlink != tuint) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_nlink = tuint; } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (compare) { if (!(*retcmpp)) { if (nfsv4_strtouid(nd, cp, j, &uid, p) || nap->na_uid != uid) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { if (nfsv4_strtouid(nd, cp, j, &uid, p)) nap->na_uid = nfsrv_defaultuid; else nap->na_uid = uid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); break; case NFSATTRBIT_OWNERGROUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (compare) { if (!(*retcmpp)) { if (nfsv4_strtogid(nd, cp, j, &gid, p) || nap->na_gid != gid) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { if (nfsv4_strtogid(nd, cp, j, &gid, p)) nap->na_gid = nfsrv_defaultgid; else nap->na_gid = gid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); break; case NFSATTRBIT_QUOTAHARD: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = sbp->f_bfree; else freenum = sbp->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bhardlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_QUOTASOFT: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = sbp->f_bfree; else freenum = sbp->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bsoftlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_QUOTAUSED: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (sbp != NULL) { freenum = 0; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(vnode_mount(vp),QCMD(Q_GETQUOTA, USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = dqb.dqb_curblocks; p->p_cred->p_ruid = savuid; #endif /* QUOTA */ uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, sbp->f_bsize); } if (compare && !(*retcmpp)) { if (uquad != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_RAWDEV: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4SPECDATA); j = fxdr_unsigned(int, *tl++); k = fxdr_unsigned(int, *tl); if (compare) { if (!(*retcmpp)) { if (nap->na_rdev != NFSMAKEDEV(j, k)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_rdev = NFSMAKEDEV(j, k); } attrsum += NFSX_V4SPECDATA; break; case NFSATTRBIT_SPACEAVAIL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_abytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_abytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACEFREE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_fbytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_fbytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACETOTAL: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (compare) { if (!(*retcmpp) && sfp->sf_tbytes != fxdr_hyper(tl)) *retcmpp = NFSERR_NOTSAME; } else if (sfp != NULL) { sfp->sf_tbytes = fxdr_hyper(tl); } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SPACEUSED: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if ((u_int64_t)nap->na_bytes != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_bytes = thyp; } attrsum += NFSX_HYPER; break; case NFSATTRBIT_SYSTEM: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESS: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_atime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_atime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEACCESSSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i == NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); attrsum += NFSX_V4TIME; } if (compare && !(*retcmpp)) *retcmpp = NFSERR_INVAL; break; case NFSATTRBIT_TIMEBACKUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEDELTA: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (fsp != NULL) { if (compare) { if (!(*retcmpp)) { if ((u_int32_t)fsp->fs_timedelta.tv_sec != fxdr_unsigned(u_int32_t, *(tl + 1)) || (u_int32_t)fsp->fs_timedelta.tv_nsec != (fxdr_unsigned(u_int32_t, *(tl + 2)) % 1000000000) || *tl != 0) *retcmpp = NFSERR_NOTSAME; } } else { fxdr_nfsv4time(tl, &fsp->fs_timedelta); } } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMETADATA: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_ctime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_ctime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFY: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &temptime); if (compare) { if (!(*retcmpp)) { if (!NFS_CMPTIME(temptime, nap->na_mtime)) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) { nap->na_mtime = temptime; } attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; i = fxdr_unsigned(int, *tl); if (i == NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); attrsum += NFSX_V4TIME; } if (compare && !(*retcmpp)) *retcmpp = NFSERR_INVAL; break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); thyp = fxdr_hyper(tl); if (compare) { if (!(*retcmpp)) { if (!vp || !nfsrv_atroot(vp, &thyp2)) thyp2 = nap->na_fileid; if (thyp2 != thyp) *retcmpp = NFSERR_NOTSAME; } } else if (nap != NULL) nap->na_mntonfileno = thyp; attrsum += NFSX_HYPER; break; case NFSATTRBIT_SUPPATTREXCLCREAT: retnotsup = 0; error = nfsrv_getattrbits(nd, &retattrbits, &cnt, &retnotsup); if (error) goto nfsmout; if (compare && !(*retcmpp)) { NFSSETSUPP_ATTRBIT(&checkattrbits); NFSCLRNOTSETABLE_ATTRBIT(&checkattrbits); NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_TIMEACCESSSET); if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits) || retnotsup) *retcmpp = NFSERR_NOTSAME; } attrsum += cnt; break; default: printf("EEK! nfsv4_loadattr unknown attr=%d\n", bitpos); if (compare && !(*retcmpp)) *retcmpp = NFSERR_ATTRNOTSUPP; /* * and get out of the loop, since we can't parse * the unknown attrbute data. */ bitpos = NFSATTRBIT_MAX; break; } } /* * some clients pad the attrlist, so we need to skip over the * padding. */ if (attrsum > attrsize) { error = NFSERR_BADXDR; } else { attrsize = NFSM_RNDUP(attrsize); if (attrsum < attrsize) error = nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Implement sleep locks for newnfs. The nfslock_usecnt allows for a * shared lock and the NFSXXX_LOCK flag permits an exclusive lock. * The first argument is a pointer to an nfsv4lock structure. * The second argument is 1 iff a blocking lock is wanted. * If this argument is 0, the call waits until no thread either wants nor * holds an exclusive lock. * It returns 1 if the lock was acquired, 0 otherwise. * If several processes call this function concurrently wanting the exclusive * lock, one will get the lock and the rest will return without getting the * lock. (If the caller must have the lock, it simply calls this function in a * loop until the function returns 1 to indicate the lock was acquired.) * Any usecnt must be decremented by calling nfsv4_relref() before * calling nfsv4_lock(). It was done this way, so nfsv4_lock() could * be called in a loop. * The isleptp argument is set to indicate if the call slept, iff not NULL * and the mp argument indicates to check for a forced dismount, iff not * NULL. */ APPLESTATIC int nfsv4_lock(struct nfsv4lock *lp, int iwantlock, int *isleptp, void *mutex, struct mount *mp) { if (isleptp) *isleptp = 0; /* * If a lock is wanted, loop around until the lock is acquired by * someone and then released. If I want the lock, try to acquire it. * For a lock to be issued, no lock must be in force and the usecnt * must be zero. */ if (iwantlock) { if (!(lp->nfslock_lock & NFSV4LOCK_LOCK) && lp->nfslock_usecnt == 0) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; lp->nfslock_lock |= NFSV4LOCK_LOCK; return (1); } lp->nfslock_lock |= NFSV4LOCK_LOCKWANTED; } while (lp->nfslock_lock & (NFSV4LOCK_LOCK | NFSV4LOCK_LOCKWANTED)) { if (mp != NULL && NFSCL_FORCEDISM(mp)) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; return (0); } lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; (void) nfsmsleep(&lp->nfslock_lock, mutex, PZERO - 1, "nfsv4lck", NULL); if (iwantlock && !(lp->nfslock_lock & NFSV4LOCK_LOCK) && lp->nfslock_usecnt == 0) { lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; lp->nfslock_lock |= NFSV4LOCK_LOCK; return (1); } } return (0); } /* * Release the lock acquired by nfsv4_lock(). * The second argument is set to 1 to indicate the nfslock_usecnt should be * incremented, as well. */ APPLESTATIC void nfsv4_unlock(struct nfsv4lock *lp, int incref) { lp->nfslock_lock &= ~NFSV4LOCK_LOCK; if (incref) lp->nfslock_usecnt++; nfsv4_wanted(lp); } /* * Release a reference cnt. */ APPLESTATIC void nfsv4_relref(struct nfsv4lock *lp) { if (lp->nfslock_usecnt <= 0) panic("nfsv4root ref cnt"); lp->nfslock_usecnt--; if (lp->nfslock_usecnt == 0) nfsv4_wanted(lp); } /* * Get a reference cnt. * This function will wait for any exclusive lock to be released, but will * not wait for threads that want the exclusive lock. If priority needs * to be given to threads that need the exclusive lock, a call to nfsv4_lock() * with the 2nd argument == 0 should be done before calling nfsv4_getref(). * If the mp argument is not NULL, check for NFSCL_FORCEDISM() being set and * return without getting a refcnt for that case. */ APPLESTATIC void nfsv4_getref(struct nfsv4lock *lp, int *isleptp, void *mutex, struct mount *mp) { if (isleptp) *isleptp = 0; /* * Wait for a lock held. */ while (lp->nfslock_lock & NFSV4LOCK_LOCK) { if (mp != NULL && NFSCL_FORCEDISM(mp)) return; lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; (void) nfsmsleep(&lp->nfslock_lock, mutex, PZERO - 1, "nfsv4gr", NULL); } if (mp != NULL && NFSCL_FORCEDISM(mp)) return; lp->nfslock_usecnt++; } /* * Get a reference as above, but return failure instead of sleeping if * an exclusive lock is held. */ APPLESTATIC int nfsv4_getref_nonblock(struct nfsv4lock *lp) { if ((lp->nfslock_lock & NFSV4LOCK_LOCK) != 0) return (0); lp->nfslock_usecnt++; return (1); } /* * Test for a lock. Return 1 if locked, 0 otherwise. */ APPLESTATIC int nfsv4_testlock(struct nfsv4lock *lp) { if ((lp->nfslock_lock & NFSV4LOCK_LOCK) == 0 && lp->nfslock_usecnt == 0) return (0); return (1); } /* * Wake up anyone sleeping, waiting for this lock. */ static void nfsv4_wanted(struct nfsv4lock *lp) { if (lp->nfslock_lock & NFSV4LOCK_WANTED) { lp->nfslock_lock &= ~NFSV4LOCK_WANTED; wakeup((caddr_t)&lp->nfslock_lock); } } /* * Copy a string from an mbuf list into a character array. * Return EBADRPC if there is an mbuf error, * 0 otherwise. */ APPLESTATIC int nfsrv_mtostr(struct nfsrv_descript *nd, char *str, int siz) { char *cp; int xfer, len; mbuf_t mp; int rem, error = 0; mp = nd->nd_md; cp = nd->nd_dpos; len = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - cp; rem = NFSM_RNDUP(siz) - siz; while (siz > 0) { if (len > siz) xfer = siz; else xfer = len; NFSBCOPY(cp, str, xfer); str += xfer; siz -= xfer; if (siz > 0) { mp = mbuf_next(mp); if (mp == NULL) { error = EBADRPC; goto out; } cp = NFSMTOD(mp, caddr_t); len = mbuf_len(mp); } else { cp += xfer; len -= xfer; } } *str = '\0'; nd->nd_dpos = cp; nd->nd_md = mp; if (rem > 0) { if (len < rem) error = nfsm_advance(nd, rem, len); else nd->nd_dpos += rem; } out: NFSEXITCODE2(error, nd); return (error); } /* * Fill in the attributes as marked by the bitmap (V4). */ APPLESTATIC int nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, NFSACL_T *saclp, struct vattr *vap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, NFSPROC_T *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) { int bitpos, retnum = 0; u_int32_t *tl; int siz, prefixnum, error; u_char *cp, namestr[NFSV4_SMALLSTR]; nfsattrbit_t attrbits, retbits; nfsattrbit_t *retbitp = &retbits; u_int32_t freenum, *retnump; u_int64_t uquad; struct statfs *fs; struct nfsfsinfo fsinf; struct timespec temptime; NFSACL_T *aclp, *naclp = NULL; #ifdef QUOTA struct dqblk dqb; uid_t savuid; #endif /* * First, set the bits that can be filled and get fsinfo. */ NFSSET_ATTRBIT(retbitp, attrbitp); /* * If both p and cred are NULL, it is a client side setattr call. * If both p and cred are not NULL, it is a server side reply call. * If p is not NULL and cred is NULL, it is a client side callback * reply call. */ if (p == NULL && cred == NULL) { NFSCLRNOTSETABLE_ATTRBIT(retbitp); aclp = saclp; } else { NFSCLRNOTFILLABLE_ATTRBIT(retbitp); naclp = acl_alloc(M_WAITOK); aclp = naclp; } nfsvno_getfs(&fsinf, isdgram); #ifndef APPLE /* * Get the VFS_STATFS(), since some attributes need them. */ fs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); if (NFSISSETSTATFS_ATTRBIT(retbitp)) { error = VFS_STATFS(mp, fs); if (error != 0) { if (reterr) { nd->nd_repstat = NFSERR_ACCES; free(fs, M_STATFS); return (0); } NFSCLRSTATFS_ATTRBIT(retbitp); } } #endif /* * And the NFSv4 ACL... */ if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_ACLSUPPORT) && (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0))) { NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACLSUPPORT); } if (NFSISSET_ATTRBIT(retbitp, NFSATTRBIT_ACL)) { if (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0)) { NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACL); } else if (naclp != NULL) { if (NFSVOPLOCK(vp, LK_SHARED) == 0) { error = VOP_ACCESSX(vp, VREAD_ACL, cred, p); if (error == 0) error = VOP_GETACL(vp, ACL_TYPE_NFS4, naclp, cred, p); NFSVOPUNLOCK(vp, 0); } else error = NFSERR_PERM; if (error != 0) { if (reterr) { nd->nd_repstat = NFSERR_ACCES; free(fs, M_STATFS); return (0); } NFSCLRBIT_ATTRBIT(retbitp, NFSATTRBIT_ACL); } } } /* * Put out the attribute bitmap for the ones being filled in * and get the field for the number of attributes returned. */ prefixnum = nfsrv_putattrbit(nd, retbitp); NFSM_BUILD(retnump, u_int32_t *, NFSX_UNSIGNED); prefixnum += NFSX_UNSIGNED; /* * Now, loop around filling in the attributes for each bit set. */ for (bitpos = 0; bitpos < NFSATTRBIT_MAX; bitpos++) { if (NFSISSET_ATTRBIT(retbitp, bitpos)) { switch (bitpos) { case NFSATTRBIT_SUPPORTEDATTRS: NFSSETSUPP_ATTRBIT(&attrbits); if (nfsrv_useacl == 0 || ((cred != NULL || p != NULL) && supports_nfsv4acls == 0)) { NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACLSUPPORT); NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACL); } retnum += nfsrv_putattrbit(nd, &attrbits); break; case NFSATTRBIT_TYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vap->va_type); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FHEXPIRETYPE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4FHTYPE_PERSISTENT); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHANGE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_filerev, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_size, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_LINKSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_LINK) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_SYMLINKSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_SYMLINK) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NAMEDATTR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FSID: NFSM_BUILD(tl, u_int32_t *, NFSX_V4FSID); *tl++ = 0; *tl++ = txdr_unsigned(mp->mnt_stat.f_fsid.val[0]); *tl++ = 0; *tl = txdr_unsigned(mp->mnt_stat.f_fsid.val[1]); retnum += NFSX_V4FSID; break; case NFSATTRBIT_UNIQUEHANDLES: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_LEASETIME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsrv_lease); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_RDATTRERROR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(rderror); retnum += NFSX_UNSIGNED; break; /* * Recommended Attributes. (Only the supported ones.) */ case NFSATTRBIT_ACL: retnum += nfsrv_buildacl(nd, aclp, vnode_vtype(vp), p); break; case NFSATTRBIT_ACLSUPPORT: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4ACE_SUPTYPES); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CANSETTIME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_CANSETTIME) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEINSENSITIVE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CASEPRESERVING: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_CHOWNRESTRICTED: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_FILEHANDLE: retnum += nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); break; case NFSATTRBIT_FILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = vap->va_fileid; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESAVAIL: /* * Check quota and use min(quota, f_ffree). */ freenum = fs->f_ffree; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_isoftlimit-dqb.dqb_curinodes, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(freenum); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESFREE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fs->f_ffree); retnum += NFSX_HYPER; break; case NFSATTRBIT_FILESTOTAL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fs->f_files); retnum += NFSX_HYPER; break; case NFSATTRBIT_FSLOCATIONS: NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = 0; retnum += 2 * NFSX_UNSIGNED; break; case NFSATTRBIT_HOMOGENEOUS: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_HOMOGENEOUS) *tl = newnfs_true; else *tl = newnfs_false; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXFILESIZE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = NFSRV_MAXFILESIZE; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_MAXLINK: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_LINK_MAX); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXNAME: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_MAXNAMLEN); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXREAD: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fsinf.fs_rtmax); retnum += NFSX_HYPER; break; case NFSATTRBIT_MAXWRITE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); *tl++ = 0; *tl = txdr_unsigned(fsinf.fs_wtmax); retnum += NFSX_HYPER; break; case NFSATTRBIT_MODE: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_mode(vap->va_mode); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NOTRUNC: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_true; retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_NUMLINKS: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(vap->va_nlink); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: cp = namestr; nfsv4_uidtostr(vap->va_uid, &cp, &siz, p); retnum += nfsm_strtom(nd, cp, siz); if (cp != namestr) free(cp, M_NFSSTRING); break; case NFSATTRBIT_OWNERGROUP: cp = namestr; nfsv4_gidtostr(vap->va_gid, &cp, &siz, p); retnum += nfsm_strtom(nd, cp, siz); if (cp != namestr) free(cp, M_NFSSTRING); break; case NFSATTRBIT_QUOTAHARD: if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = fs->f_bfree; else freenum = fs->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bhardlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_QUOTASOFT: if (priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) freenum = fs->f_bfree; else freenum = fs->f_bavail; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = min(dqb.dqb_bsoftlimit, freenum); p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_QUOTAUSED: freenum = 0; #ifdef QUOTA /* * ufs_quotactl() insists that the uid argument * equal p_ruid for non-root quota access, so * we'll just make sure that's the case. */ savuid = p->p_cred->p_ruid; p->p_cred->p_ruid = cred->cr_uid; if (!VFS_QUOTACTL(mp, QCMD(Q_GETQUOTA,USRQUOTA), cred->cr_uid, (caddr_t)&dqb)) freenum = dqb.dqb_curblocks; p->p_cred->p_ruid = savuid; #endif /* QUOTA */ NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)freenum; NFSQUOTABLKTOBYTE(uquad, fs->f_bsize); txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_RAWDEV: NFSM_BUILD(tl, u_int32_t *, NFSX_V4SPECDATA); *tl++ = txdr_unsigned(NFSMAJOR(vap->va_rdev)); *tl = txdr_unsigned(NFSMINOR(vap->va_rdev)); retnum += NFSX_V4SPECDATA; break; case NFSATTRBIT_SPACEAVAIL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0)) uquad = (u_int64_t)fs->f_bfree; else uquad = (u_int64_t)fs->f_bavail; uquad *= fs->f_bsize; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACEFREE: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)fs->f_bfree; uquad *= fs->f_bsize; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACETOTAL: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); uquad = (u_int64_t)fs->f_blocks; uquad *= fs->f_bsize; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SPACEUSED: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); txdr_hyper(vap->va_bytes, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_TIMEACCESS: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_atime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEACCESSSET: if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_V4SETTIME); *tl++ = txdr_unsigned(NFSV4SATTRTIME_TOCLIENT); txdr_nfsv4time(&vap->va_atime, tl); retnum += NFSX_V4SETTIME; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4SATTRTIME_TOSERVER); retnum += NFSX_UNSIGNED; } break; case NFSATTRBIT_TIMEDELTA: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); temptime.tv_sec = 0; temptime.tv_nsec = 1000000000 / hz; txdr_nfsv4time(&temptime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMETADATA: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_ctime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFY: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_mtime, tl); retnum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_V4SETTIME); *tl++ = txdr_unsigned(NFSV4SATTRTIME_TOCLIENT); txdr_nfsv4time(&vap->va_mtime, tl); retnum += NFSX_V4SETTIME; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4SATTRTIME_TOSERVER); retnum += NFSX_UNSIGNED; } break; case NFSATTRBIT_MOUNTEDONFILEID: NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER); if (at_root != 0) uquad = mounted_on_fileno; else uquad = vap->va_fileid; txdr_hyper(uquad, tl); retnum += NFSX_HYPER; break; case NFSATTRBIT_SUPPATTREXCLCREAT: NFSSETSUPP_ATTRBIT(&attrbits); NFSCLRNOTSETABLE_ATTRBIT(&attrbits); NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); retnum += nfsrv_putattrbit(nd, &attrbits); break; default: printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos); } } } if (naclp != NULL) acl_free(naclp); free(fs, M_STATFS); *retnump = txdr_unsigned(retnum); return (retnum + prefixnum); } /* * Put the attribute bits onto an mbuf list. * Return the number of bytes of output generated. */ APPLESTATIC int nfsrv_putattrbit(struct nfsrv_descript *nd, nfsattrbit_t *attrbitp) { u_int32_t *tl; int cnt, i, bytesize; for (cnt = NFSATTRBIT_MAXWORDS; cnt > 0; cnt--) if (attrbitp->bits[cnt - 1]) break; bytesize = (cnt + 1) * NFSX_UNSIGNED; NFSM_BUILD(tl, u_int32_t *, bytesize); *tl++ = txdr_unsigned(cnt); for (i = 0; i < cnt; i++) *tl++ = txdr_unsigned(attrbitp->bits[i]); return (bytesize); } /* * Convert a uid to a string. * If the lookup fails, just output the digits. * uid - the user id * cpp - points to a buffer of size NFSV4_SMALLSTR * (malloc a larger one, as required) * retlenp - pointer to length to be returned */ APPLESTATIC void nfsv4_uidtostr(uid_t uid, u_char **cpp, int *retlenp, NFSPROC_T *p) { int i; struct nfsusrgrp *usrp; u_char *cp = *cpp; uid_t tmp; int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; struct nfsrv_lughash *hp; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0 && !nfs_enable_uidtostring) { /* * Always map nfsrv_defaultuid to "nobody". */ if (uid == nfsrv_defaultuid) { i = nfsrv_dnsnamelen + 7; if (i > len) { if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY("nobody@", cp, 7); cp += 7; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); return; } hasampersand = 0; hp = NFSUSERHASH(uid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_uid == uid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; /* * If the name doesn't already have an '@' * in it, append @domainname to it. */ for (i = 0; i < usrp->lug_namelen; i++) { if (usrp->lug_name[i] == '@') { hasampersand = 1; break; } } if (hasampersand) i = usrp->lug_namelen; else i = usrp->lug_namelen + nfsrv_dnsnamelen + 1; if (i > len) { mtx_unlock(&hp->mtx); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY(usrp->lug_name, cp, usrp->lug_namelen); if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); } TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUID, uid, (gid_t)0, NULL, p); if (ret == 0 && cnt < 2) goto tryagain; } /* * No match, just return a string of digits. */ tmp = uid; i = 0; while (tmp || i == 0) { tmp /= 10; i++; } len = (i > len) ? len : i; *retlenp = len; cp += (len - 1); tmp = uid; for (i = 0; i < len; i++) { *cp-- = '0' + (tmp % 10); tmp /= 10; } return; } /* * Get a credential for the uid with the server's group list. * If none is found, just return the credential passed in after * logging a warning message. */ struct ucred * nfsrv_getgrpscred(struct ucred *oldcred) { struct nfsusrgrp *usrp; struct ucred *newcred; int cnt, ret; uid_t uid; struct nfsrv_lughash *hp; cnt = 0; uid = oldcred->cr_uid; tryagain: if (nfsrv_dnsnamelen > 0) { hp = NFSUSERHASH(uid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_uid == uid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; if (usrp->lug_cred != NULL) { newcred = crhold(usrp->lug_cred); crfree(oldcred); } else newcred = oldcred; TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return (newcred); } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUID, uid, (gid_t)0, NULL, curthread); if (ret == 0 && cnt < 2) goto tryagain; } return (oldcred); } /* * Convert a string to a uid. * If no conversion is possible return NFSERR_BADOWNER, otherwise * return 0. * If this is called from a client side mount using AUTH_SYS and the * string is made up entirely of digits, just convert the string to * a number. */ APPLESTATIC int nfsv4_strtouid(struct nfsrv_descript *nd, u_char *str, int len, uid_t *uidp, NFSPROC_T *p) { int i; char *cp, *endstr, *str0; struct nfsusrgrp *usrp; int cnt, ret; int error = 0; uid_t tuid; struct nfsrv_lughash *hp, *hp2; if (len == 0) { error = NFSERR_BADOWNER; goto out; } /* If a string of digits and an AUTH_SYS mount, just convert it. */ str0 = str; tuid = (uid_t)strtoul(str0, &endstr, 10); if ((endstr - str0) == len) { /* A numeric string. */ if ((nd->nd_flag & ND_KERBV) == 0 && ((nd->nd_flag & ND_NFSCL) != 0 || nfsd_enable_stringtouid != 0)) *uidp = tuid; else error = NFSERR_BADOWNER; goto out; } /* * Look for an '@'. */ cp = strchr(str0, '@'); if (cp != NULL) i = (int)(cp++ - str0); else i = len; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0) { /* * If an '@' is found and the domain name matches, search for * the name with dns stripped off. * Mixed case alpahbetics will match for the domain name, but * all upper case will not. */ if (cnt == 0 && i < len && i > 0 && (len - 1 - i) == nfsrv_dnsnamelen && !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) { len -= (nfsrv_dnsnamelen + 1); *(cp - 1) = '\0'; } /* * Check for the special case of "nobody". */ if (len == 6 && !NFSBCMP(str, "nobody", 6)) { *uidp = nfsrv_defaultuid; error = 0; goto out; } hp = NFSUSERNAMEHASH(str, len); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_namehash) { if (usrp->lug_namelen == len && !NFSBCMP(usrp->lug_name, str, len)) { if (usrp->lug_expiry < NFSD_MONOSEC) break; hp2 = NFSUSERHASH(usrp->lug_uid); mtx_lock(&hp2->mtx); TAILQ_REMOVE(&hp2->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp2->lughead, usrp, lug_numhash); *uidp = usrp->lug_uid; mtx_unlock(&hp2->mtx); mtx_unlock(&hp->mtx); error = 0; goto out; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETUSER, (uid_t)0, (gid_t)0, str, p); if (ret == 0 && cnt < 2) goto tryagain; } error = NFSERR_BADOWNER; out: NFSEXITCODE(error); return (error); } /* * Convert a gid to a string. * gid - the group id * cpp - points to a buffer of size NFSV4_SMALLSTR * (malloc a larger one, as required) * retlenp - pointer to length to be returned */ APPLESTATIC void nfsv4_gidtostr(gid_t gid, u_char **cpp, int *retlenp, NFSPROC_T *p) { int i; struct nfsusrgrp *usrp; u_char *cp = *cpp; gid_t tmp; int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; struct nfsrv_lughash *hp; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0 && !nfs_enable_uidtostring) { /* * Always map nfsrv_defaultgid to "nogroup". */ if (gid == nfsrv_defaultgid) { i = nfsrv_dnsnamelen + 8; if (i > len) { if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY("nogroup@", cp, 8); cp += 8; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); return; } hasampersand = 0; hp = NFSGROUPHASH(gid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { if (usrp->lug_gid == gid) { if (usrp->lug_expiry < NFSD_MONOSEC) break; /* * If the name doesn't already have an '@' * in it, append @domainname to it. */ for (i = 0; i < usrp->lug_namelen; i++) { if (usrp->lug_name[i] == '@') { hasampersand = 1; break; } } if (hasampersand) i = usrp->lug_namelen; else i = usrp->lug_namelen + nfsrv_dnsnamelen + 1; if (i > len) { mtx_unlock(&hp->mtx); if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); cp = malloc(i, M_NFSSTRING, M_WAITOK); *cpp = cp; len = i; goto tryagain; } *retlenp = i; NFSBCOPY(usrp->lug_name, cp, usrp->lug_namelen); if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen); } TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); return; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETGID, (uid_t)0, gid, NULL, p); if (ret == 0 && cnt < 2) goto tryagain; } /* * No match, just return a string of digits. */ tmp = gid; i = 0; while (tmp || i == 0) { tmp /= 10; i++; } len = (i > len) ? len : i; *retlenp = len; cp += (len - 1); tmp = gid; for (i = 0; i < len; i++) { *cp-- = '0' + (tmp % 10); tmp /= 10; } return; } /* * Convert a string to a gid. * If no conversion is possible return NFSERR_BADOWNER, otherwise * return 0. * If this is called from a client side mount using AUTH_SYS and the * string is made up entirely of digits, just convert the string to * a number. */ APPLESTATIC int nfsv4_strtogid(struct nfsrv_descript *nd, u_char *str, int len, gid_t *gidp, NFSPROC_T *p) { int i; char *cp, *endstr, *str0; struct nfsusrgrp *usrp; int cnt, ret; int error = 0; gid_t tgid; struct nfsrv_lughash *hp, *hp2; if (len == 0) { error = NFSERR_BADOWNER; goto out; } /* If a string of digits and an AUTH_SYS mount, just convert it. */ str0 = str; tgid = (gid_t)strtoul(str0, &endstr, 10); if ((endstr - str0) == len) { /* A numeric string. */ if ((nd->nd_flag & ND_KERBV) == 0 && ((nd->nd_flag & ND_NFSCL) != 0 || nfsd_enable_stringtouid != 0)) *gidp = tgid; else error = NFSERR_BADOWNER; goto out; } /* * Look for an '@'. */ cp = strchr(str0, '@'); if (cp != NULL) i = (int)(cp++ - str0); else i = len; cnt = 0; tryagain: if (nfsrv_dnsnamelen > 0) { /* * If an '@' is found and the dns name matches, search for the * name with the dns stripped off. */ if (cnt == 0 && i < len && i > 0 && (len - 1 - i) == nfsrv_dnsnamelen && !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) { len -= (nfsrv_dnsnamelen + 1); *(cp - 1) = '\0'; } /* * Check for the special case of "nogroup". */ if (len == 7 && !NFSBCMP(str, "nogroup", 7)) { *gidp = nfsrv_defaultgid; error = 0; goto out; } hp = NFSGROUPNAMEHASH(str, len); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_namehash) { if (usrp->lug_namelen == len && !NFSBCMP(usrp->lug_name, str, len)) { if (usrp->lug_expiry < NFSD_MONOSEC) break; hp2 = NFSGROUPHASH(usrp->lug_gid); mtx_lock(&hp2->mtx); TAILQ_REMOVE(&hp2->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp2->lughead, usrp, lug_numhash); *gidp = usrp->lug_gid; mtx_unlock(&hp2->mtx); mtx_unlock(&hp->mtx); error = 0; goto out; } } mtx_unlock(&hp->mtx); cnt++; ret = nfsrv_getuser(RPCNFSUSERD_GETGROUP, (uid_t)0, (gid_t)0, str, p); if (ret == 0 && cnt < 2) goto tryagain; } error = NFSERR_BADOWNER; out: NFSEXITCODE(error); return (error); } /* * Cmp len chars, allowing mixed case in the first argument to match lower * case in the second, but not if the first argument is all upper case. * Return 0 for a match, 1 otherwise. */ static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len) { int i; u_char tmp; int fndlower = 0; for (i = 0; i < len; i++) { if (*cp >= 'A' && *cp <= 'Z') { tmp = *cp++ + ('a' - 'A'); } else { tmp = *cp++; if (tmp >= 'a' && tmp <= 'z') fndlower = 1; } if (tmp != *cp2++) return (1); } if (fndlower) return (0); else return (1); } /* * Set the port for the nfsuserd. */ APPLESTATIC int nfsrv_nfsuserdport(struct sockaddr *sad, u_short port, NFSPROC_T *p) { struct nfssockreq *rp; struct sockaddr_in *ad; int error; NFSLOCKNAMEID(); if (nfsrv_nfsuserd) { NFSUNLOCKNAMEID(); error = EPERM; NFSSOCKADDRFREE(sad); goto out; } nfsrv_nfsuserd = 1; NFSUNLOCKNAMEID(); /* * Set up the socket record and connect. */ rp = &nfsrv_nfsuserdsock; rp->nr_client = NULL; rp->nr_cred = NULL; rp->nr_lock = (NFSR_RESERVEDPORT | NFSR_LOCALHOST); if (sad != NULL) { /* Use the AF_LOCAL socket address passed in. */ rp->nr_sotype = SOCK_STREAM; rp->nr_soproto = 0; rp->nr_nam = sad; } else { /* Use the port# for a UDP socket (old nfsuserd). */ rp->nr_sotype = SOCK_DGRAM; rp->nr_soproto = IPPROTO_UDP; NFSSOCKADDRALLOC(rp->nr_nam); NFSSOCKADDRSIZE(rp->nr_nam, sizeof (struct sockaddr_in)); ad = NFSSOCKADDR(rp->nr_nam, struct sockaddr_in *); ad->sin_family = AF_INET; ad->sin_addr.s_addr = htonl((u_int32_t)0x7f000001); ad->sin_port = port; } rp->nr_prog = RPCPROG_NFSUSERD; rp->nr_vers = RPCNFSUSERD_VERS; error = newnfs_connect(NULL, rp, NFSPROCCRED(p), p, 0); if (error) { NFSSOCKADDRFREE(rp->nr_nam); nfsrv_nfsuserd = 0; } out: NFSEXITCODE(error); return (error); } /* * Delete the nfsuserd port. */ APPLESTATIC void nfsrv_nfsuserddelport(void) { NFSLOCKNAMEID(); if (nfsrv_nfsuserd == 0) { NFSUNLOCKNAMEID(); return; } nfsrv_nfsuserd = 0; NFSUNLOCKNAMEID(); newnfs_disconnect(&nfsrv_nfsuserdsock); NFSSOCKADDRFREE(nfsrv_nfsuserdsock.nr_nam); } /* * Do upcalls to the nfsuserd, for cache misses of the owner/ownergroup * name<-->id cache. * Returns 0 upon success, non-zero otherwise. */ static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript *nd; int len; struct nfsrv_descript nfsd; struct ucred *cred; int error; NFSLOCKNAMEID(); if (nfsrv_nfsuserd == 0) { NFSUNLOCKNAMEID(); error = EPERM; goto out; } NFSUNLOCKNAMEID(); nd = &nfsd; cred = newnfs_getcred(); nd->nd_flag = ND_GSSINITREPLY; nfsrvd_rephead(nd); nd->nd_procnum = procnum; if (procnum == RPCNFSUSERD_GETUID || procnum == RPCNFSUSERD_GETGID) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (procnum == RPCNFSUSERD_GETUID) *tl = txdr_unsigned(uid); else *tl = txdr_unsigned(gid); } else { len = strlen(name); (void) nfsm_strtom(nd, name, len); } error = newnfs_request(nd, NULL, NULL, &nfsrv_nfsuserdsock, NULL, NULL, cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL, NULL); NFSFREECRED(cred); if (!error) { mbuf_freem(nd->nd_mrep); error = nd->nd_repstat; } out: NFSEXITCODE(error); return (error); } /* * This function is called from the nfssvc(2) system call, to update the * kernel user/group name list(s) for the V4 owner and ownergroup attributes. */ APPLESTATIC int nfssvc_idname(struct nfsd_idargs *nidp) { struct nfsusrgrp *nusrp, *usrp, *newusrp; struct nfsrv_lughash *hp_name, *hp_idnum, *thp; int i, group_locked, groupname_locked, user_locked, username_locked; int error = 0; u_char *cp; gid_t *grps; struct ucred *cr; static int onethread = 0; static time_t lasttime = 0; if (nidp->nid_namelen <= 0 || nidp->nid_namelen > MAXHOSTNAMELEN) { error = EINVAL; goto out; } if (nidp->nid_flag & NFSID_INITIALIZE) { cp = malloc(nidp->nid_namelen + 1, M_NFSSTRING, M_WAITOK); error = copyin(CAST_USER_ADDR_T(nidp->nid_name), cp, nidp->nid_namelen); if (error != 0) { free(cp, M_NFSSTRING); goto out; } if (atomic_cmpset_acq_int(&nfsrv_dnsnamelen, 0, 0) == 0) { /* * Free up all the old stuff and reinitialize hash * lists. All mutexes for both lists must be locked, * with the user/group name ones before the uid/gid * ones, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsuserhash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_FOREACH_SAFE(usrp, &nfsuserhash[i].lughead, lug_numhash, nusrp) nfsrv_removeuser(usrp, 1); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsuserhash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsusernamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgrouphash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_FOREACH_SAFE(usrp, &nfsgrouphash[i].lughead, lug_numhash, nusrp) nfsrv_removeuser(usrp, 0); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgrouphash[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgroupnamehash[i].mtx); free(nfsrv_dnsname, M_NFSSTRING); nfsrv_dnsname = NULL; } if (nfsuserhash == NULL) { /* Allocate the hash tables. */ nfsuserhash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsuserhash[i].mtx, "nfsuidhash", NULL, MTX_DEF | MTX_DUPOK); nfsusernamehash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsusernamehash[i].mtx, "nfsusrhash", NULL, MTX_DEF | MTX_DUPOK); nfsgrouphash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsgrouphash[i].mtx, "nfsgidhash", NULL, MTX_DEF | MTX_DUPOK); nfsgroupnamehash = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) mtx_init(&nfsgroupnamehash[i].mtx, "nfsgrphash", NULL, MTX_DEF | MTX_DUPOK); } /* (Re)initialize the list heads. */ for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsuserhash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsusernamehash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsgrouphash[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_INIT(&nfsgroupnamehash[i].lughead); /* * Put name in "DNS" string. */ nfsrv_dnsname = cp; nfsrv_defaultuid = nidp->nid_uid; nfsrv_defaultgid = nidp->nid_gid; nfsrv_usercnt = 0; nfsrv_usermax = nidp->nid_usermax; atomic_store_rel_int(&nfsrv_dnsnamelen, nidp->nid_namelen); goto out; } /* * malloc the new one now, so any potential sleep occurs before * manipulation of the lists. */ newusrp = malloc(sizeof(struct nfsusrgrp) + nidp->nid_namelen, M_NFSUSERGROUP, M_WAITOK | M_ZERO); error = copyin(CAST_USER_ADDR_T(nidp->nid_name), newusrp->lug_name, nidp->nid_namelen); if (error == 0 && nidp->nid_ngroup > 0 && (nidp->nid_flag & NFSID_ADDUID) != 0) { grps = malloc(sizeof(gid_t) * nidp->nid_ngroup, M_TEMP, M_WAITOK); error = copyin(CAST_USER_ADDR_T(nidp->nid_grps), grps, sizeof(gid_t) * nidp->nid_ngroup); if (error == 0) { /* * Create a credential just like svc_getcred(), * but using the group list provided. */ cr = crget(); cr->cr_uid = cr->cr_ruid = cr->cr_svuid = nidp->nid_uid; crsetgroups(cr, nidp->nid_ngroup, grps); cr->cr_rgid = cr->cr_svgid = cr->cr_groups[0]; cr->cr_prison = &prison0; prison_hold(cr->cr_prison); #ifdef MAC mac_cred_associate_nfsd(cr); #endif newusrp->lug_cred = cr; } free(grps, M_TEMP); } if (error) { free(newusrp, M_NFSUSERGROUP); goto out; } newusrp->lug_namelen = nidp->nid_namelen; /* * The lock order is username[0]->[nfsrv_lughashsize - 1] followed * by uid[0]->[nfsrv_lughashsize - 1], with the same for group. * The flags user_locked, username_locked, group_locked and * groupname_locked are set to indicate all of those hash lists are * locked. hp_name != NULL and hp_idnum != NULL indicates that * the respective one mutex is locked. */ user_locked = username_locked = group_locked = groupname_locked = 0; hp_name = hp_idnum = NULL; /* * Delete old entries, as required. */ if (nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID)) { /* Must lock all username hash lists first, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); username_locked = 1; hp_idnum = NFSUSERHASH(nidp->nid_uid); mtx_lock(&hp_idnum->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_idnum->lughead, lug_numhash, nusrp) { if (usrp->lug_uid == nidp->nid_uid) nfsrv_removeuser(usrp, 1); } } else if (nidp->nid_flag & (NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) { hp_name = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_lock(&hp_name->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_name->lughead, lug_namehash, nusrp) { if (usrp->lug_namelen == newusrp->lug_namelen && !NFSBCMP(usrp->lug_name, newusrp->lug_name, usrp->lug_namelen)) { thp = NFSUSERHASH(usrp->lug_uid); mtx_lock(&thp->mtx); nfsrv_removeuser(usrp, 1); mtx_unlock(&thp->mtx); } } hp_idnum = NFSUSERHASH(nidp->nid_uid); mtx_lock(&hp_idnum->mtx); } else if (nidp->nid_flag & (NFSID_DELGID | NFSID_ADDGID)) { /* Must lock all groupname hash lists first, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); groupname_locked = 1; hp_idnum = NFSGROUPHASH(nidp->nid_gid); mtx_lock(&hp_idnum->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_idnum->lughead, lug_numhash, nusrp) { if (usrp->lug_gid == nidp->nid_gid) nfsrv_removeuser(usrp, 0); } } else if (nidp->nid_flag & (NFSID_DELGROUPNAME | NFSID_ADDGROUPNAME)) { hp_name = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_lock(&hp_name->mtx); TAILQ_FOREACH_SAFE(usrp, &hp_name->lughead, lug_namehash, nusrp) { if (usrp->lug_namelen == newusrp->lug_namelen && !NFSBCMP(usrp->lug_name, newusrp->lug_name, usrp->lug_namelen)) { thp = NFSGROUPHASH(usrp->lug_gid); mtx_lock(&thp->mtx); nfsrv_removeuser(usrp, 0); mtx_unlock(&thp->mtx); } } hp_idnum = NFSGROUPHASH(nidp->nid_gid); mtx_lock(&hp_idnum->mtx); } /* * Now, we can add the new one. */ if (nidp->nid_usertimeout) newusrp->lug_expiry = NFSD_MONOSEC + nidp->nid_usertimeout; else newusrp->lug_expiry = NFSD_MONOSEC + 5; if (nidp->nid_flag & (NFSID_ADDUID | NFSID_ADDUSERNAME)) { newusrp->lug_uid = nidp->nid_uid; thp = NFSUSERHASH(newusrp->lug_uid); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_numhash); thp = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash); atomic_add_int(&nfsrv_usercnt, 1); } else if (nidp->nid_flag & (NFSID_ADDGID | NFSID_ADDGROUPNAME)) { newusrp->lug_gid = nidp->nid_gid; thp = NFSGROUPHASH(newusrp->lug_gid); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_numhash); thp = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash); atomic_add_int(&nfsrv_usercnt, 1); } else { if (newusrp->lug_cred != NULL) crfree(newusrp->lug_cred); free(newusrp, M_NFSUSERGROUP); } /* * Once per second, allow one thread to trim the cache. */ if (lasttime < NFSD_MONOSEC && atomic_cmpset_acq_int(&onethread, 0, 1) != 0) { /* * First, unlock the single mutexes, so that all entries * can be locked and any LOR is avoided. */ if (hp_name != NULL) { mtx_unlock(&hp_name->mtx); hp_name = NULL; } if (hp_idnum != NULL) { mtx_unlock(&hp_idnum->mtx); hp_idnum = NULL; } if ((nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID | NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) != 0) { if (username_locked == 0) { for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsusernamehash[i].mtx); username_locked = 1; } KASSERT(user_locked == 0, ("nfssvc_idname: user_locked")); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsuserhash[i].mtx); user_locked = 1; for (i = 0; i < nfsrv_lughashsize; i++) { TAILQ_FOREACH_SAFE(usrp, &nfsuserhash[i].lughead, lug_numhash, nusrp) if (usrp->lug_expiry < NFSD_MONOSEC) nfsrv_removeuser(usrp, 1); } for (i = 0; i < nfsrv_lughashsize; i++) { /* * Trim the cache using an approximate LRU * algorithm. This code deletes the least * recently used entry on each hash list. */ if (nfsrv_usercnt <= nfsrv_usermax) break; usrp = TAILQ_FIRST(&nfsuserhash[i].lughead); if (usrp != NULL) nfsrv_removeuser(usrp, 1); } } else { if (groupname_locked == 0) { for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgroupnamehash[i].mtx); groupname_locked = 1; } KASSERT(group_locked == 0, ("nfssvc_idname: group_locked")); for (i = 0; i < nfsrv_lughashsize; i++) mtx_lock(&nfsgrouphash[i].mtx); group_locked = 1; for (i = 0; i < nfsrv_lughashsize; i++) { TAILQ_FOREACH_SAFE(usrp, &nfsgrouphash[i].lughead, lug_numhash, nusrp) if (usrp->lug_expiry < NFSD_MONOSEC) nfsrv_removeuser(usrp, 0); } for (i = 0; i < nfsrv_lughashsize; i++) { /* * Trim the cache using an approximate LRU * algorithm. This code deletes the least * recently user entry on each hash list. */ if (nfsrv_usercnt <= nfsrv_usermax) break; usrp = TAILQ_FIRST(&nfsgrouphash[i].lughead); if (usrp != NULL) nfsrv_removeuser(usrp, 0); } } lasttime = NFSD_MONOSEC; atomic_store_rel_int(&onethread, 0); } /* Now, unlock all locked mutexes. */ if (hp_idnum != NULL) mtx_unlock(&hp_idnum->mtx); if (hp_name != NULL) mtx_unlock(&hp_name->mtx); if (user_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsuserhash[i].mtx); if (username_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsusernamehash[i].mtx); if (group_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgrouphash[i].mtx); if (groupname_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) mtx_unlock(&nfsgroupnamehash[i].mtx); out: NFSEXITCODE(error); return (error); } /* * Remove a user/group name element. */ static void nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser) { struct nfsrv_lughash *hp; if (isuser != 0) { hp = NFSUSERHASH(usrp->lug_uid); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp = NFSUSERNAMEHASH(usrp->lug_name, usrp->lug_namelen); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_namehash); } else { hp = NFSGROUPHASH(usrp->lug_gid); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp = NFSGROUPNAMEHASH(usrp->lug_name, usrp->lug_namelen); mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_namehash); } atomic_add_int(&nfsrv_usercnt, -1); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } /* * Free up all the allocations related to the name<-->id cache. * This function should only be called when the nfsuserd daemon isn't * running, since it doesn't do any locking. * This function is meant to be used when the nfscommon module is unloaded. */ APPLESTATIC void nfsrv_cleanusergroup(void) { struct nfsrv_lughash *hp, *hp2; struct nfsusrgrp *nusrp, *usrp; int i; if (nfsuserhash == NULL) return; for (i = 0; i < nfsrv_lughashsize; i++) { hp = &nfsuserhash[i]; TAILQ_FOREACH_SAFE(usrp, &hp->lughead, lug_numhash, nusrp) { TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp2 = NFSUSERNAMEHASH(usrp->lug_name, usrp->lug_namelen); TAILQ_REMOVE(&hp2->lughead, usrp, lug_namehash); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } hp = &nfsgrouphash[i]; TAILQ_FOREACH_SAFE(usrp, &hp->lughead, lug_numhash, nusrp) { TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp2 = NFSGROUPNAMEHASH(usrp->lug_name, usrp->lug_namelen); TAILQ_REMOVE(&hp2->lughead, usrp, lug_namehash); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } mtx_destroy(&nfsuserhash[i].mtx); mtx_destroy(&nfsusernamehash[i].mtx); mtx_destroy(&nfsgroupnamehash[i].mtx); mtx_destroy(&nfsgrouphash[i].mtx); } free(nfsuserhash, M_NFSUSERGROUP); free(nfsusernamehash, M_NFSUSERGROUP); free(nfsgrouphash, M_NFSUSERGROUP); free(nfsgroupnamehash, M_NFSUSERGROUP); free(nfsrv_dnsname, M_NFSSTRING); } /* * This function scans a byte string and checks for UTF-8 compliance. * It returns 0 if it conforms and NFSERR_INVAL if not. */ APPLESTATIC int nfsrv_checkutf8(u_int8_t *cp, int len) { u_int32_t val = 0x0; int cnt = 0, gotd = 0, shift = 0; u_int8_t byte; static int utf8_shift[5] = { 7, 11, 16, 21, 26 }; int error = 0; /* * Here are what the variables are used for: * val - the calculated value of a multibyte char, used to check * that it was coded with the correct range * cnt - the number of 10xxxxxx bytes to follow * gotd - set for a char of Dxxx, so D800<->DFFF can be checked for * shift - lower order bits of range (ie. "val >> shift" should * not be 0, in other words, dividing by the lower bound * of the range should get a non-zero value) * byte - used to calculate cnt */ while (len > 0) { if (cnt > 0) { /* This handles the 10xxxxxx bytes */ if ((*cp & 0xc0) != 0x80 || (gotd && (*cp & 0x20))) { error = NFSERR_INVAL; goto out; } gotd = 0; val <<= 6; val |= (*cp & 0x3f); cnt--; if (cnt == 0 && (val >> shift) == 0x0) { error = NFSERR_INVAL; goto out; } } else if (*cp & 0x80) { /* first byte of multi byte char */ byte = *cp; while ((byte & 0x40) && cnt < 6) { cnt++; byte <<= 1; } if (cnt == 0 || cnt == 6) { error = NFSERR_INVAL; goto out; } val = (*cp & (0x3f >> cnt)); shift = utf8_shift[cnt - 1]; if (cnt == 2 && val == 0xd) /* Check for the 0xd800-0xdfff case */ gotd = 1; } cp++; len--; } if (cnt > 0) error = NFSERR_INVAL; out: NFSEXITCODE(error); return (error); } /* * Parse the xdr for an NFSv4 FsLocations attribute. Return two malloc'd * strings, one with the root path in it and the other with the list of * locations. The list is in the same format as is found in nfr_refs. * It is a "," separated list of entries, where each of them is of the * form :. For example * "nfsv4-test:/sub2,nfsv4-test2:/user/mnt,nfsv4-test2:/user/mnt2" * The nilp argument is set to 1 for the special case of a null fs_root * and an empty server list. * It returns NFSERR_BADXDR, if the xdr can't be parsed and returns the * number of xdr bytes parsed in sump. */ static int nfsrv_getrefstr(struct nfsrv_descript *nd, u_char **fsrootp, u_char **srvp, int *sump, int *nilp) { u_int32_t *tl; u_char *cp = NULL, *cp2 = NULL, *cp3, *str; int i, j, len, stringlen, cnt, slen, siz, xdrsum, error = 0, nsrv; struct list { SLIST_ENTRY(list) next; int len; u_char host[1]; } *lsp, *nlsp; SLIST_HEAD(, list) head; *fsrootp = NULL; *srvp = NULL; *nilp = 0; /* * Get the fs_root path and check for the special case of null path * and 0 length server list. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len < 0 || len > 10240) { error = NFSERR_BADXDR; goto nfsmout; } if (len == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl != 0) { error = NFSERR_BADXDR; goto nfsmout; } *nilp = 1; *sump = 2 * NFSX_UNSIGNED; error = 0; goto nfsmout; } cp = malloc(len + 1, M_NFSSTRING, M_WAITOK); error = nfsrv_mtostr(nd, cp, len); if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(int, *tl); if (cnt <= 0) error = NFSERR_BADXDR; } if (error) goto nfsmout; /* * Now, loop through the location list and make up the srvlist. */ xdrsum = (2 * NFSX_UNSIGNED) + NFSM_RNDUP(len); cp2 = cp3 = malloc(1024, M_NFSSTRING, M_WAITOK); slen = 1024; siz = 0; for (i = 0; i < cnt; i++) { SLIST_INIT(&head); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nsrv = fxdr_unsigned(int, *tl); if (nsrv <= 0) { error = NFSERR_BADXDR; goto nfsmout; } /* * Handle the first server by putting it in the srvstr. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } nfsrv_refstrbigenough(siz + len + 3, &cp2, &cp3, &slen); if (cp3 != cp2) { *cp3++ = ','; siz++; } error = nfsrv_mtostr(nd, cp3, len); if (error) goto nfsmout; cp3 += len; *cp3++ = ':'; siz += (len + 1); xdrsum += (2 * NFSX_UNSIGNED) + NFSM_RNDUP(len); for (j = 1; j < nsrv; j++) { /* * Yuck, put them in an slist and process them later. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } lsp = (struct list *)malloc(sizeof (struct list) + len, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, lsp->host, len); if (error) goto nfsmout; xdrsum += NFSX_UNSIGNED + NFSM_RNDUP(len); lsp->len = len; SLIST_INSERT_HEAD(&head, lsp, next); } /* * Finally, we can get the path. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len <= 0 || len > 1024) { error = NFSERR_BADXDR; goto nfsmout; } nfsrv_refstrbigenough(siz + len + 1, &cp2, &cp3, &slen); error = nfsrv_mtostr(nd, cp3, len); if (error) goto nfsmout; xdrsum += NFSX_UNSIGNED + NFSM_RNDUP(len); str = cp3; stringlen = len; cp3 += len; siz += len; SLIST_FOREACH_SAFE(lsp, &head, next, nlsp) { nfsrv_refstrbigenough(siz + lsp->len + stringlen + 3, &cp2, &cp3, &slen); *cp3++ = ','; NFSBCOPY(lsp->host, cp3, lsp->len); cp3 += lsp->len; *cp3++ = ':'; NFSBCOPY(str, cp3, stringlen); cp3 += stringlen; *cp3 = '\0'; siz += (lsp->len + stringlen + 2); - free((caddr_t)lsp, M_TEMP); + free(lsp, M_TEMP); } } *fsrootp = cp; *srvp = cp2; *sump = xdrsum; NFSEXITCODE2(0, nd); return (0); nfsmout: if (cp != NULL) free(cp, M_NFSSTRING); if (cp2 != NULL) free(cp2, M_NFSSTRING); NFSEXITCODE2(error, nd); return (error); } /* * Make the malloc'd space large enough. This is a pain, but the xdr * doesn't set an upper bound on the side, so... */ static void nfsrv_refstrbigenough(int siz, u_char **cpp, u_char **cpp2, int *slenp) { u_char *cp; int i; if (siz <= *slenp) return; cp = malloc(siz + 1024, M_NFSSTRING, M_WAITOK); NFSBCOPY(*cpp, cp, *slenp); free(*cpp, M_NFSSTRING); i = *cpp2 - *cpp; *cpp = cp; *cpp2 = cp + i; *slenp = siz + 1024; } /* * Initialize the reply header data structures. */ APPLESTATIC void nfsrvd_rephead(struct nfsrv_descript *nd) { mbuf_t mreq; /* * If this is a big reply, use a cluster. */ if ((nd->nd_flag & ND_GSSINITREPLY) == 0 && nfs_bigreply[nd->nd_procnum]) { NFSMCLGET(mreq, M_WAITOK); nd->nd_mreq = mreq; nd->nd_mb = mreq; } else { NFSMGET(mreq); nd->nd_mreq = mreq; nd->nd_mb = mreq; } nd->nd_bpos = NFSMTOD(mreq, caddr_t); mbuf_setlen(mreq, 0); if ((nd->nd_flag & ND_GSSINITREPLY) == 0) NFSM_BUILD(nd->nd_errp, int *, NFSX_UNSIGNED); } /* * Lock a socket against others. * Currently used to serialize connect/disconnect attempts. */ int newnfs_sndlock(int *flagp) { struct timespec ts; NFSLOCKSOCK(); while (*flagp & NFSR_SNDLOCK) { *flagp |= NFSR_WANTSND; ts.tv_sec = 0; ts.tv_nsec = 0; (void) nfsmsleep((caddr_t)flagp, NFSSOCKMUTEXPTR, PZERO - 1, "nfsndlck", &ts); } *flagp |= NFSR_SNDLOCK; NFSUNLOCKSOCK(); return (0); } /* * Unlock the stream socket for others. */ void newnfs_sndunlock(int *flagp) { NFSLOCKSOCK(); if ((*flagp & NFSR_SNDLOCK) == 0) panic("nfs sndunlock"); *flagp &= ~NFSR_SNDLOCK; if (*flagp & NFSR_WANTSND) { *flagp &= ~NFSR_WANTSND; wakeup((caddr_t)flagp); } NFSUNLOCKSOCK(); } APPLESTATIC int nfsv4_getipaddr(struct nfsrv_descript *nd, struct sockaddr_in *sin, struct sockaddr_in6 *sin6, sa_family_t *saf, int *isudp) { struct in_addr saddr; uint32_t portnum, *tl; int i, j, k; sa_family_t af = AF_UNSPEC; char addr[64], protocol[5], *cp; int cantparse = 0, error = 0; uint16_t portv; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i >= 3 && i <= 4) { error = nfsrv_mtostr(nd, protocol, i); if (error) goto nfsmout; if (strcmp(protocol, "tcp") == 0) { af = AF_INET; *isudp = 0; } else if (strcmp(protocol, "udp") == 0) { af = AF_INET; *isudp = 1; } else if (strcmp(protocol, "tcp6") == 0) { af = AF_INET6; *isudp = 0; } else if (strcmp(protocol, "udp6") == 0) { af = AF_INET6; *isudp = 1; } else cantparse = 1; } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (cantparse == 0 && i >= 11 && i < 64) { /* * The shortest address is 11chars and the longest is < 64. */ error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; /* Find the port# at the end and extract that. */ i = strlen(addr); k = 0; cp = &addr[i - 1]; /* Count back two '.'s from end to get port# field. */ for (j = 0; j < i; j++) { if (*cp == '.') { k++; if (k == 2) break; } cp--; } if (k == 2) { /* * The NFSv4 port# is appended as .N.N, where N is * a decimal # in the range 0-255, just like an inet4 * address. Cheat and use inet_aton(), which will * return a Class A address and then shift the high * order 8bits over to convert it to the port#. */ *cp++ = '\0'; if (inet_aton(cp, &saddr) == 1) { portnum = ntohl(saddr.s_addr); portv = (uint16_t)((portnum >> 16) | (portnum & 0xff)); } else cantparse = 1; } else cantparse = 1; if (cantparse == 0) { if (af == AF_INET) { if (inet_pton(af, addr, &sin->sin_addr) == 1) { sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = htons(portv); *saf = af; return (0); } } else { if (inet_pton(af, addr, &sin6->sin6_addr) == 1) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(portv); *saf = af; return (0); } } } } else { if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } error = EPERM; nfsmout: return (error); } /* * Handle an NFSv4.1 Sequence request for the session. * If reply != NULL, use it to return the cached reply, as required. * The client gets a cached reply via this call for callbacks, however the * server gets a cached reply via the nfsv4_seqsess_cachereply() call. */ int nfsv4_seqsession(uint32_t seqid, uint32_t slotid, uint32_t highslot, struct nfsslot *slots, struct mbuf **reply, uint16_t maxslot) { int error; error = 0; if (reply != NULL) *reply = NULL; if (slotid > maxslot) return (NFSERR_BADSLOT); if (seqid == slots[slotid].nfssl_seq) { /* A retry. */ if (slots[slotid].nfssl_inprog != 0) error = NFSERR_DELAY; else if (slots[slotid].nfssl_reply != NULL) { if (reply != NULL) { *reply = slots[slotid].nfssl_reply; slots[slotid].nfssl_reply = NULL; } slots[slotid].nfssl_inprog = 1; error = NFSERR_REPLYFROMCACHE; } else /* No reply cached, so just do it. */ slots[slotid].nfssl_inprog = 1; } else if ((slots[slotid].nfssl_seq + 1) == seqid) { if (slots[slotid].nfssl_reply != NULL) m_freem(slots[slotid].nfssl_reply); slots[slotid].nfssl_reply = NULL; slots[slotid].nfssl_inprog = 1; slots[slotid].nfssl_seq++; } else error = NFSERR_SEQMISORDERED; return (error); } /* * Cache this reply for the slot. * Use the "rep" argument to return the cached reply if repstat is set to * NFSERR_REPLYFROMCACHE. The client never sets repstat to this value. */ void nfsv4_seqsess_cacherep(uint32_t slotid, struct nfsslot *slots, int repstat, struct mbuf **rep) { if (repstat == NFSERR_REPLYFROMCACHE) { *rep = slots[slotid].nfssl_reply; slots[slotid].nfssl_reply = NULL; } else { if (slots[slotid].nfssl_reply != NULL) m_freem(slots[slotid].nfssl_reply); slots[slotid].nfssl_reply = *rep; } slots[slotid].nfssl_inprog = 0; } /* * Generate the xdr for an NFSv4.1 Sequence Operation. */ APPLESTATIC void nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, struct nfsclsession *sep, int dont_replycache) { uint32_t *tl, slotseq = 0; int error, maxslot, slotpos; uint8_t sessionid[NFSX_V4SESSIONID]; error = nfsv4_sequencelookup(nmp, sep, &slotpos, &maxslot, &slotseq, sessionid); /* Build the Sequence arguments. */ NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); nd->nd_sequence = tl; bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; nd->nd_slotseq = tl; if (error == 0) { *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl++ = txdr_unsigned(maxslot); if (dont_replycache == 0) *tl = newnfs_true; else *tl = newnfs_false; } else { /* * There are two errors and the rest of the session can * just be zeros. * NFSERR_BADSESSION: This bad session should just generate * the same error again when the RPC is retried. * ESTALE: A forced dismount is in progress and will cause the * RPC to fail later. */ *tl++ = 0; *tl++ = 0; *tl++ = 0; *tl = 0; } nd->nd_flag |= ND_HASSEQUENCE; } int nfsv4_sequencelookup(struct nfsmount *nmp, struct nfsclsession *sep, int *slotposp, int *maxslotp, uint32_t *slotseqp, uint8_t *sessionid) { int i, maxslot, slotpos; uint64_t bitval; /* Find an unused slot. */ slotpos = -1; maxslot = -1; mtx_lock(&sep->nfsess_mtx); do { if (nmp != NULL && sep->nfsess_defunct != 0) { /* Just return the bad session. */ bcopy(sep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID); mtx_unlock(&sep->nfsess_mtx); return (NFSERR_BADSESSION); } bitval = 1; for (i = 0; i < sep->nfsess_foreslots; i++) { if ((bitval & sep->nfsess_slots) == 0) { slotpos = i; sep->nfsess_slots |= bitval; sep->nfsess_slotseq[i]++; *slotseqp = sep->nfsess_slotseq[i]; break; } bitval <<= 1; } if (slotpos == -1) { /* * If a forced dismount is in progress, just return. * This RPC attempt will fail when it calls * newnfs_request(). */ if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) { mtx_unlock(&sep->nfsess_mtx); return (ESTALE); } /* Wake up once/sec, to check for a forced dismount. */ (void)mtx_sleep(&sep->nfsess_slots, &sep->nfsess_mtx, PZERO, "nfsclseq", hz); } } while (slotpos == -1); /* Now, find the highest slot in use. (nfsc_slots is 64bits) */ bitval = 1; for (i = 0; i < 64; i++) { if ((bitval & sep->nfsess_slots) != 0) maxslot = i; bitval <<= 1; } bcopy(sep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID); mtx_unlock(&sep->nfsess_mtx); *slotposp = slotpos; *maxslotp = maxslot; return (0); } /* * Free a session slot. */ APPLESTATIC void nfsv4_freeslot(struct nfsclsession *sep, int slot) { uint64_t bitval; bitval = 1; if (slot > 0) bitval <<= slot; mtx_lock(&sep->nfsess_mtx); if ((bitval & sep->nfsess_slots) == 0) printf("freeing free slot!!\n"); sep->nfsess_slots &= ~bitval; wakeup(&sep->nfsess_slots); mtx_unlock(&sep->nfsess_mtx); } Index: head/sys/fs/nfs/nfsport.h =================================================================== --- head/sys/fs/nfs/nfsport.h (revision 328416) +++ head/sys/fs/nfs/nfsport.h (revision 328417) @@ -1,1067 +1,1067 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSPORT_H_ #define _NFS_NFSPORT_H_ /* * In general, I'm not fond of #includes in .h files, but this seems * to be the cleanest way to handle #include files for the ports. */ #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * For Darwin, these functions should be "static" when built in a kext. * (This is always defined as nil otherwise.) */ #define APPLESTATIC #include #include #include #include #include #include #include #include #include #include #include "opt_nfs.h" #include "opt_ufs.h" /* * These types must be defined before the nfs includes. */ #define NFSSOCKADDR_T struct sockaddr * #define NFSPROC_T struct thread #define NFSDEV_T dev_t #define NFSSVCARGS nfssvc_args #define NFSACL_T struct acl /* * These should be defined as the types used for the corresponding VOP's * argument type. */ #define NFS_ACCESS_ARGS struct vop_access_args #define NFS_OPEN_ARGS struct vop_open_args #define NFS_GETATTR_ARGS struct vop_getattr_args #define NFS_LOOKUP_ARGS struct vop_lookup_args #define NFS_READDIR_ARGS struct vop_readdir_args /* * Allocate mbufs. Must succeed and never set the mbuf ptr to NULL. */ #define NFSMGET(m) do { \ MGET((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGET((m), M_WAITOK, MT_DATA); \ } \ } while (0) #define NFSMGETHDR(m) do { \ MGETHDR((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGETHDR((m), M_WAITOK, MT_DATA); \ } \ } while (0) #define NFSMCLGET(m, w) do { \ MGET((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGET((m), M_WAITOK, MT_DATA); \ } \ MCLGET((m), (w)); \ } while (0) #define NFSMCLGETHDR(m, w) do { \ MGETHDR((m), M_WAITOK, MT_DATA); \ while ((m) == NULL ) { \ (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGETHDR((m), M_WAITOK, MT_DATA); \ } \ } while (0) #define NFSMTOD mtod /* * Client side constant for size of a lockowner name. */ #define NFSV4CL_LOCKNAMELEN 12 /* * Type for a mutex lock. */ #define NFSMUTEX_T struct mtx #endif /* _KERNEL */ /* * NFSv4 Operation numbers. */ #define NFSV4OP_ACCESS 3 #define NFSV4OP_CLOSE 4 #define NFSV4OP_COMMIT 5 #define NFSV4OP_CREATE 6 #define NFSV4OP_DELEGPURGE 7 #define NFSV4OP_DELEGRETURN 8 #define NFSV4OP_GETATTR 9 #define NFSV4OP_GETFH 10 #define NFSV4OP_LINK 11 #define NFSV4OP_LOCK 12 #define NFSV4OP_LOCKT 13 #define NFSV4OP_LOCKU 14 #define NFSV4OP_LOOKUP 15 #define NFSV4OP_LOOKUPP 16 #define NFSV4OP_NVERIFY 17 #define NFSV4OP_OPEN 18 #define NFSV4OP_OPENATTR 19 #define NFSV4OP_OPENCONFIRM 20 #define NFSV4OP_OPENDOWNGRADE 21 #define NFSV4OP_PUTFH 22 #define NFSV4OP_PUTPUBFH 23 #define NFSV4OP_PUTROOTFH 24 #define NFSV4OP_READ 25 #define NFSV4OP_READDIR 26 #define NFSV4OP_READLINK 27 #define NFSV4OP_REMOVE 28 #define NFSV4OP_RENAME 29 #define NFSV4OP_RENEW 30 #define NFSV4OP_RESTOREFH 31 #define NFSV4OP_SAVEFH 32 #define NFSV4OP_SECINFO 33 #define NFSV4OP_SETATTR 34 #define NFSV4OP_SETCLIENTID 35 #define NFSV4OP_SETCLIENTIDCFRM 36 #define NFSV4OP_VERIFY 37 #define NFSV4OP_WRITE 38 #define NFSV4OP_RELEASELCKOWN 39 /* * Must be one greater than the last Operation#. */ #define NFSV4OP_NOPS 40 /* * Additional Ops for NFSv4.1. */ #define NFSV4OP_BACKCHANNELCTL 40 #define NFSV4OP_BINDCONNTOSESS 41 #define NFSV4OP_EXCHANGEID 42 #define NFSV4OP_CREATESESSION 43 #define NFSV4OP_DESTROYSESSION 44 #define NFSV4OP_FREESTATEID 45 #define NFSV4OP_GETDIRDELEG 46 #define NFSV4OP_GETDEVINFO 47 #define NFSV4OP_GETDEVLIST 48 #define NFSV4OP_LAYOUTCOMMIT 49 #define NFSV4OP_LAYOUTGET 50 #define NFSV4OP_LAYOUTRETURN 51 #define NFSV4OP_SECINFONONAME 52 #define NFSV4OP_SEQUENCE 53 #define NFSV4OP_SETSSV 54 #define NFSV4OP_TESTSTATEID 55 #define NFSV4OP_WANTDELEG 56 #define NFSV4OP_DESTROYCLIENTID 57 #define NFSV4OP_RECLAIMCOMPL 58 /* * Must be one more than last op#. * NFSv4.2 isn't implemented yet, but define the op# limit for it. */ #define NFSV41_NOPS 59 #define NFSV42_NOPS 72 /* Quirky case if the illegal op code */ #define NFSV4OP_OPILLEGAL 10044 /* * Fake NFSV4OP_xxx used for nfsstat. Start at NFSV42_NOPS. */ #define NFSV4OP_SYMLINK (NFSV42_NOPS) #define NFSV4OP_MKDIR (NFSV42_NOPS + 1) #define NFSV4OP_RMDIR (NFSV42_NOPS + 2) #define NFSV4OP_READDIRPLUS (NFSV42_NOPS + 3) #define NFSV4OP_MKNOD (NFSV42_NOPS + 4) #define NFSV4OP_FSSTAT (NFSV42_NOPS + 5) #define NFSV4OP_FSINFO (NFSV42_NOPS + 6) #define NFSV4OP_PATHCONF (NFSV42_NOPS + 7) #define NFSV4OP_V3CREATE (NFSV42_NOPS + 8) /* * This is the count of the fake operations listed above. */ #define NFSV4OP_FAKENOPS 9 /* * and the Callback OPs */ #define NFSV4OP_CBGETATTR 3 #define NFSV4OP_CBRECALL 4 /* * Must be one greater than the last Callback Operation# for NFSv4.0. */ #define NFSV4OP_CBNOPS 5 /* * Additional Callback Ops for NFSv4.1 only. */ #define NFSV4OP_CBLAYOUTRECALL 5 #define NFSV4OP_CBNOTIFY 6 #define NFSV4OP_CBPUSHDELEG 7 #define NFSV4OP_CBRECALLANY 8 #define NFSV4OP_CBRECALLOBJAVAIL 9 #define NFSV4OP_CBRECALLSLOT 10 #define NFSV4OP_CBSEQUENCE 11 #define NFSV4OP_CBWANTCANCELLED 12 #define NFSV4OP_CBNOTIFYLOCK 13 #define NFSV4OP_CBNOTIFYDEVID 14 #define NFSV41_CBNOPS 15 #define NFSV42_CBNOPS 16 /* * The lower numbers -> 21 are used by NFSv2 and v3. These define higher * numbers used by NFSv4. * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is * one greater than the last number. */ #ifndef NFS_V3NPROCS #define NFS_V3NPROCS 22 #define NFSPROC_LOOKUPP 22 #define NFSPROC_SETCLIENTID 23 #define NFSPROC_SETCLIENTIDCFRM 24 #define NFSPROC_LOCK 25 #define NFSPROC_LOCKU 26 #define NFSPROC_OPEN 27 #define NFSPROC_CLOSE 28 #define NFSPROC_OPENCONFIRM 29 #define NFSPROC_LOCKT 30 #define NFSPROC_OPENDOWNGRADE 31 #define NFSPROC_RENEW 32 #define NFSPROC_PUTROOTFH 33 #define NFSPROC_RELEASELCKOWN 34 #define NFSPROC_DELEGRETURN 35 #define NFSPROC_RETDELEGREMOVE 36 #define NFSPROC_RETDELEGRENAME1 37 #define NFSPROC_RETDELEGRENAME2 38 #define NFSPROC_GETACL 39 #define NFSPROC_SETACL 40 /* * Must be defined as one higher than the last Proc# above. */ #define NFSV4_NPROCS 41 /* Additional procedures for NFSv4.1. */ #define NFSPROC_EXCHANGEID 41 #define NFSPROC_CREATESESSION 42 #define NFSPROC_DESTROYSESSION 43 #define NFSPROC_DESTROYCLIENT 44 #define NFSPROC_FREESTATEID 45 #define NFSPROC_LAYOUTGET 46 #define NFSPROC_GETDEVICEINFO 47 #define NFSPROC_LAYOUTCOMMIT 48 #define NFSPROC_LAYOUTRETURN 49 #define NFSPROC_RECLAIMCOMPL 50 #define NFSPROC_WRITEDS 51 #define NFSPROC_READDS 52 #define NFSPROC_COMMITDS 53 #define NFSPROC_OPENLAYGET 54 #define NFSPROC_CREATELAYGET 55 /* * Must be defined as one higher than the last NFSv4.1 Proc# above. */ #define NFSV41_NPROCS 56 #endif /* NFS_V3NPROCS */ /* * New stats structure. * The vers field will be set to NFSSTATS_V1 by the caller. */ #define NFSSTATS_V1 1 struct nfsstatsv1 { int vers; /* Set to version requested by caller. */ uint64_t attrcache_hits; uint64_t attrcache_misses; uint64_t lookupcache_hits; uint64_t lookupcache_misses; uint64_t direofcache_hits; uint64_t direofcache_misses; uint64_t accesscache_hits; uint64_t accesscache_misses; uint64_t biocache_reads; uint64_t read_bios; uint64_t read_physios; uint64_t biocache_writes; uint64_t write_bios; uint64_t write_physios; uint64_t biocache_readlinks; uint64_t readlink_bios; uint64_t biocache_readdirs; uint64_t readdir_bios; uint64_t rpccnt[NFSV41_NPROCS + 13]; uint64_t rpcretries; uint64_t srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS]; uint64_t srvrpc_errs; uint64_t srv_errs; uint64_t rpcrequests; uint64_t rpctimeouts; uint64_t rpcunexpected; uint64_t rpcinvalid; uint64_t srvcache_inproghits; uint64_t srvcache_idemdonehits; uint64_t srvcache_nonidemdonehits; uint64_t srvcache_misses; uint64_t srvcache_tcppeak; int srvcache_size; /* Updated by atomic_xx_int(). */ uint64_t srvclients; uint64_t srvopenowners; uint64_t srvopens; uint64_t srvlockowners; uint64_t srvlocks; uint64_t srvdelegates; uint64_t cbrpccnt[NFSV42_CBNOPS]; uint64_t clopenowners; uint64_t clopens; uint64_t cllockowners; uint64_t cllocks; uint64_t cldelegates; uint64_t cllocalopenowners; uint64_t cllocalopens; uint64_t cllocallockowners; uint64_t cllocallocks; uint64_t srvstartcnt; uint64_t srvdonecnt; uint64_t srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS]; uint64_t srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS]; struct bintime srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS]; struct bintime busyfrom; struct bintime busytime; }; /* * Old stats structure. */ struct ext_nfsstats { int attrcache_hits; int attrcache_misses; int lookupcache_hits; int lookupcache_misses; int direofcache_hits; int direofcache_misses; int accesscache_hits; int accesscache_misses; int biocache_reads; int read_bios; int read_physios; int biocache_writes; int write_bios; int write_physios; int biocache_readlinks; int readlink_bios; int biocache_readdirs; int readdir_bios; int rpccnt[NFSV4_NPROCS]; int rpcretries; int srvrpccnt[NFSV4OP_NOPS + NFSV4OP_FAKENOPS]; int srvrpc_errs; int srv_errs; int rpcrequests; int rpctimeouts; int rpcunexpected; int rpcinvalid; int srvcache_inproghits; int srvcache_idemdonehits; int srvcache_nonidemdonehits; int srvcache_misses; int srvcache_tcppeak; int srvcache_size; int srvclients; int srvopenowners; int srvopens; int srvlockowners; int srvlocks; int srvdelegates; int cbrpccnt[NFSV4OP_CBNOPS]; int clopenowners; int clopens; int cllockowners; int cllocks; int cldelegates; int cllocalopenowners; int cllocalopens; int cllocallockowners; int cllocallocks; }; #ifdef _KERNEL /* * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code. */ #ifndef NFS_NPROCS #define NFS_NPROCS NFSV4_NPROCS #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Just to keep nfs_var.h happy. */ struct nfs_vattr { int junk; }; struct nfsvattr { struct vattr na_vattr; nfsattrbit_t na_suppattr; u_int64_t na_mntonfileno; u_int64_t na_filesid[2]; }; #define na_type na_vattr.va_type #define na_mode na_vattr.va_mode #define na_nlink na_vattr.va_nlink #define na_uid na_vattr.va_uid #define na_gid na_vattr.va_gid #define na_fsid na_vattr.va_fsid #define na_fileid na_vattr.va_fileid #define na_size na_vattr.va_size #define na_blocksize na_vattr.va_blocksize #define na_atime na_vattr.va_atime #define na_mtime na_vattr.va_mtime #define na_ctime na_vattr.va_ctime #define na_gen na_vattr.va_gen #define na_flags na_vattr.va_flags #define na_rdev na_vattr.va_rdev #define na_bytes na_vattr.va_bytes #define na_filerev na_vattr.va_filerev #define na_vaflags na_vattr.va_vaflags #include /* * This is the header structure used for the lists, etc. (It has the * above record in it. */ struct nfsrv_stablefirst { LIST_HEAD(, nfsrv_stable) nsf_head; /* Head of nfsrv_stable list */ time_t nsf_eograce; /* Time grace period ends */ time_t *nsf_bootvals; /* Previous boottime values */ struct file *nsf_fp; /* File table pointer */ u_char nsf_flags; /* NFSNSF_ flags */ struct nfsf_rec nsf_rec; /* and above first record */ }; #define nsf_lease nsf_rec.lease #define nsf_numboots nsf_rec.numboots /* NFSNSF_xxx flags */ #define NFSNSF_UPDATEDONE 0x01 #define NFSNSF_GRACEOVER 0x02 #define NFSNSF_NEEDLOCK 0x04 #define NFSNSF_EXPIREDCLIENT 0x08 #define NFSNSF_NOOPENS 0x10 #define NFSNSF_OK 0x20 /* * Maximum number of boot times allowed in record. Although there is * really no need for a fixed upper bound, this serves as a sanity check * for a corrupted file. */ #define NFSNSF_MAXNUMBOOTS 10000 /* * This structure defines the other records in the file. The * nst_client array is actually the size of the client string name. */ struct nfst_rec { u_int16_t len; u_char flag; u_char client[1]; }; /* and the values for flag */ #define NFSNST_NEWSTATE 0x1 #define NFSNST_REVOKE 0x2 #define NFSNST_GOTSTATE 0x4 /* * This structure is linked onto nfsrv_stablefirst for the duration of * reclaim. */ struct nfsrv_stable { LIST_ENTRY(nfsrv_stable) nst_list; struct nfsclient *nst_clp; struct nfst_rec nst_rec; }; #define nst_timestamp nst_rec.timestamp #define nst_len nst_rec.len #define nst_flag nst_rec.flag #define nst_client nst_rec.client /* * At some point the server will run out of kernel storage for * state structures. For FreeBSD5.2, this results in a panic * kmem_map is full. It happens at well over 1000000 opens plus * locks on a PIII-800 with 256Mbytes, so that is where I've set * the limit. If your server panics due to too many opens/locks, * decrease the size of NFSRV_V4STATELIMIT. If you find the server * returning NFS4ERR_RESOURCE a lot and have lots of memory, try * increasing it. */ #define NFSRV_V4STATELIMIT 500000 /* Max # of Opens + Locks */ /* * The type required differs with BSDen (just the second arg). */ void nfsrvd_rcv(struct socket *, void *, int); /* * Macros for handling socket addresses. (Hopefully this makes the code * more portable, since I've noticed some 'BSD don't have sockaddrs in * mbufs any more.) */ #define NFSSOCKADDR(a, t) ((t)(a)) #define NFSSOCKADDRALLOC(a) \ do { \ - MALLOC((a), struct sockaddr *, sizeof (struct sockaddr), \ + (a) = malloc(sizeof (struct sockaddr), \ M_SONAME, M_WAITOK); \ NFSBZERO((a), sizeof (struct sockaddr)); \ } while (0) #define NFSSOCKADDRSIZE(a, s) ((a)->sa_len = (s)) #define NFSSOCKADDRFREE(a) \ do { \ if (a) \ - FREE((caddr_t)(a), M_SONAME); \ + free((a), M_SONAME); \ } while (0) /* * These should be defined as a process or thread structure, as required * for signal handling, etc. */ #define NFSNEWCRED(c) (crdup(c)) #define NFSPROCCRED(p) ((p)->td_ucred) #define NFSFREECRED(c) (crfree(c)) #define NFSUIOPROC(u, p) ((u)->uio_td = NULL) #define NFSPROCP(p) ((p)->td_proc) /* * Define these so that cn_hash and its length is ignored. */ #define NFSCNHASHZERO(c) #define NFSCNHASH(c, v) #define NCHNAMLEN 9999999 /* * These macros are defined to initialize and set the timer routine. */ #define NFS_TIMERINIT \ newnfs_timer(NULL) /* * Handle SMP stuff: */ #define NFSSTATESPINLOCK extern struct mtx nfs_state_mutex #define NFSLOCKSTATE() mtx_lock(&nfs_state_mutex) #define NFSUNLOCKSTATE() mtx_unlock(&nfs_state_mutex) #define NFSSTATEMUTEXPTR (&nfs_state_mutex) #define NFSREQSPINLOCK extern struct mtx nfs_req_mutex #define NFSLOCKREQ() mtx_lock(&nfs_req_mutex) #define NFSUNLOCKREQ() mtx_unlock(&nfs_req_mutex) #define NFSSOCKMUTEX extern struct mtx nfs_slock_mutex #define NFSSOCKMUTEXPTR (&nfs_slock_mutex) #define NFSLOCKSOCK() mtx_lock(&nfs_slock_mutex) #define NFSUNLOCKSOCK() mtx_unlock(&nfs_slock_mutex) #define NFSNAMEIDMUTEX extern struct mtx nfs_nameid_mutex #define NFSLOCKNAMEID() mtx_lock(&nfs_nameid_mutex) #define NFSUNLOCKNAMEID() mtx_unlock(&nfs_nameid_mutex) #define NFSNAMEIDREQUIRED() mtx_assert(&nfs_nameid_mutex, MA_OWNED) #define NFSCLSTATEMUTEX extern struct mtx nfs_clstate_mutex #define NFSCLSTATEMUTEXPTR (&nfs_clstate_mutex) #define NFSLOCKCLSTATE() mtx_lock(&nfs_clstate_mutex) #define NFSUNLOCKCLSTATE() mtx_unlock(&nfs_clstate_mutex) #define NFSDLOCKMUTEX extern struct mtx newnfsd_mtx #define NFSDLOCKMUTEXPTR (&newnfsd_mtx) #define NFSD_LOCK() mtx_lock(&newnfsd_mtx) #define NFSD_UNLOCK() mtx_unlock(&newnfsd_mtx) #define NFSD_LOCK_ASSERT() mtx_assert(&newnfsd_mtx, MA_OWNED) #define NFSD_UNLOCK_ASSERT() mtx_assert(&newnfsd_mtx, MA_NOTOWNED) #define NFSV4ROOTLOCKMUTEX extern struct mtx nfs_v4root_mutex #define NFSV4ROOTLOCKMUTEXPTR (&nfs_v4root_mutex) #define NFSLOCKV4ROOTMUTEX() mtx_lock(&nfs_v4root_mutex) #define NFSUNLOCKV4ROOTMUTEX() mtx_unlock(&nfs_v4root_mutex) #define NFSLOCKNODE(n) mtx_lock(&((n)->n_mtx)) #define NFSUNLOCKNODE(n) mtx_unlock(&((n)->n_mtx)) #define NFSLOCKMNT(m) mtx_lock(&((m)->nm_mtx)) #define NFSUNLOCKMNT(m) mtx_unlock(&((m)->nm_mtx)) #define NFSLOCKREQUEST(r) mtx_lock(&((r)->r_mtx)) #define NFSUNLOCKREQUEST(r) mtx_unlock(&((r)->r_mtx)) #define NFSPROCLISTLOCK() sx_slock(&allproc_lock) #define NFSPROCLISTUNLOCK() sx_sunlock(&allproc_lock) #define NFSLOCKSOCKREQ(r) mtx_lock(&((r)->nr_mtx)) #define NFSUNLOCKSOCKREQ(r) mtx_unlock(&((r)->nr_mtx)) #define NFSLOCKDS(d) mtx_lock(&((d)->nfsclds_mtx)) #define NFSUNLOCKDS(d) mtx_unlock(&((d)->nfsclds_mtx)) #define NFSSESSIONMUTEXPTR(s) (&((s)->mtx)) #define NFSLOCKSESSION(s) mtx_lock(&((s)->mtx)) #define NFSUNLOCKSESSION(s) mtx_unlock(&((s)->mtx)) #define NFSLOCKLAYOUT(l) mtx_lock(&((l)->mtx)) #define NFSUNLOCKLAYOUT(l) mtx_unlock(&((l)->mtx)) #define NFSDDSLOCK() mtx_lock(&nfsrv_dslock_mtx) #define NFSDDSUNLOCK() mtx_unlock(&nfsrv_dslock_mtx) #define NFSDSCLOCKMUTEXPTR (&nfsrv_dsclock_mtx) #define NFSDSCLOCK() mtx_lock(&nfsrv_dsclock_mtx) #define NFSDSCUNLOCK() mtx_unlock(&nfsrv_dsclock_mtx) #define NFSDSRMLOCKMUTEXPTR (&nfsrv_dsrmlock_mtx) #define NFSDSRMLOCK() mtx_lock(&nfsrv_dsrmlock_mtx) #define NFSDSRMUNLOCK() mtx_unlock(&nfsrv_dsrmlock_mtx) #define NFSDWRPCLOCKMUTEXPTR (&nfsrv_dwrpclock_mtx) #define NFSDWRPCLOCK() mtx_lock(&nfsrv_dwrpclock_mtx) #define NFSDWRPCUNLOCK() mtx_unlock(&nfsrv_dwrpclock_mtx) #define NFSDSRPCLOCKMUTEXPTR (&nfsrv_dsrpclock_mtx) #define NFSDSRPCLOCK() mtx_lock(&nfsrv_dsrpclock_mtx) #define NFSDSRPCUNLOCK() mtx_unlock(&nfsrv_dsrpclock_mtx) #define NFSDARPCLOCKMUTEXPTR (&nfsrv_darpclock_mtx) #define NFSDARPCLOCK() mtx_lock(&nfsrv_darpclock_mtx) #define NFSDARPCUNLOCK() mtx_unlock(&nfsrv_darpclock_mtx) /* * Use these macros to initialize/free a mutex. */ #define NFSINITSOCKMUTEX(m) mtx_init((m), "nfssock", NULL, MTX_DEF) #define NFSFREEMUTEX(m) mtx_destroy((m)) int nfsmsleep(void *, void *, int, const char *, struct timespec *); /* * And weird vm stuff in the nfs server. */ #define PDIRUNLOCK 0x0 #define MAX_COMMIT_COUNT (1024 * 1024) /* * Define these to handle the type of va_rdev. */ #define NFSMAKEDEV(m, n) makedev((m), (n)) #define NFSMAJOR(d) major(d) #define NFSMINOR(d) minor(d) /* * The vnode tag for nfsv4root. */ #define VT_NFSV4ROOT "nfsv4root" /* * Define whatever it takes to do a vn_rdwr(). */ #define NFSD_RDWR(r, v, b, l, o, s, i, c, a, p) \ vn_rdwr((r), (v), (b), (l), (o), (s), (i), (c), NULL, (a), (p)) /* * Macros for handling memory for different BSDen. * NFSBCOPY(src, dst, len) - copies len bytes, non-overlapping * NFSOVBCOPY(src, dst, len) - ditto, but data areas might overlap * NFSBCMP(cp1, cp2, len) - compare len bytes, return 0 if same * NFSBZERO(cp, len) - set len bytes to 0x0 */ #define NFSBCOPY(s, d, l) bcopy((s), (d), (l)) #define NFSOVBCOPY(s, d, l) ovbcopy((s), (d), (l)) #define NFSBCMP(s, d, l) bcmp((s), (d), (l)) #define NFSBZERO(s, l) bzero((s), (l)) /* * Some queue.h files don't have these dfined in them. */ #define LIST_END(head) NULL #define SLIST_END(head) NULL #define TAILQ_END(head) NULL /* * This must be defined to be a global variable that increments once * per second, but never stops or goes backwards, even when a "date" * command changes the TOD clock. It is used for delta times for * leases, etc. */ #define NFSD_MONOSEC time_uptime /* * Declare the malloc types. */ MALLOC_DECLARE(M_NEWNFSRVCACHE); MALLOC_DECLARE(M_NEWNFSDCLIENT); MALLOC_DECLARE(M_NEWNFSDSTATE); MALLOC_DECLARE(M_NEWNFSDLOCK); MALLOC_DECLARE(M_NEWNFSDLOCKFILE); MALLOC_DECLARE(M_NEWNFSSTRING); MALLOC_DECLARE(M_NEWNFSUSERGROUP); MALLOC_DECLARE(M_NEWNFSDREQ); MALLOC_DECLARE(M_NEWNFSFH); MALLOC_DECLARE(M_NEWNFSCLOWNER); MALLOC_DECLARE(M_NEWNFSCLOPEN); MALLOC_DECLARE(M_NEWNFSCLDELEG); MALLOC_DECLARE(M_NEWNFSCLCLIENT); MALLOC_DECLARE(M_NEWNFSCLLOCKOWNER); MALLOC_DECLARE(M_NEWNFSCLLOCK); MALLOC_DECLARE(M_NEWNFSDIROFF); MALLOC_DECLARE(M_NEWNFSV4NODE); MALLOC_DECLARE(M_NEWNFSDIRECTIO); MALLOC_DECLARE(M_NEWNFSMNT); MALLOC_DECLARE(M_NEWNFSDROLLBACK); MALLOC_DECLARE(M_NEWNFSLAYOUT); MALLOC_DECLARE(M_NEWNFSFLAYOUT); MALLOC_DECLARE(M_NEWNFSDEVINFO); MALLOC_DECLARE(M_NEWNFSSOCKREQ); MALLOC_DECLARE(M_NEWNFSCLDS); MALLOC_DECLARE(M_NEWNFSLAYRECALL); MALLOC_DECLARE(M_NEWNFSDSESSION); #define M_NFSRVCACHE M_NEWNFSRVCACHE #define M_NFSDCLIENT M_NEWNFSDCLIENT #define M_NFSDSTATE M_NEWNFSDSTATE #define M_NFSDLOCK M_NEWNFSDLOCK #define M_NFSDLOCKFILE M_NEWNFSDLOCKFILE #define M_NFSSTRING M_NEWNFSSTRING #define M_NFSUSERGROUP M_NEWNFSUSERGROUP #define M_NFSDREQ M_NEWNFSDREQ #define M_NFSFH M_NEWNFSFH #define M_NFSCLOWNER M_NEWNFSCLOWNER #define M_NFSCLOPEN M_NEWNFSCLOPEN #define M_NFSCLDELEG M_NEWNFSCLDELEG #define M_NFSCLCLIENT M_NEWNFSCLCLIENT #define M_NFSCLLOCKOWNER M_NEWNFSCLLOCKOWNER #define M_NFSCLLOCK M_NEWNFSCLLOCK #define M_NFSDIROFF M_NEWNFSDIROFF #define M_NFSV4NODE M_NEWNFSV4NODE #define M_NFSDIRECTIO M_NEWNFSDIRECTIO #define M_NFSDROLLBACK M_NEWNFSDROLLBACK #define M_NFSLAYOUT M_NEWNFSLAYOUT #define M_NFSFLAYOUT M_NEWNFSFLAYOUT #define M_NFSDEVINFO M_NEWNFSDEVINFO #define M_NFSSOCKREQ M_NEWNFSSOCKREQ #define M_NFSCLDS M_NEWNFSCLDS #define M_NFSLAYRECALL M_NEWNFSLAYRECALL #define M_NFSDSESSION M_NEWNFSDSESSION #define NFSINT_SIGMASK(set) \ (SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) || \ SIGISMEMBER(set, SIGHUP) || SIGISMEMBER(set, SIGKILL) || \ SIGISMEMBER(set, SIGQUIT)) /* * Convert a quota block count to byte count. */ #define NFSQUOTABLKTOBYTE(q, b) (q) *= (b) /* * Define this as the largest file size supported. (It should probably * be available via a VFS_xxx Op, but it isn't. */ #define NFSRV_MAXFILESIZE ((u_int64_t)0x800000000000) /* * Set this macro to index() or strchr(), whichever is supported. */ #define STRCHR(s, c) strchr((s), (c)) /* * Set the n_time in the client write rpc, as required. */ #define NFSWRITERPC_SETTIME(w, n, a, v4) \ do { \ if (w) { \ mtx_lock(&((n)->n_mtx)); \ (n)->n_mtime = (a)->na_mtime; \ if (v4) \ (n)->n_change = (a)->na_filerev; \ mtx_unlock(&((n)->n_mtx)); \ } \ } while (0) /* * Fake value, just to make the client work. */ #define NFS_LATTR_NOSHRINK 1 /* * Prototypes for functions where the arguments vary for different ports. */ int nfscl_loadattrcache(struct vnode **, struct nfsvattr *, void *, void *, int, int); int newnfs_realign(struct mbuf **, int); /* * If the port runs on an SMP box that can enforce Atomic ops with low * overheads, define these as atomic increments/decrements. If not, * don't worry about it, since these are used for stats that can be * "out by one" without disastrous consequences. */ #define NFSINCRGLOBAL(a) ((a)++) /* * Assorted funky stuff to make things work under Darwin8. */ /* * These macros checks for a field in vattr being set. */ #define NFSATTRISSET(t, v, a) ((v)->a != (t)VNOVAL) #define NFSATTRISSETTIME(v, a) ((v)->a.tv_sec != VNOVAL) /* * Manipulate mount flags. */ #define NFSSTA_HASWRITEVERF 0x00040000 /* Has write verifier */ #define NFSSTA_GOTFSINFO 0x00100000 /* Got the fsinfo */ #define NFSSTA_OPENMODE 0x00200000 /* Must use correct open mode */ #define NFSSTA_FLEXFILE 0x00800000 /* Use Flex File Layout */ #define NFSSTA_NOLAYOUTCOMMIT 0x04000000 /* Don't do LayoutCommit */ #define NFSSTA_SESSPERSIST 0x08000000 /* Has a persistent session */ #define NFSSTA_TIMEO 0x10000000 /* Experiencing a timeout */ #define NFSSTA_LOCKTIMEO 0x20000000 /* Experiencing a lockd timeout */ #define NFSSTA_HASSETFSID 0x40000000 /* Has set the fsid */ #define NFSSTA_PNFS 0x80000000 /* pNFS is enabled */ #define NFSHASNFSV3(n) ((n)->nm_flag & NFSMNT_NFSV3) #define NFSHASNFSV4(n) ((n)->nm_flag & NFSMNT_NFSV4) #define NFSHASNFSV4N(n) ((n)->nm_minorvers > 0) #define NFSHASNFSV3OR4(n) ((n)->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) #define NFSHASGOTFSINFO(n) ((n)->nm_state & NFSSTA_GOTFSINFO) #define NFSHASHASSETFSID(n) ((n)->nm_state & NFSSTA_HASSETFSID) #define NFSHASSTRICT3530(n) ((n)->nm_flag & NFSMNT_STRICT3530) #define NFSHASWRITEVERF(n) ((n)->nm_state & NFSSTA_HASWRITEVERF) #define NFSHASINT(n) ((n)->nm_flag & NFSMNT_INT) #define NFSHASSOFT(n) ((n)->nm_flag & NFSMNT_SOFT) #define NFSHASINTORSOFT(n) ((n)->nm_flag & (NFSMNT_INT | NFSMNT_SOFT)) #define NFSHASDUMBTIMR(n) ((n)->nm_flag & NFSMNT_DUMBTIMR) #define NFSHASNOCONN(n) ((n)->nm_flag & NFSMNT_MNTD) #define NFSHASKERB(n) ((n)->nm_flag & NFSMNT_KERB) #define NFSHASALLGSSNAME(n) ((n)->nm_flag & NFSMNT_ALLGSSNAME) #define NFSHASINTEGRITY(n) ((n)->nm_flag & NFSMNT_INTEGRITY) #define NFSHASPRIVACY(n) ((n)->nm_flag & NFSMNT_PRIVACY) #define NFSSETWRITEVERF(n) ((n)->nm_state |= NFSSTA_HASWRITEVERF) #define NFSSETHASSETFSID(n) ((n)->nm_state |= NFSSTA_HASSETFSID) #define NFSHASPNFSOPT(n) ((n)->nm_flag & NFSMNT_PNFS) #define NFSHASNOLAYOUTCOMMIT(n) ((n)->nm_state & NFSSTA_NOLAYOUTCOMMIT) #define NFSHASSESSPERSIST(n) ((n)->nm_state & NFSSTA_SESSPERSIST) #define NFSHASPNFS(n) ((n)->nm_state & NFSSTA_PNFS) #define NFSHASFLEXFILE(n) ((n)->nm_state & NFSSTA_FLEXFILE) #define NFSHASOPENMODE(n) ((n)->nm_state & NFSSTA_OPENMODE) #define NFSHASONEOPENOWN(n) (((n)->nm_flag & NFSMNT_ONEOPENOWN) != 0 && \ (n)->nm_minorvers > 0) /* * Gets the stats field out of the mount structure. */ #define vfs_statfs(m) (&((m)->mnt_stat)) /* * Set boottime. */ #define NFSSETBOOTTIME(b) (getboottime(&b)) /* * The size of directory blocks in the buffer cache. * MUST BE in the range of PAGE_SIZE <= NFS_DIRBLKSIZ <= MAXBSIZE!! */ #define NFS_DIRBLKSIZ (16 * DIRBLKSIZ) /* Must be a multiple of DIRBLKSIZ */ /* * Define these macros to access mnt_flag fields. */ #define NFSMNT_RDONLY(m) ((m)->mnt_flag & MNT_RDONLY) #endif /* _KERNEL */ /* * Define a structure similar to ufs_args for use in exporting the V4 root. */ struct nfsex_args { char *fspec; struct export_args export; }; /* * These export flags should be defined, but there are no bits left. * Maybe a separate mnt_exflag field could be added or the mnt_flag * field increased to 64 bits? */ #ifndef MNT_EXSTRICTACCESS #define MNT_EXSTRICTACCESS 0x0 #endif #ifndef MNT_EXV4ONLY #define MNT_EXV4ONLY 0x0 #endif #ifdef _KERNEL /* * Define this to invalidate the attribute cache for the nfs node. */ #define NFSINVALATTRCACHE(n) ((n)->n_attrstamp = 0) /* Used for FreeBSD only */ void nfsd_mntinit(void); /* * Define these for vnode lock/unlock ops. * * These are good abstractions to macro out, so that they can be added to * later, for debugging or stats, etc. */ #define NFSVOPLOCK(v, f) vn_lock((v), (f)) #define NFSVOPUNLOCK(v, f) VOP_UNLOCK((v), (f)) #define NFSVOPISLOCKED(v) VOP_ISLOCKED((v)) /* * Define ncl_hash(). */ #define ncl_hash(f, l) (fnv_32_buf((f), (l), FNV1_32_INIT)) int newnfs_iosize(struct nfsmount *); int newnfs_vncmpf(struct vnode *, void *); #ifndef NFS_MINDIRATTRTIMO #define NFS_MINDIRATTRTIMO 3 /* VDIR attrib cache timeout in sec */ #endif #ifndef NFS_MAXDIRATTRTIMO #define NFS_MAXDIRATTRTIMO 60 #endif /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; u_int32_t r_flags; /* flags on request, see below */ struct nfsmount *r_nmp; /* Client mnt ptr */ struct mtx r_mtx; /* Mutex lock for this structure */ }; #ifndef NFS_MAXBSIZE #define NFS_MAXBSIZE (maxbcachebuf) #endif /* * This macro checks to see if issuing of delegations is allowed for this * vnode. */ #ifdef VV_DISABLEDELEG #define NFSVNO_DELEGOK(v) \ ((v) == NULL || ((v)->v_vflag & VV_DISABLEDELEG) == 0) #else #define NFSVNO_DELEGOK(v) (1) #endif /* * Name used by getnewvnode() to describe filesystem, "nfs". * For performance reasons it is useful to have the same string * used in both places that call getnewvnode(). */ extern const char nfs_vnode_tag[]; #endif /* _KERNEL */ #endif /* _NFS_NFSPORT_H */ Index: head/sys/fs/nfsclient/nfs_clcomsubs.c =================================================================== --- head/sys/fs/nfsclient/nfs_clcomsubs.c (revision 328416) +++ head/sys/fs/nfsclient/nfs_clcomsubs.c (revision 328417) @@ -1,679 +1,679 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #ifndef APPLEKEXT #include extern struct nfsstatsv1 nfsstatsv1; extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; extern int ncl_mbuf_mlen; extern enum vtype newnv2tov_type[8]; extern enum vtype nv34tov_type[8]; extern int nfs_bigreply[NFSV41_NPROCS]; NFSCLSTATEMUTEX; #endif /* !APPLEKEXT */ static nfsuint64 nfs_nullcookie = {{ 0, 0 }}; static struct { int op; int opcnt; const u_char *tag; int taglen; } nfsv4_opmap[NFSV41_NPROCS] = { { 0, 1, "Null", 4 }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_SETATTR, 2, "Setattr", 7, }, { NFSV4OP_LOOKUP, 3, "Lookup", 6, }, { NFSV4OP_ACCESS, 2, "Access", 6, }, { NFSV4OP_READLINK, 2, "Readlink", 8, }, { NFSV4OP_READ, 1, "Read", 4, }, { NFSV4OP_WRITE, 2, "Write", 5, }, { NFSV4OP_OPEN, 5, "Open", 4, }, { NFSV4OP_CREATE, 5, "Create", 6, }, { NFSV4OP_CREATE, 1, "Create", 6, }, { NFSV4OP_CREATE, 3, "Create", 6, }, { NFSV4OP_REMOVE, 1, "Remove", 6, }, { NFSV4OP_REMOVE, 1, "Remove", 6, }, { NFSV4OP_SAVEFH, 5, "Rename", 6, }, { NFSV4OP_SAVEFH, 4, "Link", 4, }, { NFSV4OP_READDIR, 2, "Readdir", 7, }, { NFSV4OP_READDIR, 2, "Readdir", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_COMMIT, 2, "Commit", 6, }, { NFSV4OP_LOOKUPP, 3, "Lookupp", 7, }, { NFSV4OP_SETCLIENTID, 1, "SetClientID", 11, }, { NFSV4OP_SETCLIENTIDCFRM, 1, "SetClientIDConfirm", 18, }, { NFSV4OP_LOCK, 1, "Lock", 4, }, { NFSV4OP_LOCKU, 1, "LockU", 5, }, { NFSV4OP_OPEN, 2, "Open", 4, }, { NFSV4OP_CLOSE, 1, "Close", 5, }, { NFSV4OP_OPENCONFIRM, 1, "Openconfirm", 11, }, { NFSV4OP_LOCKT, 1, "LockT", 5, }, { NFSV4OP_OPENDOWNGRADE, 1, "Opendowngrade", 13, }, { NFSV4OP_RENEW, 1, "Renew", 5, }, { NFSV4OP_PUTROOTFH, 1, "Dirpath", 7, }, { NFSV4OP_RELEASELCKOWN, 1, "Rellckown", 9, }, { NFSV4OP_DELEGRETURN, 1, "Delegret", 8, }, { NFSV4OP_DELEGRETURN, 3, "DelegRemove", 11, }, { NFSV4OP_DELEGRETURN, 7, "DelegRename1", 12, }, { NFSV4OP_DELEGRETURN, 9, "DelegRename2", 12, }, { NFSV4OP_GETATTR, 1, "Getacl", 6, }, { NFSV4OP_SETATTR, 1, "Setacl", 6, }, { NFSV4OP_EXCHANGEID, 1, "ExchangeID", 10, }, { NFSV4OP_CREATESESSION, 1, "CreateSession", 13, }, { NFSV4OP_DESTROYSESSION, 1, "DestroySession", 14, }, { NFSV4OP_DESTROYCLIENTID, 1, "DestroyClient", 13, }, { NFSV4OP_FREESTATEID, 1, "FreeStateID", 11, }, { NFSV4OP_LAYOUTGET, 1, "LayoutGet", 9, }, { NFSV4OP_GETDEVINFO, 1, "GetDeviceInfo", 13, }, { NFSV4OP_LAYOUTCOMMIT, 1, "LayoutCommit", 12, }, { NFSV4OP_LAYOUTRETURN, 1, "LayoutReturn", 12, }, { NFSV4OP_RECLAIMCOMPL, 1, "ReclaimComplete", 15, }, { NFSV4OP_WRITE, 1, "WriteDS", 7, }, { NFSV4OP_READ, 1, "ReadDS", 6, }, { NFSV4OP_COMMIT, 1, "CommitDS", 8, }, { NFSV4OP_OPEN, 3, "OpenLayoutGet", 13, }, { NFSV4OP_OPEN, 8, "CreateLayGet", 12, }, }; /* * NFS RPCS that have large request message size. */ static int nfs_bigrequest[NFSV41_NPROCS] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }; /* * Start building a request. Mostly just put the first file handle in * place. */ APPLESTATIC void nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp, struct nfsclsession *sep, int vers, int minorvers) { struct mbuf *mb; u_int32_t *tl; int opcnt; nfsattrbit_t attrbits; /* * First, fill in some of the fields of nd. */ nd->nd_slotseq = NULL; if (vers == NFS_VER4) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; if (minorvers == NFSV41_MINORVERSION) nd->nd_flag |= ND_NFSV41; } else if (vers == NFS_VER3) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else { if (NFSHASNFSV4(nmp)) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; if (NFSHASNFSV4N(nmp)) nd->nd_flag |= ND_NFSV41; } else if (NFSHASNFSV3(nmp)) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else nd->nd_flag = ND_NFSV2 | ND_NFSCL; } nd->nd_procnum = procnum; nd->nd_repstat = 0; /* * Get the first mbuf for the request. */ if (nfs_bigrequest[procnum]) NFSMCLGET(mb, M_WAITOK); else NFSMGET(mb); mbuf_setlen(mb, 0); nd->nd_mreq = nd->nd_mb = mb; nd->nd_bpos = NFSMTOD(mb, caddr_t); /* * And fill the first file handle into the request. */ if (nd->nd_flag & ND_NFSV4) { opcnt = nfsv4_opmap[procnum].opcnt + nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh; if ((nd->nd_flag & ND_NFSV41) != 0) { opcnt += nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq; if (procnum == NFSPROC_RENEW) /* * For the special case of Renew, just do a * Sequence Op. */ opcnt = 1; else if (procnum == NFSPROC_WRITEDS || procnum == NFSPROC_COMMITDS) /* * For the special case of a Writeor Commit to * a DS, the opcnt == 3, for Sequence, PutFH, * Write/Commit. */ opcnt = 3; } /* * What should the tag really be? */ (void) nfsm_strtom(nd, nfsv4_opmap[procnum].tag, nfsv4_opmap[procnum].taglen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if ((nd->nd_flag & ND_NFSV41) != 0) *tl++ = txdr_unsigned(NFSV41_MINORVERSION); else *tl++ = txdr_unsigned(NFSV4_MINORVERSION); if (opcntpp != NULL) *opcntpp = tl; *tl = txdr_unsigned(opcnt); if ((nd->nd_flag & ND_NFSV41) != 0 && nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq > 0) { if (nfsv4_opflag[nfsv4_opmap[procnum].op].loopbadsess > 0) nd->nd_flag |= ND_LOOPBADSESS; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_SEQUENCE); if (sep == NULL) { sep = nfsmnt_mdssession(nmp); nfsv4_setsequence(nmp, nd, sep, nfs_bigreply[procnum]); } else nfsv4_setsequence(nmp, nd, sep, nfs_bigreply[procnum]); } if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh > 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, nfhp, fhlen, 0); if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh == 2 && procnum != NFSPROC_WRITEDS && procnum != NFSPROC_COMMITDS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); /* * For Lookup Ops, we want all the directory * attributes, so we can load the name cache. */ if (procnum == NFSPROC_LOOKUP || procnum == NFSPROC_LOOKUPP) NFSGETATTR_ATTRBIT(&attrbits); else { NFSWCCATTR_ATTRBIT(&attrbits); nd->nd_flag |= ND_V4WCCATTR; } (void) nfsrv_putattrbit(nd, &attrbits); } } if (procnum != NFSPROC_RENEW || (nd->nd_flag & ND_NFSV41) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsv4_opmap[procnum].op); } } else { (void) nfsm_fhtom(nd, nfhp, fhlen, 0); } if (procnum < NFSV41_NPROCS) NFSINCRGLOBAL(nfsstatsv1.rpccnt[procnum]); } /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can ony handle iovcnt == 1 */ APPLESTATIC void nfsm_uiombuf(struct nfsrv_descript *nd, struct uio *uiop, int siz) { char *uiocp; struct mbuf *mp, *mp2; int xfer, left, mlen; int uiosiz, clflg, rem; char *cp, *tcp; KASSERT(uiop->uio_iovcnt == 1, ("nfsm_uiotombuf: iovcnt != 1")); if (siz > ncl_mbuf_mlen) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = NFSM_RNDUP(siz) - siz; mp = mp2 = nd->nd_mb; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { if (clflg) NFSMCLGET(mp, M_WAITOK); else NFSMGET(mp); mbuf_setlen(mp, 0); mbuf_setnext(mp2, mp); mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) NFSBCOPY(uiocp, NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); else copyin(CAST_USER_ADDR_T(uiocp), NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); mbuf_setlen(mp, mbuf_len(mp) + xfer); left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } tcp = (char *)uiop->uio_iov->iov_base; tcp += uiosiz; uiop->uio_iov->iov_base = (void *)tcp; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { NFSMGET(mp); mbuf_setlen(mp, 0); mbuf_setnext(mp2, mp); } cp = NFSMTOD(mp, caddr_t) + mbuf_len(mp); for (left = 0; left < rem; left++) *cp++ = '\0'; mbuf_setlen(mp, mbuf_len(mp) + rem); nd->nd_bpos = cp; } else nd->nd_bpos = NFSMTOD(mp, caddr_t) + mbuf_len(mp); nd->nd_mb = mp; } /* * copies a uio scatter/gather list to an mbuf chain. * This version returns the mbuf list and does not use "nd". * NOTE: can ony handle iovcnt == 1 */ struct mbuf * nfsm_uiombuflist(struct uio *uiop, int siz, struct mbuf **mbp, char **cpp) { char *uiocp; struct mbuf *mp, *mp2, *firstmp; int xfer, left, mlen; int uiosiz, clflg, rem; char *tcp; KASSERT(uiop->uio_iovcnt == 1, ("nfsm_uiotombuf: iovcnt != 1")); if (siz > ncl_mbuf_mlen) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = NFSM_RNDUP(siz) - siz; if (clflg != 0) NFSMCLGET(mp, M_WAITOK); else NFSMGET(mp); mbuf_setlen(mp, 0); firstmp = mp2 = mp; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { if (clflg) NFSMCLGET(mp, M_WAITOK); else NFSMGET(mp); mbuf_setlen(mp, 0); mbuf_setnext(mp2, mp); mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; if (uiop->uio_segflg == UIO_SYSSPACE) NFSBCOPY(uiocp, NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); else copyin(uiocp, NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); mbuf_setlen(mp, mbuf_len(mp) + xfer); left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } tcp = (char *)uiop->uio_iov->iov_base; tcp += uiosiz; uiop->uio_iov->iov_base = (void *)tcp; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (cpp != NULL) *cpp = NFSMTOD(mp, caddr_t) + mbuf_len(mp); if (mbp != NULL) *mbp = mp; return (firstmp); } /* * Load vnode attributes from the xdr file attributes. * Returns EBADRPC if they can't be parsed, 0 otherwise. */ APPLESTATIC int nfsm_loadattr(struct nfsrv_descript *nd, struct nfsvattr *nap) { struct nfs_fattr *fp; int error = 0; if (nd->nd_flag & ND_NFSV4) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(fp, struct nfs_fattr *, NFSX_V3FATTR); nap->na_type = nfsv34tov_type(fp->fa_type); nap->na_mode = fxdr_unsigned(u_short, fp->fa_mode); nap->na_rdev = makedev(fxdr_unsigned(u_char, fp->fa3_rdev.specdata1), fxdr_unsigned(u_char, fp->fa3_rdev.specdata2)); nap->na_nlink = fxdr_unsigned(uint32_t, fp->fa_nlink); nap->na_uid = fxdr_unsigned(uid_t, fp->fa_uid); nap->na_gid = fxdr_unsigned(gid_t, fp->fa_gid); nap->na_size = fxdr_hyper(&fp->fa3_size); nap->na_blocksize = NFS_FABLKSIZE; nap->na_bytes = fxdr_hyper(&fp->fa3_used); nap->na_fileid = fxdr_hyper(&fp->fa3_fileid); fxdr_nfsv3time(&fp->fa3_atime, &nap->na_atime); fxdr_nfsv3time(&fp->fa3_ctime, &nap->na_ctime); fxdr_nfsv3time(&fp->fa3_mtime, &nap->na_mtime); nap->na_flags = 0; nap->na_filerev = 0; } else { NFSM_DISSECT(fp, struct nfs_fattr *, NFSX_V2FATTR); nap->na_type = nfsv2tov_type(fp->fa_type); nap->na_mode = fxdr_unsigned(u_short, fp->fa_mode); if (nap->na_type == VNON || nap->na_type == VREG) nap->na_type = IFTOVT(nap->na_mode); nap->na_rdev = fxdr_unsigned(dev_t, fp->fa2_rdev); /* * Really ugly NFSv2 kludge. */ if (nap->na_type == VCHR && nap->na_rdev == ((dev_t)-1)) nap->na_type = VFIFO; nap->na_nlink = fxdr_unsigned(u_short, fp->fa_nlink); nap->na_uid = fxdr_unsigned(uid_t, fp->fa_uid); nap->na_gid = fxdr_unsigned(gid_t, fp->fa_gid); nap->na_size = fxdr_unsigned(u_int32_t, fp->fa2_size); nap->na_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); nap->na_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) * NFS_FABLKSIZE; nap->na_fileid = fxdr_unsigned(uint64_t, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &nap->na_atime); fxdr_nfsv2time(&fp->fa2_mtime, &nap->na_mtime); nap->na_flags = 0; nap->na_ctime.tv_sec = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_sec); nap->na_ctime.tv_nsec = 0; nap->na_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); nap->na_filerev = 0; } nfsmout: return (error); } /* * This function finds the directory cookie that corresponds to the * logical byte offset given. */ APPLESTATIC nfsuint64 * nfscl_getcookie(struct nfsnode *np, off_t off, int add) { struct nfsdmap *dp, *dp2; int pos; pos = off / NFS_DIRBLKSIZ; if (pos == 0) { KASSERT(!add, ("nfs getcookie add at 0")); return (&nfs_nullcookie); } pos--; dp = LIST_FIRST(&np->n_cookies); if (!dp) { if (add) { - MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), + dp = malloc(sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return (NULL); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (LIST_NEXT(dp, ndm_list) != NULL) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return (NULL); dp = LIST_NEXT(dp, ndm_list); } else if (add) { - MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), + dp2 = malloc(sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return (NULL); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return (NULL); } return (&dp->ndm_cookies[pos]); } /* * Gets a file handle out of an nfs reply sent to the client and returns * the file handle and the file's attributes. * For V4, it assumes that Getfh and Getattr Op's results are here. */ APPLESTATIC int nfscl_mtofh(struct nfsrv_descript *nd, struct nfsfh **nfhpp, struct nfsvattr *nap, int *attrflagp) { u_int32_t *tl; int error = 0, flag = 1; *nfhpp = NULL; *attrflagp = 0; /* * First get the file handle and vnode. */ if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); flag = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); /* If the GetFH failed, clear flag. */ if (*++tl != 0) { nd->nd_flag |= ND_NOMOREDATA; flag = 0; error = ENXIO; /* Return ENXIO so *nfhpp isn't used. */ } } if (flag) { error = nfsm_getfh(nd, nfhpp); if (error) return (error); } /* * Now, get the attributes. */ if (flag != 0 && (nd->nd_flag & ND_NFSV4) != 0) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) { nd->nd_flag |= ND_NOMOREDATA; flag = 0; } } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (flag) { flag = fxdr_unsigned(int, *tl); } else if (fxdr_unsigned(int, *tl)) { error = nfsm_advance(nd, NFSX_V3FATTR, -1); if (error) return (error); } } if (flag) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = 1; } nfsmout: return (error); } /* * Put a state Id in the mbuf list. */ APPLESTATIC void nfsm_stateidtom(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, int flag) { nfsv4stateid_t *st; NFSM_BUILD(st, nfsv4stateid_t *, NFSX_STATEID); if (flag == NFSSTATEID_PUTALLZERO) { st->seqid = 0; st->other[0] = 0; st->other[1] = 0; st->other[2] = 0; } else if (flag == NFSSTATEID_PUTALLONE) { st->seqid = 0xffffffff; st->other[0] = 0xffffffff; st->other[1] = 0xffffffff; st->other[2] = 0xffffffff; } else if (flag == NFSSTATEID_PUTSEQIDZERO) { st->seqid = 0; st->other[0] = stateidp->other[0]; st->other[1] = stateidp->other[1]; st->other[2] = stateidp->other[2]; } else { st->seqid = stateidp->seqid; st->other[0] = stateidp->other[0]; st->other[1] = stateidp->other[1]; st->other[2] = stateidp->other[2]; } } /* * Initialize the owner/delegation sleep lock. */ APPLESTATIC void nfscl_lockinit(struct nfsv4lock *lckp) { lckp->nfslock_usecnt = 0; lckp->nfslock_lock = 0; } /* * Get an exclusive lock. (Not needed for OpenBSD4, since there is only one * thread for each posix process in the kernel.) */ APPLESTATIC void nfscl_lockexcl(struct nfsv4lock *lckp, void *mutex) { int igotlock; do { igotlock = nfsv4_lock(lckp, 1, NULL, mutex, NULL); } while (!igotlock); } /* * Release an exclusive lock. */ APPLESTATIC void nfscl_lockunlock(struct nfsv4lock *lckp) { nfsv4_unlock(lckp, 0); } /* * Called to derefernce a lock on a stateid (delegation or open owner). */ APPLESTATIC void nfscl_lockderef(struct nfsv4lock *lckp) { NFSLOCKCLSTATE(); lckp->nfslock_usecnt--; if (lckp->nfslock_usecnt == 0 && (lckp->nfslock_lock & NFSV4LOCK_WANTED)) { lckp->nfslock_lock &= ~NFSV4LOCK_WANTED; wakeup((caddr_t)lckp); } NFSUNLOCKCLSTATE(); } Index: head/sys/fs/nfsclient/nfs_clnode.c =================================================================== --- head/sys/fs/nfsclient/nfs_clnode.c (revision 328416) +++ head/sys/fs/nfsclient/nfs_clnode.c (revision 328417) @@ -1,363 +1,363 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from nfs_node.c 8.6 (Berkeley) 5/22/95 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern struct vop_vector newnfs_vnodeops; extern struct buf_ops buf_ops_newnfs; MALLOC_DECLARE(M_NEWNFSREQ); uma_zone_t newnfsnode_zone; const char nfs_vnode_tag[] = "nfs"; static void nfs_freesillyrename(void *arg, __unused int pending); void ncl_nhinit(void) { newnfsnode_zone = uma_zcreate("NCLNODE", sizeof(struct nfsnode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } void ncl_nhuninit(void) { uma_zdestroy(newnfsnode_zone); } /* * ONLY USED FOR THE ROOT DIRECTORY. nfscl_nget() does the rest. If this * function is going to be used to get Regular Files, code must be added * to fill in the "struct nfsv4node". * Look up a vnode/nfsnode by file handle. * Callers must check for mount points!! * In all cases, a pointer to a * nfsnode structure is returned. */ int ncl_nget(struct mount *mntp, u_int8_t *fhp, int fhsize, struct nfsnode **npp, int lkflags) { struct thread *td = curthread; /* XXX */ struct nfsnode *np; struct vnode *vp; struct vnode *nvp; int error; u_int hash; struct nfsmount *nmp; struct nfsfh *nfhp; nmp = VFSTONFS(mntp); *npp = NULL; hash = fnv_32_buf(fhp, fhsize, FNV1_32_INIT); - MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + fhsize, + nfhp = malloc(sizeof (struct nfsfh) + fhsize, M_NFSFH, M_WAITOK); bcopy(fhp, &nfhp->nfh_fh[0], fhsize); nfhp->nfh_len = fhsize; error = vfs_hash_get(mntp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); - FREE(nfhp, M_NFSFH); + free(nfhp, M_NFSFH); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); return (0); } np = uma_zalloc(newnfsnode_zone, M_WAITOK | M_ZERO); error = getnewvnode(nfs_vnode_tag, mntp, &newnfs_vnodeops, &nvp); if (error) { uma_zfree(newnfsnode_zone, np); return (error); } vp = nvp; KASSERT(vp->v_bufobj.bo_bsize != 0, ("ncl_nget: bo_bsize == 0")); vp->v_bufobj.bo_ops = &buf_ops_newnfs; vp->v_data = np; np->n_vnode = vp; /* * Initialize the mutex even if the vnode is going to be a loser. * This simplifies the logic in reclaim, which can then unconditionally * destroy the mutex (in the case of the loser, or if hash_insert * happened to return an error no special casing is needed). */ mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK); lockinit(&np->n_excl, PVFS, "nfsupg", VLKTIMEOUT, LK_NOSHARE | LK_CANRECURSE); /* * NFS supports recursive and shared locking. */ lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL); VN_LOCK_AREC(vp); VN_LOCK_ASHARE(vp); /* * Are we getting the root? If so, make sure the vnode flags * are correct */ if ((fhsize == nmp->nm_fhsize) && !bcmp(fhp, nmp->nm_fh, fhsize)) { if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; } - MALLOC(np->n_fhp, struct nfsfh *, sizeof (struct nfsfh) + fhsize, + np->n_fhp = malloc(sizeof (struct nfsfh) + fhsize, M_NFSFH, M_WAITOK); bcopy(fhp, np->n_fhp->nfh_fh, fhsize); np->n_fhp->nfh_len = fhsize; error = insmntque(vp, mntp); if (error != 0) { *npp = NULL; - FREE((caddr_t)np->n_fhp, M_NFSFH); + free(np->n_fhp, M_NFSFH); mtx_destroy(&np->n_mtx); lockdestroy(&np->n_excl); uma_zfree(newnfsnode_zone, np); return (error); } error = vfs_hash_insert(vp, hash, lkflags, td, &nvp, newnfs_vncmpf, np->n_fhp); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); /* vfs_hash_insert() vput()'s the losing vnode */ return (0); } *npp = np; return (0); } /* * Do the vrele(sp->s_dvp) as a separate task in order to avoid a * deadlock because of a LOR when vrele() locks the directory vnode. */ static void nfs_freesillyrename(void *arg, __unused int pending) { struct sillyrename *sp; sp = arg; vrele(sp->s_dvp); free(sp, M_NEWNFSREQ); } static void ncl_releasesillyrename(struct vnode *vp, struct thread *td) { struct nfsnode *np; struct sillyrename *sp; ASSERT_VOP_ELOCKED(vp, "releasesillyrename"); np = VTONFS(vp); mtx_assert(&np->n_mtx, MA_OWNED); if (vp->v_type != VDIR) { sp = np->n_sillyrename; np->n_sillyrename = NULL; } else sp = NULL; if (sp != NULL) { mtx_unlock(&np->n_mtx); (void) ncl_vinvalbuf(vp, 0, td, 1); /* * Remove the silly file that was rename'd earlier */ ncl_removeit(sp, vp); crfree(sp->s_cred); TASK_INIT(&sp->s_task, 0, nfs_freesillyrename, sp); taskqueue_enqueue(taskqueue_thread, &sp->s_task); mtx_lock(&np->n_mtx); } } int ncl_inactive(struct vop_inactive_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np; boolean_t retv; if (NFS_ISV4(vp) && vp->v_type == VREG) { /* * Since mmap()'d files do I/O after VOP_CLOSE(), the NFSv4 * Close operations are delayed until now. Any dirty * buffers/pages must be flushed before the close, so that the * stateid is available for the writes. */ if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); retv = vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } else retv = TRUE; if (retv == TRUE) { (void)ncl_flush(vp, MNT_WAIT, ap->a_td, 1, 0); (void)nfsrpc_close(vp, 1, ap->a_td); } } np = VTONFS(vp); mtx_lock(&np->n_mtx); ncl_releasesillyrename(vp, ap->a_td); /* * NMODIFIED means that there might be dirty/stale buffers * associated with the NFS vnode. * NDSCOMMIT means that the file is on a pNFS server and commits * should be done to the DS. * None of the other flags are meaningful after the vnode is unused. */ np->n_flag &= (NMODIFIED | NDSCOMMIT); mtx_unlock(&np->n_mtx); return (0); } /* * Reclaim an nfsnode so that it can be used for other purposes. */ int ncl_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsdmap *dp, *dp2; /* * If the NLM is running, give it a chance to abort pending * locks. */ if (nfs_reclaim_p != NULL) nfs_reclaim_p(ap); mtx_lock(&np->n_mtx); ncl_releasesillyrename(vp, ap->a_td); mtx_unlock(&np->n_mtx); /* * Destroy the vm object and flush associated pages. */ vnode_destroy_vobject(vp); if (NFS_ISV4(vp) && vp->v_type == VREG) /* * We can now safely close any remaining NFSv4 Opens for * this file. Most opens will have already been closed by * ncl_inactive(), but there are cases where it is not * called, so we need to do it again here. */ (void) nfsrpc_close(vp, 1, ap->a_td); vfs_hash_remove(vp); /* * Call nfscl_reclaimnode() to save attributes in the delegation, * as required. */ if (vp->v_type == VREG) nfscl_reclaimnode(vp); /* * Free up any directory cookie structures and * large file handle structures that might be associated with * this nfs node. */ if (vp->v_type == VDIR) { dp = LIST_FIRST(&np->n_cookies); while (dp) { dp2 = dp; dp = LIST_NEXT(dp, ndm_list); - FREE((caddr_t)dp2, M_NFSDIROFF); + free(dp2, M_NFSDIROFF); } } if (np->n_writecred != NULL) crfree(np->n_writecred); - FREE((caddr_t)np->n_fhp, M_NFSFH); + free(np->n_fhp, M_NFSFH); if (np->n_v4 != NULL) - FREE((caddr_t)np->n_v4, M_NFSV4NODE); + free(np->n_v4, M_NFSV4NODE); mtx_destroy(&np->n_mtx); lockdestroy(&np->n_excl); uma_zfree(newnfsnode_zone, vp->v_data); vp->v_data = NULL; return (0); } /* * Invalidate both the access and attribute caches for this vnode. */ void ncl_invalcaches(struct vnode *vp) { struct nfsnode *np = VTONFS(vp); int i; mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) np->n_accesscache[i].stamp = 0; KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); mtx_unlock(&np->n_mtx); } Index: head/sys/fs/nfsclient/nfs_clport.c =================================================================== --- head/sys/fs/nfsclient/nfs_clport.c (revision 328416) +++ head/sys/fs/nfsclient/nfs_clport.c (revision 328417) @@ -1,1495 +1,1495 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include /* * generally, I don't like #includes inside .h files, but it seems to * be the easiest way to handle the port. */ #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS dtrace_nfsclient_attrcache_flush_probe_func_t dtrace_nfscl_attrcache_flush_done_probe; uint32_t nfscl_attrcache_flush_done_id; dtrace_nfsclient_attrcache_get_hit_probe_func_t dtrace_nfscl_attrcache_get_hit_probe; uint32_t nfscl_attrcache_get_hit_id; dtrace_nfsclient_attrcache_get_miss_probe_func_t dtrace_nfscl_attrcache_get_miss_probe; uint32_t nfscl_attrcache_get_miss_id; dtrace_nfsclient_attrcache_load_probe_func_t dtrace_nfscl_attrcache_load_done_probe; uint32_t nfscl_attrcache_load_done_id; #endif /* !KDTRACE_HOOKS */ extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern struct vop_vector newnfs_vnodeops; extern struct vop_vector newnfs_fifoops; extern uma_zone_t newnfsnode_zone; extern struct buf_ops buf_ops_newnfs; extern int ncl_pbuf_freecnt; extern short nfsv4_cbport; extern int nfscl_enablecallb; extern int nfs_numnfscbd; extern int nfscl_inited; struct mtx ncl_iod_mutex; NFSDLOCKMUTEX; extern void (*ncl_call_invalcaches)(struct vnode *); SYSCTL_DECL(_vfs_nfs); static int ncl_fileid_maxwarnings = 10; SYSCTL_INT(_vfs_nfs, OID_AUTO, fileid_maxwarnings, CTLFLAG_RWTUN, &ncl_fileid_maxwarnings, 0, "Limit fileid corruption warnings; 0 is off; -1 is unlimited"); static volatile int ncl_fileid_nwarnings; static void nfscl_warn_fileid(struct nfsmount *, struct nfsvattr *, struct nfsvattr *); /* * Comparison function for vfs_hash functions. */ int newnfs_vncmpf(struct vnode *vp, void *arg) { struct nfsfh *nfhp = (struct nfsfh *)arg; struct nfsnode *np = VTONFS(vp); if (np->n_fhp->nfh_len != nfhp->nfh_len || NFSBCMP(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len)) return (1); return (0); } /* * Look up a vnode/nfsnode by file handle. * Callers must check for mount points!! * In all cases, a pointer to a * nfsnode structure is returned. * This variant takes a "struct nfsfh *" as second argument and uses * that structure up, either by hanging off the nfsnode or FREEing it. */ int nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp, struct componentname *cnp, struct thread *td, struct nfsnode **npp, void *stuff, int lkflags) { struct nfsnode *np, *dnp; struct vnode *vp, *nvp; struct nfsv4node *newd, *oldd; int error; u_int hash; struct nfsmount *nmp; nmp = VFSTONFS(mntp); dnp = VTONFS(dvp); *npp = NULL; hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT); error = vfs_hash_get(mntp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); if (error == 0 && nvp != NULL) { /* * I believe there is a slight chance that vgonel() could * get called on this vnode between when NFSVOPLOCK() drops * the VI_LOCK() and vget() acquires it again, so that it * hasn't yet had v_usecount incremented. If this were to * happen, the VI_DOOMED flag would be set, so check for * that here. Since we now have the v_usecount incremented, * we should be ok until we vrele() it, if the VI_DOOMED * flag isn't set now. */ VI_LOCK(nvp); if ((nvp->v_iflag & VI_DOOMED)) { VI_UNLOCK(nvp); vrele(nvp); error = ENOENT; } else { VI_UNLOCK(nvp); } } if (error) { - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); return (error); } if (nvp != NULL) { np = VTONFS(nvp); /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ oldd = newd = NULL; if ((nmp->nm_flag & NFSMNT_NFSV4) && np->n_v4 != NULL && nvp->v_type == VREG && (np->n_v4->n4_namelen != cnp->cn_namelen || NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { - MALLOC(newd, struct nfsv4node *, + newd = malloc( sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + + cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); NFSLOCKNODE(np); if (newd != NULL && np->n_v4 != NULL && nvp->v_type == VREG && (np->n_v4->n4_namelen != cnp->cn_namelen || NFSBCMP(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { oldd = np->n_v4; np->n_v4 = newd; newd = NULL; np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = cnp->cn_namelen; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen); } NFSUNLOCKNODE(np); } if (newd != NULL) - FREE((caddr_t)newd, M_NFSV4NODE); + free(newd, M_NFSV4NODE); if (oldd != NULL) - FREE((caddr_t)oldd, M_NFSV4NODE); + free(oldd, M_NFSV4NODE); *npp = np; - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); return (0); } np = uma_zalloc(newnfsnode_zone, M_WAITOK | M_ZERO); error = getnewvnode(nfs_vnode_tag, mntp, &newnfs_vnodeops, &nvp); if (error) { uma_zfree(newnfsnode_zone, np); - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); return (error); } vp = nvp; KASSERT(vp->v_bufobj.bo_bsize != 0, ("nfscl_nget: bo_bsize == 0")); vp->v_bufobj.bo_ops = &buf_ops_newnfs; vp->v_data = np; np->n_vnode = vp; /* * Initialize the mutex even if the vnode is going to be a loser. * This simplifies the logic in reclaim, which can then unconditionally * destroy the mutex (in the case of the loser, or if hash_insert * happened to return an error no special casing is needed). */ mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK); lockinit(&np->n_excl, PVFS, "nfsupg", VLKTIMEOUT, LK_NOSHARE | LK_CANRECURSE); /* * Are we getting the root? If so, make sure the vnode flags * are correct */ if ((nfhp->nfh_len == nmp->nm_fhsize) && !bcmp(nfhp->nfh_fh, nmp->nm_fh, nfhp->nfh_len)) { if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; } np->n_fhp = nfhp; /* * For NFSv4, we have to attach the directory file handle and * file name, so that Open Ops can be done later. */ if (nmp->nm_flag & NFSMNT_NFSV4) { - MALLOC(np->n_v4, struct nfsv4node *, sizeof (struct nfsv4node) + np->n_v4 = malloc(sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + cnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = cnp->cn_namelen; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(cnp->cn_nameptr, NFS4NODENAME(np->n_v4), cnp->cn_namelen); } else { np->n_v4 = NULL; } /* * NFS supports recursive and shared locking. */ lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL); VN_LOCK_AREC(vp); VN_LOCK_ASHARE(vp); error = insmntque(vp, mntp); if (error != 0) { *npp = NULL; mtx_destroy(&np->n_mtx); lockdestroy(&np->n_excl); - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); if (np->n_v4 != NULL) - FREE((caddr_t)np->n_v4, M_NFSV4NODE); + free(np->n_v4, M_NFSV4NODE); uma_zfree(newnfsnode_zone, np); return (error); } error = vfs_hash_insert(vp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); /* vfs_hash_insert() vput()'s the losing vnode */ return (0); } *npp = np; return (0); } /* * Another variant of nfs_nget(). This one is only used by reopen. It * takes almost the same args as nfs_nget(), but only succeeds if an entry * exists in the cache. (Since files should already be "open" with a * vnode ref cnt on the node when reopen calls this, it should always * succeed.) * Also, don't get a vnode lock, since it may already be locked by some * other process that is handling it. This is ok, since all other threads * on the client are blocked by the nfsc_lock being exclusively held by the * caller of this function. */ int nfscl_ngetreopen(struct mount *mntp, u_int8_t *fhp, int fhsize, struct thread *td, struct nfsnode **npp) { struct vnode *nvp; u_int hash; struct nfsfh *nfhp; int error; *npp = NULL; /* For forced dismounts, just return error. */ if (NFSCL_FORCEDISM(mntp)) return (EINTR); - MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + fhsize, + nfhp = malloc(sizeof (struct nfsfh) + fhsize, M_NFSFH, M_WAITOK); bcopy(fhp, &nfhp->nfh_fh[0], fhsize); nfhp->nfh_len = fhsize; hash = fnv_32_buf(fhp, fhsize, FNV1_32_INIT); /* * First, try to get the vnode locked, but don't block for the lock. */ error = vfs_hash_get(mntp, hash, (LK_EXCLUSIVE | LK_NOWAIT), td, &nvp, newnfs_vncmpf, nfhp); if (error == 0 && nvp != NULL) { NFSVOPUNLOCK(nvp, 0); } else if (error == EBUSY) { /* * It is safe so long as a vflush() with * FORCECLOSE has not been done. Since the Renew thread is * stopped and the MNTK_UNMOUNTF flag is set before doing * a vflush() with FORCECLOSE, we should be ok here. */ if (NFSCL_FORCEDISM(mntp)) error = EINTR; else { vfs_hash_ref(mntp, hash, td, &nvp, newnfs_vncmpf, nfhp); if (nvp == NULL) { error = ENOENT; } else if ((nvp->v_iflag & VI_DOOMED) != 0) { error = ENOENT; vrele(nvp); } else { error = 0; } } } - FREE(nfhp, M_NFSFH); + free(nfhp, M_NFSFH); if (error) return (error); if (nvp != NULL) { *npp = VTONFS(nvp); return (0); } return (EINVAL); } static void nfscl_warn_fileid(struct nfsmount *nmp, struct nfsvattr *oldnap, struct nfsvattr *newnap) { int off; if (ncl_fileid_maxwarnings >= 0 && ncl_fileid_nwarnings >= ncl_fileid_maxwarnings) return; off = 0; if (ncl_fileid_maxwarnings >= 0) { if (++ncl_fileid_nwarnings >= ncl_fileid_maxwarnings) off = 1; } printf("newnfs: server '%s' error: fileid changed. " "fsid %jx:%jx: expected fileid %#jx, got %#jx. " "(BROKEN NFS SERVER OR MIDDLEWARE)\n", nmp->nm_com.nmcom_hostname, (uintmax_t)nmp->nm_fsid[0], (uintmax_t)nmp->nm_fsid[1], (uintmax_t)oldnap->na_fileid, (uintmax_t)newnap->na_fileid); if (off) printf("newnfs: Logged %d times about fileid corruption; " "going quiet to avoid spamming logs excessively. (Limit " "is: %d).\n", ncl_fileid_nwarnings, ncl_fileid_maxwarnings); } /* * Load the attribute cache (that lives in the nfsnode entry) with * the attributes of the second argument and * Iff vaper not NULL * copy the attributes to *vaper * Similar to nfs_loadattrcache(), except the attributes are passed in * instead of being parsed out of the mbuf list. */ int nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, void *stuff, int writeattr, int dontshrink) { struct vnode *vp = *vpp; struct vattr *vap, *nvap = &nap->na_vattr, *vaper = nvaper; struct nfsnode *np; struct nfsmount *nmp; struct timespec mtime_save; u_quad_t nsize; int setnsize, error, force_fid_err; error = 0; setnsize = 0; nsize = 0; /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); NFSLOCKNODE(np); if (vp->v_type != nvap->va_type) { vp->v_type = nvap->va_type; if (vp->v_type == VFIFO) vp->v_op = &newnfs_fifoops; np->n_mtime = nvap->va_mtime; } nmp = VFSTONFS(vp->v_mount); vap = &np->n_vattr.na_vattr; mtime_save = vap->va_mtime; if (writeattr) { np->n_vattr.na_filerev = nap->na_filerev; np->n_vattr.na_size = nap->na_size; np->n_vattr.na_mtime = nap->na_mtime; np->n_vattr.na_ctime = nap->na_ctime; np->n_vattr.na_fsid = nap->na_fsid; np->n_vattr.na_mode = nap->na_mode; } else { force_fid_err = 0; KFAIL_POINT_ERROR(DEBUG_FP, nfscl_force_fileid_warning, force_fid_err); /* * BROKEN NFS SERVER OR MIDDLEWARE * * Certain NFS servers (certain old proprietary filers ca. * 2006) or broken middleboxes (e.g. WAN accelerator products) * will respond to GETATTR requests with results for a * different fileid. * * The WAN accelerator we've observed not only serves stale * cache results for a given file, it also occasionally serves * results for wholly different files. This causes surprising * problems; for example the cached size attribute of a file * may truncate down and then back up, resulting in zero * regions in file contents read by applications. We observed * this reliably with Clang and .c files during parallel build. * A pcap revealed packet fragmentation and GETATTR RPC * responses with wholly wrong fileids. */ if ((np->n_vattr.na_fileid != 0 && np->n_vattr.na_fileid != nap->na_fileid) || force_fid_err) { nfscl_warn_fileid(nmp, &np->n_vattr, nap); error = EIDRM; goto out; } NFSBCOPY((caddr_t)nap, (caddr_t)&np->n_vattr, sizeof (struct nfsvattr)); } /* * For NFSv4, if the node's fsid is not equal to the mount point's * fsid, return the low order 32bits of the node's fsid. This * allows getcwd(3) to work. There is a chance that the fsid might * be the same as a local fs, but since this is in an NFS mount * point, I don't think that will cause any problems? */ if (NFSHASNFSV4(nmp) && NFSHASHASSETFSID(nmp) && (nmp->nm_fsid[0] != np->n_vattr.na_filesid[0] || nmp->nm_fsid[1] != np->n_vattr.na_filesid[1])) { /* * va_fsid needs to be set to some value derived from * np->n_vattr.na_filesid that is not equal * vp->v_mount->mnt_stat.f_fsid[0], so that it changes * from the value used for the top level server volume * in the mounted subtree. */ vn_fsid(vp, vap); if ((uint32_t)vap->va_fsid == np->n_vattr.na_filesid[0]) vap->va_fsid = hash32_buf( np->n_vattr.na_filesid, 2 * sizeof(uint64_t), 0); } else vn_fsid(vp, vap); np->n_attrstamp = time_second; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (dontshrink && vap->va_size < np->n_size) { /* * We've been told not to shrink the file; * zero np->n_attrstamp to indicate that * the attributes are stale. */ vap->va_size = np->n_size; np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); vnode_pager_setsize(vp, np->n_size); } else if (np->n_flag & NMODIFIED) { /* * We've modified the file: Use the larger * of our size, and the server's size. */ if (vap->va_size < np->n_size) { vap->va_size = np->n_size; } else { np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; } vnode_pager_setsize(vp, np->n_size); } else if (vap->va_size < np->n_size) { /* * When shrinking the size, the call to * vnode_pager_setsize() cannot be done * with the mutex held, so delay it until * after the mtx_unlock call. */ nsize = np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; setnsize = 1; } else { np->n_size = vap->va_size; np->n_flag |= NSIZECHANGED; vnode_pager_setsize(vp, np->n_size); } } else { np->n_size = vap->va_size; } } /* * The following checks are added to prevent a race between (say) * a READDIR+ and a WRITE. * READDIR+, WRITE requests sent out. * READDIR+ resp, WRITE resp received on client. * However, the WRITE resp was handled before the READDIR+ resp * causing the post op attrs from the write to be loaded first * and the attrs from the READDIR+ to be loaded later. If this * happens, we have stale attrs loaded into the attrcache. * We detect this by for the mtime moving back. We invalidate the * attrcache when this happens. */ if (timespeccmp(&mtime_save, &vap->va_mtime, >)) { /* Size changed or mtime went backwards */ np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } if (vaper != NULL) { NFSBCOPY((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } out: #ifdef KDTRACE_HOOKS if (np->n_attrstamp != 0) KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error); #endif NFSUNLOCKNODE(np); if (setnsize) vnode_pager_setsize(vp, nsize); return (error); } /* * Fill in the client id name. For these bytes: * 1 - they must be unique * 2 - they should be persistent across client reboots * 1 is more critical than 2 * Use the mount point's unique id plus either the uuid or, if that * isn't set, random junk. */ void nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen) { int uuidlen; /* * First, put in the 64bit mount point identifier. */ if (idlen >= sizeof (u_int64_t)) { NFSBCOPY((caddr_t)&clval, cp, sizeof (u_int64_t)); cp += sizeof (u_int64_t); idlen -= sizeof (u_int64_t); } /* * If uuid is non-zero length, use it. */ uuidlen = strlen(uuid); if (uuidlen > 0 && idlen >= uuidlen) { NFSBCOPY(uuid, cp, uuidlen); cp += uuidlen; idlen -= uuidlen; } /* * This only normally happens if the uuid isn't set. */ while (idlen > 0) { *cp++ = (u_int8_t)(arc4random() % 256); idlen--; } } /* * Fill in a lock owner name. For now, pid + the process's creation time. */ void nfscl_filllockowner(void *id, u_int8_t *cp, int flags) { union { u_int32_t lval; u_int8_t cval[4]; } tl; struct proc *p; if (id == NULL) { /* Return the single open_owner of all 0 bytes. */ bzero(cp, NFSV4CL_LOCKNAMELEN); return; } if ((flags & F_POSIX) != 0) { p = (struct proc *)id; tl.lval = p->p_pid; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp++ = tl.cval[3]; tl.lval = p->p_stats->p_start.tv_sec; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp++ = tl.cval[3]; tl.lval = p->p_stats->p_start.tv_usec; *cp++ = tl.cval[0]; *cp++ = tl.cval[1]; *cp++ = tl.cval[2]; *cp = tl.cval[3]; } else if ((flags & F_FLOCK) != 0) { bcopy(&id, cp, sizeof(id)); bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id)); } else { printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n"); bzero(cp, NFSV4CL_LOCKNAMELEN); } } /* * Find the parent process for the thread passed in as an argument. * If none exists, return NULL, otherwise return a thread for the parent. * (Can be any of the threads, since it is only used for td->td_proc.) */ NFSPROC_T * nfscl_getparent(struct thread *td) { struct proc *p; struct thread *ptd; if (td == NULL) return (NULL); p = td->td_proc; if (p->p_pid == 0) return (NULL); p = p->p_pptr; if (p == NULL) return (NULL); ptd = TAILQ_FIRST(&p->p_threads); return (ptd); } /* * Start up the renew kernel thread. */ static void start_nfscl(void *arg) { struct nfsclclient *clp; struct thread *td; clp = (struct nfsclclient *)arg; td = TAILQ_FIRST(&clp->nfsc_renewthread->p_threads); nfscl_renewthread(clp, td); kproc_exit(0); } void nfscl_start_renewthread(struct nfsclclient *clp) { kproc_create(start_nfscl, (void *)clp, &clp->nfsc_renewthread, 0, 0, "nfscl"); } /* * Handle wcc_data. * For NFSv4, it assumes that nfsv4_wccattr() was used to set up the getattr * as the first Op after PutFH. * (For NFSv4, the postop attributes are after the Op, so they can't be * parsed here. A separate call to nfscl_postop_attr() is required.) */ int nfscl_wcc_data(struct nfsrv_descript *nd, struct vnode *vp, struct nfsvattr *nap, int *flagp, int *wccflagp, void *stuff) { u_int32_t *tl; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; int error = 0; if (wccflagp != NULL) *wccflagp = 0; if (nd->nd_flag & ND_NFSV3) { *flagp = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); if (wccflagp != NULL) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) && np->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); mtx_unlock(&np->n_mtx); } } error = nfscl_postop_attr(nd, nap, flagp, stuff); if (wccflagp != NULL && *flagp == 0) *wccflagp = 0; } else if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) == (ND_NFSV4 | ND_V4WCCATTR)) { error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); if (error) return (error); /* * Get rid of Op# and status for next op. */ NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*++tl) nd->nd_flag |= ND_NOMOREDATA; if (wccflagp != NULL && nfsva.na_vattr.va_mtime.tv_sec != 0) { mtx_lock(&np->n_mtx); *wccflagp = (np->n_mtime.tv_sec == nfsva.na_vattr.va_mtime.tv_sec && np->n_mtime.tv_nsec == nfsva.na_vattr.va_mtime.tv_sec); mtx_unlock(&np->n_mtx); } } nfsmout: return (error); } /* * Get postop attributes. */ int nfscl_postop_attr(struct nfsrv_descript *nd, struct nfsvattr *nap, int *retp, void *stuff) { u_int32_t *tl; int error = 0; *retp = 0; if (nd->nd_flag & ND_NOMOREDATA) return (error); if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); *retp = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV4) { /* * For NFSv4, the postop attr are at the end, so no point * in looking if nd_repstat != 0. */ if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) /* should never happen since nd_repstat != 0 */ nd->nd_flag |= ND_NOMOREDATA; else *retp = 1; } } else if (!nd->nd_repstat) { /* For NFSv2, the attributes are here iff nd_repstat == 0 */ *retp = 1; } if (*retp) { error = nfsm_loadattr(nd, nap); if (error) *retp = 0; } nfsmout: return (error); } /* * Fill in the setable attributes. The full argument indicates whether * to fill in them all or just mode and time. */ void nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, struct vnode *vp, int flags, u_int32_t rdev) { u_int32_t *tl; struct nfsv2_sattr *sp; nfsattrbit_t attrbits; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_BUILD(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (mode_t)VNOVAL) sp->sa_mode = newnfs_xdrneg1; else sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = newnfs_xdrneg1; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = newnfs_xdrneg1; else sp->sa_gid = txdr_unsigned(vap->va_gid); if (flags & NFSSATTR_SIZE0) sp->sa_size = 0; else if (flags & NFSSATTR_SIZENEG1) sp->sa_size = newnfs_xdrneg1; else if (flags & NFSSATTR_SIZERDEV) sp->sa_size = txdr_unsigned(rdev); else sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); break; case ND_NFSV3: if (vap->va_mode != (mode_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_mode); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_uid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl = txdr_unsigned(vap->va_gid); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; txdr_hyper(vap->va_size, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if ((vap->va_vaflags & VA_UTIMES_NULL) == 0) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } break; case ND_NFSV4: NFSZERO_ATTRBIT(&attrbits); if (vap->va_mode != (mode_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MODE); if ((flags & NFSSATTR_FULL) && vap->va_uid != (uid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); if ((flags & NFSSATTR_FULL) && vap->va_gid != (gid_t)VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP); if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); if (vap->va_atime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); if (vap->va_mtime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET); (void) nfsv4_fillattr(nd, vp->v_mount, vp, NULL, vap, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); break; } } /* * nfscl_request() - mostly a wrapper for newnfs_request(). */ int nfscl_request(struct nfsrv_descript *nd, struct vnode *vp, NFSPROC_T *p, struct ucred *cred, void *stuff) { int ret, vers; struct nfsmount *nmp; nmp = VFSTONFS(vp->v_mount); if (nd->nd_flag & ND_NFSV4) vers = NFS_VER4; else if (nd->nd_flag & ND_NFSV3) vers = NFS_VER3; else vers = NFS_VER2; ret = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, NULL); return (ret); } /* * fill in this bsden's variant of statfs using nfsstatfs. */ void nfscl_loadsbinfo(struct nfsmount *nmp, struct nfsstatfs *sfp, void *statfs) { struct statfs *sbp = (struct statfs *)statfs; if (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) { sbp->f_bsize = NFS_FABLKSIZE; sbp->f_blocks = sfp->sf_tbytes / NFS_FABLKSIZE; sbp->f_bfree = sfp->sf_fbytes / NFS_FABLKSIZE; /* * Although sf_abytes is uint64_t and f_bavail is int64_t, * the value after dividing by NFS_FABLKSIZE is small * enough that it will fit in 63bits, so it is ok to * assign it to f_bavail without fear that it will become * negative. */ sbp->f_bavail = sfp->sf_abytes / NFS_FABLKSIZE; sbp->f_files = sfp->sf_tfiles; /* Since f_ffree is int64_t, clip it to 63bits. */ if (sfp->sf_ffiles > INT64_MAX) sbp->f_ffree = INT64_MAX; else sbp->f_ffree = sfp->sf_ffiles; } else if ((nmp->nm_flag & NFSMNT_NFSV4) == 0) { /* * The type casts to (int32_t) ensure that this code is * compatible with the old NFS client, in that it will * propagate bit31 to the high order bits. This may or may * not be correct for NFSv2, but since it is a legacy * environment, I'd rather retain backwards compatibility. */ sbp->f_bsize = (int32_t)sfp->sf_bsize; sbp->f_blocks = (int32_t)sfp->sf_blocks; sbp->f_bfree = (int32_t)sfp->sf_bfree; sbp->f_bavail = (int32_t)sfp->sf_bavail; sbp->f_files = 0; sbp->f_ffree = 0; } } /* * Use the fsinfo stuff to update the mount point. */ void nfscl_loadfsinfo(struct nfsmount *nmp, struct nfsfsinfo *fsp) { if ((nmp->nm_wsize == 0 || fsp->fs_wtpref < nmp->nm_wsize) && fsp->fs_wtpref >= NFS_FABLKSIZE) nmp->nm_wsize = (fsp->fs_wtpref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); if (fsp->fs_wtmax < nmp->nm_wsize && fsp->fs_wtmax > 0) { nmp->nm_wsize = fsp->fs_wtmax & ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize == 0) nmp->nm_wsize = fsp->fs_wtmax; } if (nmp->nm_wsize < NFS_FABLKSIZE) nmp->nm_wsize = NFS_FABLKSIZE; if ((nmp->nm_rsize == 0 || fsp->fs_rtpref < nmp->nm_rsize) && fsp->fs_rtpref >= NFS_FABLKSIZE) nmp->nm_rsize = (fsp->fs_rtpref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); if (fsp->fs_rtmax < nmp->nm_rsize && fsp->fs_rtmax > 0) { nmp->nm_rsize = fsp->fs_rtmax & ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize == 0) nmp->nm_rsize = fsp->fs_rtmax; } if (nmp->nm_rsize < NFS_FABLKSIZE) nmp->nm_rsize = NFS_FABLKSIZE; if ((nmp->nm_readdirsize == 0 || fsp->fs_dtpref < nmp->nm_readdirsize) && fsp->fs_dtpref >= NFS_DIRBLKSIZ) nmp->nm_readdirsize = (fsp->fs_dtpref + NFS_DIRBLKSIZ - 1) & ~(NFS_DIRBLKSIZ - 1); if (fsp->fs_rtmax < nmp->nm_readdirsize && fsp->fs_rtmax > 0) { nmp->nm_readdirsize = fsp->fs_rtmax & ~(NFS_DIRBLKSIZ - 1); if (nmp->nm_readdirsize == 0) nmp->nm_readdirsize = fsp->fs_rtmax; } if (nmp->nm_readdirsize < NFS_DIRBLKSIZ) nmp->nm_readdirsize = NFS_DIRBLKSIZ; if (fsp->fs_maxfilesize > 0 && fsp->fs_maxfilesize < nmp->nm_maxfilesize) nmp->nm_maxfilesize = fsp->fs_maxfilesize; nmp->nm_mountp->mnt_stat.f_iosize = newnfs_iosize(nmp); nmp->nm_state |= NFSSTA_GOTFSINFO; } /* * Lookups source address which should be used to communicate with * @nmp and stores it inside @pdst. * * Returns 0 on success. */ u_int8_t * nfscl_getmyip(struct nfsmount *nmp, struct in6_addr *paddr, int *isinet6p) { #if defined(INET6) || defined(INET) int error, fibnum; fibnum = curthread->td_proc->p_fibnum; #endif #ifdef INET if (nmp->nm_nam->sa_family == AF_INET) { struct sockaddr_in *sin; struct nhop4_extended nh_ext; sin = (struct sockaddr_in *)nmp->nm_nam; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); error = fib4_lookup_nh_ext(fibnum, sin->sin_addr, 0, 0, &nh_ext); CURVNET_RESTORE(); if (error != 0) return (NULL); if ((ntohl(nh_ext.nh_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { /* Ignore loopback addresses */ return (NULL); } *isinet6p = 0; *((struct in_addr *)paddr) = nh_ext.nh_src; return (u_int8_t *)paddr; } #endif #ifdef INET6 if (nmp->nm_nam->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)nmp->nm_nam; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); error = in6_selectsrc_addr(fibnum, &sin6->sin6_addr, sin6->sin6_scope_id, NULL, paddr, NULL); CURVNET_RESTORE(); if (error != 0) return (NULL); if (IN6_IS_ADDR_LOOPBACK(paddr)) return (NULL); /* Scope is embedded in */ *isinet6p = 1; return (u_int8_t *)paddr; } #endif return (NULL); } /* * Copy NFS uid, gids from the cred structure. */ void newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr) { int i; KASSERT(cr->cr_ngroups >= 0, ("newnfs_copyincred: negative cr_ngroups")); nfscr->nfsc_uid = cr->cr_uid; nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1); for (i = 0; i < nfscr->nfsc_ngroups; i++) nfscr->nfsc_groups[i] = cr->cr_groups[i]; } /* * Do any client specific initialization. */ void nfscl_init(void) { static int inited = 0; if (inited) return; inited = 1; nfscl_inited = 1; ncl_pbuf_freecnt = nswbuf / 2 + 1; } /* * Check each of the attributes to be set, to ensure they aren't already * the correct value. Disable setting ones already correct. */ int nfscl_checksattr(struct vattr *vap, struct nfsvattr *nvap) { if (vap->va_mode != (mode_t)VNOVAL) { if (vap->va_mode == nvap->na_mode) vap->va_mode = (mode_t)VNOVAL; } if (vap->va_uid != (uid_t)VNOVAL) { if (vap->va_uid == nvap->na_uid) vap->va_uid = (uid_t)VNOVAL; } if (vap->va_gid != (gid_t)VNOVAL) { if (vap->va_gid == nvap->na_gid) vap->va_gid = (gid_t)VNOVAL; } if (vap->va_size != VNOVAL) { if (vap->va_size == nvap->na_size) vap->va_size = VNOVAL; } /* * We are normally called with only a partially initialized * VAP. Since the NFSv3 spec says that server may use the * file attributes to store the verifier, the spec requires * us to do a SETATTR RPC. FreeBSD servers store the verifier * in atime, but we can't really assume that all servers will * so we ensure that our SETATTR sets both atime and mtime. * Set the VA_UTIMES_NULL flag for this case, so that * the server's time will be used. This is needed to * work around a bug in some Solaris servers, where * setting the time TOCLIENT causes the Setattr RPC * to return NFS_OK, but not set va_mode. */ if (vap->va_mtime.tv_sec == VNOVAL) { vfs_timestamp(&vap->va_mtime); vap->va_vaflags |= VA_UTIMES_NULL; } if (vap->va_atime.tv_sec == VNOVAL) vap->va_atime = vap->va_mtime; return (1); } /* * Map nfsv4 errors to errno.h errors. * The uid and gid arguments are only used for NFSERR_BADOWNER and that * error should only be returned for the Open, Create and Setattr Ops. * As such, most calls can just pass in 0 for those arguments. */ APPLESTATIC int nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid) { struct proc *p; if (error < 10000 || error >= NFSERR_STALEWRITEVERF) return (error); if (td != NULL) p = td->td_proc; else p = NULL; switch (error) { case NFSERR_BADOWNER: tprintf(p, LOG_INFO, "No name and/or group mapping for uid,gid:(%d,%d)\n", uid, gid); return (EPERM); case NFSERR_BADNAME: case NFSERR_BADCHAR: printf("nfsv4 char/name not handled by server\n"); return (ENOENT); case NFSERR_STALECLIENTID: case NFSERR_STALESTATEID: case NFSERR_EXPIRED: case NFSERR_BADSTATEID: case NFSERR_BADSESSION: printf("nfsv4 recover err returned %d\n", error); return (EIO); case NFSERR_BADHANDLE: case NFSERR_SERVERFAULT: case NFSERR_BADTYPE: case NFSERR_FHEXPIRED: case NFSERR_RESOURCE: case NFSERR_MOVED: case NFSERR_NOFILEHANDLE: case NFSERR_MINORVERMISMATCH: case NFSERR_OLDSTATEID: case NFSERR_BADSEQID: case NFSERR_LEASEMOVED: case NFSERR_RECLAIMBAD: case NFSERR_BADXDR: case NFSERR_OPILLEGAL: printf("nfsv4 client/server protocol prob err=%d\n", error); return (EIO); default: tprintf(p, LOG_INFO, "nfsv4 err=%d\n", error); return (EIO); }; } /* * Check to see if the process for this owner exists. Return 1 if it doesn't * and 0 otherwise. */ int nfscl_procdoesntexist(u_int8_t *own) { union { u_int32_t lval; u_int8_t cval[4]; } tl; struct proc *p; pid_t pid; int i, ret = 0; /* For the single open_owner of all 0 bytes, just return 0. */ for (i = 0; i < NFSV4CL_LOCKNAMELEN; i++) if (own[i] != 0) break; if (i == NFSV4CL_LOCKNAMELEN) return (0); tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; pid = tl.lval; p = pfind_locked(pid); if (p == NULL) return (1); if (p->p_stats == NULL) { PROC_UNLOCK(p); return (0); } tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own++; if (tl.lval != p->p_stats->p_start.tv_sec) { ret = 1; } else { tl.cval[0] = *own++; tl.cval[1] = *own++; tl.cval[2] = *own++; tl.cval[3] = *own; if (tl.lval != p->p_stats->p_start.tv_usec) ret = 1; } PROC_UNLOCK(p); return (ret); } /* * - nfs pseudo system call for the client */ /* * MPSAFE */ static int nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfscbd_args nfscbdarg; struct nfsd_nfscbd_args nfscbdarg2; struct nameidata nd; struct nfscl_dumpmntopts dumpmntopts; cap_rights_t rights; char *buf; int error; struct mount *mp; struct nfsmount *nmp; if (uap->flag & NFSSVC_CBADDSOCK) { error = copyin(uap->argp, (caddr_t)&nfscbdarg, sizeof(nfscbdarg)); if (error) return (error); /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. */ error = fget(td, nfscbdarg.sock, cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); if (error) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); return (EPERM); } error = nfscbd_addsock(fp); fdrop(fp, td); if (!error && nfscl_enablecallb == 0) { nfsv4_cbport = nfscbdarg.port; nfscl_enablecallb = 1; } } else if (uap->flag & NFSSVC_NFSCBD) { if (uap->argp == NULL) return (EINVAL); error = copyin(uap->argp, (caddr_t)&nfscbdarg2, sizeof(nfscbdarg2)); if (error) return (error); error = nfscbd_nfsd(td, &nfscbdarg2); } else if (uap->flag & NFSSVC_DUMPMNTOPTS) { error = copyin(uap->argp, &dumpmntopts, sizeof(dumpmntopts)); if (error == 0 && (dumpmntopts.ndmnt_blen < 256 || dumpmntopts.ndmnt_blen > 1024)) error = EINVAL; if (error == 0) error = nfsrv_lookupfilename(&nd, dumpmntopts.ndmnt_fname, td); if (error == 0 && strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); error = EINVAL; } if (error == 0) { buf = malloc(dumpmntopts.ndmnt_blen, M_TEMP, M_WAITOK); nfscl_retopts(VFSTONFS(nd.ni_vp->v_mount), buf, dumpmntopts.ndmnt_blen); vput(nd.ni_vp); error = copyout(buf, dumpmntopts.ndmnt_buf, dumpmntopts.ndmnt_blen); free(buf, M_TEMP); } } else if (uap->flag & NFSSVC_FORCEDISM) { buf = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK); error = copyinstr(uap->argp, buf, MNAMELEN + 1, NULL); if (error == 0) { nmp = NULL; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (strcmp(mp->mnt_stat.f_mntonname, buf) == 0 && strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 && mp->mnt_data != NULL) { nmp = VFSTONFS(mp); mtx_lock(&nmp->nm_mtx); if ((nmp->nm_privflag & NFSMNTP_FORCEDISM) == 0) { nmp->nm_privflag |= (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS); mtx_unlock(&nmp->nm_mtx); } else { nmp = NULL; mtx_unlock(&nmp->nm_mtx); } break; } } mtx_unlock(&mountlist_mtx); if (nmp != NULL) { /* * Call newnfs_nmcancelreqs() to cause * any RPCs in progress on the mount point to * fail. * This will cause any process waiting for an * RPC to complete while holding a vnode lock * on the mounted-on vnode (such as "df" or * a non-forced "umount") to fail. * This will unlock the mounted-on vnode so * a forced dismount can succeed. * Then clear NFSMNTP_CANCELRPCS and wakeup(), * so that nfs_unmount() can complete. */ newnfs_nmcancelreqs(nmp); mtx_lock(&nmp->nm_mtx); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); mtx_unlock(&nmp->nm_mtx); } else error = EINVAL; } free(buf, M_TEMP); } else { error = EINVAL; } return (error); } extern int (*nfsd_call_nfscl)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfscl_modevent(module_t mod, int type, void *data) { int error = 0; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) return (0); newnfs_portinit(); mtx_init(&ncl_iod_mutex, "ncl_iod_mutex", NULL, MTX_DEF); nfscl_init(); NFSD_LOCK(); nfsrvd_cbinit(0); NFSD_UNLOCK(); ncl_call_invalcaches = ncl_invalcaches; nfsd_call_nfscl = nfssvc_nfscl; loaded = 1; break; case MOD_UNLOAD: if (nfs_numnfscbd != 0) { error = EBUSY; break; } /* * XXX: Unloading of nfscl module is unsupported. */ #if 0 ncl_call_invalcaches = NULL; nfsd_call_nfscl = NULL; /* and get rid of the mutexes */ mtx_destroy(&ncl_iod_mutex); loaded = 0; break; #else /* FALLTHROUGH */ #endif default: error = EOPNOTSUPP; break; } return error; } static moduledata_t nfscl_mod = { "nfscl", nfscl_modevent, NULL, }; DECLARE_MODULE(nfscl, nfscl_mod, SI_SUB_VFS, SI_ORDER_FIRST); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfscl, 1); MODULE_DEPEND(nfscl, nfscommon, 1, 1, 1); MODULE_DEPEND(nfscl, krpc, 1, 1, 1); MODULE_DEPEND(nfscl, nfssvc, 1, 1, 1); MODULE_DEPEND(nfscl, nfslock, 1, 1, 1); Index: head/sys/fs/nfsclient/nfs_clrpcops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clrpcops.c (revision 328416) +++ head/sys/fs/nfsclient/nfs_clrpcops.c (revision 328417) @@ -1,7594 +1,7594 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Rpc op calls, generally called from the vnode op calls or through the * buffer cache, for NFS v2, 3 and 4. * These do not normally make any changes to vnode arguments or use * structures that might change between the VFS variants. The returned * arguments are all at the end, after the NFSPROC_T *p one. */ #ifndef APPLEKEXT #include "opt_inet6.h" #include #include #include SYSCTL_DECL(_vfs_nfs); static int nfsignore_eexist = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW, &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink"); /* * Global variables */ extern int nfs_numnfscbd; extern struct timeval nfsboottime; extern u_int32_t newnfs_false, newnfs_true; extern nfstype nfsv34_type[9]; extern int nfsrv_useacl; extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN]; extern int nfscl_debuglevel; extern int nfs_pnfsiothreads; NFSCLSTATEMUTEX; int nfstest_outofseq = 0; int nfscl_assumeposixlocks = 1; int nfscl_enablecallb = 0; short nfsv4_cbport = NFSV4_CBPORT; int nfstest_openallsetattr = 0; #endif /* !APPLEKEXT */ #define DIRHDSIZ offsetof(struct dirent, d_name) /* * nfscl_getsameserver() can return one of three values: * NFSDSP_USETHISSESSION - Use this session for the DS. * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new * session. * NFSDSP_NOTFOUND - No matching server was found. */ enum nfsclds_state { NFSDSP_USETHISSESSION = 0, NFSDSP_SEQTHISSESSION = 1, NFSDSP_NOTFOUND = 2, }; /* * Do a write RPC on a DS data file, using this structure for the arguments, * so that this function can be executed by a separate kernel process. */ struct nfsclwritedsdorpc { int done; int inprog; struct task tsk; struct vnode *vp; int iomode; int must_commit; nfsv4stateid_t *stateidp; struct nfsclds *dsp; uint64_t off; int len; struct nfsfh *fhp; struct mbuf *m; int vers; int minorvers; struct ucred *cred; NFSPROC_T *p; int err; }; static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *, struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *, nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *, nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *, int *); static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *, struct nfscllockowner *, u_int64_t, u_int64_t, u_int32_t, struct ucred *, NFSPROC_T *, int); static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *, struct acl *, nfsv4stateid_t *, void *); static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **, struct ucred *, NFSPROC_T *); static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *, struct sockaddr_in6 *, sa_family_t, int, struct nfsclds **, NFSPROC_T *); static void nfscl_initsessionslots(struct nfsclsession *); static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *, nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *, struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *, NFSPROC_T *); static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *, nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *, struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *, struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *); static struct mbuf *nfsm_copym(struct mbuf *, int, int); static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *, struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int, struct ucred *, NFSPROC_T *); static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *, nfsv4stateid_t *, struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *); static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *, struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int, struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *); static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *, struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int, struct ucred *, NFSPROC_T *); static enum nfsclds_state nfscl_getsameserver(struct nfsmount *, struct nfsclds *, struct nfsclds **); static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *, struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *); static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *, struct nfsfh *, int, int, struct ucred *, NFSPROC_T *); static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t, uint64_t, uint64_t, nfsv4stateid_t *, int, int, int); static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *, NFSPROC_T *); static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *, int *, struct nfsclflayouthead *); static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *, int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int, struct nfscldeleg **, struct ucred *, NFSPROC_T *); static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *, nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *, int *); static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *, int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int, struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *, struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *); static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *, nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *, int, int, int, int *, struct nfsclflayouthead *, int *); static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t, uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *, struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *); static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *, int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **, struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *); int nfs_pnfsio(task_fn_t *, void *); /* * nfs null call from vfs. */ APPLESTATIC int nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p) { int error; struct nfsrv_descript nfsd, *nd = &nfsd; NFSCL_REQSTART(nd, NFSPROC_NULL, vp); error = nfscl_request(nd, vp, p, cred, NULL); if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs access rpc op. * For nfs version 3 and 4, use the access rpc to check accessibility. If file * modes are changed on the server, accesses might still fail later. */ APPLESTATIC int nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp) { int error; u_int32_t mode, rmode; if (acmode & VREAD) mode = NFSACCESS_READ; else mode = 0; if (vnode_vtype(vp) == VDIR) { if (acmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE); if (acmode & VEXEC) mode |= NFSACCESS_LOOKUP; } else { if (acmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (acmode & VEXEC) mode |= NFSACCESS_EXECUTE; } /* * Now, just call nfsrpc_accessrpc() to do the actual RPC. */ error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode, NULL); /* * The NFS V3 spec does not clarify whether or not * the returned access bits can be a superset of * the ones requested, so... */ if (!error && (rmode & mode) != mode) error = EACCES; return (error); } /* * The actual rpc, separated out for Darwin. */ APPLESTATIC int nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep, void *stuff) { u_int32_t *tl; u_int32_t supported, rmode; int error; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; *attrflagp = 0; supported = mode; NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(mode); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); supported = fxdr_unsigned(u_int32_t, *tl++); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); } rmode = fxdr_unsigned(u_int32_t, *tl); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); /* * It's not obvious what should be done about * unsupported access modes. For now, be paranoid * and clear the unsupported ones. */ rmode &= supported; *rmodep = rmode; } else error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs open rpc */ APPLESTATIC int nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p) { struct nfsclopen *op; struct nfscldeleg *dp; struct nfsfh *nfhp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); u_int32_t mode, clidrev; int ret, newone, error, expireret = 0, retrycnt; /* * For NFSv4, Open Ops are only done on Regular Files. */ if (vnode_vtype(vp) != VREG) return (0); mode = 0; if (amode & FREAD) mode |= NFSV4OPEN_ACCESSREAD; if (amode & FWRITE) mode |= NFSV4OPEN_ACCESSWRITE; nfhp = np->n_fhp; retrycnt = 0; #ifdef notdef { char name[100]; int namel; namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99; bcopy(NFS4NODENAME(np->n_v4), name, namel); name[namel] = '\0'; printf("rpcopen p=0x%x name=%s",p->p_pid,name); if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]); else printf(" fhl=0\n"); } #endif do { dp = NULL; error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1, cred, p, NULL, &op, &newone, &ret, 1); if (error) { return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; if (ret == NFSCLOPEN_DOOPEN) { if (np->n_v4 != NULL) { /* * For the first attempt, try and get a layout, if * pNFS is enabled for the mount. */ if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0) error = nfsrpc_openrpc(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &dp, 0, 0x0, cred, p, 0, 0); else error = nfsrpc_getopenlayout(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &dp, cred, p); if (dp != NULL) { #ifdef APPLE OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag); #else NFSLOCKNODE(np); np->n_flag &= ~NDELEGMOD; /* * Invalidate the attribute cache, so that * attributes that pre-date the issue of a * delegation are not cached, since the * cached attributes will remain valid while * the delegation is held. */ NFSINVALATTRCACHE(np); NFSUNLOCKNODE(np); #endif (void) nfscl_deleg(nmp->nm_mountp, op->nfso_own->nfsow_clp, nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp); } } else { error = EIO; } newnfs_copyincred(cred, &op->nfso_cred); } else if (ret == NFSCLOPEN_SETCRED) /* * This is a new local open on a delegation. It needs * to have credentials so that an open can be done * against the server during recovery. */ newnfs_copyincred(cred, &op->nfso_cred); /* * nfso_opencnt is the count of how many VOP_OPEN()s have * been done on this Open successfully and a VOP_CLOSE() * is expected for each of these. * If error is non-zero, don't increment it, since the Open * hasn't succeeded yet. */ if (!error) op->nfso_opencnt++; nfscl_openrelease(nmp, op, error, newone); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } /* * the actual open rpc */ APPLESTATIC int nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op, u_int8_t *name, int namelen, struct nfscldeleg **dpp, int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p, int syscred, int recursed) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscldeleg *dp, *ndp = NULL; struct nfsvattr nfsva; u_int32_t rflags, deleg; nfsattrbit_t attrbits; int error, ret, acesize, limitby; struct nfsclsession *tsep; dp = *dpp; *dpp = NULL; nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE); if (reclaim) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(delegtype); } else { if (dp != NULL) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; } else { *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); } (void) nfsm_strtom(nd, name, namelen); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); (void) nfsrv_putattrbit(nd, &attrbits); if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(u_int32_t, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) op->nfso_own->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); - MALLOC(ndp, struct nfscldeleg *, + ndp = malloc( sizeof (struct nfscldeleg) + newfhlen, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&ndp->nfsdl_owner); LIST_INIT(&ndp->nfsdl_lock); ndp->nfsdl_clp = op->nfso_own->nfsow_clp; ndp->nfsdl_fhlen = newfhlen; NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen); newnfs_copyincred(cred, &ndp->nfsdl_cred); nfscl_lockinit(&ndp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); ndp->nfsdl_stateid.seqid = *tl++; ndp->nfsdl_stateid.other[0] = *tl++; ndp->nfsdl_stateid.other[1] = *tl++; ndp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { ndp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: ndp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: ndp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); ndp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; } } else { ndp->nfsdl_flags = NFSCLDL_READ; } if (ret) ndp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret, &acesize, p); if (error) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) goto nfsmout; if (ndp != NULL) { ndp->nfsdl_change = nfsva.na_filerev; ndp->nfsdl_modtime = nfsva.na_mtime; ndp->nfsdl_flags |= NFSCLDL_MODTIMESET; } if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) { do { ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op, cred, p); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_open"); } while (ret == NFSERR_DELAY); error = ret; } if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) || nfscl_assumeposixlocks) op->nfso_posixlock = 1; else op->nfso_posixlock = 0; /* * If the server is handing out delegations, but we didn't * get one because an OpenConfirm was required, try the * Open again, to get a delegation. This is a harmless no-op, * from a server's point of view. */ if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) && (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) && !error && dp == NULL && ndp == NULL && !recursed) { do { ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, &ndp, 0, 0x0, cred, p, syscred, 1); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_open2"); } while (ret == NFSERR_DELAY); if (ret) { if (ndp != NULL) { - FREE((caddr_t)ndp, M_NFSCLDELEG); + free(ndp, M_NFSCLDELEG); ndp = NULL; } if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) error = ret; } } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALECLIENTID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: if (!error) *dpp = ndp; else if (ndp != NULL) - FREE((caddr_t)ndp, M_NFSCLDELEG); + free(ndp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * open downgrade rpc */ APPLESTATIC int nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp)))) *tl++ = 0; else *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; } if (nd->nd_repstat && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * V4 Close operation. */ APPLESTATIC int nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p) { struct nfsclclient *clp; int error; if (vnode_vtype(vp) != VREG) return (0); if (doclose) error = nfscl_doclose(vp, &clp, p); else error = nfscl_getclose(vp, &clp); if (error) return (error); nfscl_clientrelease(clp); return (0); } /* * Close the open. */ APPLESTATIC void nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscllockowner *lp, *nlp; struct nfscllock *lop, *nlop; struct ucred *tcred; u_int64_t off = 0, len = 0; u_int32_t type = NFSV4LOCKT_READ; int error, do_unlock, trycnt; tcred = newnfs_getcred(); newnfs_copycred(&op->nfso_cred, tcred); /* * (Theoretically this could be done in the same * compound as the close, but having multiple * sequenced Ops in the same compound might be * too scary for some servers.) */ if (op->nfso_posixlock) { off = 0; len = NFS64BITSSET; type = NFSV4LOCKT_READ; } /* * Since this function is only called from VOP_INACTIVE(), no * other thread will be manipulating this Open. As such, the * lock lists are not being changed by other threads, so it should * be safe to do this without locking. */ LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { do_unlock = 1; LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) { if (op->nfso_posixlock == 0) { off = lop->nfslo_first; len = lop->nfslo_end - lop->nfslo_first; if (lop->nfslo_type == F_WRLCK) type = NFSV4LOCKT_WRITE; else type = NFSV4LOCKT_READ; } if (do_unlock) { trycnt = 0; do { error = nfsrpc_locku(nd, nmp, lp, off, len, type, tcred, p, 0); if ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfs_close"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0 && trycnt++ < 5); if (op->nfso_posixlock) do_unlock = 0; } nfscl_freelock(lop, 0); } /* * Do a ReleaseLockOwner. * The lock owner name nfsl_owner may be used by other opens for * other files but the lock_owner4 name that nfsrpc_rellockown() * puts on the wire has the file handle for this file appended * to it, so it can be done now. */ (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh, lp->nfsl_open->nfso_fhlen, tcred, p); } /* * There could be other Opens for different files on the same * OpenOwner, so locking is required. */ NFSLOCKCLSTATE(); nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR); NFSUNLOCKCLSTATE(); do { error = nfscl_tryclose(op, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfs_close"); } while (error == NFSERR_GRACE); NFSLOCKCLSTATE(); nfscl_lockunlock(&op->nfso_own->nfsow_rwlock); LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) nfscl_freelockowner(lp, 0); nfscl_freeopen(op, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(tcred); } /* * The actual Close RPC. */ APPLESTATIC int nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error; nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh, op->nfso_fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl = op->nfso_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (nd->nd_repstat == 0) NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * V4 Open Confirm RPC. */ APPLESTATIC int nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen, struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; int error; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4N(nmp)) return (0); /* No confirmation for NFSv4.1. */ nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; *tl = txdr_unsigned(op->nfso_own->nfsow_seqid); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (!nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; } error = nd->nd_repstat; if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs() * when a mount has just occurred and when the server replies NFSERR_EXPIRED. */ APPLESTATIC int nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; nfsattrbit_t attrbits; u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9]; u_short port; int error, isinet6 = 0, callblen; nfsquad_t confirm; u_int32_t lease; static u_int32_t rev = 0; struct nfsclds *dsp; struct in6_addr a6; struct nfsclsession *tsep; if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); clp->nfsc_rev = rev++; if (NFSHASNFSV4N(nmp)) { /* * Either there was no previous session or the * previous session has failed, so... * do an ExchangeID followed by the CreateSession. */ error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p); NFSCL_DEBUG(1, "aft exch=%d\n", error); if (error == 0) error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, &nmp->nm_sockreq, dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p); if (error == 0) { NFSLOCKMNT(nmp); /* * The old sessions cannot be safely free'd * here, since they may still be used by * in-progress RPCs. */ tsep = NULL; if (TAILQ_FIRST(&nmp->nm_sess) != NULL) tsep = NFSMNT_MDSSESSION(nmp); TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); /* * Wake up RPCs waiting for a slot on the * old session. These will then fail with * NFSERR_BADSESSION and be retried with the * new session by nfsv4_setsequence(). * Also wakeup() processes waiting for the * new session. */ if (tsep != NULL) wakeup(&tsep->nfsess_slots); wakeup(&nmp->nm_sess); NFSUNLOCKMNT(nmp); } else nfscl_freenfsclds(dsp); NFSCL_DEBUG(1, "aft createsess=%d\n", error); if (error == 0 && reclaim == 0) { error = nfsrpc_reclaimcomplete(nmp, cred, p); NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error); if (error == NFSERR_COMPLETEALREADY || error == NFSERR_NOTSUPP) /* Ignore this error. */ error = 0; } return (error); } /* * Allocate a single session structure for NFSv4.0, because some of * the fields are used by NFSv4.0 although it doesn't do a session. */ dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO); mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); NFSLOCKMNT(nmp); TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); tsep = NFSMNT_MDSSESSION(nmp); NFSUNLOCKMNT(nmp); nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); /* * set up the callback address */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFS_CALLBCKPROG); callblen = strlen(nfsv4_callbackaddr); if (callblen == 0) cp = nfscl_getmyip(nmp, &a6, &isinet6); if (nfscl_enablecallb && nfs_numnfscbd > 0 && (callblen > 0 || cp != NULL)) { port = htons(nfsv4_cbport); cp2 = (u_int8_t *)&port; #ifdef INET6 if ((callblen > 0 && strchr(nfsv4_callbackaddr, ':')) || isinet6) { char ip6buf[INET6_ADDRSTRLEN], *ip6add; (void) nfsm_strtom(nd, "tcp6", 4); if (callblen == 0) { ip6_sprintf(ip6buf, (struct in6_addr *)cp); ip6add = ip6buf; } else { ip6add = nfsv4_callbackaddr; } snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", ip6add, cp2[0], cp2[1]); } else #endif { (void) nfsm_strtom(nd, "tcp", 3); if (callblen == 0) snprintf(addr, INET6_ADDRSTRLEN + 9, "%d.%d.%d.%d.%d.%d", cp[0], cp[1], cp[2], cp[3], cp2[0], cp2[1]); else snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d", nfsv4_callbackaddr, cp2[0], cp2[1]); } (void) nfsm_strtom(nd, addr, strlen(addr)); } else { (void) nfsm_strtom(nd, "tcp", 3); (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(clp->nfsc_cbident); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); tsep->nfsess_clientid.lval[0] = *tl++; tsep->nfsess_clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; /* * and confirm it. */ nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = tsep->nfsess_clientid.lval[0]; *tl++ = tsep->nfsess_clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; if (nd->nd_repstat == 0) { nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh, nmp->nm_fhsize, NULL, NULL, 0, 0); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred); if (error) goto nfsmout; clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; } } } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call. */ APPLESTATIC int nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsm_loadattr(nd, nap); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getattr call with non-vnode arguemnts. */ APPLESTATIC int nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp, uint32_t *leasep) { struct nfsrv_descript nfsd, *nd = &nfsd; int error, vers = NFS_VER2; nfsattrbit_t attrbits; nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0); if (nd->nd_flag & ND_NFSV4) { vers = NFS_VER4; NFSGETATTR_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); } else if (nd->nd_flag & ND_NFSV3) { vers = NFS_VER3; } if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, vers, NULL, 1, xidp, NULL); if (error) return (error); if (nd->nd_repstat == 0) { if ((nd->nd_flag & ND_NFSV4) != 0) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL, NULL, NULL); else error = nfsm_loadattr(nd, nap); } else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do an nfs setattr operation. */ APPLESTATIC int nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp, void *stuff) { int error, expireret = 0, openerr, retrycnt; u_int32_t clidrev = 0, mode; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsfh *nfhp; nfsv4stateid_t stateid; void *lckp; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; retrycnt = 0; do { lckp = NULL; openerr = 1; if (NFSHASNFSV4(nmp)) { nfhp = VTONFS(vp)->n_fhp; error = nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp); if (error && vnode_vtype(vp) == VREG && (mode == NFSV4OPEN_ACCESSWRITE || nfstest_openallsetattr)) { /* * No Open stateid, so try and open the file * now. */ if (mode == NFSV4OPEN_ACCESSWRITE) openerr = nfsrpc_open(vp, FWRITE, cred, p); else openerr = nfsrpc_open(vp, FREAD, cred, p); if (!openerr) (void) nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp); } } if (vap != NULL) error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p, rnap, attrflagp, stuff); else error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid, stuff); if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (!openerr) (void) nfsrpc_close(vp, 0, p); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_setattr"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4) || (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } static int nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp); if (nd->nd_flag & ND_NFSV4) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); vap->va_type = vnode_vtype(vp); nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = newnfs_false; } else if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff); if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error) error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error) error = nfscl_postop_attr(nd, rnap, attrflagp, stuff); mbuf_freem(nd->nd_mrep); if (nd->nd_repstat && !error) error = nd->nd_repstat; return (error); } /* * nfs lookup rpc */ APPLESTATIC int nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; struct nfsnode *np; struct nfsfh *nfhp; nfsattrbit_t attrbits; int error = 0, lookupp = 0; *attrflagp = 0; *dattrflagp = 0; if (vnode_vtype(dvp) != VDIR) return (ENOTDIR); nmp = VFSTONFS(vnode_mount(dvp)); if (len > NFS_MAXNAMLEN) return (ENAMETOOLONG); if (NFSHASNFSV4(nmp) && len == 1 && name[0] == '.') { /* * Just return the current dir's fh. */ np = VTONFS(dvp); - MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + + nfhp = malloc(sizeof (struct nfsfh) + np->n_fhp->nfh_len, M_NFSFH, M_WAITOK); nfhp->nfh_len = np->n_fhp->nfh_len; NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len); *nfhpp = nfhp; return (0); } if (NFSHASNFSV4(nmp) && len == 2 && name[0] == '.' && name[1] == '.') { lookupp = 1; NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp); } else { NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp); (void) nfsm_strtom(nd, name, len); } if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, dvp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat) { /* * When an NFSv4 Lookupp returns ENOENT, it means that * the lookup is at the root of an fs, so return this dir. */ if (nd->nd_repstat == NFSERR_NOENT && lookupp) { np = VTONFS(dvp); - MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) + + nfhp = malloc(sizeof (struct nfsfh) + np->n_fhp->nfh_len, M_NFSFH, M_WAITOK); nfhp->nfh_len = np->n_fhp->nfh_len; NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len); *nfhpp = nfhp; mbuf_freem(nd->nd_mrep); return (0); } if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff); else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error == 0) *dattrflagp = 1; } goto nfsmout; } if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error != 0) goto nfsmout; *dattrflagp = 1; /* Skip over the Lookup and GetFH operation status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); } error = nfsm_getfh(nd, nfhpp); if (error) goto nfsmout; error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if ((nd->nd_flag & ND_NFSV3) && !error) error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff); nfsmout: mbuf_freem(nd->nd_mrep); if (!error && nd->nd_repstat) error = nd->nd_repstat; return (error); } /* * Do a readlink rpc. */ APPLESTATIC int nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsnode *np = VTONFS(vp); nfsattrbit_t attrbits; int error, len, cangetattr = 1; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READLINK, vp); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (!nd->nd_repstat && !error) { NFSM_STRSIZ(len, NFS_MAXPATHLEN); /* * This seems weird to me, but must have been added to * FreeBSD for some reason. The only thing I can think of * is that there was/is some server that replies with * more link data than it should? */ if (len == NFS_MAXPATHLEN) { NFSLOCKNODE(np); if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) { len = np->n_size; cangetattr = 0; } NFSUNLOCKNODE(np); } error = nfsm_mbufuio(nd, uiop, len); if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Read operation. */ APPLESTATIC int nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { int error, expireret = 0, retrycnt; u_int32_t clidrev = 0; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); struct ucred *newcred; struct nfsfh *nfhp = NULL; nfsv4stateid_t stateid; void *lckp; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; newcred = cred; if (NFSHASNFSV4(nmp)) { nfhp = np->n_fhp; newcred = NFSNEWCRED(cred); } retrycnt = 0; do { lckp = NULL; if (NFSHASNFSV4(nmp)) (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid, &lckp); error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap, attrflagp, stuff); if (error == NFSERR_OPENMODE) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_read"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4) || (error == NFSERR_OPENMODE && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; if (NFSHASNFSV4(nmp)) NFSFREECRED(newcred); return (error); } /* * The actual read RPC. */ static int nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred, nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; int error = 0, len, retlen, tsiz, eof = 0; struct nfsrv_descript nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsrv_descript *nd = &nfsd; int rsize; off_t tmp_off; *attrflagp = 0; tsiz = uio_uio_resid(uiop); tmp_off = uiop->uio_offset + tsiz; NFSLOCKMNT(nmp); if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) { NFSUNLOCKMNT(nmp); return (EFBIG); } rsize = nmp->nm_rsize; NFSUNLOCKMNT(nmp); nd->nd_mrep = NULL; while (tsiz > 0) { *attrflagp = 0; len = (tsiz > rsize) ? rsize : tsiz; NFSCL_REQSTART(nd, NFSPROC_READ, vp); if (nd->nd_flag & ND_NFSV4) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3); if (nd->nd_flag & ND_NFSV2) { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } else { txdr_hyper(uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } /* * Since I can't do a Getattr for NFSv4 for Write, there * doesn't seem any point in doing one here, either. * (See the comment in nfsrpc_writerpc() for more info.) */ error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = 1; } if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); } NFSM_STRSIZ(retlen, len); error = nfsm_mbufuio(nd, uiop, retlen); if (error) goto nfsmout; mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; tsiz -= retlen; if (!(nd->nd_flag & ND_NFSV2)) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } return (0); nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } /* * nfs write operation * When called_from_strategy != 0, it should return EIO for an error that * indicates recovery is in progress, so that the buffer will be left * dirty and be written back to the server later. If it loops around, * the recovery thread could get stuck waiting for the buffer and recovery * will then deadlock. */ APPLESTATIC int nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff, int called_from_strategy) { int error, expireret = 0, retrycnt, nostateid; u_int32_t clidrev = 0; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); struct ucred *newcred; struct nfsfh *nfhp = NULL; nfsv4stateid_t stateid; void *lckp; *must_commit = 0; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; newcred = cred; if (NFSHASNFSV4(nmp)) { newcred = NFSNEWCRED(cred); nfhp = np->n_fhp; } retrycnt = 0; do { lckp = NULL; nostateid = 0; if (NFSHASNFSV4(nmp)) { (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid, &lckp); if (stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { nostateid = 1; NFSCL_DEBUG(1, "stateid0 in write\n"); } } /* * If there is no stateid for NFSv4, it means this is an * extraneous write after close. Basically a poorly * implemented buffer cache. Just don't do the write. */ if (nostateid) error = 0; else error = nfsrpc_writerpc(vp, uiop, iomode, must_commit, newcred, &stateid, p, nap, attrflagp, stuff); if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_write"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_DELAY || ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error != 0 && (retrycnt >= 4 || ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0))) error = EIO; if (NFSHASNFSV4(nmp)) NFSFREECRED(newcred); return (error); } /* * The actual write RPC. */ static int nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *np = VTONFS(vp); int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC; int wccflag = 0, wsize; int32_t backup; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; nfsattrbit_t attrbits; off_t tmp_off; KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); *attrflagp = 0; tsiz = uio_uio_resid(uiop); tmp_off = uiop->uio_offset + tsiz; NFSLOCKMNT(nmp); if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) { NFSUNLOCKMNT(nmp); return (EFBIG); } wsize = nmp->nm_wsize; NFSUNLOCKMNT(nmp); nd->nd_mrep = NULL; /* NFSv2 sometimes does a write with */ nd->nd_repstat = 0; /* uio_resid == 0, so the while is not done */ while (tsiz > 0) { *attrflagp = 0; len = (tsiz > wsize) ? wsize : tsiz; NFSCL_REQSTART(nd, NFSPROC_WRITE, vp); if (nd->nd_flag & ND_NFSV4) { nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else { u_int32_t x; NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* * Not sure why someone changed this, since the * RFC clearly states that "beginoffset" and * "totalcount" are ignored, but it wouldn't * surprise me if there's a busted server out there. */ /* Set both "begin" and "current" to non-garbage. */ x = txdr_unsigned((u_int32_t)uiop->uio_offset); *tl++ = x; /* "begin offset" */ *tl++ = x; /* "current offset" */ x = txdr_unsigned(len); *tl++ = x; /* total to this offset */ *tl = x; /* size of this write */ } nfsm_uiombuf(nd, uiop, len); /* * Although it is tempting to do a normal Getattr Op in the * NFSv4 compound, the result can be a nearly hung client * system if the Getattr asks for Owner and/or OwnerGroup. * It occurs when the client can't map either the Owner or * Owner_group name in the Getattr reply to a uid/gid. When * there is a cache miss, the kernel does an upcall to the * nfsuserd. Then, it can try and read the local /etc/passwd * or /etc/group file. It can then block in getnewbuf(), * waiting for dirty writes to be pushed to the NFS server. * The only reason this doesn't result in a complete * deadlock, is that the upcall times out and allows * the write to complete. However, progress is so slow * that it might just as well be deadlocked. * As such, we get the rest of the attributes, but not * Owner or Owner_group. * nb: nfscl_loadattrcache() needs to be told that these * partial attributes from a write rpc are being * passed in, via a argument flag. */ if (nd->nd_flag & ND_NFSV4) { NFSWRITEGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat) { /* * In case the rpc gets retried, roll * the uio fileds changed by nfsm_uiombuf() * back. */ uiop->uio_offset -= len; uio_uio_resid_add(uiop, len); uio_iov_base_add(uiop, -len); uio_iov_len_add(uiop, len); } if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { error = nfscl_wcc_data(nd, vp, nap, attrflagp, &wccflag, stuff); if (error) goto nfsmout; } if (!nd->nd_repstat) { if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { backup = len - rlen; uio_iov_base_add(uiop, -(backup)); uio_iov_len_add(uiop, backup); uiop->uio_offset -= backup; uio_uio_resid_add(uiop, backup); len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; NFSLOCKMNT(nmp); if (!NFSHASWRITEVERF(nmp)) { NFSBCOPY((caddr_t)tl, (caddr_t)&nmp->nm_verf[0], NFSX_VERF); NFSSETWRITEVERF(nmp); } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); } NFSUNLOCKMNT(nmp); } if (nd->nd_flag & ND_NFSV4) NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = NFS_LATTR_NOSHRINK; } } else { error = nd->nd_repstat; } if (error) goto nfsmout; NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4)); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; tsiz -= len; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat && !error) error = nd->nd_repstat; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ APPLESTATIC int nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap, u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; int error = 0; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp); if (nd->nd_flag & ND_NFSV4) { if (vtyp == VBLK || vtyp == VCHR) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = vtonfsv34_type(vtyp); *tl++ = txdr_unsigned(NFSMAJOR(rdev)); *tl = txdr_unsigned(NFSMINOR(rdev)); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vtyp); } } (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = vtonfsv34_type(vtyp); } if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) nfscl_fillsattr(nd, vap, dvp, 0, 0); if ((nd->nd_flag & ND_NFSV3) && (vtyp == VCHR || vtyp == VBLK)) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSMAJOR(rdev)); *tl = txdr_unsigned(NFSMINOR(rdev)); } if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } if (nd->nd_flag & ND_NFSV2) nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; } error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV3) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!error && nd->nd_repstat) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs file create call * Mostly just call the approriate routine. (I separated out v4, so that * error recovery wouldn't be as difficult.) */ APPLESTATIC int nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { int error = 0, newone, expireret = 0, retrycnt, unlocked; struct nfsclowner *owp; struct nfscldeleg *dp; struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp)); u_int32_t clidrev; if (NFSHASNFSV4(nmp)) { retrycnt = 0; do { dp = NULL; error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone, NULL, 1); if (error) return (error); if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || retrycnt > 0) error = nfsrpc_createv4(dvp, name, namelen, vap, cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff, &unlocked); else error = nfsrpc_getcreatelayout(dvp, name, namelen, vap, cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff, &unlocked); /* * There is no need to invalidate cached attributes here, * since new post-delegation issue attributes are always * returned by nfsrpc_createv4() and these will update the * attribute cache. */ if (dp != NULL) (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp); nfscl_ownerrelease(nmp, owp, error, newone, unlocked); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; } else { error = nfsrpc_createv23(dvp, name, namelen, vap, cverf, fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff); } return (error); } /* * The create rpc for v2 and 3. */ static int nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; int error = 0; struct nfsrv_descript nfsd, *nd = &nfsd; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp); (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0); } error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV3) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } static int nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff, int *unlockedp) { u_int32_t *tl; int error = 0, deleg, newone, ret, acesize, limitby; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsclopen *op; struct nfscldeleg *dp = NULL; struct nfsnode *np; struct nfsfh *nfhp; nfsattrbit_t attrbits; nfsv4stateid_t stateid; u_int32_t rflags; struct nfsmount *nmp; struct nfsclsession *tsep; nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); *unlockedp = 0; *nfhpp = NULL; *dpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp); /* * For V4, this is actually an Open op. */ NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(owp->nfsow_seqid); *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD); *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CREATE); if (fmode & O_EXCL) { if (NFSHASNFSV4N(nmp)) { if (NFSHASSESSPERSIST(nmp)) { /* Use GUARDED for persistent sessions. */ *tl = txdr_unsigned(NFSCREATE_GUARDED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } else { /* Otherwise, use EXCLUSIVE4_1. */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { /* NFSv4.0 */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); (void) nfsm_strtom(nd, name, namelen); /* Get the new file's handle and attributes. */ NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); /* Get the directory's post-op attributes. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); NFSCL_INCRSEQID(owp->nfsow_seqid, nd); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); stateid.seqid = *tl++; stateid.other[0] = *tl++; stateid.other[1] = *tl++; stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); (void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) owp->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); - MALLOC(dp, struct nfscldeleg *, + dp = malloc( sizeof (struct nfscldeleg) + NFSX_V4FHMAX, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&dp->nfsdl_owner); LIST_INIT(&dp->nfsdl_lock); dp->nfsdl_clp = owp->nfsow_clp; newnfs_copyincred(cred, &dp->nfsdl_cred); nfscl_lockinit(&dp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); dp->nfsdl_stateid.seqid = *tl++; dp->nfsdl_stateid.other[0] = *tl++; dp->nfsdl_stateid.other[1] = *tl++; dp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { dp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: dp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: dp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); dp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; } } else { dp->nfsdl_flags = NFSCLDL_READ; } if (ret) dp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret, &acesize, p); if (error) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error) goto nfsmout; /* Get rid of the PutFH and Getattr status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error) goto nfsmout; *dattrflagp = 1; if (dp != NULL && *attrflagp) { dp->nfsdl_change = nnap->na_filerev; dp->nfsdl_modtime = nnap->na_mtime; dp->nfsdl_flags |= NFSCLDL_MODTIMESET; } /* * We can now complete the Open state. */ nfhp = *nfhpp; if (dp != NULL) { dp->nfsdl_fhlen = nfhp->nfh_len; NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len); } /* * Get an Open structure that will be * attached to the OpenOwner, acquired already. */ error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, NULL, &op, &newone, NULL, 0); if (error) goto nfsmout; op->nfso_stateid = stateid; newnfs_copyincred(cred, &op->nfso_cred); if ((rflags & NFSV4OPEN_RESULTCONFIRM)) { do { ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh, nfhp->nfh_len, op, cred, p); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_create"); } while (ret == NFSERR_DELAY); error = ret; } /* * If the server is handing out delegations, but we didn't * get one because an OpenConfirm was required, try the * Open again, to get a delegation. This is a harmless no-op, * from a server's point of view. */ if ((rflags & NFSV4OPEN_RESULTCONFIRM) && (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) && !error && dp == NULL) { do { ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op, name, namelen, &dp, 0, 0x0, cred, p, 0, 1); if (ret == NFSERR_DELAY) (void) nfs_catnap(PZERO, ret, "nfs_crt2"); } while (ret == NFSERR_DELAY); if (ret) { if (dp != NULL) { - FREE((caddr_t)dp, M_NFSCLDELEG); + free(dp, M_NFSCLDELEG); dp = NULL; } if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) error = ret; } } nfscl_openrelease(nmp, op, error, newone); *unlockedp = 1; } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALECLIENTID) nfscl_initiate_recovery(owp->nfsow_clp); nfsmout: if (!error) *dpp = dp; else if (dp != NULL) - FREE((caddr_t)dp, M_NFSCLDELEG); + free(dp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * Nfs remove rpc */ APPLESTATIC int nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsnode *np; struct nfsmount *nmp; nfsv4stateid_t dstateid; int error, ret = 0, i; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); nmp = VFSTONFS(vnode_mount(dvp)); tryagain: if (NFSHASNFSV4(nmp) && ret == 0) { ret = nfscl_removedeleg(vp, p, &dstateid); if (ret == 1) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dstateid.seqid; *tl++ = dstateid.other[0]; *tl++ = dstateid.other[1]; *tl++ = dstateid.other[2]; *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(dvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_REMOVE); } } else { ret = 0; } if (ret == 0) NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp); (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { /* For NFSv4, parse out any Delereturn replies. */ if (ret > 0 && nd->nd_repstat != 0 && (nd->nd_flag & ND_NOMOREDATA)) { /* * If the Delegreturn failed, try again without * it. The server will Recall, as required. */ mbuf_freem(nd->nd_mrep); goto tryagain; } for (i = 0; i < (ret * 2); i++) { if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } } error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do an nfs rename rpc. */ APPLESTATIC int nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen, vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap, int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; struct nfsnode *np; nfsattrbit_t attrbits; nfsv4stateid_t fdstateid, tdstateid; int error = 0, ret = 0, gottd = 0, gotfd = 0, i; *fattrflagp = 0; *tattrflagp = 0; nmp = VFSTONFS(vnode_mount(fdvp)); if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); tryagain: if (NFSHASNFSV4(nmp) && ret == 0) { ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp, &tdstateid, &gottd, p); if (gotfd && gottd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp); } else if (gotfd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp); } else if (gottd) { NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp); } if (gotfd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = fdstateid.seqid; *tl++ = fdstateid.other[0]; *tl++ = fdstateid.other[1]; *tl = fdstateid.other[2]; if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(tvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_DELEGRETURN); } } if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = tdstateid.seqid; *tl++ = tdstateid.other[0]; *tl++ = tdstateid.other[1]; *tl = tdstateid.other[2]; } if (ret > 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); np = VTONFS(fdvp); (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_SAVEFH); } } else { ret = 0; } if (ret == 0) NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh, VTONFS(tdvp)->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_RENAME); } (void) nfsm_strtom(nd, fnameptr, fnamelen); if (!(nd->nd_flag & ND_NFSV4)) (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh, VTONFS(tdvp)->n_fhp->nfh_len, 0); (void) nfsm_strtom(nd, tnameptr, tnamelen); error = nfscl_request(nd, fdvp, p, cred, fstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { /* For NFSv4, parse out any Delereturn replies. */ if (ret > 0 && nd->nd_repstat != 0 && (nd->nd_flag & ND_NOMOREDATA)) { /* * If the Delegreturn failed, try again without * it. The server will Recall, as required. */ mbuf_freem(nd->nd_mrep); goto tryagain; } for (i = 0; i < (ret * 2); i++) { if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) { if (i == 0 && ret > 1) { /* * If the Delegreturn failed, try again * without it. The server will Recall, as * required. * If ret > 1, the first iteration of this * loop is the second DelegReturn result. */ mbuf_freem(nd->nd_mrep); goto tryagain; } else { nd->nd_flag |= ND_NOMOREDATA; } } } } /* Now, the first wcc attribute reply. */ if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, fstuff); /* and the second wcc attribute reply. */ if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; } if (!error) error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp, NULL, tstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs hard link create rpc */ APPLESTATIC int nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error = 0; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_LINK, vp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); } (void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh, VTONFS(dvp)->n_fhp->nfh_len, 0); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LINK); } (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, vp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, dstuff); if (!error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) { /* * First, parse out the PutFH and Getattr result. */ NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (!(*(tl + 1))) NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*(tl + 1)) nd->nd_flag |= ND_NOMOREDATA; /* * Get the pre-op attributes. */ error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs symbolic link create rpc */ APPLESTATIC int nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target, struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; int slen, error = 0; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; nmp = VFSTONFS(vnode_mount(dvp)); slen = strlen(target); if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFLNK); (void) nfsm_strtom(nd, target, slen); } (void) nfsm_strtom(nd, name, namelen); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) nfscl_fillsattr(nd, vap, dvp, 0, 0); if (!(nd->nd_flag & ND_NFSV4)) (void) nfsm_strtom(nd, target, slen); if (nd->nd_flag & ND_NFSV2) nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if ((nd->nd_flag & ND_NFSV3) && !error) { if (!nd->nd_repstat) error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (!error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); } if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. * Only do this if vfs.nfs.ignore_eexist is set. * Never do this for NFSv4.1 or later minor versions, since sessions * should guarantee "exactly once" RPC semantics. */ if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) || nmp->nm_minorvers == 0)) error = 0; return (error); } /* * nfs make dir rpc */ APPLESTATIC int nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error = 0; struct nfsfh *fhp; struct nfsmount *nmp; *nfhpp = NULL; *attrflagp = 0; *dattrflagp = 0; nmp = VFSTONFS(vnode_mount(dvp)); fhp = VTONFS(dvp)->n_fhp; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFDIR); } (void) nfsm_strtom(nd, name, namelen); nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0); if (nd->nd_flag & ND_NFSV4) { NFSGETATTR_ATTRBIT(&attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & ND_NFSV4) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (!nd->nd_repstat && !error) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); } if (!error) error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) { /* Get rid of the PutFH and Getattr status values. */ NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); if (error == 0) *dattrflagp = 1; } } if ((nd->nd_flag & ND_NFSV3) && !error) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. * Only do this if vfs.nfs.ignore_eexist is set. * Never do this for NFSv4.1 or later minor versions, since sessions * should guarantee "exactly once" RPC semantics. */ if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) || nmp->nm_minorvers == 0)) error = 0; return (error); } /* * nfs remove directory call */ APPLESTATIC int nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp); (void) nfsm_strtom(nd, name, namelen); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error) return (error); if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * Readdir rpc. * Always returns with either uio_resid unchanged, if you are at the * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks * filled in. * I felt this would allow caching of directory blocks more easily * than returning a pertially filled block. * Directory offset cookies: * Oh my, what to do with them... * I can think of three ways to deal with them: * 1 - have the layer above these RPCs maintain a map between logical * directory byte offsets and the NFS directory offset cookies * 2 - pass the opaque directory offset cookies up into userland * and let the libc functions deal with them, via the system call * 3 - return them to userland in the "struct dirent", so future versions * of libc can use them and do whatever is necessary to make things work * above these rpc calls, in the meantime * For now, I do #3 by "hiding" the directory offset cookies after the * d_name field in struct dirent. This is space inside d_reclen that * will be ignored by anything that doesn't know about them. * The directory offset cookies are filled in as the last 8 bytes of * each directory entry, after d_name. Someday, the userland libc * functions may be able to use these. In the meantime, it satisfies * OpenBSD's requirements for cookies being returned. * If expects the directory offset cookie for the read to be in uio_offset * and returns the one for the next entry after this directory block in * there, as well. */ APPLESTATIC int nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int *eofp, void *stuff) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; nfsquad_t cookie, ncookie; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *dnp = VTONFS(vp); struct nfsvattr nfsva; struct nfsrv_descript nfsd, *nd = &nfsd; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0; u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX; char *cp; nfsattrbit_t attrbits, dattrbits; u_int32_t rderr, *tl2 = NULL; size_t tresid; KASSERT(uiop->uio_iovcnt == 1 && (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0, ("nfs readdirrpc bad uio")); /* * There is no point in reading a lot more than uio_resid, however * adding one additional DIRBLKSIZ makes sense. Since uio_resid * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this * will never make readsize > nm_readdirsize. */ readsize = nmp->nm_readdirsize; if (readsize > uio_uio_resid(uiop)) readsize = uio_uio_resid(uiop) + DIRBLKSIZ; *attrflagp = 0; if (eofp) *eofp = 0; tresid = uio_uio_resid(uiop); cookie.lval[0] = cookiep->nfsuquad[0]; cookie.lval[1] = cookiep->nfsuquad[1]; nd->nd_mrep = NULL; /* * For NFSv4, first create the "." and ".." entries. */ if (NFSHASNFSV4(nmp)) { reqsize = 6 * NFSX_UNSIGNED; NFSGETATTR_ATTRBIT(&dattrbits); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE); if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, NFSATTRBIT_MOUNTEDONFILEID)) { NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); gotmnton = 1; } else { /* * Must fake it. Use the fileno, except when the * fsid is != to that of the directory. For that * case, generate a fake fileno that is not the same. */ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID); gotmnton = 0; } /* * Joy, oh joy. For V4 we get to hand craft '.' and '..'. */ if (uiop->uio_offset == 0) { NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); dotfileid = 0; /* Fake out the compiler. */ if ((nd->nd_flag & ND_NOMOREDATA) == 0) { error = nfsm_loadattr(nd, &nfsva); if (error != 0) goto nfsmout; dotfileid = nfsva.na_fileid; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 4)); if (len > 0 && len <= NFSX_V4FHMAX) error = nfsm_advance(nd, NFSM_RNDUP(len), -1); else error = EPERM; if (!error) { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_mntonfileno = UINT64_MAX; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { if (nfsva.na_mntonfileno != UINT64_MAX) dotdotfileid = nfsva.na_mntonfileno; else dotdotfileid = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dotdotfileid = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dotdotfileid = fakefileno; } } } else if (nd->nd_repstat == NFSERR_NOENT) { /* * Lookupp returns NFSERR_NOENT when we are * at the root, so just use the current dir. */ nd->nd_repstat = 0; dotdotfileid = dotfileid; } else { error = nd->nd_repstat; } mbuf_freem(nd->nd_mrep); if (error) return (error); nd->nd_mrep = NULL; dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotfileid; dp->d_namlen = 1; *((uint64_t *)dp->d_name) = 0; /* Zero pad it. */ dp->d_name[0] = '.'; dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotdotfileid; dp->d_namlen = 2; *((uint64_t *)dp->d_name) = 0; dp->d_name[0] = '.'; dp->d_name[1] = '.'; dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); } NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR); } else { reqsize = 5 * NFSX_UNSIGNED; } /* * Loop around doing readdir rpc's of size readsize. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READDIR, vp); if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = cookie.lval[1]; *tl = txdr_unsigned(readsize); } else { NFSM_BUILD(tl, u_int32_t *, reqsize); *tl++ = cookie.lval[0]; *tl++ = cookie.lval[1]; if (cookie.qval == 0) { *tl++ = 0; *tl++ = 0; } else { NFSLOCKNODE(dnp); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; NFSUNLOCKNODE(dnp); } if (nd->nd_flag & ND_NFSV4) { *tl++ = txdr_unsigned(readsize); *tl = txdr_unsigned(readsize); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &dattrbits); } else { *tl = txdr_unsigned(readsize); } } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!(nd->nd_flag & ND_NFSV2)) { if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (!nd->nd_repstat && !error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); NFSLOCKNODE(dnp); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; NFSUNLOCKNODE(dnp); } } if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); if (!more_dirs) tryformoredirs = 0; /* loop through the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; len = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); nfsva.na_fileid = fxdr_hyper(tl); tl += 2; len = fxdr_unsigned(int, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_fileid = fxdr_unsigned(uint64_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; goto nfsmout; } tlen = roundup2(len, 8); if (tlen == len) tlen += 8; /* To ensure null termination. */ left = DIRBLKSIZ - blksiz; if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) { dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; blksiz = 0; } if (_GENERIC_DIRLEN(len) + NFSX_HYPER > uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_namlen = len; dp->d_reclen = _GENERIC_DIRLEN(len) + NFSX_HYPER; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uio_uio_resid_add(uiop, -(DIRHDSIZ)); uiop->uio_offset += DIRHDSIZ; uio_iov_base_add(uiop, DIRHDSIZ); uio_iov_len_add(uiop, -(DIRHDSIZ)); error = nfsm_mbufuio(nd, uiop, len); if (error) goto nfsmout; cp = uio_iov_base(uiop); tlen -= len; *cp = '\0'; /* null terminate */ cp += tlen; /* points to cookie storage */ tl2 = (u_int32_t *)cp; uio_iov_base_add(uiop, (tlen + NFSX_HYPER)); uio_iov_len_add(uiop, -(tlen + NFSX_HYPER)); uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER)); uiop->uio_offset += (tlen + NFSX_HYPER); } else { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV4) { rderr = 0; nfsva.na_mntonfileno = UINT64_MAX; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, &rderr, p, cred); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; } else { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); ncookie.lval[0] = 0; ncookie.lval[1] = *tl++; } if (bigenough) { if (nd->nd_flag & ND_NFSV4) { if (rderr) { dp->d_fileno = 0; } else { if (gotmnton) { if (nfsva.na_mntonfileno != UINT64_MAX) dp->d_fileno = nfsva.na_mntonfileno; else dp->d_fileno = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dp->d_fileno = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dp->d_fileno = fakefileno; } dp->d_type = vtonfs_dtype(nfsva.na_type); } } else { dp->d_fileno = nfsva.na_fileid; } *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] = ncookie.lval[0]; *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] = ncookie.lval[1]; } more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); if (tryformoredirs) more_dirs = !eof; if (nd->nd_flag & ND_NFSV4) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } } mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; } /* * If returning no data, assume end of file. * If not bigenough, return not end of file, since you aren't * returning all the data * Otherwise, return the eof flag from the server. */ if (eofp) { if (tresid == ((size_t)(uio_uio_resid(uiop)))) *eofp = 1; else if (!bigenough) *eofp = 0; else *eofp = eof; } /* * Add extra empty records to any remaining DIRBLKSIZ chunks. */ while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_UNKNOWN; dp->d_fileno = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; tl = (u_int32_t *)&dp->d_name[4]; *tl++ = cookie.lval[0]; *tl = cookie.lval[1]; dp->d_reclen = DIRBLKSIZ; uio_iov_base_add(uiop, DIRBLKSIZ); uio_iov_len_add(uiop, -(DIRBLKSIZ)); uio_uio_resid_add(uiop, -(DIRBLKSIZ)); uiop->uio_offset += DIRBLKSIZ; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } #ifndef APPLE /* * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir(). * (Also used for NFS V4 when mount flag set.) * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.) */ APPLESTATIC int nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int *eofp, void *stuff) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; vnode_t newvp = NULLVP; struct nfsrv_descript nfsd, *nd = &nfsd; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsnode *dnp = VTONFS(vp), *np; struct nfsvattr nfsva; struct nfsfh *nfhp; nfsquad_t cookie, ncookie; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0; int isdotdot = 0, unlocknewvp = 0; u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX; u_int64_t fileno = 0; char *cp; nfsattrbit_t attrbits, dattrbits; size_t tresid; u_int32_t *tl2 = NULL, rderr; struct timespec dctime; KASSERT(uiop->uio_iovcnt == 1 && (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0, ("nfs readdirplusrpc bad uio")); timespecclear(&dctime); *attrflagp = 0; if (eofp != NULL) *eofp = 0; ndp->ni_dvp = vp; nd->nd_mrep = NULL; cookie.lval[0] = cookiep->nfsuquad[0]; cookie.lval[1] = cookiep->nfsuquad[1]; tresid = uio_uio_resid(uiop); /* * For NFSv4, first create the "." and ".." entries. */ if (NFSHASNFSV4(nmp)) { NFSGETATTR_ATTRBIT(&dattrbits); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID); if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, NFSATTRBIT_MOUNTEDONFILEID)) { NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); gotmnton = 1; } else { /* * Must fake it. Use the fileno, except when the * fsid is != to that of the directory. For that * case, generate a fake fileno that is not the same. */ NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID); gotmnton = 0; } /* * Joy, oh joy. For V4 we get to hand craft '.' and '..'. */ if (uiop->uio_offset == 0) { NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); dotfileid = 0; /* Fake out the compiler. */ if ((nd->nd_flag & ND_NOMOREDATA) == 0) { error = nfsm_loadattr(nd, &nfsva); if (error != 0) goto nfsmout; dctime = nfsva.na_ctime; dotfileid = nfsva.na_fileid; } if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 4)); if (len > 0 && len <= NFSX_V4FHMAX) error = nfsm_advance(nd, NFSM_RNDUP(len), -1); else error = EPERM; if (!error) { NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED); nfsva.na_mntonfileno = UINT64_MAX; error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error) { dotdotfileid = dotfileid; } else if (gotmnton) { if (nfsva.na_mntonfileno != UINT64_MAX) dotdotfileid = nfsva.na_mntonfileno; else dotdotfileid = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dotdotfileid = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dotdotfileid = fakefileno; } } } else if (nd->nd_repstat == NFSERR_NOENT) { /* * Lookupp returns NFSERR_NOENT when we are * at the root, so just use the current dir. */ nd->nd_repstat = 0; dotdotfileid = dotfileid; } else { error = nd->nd_repstat; } mbuf_freem(nd->nd_mrep); if (error) return (error); nd->nd_mrep = NULL; dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotfileid; dp->d_namlen = 1; *((uint64_t *)dp->d_name) = 0; /* Zero pad it. */ dp->d_name[0] = '.'; dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_type = DT_DIR; dp->d_fileno = dotdotfileid; dp->d_namlen = 2; *((uint64_t *)dp->d_name) = 0; dp->d_name[0] = '.'; dp->d_name[1] = '.'; dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER; /* * Just make these offset cookie 0. */ tl = (u_int32_t *)&dp->d_name[8]; *tl++ = 0; *tl = 0; blksiz += dp->d_reclen; uio_uio_resid_add(uiop, -(dp->d_reclen)); uiop->uio_offset += dp->d_reclen; uio_iov_base_add(uiop, dp->d_reclen); uio_iov_len_add(uiop, -(dp->d_reclen)); } NFSREADDIRPLUS_ATTRBIT(&attrbits); if (gotmnton) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_MOUNTEDONFILEID); } /* * Loop around doing readdir rpc's of size nm_readdirsize. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp); NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED); *tl++ = cookie.lval[0]; *tl++ = cookie.lval[1]; if (cookie.qval == 0) { *tl++ = 0; *tl++ = 0; } else { NFSLOCKNODE(dnp); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; NFSUNLOCKNODE(dnp); } *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_readdirsize); if (nd->nd_flag & ND_NFSV4) { (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); (void) nfsrv_putattrbit(nd, &dattrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat || error) { if (!error) error = nd->nd_repstat; goto nfsmout; } if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0) dctime = nap->na_ctime; NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); NFSLOCKNODE(dnp); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; NFSUNLOCKNODE(dnp); more_dirs = fxdr_unsigned(int, *tl); if (!more_dirs) tryformoredirs = 0; /* loop through the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); if (nd->nd_flag & ND_NFSV4) { ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; } else { fileno = fxdr_hyper(tl); tl += 2; } len = fxdr_unsigned(int, *tl); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; goto nfsmout; } tlen = roundup2(len, 8); if (tlen == len) tlen += 8; /* To ensure null termination. */ left = DIRBLKSIZ - blksiz; if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) { dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; blksiz = 0; } if (_GENERIC_DIRLEN(len) + NFSX_HYPER > uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_off = 0; dp->d_namlen = len; dp->d_reclen = _GENERIC_DIRLEN(len) + NFSX_HYPER; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uio_uio_resid_add(uiop, -(DIRHDSIZ)); uiop->uio_offset += DIRHDSIZ; uio_iov_base_add(uiop, DIRHDSIZ); uio_iov_len_add(uiop, -(DIRHDSIZ)); cnp->cn_nameptr = uio_iov_base(uiop); cnp->cn_namelen = len; NFSCNHASHZERO(cnp); error = nfsm_mbufuio(nd, uiop, len); if (error) goto nfsmout; cp = uio_iov_base(uiop); tlen -= len; *cp = '\0'; cp += tlen; /* points to cookie storage */ tl2 = (u_int32_t *)cp; if (len == 2 && cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.') isdotdot = 1; else isdotdot = 0; uio_iov_base_add(uiop, (tlen + NFSX_HYPER)); uio_iov_len_add(uiop, -(tlen + NFSX_HYPER)); uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER)); uiop->uio_offset += (tlen + NFSX_HYPER); } else { error = nfsm_advance(nd, NFSM_RNDUP(len), -1); if (error) goto nfsmout; } nfhp = NULL; if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED); ncookie.lval[0] = *tl++; ncookie.lval[1] = *tl++; attrflag = fxdr_unsigned(int, *tl); if (attrflag) { error = nfsm_loadattr(nd, &nfsva); if (error) goto nfsmout; } NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED); if (*tl) { error = nfsm_getfh(nd, &nfhp); if (error) goto nfsmout; } if (!attrflag && nfhp != NULL) { - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); nfhp = NULL; } } else { rderr = 0; nfsva.na_mntonfileno = 0xffffffff; error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, &rderr, p, cred); if (error) goto nfsmout; } if (bigenough) { if (nd->nd_flag & ND_NFSV4) { if (rderr) { dp->d_fileno = 0; } else if (gotmnton) { if (nfsva.na_mntonfileno != 0xffffffff) dp->d_fileno = nfsva.na_mntonfileno; else dp->d_fileno = nfsva.na_fileid; } else if (nfsva.na_filesid[0] == dnp->n_vattr.na_filesid[0] && nfsva.na_filesid[1] == dnp->n_vattr.na_filesid[1]) { dp->d_fileno = nfsva.na_fileid; } else { do { fakefileno--; } while (fakefileno == nfsva.na_fileid); dp->d_fileno = fakefileno; } } else { dp->d_fileno = fileno; } *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] = ncookie.lval[0]; *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] = ncookie.lval[1]; if (nfhp != NULL) { if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len, dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) { VREF(vp); newvp = vp; unlocknewvp = 0; - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); np = dnp; } else if (isdotdot != 0) { /* * Skip doing a nfscl_nget() call for "..". * There's a race between acquiring the nfs * node here and lookups that look for the * directory being read (in the parent). * It would try to get a lock on ".." here, * owning the lock on the directory being * read. Lookup will hold the lock on ".." * and try to acquire the lock on the * directory being read. * If the directory is unlocked/relocked, * then there is a LOR with the buflock * vp is relocked. */ free(nfhp, M_NFSFH); } else { error = nfscl_nget(vnode_mount(vp), vp, nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE); if (!error) { newvp = NFSTOV(np); unlocknewvp = 1; } } nfhp = NULL; if (newvp != NULLVP) { error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 0); if (error) { if (unlocknewvp) vput(newvp); else vrele(newvp); goto nfsmout; } dp->d_type = vtonfs_dtype(np->n_vattr.na_type); ndp->ni_vp = newvp; NFSCNHASH(cnp, HASHINIT); if (cnp->cn_namelen <= NCHNAMLEN && (newvp->v_type != VDIR || dctime.tv_sec != 0)) { cache_enter_time(ndp->ni_dvp, ndp->ni_vp, cnp, &nfsva.na_ctime, newvp->v_type != VDIR ? NULL : &dctime); } if (unlocknewvp) vput(newvp); else vrele(newvp); newvp = NULLVP; } } } else if (nfhp != NULL) { - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); eof = fxdr_unsigned(int, *tl); if (tryformoredirs) more_dirs = !eof; if (nd->nd_flag & ND_NFSV4) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } } mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uio_iov_base_add(uiop, left); uio_iov_len_add(uiop, -(left)); uio_uio_resid_add(uiop, -(left)); uiop->uio_offset += left; } /* * If returning no data, assume end of file. * If not bigenough, return not end of file, since you aren't * returning all the data * Otherwise, return the eof flag from the server. */ if (eofp != NULL) { if (tresid == uio_uio_resid(uiop)) *eofp = 1; else if (!bigenough) *eofp = 0; else *eofp = eof; } /* * Add extra empty records to any remaining DIRBLKSIZ chunks. */ while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) { dp = (struct dirent *)uio_iov_base(uiop); dp->d_type = DT_UNKNOWN; dp->d_fileno = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; tl = (u_int32_t *)&dp->d_name[4]; *tl++ = cookie.lval[0]; *tl = cookie.lval[1]; dp->d_reclen = DIRBLKSIZ; uio_iov_base_add(uiop, DIRBLKSIZ); uio_iov_len_add(uiop, -(DIRBLKSIZ)); uio_uio_resid_add(uiop, -(DIRBLKSIZ)); uiop->uio_offset += DIRBLKSIZ; } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } #endif /* !APPLE */ /* * Nfs commit rpc */ APPLESTATIC int nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp); NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); if (nd->nd_flag & ND_NFSV4) { /* * And do a Getattr op. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); } error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff); if (!error && !nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); NFSLOCKMNT(nmp); if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) { NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); nd->nd_repstat = NFSERR_STALEWRITEVERF; } NFSUNLOCKMNT(nmp); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } nfsmout: if (!error && nd->nd_repstat) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * NFS byte range lock rpc. * (Mostly just calls one of the three lower level RPC routines.) */ APPLESTATIC int nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfscllockowner *lp; struct nfsclclient *clp; struct nfsfh *nfhp; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); u_int64_t off, len; off_t start, end; u_int32_t clidrev = 0; int error = 0, newone = 0, expireret = 0, retrycnt, donelocally; int callcnt, dorpc; /* * Convert the flock structure into a start and end and do POSIX * bounds checking. */ switch (fl->l_whence) { case SEEK_SET: case SEEK_CUR: /* * Caller is responsible for adding any necessary offset * when SEEK_CUR is used. */ start = fl->l_start; off = fl->l_start; break; case SEEK_END: start = size + fl->l_start; off = size + fl->l_start; break; default: return (EINVAL); } if (start < 0) return (EINVAL); if (fl->l_len != 0) { end = start + fl->l_len - 1; if (end < start) return (EINVAL); } len = fl->l_len; if (len == 0) len = NFS64BITSSET; retrycnt = 0; do { nd->nd_repstat = 0; if (op == F_GETLK) { error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags); if (!error) { clidrev = clp->nfsc_clientidrev; error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred, p, id, flags); } else if (error == -1) { error = 0; } nfscl_clientrelease(clp); } else if (op == F_UNLCK && fl->l_type == F_UNLCK) { /* * We must loop around for all lockowner cases. */ callcnt = 0; error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); do { error = nfscl_relbytelock(vp, off, len, cred, p, callcnt, clp, id, flags, &lp, &dorpc); /* * If it returns a NULL lp, we're done. */ if (lp == NULL) { if (callcnt == 0) nfscl_clientrelease(clp); else nfscl_releasealllocks(clp, vp, p, id, flags); return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; /* * If the server doesn't support Posix lock semantics, * only allow locks on the entire file, since it won't * handle overlapping byte ranges. * There might still be a problem when a lock * upgrade/downgrade (read<->write) occurs, since the * server "might" expect an unlock first? */ if (dorpc && (lp->nfsl_open->nfso_posixlock || (off == 0 && len == NFS64BITSSET))) { /* * Since the lock records will go away, we must * wait for grace and delay here. */ do { error = nfsrpc_locku(nd, nmp, lp, off, len, NFSV4LOCKT_READ, cred, p, 0); if ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfs_advlock"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0); } callcnt++; } while (error == 0 && nd->nd_repstat == 0); nfscl_releasealllocks(clp, vp, p, id, flags); } else if (op == F_SETLK) { error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p, NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally); if (error || donelocally) { return (error); } if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; else clidrev = 0; nfhp = VTONFS(vp)->n_fhp; if (!lp->nfsl_open->nfso_posixlock && (off != 0 || len != NFS64BITSSET)) { error = EINVAL; } else { error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, lp, newone, reclaim, off, len, fl->l_type, cred, p, 0); } if (!error) error = nd->nd_repstat; nfscl_lockrelease(lp, error, newone); } else { error = EINVAL; } if (!error) error = nd->nd_repstat; if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_advlock"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); retrycnt++; } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) error = EIO; return (error); } /* * The lower level routine for the LockT case. */ APPLESTATIC int nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { u_int32_t *tl; int error, type, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsnode *np; struct nfsmount *nmp; struct nfsclsession *tsep; nmp = VFSTONFS(vp->v_mount); NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (fl->l_type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); else *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nfscl_filllockowner(id, own, flags); np = VTONFS(vp); NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN], np->n_fhp->nfh_len); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len); error = nfscl_request(nd, vp, p, cred, NULL); if (error) return (error); if (nd->nd_repstat == 0) { fl->l_type = F_UNLCK; } else if (nd->nd_repstat == NFSERR_DENIED) { nd->nd_repstat = 0; fl->l_whence = SEEK_SET; NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); fl->l_start = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; if (len == NFS64BITSSET) fl->l_len = 0; else fl->l_len = len; type = fxdr_unsigned(int, *tl++); if (type == NFSV4LOCKT_WRITE) fl->l_type = F_WRLCK; else fl->l_type = F_RDLCK; /* * XXX For now, I have no idea what to do with the * conflicting lock_owner, so I'll just set the pid == 0 * and skip over the lock_owner. */ fl->l_pid = (pid_t)0; tl += 2; size = fxdr_unsigned(int, *tl); if (size < 0 || size > NFSV4_OPAQUELIMIT) error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); } else if (nd->nd_repstat == NFSERR_STALECLIENTID) nfscl_initiate_recovery(clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Lower level function that performs the LockU RPC. */ static int nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfscllockowner *lp, u_int64_t off, u_int64_t len, u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error; nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh, lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(type); *tl = txdr_unsigned(lp->nfsl_seqid); if (nfstest_outofseq && (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); tl++; if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); lp->nfsl_stateid.seqid = *tl++; lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; } else if (nd->nd_repstat == NFSERR_STALESTATEID) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * The actual Lock RPC. */ APPLESTATIC int nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone, int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p, int syscred) { u_int32_t *tl; int error, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); else *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); *tl++ = txdr_unsigned(reclaim); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; if (newone) { *tl = newnfs_true; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 2 * NFSX_UNSIGNED + NFSX_HYPER); *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_open->nfso_stateid.seqid; *tl++ = lp->nfsl_open->nfso_stateid.other[0]; *tl++ = lp->nfsl_open->nfso_stateid.other[1]; *tl++ = lp->nfsl_open->nfso_stateid.other[2]; *tl++ = txdr_unsigned(lp->nfsl_seqid); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); } else { *tl = newnfs_false; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; *tl = txdr_unsigned(lp->nfsl_seqid); if (nfstest_outofseq && (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); } if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (newone) NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); lp->nfsl_stateid.seqid = *tl++; lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; } else if (nd->nd_repstat == NFSERR_DENIED) { NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); size = fxdr_unsigned(int, *(tl + 7)); if (size < 0 || size > NFSV4_OPAQUELIMIT) error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); } else if (nd->nd_repstat == NFSERR_STALESTATEID) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs statfs rpc * (always called with the vp for the mount point) */ APPLESTATIC int nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl = NULL; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; nfsattrbit_t attrbits; int error; *attrflagp = 0; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4(nmp)) { /* * For V4, you actually do a getattr. */ NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); NFSSTATFS_GETATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p, cred); if (!error) { nmp->nm_fsid[0] = nap->na_filesid[0]; nmp->nm_fsid[1] = nap->na_filesid[1]; NFSSETHASSETFSID(nmp); *attrflagp = 1; } } else { error = nd->nd_repstat; } if (error) goto nfsmout; } else { NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_flag & ND_NFSV3) { error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (error) goto nfsmout; } if (nd->nd_repstat) { error = nd->nd_repstat; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATFS(nd->nd_flag & ND_NFSV3)); } if (NFSHASNFSV3(nmp)) { sbp->sf_tbytes = fxdr_hyper(tl); tl += 2; sbp->sf_fbytes = fxdr_hyper(tl); tl += 2; sbp->sf_abytes = fxdr_hyper(tl); tl += 2; sbp->sf_tfiles = fxdr_hyper(tl); tl += 2; sbp->sf_ffiles = fxdr_hyper(tl); tl += 2; sbp->sf_afiles = fxdr_hyper(tl); tl += 2; sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl); } else if (NFSHASNFSV4(nmp) == 0) { sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++); sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl); } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs pathconf rpc */ APPLESTATIC int nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp; u_int32_t *tl; nfsattrbit_t attrbits; int error; *attrflagp = 0; nmp = VFSTONFS(vnode_mount(vp)); if (NFSHASNFSV4(nmp)) { /* * For V4, you actually do a getattr. */ NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp); NFSPATHCONF_GETATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (nd->nd_repstat == 0) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (!error) *attrflagp = 1; } else { error = nd->nd_repstat; } } else { NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF); pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++); pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++); pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++); pc->pc_chownrestricted = fxdr_unsigned(u_int32_t, *tl++); pc->pc_caseinsensitive = fxdr_unsigned(u_int32_t, *tl++); pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl); } } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * nfs version 3 fsinfo rpc call */ APPLESTATIC int nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); error = nfscl_postop_attr(nd, nap, attrflagp, stuff); if (nd->nd_repstat && !error) error = nd->nd_repstat; if (!error) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO); fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++); fsp->fs_maxfilesize = fxdr_hyper(tl); tl += 2; fxdr_nfsv3time(tl, &fsp->fs_timedelta); tl += 2; fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl); } nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Renew RPC. */ APPLESTATIC int nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsmount *nmp; int error; struct nfssockreq *nrp; struct nfsclsession *tsep; nmp = clp->nfsc_nmp; if (nmp == NULL) return (0); if (dsp == NULL) nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0, 0); else nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, &dsp->nfsclds_sess, 0, 0); if (!NFSHASNFSV4N(nmp)) { /* NFSv4.1 just uses a Sequence Op and not a Renew. */ NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; } nrp = NULL; if (dsp != NULL) nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; nd->nd_flag |= ND_USEGSSNAME; if (dsp == NULL) error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); else error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Releaselockowner RPC. */ APPLESTATIC int nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp, uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; u_int32_t *tl; int error; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsclsession *tsep; if (NFSHASNFSV4N(nmp)) { /* For NFSv4.1, do a FreeStateID. */ nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL, NULL, 0, 0); nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID); } else { nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Compound to get the mount pt FH. */ APPLESTATIC int nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; u_char *cp, *cp2; int error, cnt, len, setnil; u_int32_t *opcntp; nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0, 0); cp = dirpath; cnt = 0; do { setnil = 0; while (*cp == '/') cp++; cp2 = cp; while (*cp2 != '\0' && *cp2 != '/') cp2++; if (*cp2 == '/') { setnil = 1; *cp2 = '\0'; } if (cp2 != cp) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LOOKUP); nfsm_strtom(nd, cp, strlen(cp)); cnt++; } if (setnil) *cp2++ = '/'; cp = cp2; } while (*cp != '\0'); if (NFSHASNFSV4N(nmp)) /* Has a Sequence Op done by nfscl_reqstart(). */ *opcntp = txdr_unsigned(3 + cnt); else *opcntp = txdr_unsigned(2 + cnt); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETFH); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED); tl += (2 + 2 * cnt); if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { nd->nd_repstat = NFSERR_BADXDR; } else { nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len); if (nd->nd_repstat == 0) nmp->nm_fhsize = len; } } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * This function performs the Delegreturn RPC. */ APPLESTATIC int nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred, struct nfsmount *nmp, NFSPROC_T *p, int syscred) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh, dp->nfsdl_fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); if (NFSHASNFSV4N(nmp)) *tl++ = 0; else *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs getacl call. */ APPLESTATIC int nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_GETACL, vp); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsrv_putattrbit(nd, &attrbits); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); if (!nd->nd_repstat) error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred); else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * nfs setacl call. */ APPLESTATIC int nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, void *stuff) { int error; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp)) return (EOPNOTSUPP); error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff); return (error); } /* * nfs setacl call. */ static int nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfsattrbit_t attrbits; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (EOPNOTSUPP); NFSCL_REQSTART(nd, NFSPROC_SETACL, vp); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL); (void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0, &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); error = nfscl_request(nd, vp, p, cred, stuff); if (error) return (error); /* Don't care about the pre/postop attributes */ mbuf_freem(nd->nd_mrep); return (nd->nd_repstat); } /* * Do the NFSv4.1 Exchange ID. */ int nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl, v41flags; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsclds *dsp; struct timespec verstime; int error, len; *dspp = NULL; nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* Client owner */ *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(exchflags); *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); /* Set the implementation id4 */ *tl = txdr_unsigned(1); (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); (void) nfsm_strtom(nd, version, strlen(version)); NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ verstime.tv_nsec = 0; txdr_nfsv4time(&verstime, tl); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error, (int)nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER); len = fxdr_unsigned(int, *(tl + 7)); if (len < 0 || len > NFSV4_OPAQUELIMIT) { error = NFSERR_BADXDR; goto nfsmout; } dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS, M_WAITOK | M_ZERO); dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew; dsp->nfsclds_servownlen = len; dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++; dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++; dsp->nfsclds_sess.nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); v41flags = fxdr_unsigned(uint32_t, *tl); if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 && NFSHASPNFSOPT(nmp)) { NFSCL_DEBUG(1, "set PNFS\n"); NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_PNFS; NFSUNLOCKMNT(nmp); dsp->nfsclds_flags |= NFSCLDS_MDS; } if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0) dsp->nfsclds_flags |= NFSCLDS_DS; if (len > 0) nd->nd_repstat = nfsrv_mtostr(nd, dsp->nfsclds_serverown, len); if (nd->nd_repstat == 0) { mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); nfscl_initsessionslots(&dsp->nfsclds_sess); *dspp = dsp; } else free(dsp, M_NFSCLDS); } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Create Session. */ int nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, NFSPROC_T *p) { uint32_t crflags, maxval, *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error, irdcnt; /* Make sure nm_rsize, nm_wsize is set. */ if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0) nmp->nm_rsize = NFS_MAXBSIZE; if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0) nmp->nm_wsize = NFS_MAXBSIZE; nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); *tl++ = sep->nfsess_clientid.lval[0]; *tl++ = sep->nfsess_clientid.lval[1]; *tl++ = txdr_unsigned(sequenceid); crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST); if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0) crflags |= NFSV4CRSESS_CONNBACKCHAN; *tl = txdr_unsigned(crflags); /* Fill in fore channel attributes. */ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */ *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(20); /* Max operations */ *tl++ = txdr_unsigned(64); /* Max slots */ *tl = 0; /* No rdma ird */ /* Fill in back channel attributes. */ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ *tl++ = txdr_unsigned(10000); /* Max request size */ *tl++ = txdr_unsigned(10000); /* Max response size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(4); /* Max operations */ *tl++ = txdr_unsigned(NFSV4_CBSLOTS); /* Max slots */ *tl = 0; /* No rdma ird */ NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */ /* Allow AUTH_SYS callbacks as uid, gid == 0. */ *tl++ = txdr_unsigned(1); /* Auth_sys only */ *tl++ = txdr_unsigned(AUTH_SYS); /* AUTH_SYS type */ *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */ *tl++ = 0; /* Null machine name */ *tl++ = 0; /* Uid == 0 */ *tl++ = 0; /* Gid == 0 */ *tl = 0; /* No additional gids */ nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED); bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); crflags = fxdr_unsigned(uint32_t, *tl); if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_SESSPERSIST; NFSUNLOCKMNT(nmp); } /* Get the fore channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Skip the header pad size. */ /* Make sure nm_wsize is small enough. */ maxval = fxdr_unsigned(uint32_t, *tl++); while (maxval < nmp->nm_wsize + NFS_MAXXDR) { if (nmp->nm_wsize > 8096) nmp->nm_wsize /= 2; else break; } /* Make sure nm_rsize is small enough. */ maxval = fxdr_unsigned(uint32_t, *tl++); while (maxval < nmp->nm_rsize + NFS_MAXXDR) { if (nmp->nm_rsize > 8096) nmp->nm_rsize /= 2; else break; } sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); tl++; sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots); irdcnt = fxdr_unsigned(int, *tl); if (irdcnt > 0) NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED); /* and the back channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl += 5; sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl); NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots); } error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Destroy Session. */ int nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); tsep = nfsmnt_mdssession(nmp); bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Destroy Client. */ int nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; struct nfsclsession *tsep; nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutGet. */ static int nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode, uint64_t offset, uint64_t len, uint64_t minlen, int layouttype, int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p, void *stuff) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0, 0); nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp, layouttype, layoutlen, 0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); if (error == 0 && nd->nd_repstat != 0) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 Get Device Info. */ int nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred, NFSPROC_T *p) { uint32_t cnt, *tl, vers, minorvers; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct sockaddr_in sin, ssin; struct sockaddr_in6 sin6, ssin6; struct nfsclds *dsp = NULL, **dspp, **gotdspp; struct nfscldevinfo *ndi; int addrcnt = 0, bitcnt, error, gotvers, i, isudp, j, stripecnt; uint8_t stripeindex; sa_family_t af, safilled; *ndip = NULL; ndi = NULL; gotdspp = NULL; nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED); NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(100000); if (notifybitsp != NULL && *notifybitsp != 0) { *tl = txdr_unsigned(1); /* One word of bits. */ NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(*notifybitsp); } else *tl = txdr_unsigned(0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (layouttype != fxdr_unsigned(int, *tl)) printf("EEK! devinfo layout type not same!\n"); if (layouttype == NFSLAYOUT_NFSV4_1_FILES) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); stripecnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt); if (stripecnt < 1 || stripecnt > 4096) { printf("pNFS File layout devinfo stripecnt %d:" " out of range\n", stripecnt); error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED); addrcnt = fxdr_unsigned(int, *(tl + stripecnt)); NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt); if (addrcnt < 1 || addrcnt > 128) { printf("NFS devinfo addrcnt %d: out of range\n", addrcnt); error = NFSERR_BADXDR; goto nfsmout; } /* * Now we know how many stripe indices and addresses, so * we can allocate the structure the correct size. */ i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *) + 1; NFSCL_DEBUG(4, "stripeindices=%d\n", i); ndi = malloc(sizeof(*ndi) + (addrcnt + i) * sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO); NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID); ndi->nfsdi_refcnt = 0; ndi->nfsdi_flags = NFSDI_FILELAYOUT; ndi->nfsdi_stripecnt = stripecnt; ndi->nfsdi_addrcnt = addrcnt; /* Fill in the stripe indices. */ for (i = 0; i < stripecnt; i++) { stripeindex = fxdr_unsigned(uint8_t, *tl++); NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex); if (stripeindex >= addrcnt) { printf("pNFS File Layout devinfo" " stripeindex %d: too big\n", (int)stripeindex); error = NFSERR_BADXDR; goto nfsmout; } nfsfldi_setstripeindex(ndi, i, stripeindex); } } else if (layouttype == NFSLAYOUT_FLEXFILE) { /* For Flex File, we only get one address list. */ ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO); NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID); ndi->nfsdi_refcnt = 0; ndi->nfsdi_flags = NFSDI_FLEXFILE; addrcnt = ndi->nfsdi_addrcnt = 1; } /* Now, dissect the server address(es). */ safilled = AF_UNSPEC; for (i = 0; i < addrcnt; i++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); cnt = fxdr_unsigned(uint32_t, *tl); if (cnt == 0) { printf("NFS devinfo 0 len addrlist\n"); error = NFSERR_BADXDR; goto nfsmout; } dspp = nfsfldi_addr(ndi, i); safilled = AF_UNSPEC; for (j = 0; j < cnt; j++) { error = nfsv4_getipaddr(nd, &sin, &sin6, &af, &isudp); if (error != 0 && error != EPERM) { error = NFSERR_BADXDR; goto nfsmout; } if (error == 0 && isudp == 0) { /* * The priority is: * - Same address family. * Save the address and dspp, so that * the connection can be done after * parsing is complete. */ if (safilled == AF_UNSPEC || (af == nmp->nm_nam->sa_family && safilled != nmp->nm_nam->sa_family) ) { if (af == AF_INET) ssin = sin; else ssin6 = sin6; safilled = af; gotdspp = dspp; } } } } gotvers = NFS_VER4; /* Always NFSv4 for File Layout. */ /* For Flex File, we will take one of the versions to use. */ if (layouttype == NFSLAYOUT_FLEXFILE) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 1 || j > NFSDEV_MAXVERS) { printf("pNFS: too many versions\n"); error = NFSERR_BADXDR; goto nfsmout; } gotvers = 0; for (i = 0; i < j; i++) { NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED); vers = fxdr_unsigned(uint32_t, *tl++); minorvers = fxdr_unsigned(uint32_t, *tl++); if ((vers == NFS_VER4 && minorvers == NFSV41_MINORVERSION) || (vers == NFS_VER3 && gotvers == 0)) { gotvers = vers; /* We'll take this one. */ ndi->nfsdi_versindex = i; ndi->nfsdi_vers = vers; ndi->nfsdi_minorvers = minorvers; ndi->nfsdi_rsize = fxdr_unsigned( uint32_t, *tl++); ndi->nfsdi_wsize = fxdr_unsigned( uint32_t, *tl++); if (*tl == newnfs_true) ndi->nfsdi_flags |= NFSDI_TIGHTCOUPLED; else ndi->nfsdi_flags &= ~NFSDI_TIGHTCOUPLED; } } if (gotvers == 0) { printf("pNFS: no NFSv3 or NFSv4.1\n"); error = NFSERR_BADXDR; goto nfsmout; } } /* And the notify bits. */ NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); bitcnt = fxdr_unsigned(int, *tl); if (bitcnt > 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (notifybitsp != NULL) *notifybitsp = fxdr_unsigned(uint32_t, *tl); } if (safilled != AF_UNSPEC) { KASSERT(ndi != NULL, ("ndi is NULL")); *ndip = ndi; } else error = EPERM; if (error == 0) { /* * Now we can do a TCP connection for the correct * NFS version and IP address. */ error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled, gotvers, &dsp, p); } if (error == 0) { KASSERT(gotdspp != NULL, ("gotdspp is NULL")); *gotdspp = dsp; } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: if (error != 0 && ndi != NULL) nfscl_freedevinfo(ndi); mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutCommit. */ int nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp, int layouttype, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); txdr_hyper(off, tl); tl += 2; txdr_hyper(len, tl); tl += 2; if (reclaim != 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl++ = txdr_unsigned(stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; *tl++ = newnfs_true; if (lastbyte < off) lastbyte = off; else if (lastbyte >= (off + len)) lastbyte = off + len - 1; txdr_hyper(lastbyte, tl); tl += 2; *tl++ = newnfs_false; *tl++ = txdr_unsigned(layouttype); /* All supported layouts are 0 length. */ *tl = txdr_unsigned(0); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Do the NFSv4.1 LayoutReturn. */ int nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim, int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset, uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p, void *stuff) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if (reclaim != 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(iomode); *tl = txdr_unsigned(layoutreturn); if (layoutreturn == NFSLAYOUTRETURN_FILE) { NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID + NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid); *tl++ = txdr_unsigned(stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; if (layouttype == NFSLAYOUT_NFSV4_1_FILES) *tl = txdr_unsigned(0); else if (layouttype == NFSLAYOUT_FLEXFILE) { *tl = txdr_unsigned(2 * NFSX_UNSIGNED); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); /* No ioerrs or stats yet. */ *tl++ = 0; *tl = 0; } } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (*tl != 0) { NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); stateidp->other[0] = *tl++; stateidp->other[1] = *tl++; stateidp->other[2] = *tl; } } else error = nd->nd_repstat; nfsmout: mbuf_freem(nd->nd_mrep); return (error); } /* * Acquire a layout and devinfo, if possible. The caller must have acquired * a reference count on the nfsclclient structure before calling this. * Return the layout in lypp with a reference count on it, if successful. */ static int nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp, int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p) { struct nfscllayout *lyp; struct nfsclflayout *flp; struct nfsclflayouthead flh; int error = 0, islocked, layoutlen, layouttype, recalled, retonclose; nfsv4stateid_t stateid; struct nfsclsession *tsep; *lypp = NULL; if (NFSHASFLEXFILE(nmp)) layouttype = NFSLAYOUT_FLEXFILE; else layouttype = NFSLAYOUT_NFSV4_1_FILES; /* * If lyp is returned non-NULL, there will be a refcnt (shared lock) * on it, iff flp != NULL or a lock (exclusive lock) on it iff * flp == NULL. */ lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len, off, &flp, &recalled); islocked = 0; if (lyp == NULL || flp == NULL) { if (recalled != 0) return (EIO); LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); if (lyp == NULL) { stateid.seqid = 0; stateid.other[0] = stateidp->other[0]; stateid.other[1] = stateidp->other[1]; stateid.other[2] = stateidp->other[2]; error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX, (uint64_t)0, layouttype, layoutlen, &stateid, &retonclose, &flh, cred, p, NULL); } else { islocked = 1; stateid.seqid = lyp->nfsly_stateid.seqid; stateid.other[0] = lyp->nfsly_stateid.other[0]; stateid.other[1] = lyp->nfsly_stateid.other[1]; stateid.other[2] = lyp->nfsly_stateid.other[2]; error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, nfhp->nfh_len, iomode, off, UINT64_MAX, (uint64_t)0, layouttype, layoutlen, &stateid, &retonclose, &flh, cred, p, NULL); } error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp, &flh, layouttype, error, NULL, cred, p); if (error == 0) *lypp = lyp; else if (islocked != 0) nfscl_rellayout(lyp, 1); } else *lypp = lyp; return (error); } /* * Do a TCP connection plus exchange id and create session. * If successful, a "struct nfsclds" is linked into the list for the * mount point and a pointer to it is returned. */ static int nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin, struct sockaddr_in6 *sin6, sa_family_t af, int vers, struct nfsclds **dspp, NFSPROC_T *p) { struct sockaddr_in *msad, *sad; struct sockaddr_in6 *msad6, *sad6; struct nfsclclient *clp; struct nfssockreq *nrp; struct nfsclds *dsp, *tdsp; int error; enum nfsclds_state retv; uint32_t sequenceid; KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("nfsrpc_fillsa: NULL nr_cred")); NFSLOCKCLSTATE(); clp = nmp->nm_clp; NFSUNLOCKCLSTATE(); if (clp == NULL) return (EPERM); if (af == AF_INET) { NFSLOCKMNT(nmp); /* * Check to see if we already have a session for this * address that is usable for a DS. * Note that the MDS's address is in a different place * than the sessions already acquired for DS's. */ msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam; tdsp = TAILQ_FIRST(&nmp->nm_sess); while (tdsp != NULL) { if (msad != NULL && msad->sin_family == AF_INET && sin->sin_addr.s_addr == msad->sin_addr.s_addr && sin->sin_port == msad->sin_port && (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 && tdsp->nfsclds_sess.nfsess_defunct == 0) { *dspp = tdsp; NFSUNLOCKMNT(nmp); NFSCL_DEBUG(4, "fnd same addr\n"); return (0); } tdsp = TAILQ_NEXT(tdsp, nfsclds_list); if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) msad = (struct sockaddr_in *) tdsp->nfsclds_sockp->nr_nam; else msad = NULL; } NFSUNLOCKMNT(nmp); /* No IP address match, so look for new/trunked one. */ sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO); sad->sin_len = sizeof(*sad); sad->sin_family = AF_INET; sad->sin_port = sin->sin_port; sad->sin_addr.s_addr = sin->sin_addr.s_addr; nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); nrp->nr_nam = (struct sockaddr *)sad; } else if (af == AF_INET6) { NFSLOCKMNT(nmp); /* * Check to see if we already have a session for this * address that is usable for a DS. * Note that the MDS's address is in a different place * than the sessions already acquired for DS's. */ msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam; tdsp = TAILQ_FIRST(&nmp->nm_sess); while (tdsp != NULL) { if (msad6 != NULL && msad6->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &msad6->sin6_addr) && sin6->sin6_port == msad6->sin6_port && (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 && tdsp->nfsclds_sess.nfsess_defunct == 0) { *dspp = tdsp; NFSUNLOCKMNT(nmp); return (0); } tdsp = TAILQ_NEXT(tdsp, nfsclds_list); if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) msad6 = (struct sockaddr_in6 *) tdsp->nfsclds_sockp->nr_nam; else msad6 = NULL; } NFSUNLOCKMNT(nmp); /* No IP address match, so look for new/trunked one. */ sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO); sad6->sin6_len = sizeof(*sad6); sad6->sin6_family = AF_INET6; sad6->sin6_port = sin6->sin6_port; NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr, sizeof(struct in6_addr)); nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); nrp->nr_nam = (struct sockaddr *)sad6; } else return (EPERM); nrp->nr_sotype = SOCK_STREAM; mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF); nrp->nr_prog = NFS_PROG; nrp->nr_vers = vers; /* * Use the credentials that were used for the mount, which are * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc. * Ref. counting the credentials with crhold() is probably not * necessary, since nm_sockreq.nr_cred won't be crfree()'d until * unmount, but I did it anyhow. */ nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred); error = newnfs_connect(nmp, nrp, NULL, p, 0); NFSCL_DEBUG(3, "DS connect=%d\n", error); dsp = NULL; /* Now, do the exchangeid and create session. */ if (error == 0) { if (vers == NFS_VER4) { error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS, &dsp, nrp->nr_cred, p); NFSCL_DEBUG(3, "DS exchangeid=%d\n", error); if (error != 0) newnfs_disconnect(nrp); } else { dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO); dsp->nfsclds_flags |= NFSCLDS_DS; dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */ mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); } } if (error == 0) { dsp->nfsclds_sockp = nrp; if (vers == NFS_VER4) { NFSLOCKMNT(nmp); retv = nfscl_getsameserver(nmp, dsp, &tdsp); NFSCL_DEBUG(3, "getsame ret=%d\n", retv); if (retv == NFSDSP_USETHISSESSION) { NFSUNLOCKMNT(nmp); /* * If there is already a session for this * server, use it. */ (void)newnfs_disconnect(nrp); nfscl_freenfsclds(dsp); *dspp = tdsp; return (0); } if (retv == NFSDSP_SEQTHISSESSION) sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid; else sequenceid = dsp->nfsclds_sess.nfsess_sequenceid; NFSUNLOCKMNT(nmp); error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, nrp, sequenceid, 0, nrp->nr_cred, p); NFSCL_DEBUG(3, "DS createsess=%d\n", error); } } else { NFSFREECRED(nrp->nr_cred); NFSFREEMUTEX(&nrp->nr_mtx); free(nrp->nr_nam, M_SONAME); free(nrp, M_NFSSOCKREQ); } if (error == 0) { NFSCL_DEBUG(3, "add DS session\n"); /* * Put it at the end of the list. That way the list * is ordered by when the entry was added. This matters * since the one done first is the one that should be * used for sequencid'ing any subsequent create sessions. */ NFSLOCKMNT(nmp); TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list); NFSUNLOCKMNT(nmp); *dspp = dsp; } else if (dsp != NULL) { newnfs_disconnect(nrp); nfscl_freenfsclds(dsp); } return (error); } /* * Do the NFSv4.1 Reclaim Complete. */ int nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error; nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = newnfs_false; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Initialize the slot tables for a session. */ static void nfscl_initsessionslots(struct nfsclsession *sep) { int i; for (i = 0; i < NFSV4_CBSLOTS; i++) { if (sep->nfsess_cbslots[i].nfssl_reply != NULL) m_freem(sep->nfsess_cbslots[i].nfssl_reply); NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot)); } for (i = 0; i < 64; i++) sep->nfsess_slotseq[i] = 0; sep->nfsess_slots = 0; } /* * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS). */ int nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p) { struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfscllayout *layp; struct nfscldevinfo *dip; struct nfsclflayout *rflp; struct mbuf *m; struct nfsclwritedsdorpc *drpc, *tdrpc; nfsv4stateid_t stateid; struct ucred *newcred; uint64_t lastbyte, len, off, oresid, xfer; int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo; void *lckp; uint8_t *dev; void *iovbase; size_t iovlen; off_t offs; ssize_t resid; if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || (np->n_flag & NNOLAYOUT) != 0) return (EIO); /* Now, get a reference cnt on the clientid for this mount. */ if (nfscl_getref(nmp) == 0) return (EIO); /* Find an appropriate stateid. */ newcred = NFSNEWCRED(cred); error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, rwaccess, 1, newcred, p, &stateid, &lckp); if (error != 0) { NFSFREECRED(newcred); nfscl_relref(nmp); return (error); } /* Search for a layout for this file. */ off = uiop->uio_offset; layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, off, &rflp, &recalled); if (layp == NULL || rflp == NULL) { if (recalled != 0) { NFSFREECRED(newcred); nfscl_relref(nmp); return (EIO); } if (layp != NULL) { nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0); layp = NULL; } /* Try and get a Layout, if it is supported. */ if (rwaccess == NFSV4OPEN_ACCESSWRITE || (np->n_flag & NWRITEOPENED) != 0) iolaymode = NFSLAYOUTIOMODE_RW; else iolaymode = NFSLAYOUTIOMODE_READ; error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode, NULL, &stateid, off, &layp, newcred, p); if (error != 0) { NFSLOCKNODE(np); np->n_flag |= NNOLAYOUT; NFSUNLOCKNODE(np); if (lckp != NULL) nfscl_lockderef(lckp); NFSFREECRED(newcred); if (layp != NULL) nfscl_rellayout(layp, 0); nfscl_relref(nmp); return (error); } } /* * Loop around finding a layout that works for the first part of * this I/O operation, and then call the function that actually * does the RPC. */ eof = 0; len = (uint64_t)uiop->uio_resid; while (len > 0 && error == 0 && eof == 0) { off = uiop->uio_offset; error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp); if (error == 0) { oresid = xfer = (uint64_t)uiop->uio_resid; if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off)) xfer = rflp->nfsfl_end - rflp->nfsfl_off; /* * For Flex File layout with mirrored DSs, select one * of them at random for reads. For writes and commits, * do all mirrors. */ m = NULL; drpc = NULL; firstmirror = 0; mirrorcnt = 1; if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 && (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) { if (rwaccess == NFSV4OPEN_ACCESSREAD) { firstmirror = arc4random() % mirrorcnt; mirrorcnt = firstmirror + 1; } else { if (docommit == 0) { /* * Save values, so uiop can be * rolled back upon a write * error. */ offs = uiop->uio_offset; resid = uiop->uio_resid; iovbase = uiop->uio_iov->iov_base; iovlen = uiop->uio_iov->iov_len; m = nfsm_uiombuflist(uiop, len, NULL, NULL); } tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, M_WAITOK | M_ZERO); } } for (i = firstmirror; i < mirrorcnt && error == 0; i++){ if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) dev = rflp->nfsfl_ffm[i].dev; else dev = rflp->nfsfl_dev; dip = nfscl_getdevinfo(nmp->nm_clp, dev, rflp->nfsfl_devp); if (dip != NULL) { if ((rflp->nfsfl_flags & NFSFL_FLEXFILE) != 0) error = nfscl_dofflayoutio(vp, uiop, iomode, must_commit, &eof, &stateid, rwaccess, dip, layp, rflp, off, xfer, i, docommit, m, tdrpc, newcred, p); else error = nfscl_doflayoutio(vp, uiop, iomode, must_commit, &eof, &stateid, rwaccess, dip, layp, rflp, off, xfer, docommit, newcred, p); nfscl_reldevinfo(dip); } else error = EIO; tdrpc++; } if (m != NULL) m_freem(m); tdrpc = drpc; timo = hz / 50; /* Wait for 20msec. */ if (timo < 1) timo = 1; for (i = firstmirror; i < mirrorcnt - 1 && tdrpc != NULL; i++, tdrpc++) { /* * For the unused drpc entries, both inprog and * err == 0, so this loop won't break. */ while (tdrpc->inprog != 0 && tdrpc->done == 0) tsleep(&tdrpc->tsk, PVFS, "clrpcio", timo); if (error == 0 && tdrpc->err != 0) error = tdrpc->err; } free(drpc, M_TEMP); if (error == 0) { if (mirrorcnt > 1 && rwaccess == NFSV4OPEN_ACCESSWRITE && docommit == 0) { NFSLOCKCLSTATE(); layp->nfsly_flags |= NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); } lastbyte = off + xfer - 1; NFSLOCKCLSTATE(); if (lastbyte > layp->nfsly_lastbyte) layp->nfsly_lastbyte = lastbyte; NFSUNLOCKCLSTATE(); } else if (error == NFSERR_OPENMODE && rwaccess == NFSV4OPEN_ACCESSREAD) { NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_OPENMODE; NFSUNLOCKMNT(nmp); } else error = EIO; if (error == 0) len -= (oresid - (uint64_t)uiop->uio_resid); else if (mirrorcnt > 1 && rwaccess == NFSV4OPEN_ACCESSWRITE && docommit == 0) { /* * In case the rpc gets retried, roll the * uio fields changed by nfsm_uiombuflist() * back. */ uiop->uio_offset = offs; uiop->uio_resid = resid; uiop->uio_iov->iov_base = iovbase; uiop->uio_iov->iov_len = iovlen; } } } if (lckp != NULL) nfscl_lockderef(lckp); NFSFREECRED(newcred); nfscl_rellayout(layp, 0); nfscl_relref(nmp); return (error); } /* * Make a copy of the mbuf chain and add an mbuf for null padding, as required. */ static struct mbuf * nfsm_copym(struct mbuf *m, int off, int xfer) { struct mbuf *m2, *m3, *m4; uint32_t *tl; int rem; m2 = m_copym(m, off, xfer, M_WAITOK); rem = NFSM_RNDUP(xfer) - xfer; if (rem > 0) { /* * The zero padding to a multiple of 4 bytes is required by * the XDR. So that the mbufs copied by reference aren't * modified, add an mbuf with the zero'd bytes to the list. * rem will be a maximum of 3, so one zero'd uint32_t is * sufficient. */ m3 = m2; while (m3->m_next != NULL) m3 = m3->m_next; NFSMGET(m4); tl = NFSMTOD(m4, uint32_t *); *tl = 0; mbuf_setlen(m4, rem); mbuf_setnext(m3, m4); } return (m2); } /* * Find a file layout that will handle the first bytes of the requested * range and return the information from it needed to the I/O operation. */ int nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess, struct nfsclflayout **retflpp) { struct nfsclflayout *flp, *nflp, *rflp; uint32_t rw; rflp = NULL; rw = rwaccess; /* For reading, do the Read list first and then the Write list. */ do { if (rw == NFSV4OPEN_ACCESSREAD) flp = LIST_FIRST(&lyp->nfsly_flayread); else flp = LIST_FIRST(&lyp->nfsly_flayrw); while (flp != NULL) { nflp = LIST_NEXT(flp, nfsfl_list); if (flp->nfsfl_off > off) break; if (flp->nfsfl_end > off && (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end)) rflp = flp; flp = nflp; } if (rw == NFSV4OPEN_ACCESSREAD) rw = NFSV4OPEN_ACCESSWRITE; else rw = 0; } while (rw != 0); if (rflp != NULL) { /* This one covers the most bytes starting at off. */ *retflpp = rflp; return (0); } return (EIO); } /* * Do I/O using an NFSv4.1 file layout. */ static int nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp, struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off, uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p) { uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer; int commit_thru_mds, error, stripe_index, stripe_pos; struct nfsnode *np; struct nfsfh *fhp; struct nfsclds **dspp; np = VTONFS(vp); rel_off = off - flp->nfsfl_patoff; stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff; stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) % dp->nfsdi_stripecnt; transfer = stripe_unit_size - (rel_off % stripe_unit_size); error = 0; /* Loop around, doing I/O for each stripe unit. */ while (len > 0 && error == 0) { stripe_index = nfsfldi_stripeindex(dp, stripe_pos); dspp = nfsfldi_addr(dp, stripe_index); if (len > transfer && docommit == 0) xfer = transfer; else xfer = len; if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) { /* Dense layout. */ if (stripe_pos >= flp->nfsfl_fhcnt) return (EIO); fhp = flp->nfsfl_fh[stripe_pos]; io_off = (rel_off / (stripe_unit_size * dp->nfsdi_stripecnt)) * stripe_unit_size + rel_off % stripe_unit_size; } else { /* Sparse layout. */ if (flp->nfsfl_fhcnt > 1) { if (stripe_index >= flp->nfsfl_fhcnt) return (EIO); fhp = flp->nfsfl_fh[stripe_index]; } else if (flp->nfsfl_fhcnt == 1) fhp = flp->nfsfl_fh[0]; else fhp = np->n_fhp; io_off = off; } if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) { commit_thru_mds = 1; if (docommit != 0) error = EIO; } else { commit_thru_mds = 0; mtx_lock(&np->n_mtx); np->n_flag |= NDSCOMMIT; mtx_unlock(&np->n_mtx); } if (docommit != 0) { if (error == 0) error = nfsrpc_commitds(vp, io_off, xfer, *dspp, fhp, 0, 0, cred, p); if (error == 0) { /* * Set both eof and uio_resid = 0 to end any * loops. */ *eofp = 1; uiop->uio_resid = 0; } else { mtx_lock(&np->n_mtx); np->n_flag &= ~NDSCOMMIT; mtx_unlock(&np->n_mtx); } } else if (rwflag == NFSV4OPEN_ACCESSREAD) error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, io_off, xfer, fhp, 0, 0, 0, cred, p); else { error = nfsrpc_writeds(vp, uiop, iomode, must_commit, stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds, 0, 0, 0, cred, p); if (error == 0) { NFSLOCKCLSTATE(); lyp->nfsly_flags |= NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); } } if (error == 0) { transfer = stripe_unit_size; stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt; len -= xfer; off += xfer; } } return (error); } /* * Do I/O using an NFSv4.1 flex file layout. */ static int nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp, struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off, uint64_t len, int mirror, int docommit, struct mbuf *mp, struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p) { uint64_t transfer, xfer; int error, rel_off; struct nfsnode *np; struct nfsfh *fhp; struct nfsclds **dspp; struct ucred *tcred; struct mbuf *m; np = VTONFS(vp); error = 0; rel_off = 0; NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off, (uintmax_t)len); /* Loop around, doing I/O for each stripe unit. */ while (len > 0 && error == 0) { dspp = nfsfldi_addr(dp, 0); fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex]; stateidp = &flp->nfsfl_ffm[mirror].st; NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n", mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid); if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) { tcred = NFSNEWCRED(cred); tcred->cr_uid = flp->nfsfl_ffm[mirror].user; tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group; tcred->cr_ngroups = 1; } else tcred = cred; if (rwflag == NFSV4OPEN_ACCESSREAD) transfer = dp->nfsdi_rsize; else transfer = dp->nfsdi_wsize; mtx_lock(&np->n_mtx); np->n_flag |= NDSCOMMIT; mtx_unlock(&np->n_mtx); if (len > transfer && docommit == 0) xfer = transfer; else xfer = len; if (docommit != 0) { if (error == 0) { /* * Do last mirrored DS commit with this thread. */ if (mirror < flp->nfsfl_mirrorcnt - 1) error = nfsio_commitds(vp, off, xfer, *dspp, fhp, dp->nfsdi_vers, dp->nfsdi_minorvers, drpc, tcred, p); else error = nfsrpc_commitds(vp, off, xfer, *dspp, fhp, dp->nfsdi_vers, dp->nfsdi_minorvers, tcred, p); } NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error); if (error == 0) { /* * Set both eof and uio_resid = 0 to end any * loops. */ *eofp = 1; uiop->uio_resid = 0; } else { mtx_lock(&np->n_mtx); np->n_flag &= ~NDSCOMMIT; mtx_unlock(&np->n_mtx); } } else if (rwflag == NFSV4OPEN_ACCESSREAD) error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, off, xfer, fhp, 1, dp->nfsdi_vers, dp->nfsdi_minorvers, tcred, p); else { if (flp->nfsfl_mirrorcnt == 1) { error = nfsrpc_writeds(vp, uiop, iomode, must_commit, stateidp, *dspp, off, xfer, fhp, 0, 1, dp->nfsdi_vers, dp->nfsdi_minorvers, tcred, p); if (error == 0) { NFSLOCKCLSTATE(); lyp->nfsly_flags |= NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); } } else { m = nfsm_copym(mp, rel_off, xfer); NFSCL_DEBUG(4, "mcopy reloff=%d xfer=%jd\n", rel_off, (uintmax_t)xfer); /* * Do last write to a mirrored DS with this * thread. */ if (mirror < flp->nfsfl_mirrorcnt - 1) error = nfsio_writedsmir(vp, iomode, must_commit, stateidp, *dspp, off, xfer, fhp, m, dp->nfsdi_vers, dp->nfsdi_minorvers, drpc, tcred, p); else error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp, *dspp, off, xfer, fhp, m, dp->nfsdi_vers, dp->nfsdi_minorvers, tcred, p); NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error); } } NFSCL_DEBUG(4, "aft read/writeds=%d\n", error); if (error == 0) { len -= xfer; off += xfer; rel_off += xfer; } if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) NFSFREECRED(tcred); } NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error); return (error); } /* * The actual read RPC done to a DS. */ static int nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; int attrflag, error, retlen; struct nfsrv_descript nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; struct nfsvattr na; nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); vers = NFS_VER4; NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers); if (flex != 0) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); else nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); } else { nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n"); } NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); txdr_hyper(io_off, tl); *(tl + 2) = txdr_unsigned(len); nrp = dsp->nfsclds_sockp; NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp); if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat, error); if (error != 0) return (error); if (vers == NFS_VER3) { error = nfscl_postop_attr(nd, &na, &attrflag, NULL); NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error); if (error != 0) goto nfsmout; } if (nd->nd_repstat != 0) { error = nd->nd_repstat; goto nfsmout; } if (vers == NFS_VER3) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); *eofp = fxdr_unsigned(int, *(tl + 1)); } else { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); *eofp = fxdr_unsigned(int, *tl); } NFSM_STRSIZ(retlen, len); NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp); error = nfsm_mbufuio(nd, uiop, retlen); nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); return (error); } /* * The actual write RPC done to a DS. */ static int nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC; int32_t backup; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; struct nfsvattr na; KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers); vers = NFS_VER4; if (flex != 0) nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); else nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); } else { nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n"); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED); } txdr_hyper(io_off, tl); tl += 2; if (vers == NFS_VER3) *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); nfsm_uiombuf(nd, uiop, len); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat != 0) { /* * In case the rpc gets retried, roll * the uio fileds changed by nfsm_uiombuf() * back. */ uiop->uio_offset -= len; uio_uio_resid_add(uiop, len); uio_iov_base_add(uiop, -len); uio_iov_len_add(uiop, len); error = nd->nd_repstat; } else { if (vers == NFS_VER3) { error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL, NULL); NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error); if (error != 0) goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen); if (rlen == 0) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { backup = len - rlen; uio_iov_base_add(uiop, -(backup)); uio_iov_len_add(uiop, backup); uiop->uio_offset -= backup; uio_uio_resid_add(uiop, backup); len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; if (commit_thru_mds != 0) { NFSLOCKMNT(nmp); if (!NFSHASWRITEVERF(nmp)) { NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); NFSSETWRITEVERF(nmp); } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); } NFSUNLOCKMNT(nmp); } else { NFSLOCKDS(dsp); if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF; } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); } NFSUNLOCKDS(dsp); } } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; return (error); } /* * The actual write RPC done to a DS. * This variant is called from a separate kernel process for mirrors. * Any short write is considered an IO error. */ static int nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit, nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfssockreq *nrp; struct nfsvattr na; nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); vers = NFS_VER4; NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n", minorvers); nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); } else { nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n"); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED); } txdr_hyper(io_off, tl); tl += 2; if (vers == NFS_VER3) *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); if (len > 0) { /* Put data in mbuf chain. */ nd->nd_mb->m_next = m; /* Set nd_mb and nd_bpos to end of data. */ while (m->m_next != NULL) m = m->m_next; nd->nd_mb = m; nd->nd_bpos = mtod(m, char *) + m->m_len; NFSCL_DEBUG(4, "nfsrpc_writedsmir: lastmb len=%d\n", m->m_len); } nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat != 0) error = nd->nd_repstat; else { if (vers == NFS_VER3) { error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL, NULL); NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n", error); if (error != 0) goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len, rlen); if (rlen != len) { error = NFSERR_IO; NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len, rlen); goto nfsmout; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs. */ if (committed == NFSWRITE_FILESYNC) committed = commit; else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; NFSLOCKDS(dsp); if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF; } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { *must_commit = 1; NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); } NFSUNLOCKDS(dsp); } nfsmout: if (nd->nd_mrep != NULL) mbuf_freem(nd->nd_mrep); *iomode = committed; if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; return (error); } /* * Start up the thread that will execute nfsrpc_writedsmir(). */ static void start_writedsmir(void *arg, int pending) { struct nfsclwritedsdorpc *drpc; drpc = (struct nfsclwritedsdorpc *)arg; drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode, &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len, drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred, drpc->p); drpc->done = 1; NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err); } /* * Set up the write DS mirror call for the pNFS I/O thread. */ static int nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit, nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len, struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers, struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p) { int error, ret; error = 0; drpc->done = 0; drpc->vp = vp; drpc->iomode = *iomode; drpc->must_commit = *must_commit; drpc->stateidp = stateidp; drpc->dsp = dsp; drpc->off = off; drpc->len = len; drpc->fhp = fhp; drpc->m = m; drpc->vers = vers; drpc->minorvers = minorvers; drpc->cred = cred; drpc->p = p; drpc->inprog = 0; ret = EIO; if (nfs_pnfsiothreads > 0) { ret = nfs_pnfsio(start_writedsmir, drpc); NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret); } if (ret != 0) error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp, dsp, off, len, fhp, m, vers, minorvers, cred, p); NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error); return (error); } /* * Free up the nfsclds structure. */ void nfscl_freenfsclds(struct nfsclds *dsp) { int i; if (dsp == NULL) return; if (dsp->nfsclds_sockp != NULL) { NFSFREECRED(dsp->nfsclds_sockp->nr_cred); NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx); free(dsp->nfsclds_sockp->nr_nam, M_SONAME); free(dsp->nfsclds_sockp, M_NFSSOCKREQ); } NFSFREEMUTEX(&dsp->nfsclds_mtx); NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx); for (i = 0; i < NFSV4_CBSLOTS; i++) { if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL) m_freem( dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply); } free(dsp, M_NFSCLDS); } static enum nfsclds_state nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp, struct nfsclds **retdspp) { struct nfsclds *dsp, *cur_dsp; /* * Search the list of nfsclds structures for one with the same * server. */ cur_dsp = NULL; TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen && dsp->nfsclds_servownlen != 0 && !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown, dsp->nfsclds_servownlen) && dsp->nfsclds_sess.nfsess_defunct == 0) { NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n", TAILQ_FIRST(&nmp->nm_sess), dsp, dsp->nfsclds_flags); /* Server major id matches. */ if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) { *retdspp = dsp; return (NFSDSP_USETHISSESSION); } /* * Note the first match, so it can be used for * sequence'ing new sessions. */ if (cur_dsp == NULL) cur_dsp = dsp; } } if (cur_dsp != NULL) { *retdspp = cur_dsp; return (NFSDSP_SEQTHISSESSION); } return (NFSDSP_NOTFOUND); } /* * NFS commit rpc to a NFSv4.1 DS. */ static int nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfssockreq *nrp; struct nfsvattr na; int attrflag, error; nd->nd_mrep = NULL; if (vers == 0 || vers == NFS_VER4) { nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); vers = NFS_VER4; } else nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh, fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers); NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers, minorvers); NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nrp = dsp->nfsclds_sockp; if (nrp == NULL) /* If NULL, use the MDS socket. */ nrp = &nmp->nm_sockreq; error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess); NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error, nd->nd_repstat); if (error != 0) return (error); if (nd->nd_repstat == 0) { if (vers == NFS_VER3) { error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL, NULL); NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error); if (error != 0) goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); NFSLOCKDS(dsp); if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); error = NFSERR_STALEWRITEVERF; } NFSUNLOCKDS(dsp); } nfsmout: if (error == 0 && nd->nd_repstat != 0) error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); } /* * Start up the thread that will execute nfsrpc_commitds(). */ static void start_commitds(void *arg, int pending) { struct nfsclwritedsdorpc *drpc; drpc = (struct nfsclwritedsdorpc *)arg; drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred, drpc->p); drpc->done = 1; NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err); } /* * Set up the commit DS mirror call for the pNFS I/O thread. */ static int nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers, struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p) { int error, ret; error = 0; drpc->done = 0; drpc->vp = vp; drpc->off = offset; drpc->len = cnt; drpc->dsp = dsp; drpc->fhp = fhp; drpc->vers = vers; drpc->minorvers = minorvers; drpc->cred = cred; drpc->p = p; drpc->inprog = 0; ret = EIO; if (nfs_pnfsiothreads > 0) { ret = nfs_pnfsio(start_commitds, drpc); NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret); } if (ret != 0) error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers, minorvers, cred, p); NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error); return (error); } /* * Set up the XDR arguments for the LayoutGet operation. */ static void nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset, uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype, int layoutlen, int usecurstateid) { uint32_t *tl; NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER + NFSX_STATEID); *tl++ = newnfs_false; /* Don't signal availability. */ *tl++ = txdr_unsigned(layouttype); *tl++ = txdr_unsigned(iomode); txdr_hyper(offset, tl); tl += 2; txdr_hyper(len, tl); tl += 2; txdr_hyper(minlen, tl); tl += 2; if (usecurstateid != 0) { /* Special stateid for Current stateid. */ *tl++ = txdr_unsigned(1); *tl++ = 0; *tl++ = 0; *tl++ = 0; } else { *tl++ = txdr_unsigned(stateidp->seqid); NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid); *tl++ = stateidp->other[0]; *tl++ = stateidp->other[1]; *tl++ = stateidp->other[2]; } *tl = txdr_unsigned(layoutlen); } /* * Parse the reply for a successful LayoutGet operation. */ static int nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp) { uint32_t *tl; struct nfsclflayout *flp, *prevflp, *tflp; int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen; int m, mirrorcnt; uint64_t retlen, off; struct nfsfh *nfhp; uint8_t *cp; uid_t user; gid_t grp; NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n"); error = 0; flp = NULL; gotiomode = -1; NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID); if (*tl++ != 0) *retonclosep = 1; else *retonclosep = 0; stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep, (int)stateidp->seqid); stateidp->other[0] = *tl++; stateidp->other[1] = *tl++; stateidp->other[2] = *tl++; cnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "layg cnt=%d\n", cnt); if (cnt <= 0 || cnt > 10000) { /* Don't accept more than 10000 layouts in reply. */ error = NFSERR_BADXDR; goto nfsmout; } for (i = 0; i < cnt; i++) { /* Dissect to the layout type. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; retlen = fxdr_hyper(tl); tl += 2; iomode = fxdr_unsigned(int, *tl++); laytype = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype, (uintmax_t)off, (uintmax_t)retlen, iomode); /* Ignore length of layout body for now. */ if (laytype == NFSLAYOUT_NFSV4_1_FILES) { /* Parse the File layout up to fhcnt. */ NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER + NFSX_V4DEVICEID); fhcnt = fxdr_unsigned(int, *(tl + 4 + NFSX_V4DEVICEID / NFSX_UNSIGNED)); NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt); if (fhcnt < 0 || fhcnt > 100) { /* Don't accept more than 100 file handles. */ error = NFSERR_BADXDR; goto nfsmout; } if (fhcnt > 0) flp = malloc(sizeof(*flp) + fhcnt * sizeof(struct nfsfh *), M_NFSFLAYOUT, M_WAITOK); else flp = malloc(sizeof(*flp), M_NFSFLAYOUT, M_WAITOK); flp->nfsfl_flags = NFSFL_FILE; flp->nfsfl_fhcnt = 0; flp->nfsfl_devp = NULL; flp->nfsfl_off = off; if (flp->nfsfl_off + retlen < flp->nfsfl_off) flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off; else flp->nfsfl_end = flp->nfsfl_off + retlen; flp->nfsfl_iomode = iomode; if (gotiomode == -1) gotiomode = flp->nfsfl_iomode; /* Ignore layout body length for now. */ NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++); NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util); flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++); flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2; NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n", flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff); for (j = 0; j < fhcnt; j++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); nfhlen = fxdr_unsigned(int, *tl); if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) { error = NFSERR_BADXDR; goto nfsmout; } nfhp = malloc(sizeof(*nfhp) + nfhlen - 1, M_NFSFH, M_WAITOK); flp->nfsfl_fh[j] = nfhp; flp->nfsfl_fhcnt++; nfhp->nfh_len = nfhlen; NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen)); NFSBCOPY(cp, nfhp->nfh_fh, nfhlen); } } else if (laytype == NFSLAYOUT_FLEXFILE) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER); mirrorcnt = fxdr_unsigned(int, *(tl + 2)); NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt); if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) { error = NFSERR_BADXDR; goto nfsmout; } flp = malloc(sizeof(*flp) + mirrorcnt * sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK); flp->nfsfl_flags = NFSFL_FLEXFILE; flp->nfsfl_mirrorcnt = mirrorcnt; flp->nfsfl_devp = NULL; flp->nfsfl_off = off; if (flp->nfsfl_off + retlen < flp->nfsfl_off) flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off; else flp->nfsfl_end = flp->nfsfl_off + retlen; flp->nfsfl_iomode = iomode; if (gotiomode == -1) gotiomode = flp->nfsfl_iomode; flp->nfsfl_stripeunit = fxdr_hyper(tl); NFSCL_DEBUG(4, "stripeunit=%ju\n", (uintmax_t)flp->nfsfl_stripeunit); for (j = 0; j < mirrorcnt; j++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); k = fxdr_unsigned(int, *tl); if (k < 1 || k > 128) { error = NFSERR_BADXDR; goto nfsmout; } NFSCL_DEBUG(4, "servercnt=%d\n", k); for (l = 0; l < k; l++) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4DEVICEID + NFSX_STATEID + 2 * NFSX_UNSIGNED); if (l == 0) { /* Just use the first server. */ NFSBCOPY(tl, flp->nfsfl_ffm[j].dev, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); tl++; flp->nfsfl_ffm[j].st.seqid = *tl++; flp->nfsfl_ffm[j].st.other[0] = *tl++; flp->nfsfl_ffm[j].st.other[1] = *tl++; flp->nfsfl_ffm[j].st.other[2] = *tl++; NFSCL_DEBUG(4, "st.seqid=%u " "st.o0=0x%x st.o1=0x%x " "st.o2=0x%x\n", flp->nfsfl_ffm[j].st.seqid, flp->nfsfl_ffm[j].st.other[0], flp->nfsfl_ffm[j].st.other[1], flp->nfsfl_ffm[j].st.other[2]); } else tl += ((NFSX_V4DEVICEID + NFSX_STATEID + NFSX_UNSIGNED) / NFSX_UNSIGNED); fhcnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt); if (fhcnt < 1 || fhcnt > NFSDEV_MAXVERS) { error = NFSERR_BADXDR; goto nfsmout; } for (m = 0; m < fhcnt; m++) { NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); nfhlen = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "nfhlen=%d\n", nfhlen); if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) { error = NFSERR_BADXDR; goto nfsmout; } NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen)); if (l == 0) { flp->nfsfl_ffm[j].fhcnt = fhcnt; nfhp = malloc( sizeof(*nfhp) + nfhlen - 1, M_NFSFH, M_WAITOK); flp->nfsfl_ffm[j].fh[m] = nfhp; nfhp->nfh_len = nfhlen; NFSBCOPY(cp, nfhp->nfh_fh, nfhlen); NFSCL_DEBUG(4, "got fh\n"); } } /* Now, get the ffsd_user/ffds_group. */ error = nfsrv_parseug(nd, 0, &user, &grp, curthread); NFSCL_DEBUG(4, "after parseu=%d\n", error); if (error == 0) error = nfsrv_parseug(nd, 1, &user, &grp, curthread); NFSCL_DEBUG(4, "aft parseg=%d\n", grp); if (error != 0) goto nfsmout; NFSCL_DEBUG(4, "user=%d group=%d\n", user, grp); if (l == 0) { flp->nfsfl_ffm[j].user = user; flp->nfsfl_ffm[j].group = grp; NFSCL_DEBUG(4, "usr=%d grp=%d\n", user, grp); } } } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++); flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl); NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n", flp->nfsfl_fflags, flp->nfsfl_statshint); } else { error = NFSERR_BADXDR; goto nfsmout; } if (flp->nfsfl_iomode == gotiomode) { /* Keep the list in increasing offset order. */ tflp = LIST_FIRST(flhp); prevflp = NULL; while (tflp != NULL && tflp->nfsfl_off < flp->nfsfl_off) { prevflp = tflp; tflp = LIST_NEXT(tflp, nfsfl_list); } if (prevflp == NULL) LIST_INSERT_HEAD(flhp, flp, nfsfl_list); else LIST_INSERT_AFTER(prevflp, flp, nfsfl_list); NFSCL_DEBUG(4, "flp inserted\n"); } else { printf("nfscl_layoutget(): got wrong iomode\n"); nfscl_freeflayout(flp); } flp = NULL; } nfsmout: NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error); if (error != 0 && flp != NULL) nfscl_freeflayout(flp); return (error); } /* * Parse a user/group digit string. */ static int nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp, NFSPROC_T *p) { uint32_t *tl; char *cp, *str, str0[NFSV4_SMALLSTR + 1]; uint32_t len = 0; int error = 0; NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(uint32_t, *tl); if (len > NFSV4_OPAQUELIMIT) { error = NFSERR_BADXDR; goto nfsmout; } NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len); if (len == 0) { if (dogrp != 0) *gidp = GID_NOGROUP; else *uidp = UID_NOBODY; return (0); } if (len > NFSV4_SMALLSTR) str = malloc(len + 1, M_TEMP, M_WAITOK); else str = str0; NFSM_DISSECT(cp, char *, NFSM_RNDUP(len)); NFSBCOPY(cp, str, len); str[len] = '\0'; NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str); if (dogrp != 0) error = nfsv4_strtogid(nd, str, len, gidp, p); else error = nfsv4_strtouid(nd, str, len, uidp, p); nfsmout: if (len > NFSV4_SMALLSTR) free(str, M_TEMP); NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error); return (error); } /* * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(), * so that it does both an Open and a Layoutget. */ static int nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode, struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p) { struct nfscllayout *lyp; struct nfsclflayout *flp; struct nfsclflayouthead flh; int error, islocked, layoutlen, recalled, retonclose, usecurstateid; int layouttype, laystat; nfsv4stateid_t stateid; struct nfsclsession *tsep; error = 0; if (NFSHASFLEXFILE(nmp)) layouttype = NFSLAYOUT_FLEXFILE; else layouttype = NFSLAYOUT_NFSV4_1_FILES; /* * If lyp is returned non-NULL, there will be a refcnt (shared lock) * on it, iff flp != NULL or a lock (exclusive lock) on it iff * flp == NULL. */ lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp, &recalled); NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp); if (lyp == NULL) islocked = 0; else if (flp != NULL) islocked = 1; else islocked = 2; if ((lyp == NULL || flp == NULL) && recalled == 0) { LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); if (lyp == NULL) usecurstateid = 1; else { usecurstateid = 0; stateid.seqid = lyp->nfsly_stateid.seqid; stateid.other[0] = lyp->nfsly_stateid.other[0]; stateid.other[1] = lyp->nfsly_stateid.other[1]; stateid.other[2] = lyp->nfsly_stateid.other[2]; } error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, dpp, &stateid, usecurstateid, layouttype, layoutlen, &retonclose, &flh, &laystat, cred, p); NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n", laystat, error); laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen, &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat, &islocked, cred, p); } else error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0); if (islocked == 2) nfscl_rellayout(lyp, 1); else if (islocked == 1) nfscl_rellayout(lyp, 0); return (error); } /* * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS * enabled, only for the CLAIM_NULL case. All other NFSv4 Opens are * handled by nfsrpc_openrpc(). * For the case where op == NULL, dvp is the directory. When op != NULL, it * can be NULL. */ static int nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode, struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp, nfsv4stateid_t *stateidp, int usecurstateid, int layouttype, int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp, int *laystatp, struct ucred *cred, NFSPROC_T *p) { uint32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfscldeleg *ndp = NULL; struct nfsvattr nfsva; struct nfsclsession *tsep; uint32_t rflags, deleg; nfsattrbit_t attrbits; int error, ret, acesize, limitby, iomode; *dpp = NULL; *laystatp = ENXIO; nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL, 0, 0); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); nfsm_strtom(nd, name, namelen); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY); nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_LAYOUTGET); if ((mode & NFSV4OPEN_ACCESSWRITE) != 0) iomode = NFSLAYOUTIOMODE_RW; else iomode = NFSLAYOUTIOMODE_READ; nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp, layouttype, layoutlen, usecurstateid); error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error != 0) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (nd->nd_repstat != 0) *laystatp = nd->nd_repstat; if ((nd->nd_flag & ND_NOMOREDATA) == 0) { /* ND_NOMOREDATA will be set if the Open operation failed. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); op->nfso_stateid.seqid = *tl++; op->nfso_stateid.other[0] = *tl++; op->nfso_stateid.other[1] = *tl++; op->nfso_stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error != 0) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(u_int32_t, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) op->nfso_own->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); ndp = malloc(sizeof(struct nfscldeleg) + newfhlen, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&ndp->nfsdl_owner); LIST_INIT(&ndp->nfsdl_lock); ndp->nfsdl_clp = op->nfso_own->nfsow_clp; ndp->nfsdl_fhlen = newfhlen; NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen); newnfs_copyincred(cred, &ndp->nfsdl_cred); nfscl_lockinit(&ndp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); ndp->nfsdl_stateid.seqid = *tl++; ndp->nfsdl_stateid.other[0] = *tl++; ndp->nfsdl_stateid.other[1] = *tl++; ndp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { ndp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: ndp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: ndp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); ndp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; }; } else ndp->nfsdl_flags = NFSCLDL_READ; if (ret != 0) ndp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret, &acesize, p); if (error != 0) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 || nfscl_assumeposixlocks) op->nfso_posixlock = 1; else op->nfso_posixlock = 0; NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); /* If the 2nd element == NFS_OK, the Getattr succeeded. */ if (*++tl == 0) { error = nfsv4_loadattr(nd, NULL, &nfsva, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, cred); if (error != 0) goto nfsmout; if (ndp != NULL) { ndp->nfsdl_change = nfsva.na_filerev; ndp->nfsdl_modtime = nfsva.na_mtime; ndp->nfsdl_flags |= NFSCLDL_MODTIMESET; *dpp = ndp; ndp = NULL; } /* * At this point, the Open has succeeded, so set * nd_repstat = NFS_OK. If the Layoutget failed, * this function just won't return a layout. */ if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); *laystatp = fxdr_unsigned(int, *++tl); if (*laystatp == 0) { error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); if (error != 0) *laystatp = error; } } else nd->nd_repstat = 0; /* Return 0 for Open. */ } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; nfsmout: free(ndp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * Similar nfsrpc_createv4(), but also does the LayoutGet operation. * Used only for mounts with pNFS enabled. */ static int nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp, int usecurstateid, int layouttype, int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp, int *laystatp) { uint32_t *tl; int error = 0, deleg, newone, ret, acesize, limitby; struct nfsrv_descript nfsd, *nd = &nfsd; struct nfsclopen *op; struct nfscldeleg *dp = NULL; struct nfsnode *np; struct nfsfh *nfhp; struct nfsclsession *tsep; nfsattrbit_t attrbits; nfsv4stateid_t stateid; uint32_t rflags; struct nfsmount *nmp; nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); *laystatp = ENXIO; *unlockedp = 0; *nfhpp = NULL; *dpp = NULL; *attrflagp = 0; *dattrflagp = 0; if (namelen > NFS_MAXNAMLEN) return (ENAMETOOLONG); NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp); /* * For V4, this is actually an Open op. */ NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(owp->nfsow_seqid); *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD); *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE); tsep = nfsmnt_mdssession(nmp); *tl++ = tsep->nfsess_clientid.lval[0]; *tl = tsep->nfsess_clientid.lval[1]; nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CREATE); if ((fmode & O_EXCL) != 0) { if (NFSHASSESSPERSIST(nmp)) { /* Use GUARDED for persistent sessions. */ *tl = txdr_unsigned(NFSCREATE_GUARDED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } else { /* Otherwise, use EXCLUSIVE4_1. */ *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = cverf.lval[0]; *tl = cverf.lval[1]; nfscl_fillsattr(nd, vap, dvp, 0, 0); } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL); nfsm_strtom(nd, name, namelen); /* Get the new file's handle and attributes, plus save the FH. */ NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_SAVEFH); *tl++ = txdr_unsigned(NFSV4OP_GETFH); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSGETATTR_ATTRBIT(&attrbits); nfsrv_putattrbit(nd, &attrbits); /* Get the directory's post-op attributes. */ NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OP_RESTOREFH); *tl = txdr_unsigned(NFSV4OP_LAYOUTGET); nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp, layouttype, layoutlen, usecurstateid); error = nfscl_request(nd, dvp, p, cred, dstuff); if (error != 0) return (error); NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat, error); if (nd->nd_repstat != 0) *laystatp = nd->nd_repstat; NFSCL_INCRSEQID(owp->nfsow_seqid, nd); if ((nd->nd_flag & ND_NOMOREDATA) == 0) { NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n"); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); stateid.seqid = *tl++; stateid.other[0] = *tl++; stateid.other[1] = *tl++; stateid.other[2] = *tl; rflags = fxdr_unsigned(u_int32_t, *(tl + 6)); nfsrv_getattrbits(nd, &attrbits, NULL, NULL); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); deleg = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEREAD || deleg == NFSV4OPEN_DELEGATEWRITE) { if (!(owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_FIRSTDELEG)) owp->nfsow_clp->nfsc_flags |= (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG); dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX, M_NFSCLDELEG, M_WAITOK); LIST_INIT(&dp->nfsdl_owner); LIST_INIT(&dp->nfsdl_lock); dp->nfsdl_clp = owp->nfsow_clp; newnfs_copyincred(cred, &dp->nfsdl_cred); nfscl_lockinit(&dp->nfsdl_rwlock); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); dp->nfsdl_stateid.seqid = *tl++; dp->nfsdl_stateid.other[0] = *tl++; dp->nfsdl_stateid.other[1] = *tl++; dp->nfsdl_stateid.other[2] = *tl++; ret = fxdr_unsigned(int, *tl); if (deleg == NFSV4OPEN_DELEGATEWRITE) { dp->nfsdl_flags = NFSCLDL_WRITE; /* * Indicates how much the file can grow. */ NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); limitby = fxdr_unsigned(int, *tl++); switch (limitby) { case NFSV4OPEN_LIMITSIZE: dp->nfsdl_sizelimit = fxdr_hyper(tl); break; case NFSV4OPEN_LIMITBLOCKS: dp->nfsdl_sizelimit = fxdr_unsigned(u_int64_t, *tl++); dp->nfsdl_sizelimit *= fxdr_unsigned(u_int64_t, *tl); break; default: error = NFSERR_BADXDR; goto nfsmout; }; } else { dp->nfsdl_flags = NFSCLDL_READ; } if (ret != 0) dp->nfsdl_flags |= NFSCLDL_RECALL; error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret, &acesize, p); if (error != 0) goto nfsmout; } else if (deleg != NFSV4OPEN_DELEGATENONE) { error = NFSERR_BADXDR; goto nfsmout; } /* Now, we should have the status for the SaveFH. */ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl == 0) { NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n"); /* * Now, process the GetFH and Getattr for the newly * created file. nfscl_mtofh() will set * ND_NOMOREDATA if these weren't successful. */ error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp); NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error); if (error != 0) goto nfsmout; } else nd->nd_flag |= ND_NOMOREDATA; /* Now we have the PutFH and Getattr for the directory. */ if ((nd->nd_flag & ND_NOMOREDATA) == 0) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; else { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) nd->nd_flag |= ND_NOMOREDATA; } } if ((nd->nd_flag & ND_NOMOREDATA) == 0) { /* Load the directory attributes. */ error = nfsm_loadattr(nd, dnap); NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error); if (error != 0) goto nfsmout; *dattrflagp = 1; if (dp != NULL && *attrflagp != 0) { dp->nfsdl_change = nnap->na_filerev; dp->nfsdl_modtime = nnap->na_mtime; dp->nfsdl_flags |= NFSCLDL_MODTIMESET; } /* * We can now complete the Open state. */ nfhp = *nfhpp; if (dp != NULL) { dp->nfsdl_fhlen = nfhp->nfh_len; NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len); } /* * Get an Open structure that will be * attached to the OpenOwner, acquired already. */ error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0, cred, p, NULL, &op, &newone, NULL, 0); if (error != 0) goto nfsmout; op->nfso_stateid = stateid; newnfs_copyincred(cred, &op->nfso_cred); nfscl_openrelease(nmp, op, error, newone); *unlockedp = 1; /* Now, handle the RestoreFH and LayoutGet. */ if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); *laystatp = fxdr_unsigned(int, *(tl + 3)); if (*laystatp == 0) { error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp); if (error != 0) *laystatp = error; } NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n", error); } else nd->nd_repstat = 0; } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(owp->nfsow_clp); nfsmout: NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error); if (error == 0) *dpp = dp; else free(dp, M_NFSCLDELEG); mbuf_freem(nd->nd_mrep); return (error); } /* * Similar to nfsrpc_getopenlayout(), except that it used for the Create case. */ static int nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *dstuff, int *unlockedp) { struct nfscllayout *lyp; struct nfsclflayouthead flh; struct nfsfh *nfhp; struct nfsclsession *tsep; struct nfsmount *nmp; nfsv4stateid_t stateid; int error, layoutlen, layouttype, retonclose, laystat; error = 0; nmp = VFSTONFS(dvp->v_mount); if (NFSHASFLEXFILE(nmp)) layouttype = NFSLAYOUT_FLEXFILE; else layouttype = NFSLAYOUT_NFSV4_1_FILES; LIST_INIT(&flh); tsep = nfsmnt_mdssession(nmp); layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED); error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode, owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp, dstuff, unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose, &flh, &laystat); NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n", laystat, error); lyp = NULL; if (laystat == 0) { nfhp = *nfhpp; laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh, nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL, cred, p); } else laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL, cred, p); if (laystat == 0) nfscl_rellayout(lyp, 0); return (error); } /* * Process the results of a layoutget() operation. */ static int nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp, int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit, struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype, int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p) { struct nfsclflayout *tflp; struct nfscldevinfo *dip; uint8_t *dev; if (laystat == NFSERR_UNKNLAYOUTTYPE) { NFSLOCKMNT(nmp); if (!NFSHASFLEXFILE(nmp)) { /* Switch to using Flex File Layout. */ nmp->nm_state |= NFSSTA_FLEXFILE; } else if (layouttype == NFSLAYOUT_FLEXFILE) { /* Disable pNFS. */ NFSCL_DEBUG(1, "disable PNFS\n"); nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE); } NFSUNLOCKMNT(nmp); } if (laystat == 0) { NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n"); LIST_FOREACH(tflp, flhp, nfsfl_list) { laystat = nfscl_adddevinfo(nmp, NULL, tflp); NFSCL_DEBUG(4, "aft adddev=%d\n", laystat); if (laystat != 0) { if (layouttype == NFSLAYOUT_FLEXFILE) dev = tflp->nfsfl_ffm[0].dev; else dev = tflp->nfsfl_dev; laystat = nfsrpc_getdeviceinfo(nmp, dev, layouttype, notifybit, &dip, cred, p); NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n", laystat); if (laystat != 0) break; laystat = nfscl_adddevinfo(nmp, dip, tflp); if (laystat != 0) printf("getlayout: cannot add\n"); } } } if (laystat == 0) { /* * nfscl_layout() always returns with the nfsly_lock * set to a refcnt (shared lock). * Passing in dvp is sufficient, since it is only used to * get the fsid for the file system. */ laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp, layouttype, retonclose, flhp, lypp, cred, p); NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n", laystat); if (laystat == 0 && islockedp != NULL) *islockedp = 1; } return (laystat); } Index: head/sys/fs/nfsclient/nfs_clstate.c =================================================================== --- head/sys/fs/nfsclient/nfs_clstate.c (revision 328416) +++ head/sys/fs/nfsclient/nfs_clstate.c (revision 328417) @@ -1,5353 +1,5353 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions implement the client side state handling for NFSv4. * NFSv4 state handling: * - A lockowner is used to determine lock contention, so it * corresponds directly to a Posix pid. (1 to 1 mapping) * - The correct granularity of an OpenOwner is not nearly so * obvious. An OpenOwner does the following: * - provides a serial sequencing of Open/Close/Lock-with-new-lockowner * - is used to check for Open/Share contention (not applicable to * this client, since all Opens are Deny_None) * As such, I considered both extreme. * 1 OpenOwner per ClientID - Simple to manage, but fully serializes * all Open, Close and Lock (with a new lockowner) Ops. * 1 OpenOwner for each Open - This one results in an OpenConfirm for * every Open, for most servers. * So, I chose to use the same mapping as I did for LockOwnwers. * The main concern here is that you can end up with multiple Opens * for the same File Handle, but on different OpenOwners (opens * inherited from parents, grandparents...) and you do not know * which of these the vnodeop close applies to. This is handled by * delaying the Close Op(s) until all of the Opens have been closed. * (It is not yet obvious if this is the correct granularity.) * - How the code handles serialization: * - For the ClientId, it uses an exclusive lock while getting its * SetClientId and during recovery. Otherwise, it uses a shared * lock via a reference count. * - For the rest of the data structures, it uses an SMP mutex * (once the nfs client is SMP safe) and doesn't sleep while * manipulating the linked lists. * - The serialization of Open/Close/Lock/LockU falls out in the * "wash", since OpenOwners and LockOwners are both mapped from * Posix pid. In other words, there is only one Posix pid using * any given owner, so that owner is serialized. (If you change * the granularity of the OpenOwner, then code must be added to * serialize Ops on the OpenOwner.) * - When to get rid of OpenOwners and LockOwners. * - The function nfscl_cleanup_common() is executed after a process exits. * It goes through the client list looking for all Open and Lock Owners. * When one is found, it is marked "defunct" or in the case of * an OpenOwner without any Opens, freed. * The renew thread scans for defunct Owners and gets rid of them, * if it can. The LockOwners will also be deleted when the * associated Open is closed. * - If the LockU or Close Op(s) fail during close in a way * that could be recovered upon retry, they are relinked to the * ClientId's defunct open list and retried by the renew thread * until they succeed or an unmount/recovery occurs. * (Since we are done with them, they do not need to be recovered.) */ #ifndef APPLEKEXT #include /* * Global variables */ extern struct nfsstatsv1 nfsstatsv1; extern struct nfsreqhead nfsd_reqq; extern u_int32_t newnfs_false, newnfs_true; extern int nfscl_debuglevel; extern int nfscl_enablecallb; extern int nfs_numnfscbd; NFSREQSPINLOCK; NFSCLSTATEMUTEX; int nfscl_inited = 0; struct nfsclhead nfsclhead; /* Head of clientid list */ int nfscl_deleghighwater = NFSCLDELEGHIGHWATER; int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER; #endif /* !APPLEKEXT */ static int nfscl_delegcnt = 0; static int nfscl_layoutcnt = 0; static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t, struct nfscllockowner **, struct nfsclopen **); static void nfscl_clrelease(struct nfsclclient *); static void nfscl_cleanclient(struct nfsclclient *); static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *, struct ucred *, NFSPROC_T *); static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *, struct nfsmount *, struct ucred *, NFSPROC_T *); static void nfscl_recover(struct nfsclclient *, struct ucred *, NFSPROC_T *); static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *, struct nfscllock *, int); static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **, struct nfscllock **, int); static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *); static u_int32_t nfscl_nextcbident(void); static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **); static struct nfsclclient *nfscl_getclnt(u_int32_t); static struct nfsclclient *nfscl_getclntsess(uint8_t *); static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *, int); static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *, int, struct nfsclrecalllayout **); static void nfscl_reldevinfo_locked(struct nfscldevinfo *); static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *, int); static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *); static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *, u_int8_t *, struct nfscllock **); static void nfscl_freealllocks(struct nfscllockownerhead *, int); static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int, struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **); static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *, struct nfsclowner **, struct nfsclowner **, struct nfsclopen **, struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *); static int nfscl_moveopen(vnode_t , struct nfsclclient *, struct nfsmount *, struct nfsclopen *, struct nfsclowner *, struct nfscldeleg *, struct ucred *, NFSPROC_T *); static void nfscl_totalrecall(struct nfsclclient *); static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *, struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *); static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int, u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int, struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *); static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short, struct ucred *, NFSPROC_T *); static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t, struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *); static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *); static int nfscl_errmap(struct nfsrv_descript *, u_int32_t); static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *); static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *, struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int); static void nfscl_freeopenowner(struct nfsclowner *, int); static void nfscl_cleandeleg(struct nfscldeleg *); static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *, struct nfsmount *, NFSPROC_T *); static void nfscl_emptylockowner(struct nfscllockowner *, struct nfscllockownerfhhead *); static void nfscl_mergeflayouts(struct nfsclflayouthead *, struct nfsclflayouthead *); static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t, uint64_t, uint32_t, struct nfsclrecalllayout *); static int nfscl_seq(uint32_t, uint32_t); static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *, struct ucred *, NFSPROC_T *); static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *, struct ucred *, NFSPROC_T *); static short nfscberr_null[] = { 0, 0, }; static short nfscberr_getattr[] = { NFSERR_RESOURCE, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, 0, }; static short nfscberr_recall[] = { NFSERR_RESOURCE, NFSERR_BADHANDLE, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, 0, }; static short *nfscl_cberrmap[] = { nfscberr_null, nfscberr_null, nfscberr_null, nfscberr_getattr, nfscberr_recall }; #define NETFAMILY(clp) \ (((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) ? AF_INET6 : AF_INET) /* * Called for an open operation. * If the nfhp argument is NULL, just get an openowner. */ APPLESTATIC int nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg, struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp, struct nfsclopen **opp, int *newonep, int *retp, int lockit) { struct nfsclclient *clp; struct nfsclowner *owp, *nowp; struct nfsclopen *op = NULL, *nop = NULL; struct nfscldeleg *dp; struct nfsclownerhead *ohp; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int ret; if (newonep != NULL) *newonep = 0; if (opp != NULL) *opp = NULL; if (owpp != NULL) *owpp = NULL; /* * Might need one or both of these, so MALLOC them now, to * avoid a tsleep() in MALLOC later. */ - MALLOC(nowp, struct nfsclowner *, sizeof (struct nfsclowner), + nowp = malloc(sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); if (nfhp != NULL) - MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) + + nop = malloc(sizeof (struct nfsclopen) + fhlen - 1, M_NFSCLOPEN, M_WAITOK); ret = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (ret != 0) { - FREE((caddr_t)nowp, M_NFSCLOWNER); + free(nowp, M_NFSCLOWNER); if (nop != NULL) - FREE((caddr_t)nop, M_NFSCLOPEN); + free(nop, M_NFSCLOPEN); return (ret); } /* * Get the Open iff it already exists. * If none found, add the new one or return error, depending upon * "create". */ NFSLOCKCLSTATE(); dp = NULL; /* First check the delegation list */ if (nfhp != NULL && usedeleg) { LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) { if (dp->nfsdl_fhlen == fhlen && !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) { if (!(amode & NFSV4OPEN_ACCESSWRITE) || (dp->nfsdl_flags & NFSCLDL_WRITE)) break; dp = NULL; break; } } } if (dp != NULL) { nfscl_filllockowner(p->td_proc, own, F_POSIX); ohp = &dp->nfsdl_owner; } else { /* For NFSv4.1 and this option, use a single open_owner. */ if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) nfscl_filllockowner(NULL, own, F_POSIX); else nfscl_filllockowner(p->td_proc, own, F_POSIX); ohp = &clp->nfsc_owner; } /* Now, search for an openowner */ LIST_FOREACH(owp, ohp, nfsow_list) { if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN)) break; } /* * Create a new open, as required. */ nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen, cred, newonep); /* * Now, check the mode on the open and return the appropriate * value. */ if (retp != NULL) { if (nfhp != NULL && dp != NULL && nop == NULL) /* new local open on delegation */ *retp = NFSCLOPEN_SETCRED; else *retp = NFSCLOPEN_OK; } if (op != NULL && (amode & ~(op->nfso_mode))) { op->nfso_mode |= amode; if (retp != NULL && dp == NULL) *retp = NFSCLOPEN_DOOPEN; } /* * Serialize modifications to the open owner for multiple threads * within the same process using a read/write sleep lock. * For NFSv4.1 and a single OpenOwner, allow concurrent open operations * by acquiring a shared lock. The close operations still use an * exclusive lock for this case. */ if (lockit != 0) { if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) { /* * Get a shared lock on the OpenOwner, but first * wait for any pending exclusive lock, so that the * exclusive locker gets priority. */ nfsv4_lock(&owp->nfsow_rwlock, 0, NULL, NFSCLSTATEMUTEXPTR, NULL); nfsv4_getref(&owp->nfsow_rwlock, NULL, NFSCLSTATEMUTEXPTR, NULL); } else nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR); } NFSUNLOCKCLSTATE(); if (nowp != NULL) - FREE((caddr_t)nowp, M_NFSCLOWNER); + free(nowp, M_NFSCLOWNER); if (nop != NULL) - FREE((caddr_t)nop, M_NFSCLOPEN); + free(nop, M_NFSCLOPEN); if (owpp != NULL) *owpp = owp; if (opp != NULL) *opp = op; return (0); } /* * Create a new open, as required. */ static void nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp, struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp, struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen, struct ucred *cred, int *newonep) { struct nfsclowner *owp = *owpp, *nowp; struct nfsclopen *op, *nop; if (nowpp != NULL) nowp = *nowpp; else nowp = NULL; if (nopp != NULL) nop = *nopp; else nop = NULL; if (owp == NULL && nowp != NULL) { NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN); LIST_INIT(&nowp->nfsow_open); nowp->nfsow_clp = clp; nowp->nfsow_seqid = 0; nowp->nfsow_defunct = 0; nfscl_lockinit(&nowp->nfsow_rwlock); if (dp != NULL) { nfsstatsv1.cllocalopenowners++; LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list); } else { nfsstatsv1.clopenowners++; LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list); } owp = *owpp = nowp; *nowpp = NULL; if (newonep != NULL) *newonep = 1; } /* If an fhp has been specified, create an Open as well. */ if (fhp != NULL) { /* and look for the correct open, based upon FH */ LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, fhp, fhlen)) break; } if (op == NULL && nop != NULL) { nop->nfso_own = owp; nop->nfso_mode = 0; nop->nfso_opencnt = 0; nop->nfso_posixlock = 1; nop->nfso_fhlen = fhlen; NFSBCOPY(fhp, nop->nfso_fh, fhlen); LIST_INIT(&nop->nfso_lock); nop->nfso_stateid.seqid = 0; nop->nfso_stateid.other[0] = 0; nop->nfso_stateid.other[1] = 0; nop->nfso_stateid.other[2] = 0; KASSERT(cred != NULL, ("%s: cred NULL\n", __func__)); newnfs_copyincred(cred, &nop->nfso_cred); if (dp != NULL) { TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; nfsstatsv1.cllocalopens++; } else { nfsstatsv1.clopens++; } LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list); *opp = nop; *nopp = NULL; if (newonep != NULL) *newonep = 1; } else { *opp = op; } } } /* * Called to find/add a delegation to a client. */ APPLESTATIC int nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp, int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp) { struct nfscldeleg *dp = *dpp, *tdp; /* * First, if we have received a Read delegation for a file on a * read/write file system, just return it, because they aren't * useful, imho. */ if (mp != NULL && dp != NULL && !NFSMNT_RDONLY(mp) && (dp->nfsdl_flags & NFSCLDL_READ)) { (void) nfscl_trydelegreturn(dp, cred, VFSTONFS(mp), p); - FREE((caddr_t)dp, M_NFSCLDELEG); + free(dp, M_NFSCLDELEG); *dpp = NULL; return (0); } /* Look for the correct deleg, based upon FH */ NFSLOCKCLSTATE(); tdp = nfscl_finddeleg(clp, nfhp, fhlen); if (tdp == NULL) { if (dp == NULL) { NFSUNLOCKCLSTATE(); return (NFSERR_BADSTATEID); } *dpp = NULL; TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp, nfsdl_hash); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; nfsstatsv1.cldelegates++; nfscl_delegcnt++; } else { /* * Delegation already exists, what do we do if a new one?? */ if (dp != NULL) { printf("Deleg already exists!\n"); - FREE((caddr_t)dp, M_NFSCLDELEG); + free(dp, M_NFSCLDELEG); *dpp = NULL; } else { *dpp = tdp; } } NFSUNLOCKCLSTATE(); return (0); } /* * Find a delegation for this file handle. Return NULL upon failure. */ static struct nfscldeleg * nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen) { struct nfscldeleg *dp; LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) { if (dp->nfsdl_fhlen == fhlen && !NFSBCMP(dp->nfsdl_fh, fhp, fhlen)) break; } return (dp); } /* * Get a stateid for an I/O operation. First, look for an open and iff * found, return either a lockowner stateid or the open stateid. * If no Open is found, just return error and the special stateid of all zeros. */ APPLESTATIC int nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp, void **lckpp) { struct nfsclclient *clp; struct nfsclowner *owp; struct nfsclopen *op = NULL, *top; struct nfscllockowner *lp; struct nfscldeleg *dp; struct nfsnode *np; struct nfsmount *nmp; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int error, done; *lckpp = NULL; /* * Initially, just set the special stateid of all zeros. * (Don't do this for a DS, since the special stateid can't be used.) */ if (fords == 0) { stateidp->seqid = 0; stateidp->other[0] = 0; stateidp->other[1] = 0; stateidp->other[2] = 0; } if (vnode_vtype(vp) != VREG) return (EISDIR); np = VTONFS(vp); nmp = VFSTONFS(vnode_mount(vp)); NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (EACCES); } /* * Wait for recovery to complete. */ while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG)) (void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR, PZERO, "nfsrecvr", NULL); /* * First, look for a delegation. */ LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) { if (dp->nfsdl_fhlen == fhlen && !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) { if (!(mode & NFSV4OPEN_ACCESSWRITE) || (dp->nfsdl_flags & NFSCLDL_WRITE)) { stateidp->seqid = dp->nfsdl_stateid.seqid; stateidp->other[0] = dp->nfsdl_stateid.other[0]; stateidp->other[1] = dp->nfsdl_stateid.other[1]; stateidp->other[2] = dp->nfsdl_stateid.other[2]; if (!(np->n_flag & NDELEGRECALL)) { TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; dp->nfsdl_rwlock.nfslock_usecnt++; *lckpp = (void *)&dp->nfsdl_rwlock; } NFSUNLOCKCLSTATE(); return (0); } break; } } if (p != NULL) { /* * If p != NULL, we want to search the parentage tree * for a matching OpenOwner and use that. */ if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) nfscl_filllockowner(NULL, own, F_POSIX); else nfscl_filllockowner(p->td_proc, own, F_POSIX); lp = NULL; error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own, mode, &lp, &op); if (error == 0 && lp != NULL && fords == 0) { /* Don't return a lock stateid for a DS. */ stateidp->seqid = lp->nfsl_stateid.seqid; stateidp->other[0] = lp->nfsl_stateid.other[0]; stateidp->other[1] = lp->nfsl_stateid.other[1]; stateidp->other[2] = lp->nfsl_stateid.other[2]; NFSUNLOCKCLSTATE(); return (0); } } if (op == NULL) { /* If not found, just look for any OpenOwner that will work. */ top = NULL; done = 0; owp = LIST_FIRST(&clp->nfsc_owner); while (!done && owp != NULL) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, nfhp, fhlen)) { if (top == NULL && (op->nfso_mode & NFSV4OPEN_ACCESSWRITE) != 0 && (mode & NFSV4OPEN_ACCESSREAD) != 0) top = op; if ((mode & op->nfso_mode) == mode) { done = 1; break; } } } if (!done) owp = LIST_NEXT(owp, nfsow_list); } if (!done) { NFSCL_DEBUG(2, "openmode top=%p\n", top); if (top == NULL || NFSHASOPENMODE(nmp)) { NFSUNLOCKCLSTATE(); return (ENOENT); } else op = top; } /* * For read aheads or write behinds, use the open cred. * A read ahead or write behind is indicated by p == NULL. */ if (p == NULL) newnfs_copycred(&op->nfso_cred, cred); } /* * No lock stateid, so return the open stateid. */ stateidp->seqid = op->nfso_stateid.seqid; stateidp->other[0] = op->nfso_stateid.other[0]; stateidp->other[1] = op->nfso_stateid.other[1]; stateidp->other[2] = op->nfso_stateid.other[2]; NFSUNLOCKCLSTATE(); return (0); } /* * Search for a matching file, mode and, optionally, lockowner. */ static int nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen, u_int8_t *openown, u_int8_t *lockown, u_int32_t mode, struct nfscllockowner **lpp, struct nfsclopen **opp) { struct nfsclowner *owp; struct nfsclopen *op, *rop, *rop2; struct nfscllockowner *lp; int keep_looping; if (lpp != NULL) *lpp = NULL; /* * rop will be set to the open to be returned. There are three * variants of this, all for an open of the correct file: * 1 - A match of lockown. * 2 - A match of the openown, when no lockown match exists. * 3 - A match for any open, if no openown or lockown match exists. * Looking for #2 over #3 probably isn't necessary, but since * RFC3530 is vague w.r.t. the relationship between openowners and * lockowners, I think this is the safer way to go. */ rop = NULL; rop2 = NULL; keep_looping = 1; /* Search the client list */ owp = LIST_FIRST(ohp); while (owp != NULL && keep_looping != 0) { /* and look for the correct open */ op = LIST_FIRST(&owp->nfsow_open); while (op != NULL && keep_looping != 0) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, nfhp, fhlen) && (op->nfso_mode & mode) == mode) { if (lpp != NULL) { /* Now look for a matching lockowner. */ LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, lockown, NFSV4CL_LOCKNAMELEN)) { *lpp = lp; rop = op; keep_looping = 0; break; } } } if (rop == NULL && !NFSBCMP(owp->nfsow_owner, openown, NFSV4CL_LOCKNAMELEN)) { rop = op; if (lpp == NULL) keep_looping = 0; } if (rop2 == NULL) rop2 = op; } op = LIST_NEXT(op, nfso_list); } owp = LIST_NEXT(owp, nfsow_list); } if (rop == NULL) rop = rop2; if (rop == NULL) return (EBADF); *opp = rop; return (0); } /* * Release use of an open owner. Called when open operations are done * with the open owner. */ APPLESTATIC void nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp, __unused int error, __unused int candelete, int unlocked) { if (owp == NULL) return; NFSLOCKCLSTATE(); if (unlocked == 0) { if (NFSHASONEOPENOWN(nmp)) nfsv4_relref(&owp->nfsow_rwlock); else nfscl_lockunlock(&owp->nfsow_rwlock); } nfscl_clrelease(owp->nfsow_clp); NFSUNLOCKCLSTATE(); } /* * Release use of an open structure under an open owner. */ APPLESTATIC void nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error, int candelete) { struct nfsclclient *clp; struct nfsclowner *owp; if (op == NULL) return; NFSLOCKCLSTATE(); owp = op->nfso_own; if (NFSHASONEOPENOWN(nmp)) nfsv4_relref(&owp->nfsow_rwlock); else nfscl_lockunlock(&owp->nfsow_rwlock); clp = owp->nfsow_clp; if (error && candelete && op->nfso_opencnt == 0) nfscl_freeopen(op, 0); nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } /* * Called to get a clientid structure. It will optionally lock the * client data structures to do the SetClientId/SetClientId_confirm, * but will release that lock and return the clientid with a reference * count on it. * If the "cred" argument is NULL, a new clientid should not be created. * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot * be done. * The start_renewthread argument tells nfscl_getcl() to start a renew * thread if this creates a new clp. * It always clpp with a reference count on it, unless returning an error. */ APPLESTATIC int nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p, int start_renewthread, struct nfsclclient **clpp) { struct nfsclclient *clp; struct nfsclclient *newclp = NULL; struct nfsmount *nmp; char uuid[HOSTUUIDLEN]; int igotlock = 0, error, trystalecnt, clidinusedelay, i; u_int16_t idlen = 0; nmp = VFSTONFS(mp); if (cred != NULL) { getcredhostuuid(cred, uuid, sizeof uuid); idlen = strlen(uuid); if (idlen > 0) idlen += sizeof (u_int64_t); else idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */ - MALLOC(newclp, struct nfsclclient *, + newclp = malloc( sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT, M_WAITOK | M_ZERO); } NFSLOCKCLSTATE(); /* * If a forced dismount is already in progress, don't * allocate a new clientid and get out now. For the case where * clp != NULL, this is a harmless optimization. */ if (NFSCL_FORCEDISM(mp)) { NFSUNLOCKCLSTATE(); if (newclp != NULL) free(newclp, M_NFSCLCLIENT); return (EBADF); } clp = nmp->nm_clp; if (clp == NULL) { if (newclp == NULL) { NFSUNLOCKCLSTATE(); return (EACCES); } clp = newclp; clp->nfsc_idlen = idlen; LIST_INIT(&clp->nfsc_owner); TAILQ_INIT(&clp->nfsc_deleg); TAILQ_INIT(&clp->nfsc_layout); LIST_INIT(&clp->nfsc_devinfo); for (i = 0; i < NFSCLDELEGHASHSIZE; i++) LIST_INIT(&clp->nfsc_deleghash[i]); for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) LIST_INIT(&clp->nfsc_layouthash[i]); clp->nfsc_flags = NFSCLFLAGS_INITED; clp->nfsc_clientidrev = 1; clp->nfsc_cbident = nfscl_nextcbident(); nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id, clp->nfsc_idlen); LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list); nmp->nm_clp = clp; clp->nfsc_nmp = nmp; NFSUNLOCKCLSTATE(); if (start_renewthread != 0) nfscl_start_renewthread(clp); } else { NFSUNLOCKCLSTATE(); if (newclp != NULL) free(newclp, M_NFSCLCLIENT); } NFSLOCKCLSTATE(); while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock && !NFSCL_FORCEDISM(mp)) igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, mp); if (igotlock == 0) { /* * Call nfsv4_lock() with "iwantlock == 0" so that it will * wait for a pending exclusive lock request. This gives the * exclusive lock request priority over this shared lock * request. * An exclusive lock on nfsc_lock is used mainly for server * crash recoveries. */ nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp); nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp); } if (igotlock == 0 && NFSCL_FORCEDISM(mp)) { /* * Both nfsv4_lock() and nfsv4_getref() know to check * for NFSCL_FORCEDISM() and return without sleeping to * wait for the exclusive lock to be released, since it * might be held by nfscl_umount() and we need to get out * now for that case and not wait until nfscl_umount() * releases it. */ NFSUNLOCKCLSTATE(); return (EBADF); } NFSUNLOCKCLSTATE(); /* * If it needs a clientid, do the setclientid now. */ if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) { if (!igotlock) panic("nfscl_clget"); if (p == NULL || cred == NULL) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (EACCES); } /* * If RFC3530 Sec. 14.2.33 is taken literally, * NFSERR_CLIDINUSE will be returned persistently for the * case where a new mount of the same file system is using * a different principal. In practice, NFSERR_CLIDINUSE is * only returned when there is outstanding unexpired state * on the clientid. As such, try for twice the lease * interval, if we know what that is. Otherwise, make a * wild ass guess. * The case of returning NFSERR_STALECLIENTID is far less * likely, but might occur if there is a significant delay * between doing the SetClientID and SetClientIDConfirm Ops, * such that the server throws away the clientid before * receiving the SetClientIDConfirm. */ if (clp->nfsc_renew > 0) clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2; else clidinusedelay = 120; trystalecnt = 3; do { error = nfsrpc_setclient(nmp, clp, 0, cred, p); if (error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_BADSESSION || error == NFSERR_CLIDINUSE) { (void) nfs_catnap(PZERO, error, "nfs_setcl"); } } while (((error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) || (error == NFSERR_CLIDINUSE && --clidinusedelay > 0)); if (error) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (error); } clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; } if (igotlock) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 1); NFSUNLOCKCLSTATE(); } *clpp = clp; return (0); } /* * Get a reference to a clientid and return it, if valid. */ APPLESTATIC struct nfsclclient * nfscl_findcl(struct nfsmount *nmp) { struct nfsclclient *clp; clp = nmp->nm_clp; if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) return (NULL); return (clp); } /* * Release the clientid structure. It may be locked or reference counted. */ static void nfscl_clrelease(struct nfsclclient *clp) { if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK) nfsv4_unlock(&clp->nfsc_lock, 0); else nfsv4_relref(&clp->nfsc_lock); } /* * External call for nfscl_clrelease. */ APPLESTATIC void nfscl_clientrelease(struct nfsclclient *clp) { NFSLOCKCLSTATE(); if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK) nfsv4_unlock(&clp->nfsc_lock, 0); else nfsv4_relref(&clp->nfsc_lock); NFSUNLOCKCLSTATE(); } /* * Called when wanting to lock a byte region. */ APPLESTATIC int nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp, int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp, struct nfscllockowner **lpp, int *newonep, int *donelocallyp) { struct nfscllockowner *lp; struct nfsclopen *op; struct nfsclclient *clp; struct nfscllockowner *nlp; struct nfscllock *nlop, *otherlop; struct nfscldeleg *dp = NULL, *ldp = NULL; struct nfscllockownerhead *lhp = NULL; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN]; u_int8_t *openownp; int error = 0, ret, donelocally = 0; u_int32_t mode; /* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */ mode = 0; np = VTONFS(vp); *lpp = NULL; lp = NULL; *newonep = 0; *donelocallyp = 0; /* * Might need these, so MALLOC them now, to * avoid a tsleep() in MALLOC later. */ - MALLOC(nlp, struct nfscllockowner *, + nlp = malloc( sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK); - MALLOC(otherlop, struct nfscllock *, + otherlop = malloc( sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); - MALLOC(nlop, struct nfscllock *, + nlop = malloc( sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); nlop->nfslo_type = type; nlop->nfslo_first = off; if (len == NFS64BITSSET) { nlop->nfslo_end = NFS64BITSSET; } else { nlop->nfslo_end = off + len; if (nlop->nfslo_end <= nlop->nfslo_first) error = NFSERR_INVAL; } if (!error) { if (recovery) clp = rclp; else error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); } if (error) { - FREE((caddr_t)nlp, M_NFSCLLOCKOWNER); - FREE((caddr_t)otherlop, M_NFSCLLOCK); - FREE((caddr_t)nlop, M_NFSCLLOCK); + free(nlp, M_NFSCLLOCKOWNER); + free(otherlop, M_NFSCLLOCK); + free(nlop, M_NFSCLLOCK); return (error); } op = NULL; if (recovery) { ownp = rownp; openownp = ropenownp; } else { nfscl_filllockowner(id, own, flags); ownp = own; if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) nfscl_filllockowner(NULL, openown, F_POSIX); else nfscl_filllockowner(p->td_proc, openown, F_POSIX); openownp = openown; } if (!recovery) { NFSLOCKCLSTATE(); /* * First, search for a delegation. If one exists for this file, * the lock can be done locally against it, so long as there * isn't a local lock conflict. */ ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); /* Just sanity check for correct type of delegation */ if (dp != NULL && ((dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 || (type == F_WRLCK && (dp->nfsdl_flags & NFSCLDL_WRITE) == 0))) dp = NULL; } if (dp != NULL) { /* Now, find an open and maybe a lockowner. */ ret = nfscl_getopen(&dp->nfsdl_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op); if (ret) ret = nfscl_getopen(&clp->nfsc_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op); if (!ret) { lhp = &dp->nfsdl_lock; TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; donelocally = 1; } else { dp = NULL; } } if (!donelocally) { /* * Get the related Open and maybe lockowner. */ error = nfscl_getopen(&clp->nfsc_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, &lp, &op); if (!error) lhp = &op->nfso_lock; } if (!error && !recovery) error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, nlop, ownp, ldp, NULL); if (error) { if (!recovery) { nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } - FREE((caddr_t)nlp, M_NFSCLLOCKOWNER); - FREE((caddr_t)otherlop, M_NFSCLLOCK); - FREE((caddr_t)nlop, M_NFSCLLOCK); + free(nlp, M_NFSCLLOCKOWNER); + free(otherlop, M_NFSCLLOCK); + free(nlop, M_NFSCLLOCK); return (error); } /* * Ok, see if a lockowner exists and create one, as required. */ if (lp == NULL) LIST_FOREACH(lp, lhp, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN)) break; } if (lp == NULL) { NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN); if (recovery) NFSBCOPY(ropenownp, nlp->nfsl_openowner, NFSV4CL_LOCKNAMELEN); else NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner, NFSV4CL_LOCKNAMELEN); nlp->nfsl_seqid = 0; nlp->nfsl_lockflags = flags; nlp->nfsl_inprog = NULL; nfscl_lockinit(&nlp->nfsl_rwlock); LIST_INIT(&nlp->nfsl_lock); if (donelocally) { nlp->nfsl_open = NULL; nfsstatsv1.cllocallockowners++; } else { nlp->nfsl_open = op; nfsstatsv1.cllockowners++; } LIST_INSERT_HEAD(lhp, nlp, nfsl_list); lp = nlp; nlp = NULL; *newonep = 1; } /* * Now, update the byte ranges for locks. */ ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally); if (!ret) donelocally = 1; if (donelocally) { *donelocallyp = 1; if (!recovery) nfscl_clrelease(clp); } else { /* * Serial modifications on the lock owner for multiple threads * for the same process using a read/write lock. */ if (!recovery) nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR); } if (!recovery) NFSUNLOCKCLSTATE(); if (nlp) - FREE((caddr_t)nlp, M_NFSCLLOCKOWNER); + free(nlp, M_NFSCLLOCKOWNER); if (nlop) - FREE((caddr_t)nlop, M_NFSCLLOCK); + free(nlop, M_NFSCLLOCK); if (otherlop) - FREE((caddr_t)otherlop, M_NFSCLLOCK); + free(otherlop, M_NFSCLLOCK); *lpp = lp; return (0); } /* * Called to unlock a byte range, for LockU. */ APPLESTATIC int nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len, __unused struct ucred *cred, NFSPROC_T *p, int callcnt, struct nfsclclient *clp, void *id, int flags, struct nfscllockowner **lpp, int *dorpcp) { struct nfscllockowner *lp; struct nfsclowner *owp; struct nfsclopen *op; struct nfscllock *nlop, *other_lop = NULL; struct nfscldeleg *dp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int ret = 0, fnd; np = VTONFS(vp); *lpp = NULL; *dorpcp = 0; /* * Might need these, so MALLOC them now, to * avoid a tsleep() in MALLOC later. */ - MALLOC(nlop, struct nfscllock *, + nlop = malloc( sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); nlop->nfslo_type = F_UNLCK; nlop->nfslo_first = off; if (len == NFS64BITSSET) { nlop->nfslo_end = NFS64BITSSET; } else { nlop->nfslo_end = off + len; if (nlop->nfslo_end <= nlop->nfslo_first) { - FREE((caddr_t)nlop, M_NFSCLLOCK); + free(nlop, M_NFSCLLOCK); return (NFSERR_INVAL); } } if (callcnt == 0) { - MALLOC(other_lop, struct nfscllock *, + other_lop = malloc( sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); *other_lop = *nlop; } nfscl_filllockowner(id, own, flags); dp = NULL; NFSLOCKCLSTATE(); if (callcnt == 0) dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); /* * First, unlock any local regions on a delegation. */ if (dp != NULL) { /* Look for this lockowner. */ LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) break; } if (lp != NULL) /* Use other_lop, so nlop is still available */ (void)nfscl_updatelock(lp, &other_lop, NULL, 1); } /* * Now, find a matching open/lockowner that hasn't already been done, * as marked by nfsl_inprog. */ lp = NULL; fnd = 0; LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == np->n_fhp->nfh_len && !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) { LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lp->nfsl_inprog == NULL && !NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { fnd = 1; break; } } if (fnd) break; } } if (fnd) break; } if (lp != NULL) { ret = nfscl_updatelock(lp, &nlop, NULL, 0); if (ret) *dorpcp = 1; /* * Serial modifications on the lock owner for multiple * threads for the same process using a read/write lock. */ lp->nfsl_inprog = p; nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR); *lpp = lp; } NFSUNLOCKCLSTATE(); if (nlop) - FREE((caddr_t)nlop, M_NFSCLLOCK); + free(nlop, M_NFSCLLOCK); if (other_lop) - FREE((caddr_t)other_lop, M_NFSCLLOCK); + free(other_lop, M_NFSCLLOCK); return (0); } /* * Release all lockowners marked in progess for this process and file. */ APPLESTATIC void nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p, void *id, int flags) { struct nfsclowner *owp; struct nfsclopen *op; struct nfscllockowner *lp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; np = VTONFS(vp); nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == np->n_fhp->nfh_len && !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) { LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lp->nfsl_inprog == p && !NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { lp->nfsl_inprog = NULL; nfscl_lockunlock(&lp->nfsl_rwlock); } } } } } nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } /* * Called to find out if any bytes within the byte range specified are * write locked by the calling process. Used to determine if flushing * is required before a LockU. * If in doubt, return 1, so the flush will occur. */ APPLESTATIC int nfscl_checkwritelocked(vnode_t vp, struct flock *fl, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfsclowner *owp; struct nfscllockowner *lp; struct nfsclopen *op; struct nfsclclient *clp; struct nfscllock *lop; struct nfscldeleg *dp; struct nfsnode *np; u_int64_t off, end; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int error = 0; np = VTONFS(vp); switch (fl->l_whence) { case SEEK_SET: case SEEK_CUR: /* * Caller is responsible for adding any necessary offset * when SEEK_CUR is used. */ off = fl->l_start; break; case SEEK_END: off = np->n_size + fl->l_start; break; default: return (1); } if (fl->l_len != 0) { end = off + fl->l_len; if (end < off) return (1); } else { end = NFS64BITSSET; } error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (1); nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); /* * First check the delegation locks. */ dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) break; } if (lp != NULL) { LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (lop->nfslo_first >= end) break; if (lop->nfslo_end <= off) continue; if (lop->nfslo_type == F_WRLCK) { nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); return (1); } } } } /* * Now, check state against the server. */ LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == np->n_fhp->nfh_len && !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) { LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) break; } if (lp != NULL) { LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (lop->nfslo_first >= end) break; if (lop->nfslo_end <= off) continue; if (lop->nfslo_type == F_WRLCK) { nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); return (1); } } } } } } nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); return (0); } /* * Release a byte range lock owner structure. */ APPLESTATIC void nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete) { struct nfsclclient *clp; if (lp == NULL) return; NFSLOCKCLSTATE(); clp = lp->nfsl_open->nfso_own->nfsow_clp; if (error != 0 && candelete && (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0) nfscl_freelockowner(lp, 0); else nfscl_lockunlock(&lp->nfsl_rwlock); nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } /* * Free up an open structure and any associated byte range lock structures. */ APPLESTATIC void nfscl_freeopen(struct nfsclopen *op, int local) { LIST_REMOVE(op, nfso_list); nfscl_freealllocks(&op->nfso_lock, local); - FREE((caddr_t)op, M_NFSCLOPEN); + free(op, M_NFSCLOPEN); if (local) nfsstatsv1.cllocalopens--; else nfsstatsv1.clopens--; } /* * Free up all lock owners and associated locks. */ static void nfscl_freealllocks(struct nfscllockownerhead *lhp, int local) { struct nfscllockowner *lp, *nlp; LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) { if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED)) panic("nfscllckw"); nfscl_freelockowner(lp, local); } } /* * Called for an Open when NFSERR_EXPIRED is received from the server. * If there are no byte range locks nor a Share Deny lost, try to do a * fresh Open. Otherwise, free the open. */ static int nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op, struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) { struct nfscllockowner *lp; struct nfscldeleg *dp; int mustdelete = 0, error; /* * Look for any byte range lock(s). */ LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { mustdelete = 1; break; } } /* * If no byte range lock(s) nor a Share deny, try to re-open. */ if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) { newnfs_copycred(&op->nfso_cred, cred); dp = NULL; error = nfsrpc_reopen(nmp, op->nfso_fh, op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p); if (error) { mustdelete = 1; if (dp != NULL) { - FREE((caddr_t)dp, M_NFSCLDELEG); + free(dp, M_NFSCLDELEG); dp = NULL; } } if (dp != NULL) nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh, op->nfso_fhlen, cred, p, &dp); } /* * If a byte range lock or Share deny or couldn't re-open, free it. */ if (mustdelete) nfscl_freeopen(op, 0); return (mustdelete); } /* * Free up an open owner structure. */ static void nfscl_freeopenowner(struct nfsclowner *owp, int local) { LIST_REMOVE(owp, nfsow_list); - FREE((caddr_t)owp, M_NFSCLOWNER); + free(owp, M_NFSCLOWNER); if (local) nfsstatsv1.cllocalopenowners--; else nfsstatsv1.clopenowners--; } /* * Free up a byte range lock owner structure. */ APPLESTATIC void nfscl_freelockowner(struct nfscllockowner *lp, int local) { struct nfscllock *lop, *nlop; LIST_REMOVE(lp, nfsl_list); LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) { nfscl_freelock(lop, local); } - FREE((caddr_t)lp, M_NFSCLLOCKOWNER); + free(lp, M_NFSCLLOCKOWNER); if (local) nfsstatsv1.cllocallockowners--; else nfsstatsv1.cllockowners--; } /* * Free up a byte range lock structure. */ APPLESTATIC void nfscl_freelock(struct nfscllock *lop, int local) { LIST_REMOVE(lop, nfslo_list); - FREE((caddr_t)lop, M_NFSCLLOCK); + free(lop, M_NFSCLLOCK); if (local) nfsstatsv1.cllocallocks--; else nfsstatsv1.cllocks--; } /* * Clean out the state related to a delegation. */ static void nfscl_cleandeleg(struct nfscldeleg *dp) { struct nfsclowner *owp, *nowp; struct nfsclopen *op; LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { if (LIST_NEXT(op, nfso_list) != NULL) panic("nfscleandel"); nfscl_freeopen(op, 1); } nfscl_freeopenowner(owp, 1); } nfscl_freealllocks(&dp->nfsdl_lock, 1); } /* * Free a delegation. */ static void nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp) { TAILQ_REMOVE(hdp, dp, nfsdl_list); LIST_REMOVE(dp, nfsdl_hash); - FREE((caddr_t)dp, M_NFSCLDELEG); + free(dp, M_NFSCLDELEG); nfsstatsv1.cldelegates--; nfscl_delegcnt--; } /* * Free up all state related to this client structure. */ static void nfscl_cleanclient(struct nfsclclient *clp) { struct nfsclowner *owp, *nowp; struct nfsclopen *op, *nop; struct nfscllayout *lyp, *nlyp; struct nfscldevinfo *dip, *ndip; TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) nfscl_freelayout(lyp); LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) nfscl_freedevinfo(dip); /* Now, all the OpenOwners, etc. */ LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) { nfscl_freeopen(op, 0); } nfscl_freeopenowner(owp, 0); } } /* * Called when an NFSERR_EXPIRED is received from the server. */ static void nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) { struct nfsclowner *owp, *nowp, *towp; struct nfsclopen *op, *nop, *top; struct nfscldeleg *dp, *ndp; int ret, printed = 0; /* * First, merge locally issued Opens into the list for the server. */ dp = TAILQ_FIRST(&clp->nfsc_deleg); while (dp != NULL) { ndp = TAILQ_NEXT(dp, nfsdl_list); owp = LIST_FIRST(&dp->nfsdl_owner); while (owp != NULL) { nowp = LIST_NEXT(owp, nfsow_list); op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { if (LIST_NEXT(op, nfso_list) != NULL) panic("nfsclexp"); LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) { if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) break; } if (towp != NULL) { /* Merge opens in */ LIST_FOREACH(top, &towp->nfsow_open, nfso_list) { if (top->nfso_fhlen == op->nfso_fhlen && !NFSBCMP(top->nfso_fh, op->nfso_fh, op->nfso_fhlen)) { top->nfso_mode |= op->nfso_mode; top->nfso_opencnt += op->nfso_opencnt; break; } } if (top == NULL) { /* Just add the open to the owner list */ LIST_REMOVE(op, nfso_list); op->nfso_own = towp; LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list); nfsstatsv1.cllocalopens--; nfsstatsv1.clopens++; } } else { /* Just add the openowner to the client list */ LIST_REMOVE(owp, nfsow_list); owp->nfsow_clp = clp; LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list); nfsstatsv1.cllocalopenowners--; nfsstatsv1.clopenowners++; nfsstatsv1.cllocalopens--; nfsstatsv1.clopens++; } } owp = nowp; } if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) { printed = 1; printf("nfsv4 expired locks lost\n"); } nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); dp = ndp; } if (!TAILQ_EMPTY(&clp->nfsc_deleg)) panic("nfsclexp"); /* * Now, try and reopen against the server. */ LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { owp->nfsow_seqid = 0; LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) { ret = nfscl_expireopen(clp, op, nmp, cred, p); if (ret && !printed) { printed = 1; printf("nfsv4 expired locks lost\n"); } } if (LIST_EMPTY(&owp->nfsow_open)) nfscl_freeopenowner(owp, 0); } } /* * This function must be called after the process represented by "own" has * exited. Must be called with CLSTATE lock held. */ static void nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own) { struct nfsclowner *owp, *nowp; struct nfscllockowner *lp, *nlp; struct nfscldeleg *dp; /* First, get rid of local locks on delegations. */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED)) panic("nfscllckw"); nfscl_freelockowner(lp, 1); } } } owp = LIST_FIRST(&clp->nfsc_owner); while (owp != NULL) { nowp = LIST_NEXT(owp, nfsow_list); if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN)) { /* * If there are children that haven't closed the * file descriptors yet, the opens will still be * here. For that case, let the renew thread clear * out the OpenOwner later. */ if (LIST_EMPTY(&owp->nfsow_open)) nfscl_freeopenowner(owp, 0); else owp->nfsow_defunct = 1; } owp = nowp; } } /* * Find open/lock owners for processes that have exited. */ static void nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp) { struct nfsclowner *owp, *nowp; struct nfsclopen *op; struct nfscllockowner *lp, *nlp; struct nfscldeleg *dp; NFSPROCLISTLOCK(); NFSLOCKCLSTATE(); LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) { if (LIST_EMPTY(&lp->nfsl_lock)) nfscl_emptylockowner(lp, lhp); } } if (nfscl_procdoesntexist(owp->nfsow_owner)) nfscl_cleanup_common(clp, owp->nfsow_owner); } /* * For the single open_owner case, these lock owners need to be * checked to see if they still exist separately. * This is because nfscl_procdoesntexist() never returns true for * the single open_owner so that the above doesn't ever call * nfscl_cleanup_common(). */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) { if (nfscl_procdoesntexist(lp->nfsl_owner)) nfscl_cleanup_common(clp, lp->nfsl_owner); } } NFSUNLOCKCLSTATE(); NFSPROCLISTUNLOCK(); } /* * Take the empty lock owner and move it to the local lhp list if the * associated process no longer exists. */ static void nfscl_emptylockowner(struct nfscllockowner *lp, struct nfscllockownerfhhead *lhp) { struct nfscllockownerfh *lfhp, *mylfhp; struct nfscllockowner *nlp; int fnd_it; /* If not a Posix lock owner, just return. */ if ((lp->nfsl_lockflags & F_POSIX) == 0) return; fnd_it = 0; mylfhp = NULL; /* * First, search to see if this lock owner is already in the list. * If it is, then the associated process no longer exists. */ SLIST_FOREACH(lfhp, lhp, nfslfh_list) { if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen && !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh, lfhp->nfslfh_len)) mylfhp = lfhp; LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list) if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner, NFSV4CL_LOCKNAMELEN)) fnd_it = 1; } /* If not found, check if process still exists. */ if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0) return; /* Move the lock owner over to the local list. */ if (mylfhp == NULL) { mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP, M_NOWAIT); if (mylfhp == NULL) return; mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen; NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh, mylfhp->nfslfh_len); LIST_INIT(&mylfhp->nfslfh_lock); SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list); } LIST_REMOVE(lp, nfsl_list); LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list); } static int fake_global; /* Used to force visibility of MNTK_UNMOUNTF */ /* * Called from nfs umount to free up the clientid. */ APPLESTATIC void nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p) { struct nfsclclient *clp; struct ucred *cred; int igotlock; /* * For the case that matters, this is the thread that set * MNTK_UNMOUNTF, so it will see it set. The code that follows is * done to ensure that any thread executing nfscl_getcl() after * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the * mutex for NFSLOCKCLSTATE(), so it is "m" for the following * explanation, courtesy of Alan Cox. * What follows is a snippet from Alan Cox's email at: * http://docs.FreeBSD.org/cgi/ * mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw * * 1. Set MNTK_UNMOUNTF * 2. Acquire a standard FreeBSD mutex "m". * 3. Update some data structures. * 4. Release mutex "m". * * Then, other threads that acquire "m" after step 4 has occurred will * see MNTK_UNMOUNTF as set. But, other threads that beat thread X to * step 2 may or may not see MNTK_UNMOUNTF as set. */ NFSLOCKCLSTATE(); if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { fake_global++; NFSUNLOCKCLSTATE(); NFSLOCKCLSTATE(); } clp = nmp->nm_clp; if (clp != NULL) { if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0) panic("nfscl umount"); /* * First, handshake with the nfscl renew thread, to terminate * it. */ clp->nfsc_flags |= NFSCLFLAGS_UMOUNT; while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD) (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfsclumnt", hz); /* * Now, get the exclusive lock on the client state, so * that no uses of the state are still in progress. */ do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKCLSTATE(); /* * Free up all the state. It will expire on the server, but * maybe we should do a SetClientId/SetClientIdConfirm so * the server throws it away? */ LIST_REMOVE(clp, nfsc_list); nfscl_delegreturnall(clp, p); cred = newnfs_getcred(); if (NFSHASNFSV4N(nmp)) { (void)nfsrpc_destroysession(nmp, clp, cred, p); (void)nfsrpc_destroyclient(nmp, clp, cred, p); } else (void)nfsrpc_setclient(nmp, clp, 0, cred, p); nfscl_cleanclient(clp); nmp->nm_clp = NULL; NFSFREECRED(cred); free(clp, M_NFSCLCLIENT); } else NFSUNLOCKCLSTATE(); } /* * This function is called when a server replies with NFSERR_STALECLIENTID * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists, * doing Opens and Locks with reclaim. If these fail, it deletes the * corresponding state. */ static void nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { struct nfsclowner *owp, *nowp; struct nfsclopen *op, *nop; struct nfscllockowner *lp, *nlp; struct nfscllock *lop, *nlop; struct nfscldeleg *dp, *ndp, *tdp; struct nfsmount *nmp; struct ucred *tcred; struct nfsclopenhead extra_open; struct nfscldeleghead extra_deleg; struct nfsreq *rep; u_int64_t len; u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode; int i, igotlock = 0, error, trycnt, firstlock; struct nfscllayout *lyp, *nlyp; /* * First, lock the client structure, so everyone else will * block when trying to use state. */ NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG; do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKCLSTATE(); nmp = clp->nfsc_nmp; if (nmp == NULL) panic("nfscl recover"); /* * For now, just get rid of all layouts. There may be a need * to do LayoutCommit Ops with reclaim == true later. */ TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) nfscl_freelayout(lyp); TAILQ_INIT(&clp->nfsc_layout); for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) LIST_INIT(&clp->nfsc_layouthash[i]); trycnt = 5; do { error = nfsrpc_setclient(nmp, clp, 1, cred, p); } while ((error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { NFSLOCKCLSTATE(); clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER | NFSCLFLAGS_RECVRINPROG); wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return; } clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; /* * Mark requests already queued on the server, so that they don't * initiate another recovery cycle. Any requests already in the * queue that handle state information will have the old stale * clientid/stateid and will get a NFSERR_STALESTATEID, * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server. * This will be translated to NFSERR_STALEDONTRECOVER when * R_DONTRECOVER is set. */ NFSLOCKREQ(); TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) { if (rep->r_nmp == nmp) rep->r_flags |= R_DONTRECOVER; } NFSUNLOCKREQ(); /* * Now, mark all delegations "need reclaim". */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM; TAILQ_INIT(&extra_deleg); LIST_INIT(&extra_open); /* * Now traverse the state lists, doing Open and Lock Reclaims. */ tcred = newnfs_getcred(); owp = LIST_FIRST(&clp->nfsc_owner); while (owp != NULL) { nowp = LIST_NEXT(owp, nfsow_list); owp->nfsow_seqid = 0; op = LIST_FIRST(&owp->nfsow_open); while (op != NULL) { nop = LIST_NEXT(op, nfso_list); if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) { /* Search for a delegation to reclaim with the open */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) continue; if ((dp->nfsdl_flags & NFSCLDL_WRITE)) { mode = NFSV4OPEN_ACCESSWRITE; delegtype = NFSV4OPEN_DELEGATEWRITE; } else { mode = NFSV4OPEN_ACCESSREAD; delegtype = NFSV4OPEN_DELEGATEREAD; } if ((op->nfso_mode & mode) == mode && op->nfso_fhlen == dp->nfsdl_fhlen && !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen)) break; } ndp = dp; if (dp == NULL) delegtype = NFSV4OPEN_DELEGATENONE; newnfs_copycred(&op->nfso_cred, tcred); error = nfscl_tryopen(nmp, NULL, op->nfso_fh, op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen, op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype, tcred, p); if (!error) { /* Handle any replied delegation */ if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE) || NFSMNT_RDONLY(nmp->nm_mountp))) { if ((ndp->nfsdl_flags & NFSCLDL_WRITE)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) continue; if ((op->nfso_mode & mode) == mode && op->nfso_fhlen == dp->nfsdl_fhlen && !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen)) { dp->nfsdl_stateid = ndp->nfsdl_stateid; dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit; dp->nfsdl_ace = ndp->nfsdl_ace; dp->nfsdl_change = ndp->nfsdl_change; dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM; if ((ndp->nfsdl_flags & NFSCLDL_RECALL)) dp->nfsdl_flags |= NFSCLDL_RECALL; - FREE((caddr_t)ndp, M_NFSCLDELEG); + free(ndp, M_NFSCLDELEG); ndp = NULL; break; } } } if (ndp != NULL) TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list); /* and reclaim all byte range locks */ lp = LIST_FIRST(&op->nfso_lock); while (lp != NULL) { nlp = LIST_NEXT(lp, nfsl_list); lp->nfsl_seqid = 0; firstlock = 1; lop = LIST_FIRST(&lp->nfsl_lock); while (lop != NULL) { nlop = LIST_NEXT(lop, nfslo_list); if (lop->nfslo_end == NFS64BITSSET) len = NFS64BITSSET; else len = lop->nfslo_end - lop->nfslo_first; error = nfscl_trylock(nmp, NULL, op->nfso_fh, op->nfso_fhlen, lp, firstlock, 1, lop->nfslo_first, len, lop->nfslo_type, tcred, p); if (error != 0) nfscl_freelock(lop, 0); else firstlock = 0; lop = nlop; } /* If no locks, but a lockowner, just delete it. */ if (LIST_EMPTY(&lp->nfsl_lock)) nfscl_freelockowner(lp, 0); lp = nlp; } } } if (error != 0 && error != NFSERR_BADSESSION) nfscl_freeopen(op, 0); op = nop; } owp = nowp; } /* * Now, try and get any delegations not yet reclaimed by cobbling * to-gether an appropriate open. */ nowp = NULL; dp = TAILQ_FIRST(&clp->nfsc_deleg); while (dp != NULL) { ndp = TAILQ_NEXT(dp, nfsdl_list); if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) { if (nowp == NULL) { - MALLOC(nowp, struct nfsclowner *, + nowp = malloc( sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); /* * Name must be as long an largest possible * NFSV4CL_LOCKNAMELEN. 12 for now. */ NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN); LIST_INIT(&nowp->nfsow_open); nowp->nfsow_clp = clp; nowp->nfsow_seqid = 0; nowp->nfsow_defunct = 0; nfscl_lockinit(&nowp->nfsow_rwlock); } nop = NULL; if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) { - MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) + + nop = malloc(sizeof (struct nfsclopen) + dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK); nop->nfso_own = nowp; if ((dp->nfsdl_flags & NFSCLDL_WRITE)) { nop->nfso_mode = NFSV4OPEN_ACCESSWRITE; delegtype = NFSV4OPEN_DELEGATEWRITE; } else { nop->nfso_mode = NFSV4OPEN_ACCESSREAD; delegtype = NFSV4OPEN_DELEGATEREAD; } nop->nfso_opencnt = 0; nop->nfso_posixlock = 1; nop->nfso_fhlen = dp->nfsdl_fhlen; NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen); LIST_INIT(&nop->nfso_lock); nop->nfso_stateid.seqid = 0; nop->nfso_stateid.other[0] = 0; nop->nfso_stateid.other[1] = 0; nop->nfso_stateid.other[2] = 0; newnfs_copycred(&dp->nfsdl_cred, tcred); newnfs_copyincred(tcred, &nop->nfso_cred); tdp = NULL; error = nfscl_tryopen(nmp, NULL, nop->nfso_fh, nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen, nop->nfso_mode, nop, NULL, 0, &tdp, 1, delegtype, tcred, p); if (tdp != NULL) { if ((tdp->nfsdl_flags & NFSCLDL_WRITE)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; if ((nop->nfso_mode & mode) == mode && nop->nfso_fhlen == tdp->nfsdl_fhlen && !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh, nop->nfso_fhlen)) { dp->nfsdl_stateid = tdp->nfsdl_stateid; dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit; dp->nfsdl_ace = tdp->nfsdl_ace; dp->nfsdl_change = tdp->nfsdl_change; dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM; if ((tdp->nfsdl_flags & NFSCLDL_RECALL)) dp->nfsdl_flags |= NFSCLDL_RECALL; - FREE((caddr_t)tdp, M_NFSCLDELEG); + free(tdp, M_NFSCLDELEG); } else { TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list); } } } if (error) { if (nop != NULL) - FREE((caddr_t)nop, M_NFSCLOPEN); + free(nop, M_NFSCLOPEN); /* * Couldn't reclaim it, so throw the state * away. Ouch!! */ nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } else { LIST_INSERT_HEAD(&extra_open, nop, nfso_list); } } dp = ndp; } /* * Now, get rid of extra Opens and Delegations. */ LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) { do { newnfs_copycred(&op->nfso_cred, tcred); error = nfscl_tryclose(op, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfsexcls"); } while (error == NFSERR_GRACE); LIST_REMOVE(op, nfso_list); - FREE((caddr_t)op, M_NFSCLOPEN); + free(op, M_NFSCLOPEN); } if (nowp != NULL) - FREE((caddr_t)nowp, M_NFSCLOWNER); + free(nowp, M_NFSCLOWNER); TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) { do { newnfs_copycred(&dp->nfsdl_cred, tcred); error = nfscl_trydelegreturn(dp, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfsexdlg"); } while (error == NFSERR_GRACE); TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list); - FREE((caddr_t)dp, M_NFSCLDELEG); + free(dp, M_NFSCLDELEG); } /* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */ if (NFSHASNFSV4N(nmp)) (void)nfsrpc_reclaimcomplete(nmp, cred, p); NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG; wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(tcred); } /* * This function is called when a server replies with NFSERR_EXPIRED. * It deletes all state for the client and does a fresh SetClientId/confirm. * XXX Someday it should post a signal to the process(es) that hold the * state, so they know that lock state has been lost. */ APPLESTATIC int nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p) { struct nfsmount *nmp; struct ucred *cred; int igotlock = 0, error, trycnt; /* * If the clientid has gone away or a new SetClientid has already * been done, just return ok. */ if (clp == NULL || clidrev != clp->nfsc_clientidrev) return (0); /* * First, lock the client structure, so everyone else will * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so * that only one thread does the work. */ NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT; do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT)); if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) { if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (0); } clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG; NFSUNLOCKCLSTATE(); nmp = clp->nfsc_nmp; if (nmp == NULL) panic("nfscl expired"); cred = newnfs_getcred(); trycnt = 5; do { error = nfsrpc_setclient(nmp, clp, 0, cred, p); } while ((error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; } else { /* * Expire the state for the client. */ nfscl_expireclient(clp, nmp, cred, p); NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; } clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG); wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(cred); return (error); } /* * This function inserts a lock in the list after insert_lop. */ static void nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop, struct nfscllock *insert_lop, int local) { if ((struct nfscllockowner *)insert_lop == lp) LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list); else LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list); if (local) nfsstatsv1.cllocallocks++; else nfsstatsv1.cllocks++; } /* * This function updates the locking for a lock owner and given file. It * maintains a list of lock ranges ordered on increasing file offset that * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style). * It always adds new_lop to the list and sometimes uses the one pointed * at by other_lopp. * Returns 1 if the locks were modified, 0 otherwise. */ static int nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp, struct nfscllock **other_lopp, int local) { struct nfscllock *new_lop = *new_lopp; struct nfscllock *lop, *tlop, *ilop; struct nfscllock *other_lop; int unlock = 0, modified = 0; u_int64_t tmp; /* * Work down the list until the lock is merged. */ if (new_lop->nfslo_type == F_UNLCK) unlock = 1; ilop = (struct nfscllock *)lp; lop = LIST_FIRST(&lp->nfsl_lock); while (lop != NULL) { /* * Only check locks for this file that aren't before the start of * new lock's range. */ if (lop->nfslo_end >= new_lop->nfslo_first) { if (new_lop->nfslo_end < lop->nfslo_first) { /* * If the new lock ends before the start of the * current lock's range, no merge, just insert * the new lock. */ break; } if (new_lop->nfslo_type == lop->nfslo_type || (new_lop->nfslo_first <= lop->nfslo_first && new_lop->nfslo_end >= lop->nfslo_end)) { /* * This lock can be absorbed by the new lock/unlock. * This happens when it covers the entire range * of the old lock or is contiguous * with the old lock and is of the same type or an * unlock. */ if (new_lop->nfslo_type != lop->nfslo_type || new_lop->nfslo_first != lop->nfslo_first || new_lop->nfslo_end != lop->nfslo_end) modified = 1; if (lop->nfslo_first < new_lop->nfslo_first) new_lop->nfslo_first = lop->nfslo_first; if (lop->nfslo_end > new_lop->nfslo_end) new_lop->nfslo_end = lop->nfslo_end; tlop = lop; lop = LIST_NEXT(lop, nfslo_list); nfscl_freelock(tlop, local); continue; } /* * All these cases are for contiguous locks that are not the * same type, so they can't be merged. */ if (new_lop->nfslo_first <= lop->nfslo_first) { /* * This case is where the new lock overlaps with the * first part of the old lock. Move the start of the * old lock to just past the end of the new lock. The * new lock will be inserted in front of the old, since * ilop hasn't been updated. (We are done now.) */ if (lop->nfslo_first != new_lop->nfslo_end) { lop->nfslo_first = new_lop->nfslo_end; modified = 1; } break; } if (new_lop->nfslo_end >= lop->nfslo_end) { /* * This case is where the new lock overlaps with the * end of the old lock's range. Move the old lock's * end to just before the new lock's first and insert * the new lock after the old lock. * Might not be done yet, since the new lock could * overlap further locks with higher ranges. */ if (lop->nfslo_end != new_lop->nfslo_first) { lop->nfslo_end = new_lop->nfslo_first; modified = 1; } ilop = lop; lop = LIST_NEXT(lop, nfslo_list); continue; } /* * The final case is where the new lock's range is in the * middle of the current lock's and splits the current lock * up. Use *other_lopp to handle the second part of the * split old lock range. (We are done now.) * For unlock, we use new_lop as other_lop and tmp, since * other_lop and new_lop are the same for this case. * We noted the unlock case above, so we don't need * new_lop->nfslo_type any longer. */ tmp = new_lop->nfslo_first; if (unlock) { other_lop = new_lop; *new_lopp = NULL; } else { other_lop = *other_lopp; *other_lopp = NULL; } other_lop->nfslo_first = new_lop->nfslo_end; other_lop->nfslo_end = lop->nfslo_end; other_lop->nfslo_type = lop->nfslo_type; lop->nfslo_end = tmp; nfscl_insertlock(lp, other_lop, lop, local); ilop = lop; modified = 1; break; } ilop = lop; lop = LIST_NEXT(lop, nfslo_list); if (lop == NULL) break; } /* * Insert the new lock in the list at the appropriate place. */ if (!unlock) { nfscl_insertlock(lp, new_lop, ilop, local); *new_lopp = NULL; modified = 1; } return (modified); } /* * This function must be run as a kernel thread. * It does Renew Ops and recovery, when required. */ APPLESTATIC void nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) { struct nfsclowner *owp, *nowp; struct nfsclopen *op; struct nfscllockowner *lp, *nlp; struct nfscldeleghead dh; struct nfscldeleg *dp, *ndp; struct ucred *cred; u_int32_t clidrev; int error, cbpathdown, islept, igotlock, ret, clearok; uint32_t recover_done_time = 0; time_t mytime; static time_t prevsec = 0; struct nfscllockownerfh *lfhp, *nlfhp; struct nfscllockownerfhhead lfh; struct nfscllayout *lyp, *nlyp; struct nfscldevinfo *dip, *ndip; struct nfscllayouthead rlh; struct nfsclrecalllayout *recallp; struct nfsclds *dsp; cred = newnfs_getcred(); NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD; NFSUNLOCKCLSTATE(); for(;;) { newnfs_setroot(cred); cbpathdown = 0; if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) { /* * Only allow one recover within 1/2 of the lease * duration (nfsc_renew). */ if (recover_done_time < NFSD_MONOSEC) { recover_done_time = NFSD_MONOSEC + clp->nfsc_renew; NFSCL_DEBUG(1, "Doing recovery..\n"); nfscl_recover(clp, cred, p); } else { NFSCL_DEBUG(1, "Clear Recovery dt=%u ms=%jd\n", recover_done_time, (intmax_t)NFSD_MONOSEC); NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; NFSUNLOCKCLSTATE(); } } if (clp->nfsc_expire <= NFSD_MONOSEC && (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) { clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clidrev = clp->nfsc_clientidrev; error = nfsrpc_renew(clp, NULL, cred, p); if (error == NFSERR_CBPATHDOWN) cbpathdown = 1; else if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION) { NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECOVER; NFSUNLOCKCLSTATE(); } else if (error == NFSERR_EXPIRED) (void) nfscl_hasexpired(clp, clidrev, p); } checkdsrenew: if (NFSHASNFSV4N(clp->nfsc_nmp)) { /* Do renews for any DS sessions. */ NFSLOCKMNT(clp->nfsc_nmp); /* Skip first entry, since the MDS is handled above. */ dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess); if (dsp != NULL) dsp = TAILQ_NEXT(dsp, nfsclds_list); while (dsp != NULL) { if (dsp->nfsclds_expire <= NFSD_MONOSEC && dsp->nfsclds_sess.nfsess_defunct == 0) { dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew; NFSUNLOCKMNT(clp->nfsc_nmp); (void)nfsrpc_renew(clp, dsp, cred, p); goto checkdsrenew; } dsp = TAILQ_NEXT(dsp, nfsclds_list); } NFSUNLOCKMNT(clp->nfsc_nmp); } TAILQ_INIT(&dh); NFSLOCKCLSTATE(); if (cbpathdown) /* It's a Total Recall! */ nfscl_totalrecall(clp); /* * Now, handle defunct owners. */ LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { if (LIST_EMPTY(&owp->nfsow_open)) { if (owp->nfsow_defunct != 0) nfscl_freeopenowner(owp, 0); } } /* * Do the recall on any delegations. To avoid trouble, always * come back up here after having slept. */ igotlock = 0; tryagain: dp = TAILQ_FIRST(&clp->nfsc_deleg); while (dp != NULL) { ndp = TAILQ_NEXT(dp, nfsdl_list); if ((dp->nfsdl_flags & NFSCLDL_RECALL)) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); goto tryagain; } while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) goto tryagain; } NFSUNLOCKCLSTATE(); newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp, NULL, cred, p, 1); if (!ret) { nfscl_cleandeleg(dp); TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); LIST_REMOVE(dp, nfsdl_hash); TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list); nfscl_delegcnt--; nfsstatsv1.cldelegates--; } NFSLOCKCLSTATE(); } dp = ndp; } /* * Clear out old delegations, if we are above the high water * mark. Only clear out ones with no state related to them. * The tailq list is in LRU order. */ dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead); while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) { ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list); if (dp->nfsdl_rwlock.nfslock_usecnt == 0 && dp->nfsdl_rwlock.nfslock_lock == 0 && dp->nfsdl_timestamp < NFSD_MONOSEC && (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED | NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) { clearok = 1; LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { clearok = 0; break; } } if (clearok) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { clearok = 0; break; } } } if (clearok) { TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); LIST_REMOVE(dp, nfsdl_hash); TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list); nfscl_delegcnt--; nfsstatsv1.cldelegates--; } } dp = ndp; } if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); /* * Do the recall on any layouts. To avoid trouble, always * come back up here after having slept. */ TAILQ_INIT(&rlh); tryagain2: TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) { if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) { /* * Wait for outstanding I/O ops to be done. */ if (lyp->nfsly_lock.nfslock_usecnt > 0 || (lyp->nfsly_lock.nfslock_lock & NFSV4LOCK_LOCK) != 0) { lyp->nfsly_lock.nfslock_lock |= NFSV4LOCK_WANTED; (void)nfsmsleep(&lyp->nfsly_lock, NFSCLSTATEMUTEXPTR, PZERO, "nfslyp", NULL); goto tryagain2; } /* Move the layout to the recall list. */ TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); LIST_REMOVE(lyp, nfsly_hash); TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list); /* Handle any layout commits. */ if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) && (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) { lyp->nfsly_flags &= ~NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); NFSCL_DEBUG(3, "do layoutcommit\n"); nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, cred, p); NFSLOCKCLSTATE(); goto tryagain2; } } } /* Now, look for stale layouts. */ lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead); while (lyp != NULL) { nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list); if (lyp->nfsly_timestamp < NFSD_MONOSEC && (lyp->nfsly_flags & NFSLY_RECALL) == 0 && lyp->nfsly_lock.nfslock_usecnt == 0 && lyp->nfsly_lock.nfslock_lock == 0) { NFSCL_DEBUG(4, "ret stale lay=%d\n", nfscl_layoutcnt); recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_NOWAIT); if (recallp == NULL) break; (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX, lyp->nfsly_stateid.seqid, recallp); } lyp = nlyp; } /* * Free up any unreferenced device info structures. */ LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) { if (dip->nfsdi_layoutrefs == 0 && dip->nfsdi_refcnt == 0) { NFSCL_DEBUG(4, "freeing devinfo\n"); LIST_REMOVE(dip, nfsdi_list); nfscl_freedevinfo(dip); } } NFSUNLOCKCLSTATE(); /* Do layout return(s), as required. */ TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) { TAILQ_REMOVE(&rlh, lyp, nfsly_list); NFSCL_DEBUG(4, "ret layout\n"); nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p); nfscl_freelayout(lyp); } /* * Delegreturn any delegations cleaned out or recalled. */ TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) { newnfs_copycred(&dp->nfsdl_cred, cred); (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p); TAILQ_REMOVE(&dh, dp, nfsdl_list); - FREE((caddr_t)dp, M_NFSCLDELEG); + free(dp, M_NFSCLDELEG); } SLIST_INIT(&lfh); /* * Call nfscl_cleanupkext() once per second to check for * open/lock owners where the process has exited. */ mytime = NFSD_MONOSEC; if (prevsec != mytime) { prevsec = mytime; nfscl_cleanupkext(clp, &lfh); } /* * Do a ReleaseLockOwner for all lock owners where the * associated process no longer exists, as found by * nfscl_cleanupkext(). */ newnfs_setroot(cred); SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) { LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list, nlp) { (void)nfsrpc_rellockown(clp->nfsc_nmp, lp, lfhp->nfslfh_fh, lfhp->nfslfh_len, cred, p); nfscl_freelockowner(lp, 0); } free(lfhp, M_TEMP); } SLIST_INIT(&lfh); NFSLOCKCLSTATE(); if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0) (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl", hz); if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) { clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD; NFSUNLOCKCLSTATE(); NFSFREECRED(cred); wakeup((caddr_t)clp); return; } NFSUNLOCKCLSTATE(); } } /* * Initiate state recovery. Called when NFSERR_STALECLIENTID, * NFSERR_STALESTATEID or NFSERR_BADSESSION is received. */ APPLESTATIC void nfscl_initiate_recovery(struct nfsclclient *clp) { if (clp == NULL) return; NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECOVER; NFSUNLOCKCLSTATE(); wakeup((caddr_t)clp); } /* * Dump out the state stuff for debugging. */ APPLESTATIC void nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens, int lockowner, int locks) { struct nfsclclient *clp; struct nfsclowner *owp; struct nfsclopen *op; struct nfscllockowner *lp; struct nfscllock *lop; struct nfscldeleg *dp; clp = nmp->nm_clp; if (clp == NULL) { printf("nfscl dumpstate NULL clp\n"); return; } NFSLOCKCLSTATE(); TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (openowner && !LIST_EMPTY(&owp->nfsow_open)) printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n", owp->nfsow_owner[0], owp->nfsow_owner[1], owp->nfsow_owner[2], owp->nfsow_owner[3], owp->nfsow_seqid); LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (opens) printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n", op->nfso_stateid.other[0], op->nfso_stateid.other[1], op->nfso_stateid.other[2], op->nfso_opencnt, op->nfso_fh[12]); LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lockowner) printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n", lp->nfsl_owner[0], lp->nfsl_owner[1], lp->nfsl_owner[2], lp->nfsl_owner[3], lp->nfsl_seqid, lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1], lp->nfsl_stateid.other[2]); LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (locks) #ifdef __FreeBSD__ printf("lck typ=%d fst=%ju end=%ju\n", lop->nfslo_type, (intmax_t)lop->nfslo_first, (intmax_t)lop->nfslo_end); #else printf("lck typ=%d fst=%qd end=%qd\n", lop->nfslo_type, lop->nfslo_first, lop->nfslo_end); #endif } } } } } LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { if (openowner && !LIST_EMPTY(&owp->nfsow_open)) printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n", owp->nfsow_owner[0], owp->nfsow_owner[1], owp->nfsow_owner[2], owp->nfsow_owner[3], owp->nfsow_seqid); LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (opens) printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n", op->nfso_stateid.other[0], op->nfso_stateid.other[1], op->nfso_stateid.other[2], op->nfso_opencnt, op->nfso_fh[12]); LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lockowner) printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n", lp->nfsl_owner[0], lp->nfsl_owner[1], lp->nfsl_owner[2], lp->nfsl_owner[3], lp->nfsl_seqid, lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1], lp->nfsl_stateid.other[2]); LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (locks) #ifdef __FreeBSD__ printf("lck typ=%d fst=%ju end=%ju\n", lop->nfslo_type, (intmax_t)lop->nfslo_first, (intmax_t)lop->nfslo_end); #else printf("lck typ=%d fst=%qd end=%qd\n", lop->nfslo_type, lop->nfslo_first, lop->nfslo_end); #endif } } } } NFSUNLOCKCLSTATE(); } /* * Check for duplicate open owners and opens. * (Only used as a diagnostic aid.) */ APPLESTATIC void nfscl_dupopen(vnode_t vp, int dupopens) { struct nfsclclient *clp; struct nfsclowner *owp, *owp2; struct nfsclopen *op, *op2; struct nfsfh *nfhp; clp = VFSTONFS(vnode_mount(vp))->nm_clp; if (clp == NULL) { printf("nfscl dupopen NULL clp\n"); return; } nfhp = VTONFS(vp)->n_fhp; NFSLOCKCLSTATE(); /* * First, search for duplicate owners. * These should never happen! */ LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { if (owp != owp2 && !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner, NFSV4CL_LOCKNAMELEN)) { NFSUNLOCKCLSTATE(); printf("DUP OWNER\n"); nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0); return; } } } /* * Now, search for duplicate stateids. * These shouldn't happen, either. */ LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) { LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op != op2 && (op->nfso_stateid.other[0] != 0 || op->nfso_stateid.other[1] != 0 || op->nfso_stateid.other[2] != 0) && op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] && op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] && op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) { NFSUNLOCKCLSTATE(); printf("DUP STATEID\n"); nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0); return; } } } } } /* * Now search for duplicate opens. * Duplicate opens for the same owner * should never occur. Other duplicates are * possible and are checked for if "dupopens" * is true. */ LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) { if (nfhp->nfh_len == op2->nfso_fhlen && !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) { LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op != op2 && nfhp->nfh_len == op->nfso_fhlen && !NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) && (!NFSBCMP(op->nfso_own->nfsow_owner, op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) || dupopens)) { if (!NFSBCMP(op->nfso_own->nfsow_owner, op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) { NFSUNLOCKCLSTATE(); printf("BADDUP OPEN\n"); } else { NFSUNLOCKCLSTATE(); printf("DUP OPEN\n"); } nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0); return; } } } } } } NFSUNLOCKCLSTATE(); } /* * During close, find an open that needs to be dereferenced and * dereference it. If there are no more opens for this file, * log a message to that effect. * Opens aren't actually Close'd until VOP_INACTIVE() is performed * on the file's vnode. * This is the safe way, since it is difficult to identify * which open the close is for and I/O can be performed after the * close(2) system call when a file is mmap'd. * If it returns 0 for success, there will be a referenced * clp returned via clpp. */ APPLESTATIC int nfscl_getclose(vnode_t vp, struct nfsclclient **clpp) { struct nfsclclient *clp; struct nfsclowner *owp; struct nfsclopen *op; struct nfscldeleg *dp; struct nfsfh *nfhp; int error, notdecr; error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp); if (error) return (error); *clpp = clp; nfhp = VTONFS(vp)->n_fhp; notdecr = 1; NFSLOCKCLSTATE(); /* * First, look for one under a delegation that was locally issued * and just decrement the opencnt for it. Since all my Opens against * the server are DENY_NONE, I don't see a problem with hanging * onto them. (It is much easier to use one of the extant Opens * that I already have on the server when a Delegation is recalled * than to do fresh Opens.) Someday, I might need to rethink this, but. */ dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); if (dp != NULL) { LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { /* * Since a delegation is for a file, there * should never be more than one open for * each openowner. */ if (LIST_NEXT(op, nfso_list) != NULL) panic("nfscdeleg opens"); if (notdecr && op->nfso_opencnt > 0) { notdecr = 0; op->nfso_opencnt--; break; } } } } /* Now process the opens against the server. */ LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == nfhp->nfh_len && !NFSBCMP(op->nfso_fh, nfhp->nfh_fh, nfhp->nfh_len)) { /* Found an open, decrement cnt if possible */ if (notdecr && op->nfso_opencnt > 0) { notdecr = 0; op->nfso_opencnt--; } /* * There are more opens, so just return. */ if (op->nfso_opencnt > 0) { NFSUNLOCKCLSTATE(); return (0); } } } } NFSUNLOCKCLSTATE(); if (notdecr) printf("nfscl: never fnd open\n"); return (0); } APPLESTATIC int nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p) { struct nfsclclient *clp; struct nfsclowner *owp, *nowp; struct nfsclopen *op; struct nfscldeleg *dp; struct nfsfh *nfhp; struct nfsclrecalllayout *recallp; int error; error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp); if (error) return (error); *clpp = clp; nfhp = VTONFS(vp)->n_fhp; recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK); NFSLOCKCLSTATE(); /* * First get rid of the local Open structures, which should be no * longer in use. */ dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); if (dp != NULL) { LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) { op = LIST_FIRST(&owp->nfsow_open); if (op != NULL) { KASSERT((op->nfso_opencnt == 0), ("nfscl: bad open cnt on deleg")); nfscl_freeopen(op, 1); } nfscl_freeopenowner(owp, 1); } } /* Return any layouts marked return on close. */ nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp); /* Now process the opens against the server. */ lookformore: LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { op = LIST_FIRST(&owp->nfsow_open); while (op != NULL) { if (op->nfso_fhlen == nfhp->nfh_len && !NFSBCMP(op->nfso_fh, nfhp->nfh_fh, nfhp->nfh_len)) { /* Found an open, close it. */ KASSERT((op->nfso_opencnt == 0), ("nfscl: bad open cnt on server")); NFSUNLOCKCLSTATE(); nfsrpc_doclose(VFSTONFS(vnode_mount(vp)), op, p); NFSLOCKCLSTATE(); goto lookformore; } op = LIST_NEXT(op, nfso_list); } } NFSUNLOCKCLSTATE(); /* * recallp has been set NULL by nfscl_retoncloselayout() if it was * used by the function, but calling free() with a NULL pointer is ok. */ free(recallp, M_NFSLAYRECALL); return (0); } /* * Return all delegations on this client. * (Must be called with client sleep lock.) */ static void nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p) { struct nfscldeleg *dp, *ndp; struct ucred *cred; cred = newnfs_getcred(); TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) { nfscl_cleandeleg(dp); (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p); nfscl_freedeleg(&clp->nfsc_deleg, dp); } NFSFREECRED(cred); } /* * Do a callback RPC. */ APPLESTATIC void nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) { int clist, gotseq_ok, i, j, k, op, rcalls; u_int32_t *tl; struct nfsclclient *clp; struct nfscldeleg *dp = NULL; int numops, taglen = -1, error = 0, trunc; u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident; u_char tag[NFSV4_SMALLSTR + 1], *tagstr; vnode_t vp = NULL; struct nfsnode *np; struct vattr va; struct nfsfh *nfhp; mount_t mp; nfsattrbit_t attrbits, rattrbits; nfsv4stateid_t stateid; uint32_t seqid, slotid = 0, highslot, cachethis; uint8_t sessionid[NFSX_V4SESSIONID]; struct mbuf *rep; struct nfscllayout *lyp; uint64_t filesid[2], len, off; int changed, gotone, laytype, recalltype; uint32_t iomode; struct nfsclrecalllayout *recallp = NULL; struct nfsclsession *tsep; gotseq_ok = 0; nfsrvd_rephead(nd); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); taglen = fxdr_unsigned(int, *tl); if (taglen < 0) { error = EBADRPC; goto nfsmout; } if (taglen <= NFSV4_SMALLSTR) tagstr = tag; else tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, tagstr, taglen); if (error) { if (taglen > NFSV4_SMALLSTR) free(tagstr, M_TEMP); taglen = -1; goto nfsmout; } (void) nfsm_strtom(nd, tag, taglen); if (taglen > NFSV4_SMALLSTR) { free(tagstr, M_TEMP); } NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); minorvers = fxdr_unsigned(u_int32_t, *tl++); if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION) nd->nd_repstat = NFSERR_MINORVERMISMATCH; cbident = fxdr_unsigned(u_int32_t, *tl++); if (nd->nd_repstat) numops = 0; else numops = fxdr_unsigned(int, *tl); /* * Loop around doing the sub ops. */ for (i = 0; i < numops; i++) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED); *repp++ = *tl; op = fxdr_unsigned(int, *tl); if (op < NFSV4OP_CBGETATTR || (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) || (op > NFSV4OP_CBNOTIFYDEVID && minorvers == NFSV41_MINORVERSION)) { nd->nd_repstat = NFSERR_OPILLEGAL; *repp = nfscl_errmap(nd, minorvers); retops++; break; } nd->nd_procnum = op; if (op < NFSV41_CBNOPS) nfsstatsv1.cbrpccnt[nd->nd_procnum]++; switch (op) { case NFSV4OP_CBGETATTR: NFSCL_DEBUG(4, "cbgetattr\n"); mp = NULL; vp = NULL; error = nfsm_getfh(nd, &nfhp); if (!error) error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error == 0 && i == 0 && minorvers != NFSV4_MINORVERSION) error = NFSERR_OPNOTINSESS; if (!error) { mp = nfscl_getmnt(minorvers, sessionid, cbident, &clp); if (mp == NULL) error = NFSERR_SERVERFAULT; } if (!error) { error = nfscl_ngetreopen(mp, nfhp->nfh_fh, nfhp->nfh_len, p, &np); if (!error) vp = NFSTOV(np); } if (!error) { NFSZERO_ATTRBIT(&rattrbits); NFSLOCKCLSTATE(); dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); if (dp != NULL) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SIZE)) { if (vp != NULL) va.va_size = np->n_size; else va.va_size = dp->nfsdl_size; NFSSETBIT_ATTRBIT(&rattrbits, NFSATTRBIT_SIZE); } if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE)) { va.va_filerev = dp->nfsdl_change; if (vp == NULL || (np->n_flag & NDELEGMOD)) va.va_filerev++; NFSSETBIT_ATTRBIT(&rattrbits, NFSATTRBIT_CHANGE); } } else error = NFSERR_SERVERFAULT; NFSUNLOCKCLSTATE(); } if (vp != NULL) vrele(vp); if (mp != NULL) vfs_unbusy(mp); if (nfhp != NULL) - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); if (!error) (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va, NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0, (uint64_t)0); break; case NFSV4OP_CBRECALL: NFSCL_DEBUG(4, "cbrecall\n"); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stateid.seqid = *tl++; NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); trunc = fxdr_unsigned(int, *tl); error = nfsm_getfh(nd, &nfhp); if (error == 0 && i == 0 && minorvers != NFSV4_MINORVERSION) error = NFSERR_OPNOTINSESS; if (!error) { NFSLOCKCLSTATE(); if (minorvers == NFSV4_MINORVERSION) clp = nfscl_getclnt(cbident); else clp = nfscl_getclntsess(sessionid); if (clp != NULL) { dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0) { dp->nfsdl_flags |= NFSCLDL_RECALL; wakeup((caddr_t)clp); } } else { error = NFSERR_SERVERFAULT; } NFSUNLOCKCLSTATE(); } if (nfhp != NULL) - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); break; case NFSV4OP_CBLAYOUTRECALL: NFSCL_DEBUG(4, "cblayrec\n"); nfhp = NULL; NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); laytype = fxdr_unsigned(int, *tl++); iomode = fxdr_unsigned(uint32_t, *tl++); if (newnfs_true == *tl++) changed = 1; else changed = 0; recalltype = fxdr_unsigned(int, *tl); recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK); if (laytype != NFSLAYOUT_NFSV4_1_FILES) error = NFSERR_NOMATCHLAYOUT; else if (recalltype == NFSLAYOUTRETURN_FILE) { error = nfsm_getfh(nd, &nfhp); NFSCL_DEBUG(4, "retfile getfh=%d\n", error); if (error != 0) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID); off = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); if (minorvers == NFSV4_MINORVERSION) error = NFSERR_NOTSUPP; else if (i == 0) error = NFSERR_OPNOTINSESS; if (error == 0) { NFSLOCKCLSTATE(); clp = nfscl_getclntsess(sessionid); NFSCL_DEBUG(4, "cbly clp=%p\n", clp); if (clp != NULL) { lyp = nfscl_findlayout(clp, nfhp->nfh_fh, nfhp->nfh_len); NFSCL_DEBUG(4, "cblyp=%p\n", lyp); if (lyp != NULL && (lyp->nfsly_flags & NFSLY_FILES) != 0 && !NFSBCMP(stateid.other, lyp->nfsly_stateid.other, NFSX_STATEIDOTHER)) { error = nfscl_layoutrecall( recalltype, lyp, iomode, off, len, stateid.seqid, recallp); recallp = NULL; wakeup(clp); NFSCL_DEBUG(4, "aft layrcal=%d\n", error); } else error = NFSERR_NOMATCHLAYOUT; } else error = NFSERR_NOMATCHLAYOUT; NFSUNLOCKCLSTATE(); } free(nfhp, M_NFSFH); } else if (recalltype == NFSLAYOUTRETURN_FSID) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER); filesid[0] = fxdr_hyper(tl); tl += 2; filesid[1] = fxdr_hyper(tl); tl += 2; gotone = 0; NFSLOCKCLSTATE(); clp = nfscl_getclntsess(sessionid); if (clp != NULL) { TAILQ_FOREACH(lyp, &clp->nfsc_layout, nfsly_list) { if (lyp->nfsly_filesid[0] == filesid[0] && lyp->nfsly_filesid[1] == filesid[1]) { error = nfscl_layoutrecall( recalltype, lyp, iomode, 0, UINT64_MAX, lyp->nfsly_stateid.seqid, recallp); recallp = NULL; gotone = 1; } } if (gotone != 0) wakeup(clp); else error = NFSERR_NOMATCHLAYOUT; } else error = NFSERR_NOMATCHLAYOUT; NFSUNLOCKCLSTATE(); } else if (recalltype == NFSLAYOUTRETURN_ALL) { gotone = 0; NFSLOCKCLSTATE(); clp = nfscl_getclntsess(sessionid); if (clp != NULL) { TAILQ_FOREACH(lyp, &clp->nfsc_layout, nfsly_list) { error = nfscl_layoutrecall( recalltype, lyp, iomode, 0, UINT64_MAX, lyp->nfsly_stateid.seqid, recallp); recallp = NULL; gotone = 1; } if (gotone != 0) wakeup(clp); else error = NFSERR_NOMATCHLAYOUT; } else error = NFSERR_NOMATCHLAYOUT; NFSUNLOCKCLSTATE(); } else error = NFSERR_NOMATCHLAYOUT; if (recallp != NULL) { free(recallp, M_NFSLAYRECALL); recallp = NULL; } break; case NFSV4OP_CBSEQUENCE: NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED); bcopy(tl, sessionid, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; seqid = fxdr_unsigned(uint32_t, *tl++); slotid = fxdr_unsigned(uint32_t, *tl++); highslot = fxdr_unsigned(uint32_t, *tl++); cachethis = *tl++; /* Throw away the referring call stuff. */ clist = fxdr_unsigned(int, *tl); for (j = 0; j < clist; j++) { NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + NFSX_UNSIGNED); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; rcalls = fxdr_unsigned(int, *tl); for (k = 0; k < rcalls; k++) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); } } NFSLOCKCLSTATE(); if (i == 0) { clp = nfscl_getclntsess(sessionid); if (clp == NULL) error = NFSERR_SERVERFAULT; } else error = NFSERR_SEQUENCEPOS; if (error == 0) { tsep = nfsmnt_mdssession(clp->nfsc_nmp); error = nfsv4_seqsession(seqid, slotid, highslot, tsep->nfsess_cbslots, &rep, tsep->nfsess_backslots); } NFSUNLOCKCLSTATE(); if (error == 0 || error == NFSERR_REPLYFROMCACHE) { gotseq_ok = 1; if (rep != NULL) { /* * Handle a reply for a retried * callback. The reply will be * re-inserted in the session cache * by the nfsv4_seqsess_cacherep() call * after out: */ KASSERT(error == NFSERR_REPLYFROMCACHE, ("cbsequence: non-NULL rep")); NFSCL_DEBUG(4, "Got cbretry\n"); m_freem(nd->nd_mreq); nd->nd_mreq = rep; rep = NULL; goto out; } NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; *tl++ = txdr_unsigned(seqid); *tl++ = txdr_unsigned(slotid); *tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1); *tl = txdr_unsigned(NFSV4_CBSLOTS - 1); } break; default: if (i == 0 && minorvers == NFSV41_MINORVERSION) error = NFSERR_OPNOTINSESS; else { NFSCL_DEBUG(1, "unsupp callback %d\n", op); error = NFSERR_NOTSUPP; } break; } if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) { nd->nd_repstat = NFSERR_BADXDR; } else { nd->nd_repstat = error; } error = 0; } retops++; if (nd->nd_repstat) { *repp = nfscl_errmap(nd, minorvers); break; } else *repp = 0; /* NFS4_OK */ } nfsmout: if (recallp != NULL) free(recallp, M_NFSLAYRECALL); if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) nd->nd_repstat = NFSERR_BADXDR; else printf("nfsv4 comperr1=%d\n", error); } if (taglen == -1) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = 0; } else { *retopsp = txdr_unsigned(retops); } *nd->nd_errp = nfscl_errmap(nd, minorvers); out: if (gotseq_ok != 0) { rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK); NFSLOCKCLSTATE(); clp = nfscl_getclntsess(sessionid); if (clp != NULL) { tsep = nfsmnt_mdssession(clp->nfsc_nmp); nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots, NFSERR_OK, &rep); NFSUNLOCKCLSTATE(); } else { NFSUNLOCKCLSTATE(); m_freem(rep); } } } /* * Generate the next cbident value. Basically just increment a static value * and then check that it isn't already in the list, if it has wrapped around. */ static u_int32_t nfscl_nextcbident(void) { struct nfsclclient *clp; int matched; static u_int32_t nextcbident = 0; static int haswrapped = 0; nextcbident++; if (nextcbident == 0) haswrapped = 1; if (haswrapped) { /* * Search the clientid list for one already using this cbident. */ do { matched = 0; NFSLOCKCLSTATE(); LIST_FOREACH(clp, &nfsclhead, nfsc_list) { if (clp->nfsc_cbident == nextcbident) { matched = 1; break; } } NFSUNLOCKCLSTATE(); if (matched == 1) nextcbident++; } while (matched); } return (nextcbident); } /* * Get the mount point related to a given cbident or session and busy it. */ static mount_t nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident, struct nfsclclient **clpp) { struct nfsclclient *clp; mount_t mp; int error; struct nfsclsession *tsep; *clpp = NULL; NFSLOCKCLSTATE(); LIST_FOREACH(clp, &nfsclhead, nfsc_list) { tsep = nfsmnt_mdssession(clp->nfsc_nmp); if (minorvers == NFSV4_MINORVERSION) { if (clp->nfsc_cbident == cbident) break; } else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID)) break; } if (clp == NULL) { NFSUNLOCKCLSTATE(); return (NULL); } mp = clp->nfsc_nmp->nm_mountp; vfs_ref(mp); NFSUNLOCKCLSTATE(); error = vfs_busy(mp, 0); vfs_rel(mp); if (error != 0) return (NULL); *clpp = clp; return (mp); } /* * Get the clientid pointer related to a given cbident. */ static struct nfsclclient * nfscl_getclnt(u_int32_t cbident) { struct nfsclclient *clp; LIST_FOREACH(clp, &nfsclhead, nfsc_list) if (clp->nfsc_cbident == cbident) break; return (clp); } /* * Get the clientid pointer related to a given sessionid. */ static struct nfsclclient * nfscl_getclntsess(uint8_t *sessionid) { struct nfsclclient *clp; struct nfsclsession *tsep; LIST_FOREACH(clp, &nfsclhead, nfsc_list) { tsep = nfsmnt_mdssession(clp->nfsc_nmp); if (!NFSBCMP(tsep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID)) break; } return (clp); } /* * Search for a lock conflict locally on the client. A conflict occurs if * - not same owner and overlapping byte range and at least one of them is * a write lock or this is an unlock. */ static int nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen, struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp, struct nfscllock **lopp) { struct nfsclowner *owp; struct nfsclopen *op; int ret; if (dp != NULL) { ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp); if (ret) return (ret); } LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, fhp, fhlen)) { ret = nfscl_checkconflict(&op->nfso_lock, nlop, own, lopp); if (ret) return (ret); } } } return (0); } static int nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop, u_int8_t *own, struct nfscllock **lopp) { struct nfscllockowner *lp; struct nfscllock *lop; LIST_FOREACH(lp, lhp, nfsl_list) { if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) { if (lop->nfslo_first >= nlop->nfslo_end) break; if (lop->nfslo_end <= nlop->nfslo_first) continue; if (lop->nfslo_type == F_WRLCK || nlop->nfslo_type == F_WRLCK || nlop->nfslo_type == F_UNLCK) { if (lopp != NULL) *lopp = lop; return (NFSERR_DENIED); } } } } return (0); } /* * Check for a local conflicting lock. */ APPLESTATIC int nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags) { struct nfscllock *lop, nlck; struct nfscldeleg *dp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int error; nlck.nfslo_type = fl->l_type; nlck.nfslo_first = off; if (len == NFS64BITSSET) { nlck.nfslo_end = NFS64BITSSET; } else { nlck.nfslo_end = off + len; if (nlck.nfslo_end <= nlck.nfslo_first) return (NFSERR_INVAL); } np = VTONFS(vp); nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, &nlck, own, dp, &lop); if (error != 0) { fl->l_whence = SEEK_SET; fl->l_start = lop->nfslo_first; if (lop->nfslo_end == NFS64BITSSET) fl->l_len = 0; else fl->l_len = lop->nfslo_end - lop->nfslo_first; fl->l_pid = (pid_t)0; fl->l_type = lop->nfslo_type; error = -1; /* no RPC required */ } else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) || fl->l_type == F_RDLCK)) { /* * The delegation ensures that there isn't a conflicting * lock on the server, so return -1 to indicate an RPC * isn't required. */ fl->l_type = F_UNLCK; error = -1; } NFSUNLOCKCLSTATE(); return (error); } /* * Handle Recall of a delegation. * The clp must be exclusive locked when this is called. */ static int nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p, int called_from_renewthread) { struct nfsclowner *owp, *lowp, *nowp; struct nfsclopen *op, *lop; struct nfscllockowner *lp; struct nfscllock *lckp; struct nfsnode *np; int error = 0, ret, gotvp = 0; if (vp == NULL) { /* * First, get a vnode for the file. This is needed to do RPCs. */ ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh, dp->nfsdl_fhlen, p, &np); if (ret) { /* * File isn't open, so nothing to move over to the * server. */ return (0); } vp = NFSTOV(np); gotvp = 1; } else { np = VTONFS(vp); } dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET; /* * Ok, if it's a write delegation, flush data to the server, so * that close/open consistency is retained. */ ret = 0; NFSLOCKNODE(np); if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) { np->n_flag |= NDELEGRECALL; NFSUNLOCKNODE(np); ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread); NFSLOCKNODE(np); np->n_flag &= ~NDELEGRECALL; } NFSINVALATTRCACHE(np); NFSUNLOCKNODE(np); if (ret == EIO && called_from_renewthread != 0) { /* * If the flush failed with EIO for the renew thread, * return now, so that the dirty buffer will be flushed * later. */ if (gotvp != 0) vrele(vp); return (ret); } /* * Now, for each openowner with opens issued locally, move them * over to state against the server. */ LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) { lop = LIST_FIRST(&lowp->nfsow_open); if (lop != NULL) { if (LIST_NEXT(lop, nfso_list) != NULL) panic("nfsdlg mult opens"); /* * Look for the same openowner against the server. */ LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { if (!NFSBCMP(lowp->nfsow_owner, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) { newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_moveopen(vp, clp, nmp, lop, owp, dp, cred, p); if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret) { nfscl_freeopen(lop, 1); if (!error) error = ret; } break; } } /* * If no openowner found, create one and get an open * for it. */ if (owp == NULL) { - MALLOC(nowp, struct nfsclowner *, + nowp = malloc( sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); nfscl_newopen(clp, NULL, &owp, &nowp, &op, NULL, lowp->nfsow_owner, dp->nfsdl_fh, dp->nfsdl_fhlen, NULL, NULL); newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_moveopen(vp, clp, nmp, lop, owp, dp, cred, p); if (ret) { nfscl_freeopenowner(owp, 0); if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret) { nfscl_freeopen(lop, 1); if (!error) error = ret; } } } } } /* * Now, get byte range locks for any locks done locally. */ LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) { newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p); if (ret == NFSERR_STALESTATEID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_STALECLIENTID || ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret && !error) error = ret; } } if (gotvp) vrele(vp); return (error); } /* * Move a locally issued open over to an owner on the state list. * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and * returns with it unlocked. */ static int nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp, struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp, struct ucred *cred, NFSPROC_T *p) { struct nfsclopen *op, *nop; struct nfscldeleg *ndp; struct nfsnode *np; int error = 0, newone; /* * First, look for an appropriate open, If found, just increment the * opencnt in it. */ LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode && op->nfso_fhlen == lop->nfso_fhlen && !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) { op->nfso_opencnt += lop->nfso_opencnt; nfscl_freeopen(lop, 1); return (0); } } /* No appropriate open, so we have to do one against the server. */ np = VTONFS(vp); - MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) + + nop = malloc(sizeof (struct nfsclopen) + lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK); newone = 0; nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner, lop->nfso_fh, lop->nfso_fhlen, cred, &newone); ndp = dp; error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p); if (error) { if (newone) nfscl_freeopen(op, 0); } else { op->nfso_mode |= lop->nfso_mode; op->nfso_opencnt += lop->nfso_opencnt; nfscl_freeopen(lop, 1); } if (nop != NULL) - FREE((caddr_t)nop, M_NFSCLOPEN); + free(nop, M_NFSCLOPEN); if (ndp != NULL) { /* * What should I do with the returned delegation, since the * delegation is being recalled? For now, just printf and * through it away. */ printf("Moveopen returned deleg\n"); - FREE((caddr_t)ndp, M_NFSCLDELEG); + free(ndp, M_NFSCLDELEG); } return (error); } /* * Recall all delegations on this client. */ static void nfscl_totalrecall(struct nfsclclient *clp) { struct nfscldeleg *dp; TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0) dp->nfsdl_flags |= NFSCLDL_RECALL; } } /* * Relock byte ranges. Called for delegation recall and state expiry. */ static int nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp, struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred, NFSPROC_T *p) { struct nfscllockowner *nlp; struct nfsfh *nfhp; u_int64_t off, len; int error, newone, donelocally; off = lop->nfslo_first; len = lop->nfslo_end - lop->nfslo_first; error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p, clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone, &donelocally); if (error || donelocally) return (error); nfhp = VTONFS(vp)->n_fhp; error = nfscl_trylock(nmp, vp, nfhp->nfh_fh, nfhp->nfh_len, nlp, newone, 0, off, len, lop->nfslo_type, cred, p); if (error) nfscl_freelockowner(nlp, 0); return (error); } /* * Called to re-open a file. Basically get a vnode for the file handle * and then call nfsrpc_openrpc() to do the rest. */ static int nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp, struct ucred *cred, NFSPROC_T *p) { struct nfsnode *np; vnode_t vp; int error; error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np); if (error) return (error); vp = NFSTOV(np); if (np->n_v4 != NULL) { error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, fhp, fhlen, mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0, cred, p); } else { error = EINVAL; } vrele(vp); return (error); } /* * Try an open against the server. Just call nfsrpc_openrpc(), retrying while * NFSERR_DELAY. Also, try system credentials, if the passed in credentials * fail. */ static int nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op, u_int8_t *name, int namelen, struct nfscldeleg **ndpp, int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p) { int error; do { error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p, 0, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstryop"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ newnfs_setroot(cred); do { error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p, 1, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstryop"); } while (error == NFSERR_DELAY); } return (error); } /* * Try a byte range lock. Just loop on nfsrpc_lock() while it returns * NFSERR_DELAY. Also, retry with system credentials, if the provided * cred don't work. */ static int nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, struct nfscllockowner *nlp, int newone, int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; do { error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone, reclaim, off, len, type, cred, p, 0); if (!error && nd->nd_repstat == NFSERR_DELAY) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfstrylck"); } while (!error && nd->nd_repstat == NFSERR_DELAY); if (!error) error = nd->nd_repstat; if (error == EAUTH || error == EACCES) { /* Try again using root credentials */ newnfs_setroot(cred); do { error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone, reclaim, off, len, type, cred, p, 1); if (!error && nd->nd_repstat == NFSERR_DELAY) (void) nfs_catnap(PZERO, (int)nd->nd_repstat, "nfstrylck"); } while (!error && nd->nd_repstat == NFSERR_DELAY); if (!error) error = nd->nd_repstat; } return (error); } /* * Try a delegreturn against the server. Just call nfsrpc_delegreturn(), * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in * credentials fail. */ static int nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred, struct nfsmount *nmp, NFSPROC_T *p) { int error; do { error = nfsrpc_delegreturn(dp, cred, nmp, p, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrydp"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ newnfs_setroot(cred); do { error = nfsrpc_delegreturn(dp, cred, nmp, p, 1); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrydp"); } while (error == NFSERR_DELAY); } return (error); } /* * Try a close against the server. Just call nfsrpc_closerpc(), * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in * credentials fail. */ APPLESTATIC int nfscl_tryclose(struct nfsclopen *op, struct ucred *cred, struct nfsmount *nmp, NFSPROC_T *p) { struct nfsrv_descript nfsd, *nd = &nfsd; int error; do { error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrycl"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ newnfs_setroot(cred); do { error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstrycl"); } while (error == NFSERR_DELAY); } return (error); } /* * Decide if a delegation on a file permits close without flushing writes * to the server. This might be a big performance win in some environments. * (Not useful until the client does caching on local stable storage.) */ APPLESTATIC int nfscl_mustflush(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np; struct nfsmount *nmp; np = VTONFS(vp); nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (1); NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (1); } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == NFSCLDL_WRITE && (dp->nfsdl_sizelimit >= np->n_size || !NFSHASSTRICT3530(nmp))) { NFSUNLOCKCLSTATE(); return (0); } NFSUNLOCKCLSTATE(); return (1); } /* * See if a (write) delegation exists for this file. */ APPLESTATIC int nfscl_nodeleg(vnode_t vp, int writedeleg) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np; struct nfsmount *nmp; np = VTONFS(vp); nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (1); NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (1); } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 && (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) == NFSCLDL_WRITE)) { NFSUNLOCKCLSTATE(); return (0); } NFSUNLOCKCLSTATE(); return (1); } /* * Look for an associated delegation that should be DelegReturned. */ APPLESTATIC int nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsclowner *owp; struct nfscllockowner *lp; struct nfsmount *nmp; struct ucred *cred; struct nfsnode *np; int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept; nmp = VFSTONFS(vnode_mount(vp)); np = VTONFS(vp); NFSLOCKCLSTATE(); /* * Loop around waiting for: * - outstanding I/O operations on delegations to complete * - for a delegation on vp that has state, lock the client and * do a recall * - return delegation with no state */ while (1) { clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (retcnt); } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } needsrecall = 0; LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { needsrecall = 1; break; } } if (!needsrecall) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { needsrecall = 1; break; } } } if (needsrecall && !triedrecall) { dp->nfsdl_flags |= NFSCLDL_DELEGRET; islept = 0; while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) break; } if (islept) continue; NFSUNLOCKCLSTATE(); cred = newnfs_getcred(); newnfs_copycred(&dp->nfsdl_cred, cred); (void) nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0); NFSFREECRED(cred); triedrecall = 1; NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; continue; } *stp = dp->nfsdl_stateid; retcnt = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (retcnt); } } /* * Look for associated delegation(s) that should be DelegReturned. */ APPLESTATIC int nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp, nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsclowner *owp; struct nfscllockowner *lp; struct nfsmount *nmp; struct ucred *cred; struct nfsnode *np; int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept; nmp = VFSTONFS(vnode_mount(fvp)); *gotfdp = 0; *gottdp = 0; NFSLOCKCLSTATE(); /* * Loop around waiting for: * - outstanding I/O operations on delegations to complete * - for a delegation on fvp that has state, lock the client and * do a recall * - return delegation(s) with no state. */ while (1) { clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (retcnt); } np = VTONFS(fvp); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && *gotfdp == 0) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } needsrecall = 0; LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { needsrecall = 1; break; } } if (!needsrecall) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { needsrecall = 1; break; } } } if (needsrecall && !triedrecall) { dp->nfsdl_flags |= NFSCLDL_DELEGRET; islept = 0; while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) break; } if (islept) continue; NFSUNLOCKCLSTATE(); cred = newnfs_getcred(); newnfs_copycred(&dp->nfsdl_cred, cred); (void) nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0); NFSFREECRED(cred); triedrecall = 1; NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; continue; } *fstp = dp->nfsdl_stateid; retcnt++; *gotfdp = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } if (tvp != NULL) { np = VTONFS(tvp); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && *gottdp == 0) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { NFSUNLOCKCLSTATE(); return (retcnt); } } LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { NFSUNLOCKCLSTATE(); return (retcnt); } } *tstp = dp->nfsdl_stateid; retcnt++; *gottdp = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } } NFSUNLOCKCLSTATE(); return (retcnt); } } /* * Get a reference on the clientid associated with the mount point. * Return 1 if success, 0 otherwise. */ APPLESTATIC int nfscl_getref(struct nfsmount *nmp) { struct nfsclclient *clp; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (0); } nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL); NFSUNLOCKCLSTATE(); return (1); } /* * Release a reference on a clientid acquired with the above call. */ APPLESTATIC void nfscl_relref(struct nfsmount *nmp) { struct nfsclclient *clp; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } nfsv4_relref(&clp->nfsc_lock); NFSUNLOCKCLSTATE(); } /* * Save the size attribute in the delegation, since the nfsnode * is going away. */ APPLESTATIC void nfscl_reclaimnode(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) dp->nfsdl_size = np->n_size; NFSUNLOCKCLSTATE(); } /* * Get the saved size attribute in the delegation, since it is a * newly allocated nfsnode. */ APPLESTATIC void nfscl_newnode(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) np->n_size = dp->nfsdl_size; NFSUNLOCKCLSTATE(); } /* * If there is a valid write delegation for this file, set the modtime * to the local clock time. */ APPLESTATIC void nfscl_delegmodtime(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) { nanotime(&dp->nfsdl_modtime); dp->nfsdl_flags |= NFSCLDL_MODTIMESET; } NFSUNLOCKCLSTATE(); } /* * If there is a valid write delegation for this file with a modtime set, * put that modtime in mtime. */ APPLESTATIC void nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) == (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) *mtime = dp->nfsdl_modtime; NFSUNLOCKCLSTATE(); } static int nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers) { short *defaulterrp, *errp; if (!nd->nd_repstat) return (0); if (nd->nd_procnum == NFSPROC_NOOP) return (txdr_unsigned(nd->nd_repstat & 0xffff)); if (nd->nd_repstat == EBADRPC) return (txdr_unsigned(NFSERR_BADXDR)); if (nd->nd_repstat == NFSERR_MINORVERMISMATCH || nd->nd_repstat == NFSERR_OPILLEGAL) return (txdr_unsigned(nd->nd_repstat)); if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 && minorvers > NFSV4_MINORVERSION) { /* NFSv4.n error. */ return (txdr_unsigned(nd->nd_repstat)); } if (nd->nd_procnum < NFSV4OP_CBNOPS) errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum]; else return (txdr_unsigned(nd->nd_repstat)); while (*++errp) if (*errp == (short)nd->nd_repstat) return (txdr_unsigned(nd->nd_repstat)); return (txdr_unsigned(*defaulterrp)); } /* * Called to find/add a layout to a client. * This function returns the layout with a refcnt (shared lock) upon * success (returns 0) or with no lock/refcnt on the layout when an * error is returned. * If a layout is passed in via lypp, it is locked (exclusively locked). */ APPLESTATIC int nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, nfsv4stateid_t *stateidp, int layouttype, int retonclose, struct nfsclflayouthead *fhlp, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p) { struct nfsclclient *clp; struct nfscllayout *lyp, *tlyp; struct nfsclflayout *flp; struct nfsnode *np = VTONFS(vp); mount_t mp; int layout_passed_in; mp = nmp->nm_mountp; layout_passed_in = 1; tlyp = NULL; lyp = *lypp; if (lyp == NULL) { layout_passed_in = 0; tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT, M_WAITOK | M_ZERO); } NFSLOCKCLSTATE(); clp = nmp->nm_clp; if (clp == NULL) { if (layout_passed_in != 0) nfsv4_unlock(&lyp->nfsly_lock, 0); NFSUNLOCKCLSTATE(); if (tlyp != NULL) free(tlyp, M_NFSLAYOUT); return (EPERM); } if (lyp == NULL) { /* * Although no lyp was passed in, another thread might have * allocated one. If one is found, just increment it's ref * count and return it. */ lyp = nfscl_findlayout(clp, fhp, fhlen); if (lyp == NULL) { lyp = tlyp; tlyp = NULL; lyp->nfsly_stateid.seqid = stateidp->seqid; lyp->nfsly_stateid.other[0] = stateidp->other[0]; lyp->nfsly_stateid.other[1] = stateidp->other[1]; lyp->nfsly_stateid.other[2] = stateidp->other[2]; lyp->nfsly_lastbyte = 0; LIST_INIT(&lyp->nfsly_flayread); LIST_INIT(&lyp->nfsly_flayrw); LIST_INIT(&lyp->nfsly_recall); lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0]; lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1]; lyp->nfsly_clp = clp; if (layouttype == NFSLAYOUT_FLEXFILE) lyp->nfsly_flags = NFSLY_FLEXFILE; else lyp->nfsly_flags = NFSLY_FILES; if (retonclose != 0) lyp->nfsly_flags |= NFSLY_RETONCLOSE; lyp->nfsly_fhlen = fhlen; NFSBCOPY(fhp, lyp->nfsly_fh, fhlen); TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp, nfsly_hash); lyp->nfsly_timestamp = NFSD_MONOSEC + 120; nfscl_layoutcnt++; } else { if (retonclose != 0) lyp->nfsly_flags |= NFSLY_RETONCLOSE; TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); lyp->nfsly_timestamp = NFSD_MONOSEC + 120; } nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); if (NFSCL_FORCEDISM(mp)) { NFSUNLOCKCLSTATE(); if (tlyp != NULL) free(tlyp, M_NFSLAYOUT); return (EPERM); } *lypp = lyp; } else lyp->nfsly_stateid.seqid = stateidp->seqid; /* Merge the new list of File Layouts into the list. */ flp = LIST_FIRST(fhlp); if (flp != NULL) { if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ) nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp); else nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp); } if (layout_passed_in != 0) nfsv4_unlock(&lyp->nfsly_lock, 1); NFSUNLOCKCLSTATE(); if (tlyp != NULL) free(tlyp, M_NFSLAYOUT); return (0); } /* * Search for a layout by MDS file handle. * If one is found, it is returned with a refcnt (shared lock) iff * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is * returned NULL. */ struct nfscllayout * nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen, uint64_t off, struct nfsclflayout **retflpp, int *recalledp) { struct nfscllayout *lyp; mount_t mp; int error, igotlock; mp = clp->nfsc_nmp->nm_mountp; *recalledp = 0; *retflpp = NULL; NFSLOCKCLSTATE(); lyp = nfscl_findlayout(clp, fhp, fhlen); if (lyp != NULL) { if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) { TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); lyp->nfsly_timestamp = NFSD_MONOSEC + 120; error = nfscl_findlayoutforio(lyp, off, NFSV4OPEN_ACCESSREAD, retflpp); if (error == 0) nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); else { do { igotlock = nfsv4_lock(&lyp->nfsly_lock, 1, NULL, NFSCLSTATEMUTEXPTR, mp); } while (igotlock == 0 && !NFSCL_FORCEDISM(mp)); *retflpp = NULL; } if (NFSCL_FORCEDISM(mp)) { lyp = NULL; *recalledp = 1; } } else { lyp = NULL; *recalledp = 1; } } NFSUNLOCKCLSTATE(); return (lyp); } /* * Search for a layout by MDS file handle. If one is found, mark in to be * recalled, if it already marked "return on close". */ static void nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp, int fhlen, struct nfsclrecalllayout **recallpp) { struct nfscllayout *lyp; uint32_t iomode; if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vnode_mount(vp))) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || (VTONFS(vp)->n_flag & NNOLAYOUT) != 0) return; lyp = nfscl_findlayout(clp, fhp, fhlen); if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE | NFSLY_RECALL)) == NFSLY_RETONCLOSE) { iomode = 0; if (!LIST_EMPTY(&lyp->nfsly_flayread)) iomode |= NFSLAYOUTIOMODE_READ; if (!LIST_EMPTY(&lyp->nfsly_flayrw)) iomode |= NFSLAYOUTIOMODE_RW; (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode, 0, UINT64_MAX, lyp->nfsly_stateid.seqid, *recallpp); NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode); *recallpp = NULL; } } /* * Dereference a layout. */ void nfscl_rellayout(struct nfscllayout *lyp, int exclocked) { NFSLOCKCLSTATE(); if (exclocked != 0) nfsv4_unlock(&lyp->nfsly_lock, 0); else nfsv4_relref(&lyp->nfsly_lock); NFSUNLOCKCLSTATE(); } /* * Search for a devinfo by deviceid. If one is found, return it after * acquiring a reference count on it. */ struct nfscldevinfo * nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid, struct nfscldevinfo *dip) { NFSLOCKCLSTATE(); if (dip == NULL) dip = nfscl_finddevinfo(clp, deviceid); if (dip != NULL) dip->nfsdi_refcnt++; NFSUNLOCKCLSTATE(); return (dip); } /* * Dereference a devinfo structure. */ static void nfscl_reldevinfo_locked(struct nfscldevinfo *dip) { dip->nfsdi_refcnt--; if (dip->nfsdi_refcnt == 0) wakeup(&dip->nfsdi_refcnt); } /* * Dereference a devinfo structure. */ void nfscl_reldevinfo(struct nfscldevinfo *dip) { NFSLOCKCLSTATE(); nfscl_reldevinfo_locked(dip); NFSUNLOCKCLSTATE(); } /* * Find a layout for this file handle. Return NULL upon failure. */ static struct nfscllayout * nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen) { struct nfscllayout *lyp; LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash) if (lyp->nfsly_fhlen == fhlen && !NFSBCMP(lyp->nfsly_fh, fhp, fhlen)) break; return (lyp); } /* * Find a devinfo for this deviceid. Return NULL upon failure. */ static struct nfscldevinfo * nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid) { struct nfscldevinfo *dip; LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list) if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID) == 0) break; return (dip); } /* * Merge the new file layout list into the main one, maintaining it in * increasing offset order. */ static void nfscl_mergeflayouts(struct nfsclflayouthead *fhlp, struct nfsclflayouthead *newfhlp) { struct nfsclflayout *flp, *nflp, *prevflp, *tflp; flp = LIST_FIRST(fhlp); prevflp = NULL; LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) { while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) { prevflp = flp; flp = LIST_NEXT(flp, nfsfl_list); } if (prevflp == NULL) LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list); else LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list); prevflp = nflp; } } /* * Add this nfscldevinfo to the client, if it doesn't already exist. * This function consumes the structure pointed at by dip, if not NULL. */ APPLESTATIC int nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, struct nfsclflayout *flp) { struct nfsclclient *clp; struct nfscldevinfo *tdip; uint8_t *dev; NFSLOCKCLSTATE(); clp = nmp->nm_clp; if (clp == NULL) { NFSUNLOCKCLSTATE(); if (dip != NULL) free(dip, M_NFSDEVINFO); return (ENODEV); } if ((flp->nfsfl_flags & NFSFL_FILE) != 0) dev = flp->nfsfl_dev; else dev = flp->nfsfl_ffm[0].dev; tdip = nfscl_finddevinfo(clp, dev); if (tdip != NULL) { tdip->nfsdi_layoutrefs++; flp->nfsfl_devp = tdip; nfscl_reldevinfo_locked(tdip); NFSUNLOCKCLSTATE(); if (dip != NULL) free(dip, M_NFSDEVINFO); return (0); } if (dip != NULL) { LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list); dip->nfsdi_layoutrefs = 1; flp->nfsfl_devp = dip; } NFSUNLOCKCLSTATE(); if (dip == NULL) return (ENODEV); return (0); } /* * Free up a layout structure and associated file layout structure(s). */ APPLESTATIC void nfscl_freelayout(struct nfscllayout *layp) { struct nfsclflayout *flp, *nflp; struct nfsclrecalllayout *rp, *nrp; LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) { LIST_REMOVE(flp, nfsfl_list); nfscl_freeflayout(flp); } LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) { LIST_REMOVE(flp, nfsfl_list); nfscl_freeflayout(flp); } LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) { LIST_REMOVE(rp, nfsrecly_list); free(rp, M_NFSLAYRECALL); } nfscl_layoutcnt--; free(layp, M_NFSLAYOUT); } /* * Free up a file layout structure. */ APPLESTATIC void nfscl_freeflayout(struct nfsclflayout *flp) { int i, j; if ((flp->nfsfl_flags & NFSFL_FILE) != 0) for (i = 0; i < flp->nfsfl_fhcnt; i++) free(flp->nfsfl_fh[i], M_NFSFH); if ((flp->nfsfl_flags & NFSFL_FLEXFILE) != 0) for (i = 0; i < flp->nfsfl_mirrorcnt; i++) for (j = 0; j < flp->nfsfl_ffm[i].fhcnt; j++) free(flp->nfsfl_ffm[i].fh[j], M_NFSFH); if (flp->nfsfl_devp != NULL) flp->nfsfl_devp->nfsdi_layoutrefs--; free(flp, M_NFSFLAYOUT); } /* * Free up a file layout devinfo structure. */ APPLESTATIC void nfscl_freedevinfo(struct nfscldevinfo *dip) { free(dip, M_NFSDEVINFO); } /* * Mark any layouts that match as recalled. */ static int nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode, uint64_t off, uint64_t len, uint32_t stateseqid, struct nfsclrecalllayout *recallp) { struct nfsclrecalllayout *rp, *orp; recallp->nfsrecly_recalltype = recalltype; recallp->nfsrecly_iomode = iomode; recallp->nfsrecly_stateseqid = stateseqid; recallp->nfsrecly_off = off; recallp->nfsrecly_len = len; /* * Order the list as file returns first, followed by fsid and any * returns, both in increasing stateseqid order. * Note that the seqids wrap around, so 1 is after 0xffffffff. * (I'm not sure this is correct because I find RFC5661 confusing * on this, but hopefully it will work ok.) */ orp = NULL; LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) { orp = rp; if ((recalltype == NFSLAYOUTRETURN_FILE && (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE || nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) || (recalltype != NFSLAYOUTRETURN_FILE && rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE && nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) { LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list); break; } } if (rp == NULL) { if (orp == NULL) LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp, nfsrecly_list); else LIST_INSERT_AFTER(orp, recallp, nfsrecly_list); } lyp->nfsly_flags |= NFSLY_RECALL; return (0); } /* * Compare the two seqids for ordering. The trick is that the seqids can * wrap around from 0xffffffff->0, so check for the cases where one * has wrapped around. * Return 1 if seqid1 comes before seqid2, 0 otherwise. */ static int nfscl_seq(uint32_t seqid1, uint32_t seqid2) { if (seqid2 > seqid1 && (seqid2 - seqid1) >= 0x7fffffff) /* seqid2 has wrapped around. */ return (0); if (seqid1 > seqid2 && (seqid1 - seqid2) >= 0x7fffffff) /* seqid1 has wrapped around. */ return (1); if (seqid1 <= seqid2) return (1); return (0); } /* * Do a layout return for each of the recalls. */ static void nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp, struct ucred *cred, NFSPROC_T *p) { struct nfsclrecalllayout *rp; nfsv4stateid_t stateid; int layouttype; NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER); stateid.seqid = lyp->nfsly_stateid.seqid; if ((lyp->nfsly_flags & NFSLY_FILES) != 0) layouttype = NFSLAYOUT_NFSV4_1_FILES; else layouttype = NFSLAYOUT_FLEXFILE; LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) { (void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh, lyp->nfsly_fhlen, 0, layouttype, rp->nfsrecly_iomode, rp->nfsrecly_recalltype, rp->nfsrecly_off, rp->nfsrecly_len, &stateid, cred, p, NULL); } } /* * Do the layout commit for a file layout. */ static void nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp, struct ucred *cred, NFSPROC_T *p) { struct nfsclflayout *flp; uint64_t len; int error, layouttype; if ((lyp->nfsly_flags & NFSLY_FILES) != 0) layouttype = NFSLAYOUT_NFSV4_1_FILES; else layouttype = NFSLAYOUT_FLEXFILE; LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) { if (layouttype == NFSLAYOUT_FLEXFILE && (flp->nfsfl_fflags & NFSFLEXFLAG_NO_LAYOUTCOMMIT) != 0) { NFSCL_DEBUG(4, "Flex file: no layoutcommit\n"); /* If not supported, don't bother doing it. */ NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT; NFSUNLOCKMNT(nmp); break; } else if (flp->nfsfl_off <= lyp->nfsly_lastbyte) { len = flp->nfsfl_end - flp->nfsfl_off; error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh, lyp->nfsly_fhlen, 0, flp->nfsfl_off, len, lyp->nfsly_lastbyte, &lyp->nfsly_stateid, layouttype, cred, p, NULL); NFSCL_DEBUG(4, "layoutcommit err=%d\n", error); if (error == NFSERR_NOTSUPP) { /* If not supported, don't bother doing it. */ NFSLOCKMNT(nmp); nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT; NFSUNLOCKMNT(nmp); break; } } } } /* * Commit all layouts for a file (vnode). */ int nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p) { struct nfsclclient *clp; struct nfscllayout *lyp; struct nfsnode *np = VTONFS(vp); mount_t mp; struct nfsmount *nmp; mp = vnode_mount(vp); nmp = VFSTONFS(mp); if (NFSHASNOLAYOUTCOMMIT(nmp)) return (0); NFSLOCKCLSTATE(); clp = nmp->nm_clp; if (clp == NULL) { NFSUNLOCKCLSTATE(); return (EPERM); } lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (lyp == NULL) { NFSUNLOCKCLSTATE(); return (EPERM); } nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); if (NFSCL_FORCEDISM(mp)) { NFSUNLOCKCLSTATE(); return (EPERM); } tryagain: if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) { lyp->nfsly_flags &= ~NFSLY_WRITTEN; NFSUNLOCKCLSTATE(); NFSCL_DEBUG(4, "do layoutcommit2\n"); nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p); NFSLOCKCLSTATE(); goto tryagain; } nfsv4_relref(&lyp->nfsly_lock); NFSUNLOCKCLSTATE(); return (0); } Index: head/sys/fs/nfsclient/nfs_clsubs.c =================================================================== --- head/sys/fs/nfsclient/nfs_clsubs.c (revision 328416) +++ head/sys/fs/nfsclient/nfs_clsubs.c (revision 328417) @@ -1,386 +1,386 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from nfs_subs.c 8.8 (Berkeley) 5/22/95 */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Note that stdarg.h and the ANSI style va_start macro is used for both * ANSI and traditional C compilers. */ #include extern struct mtx ncl_iod_mutex; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; extern int ncl_numasync; extern unsigned int ncl_iodmax; extern struct nfsstatsv1 nfsstatsv1; struct task ncl_nfsiodnew_task; int ncl_uninit(struct vfsconf *vfsp) { /* * XXX: Unloading of nfscl module is unsupported. */ #if 0 int i; /* * Tell all nfsiod processes to exit. Clear ncl_iodmax, and wakeup * any sleeping nfsiods so they check ncl_iodmax and exit. */ mtx_lock(&ncl_iod_mutex); ncl_iodmax = 0; for (i = 0; i < ncl_numasync; i++) if (ncl_iodwant[i] == NFSIOD_AVAILABLE) wakeup(&ncl_iodwant[i]); /* The last nfsiod to exit will wake us up when ncl_numasync hits 0 */ while (ncl_numasync) msleep(&ncl_numasync, &ncl_iod_mutex, PWAIT, "ioddie", 0); mtx_unlock(&ncl_iod_mutex); ncl_nhuninit(); return (0); #else return (EOPNOTSUPP); #endif } void ncl_dircookie_lock(struct nfsnode *np) { mtx_lock(&np->n_mtx); while (np->n_flag & NDIRCOOKIELK) (void) msleep(&np->n_flag, &np->n_mtx, PZERO, "nfsdirlk", 0); np->n_flag |= NDIRCOOKIELK; mtx_unlock(&np->n_mtx); } void ncl_dircookie_unlock(struct nfsnode *np) { mtx_lock(&np->n_mtx); np->n_flag &= ~NDIRCOOKIELK; wakeup(&np->n_flag); mtx_unlock(&np->n_mtx); } bool ncl_excl_start(struct vnode *vp) { struct nfsnode *np; int vn_lk; ASSERT_VOP_LOCKED(vp, "ncl_excl_start"); vn_lk = NFSVOPISLOCKED(vp); if (vn_lk == LK_EXCLUSIVE) return (false); KASSERT(vn_lk == LK_SHARED, ("ncl_excl_start: wrong vnode lock %d", vn_lk)); /* Ensure exclusive access, this might block */ np = VTONFS(vp); lockmgr(&np->n_excl, LK_EXCLUSIVE, NULL); return (true); } void ncl_excl_finish(struct vnode *vp, bool old_lock) { struct nfsnode *np; if (!old_lock) return; np = VTONFS(vp); lockmgr(&np->n_excl, LK_RELEASE, NULL); } #ifdef NFS_ACDEBUG #include SYSCTL_DECL(_vfs_nfs); static int nfs_acdebug; SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, ""); #endif /* * Check the time stamp * If the cache is valid, copy contents to *vap and return 0 * otherwise return an error */ int ncl_getattrcache(struct vnode *vp, struct vattr *vaper) { struct nfsnode *np; struct vattr *vap; struct nfsmount *nmp; int timeo, mustflush; np = VTONFS(vp); vap = &np->n_vattr.na_vattr; nmp = VFSTONFS(vp->v_mount); mustflush = nfscl_mustflush(vp); /* must be before mtx_lock() */ mtx_lock(&np->n_mtx); /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */ timeo = (time_second - np->n_mtime.tv_sec) / 10; #ifdef NFS_ACDEBUG if (nfs_acdebug>1) printf("ncl_getattrcache: initial timeo = %d\n", timeo); #endif if (vap->va_type == VDIR) { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin) timeo = nmp->nm_acdirmin; else if (timeo > nmp->nm_acdirmax) timeo = nmp->nm_acdirmax; } else { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin) timeo = nmp->nm_acregmin; else if (timeo > nmp->nm_acregmax) timeo = nmp->nm_acregmax; } #ifdef NFS_ACDEBUG if (nfs_acdebug > 2) printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n", nmp->nm_acregmin, nmp->nm_acregmax, nmp->nm_acdirmin, nmp->nm_acdirmax); if (nfs_acdebug) printf("ncl_getattrcache: age = %d; final timeo = %d\n", (time_second - np->n_attrstamp), timeo); #endif if ((time_second - np->n_attrstamp) >= timeo && (mustflush != 0 || np->n_attrstamp == 0)) { nfsstatsv1.attrcache_misses++; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_GET_MISS(vp); return( ENOENT); } nfsstatsv1.attrcache_hits++; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else { np->n_size = vap->va_size; } vnode_pager_setsize(vp, np->n_size); } else { np->n_size = vap->va_size; } } bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_GET_HIT(vp, vap); return (0); } static nfsuint64 nfs_nullcookie = { { 0, 0 } }; /* * This function finds the directory cookie that corresponds to the * logical byte offset given. */ nfsuint64 * ncl_getcookie(struct nfsnode *np, off_t off, int add) { struct nfsdmap *dp, *dp2; int pos; nfsuint64 *retval = NULL; pos = (uoff_t)off / NFS_DIRBLKSIZ; if (pos == 0 || off < 0) { KASSERT(!add, ("nfs getcookie add at <= 0")); return (&nfs_nullcookie); } pos--; dp = LIST_FIRST(&np->n_cookies); if (!dp) { if (add) { - MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), + dp = malloc(sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else goto out; } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (LIST_NEXT(dp, ndm_list)) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) goto out; dp = LIST_NEXT(dp, ndm_list); } else if (add) { - MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), + dp2 = malloc(sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else goto out; } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else goto out; } retval = &dp->ndm_cookies[pos]; out: return (retval); } /* * Invalidate cached directory information, except for the actual directory * blocks (which are invalidated separately). * Done mainly to avoid the use of stale offset cookies. */ void ncl_invaldir(struct vnode *vp) { struct nfsnode *np = VTONFS(vp); KASSERT(vp->v_type == VDIR, ("nfs: invaldir not dir")); ncl_dircookie_lock(np); np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; if (LIST_FIRST(&np->n_cookies)) LIST_FIRST(&np->n_cookies)->ndm_eocookie = 0; ncl_dircookie_unlock(np); } /* * The write verifier has changed (probably due to a server reboot), so all * B_NEEDCOMMIT blocks will have to be written again. Since they are on the * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT * and B_CLUSTEROK flags. Once done the new write verifier can be set for the * mount point. * * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data * writes are not clusterable. */ void ncl_clearcommit(struct mount *mp) { struct vnode *vp, *nvp; struct buf *bp, *nbp; struct bufobj *bo; MNT_VNODE_FOREACH_ALL(vp, mp, nvp) { bo = &vp->v_bufobj; vholdl(vp); VI_UNLOCK(vp); BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (!BUF_ISLOCKED(bp) && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); } BO_UNLOCK(bo); vdrop(vp); } } /* * Called once to initialize data structures... */ int ncl_init(struct vfsconf *vfsp) { int i; /* Ensure async daemons disabled */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { ncl_iodwant[i] = NFSIOD_NOT_AVAILABLE; ncl_iodmount[i] = NULL; } TASK_INIT(&ncl_nfsiodnew_task, 0, ncl_nfsiodnew_tq, NULL); ncl_nhinit(); /* Init the nfsnode table */ return (0); } Index: head/sys/fs/nfsclient/nfs_clvfsops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clvfsops.c (revision 328416) +++ head/sys/fs/nfsclient/nfs_clvfsops.c (revision 328417) @@ -1,2045 +1,2045 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(nfscl, "NFSv4 client"); extern int nfscl_ticks; extern struct timeval nfsboottime; extern int nfsrv_useacl; extern int nfscl_debuglevel; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; extern struct mtx ncl_iod_mutex; NFSCLSTATEMUTEX; MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header"); MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct"); SYSCTL_DECL(_vfs_nfs); static int nfs_ip_paranoia = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0, ""); static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY, downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, ""); /* how long between console messages "nfs server foo not responding" */ static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY, downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); #ifdef NFS_DEBUG int nfs_debug; SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "Toggle debug flag"); #endif static int nfs_mountroot(struct mount *); static void nfs_sec_name(char *, int *); static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *, struct ucred *, struct thread *); static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, u_char *, int, u_char *, int, u_char *, int, struct vnode **, struct ucred *, struct thread *, int, int, int); static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *, struct sockaddr_storage *, int *, off_t *, struct timeval *); static vfs_mount_t nfs_mount; static vfs_cmount_t nfs_cmount; static vfs_unmount_t nfs_unmount; static vfs_root_t nfs_root; static vfs_statfs_t nfs_statfs; static vfs_sync_t nfs_sync; static vfs_sysctl_t nfs_sysctl; static vfs_purge_t nfs_purge; /* * nfs vfs operations. */ static struct vfsops nfs_vfsops = { .vfs_init = ncl_init, .vfs_mount = nfs_mount, .vfs_cmount = nfs_cmount, .vfs_root = nfs_root, .vfs_statfs = nfs_statfs, .vfs_sync = nfs_sync, .vfs_uninit = ncl_uninit, .vfs_unmount = nfs_unmount, .vfs_sysctl = nfs_sysctl, .vfs_purge = nfs_purge, }; VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfs, 1); MODULE_DEPEND(nfs, nfscommon, 1, 1, 1); MODULE_DEPEND(nfs, krpc, 1, 1, 1); MODULE_DEPEND(nfs, nfssvc, 1, 1, 1); MODULE_DEPEND(nfs, nfslock, 1, 1, 1); /* * This structure is now defined in sys/nfs/nfs_diskless.c so that it * can be shared by both NFS clients. It is declared here so that it * will be defined for kernels built without NFS_ROOT, although it * isn't used in that case. */ #if !defined(NFS_ROOT) struct nfs_diskless nfs_diskless = { { { 0 } } }; struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; int nfs_diskless_valid = 0; #endif SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, &nfs_diskless_valid, 0, "Has the diskless struct been filled correctly"); SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, nfsv3_diskless.root_hostnam, 0, "Path to nfs root"); SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr), "%Ssockaddr_in", "Diskless root nfs address"); void newnfsargs_ntoh(struct nfs_args *); static int nfs_mountdiskless(char *, struct sockaddr_in *, struct nfs_args *, struct thread *, struct vnode **, struct mount *); static void nfs_convert_diskless(void); static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs); int newnfs_iosize(struct nfsmount *nmp) { int iosize, maxio; /* First, set the upper limit for iosize */ if (nmp->nm_flag & NFSMNT_NFSV4) { maxio = NFS_MAXBSIZE; } else if (nmp->nm_flag & NFSMNT_NFSV3) { if (nmp->nm_sotype == SOCK_DGRAM) maxio = NFS_MAXDGRAMDATA; else maxio = NFS_MAXBSIZE; } else { maxio = NFS_V2MAXDATA; } if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0) nmp->nm_rsize = maxio; if (nmp->nm_rsize > NFS_MAXBSIZE) nmp->nm_rsize = NFS_MAXBSIZE; if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0) nmp->nm_readdirsize = maxio; if (nmp->nm_readdirsize > nmp->nm_rsize) nmp->nm_readdirsize = nmp->nm_rsize; if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0) nmp->nm_wsize = maxio; if (nmp->nm_wsize > NFS_MAXBSIZE) nmp->nm_wsize = NFS_MAXBSIZE; /* * Calculate the size used for io buffers. Use the larger * of the two sizes to minimise nfs requests but make sure * that it is at least one VM page to avoid wasting buffer * space. It must also be at least NFS_DIRBLKSIZ, since * that is the buffer size used for directories. */ iosize = imax(nmp->nm_rsize, nmp->nm_wsize); iosize = imax(iosize, PAGE_SIZE); iosize = imax(iosize, NFS_DIRBLKSIZ); nmp->nm_mountp->mnt_stat.f_iosize = iosize; return (iosize); } static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) { args->version = NFS_ARGSVERSION; args->addr = oargs->addr; args->addrlen = oargs->addrlen; args->sotype = oargs->sotype; args->proto = oargs->proto; args->fh = oargs->fh; args->fhsize = oargs->fhsize; args->flags = oargs->flags; args->wsize = oargs->wsize; args->rsize = oargs->rsize; args->readdirsize = oargs->readdirsize; args->timeo = oargs->timeo; args->retrans = oargs->retrans; args->readahead = oargs->readahead; args->hostname = oargs->hostname; } static void nfs_convert_diskless(void) { bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, sizeof(struct ifaliasreq)); bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, sizeof(struct sockaddr_in)); nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args); if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) { nfsv3_diskless.root_fhsize = NFSX_MYFH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH); } else { nfsv3_diskless.root_fhsize = NFSX_V2FH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); } bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, sizeof(struct sockaddr_in)); bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN); nfsv3_diskless.root_time = nfs_diskless.root_time; bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam, MAXHOSTNAMELEN); nfs_diskless_valid = 3; } /* * nfs statfs call */ static int nfs_statfs(struct mount *mp, struct statfs *sbp) { struct vnode *vp; struct thread *td; struct nfsmount *nmp = VFSTONFS(mp); struct nfsvattr nfsva; struct nfsfsinfo fs; struct nfsstatfs sb; int error = 0, attrflag, gotfsinfo = 0, ret; struct nfsnode *np; td = curthread; error = vfs_busy(mp, MBF_NOWAIT); if (error) return (error); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) { vfs_unbusy(mp); return (error); } vp = NFSTOV(np); mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); if (!error) gotfsinfo = 1; } else mtx_unlock(&nmp->nm_mtx); if (!error) error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); if (error != 0) NFSCL_DEBUG(2, "statfs=%d\n", error); if (attrflag == 0) { ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, td->td_ucred, td, &nfsva, NULL, NULL); if (ret) { /* * Just set default values to get things going. */ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; } } (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { mtx_lock(&nmp->nm_mtx); if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4)) nfscl_loadfsinfo(nmp, &fs); nfscl_loadsbinfo(nmp, &sb, sbp); sbp->f_iosize = newnfs_iosize(nmp); mtx_unlock(&nmp->nm_mtx); if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } vput(vp); vfs_unbusy(mp); return (error); } /* * nfs version 3 fsinfo rpc call */ int ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, struct thread *td) { struct nfsfsinfo fs; struct nfsvattr nfsva; int error, attrflag; error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL); if (!error) { if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); mtx_lock(&nmp->nm_mtx); nfscl_loadfsinfo(nmp, &fs); mtx_unlock(&nmp->nm_mtx); } return (error); } /* * Mount a remote root fs via. nfs. This depends on the info in the * nfs_diskless structure that has been filled in properly by some primary * bootstrap. * It goes something like this: * - do enough of "ifconfig" by calling ifioctl() so that the system * can talk to the server * - If nfs_diskless.mygateway is filled in, use that address as * a default gateway. * - build the rootfs mount point and call mountnfs() to do the rest. * * It is assumed to be safe to read, modify, and write the nfsv3_diskless * structure, as well as other global NFS client variables here, as * nfs_mountroot() will be called once in the boot before any other NFS * client activity occurs. */ static int nfs_mountroot(struct mount *mp) { struct thread *td = curthread; struct nfsv3_diskless *nd = &nfsv3_diskless; struct socket *so; struct vnode *vp; struct ifreq ir; int error; u_long l; char buf[128]; char *cp; #if defined(BOOTP_NFSROOT) && defined(BOOTP) bootpc_init(); /* use bootp to get nfs_diskless filled in */ #elif defined(NFS_ROOT) nfs_setup_diskless(); #endif if (nfs_diskless_valid == 0) return (-1); if (nfs_diskless_valid == 1) nfs_convert_diskless(); /* * Do enough of ifconfig(8) so that the critical net interface can * talk to the server. */ error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0, td->td_ucred, td); if (error) panic("nfs_mountroot: socreate(%04x): %d", nd->myif.ifra_addr.sa_family, error); #if 0 /* XXX Bad idea */ /* * We might not have been told the right interface, so we pass * over the first ten interfaces of the same kind, until we get * one of them configured. */ for (i = strlen(nd->myif.ifra_name) - 1; nd->myif.ifra_name[i] >= '0' && nd->myif.ifra_name[i] <= '9'; nd->myif.ifra_name[i] ++) { error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if(!error) break; } #endif error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if (error) panic("nfs_mountroot: SIOCAIFADDR: %d", error); if ((cp = kern_getenv("boot.netif.mtu")) != NULL) { ir.ifr_mtu = strtol(cp, NULL, 10); bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ); freeenv(cp); error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td); if (error) printf("nfs_mountroot: SIOCSIFMTU: %d", error); } soclose(so); /* * If the gateway field is filled in, set it as the default route. * Note that pxeboot will set a default route of 0 if the route * is not set by the DHCP server. Check also for a value of 0 * to avoid panicking inappropriately in that situation. */ if (nd->mygateway.sin_len != 0 && nd->mygateway.sin_addr.s_addr != 0) { struct sockaddr_in mask, sin; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ CURVNET_SET(TD_TO_VNET(td)); error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } /* * Create the rootfs mount point. */ nd->root_args.fh = nd->root_fh; nd->root_args.fhsize = nd->root_fhsize; l = ntohl(nd->root_saddr.sin_addr.s_addr); snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", (l >> 24) & 0xff, (l >> 16) & 0xff, (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam); printf("NFS ROOT: %s\n", buf); nd->root_args.hostname = buf; if ((error = nfs_mountdiskless(buf, &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) { return (error); } /* * This is not really an nfs issue, but it is much easier to * set hostname here and then let the "/etc/rc.xxx" files * mount the right /var based upon its preset value. */ mtx_lock(&prison0.pr_mtx); strlcpy(prison0.pr_hostname, nd->my_hostnam, sizeof(prison0.pr_hostname)); mtx_unlock(&prison0.pr_mtx); inittodr(ntohl(nd->root_time)); return (0); } /* * Internal version of mount system call for diskless setup. */ static int nfs_mountdiskless(char *path, struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, struct vnode **vpp, struct mount *mp) { struct sockaddr *nam; int dirlen, error; char *dirpath; /* * Find the directory path in "path", which also has the server's * name/ip address in it. */ dirpath = strchr(path, ':'); if (dirpath != NULL) dirlen = strlen(++dirpath); else dirlen = 0; nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen, NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO, NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) { printf("nfs_mountroot: mount %s on /: %d\n", path, error); return (error); } return (0); } static void nfs_sec_name(char *sec, int *flagsp) { if (!strcmp(sec, "krb5")) *flagsp |= NFSMNT_KERB; else if (!strcmp(sec, "krb5i")) *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY); else if (!strcmp(sec, "krb5p")) *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY); } static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *hostname, struct ucred *cred, struct thread *td) { int adjsock; char *p; /* * Set read-only flag if requested; otherwise, clear it if this is * an update. If this is not an update, then either the read-only * flag is already clear, or this is a root mount and it was set * intentionally at some previous point. */ if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); } else if (mp->mnt_flag & MNT_UPDATE) { MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); } /* * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes * no sense in that context. Also, set up appropriate retransmit * and soft timeout behavior. */ if (argp->sotype == SOCK_STREAM) { nmp->nm_flag &= ~NFSMNT_NOCONN; nmp->nm_timeo = NFS_MAXTIMEO; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_retry = INT_MAX; else nmp->nm_retry = NFS_RETRANS_TCP; } /* Also clear RDIRPLUS if NFSv2, it crashes some servers */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { argp->flags &= ~NFSMNT_RDIRPLUS; nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } /* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */ if (nmp->nm_minorvers == 0) { argp->flags &= ~NFSMNT_ONEOPENOWN; nmp->nm_flag &= ~NFSMNT_ONEOPENOWN; } /* Re-bind if rsrvd port requested and wasn't on one */ adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) && (argp->flags & NFSMNT_RESVPORT); /* Also re-bind if we're switching to/from a connected UDP socket */ adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) != (argp->flags & NFSMNT_NOCONN)); /* Update flags atomically. Don't change the lock bits. */ nmp->nm_flag = argp->flags | nmp->nm_flag; if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; if (nmp->nm_timeo < NFS_MINTIMEO) nmp->nm_timeo = NFS_MINTIMEO; else if (nmp->nm_timeo > NFS_MAXTIMEO) nmp->nm_timeo = NFS_MAXTIMEO; } if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { nmp->nm_retry = argp->retrans; if (nmp->nm_retry > NFS_MAXREXMIT) nmp->nm_retry = NFS_MAXREXMIT; } if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { nmp->nm_wsize = argp->wsize; /* * Clip at the power of 2 below the size. There is an * issue (not isolated) that causes intermittent page * faults if this is not done. */ if (nmp->nm_wsize > NFS_FABLKSIZE) nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1); else nmp->nm_wsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { nmp->nm_rsize = argp->rsize; /* * Clip at the power of 2 below the size. There is an * issue (not isolated) that causes intermittent page * faults if this is not done. */ if (nmp->nm_rsize > NFS_FABLKSIZE) nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1); else nmp->nm_rsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { nmp->nm_readdirsize = argp->readdirsize; } if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) nmp->nm_acregmin = argp->acregmin; else nmp->nm_acregmin = NFS_MINATTRTIMO; if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) nmp->nm_acregmax = argp->acregmax; else nmp->nm_acregmax = NFS_MAXATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) nmp->nm_acdirmin = argp->acdirmin; else nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) nmp->nm_acdirmax = argp->acdirmax; else nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; if (nmp->nm_acdirmin > nmp->nm_acdirmax) nmp->nm_acdirmin = nmp->nm_acdirmax; if (nmp->nm_acregmin > nmp->nm_acregmax) nmp->nm_acregmin = nmp->nm_acregmax; if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { if (argp->readahead <= NFS_MAXRAHEAD) nmp->nm_readahead = argp->readahead; else nmp->nm_readahead = NFS_MAXRAHEAD; } if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { if (argp->wcommitsize < nmp->nm_wsize) nmp->nm_wcommitsize = nmp->nm_wsize; else nmp->nm_wcommitsize = argp->wcommitsize; } adjsock |= ((nmp->nm_sotype != argp->sotype) || (nmp->nm_soproto != argp->proto)); if (nmp->nm_client != NULL && adjsock) { int haslock = 0, error = 0; if (nmp->nm_sotype == SOCK_STREAM) { error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock); if (!error) haslock = 1; } if (!error) { newnfs_disconnect(&nmp->nm_sockreq); if (haslock) newnfs_sndunlock(&nmp->nm_sockreq.nr_lock); nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; if (nmp->nm_sotype == SOCK_DGRAM) while (newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)) { printf("newnfs_args: retrying connect\n"); (void) nfs_catnap(PSOCK, 0, "nfscon"); } } } else { nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; } if (hostname != NULL) { strlcpy(nmp->nm_hostname, hostname, sizeof(nmp->nm_hostname)); p = strchr(nmp->nm_hostname, ':'); if (p != NULL) *p = '\0'; } } static const char *nfs_opts[] = { "from", "nfs_args", "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union", "noclusterr", "noclusterw", "multilabel", "acls", "force", "update", "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize", "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh", "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath", "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr", "pnfs", "wcommitsize", "oneopenown", NULL }; /* * Parse the "from" mountarg, passed by the generic mount(8) program * or the mountroot code. This is used when rerooting into NFS. * * Note that the "hostname" is actually a "hostname:/share/path" string. */ static int nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep, struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp) { char *nam, *delimp, *hostp, *spec; int error, have_bracket = 0, offset, rv, speclen; struct sockaddr_in *sin; size_t len; error = vfs_getopt(opts, "from", (void **)&spec, &speclen); if (error != 0) return (error); nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK); /* * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs(). */ if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL && *(delimp + 1) == ':') { hostp = spec + 1; spec = delimp + 2; have_bracket = 1; } else if ((delimp = strrchr(spec, ':')) != NULL) { hostp = spec; spec = delimp + 1; } else if ((delimp = strrchr(spec, '@')) != NULL) { printf("%s: path@server syntax is deprecated, " "use server:path\n", __func__); hostp = delimp + 1; } else { printf("%s: no : nfs-name\n", __func__); free(nam, M_TEMP); return (EINVAL); } *delimp = '\0'; /* * If there has been a trailing slash at mounttime it seems * that some mountd implementations fail to remove the mount * entries from their mountlist while unmounting. */ for (speclen = strlen(spec); speclen > 1 && spec[speclen - 1] == '/'; speclen--) spec[speclen - 1] = '\0'; if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) { printf("%s: %s:%s: name too long", __func__, hostp, spec); free(nam, M_TEMP); return (EINVAL); } /* Make both '@' and ':' notations equal */ if (*hostp != '\0') { len = strlen(hostp); offset = 0; if (have_bracket) nam[offset++] = '['; memmove(nam + offset, hostp, len); if (have_bracket) nam[len + offset++] = ']'; nam[len + offset++] = ':'; memmove(nam + len + offset, spec, speclen); nam[len + speclen + offset] = '\0'; } else nam[0] = '\0'; /* * XXX: IPv6 */ sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK); rv = inet_pton(AF_INET, hostp, &sin->sin_addr); if (rv != 1) { printf("%s: cannot parse '%s', inet_pton() returned %d\n", __func__, hostp, rv); free(nam, M_TEMP); free(sin, M_SONAME); return (EINVAL); } sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; /* * XXX: hardcoded port number. */ sin->sin_port = htons(2049); *hostnamep = strdup(nam, M_NEWNFSMNT); *sinp = sin; strlcpy(dirpath, spec, dirpathsize); *dirlenp = strlen(dirpath); free(nam, M_TEMP); return (0); } /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the getsockaddr() call because getsockaddr() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_mount(struct mount *mp) { struct nfs_args args = { .version = NFS_ARGSVERSION, .addr = NULL, .addrlen = sizeof (struct sockaddr_in), .sotype = SOCK_STREAM, .proto = 0, .fh = NULL, .fhsize = 0, .flags = NFSMNT_RESVPORT, .wsize = NFS_WSIZE, .rsize = NFS_RSIZE, .readdirsize = NFS_READDIRSIZE, .timeo = 10, .retrans = NFS_RETRANS, .readahead = NFS_DEFRAHEAD, .wcommitsize = 0, /* was: NQ_DEFLEASE */ .hostname = NULL, .acregmin = NFS_MINATTRTIMO, .acregmax = NFS_MAXATTRTIMO, .acdirmin = NFS_MINDIRATTRTIMO, .acdirmax = NFS_MAXDIRATTRTIMO, }; int error = 0, ret, len; struct sockaddr *nam = NULL; struct vnode *vp; struct thread *td; char *hst; u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100]; char *cp, *opt, *name, *secname; int nametimeo = NFS_DEFAULT_NAMETIMEO; int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO; int minvers = 0; int dirlen, has_nfs_args_opt, has_nfs_from_opt, krbnamelen, srvkrbnamelen; size_t hstlen; has_nfs_args_opt = 0; has_nfs_from_opt = 0; hst = malloc(MNAMELEN, M_TEMP, M_WAITOK); if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { error = EINVAL; goto out; } td = curthread; if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS && nfs_diskless_valid != 0) { error = nfs_mountroot(mp); goto out; } nfscl_init(); /* * The old mount_nfs program passed the struct nfs_args * from userspace to kernel. The new mount_nfs program * passes string options via nmount() from userspace to kernel * and we populate the struct nfs_args in the kernel. */ if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) { error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof(args)); if (error != 0) goto out; if (args.version != NFS_ARGSVERSION) { error = EPROGMISMATCH; goto out; } has_nfs_args_opt = 1; } /* Handle the new style options. */ if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) { args.acdirmin = args.acdirmax = args.acregmin = args.acregmax = 0; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0) args.flags |= NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0) args.flags |= NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0) args.flags |= NFSMNT_INT; if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0) args.flags |= NFSMNT_RDIRPLUS; if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0) args.flags |= NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0) args.flags &= ~NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0) args.flags |= NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0) args.flags &= ~NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0) args.sotype = SOCK_STREAM; if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0) args.flags |= NFSMNT_NFSV3; if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) { args.flags |= NFSMNT_NFSV4; args.sotype = SOCK_STREAM; } if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0) args.flags |= NFSMNT_ALLGSSNAME; if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0) args.flags |= NFSMNT_NOCTO; if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0) args.flags |= NFSMNT_NONCONTIGWR; if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0) args.flags |= NFSMNT_PNFS; if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0) args.flags |= NFSMNT_ONEOPENOWN; if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readdirsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readdirsize); if (ret != 1 || args.readdirsize <= 0) { vfs_mount_error(mp, "illegal readdirsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READDIRSIZE; } if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readahead"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readahead); if (ret != 1 || args.readahead <= 0) { vfs_mount_error(mp, "illegal readahead: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READAHEAD; } if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal wsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.wsize); if (ret != 1 || args.wsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WSIZE; } if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal rsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.rsize); if (ret != 1 || args.rsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RSIZE; } if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal retrans"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.retrans); if (ret != 1 || args.retrans <= 0) { vfs_mount_error(mp, "illegal retrans: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RETRANS; } if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal actimeo: %s", opt); error = EINVAL; goto out; } args.acdirmin = args.acdirmax = args.acregmax = args.acregmin; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal acregmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMIN; } if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmax); if (ret != 1 || args.acregmax < 0) { vfs_mount_error(mp, "illegal acregmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmin); if (ret != 1 || args.acdirmin < 0) { vfs_mount_error(mp, "illegal acdirmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMIN; } if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmax); if (ret != 1 || args.acdirmax < 0) { vfs_mount_error(mp, "illegal acdirmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMAX; } if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.wcommitsize); if (ret != 1 || args.wcommitsize < 0) { vfs_mount_error(mp, "illegal wcommitsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WCOMMITSIZE; } if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeo: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeout: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &nametimeo); if (ret != 1 || nametimeo < 0) { vfs_mount_error(mp, "illegal nametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &negnametimeo); if (ret != 1 || negnametimeo < 0) { vfs_mount_error(mp, "illegal negnametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &minvers); if (ret != 1 || minvers < 0 || minvers > 1 || (args.flags & NFSMNT_NFSV4) == 0) { vfs_mount_error(mp, "illegal minorversion: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "sec", (void **) &secname, NULL) == 0) nfs_sec_name(secname, &args.flags); if (mp->mnt_flag & MNT_UPDATE) { struct nfsmount *nmp = VFSTONFS(mp); if (nmp == NULL) { error = EIO; goto out; } /* * If a change from TCP->UDP is done and there are thread(s) * that have I/O RPC(s) in progress with a transfer size * greater than NFS_MAXDGRAMDATA, those thread(s) will be * hung, retrying the RPC(s) forever. Usually these threads * will be seen doing an uninterruptible sleep on wait channel * "nfsreq". */ if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM) tprintf(td->td_proc, LOG_WARNING, "Warning: mount -u that changes TCP->UDP can result in hung threads\n"); /* * When doing an update, we can't change version, * security, switch lockd strategies, change cookie * translation or switch oneopenown. */ args.flags = (args.flags & ~(NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_ONEOPENOWN | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_ONEOPENOWN | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td); goto out; } /* * Make the nfs_ip_paranoia sysctl serve as the default connection * or no-connection mode for those protocols that support * no-connection mode (the flag will be cleared later for protocols * that do not support no-connection mode). This will allow a client * to receive replies from a different IP then the request was * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), * not 0. */ if (nfs_ip_paranoia == 0) args.flags |= NFSMNT_NOCONN; if (has_nfs_args_opt != 0) { /* * In the 'nfs_args' case, the pointers in the args * structure are in userland - we copy them in here. */ if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error != 0) goto out; error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen); if (error != 0) goto out; bzero(&hst[hstlen], MNAMELEN - hstlen); args.hostname = hst; /* getsockaddr() call must be after above copyin() calls */ error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); if (error != 0) goto out; } else if (nfs_mount_parse_from(mp->mnt_optnew, &args.hostname, (struct sockaddr_in **)&nam, dirpath, sizeof(dirpath), &dirlen) == 0) { has_nfs_from_opt = 1; bcopy(args.hostname, hst, MNAMELEN); hst[MNAMELEN - 1] = '\0'; /* * This only works with NFSv4 for now. */ args.fhsize = 0; args.flags |= NFSMNT_NFSV4; args.sotype = SOCK_STREAM; } else { if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh, &args.fhsize) == 0) { if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } bcopy(args.fh, nfh, args.fhsize); } else { args.fhsize = 0; } (void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname, &len); if (args.hostname == NULL) { vfs_mount_error(mp, "Invalid hostname"); error = EINVAL; goto out; } if (len >= MNAMELEN) { vfs_mount_error(mp, "Hostname too long"); error = EINVAL; goto out; } bcopy(args.hostname, hst, len); hst[len] = '\0'; } if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0) strlcpy(srvkrbname, name, sizeof (srvkrbname)); else { snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst); cp = strchr(srvkrbname, ':'); if (cp != NULL) *cp = '\0'; } srvkrbnamelen = strlen(srvkrbname); if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0) strlcpy(krbname, name, sizeof (krbname)); else krbname[0] = '\0'; krbnamelen = strlen(krbname); if (has_nfs_from_opt == 0) { if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0) strlcpy(dirpath, name, sizeof (dirpath)); else dirpath[0] = '\0'; dirlen = strlen(dirpath); } if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) { if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr, &args.addrlen) == 0) { if (args.addrlen > SOCK_MAXADDRLEN) { error = ENAMETOOLONG; goto out; } nam = malloc(args.addrlen, M_SONAME, M_WAITOK); bcopy(args.addr, nam, args.addrlen); nam->sa_len = args.addrlen; } else { vfs_mount_error(mp, "No server address"); error = EINVAL; goto out; } } args.fh = nfh; error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath, dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td, nametimeo, negnametimeo, minvers); out: if (!error) { MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF | MNTK_USES_BCACHE; if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0) mp->mnt_kern_flag |= MNTK_NULL_NOCACHE; MNT_IUNLOCK(mp); } free(hst, M_TEMP); return (error); } /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the getsockaddr() call because getsockaddr() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_cmount(struct mntarg *ma, void *data, uint64_t flags) { int error; struct nfs_args args; error = copyin(data, &args, sizeof (struct nfs_args)); if (error) return error; ma = mount_arg(ma, "nfs_args", &args, sizeof args); error = kernel_mount(ma, flags); return (error); } /* * Common code for mount and mountroot */ static int mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen, u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp, struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo, int minvers) { struct nfsmount *nmp; struct nfsnode *np; int error, trycnt, ret; struct nfsvattr nfsva; struct nfsclclient *clp; struct nfsclds *dsp, *tdsp; uint32_t lease; static u_int64_t clval = 0; NFSCL_DEBUG(3, "in mnt\n"); clp = NULL; if (mp->mnt_flag & MNT_UPDATE) { nmp = VFSTONFS(mp); printf("%s: MNT_UPDATE is no longer handled here\n", __func__); - FREE(nam, M_SONAME); + free(nam, M_SONAME); return (0); } else { - MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) + + nmp = malloc(sizeof (struct nfsmount) + krbnamelen + dirlen + srvkrbnamelen + 2, M_NEWNFSMNT, M_WAITOK | M_ZERO); TAILQ_INIT(&nmp->nm_bufq); TAILQ_INIT(&nmp->nm_sess); if (clval == 0) clval = (u_int64_t)nfsboottime.tv_sec; nmp->nm_clval = clval++; nmp->nm_krbnamelen = krbnamelen; nmp->nm_dirpathlen = dirlen; nmp->nm_srvkrbnamelen = srvkrbnamelen; if (td->td_ucred->cr_uid != (uid_t)0) { /* * nm_uid is used to get KerberosV credentials for * the nfsv4 state handling operations if there is * no host based principal set. Use the uid of * this user if not root, since they are doing the * mount. I don't think setting this for root will * work, since root normally does not have user * credentials in a credentials cache. */ nmp->nm_uid = td->td_ucred->cr_uid; } else { /* * Just set to -1, so it won't be used. */ nmp->nm_uid = (uid_t)-1; } /* Copy and null terminate all the names */ if (nmp->nm_krbnamelen > 0) { bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen); nmp->nm_name[nmp->nm_krbnamelen] = '\0'; } if (nmp->nm_dirpathlen > 0) { bcopy(dirpath, NFSMNT_DIRPATH(nmp), nmp->nm_dirpathlen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + 1] = '\0'; } if (nmp->nm_srvkrbnamelen > 0) { bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp), nmp->nm_srvkrbnamelen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + nmp->nm_srvkrbnamelen + 2] = '\0'; } nmp->nm_sockreq.nr_cred = crhold(cred); mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF); mp->mnt_data = nmp; nmp->nm_getinfo = nfs_getnlminfo; nmp->nm_vinvalbuf = ncl_vinvalbuf; } vfs_getnewfsid(mp); nmp->nm_mountp = mp; mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK); /* * Since nfs_decode_args() might optionally set them, these * need to be set to defaults before the call, so that the * optional settings aren't overwritten. */ nmp->nm_nametimeo = nametimeo; nmp->nm_negnametimeo = negnametimeo; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; nmp->nm_readahead = NFS_DEFRAHEAD; /* This is empirical approximation of sqrt(hibufspace) * 256. */ nmp->nm_wcommitsize = NFS_MAXBSIZE / 256; while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace) nmp->nm_wcommitsize *= 2; nmp->nm_wcommitsize *= 256; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_minorvers = minvers; else nmp->nm_minorvers = 0; nfs_decode_args(mp, nmp, argp, hst, cred, td); /* * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too * high, depending on whether we end up with negative offsets in * the client or server somewhere. 2GB-1 may be safer. * * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum * that we can handle until we find out otherwise. */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) nmp->nm_maxfilesize = 0xffffffffLL; else nmp->nm_maxfilesize = OFF_MAX; if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { nmp->nm_wsize = NFS_WSIZE; nmp->nm_rsize = NFS_RSIZE; nmp->nm_readdirsize = NFS_READDIRSIZE; } nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_tprintf_delay = nfs_tprintf_delay; if (nmp->nm_tprintf_delay < 0) nmp->nm_tprintf_delay = 0; nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; nmp->nm_fhsize = argp->fhsize; if (nmp->nm_fhsize > 0) bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); nmp->nm_nam = nam; /* Set up the sockets and per-host congestion */ nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; nmp->nm_sockreq.nr_prog = NFS_PROG; if ((argp->flags & NFSMNT_NFSV4)) nmp->nm_sockreq.nr_vers = NFS_VER4; else if ((argp->flags & NFSMNT_NFSV3)) nmp->nm_sockreq.nr_vers = NFS_VER3; else nmp->nm_sockreq.nr_vers = NFS_VER2; if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0))) goto bad; /* For NFSv4.1, get the clientid now. */ if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "at getcl\n"); error = nfscl_getcl(mp, cred, td, 0, &clp); NFSCL_DEBUG(3, "aft getcl=%d\n", error); if (error != 0) goto bad; } if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && nmp->nm_dirpathlen > 0) { NFSCL_DEBUG(3, "in dirp\n"); /* * If the fhsize on the mount point == 0 for V4, the mount * path needs to be looked up. */ trycnt = 3; do { error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, td); NFSCL_DEBUG(3, "aft dirp=%d\n", error); if (error) (void) nfs_catnap(PZERO, error, "nfsgetdirp"); } while (error && --trycnt > 0); if (error) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); goto bad; } } /* * A reference count is needed on the nfsnode representing the * remote root. If this object is not persistent, then backward * traversals of the mount point (i.e. "..") will not work if * the nfsnode gets flushed out of the cache. Ufs does not have * this problem, because one can identify root inodes by their * number == UFS_ROOTINO (2). */ if (nmp->nm_fhsize > 0) { /* * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set * non-zero for the root vnode. f_iosize will be set correctly * by nfs_statfs() before any I/O occurs. */ mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ; error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) goto bad; *vpp = NFSTOV(np); /* * Get file attributes and transfer parameters for the * mountpoint. This has the side effect of filling in * (*vpp)->v_type with the correct value. */ ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, cred, td, &nfsva, NULL, &lease); if (ret) { /* * Just set default values to get things going. */ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; lease = 60; } (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1); if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "lease=%d\n", (int)lease); NFSLOCKCLSTATE(); clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; NFSUNLOCKCLSTATE(); /* * Mount will succeed, so the renew thread can be * started now. */ nfscl_start_renewthread(clp); nfscl_clientrelease(clp); } if (argp->flags & NFSMNT_NFSV3) ncl_fsinfo(nmp, *vpp, cred, td); /* Mark if the mount point supports NFSv4 ACLs. */ if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 && ret == 0 && NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } /* * Lose the lock but keep the ref. */ NFSVOPUNLOCK(*vpp, 0); return (0); } error = EIO; bad: if (clp != NULL) nfscl_clientrelease(clp); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); if (nmp->nm_clp != NULL) { NFSLOCKCLSTATE(); LIST_REMOVE(nmp->nm_clp, nfsc_list); NFSUNLOCKCLSTATE(); free(nmp->nm_clp, M_NFSCLCLIENT); } TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) { if (dsp != TAILQ_FIRST(&nmp->nm_sess) && dsp->nfsclds_sockp != NULL) newnfs_disconnect(dsp->nfsclds_sockp); nfscl_freenfsclds(dsp); } - FREE(nmp, M_NEWNFSMNT); - FREE(nam, M_SONAME); + free(nmp, M_NEWNFSMNT); + free(nam, M_SONAME); return (error); } /* * unmount system call */ static int nfs_unmount(struct mount *mp, int mntflags) { struct thread *td; struct nfsmount *nmp; int error, flags = 0, i, trycnt = 0; struct nfsclds *dsp, *tdsp; td = curthread; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; nmp = VFSTONFS(mp); /* * Goes something like this.. * - Call vflush() to clear out vnodes for this filesystem * - Close the socket * - Free up the data structures */ /* In the forced case, cancel any outstanding requests. */ if (mntflags & MNT_FORCE) { error = newnfs_nmcancelreqs(nmp); if (error) goto out; /* For a forced close, get rid of the renew thread now */ nfscl_umount(nmp, td); } /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ do { error = vflush(mp, 1, flags, td); if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30) (void) nfs_catnap(PSOCK, error, "newndm"); } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30); if (error) goto out; /* * We are now committed to the unmount. */ if ((mntflags & MNT_FORCE) == 0) nfscl_umount(nmp, td); else { mtx_lock(&nmp->nm_mtx); nmp->nm_privflag |= NFSMNTP_FORCEDISM; mtx_unlock(&nmp->nm_mtx); } /* Make sure no nfsiods are assigned to this mount. */ mtx_lock(&ncl_iod_mutex); for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (ncl_iodmount[i] == nmp) { ncl_iodwant[i] = NFSIOD_AVAILABLE; ncl_iodmount[i] = NULL; } mtx_unlock(&ncl_iod_mutex); /* * We can now set mnt_data to NULL and wait for * nfssvc(NFSSVC_FORCEDISM) to complete. */ mtx_lock(&mountlist_mtx); mtx_lock(&nmp->nm_mtx); mp->mnt_data = NULL; mtx_unlock(&mountlist_mtx); while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0) msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0); mtx_unlock(&nmp->nm_mtx); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); - FREE(nmp->nm_nam, M_SONAME); + free(nmp->nm_nam, M_SONAME); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) { if (dsp != TAILQ_FIRST(&nmp->nm_sess) && dsp->nfsclds_sockp != NULL) newnfs_disconnect(dsp->nfsclds_sockp); nfscl_freenfsclds(dsp); } - FREE(nmp, M_NEWNFSMNT); + free(nmp, M_NEWNFSMNT); out: return (error); } /* * Return root of a filesystem */ static int nfs_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; struct nfsmount *nmp; struct nfsnode *np; int error; nmp = VFSTONFS(mp); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags); if (error) return error; vp = NFSTOV(np); /* * Get transfer parameters and attributes for root vnode once. */ mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread); } else mtx_unlock(&nmp->nm_mtx); if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Flush out the buffer cache */ /* ARGSUSED */ static int nfs_sync(struct mount *mp, int waitfor) { struct vnode *vp, *mvp; struct thread *td; int error, allerror = 0; td = curthread; MNT_ILOCK(mp); /* * If a forced dismount is in progress, return from here so that * the umount(2) syscall doesn't get stuck in VFS_SYNC() before * calling VFS_UNMOUNT(). */ if (NFSCL_FORCEDISM(mp)) { MNT_IUNLOCK(mp); return (EBADF); } MNT_IUNLOCK(mp); /* * Force stale buffer cache information to be flushed. */ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* XXX Racy bv_cnt check. */ if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 || waitfor == MNT_LAZY) { VI_UNLOCK(vp); continue; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } error = VOP_FSYNC(vp, waitfor, td); if (error) allerror = error; NFSVOPUNLOCK(vp, 0); vrele(vp); } return (allerror); } static int nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req) { struct nfsmount *nmp = VFSTONFS(mp); struct vfsquery vq; int error; bzero(&vq, sizeof(vq)); switch (op) { #if 0 case VFS_CTL_NOLOCKS: val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0; if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &val, sizeof(val)); if (error) return (error); } if (req->newptr != NULL) { error = SYSCTL_IN(req, &val, sizeof(val)); if (error) return (error); if (val) nmp->nm_flag |= NFSMNT_NOLOCKS; else nmp->nm_flag &= ~NFSMNT_NOLOCKS; } break; #endif case VFS_CTL_QUERY: mtx_lock(&nmp->nm_mtx); if (nmp->nm_state & NFSSTA_TIMEO) vq.vq_flags |= VQ_NOTRESP; mtx_unlock(&nmp->nm_mtx); #if 0 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) vq.vq_flags |= VQ_NOTRESPLOCK; #endif error = SYSCTL_OUT(req, &vq, sizeof(vq)); break; case VFS_CTL_TIMEO: if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); } if (req->newptr != NULL) { error = vfs_suser(mp, req->td); if (error) return (error); error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; } break; default: return (ENOTSUP); } return (0); } /* * Purge any RPCs in progress, so that they will all return errors. * This allows dounmount() to continue as far as VFS_UNMOUNT() for a * forced dismount. */ static void nfs_purge(struct mount *mp) { struct nfsmount *nmp = VFSTONFS(mp); newnfs_nmcancelreqs(nmp); } /* * Extract the information needed by the nlm from the nfs vnode. */ static void nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp, struct sockaddr_storage *sp, int *is_v3p, off_t *sizep, struct timeval *timeop) { struct nfsmount *nmp; struct nfsnode *np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); if (fhlenp != NULL) *fhlenp = (size_t)np->n_fhp->nfh_len; if (fhp != NULL) bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len); if (sp != NULL) bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp))); if (is_v3p != NULL) *is_v3p = NFS_ISV3(vp); if (sizep != NULL) *sizep = np->n_size; if (timeop != NULL) { timeop->tv_sec = nmp->nm_timeo / NFS_HZ; timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ); } } /* * This function prints out an option name, based on the conditional * argument. */ static __inline void nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf, size_t *blen) { int len; if (testval != 0 && *blen > strlen(opt)) { len = snprintf(*buf, *blen, "%s", opt); if (len != strlen(opt)) printf("EEK!!\n"); *buf += len; *blen -= len; } } /* * This function printf out an options integer value. */ static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf, size_t *blen) { int len; if (*blen > strlen(opt) + 1) { /* Could result in truncated output string. */ len = snprintf(*buf, *blen, "%s=%d", opt, optval); if (len < *blen) { *buf += len; *blen -= len; } } } /* * Load the option flags and values into the buffer. */ void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen) { char *buf; size_t blen; buf = buffer; blen = buflen; nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf, &blen); if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) { nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 && nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen); } nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0, "nfsv2", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0, ",noncontigwr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == 0, ",lockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == NFSMNT_NOLOCKD, ",nolockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen); } Index: head/sys/fs/nfsclient/nfs_clvnops.c =================================================================== --- head/sys/fs/nfsclient/nfs_clvnops.c (revision 328416) +++ head/sys/fs/nfsclient/nfs_clvnops.c (revision 328417) @@ -1,3541 +1,3541 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from nfs_vnops.c 8.16 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); /* * vnode op calls for Sun NFS version 2, 3 and 4 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_nfsclient_accesscache_flush_probe_func_t dtrace_nfscl_accesscache_flush_done_probe; uint32_t nfscl_accesscache_flush_done_id; dtrace_nfsclient_accesscache_get_probe_func_t dtrace_nfscl_accesscache_get_hit_probe, dtrace_nfscl_accesscache_get_miss_probe; uint32_t nfscl_accesscache_get_hit_id; uint32_t nfscl_accesscache_get_miss_id; dtrace_nfsclient_accesscache_load_probe_func_t dtrace_nfscl_accesscache_load_done_probe; uint32_t nfscl_accesscache_load_done_id; #endif /* !KDTRACE_HOOKS */ /* Defs */ #define TRUE 1 #define FALSE 0 extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_useacl; extern int nfscl_debuglevel; MALLOC_DECLARE(M_NEWNFSREQ); static vop_read_t nfsfifo_read; static vop_write_t nfsfifo_write; static vop_close_t nfsfifo_close; static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *, struct thread *); static vop_lookup_t nfs_lookup; static vop_create_t nfs_create; static vop_mknod_t nfs_mknod; static vop_open_t nfs_open; static vop_pathconf_t nfs_pathconf; static vop_close_t nfs_close; static vop_access_t nfs_access; static vop_getattr_t nfs_getattr; static vop_setattr_t nfs_setattr; static vop_read_t nfs_read; static vop_fsync_t nfs_fsync; static vop_remove_t nfs_remove; static vop_link_t nfs_link; static vop_rename_t nfs_rename; static vop_mkdir_t nfs_mkdir; static vop_rmdir_t nfs_rmdir; static vop_symlink_t nfs_symlink; static vop_readdir_t nfs_readdir; static vop_strategy_t nfs_strategy; static int nfs_lookitup(struct vnode *, char *, int, struct ucred *, struct thread *, struct nfsnode **); static int nfs_sillyrename(struct vnode *, struct vnode *, struct componentname *); static vop_access_t nfsspec_access; static vop_readlink_t nfs_readlink; static vop_print_t nfs_print; static vop_advlock_t nfs_advlock; static vop_advlockasync_t nfs_advlockasync; static vop_getacl_t nfs_getacl; static vop_setacl_t nfs_setacl; static vop_set_text_t nfs_set_text; /* * Global vfs data structures for nfs */ struct vop_vector newnfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = nfs_access, .vop_advlock = nfs_advlock, .vop_advlockasync = nfs_advlockasync, .vop_close = nfs_close, .vop_create = nfs_create, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_getpages = ncl_getpages, .vop_putpages = ncl_putpages, .vop_inactive = ncl_inactive, .vop_link = nfs_link, .vop_lookup = nfs_lookup, .vop_mkdir = nfs_mkdir, .vop_mknod = nfs_mknod, .vop_open = nfs_open, .vop_pathconf = nfs_pathconf, .vop_print = nfs_print, .vop_read = nfs_read, .vop_readdir = nfs_readdir, .vop_readlink = nfs_readlink, .vop_reclaim = ncl_reclaim, .vop_remove = nfs_remove, .vop_rename = nfs_rename, .vop_rmdir = nfs_rmdir, .vop_setattr = nfs_setattr, .vop_strategy = nfs_strategy, .vop_symlink = nfs_symlink, .vop_write = ncl_write, .vop_getacl = nfs_getacl, .vop_setacl = nfs_setacl, .vop_set_text = nfs_set_text, }; struct vop_vector newnfs_fifoops = { .vop_default = &fifo_specops, .vop_access = nfsspec_access, .vop_close = nfsfifo_close, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_inactive = ncl_inactive, .vop_pathconf = nfs_pathconf, .vop_print = nfs_print, .vop_read = nfsfifo_read, .vop_reclaim = ncl_reclaim, .vop_setattr = nfs_setattr, .vop_write = nfsfifo_write, }; static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap); static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, int namelen, struct ucred *cred, struct thread *td); static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td); static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, struct sillyrename *sp); /* * Global variables */ SYSCTL_DECL(_vfs_nfs); static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); static int nfs_prime_access_cache = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, &nfs_prime_access_cache, 0, "Prime NFS ACCESS cache when fetching attributes"); static int newnfs_commit_on_close = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW, &newnfs_commit_on_close, 0, "write+commit on close, else only write"); static int nfs_clean_pages_on_close = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); int newnfs_directio_enable = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, &newnfs_directio_enable, 0, "Enable NFS directio"); int nfs_keep_dirty_on_error; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW, &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned"); /* * This sysctl allows other processes to mmap a file that has been opened * O_DIRECT by a process. In general, having processes mmap the file while * Direct IO is in progress can lead to Data Inconsistencies. But, we allow * this by default to prevent DoS attacks - to prevent a malicious user from * opening up files O_DIRECT preventing other users from mmap'ing these * files. "Protected" environments where stricter consistency guarantees are * required can disable this knob. The process that opened the file O_DIRECT * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not * meaningful. */ int newnfs_directio_allow_mmap = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); #define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ | NFSACCESS_DELETE | NFSACCESS_LOOKUP) /* * SMP Locking Note : * The list of locks after the description of the lock is the ordering * of other locks acquired with the lock held. * np->n_mtx : Protects the fields in the nfsnode. VM Object Lock VI_MTX (acquired indirectly) * nmp->nm_mtx : Protects the fields in the nfsmount. rep->r_mtx * ncl_iod_mutex : Global lock, protects shared nfsiod state. * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. nmp->nm_mtx rep->r_mtx * rep->r_mtx : Protects the fields in an nfsreq. */ static int nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, struct ucred *cred, u_int32_t *retmode) { int error = 0, attrflag, i, lrupos; u_int32_t rmode; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, &rmode, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { lrupos = 0; mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { if (np->n_accesscache[i].uid == cred->cr_uid) { np->n_accesscache[i].mode = rmode; np->n_accesscache[i].stamp = time_second; break; } if (i > 0 && np->n_accesscache[i].stamp < np->n_accesscache[lrupos].stamp) lrupos = i; } if (i == NFS_ACCESSCACHESIZE) { np->n_accesscache[lrupos].uid = cred->cr_uid; np->n_accesscache[lrupos].mode = rmode; np->n_accesscache[lrupos].stamp = time_second; } mtx_unlock(&np->n_mtx); if (retmode != NULL) *retmode = rmode; KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } #ifdef KDTRACE_HOOKS if (error != 0) KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, error); #endif return (error); } /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. */ static int nfs_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; int error = 0, i, gotahit; u_int32_t mode, wmode, rmode; int v34 = NFS_ISV34(vp); struct nfsnode *np = VTONFS(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } /* * For nfs v3 or v4, check to see if we have done this recently, and if * so return our cached result instead of making an ACCESS call. * If not, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about. */ if (v34) { if (ap->a_accmode & VREAD) mode = NFSACCESS_READ; else mode = 0; if (vp->v_type != VDIR) { if (ap->a_accmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (ap->a_accmode & VAPPEND) mode |= NFSACCESS_EXTEND; if (ap->a_accmode & VEXEC) mode |= NFSACCESS_EXECUTE; if (ap->a_accmode & VDELETE) mode |= NFSACCESS_DELETE; } else { if (ap->a_accmode & VWRITE) mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); if (ap->a_accmode & VAPPEND) mode |= NFSACCESS_EXTEND; if (ap->a_accmode & VEXEC) mode |= NFSACCESS_LOOKUP; if (ap->a_accmode & VDELETE) mode |= NFSACCESS_DELETE; if (ap->a_accmode & VDELETE_CHILD) mode |= NFSACCESS_MODIFY; } /* XXX safety belt, only make blanket request if caching */ if (nfsaccess_cache_timeout > 0) { wmode = NFSACCESS_READ | NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_EXECUTE | NFSACCESS_DELETE | NFSACCESS_LOOKUP; } else { wmode = mode; } /* * Does our cached result allow us to give a definite yes to * this request? */ gotahit = 0; mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { if (time_second < (np->n_accesscache[i].stamp + nfsaccess_cache_timeout) && (np->n_accesscache[i].mode & mode) == mode) { NFSINCRGLOBAL(nfsstatsv1.accesscache_hits); gotahit = 1; } break; } } mtx_unlock(&np->n_mtx); #ifdef KDTRACE_HOOKS if (gotahit != 0) KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, ap->a_cred->cr_uid, mode); else KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, ap->a_cred->cr_uid, mode); #endif if (gotahit == 0) { /* * Either a no, or a don't know. Go to the wire. */ NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); error = nfs34_access_otw(vp, wmode, ap->a_td, ap->a_cred, &rmode); if (!error && (rmode & mode) != mode) error = EACCES; } return (error); } else { if ((error = nfsspec_access(ap)) != 0) { return (error); } /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. * After calling nfsspec_access, we should have the correct * file size cached. */ mtx_lock(&np->n_mtx); if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; mtx_unlock(&np->n_mtx); aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = ap->a_td; if (vp->v_type == VREG) error = ncl_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = ncl_readdirrpc(vp, &auio, ap->a_cred, ap->a_td); free(bp, M_TEMP); } else if (vp->v_type == VLNK) error = ncl_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } else mtx_unlock(&np->n_mtx); return (error); } } /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. */ /* ARGSUSED */ static int nfs_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; int error; int fmode = ap->a_mode; struct ucred *cred; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) return (EOPNOTSUPP); /* * For NFSv4, we need to do the Open Op before cache validation, * so that we conform to RFC3530 Sec. 9.3.1. */ if (NFS_ISV4(vp)) { error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td); if (error) { error = nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); return (error); } } /* * Now, if this Open will be doing reading, re-validate/flush the * cache, so that Close/Open coherency is maintained. */ mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == EINTR || error == EIO) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); if (vp->v_type == VDIR) np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; if (NFS_ISV4(vp)) np->n_change = vattr.va_filerev; } else { mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) || NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { if (vp->v_type == VDIR) np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == EINTR || error == EIO) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; if (NFS_ISV4(vp)) np->n_change = vattr.va_filerev; } } /* * If the object has >= 1 O_DIRECT active opens, we disable caching. */ if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { if (np->n_directio_opens == 0) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); return (error); } mtx_lock(&np->n_mtx); np->n_flag |= NNONCACHE; } np->n_directio_opens++; } /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) np->n_flag |= NWRITEOPENED; /* * If this is an open for writing, capture a reference to the * credentials, so they can be used by ncl_putpages(). Using * these write credentials is preferable to the credentials of * whatever thread happens to be doing the VOP_PUTPAGES() since * the write RPCs are less likely to fail with EACCES. */ if ((fmode & FWRITE) != 0) { cred = np->n_writecred; np->n_writecred = crhold(ap->a_cred); } else cred = NULL; mtx_unlock(&np->n_mtx); if (cred != NULL) crfree(cred); vnode_create_vobject(vp, vattr.va_size, ap->a_td); return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to ncl_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NFS Version 4 - flush the dirty buffers and commit them, if * nfscl_mustflush() says this is necessary. * It is necessary if there is no write delegation held, * in order to satisfy open/close coherency. * If the file isn't cached on local stable storage, * it may be necessary in order to detect "out of space" * errors from the server, if the write delegation * issued by the server doesn't allow the file to grow. */ /* ARGSUSED */ static int nfs_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsvattr nfsva; struct ucred *cred; int error = 0, ret, localcred = 0; int fmode = ap->a_fflag; if (NFSCL_FORCEDISM(vp->v_mount)) return (0); /* * During shutdown, a_cred isn't valid, so just use root. */ if (ap->a_cred == NOCRED) { cred = newnfs_getcred(); localcred = 1; } else { cred = ap->a_cred; } if (vp->v_type == VREG) { /* * Examine and clean dirty pages, regardless of NMODIFIED. * This closes a major hole in close-to-open consistency. * We want to push out all dirty pages (and buffers) on * close, regardless of whether they were dirtied by * mmap'ed writes or via write(). */ if (nfs_clean_pages_on_close && vp->v_object) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); if (NFS_ISV3(vp)) { /* * Under NFSv3 we have dirty buffers to dispose of. We * must flush them to the NFS server. We have the option * of waiting all the way through the commit rpc or just * waiting for the initial write. The default is to only * wait through the initial write so the data is in the * server's cache, which is roughly similar to the state * a standard disk subsystem leaves the file in on close(). * * We cannot clear the NMODIFIED bit in np->n_flag due to * potential races with other processes, and certainly * cannot clear it if we don't commit. * These races occur when there is no longer the old * traditional vnode locking implemented for Vnode Ops. */ int cm = newnfs_commit_on_close ? 1 : 0; error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0); /* np->n_flag &= ~NMODIFIED; */ } else if (NFS_ISV4(vp)) { if (nfscl_mustflush(vp) != 0) { int cm = newnfs_commit_on_close ? 1 : 0; error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0); /* * as above w.r.t races when clearing * NMODIFIED. * np->n_flag &= ~NMODIFIED; */ } } else { error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); } mtx_lock(&np->n_mtx); } /* * Invalidate the attribute cache in all cases. * An open is going to fetch fresh attrs any way, other procs * on this node that have file open will be forced to do an * otw attr fetch, but this is safe. * --> A user found that their RPC count dropped by 20% when * this was commented out and I can't see any requirement * for it, so I've disabled it when negative lookups are * enabled. (What does this have to do with negative lookup * caching? Well nothing, except it was reported by the * same user that needed negative lookup caching and I wanted * there to be a way to disable it to see if it * is the cause of some caching/coherency issue that might * crop up.) */ if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) { np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } mtx_unlock(&np->n_mtx); } if (NFS_ISV4(vp)) { /* * Get attributes so "change" is up to date. */ if (error == 0 && nfscl_mustflush(vp) != 0 && vp->v_type == VREG && (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) { ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva, NULL); if (!ret) { np->n_change = nfsva.na_filerev; (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 0); } } /* * and do the close. */ ret = nfsrpc_close(vp, 0, ap->a_td); if (!error && ret) error = ret; if (error) error = nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); } if (newnfs_directio_enable) KASSERT((np->n_directio_asyncwr == 0), ("nfs_close: dirty unflushed (%d) directio buffers\n", np->n_directio_asyncwr)); if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { mtx_lock(&np->n_mtx); KASSERT((np->n_directio_opens > 0), ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); np->n_directio_opens--; if (np->n_directio_opens == 0) np->n_flag &= ~NNONCACHE; mtx_unlock(&np->n_mtx); } if (localcred) NFSFREECRED(cred); return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct thread *td = curthread; /* XXX */ struct nfsnode *np = VTONFS(vp); int error = 0; struct nfsvattr nfsva; struct vattr *vap = ap->a_vap; struct vattr vattr; /* * Update local times for special files. */ mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; mtx_unlock(&np->n_mtx); /* * First look in the cache. */ if (ncl_getattrcache(vp, &vattr) == 0) { vap->va_type = vattr.va_type; vap->va_mode = vattr.va_mode; vap->va_nlink = vattr.va_nlink; vap->va_uid = vattr.va_uid; vap->va_gid = vattr.va_gid; vap->va_fsid = vattr.va_fsid; vap->va_fileid = vattr.va_fileid; vap->va_size = vattr.va_size; vap->va_blocksize = vattr.va_blocksize; vap->va_atime = vattr.va_atime; vap->va_mtime = vattr.va_mtime; vap->va_ctime = vattr.va_ctime; vap->va_gen = vattr.va_gen; vap->va_flags = vattr.va_flags; vap->va_rdev = vattr.va_rdev; vap->va_bytes = vattr.va_bytes; vap->va_filerev = vattr.va_filerev; /* * Get the local modify time for the case of a write * delegation. */ nfscl_deleggetmodtime(vp, &vap->va_mtime); return (0); } if (NFS_ISV34(vp) && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) { NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); if (ncl_getattrcache(vp, ap->a_vap) == 0) { nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); return (0); } } error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL); if (!error) error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0); if (!error) { /* * Get the local modify time for the case of a write * delegation. */ nfscl_deleggetmodtime(vp, &vap->va_mtime); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * nfs setattr call. */ static int nfs_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct thread *td = curthread; /* XXX */ struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Setting of flags and marking of atimes are not supported. */ if (vap->va_flags != VNOVAL) return (EOPNOTSUPP); /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * We run vnode_pager_setsize() early (why?), * we must set np->n_size now to avoid vinvalbuf * V_SAVE races that might setsize a lower * value. */ mtx_lock(&np->n_mtx); tsize = np->n_size; mtx_unlock(&np->n_mtx); error = ncl_meta_setsize(vp, ap->a_cred, td, vap->va_size); mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { tsize = np->n_size; mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, vap->va_size == 0 ? 0 : V_SAVE, td, 1); if (error != 0) { vnode_pager_setsize(vp, tsize); return (error); } /* * Call nfscl_delegmodtime() to set the modify time * locally, as required. */ nfscl_delegmodtime(vp); } else mtx_unlock(&np->n_mtx); /* * np->n_size has already been set to vap->va_size * in ncl_meta_setsize(). We must set it again since * nfs_loadattrcache() could be called through * ncl_meta_setsize() and could modify np->n_size. */ mtx_lock(&np->n_mtx); np->n_vattr.na_size = np->n_size = vap->va_size; mtx_unlock(&np->n_mtx); } } else { mtx_lock(&np->n_mtx); if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, td, 1); if (error == EINTR || error == EIO) return (error); } else mtx_unlock(&np->n_mtx); } error = nfs_setattrrpc(vp, vap, ap->a_cred, td); if (error && vap->va_size != VNOVAL) { mtx_lock(&np->n_mtx); np->n_size = np->n_vattr.na_size = tsize; vnode_pager_setsize(vp, tsize); mtx_unlock(&np->n_mtx); } return (error); } /* * Do an nfs setattr rpc. */ static int nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td) { struct nfsnode *np = VTONFS(vp); int error, ret, attrflag, i; struct nfsvattr nfsva; if (NFS_ISV34(vp)) { mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) np->n_accesscache[i].stamp = 0; np->n_flag |= NDELEGMOD; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); } error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid); return (error); } /* * nfs lookup call, one step at a time... * First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(struct vop_lookup_args *ap) { struct componentname *cnp = ap->a_cnp; struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct mount *mp = dvp->v_mount; int flags = cnp->cn_flags; struct vnode *newvp; struct nfsmount *nmp; struct nfsnode *np, *newnp; int error = 0, attrflag, dattrflag, ltype, ncticks; struct thread *td = cnp->cn_thread; struct nfsfh *nfhp; struct nfsvattr dnfsva, nfsva; struct vattr vattr; struct timespec nctime; *vpp = NULLVP; if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); if (dvp->v_type != VDIR) return (ENOTDIR); nmp = VFSTONFS(mp); np = VTONFS(dvp); /* For NFSv4, wait until any remove is done. */ mtx_lock(&np->n_mtx); while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) { np->n_flag |= NREMOVEWANT; (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0); } mtx_unlock(&np->n_mtx); if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) return (error); error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks); if (error > 0 && error != ENOENT) return (error); if (error == -1) { /* * Lookups of "." are special and always return the * current directory. cache_lookup() already handles * associated locking bookkeeping, etc. */ if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { /* XXX: Is this really correct? */ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } /* * We only accept a positive hit in the cache if the * change time of the file matches our cached copy. * Otherwise, we discard the cache entry and fallback * to doing a lookup RPC. We also only trust cache * entries for less than nm_nametimeo seconds. * * To better handle stale file handles and attributes, * clear the attribute cache of this node if it is a * leaf component, part of an open() call, and not * locally modified before fetching the attributes. * This should allow stale file handles to be detected * here where we can fall back to a LOOKUP RPC to * recover rather than having nfs_open() detect the * stale file handle and failing open(2) with ESTALE. */ newvp = *vpp; newnp = VTONFS(newvp); if (!(nmp->nm_flag & NFSMNT_NOCTO) && (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && !(newnp->n_flag & NMODIFIED)) { mtx_lock(&newnp->n_mtx); newnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); mtx_unlock(&newnp->n_mtx); } if (nfscl_nodeleg(newvp, 0) == 0 || ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) && VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_ctime, &nctime, ==))) { NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); if (dvp != newvp) vput(newvp); else vrele(newvp); *vpp = NULLVP; } else if (error == ENOENT) { if (dvp->v_iflag & VI_DOOMED) return (ENOENT); /* * We only accept a negative hit in the cache if the * modification time of the parent directory matches * the cached copy in the name cache entry. * Otherwise, we discard all of the negative cache * entries for this directory. We also only trust * negative cache entries for up to nm_negnametimeo * seconds. */ if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) && VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && timespeccmp(&vattr.va_mtime, &nctime, ==)) { NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); return (ENOENT); } cache_purge_negative(dvp); } error = 0; newvp = NULLVP; NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses); error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (error) { if (newvp != NULLVP) { vput(newvp); *vpp = NULLVP; } if (error != ENOENT) { if (NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* The requested file was not found. */ if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN)) { /* * XXX: UFS does a full VOP_ACCESS(dvp, * VWRITE) here instead of just checking * MNT_RDONLY. */ if (mp->mnt_flag & MNT_RDONLY) return (EROFS); cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) { /* * Cache the modification time of the parent * directory from the post-op attributes in * the name cache entry. The negative cache * entry will be ignored once the directory * has changed. Don't bother adding the entry * if the directory has already changed. */ mtx_lock(&np->n_mtx); if (timespeccmp(&np->n_vattr.na_mtime, &dnfsva.na_mtime, ==)) { mtx_unlock(&np->n_mtx); cache_enter_time(dvp, NULL, cnp, &dnfsva.na_mtime, NULL); } else mtx_unlock(&np->n_mtx); } return (ENOENT); } /* * Handle RENAME case... */ if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); return (EISDIR); } error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, LK_EXCLUSIVE); if (error) return (error); newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); *vpp = newvp; cnp->cn_flags |= SAVENAME; return (0); } if (flags & ISDOTDOT) { ltype = NFSVOPISLOCKED(dvp); error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); NFSVOPUNLOCK(dvp, 0); error = vfs_busy(mp, 0); NFSVOPLOCK(dvp, ltype | LK_RETRY); vfs_rel(mp); if (error == 0 && (dvp->v_iflag & VI_DOOMED)) { vfs_unbusy(mp); error = ENOENT; } if (error != 0) return (error); } NFSVOPUNLOCK(dvp, 0); error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, cnp->cn_lkflags); if (error == 0) newvp = NFSTOV(np); vfs_unbusy(mp); if (newvp != dvp) NFSVOPLOCK(dvp, ltype | LK_RETRY); if (dvp->v_iflag & VI_DOOMED) { if (error == 0) { if (newvp == dvp) vrele(newvp); else vput(newvp); } error = ENOENT; } if (error != 0) return (error); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); VREF(dvp); newvp = dvp; if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else { error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, cnp->cn_lkflags); if (error) return (error); newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && !(np->n_flag & NMODIFIED)) { /* * Flush the attribute cache when opening a * leaf node to ensure that fresh attributes * are fetched in nfs_open() since we did not * fetch attributes from the LOOKUP reply. */ mtx_lock(&np->n_mtx); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); mtx_unlock(&np->n_mtx); } } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0)) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime); *vpp = newvp; return (0); } /* * nfs read call. * Just call ncl_bioread() to do the work. */ static int nfs_read(struct vop_read_args *ap) { struct vnode *vp = ap->a_vp; switch (vp->v_type) { case VREG: return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); case VDIR: return (EISDIR); default: return (EOPNOTSUPP); } } /* * nfs readlink call */ static int nfs_readlink(struct vop_readlink_args *ap) { struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EINVAL); return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred)); } /* * Do a readlink rpc. * Called by ncl_doio() from below the buffer cache. */ int ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int error, ret, attrflag; struct nfsvattr nfsva; error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs read rpc call * Ditto above */ int ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int error, ret, attrflag; struct nfsvattr nfsva; struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); error = EIO; attrflag = 0; if (NFSHASPNFS(nmp)) error = nfscl_doiods(vp, uiop, NULL, NULL, NFSV4OPEN_ACCESSREAD, 0, cred, uiop->uio_td); NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); if (error != 0) error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs write call */ int ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *iomode, int *must_commit, int called_from_strategy) { struct nfsvattr nfsva; int error, attrflag, ret; struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); error = EIO; attrflag = 0; if (NFSHASPNFS(nmp)) error = nfscl_doiods(vp, uiop, iomode, must_commit, NFSV4OPEN_ACCESSWRITE, 0, cred, uiop->uio_td); NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); if (error != 0) error = nfsrpc_write(vp, uiop, iomode, must_commit, cred, uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy); if (attrflag) { if (VTONFS(vp)->n_flag & ND_NFSV4) ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1, 1); else ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) error = ret; } if (DOINGASYNC(vp)) *iomode = NFSWRITE_FILESYNC; if (error && NFS_ISV4(vp)) error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct nfsvattr nfsva, dnfsva; struct vnode *newvp = NULL; struct nfsnode *np = NULL, *dnp; struct nfsfh *nfhp; struct vattr vattr; int error = 0, attrflag, dattrflag; u_int32_t rdev; if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = vap->va_rdev; else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; else return (EOPNOTSUPP); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) return (error); error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (!error) { if (!nfhp) (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp) error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (!error) { newvp = NFSTOV(np); if (attrflag != 0) { error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); if (error != 0) vput(newvp); } } if (!error) { *vpp = newvp; } else if (NFS_ISV4(dvp)) { error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. */ /* ARGSUSED */ static int nfs_mknod(struct vop_mknod_args *ap) { return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); } static struct mtx nfs_cverf_mtx; MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", MTX_DEF); static nfsquad_t nfs_get_cverf(void) { static nfsquad_t cverf; nfsquad_t ret; static int cverf_initialized = 0; mtx_lock(&nfs_cverf_mtx); if (cverf_initialized == 0) { cverf.lval[0] = arc4random(); cverf.lval[1] = arc4random(); cverf_initialized = 1; } else cverf.qval++; ret = cverf; mtx_unlock(&nfs_cverf_mtx); return (ret); } /* * nfs file create call */ static int nfs_create(struct vop_create_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; struct nfsmount *nmp; struct nfsvattr dnfsva, nfsva; struct nfsfh *nfhp; nfsquad_t cverf; int error = 0, attrflag, dattrflag, fmode = 0; struct vattr vattr; /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) return (error); if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; dnp = VTONFS(dvp); nmp = VFSTONFS(vnode_mount(dvp)); again: /* For NFSv4, wait until any remove is done. */ mtx_lock(&dnp->n_mtx); while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) { dnp->n_flag |= NREMOVEWANT; (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0); } mtx_unlock(&dnp->n_mtx); cverf = nfs_get_cverf(); error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (!error) { if (nfhp == NULL) (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp != NULL) error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (!error) { newvp = NFSTOV(np); if (attrflag == 0) error = nfsrpc_getattr(newvp, cnp->cn_cred, cnp->cn_thread, &nfsva, NULL); if (error == 0) error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } if (error) { if (newvp != NULL) { vput(newvp); newvp = NULL; } if (NFS_ISV34(dvp) && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) { if (nfscl_checksattr(vap, &nfsva)) { error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, cnp->cn_thread, &nfsva, &attrflag, NULL); if (error && (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL)) { /* try again without setting uid/gid */ vap->va_uid = (uid_t)VNOVAL; vap->va_gid = (uid_t)VNOVAL; error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, cnp->cn_thread, &nfsva, &attrflag, NULL); } if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); if (error != 0) vput(newvp); } } if (!error) { if ((cnp->cn_flags & MAKEENTRY) && attrflag) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL); *ap->a_vpp = newvp; } else if (NFS_ISV4(dvp)) { error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. * - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(struct vop_remove_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name")); KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount")); if (vp->v_type == VDIR) error = EPERM; else if (vrefcnt(vp) == 1 || (np->n_sillyrename && VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1); if (error != EINTR && error != EIO) /* Do the rpc */ error = nfs_removerpc(dvp, vp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); mtx_lock(&np->n_mtx); np->n_attrstamp = 0; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); return (error); } /* * nfs file remove rpc called from nfs_inactive */ int ncl_removeit(struct sillyrename *sp, struct vnode *vp) { /* * Make sure that the directory vnode is still valid. * XXX we should lock sp->s_dvp here. */ if (sp->s_dvp->v_type == VBAD) return (0); return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen, sp->s_cred, NULL)); } /* * Nfs remove rpc, called from nfs_remove() and ncl_removeit(). */ static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, int namelen, struct ucred *cred, struct thread *td) { struct nfsvattr dnfsva; struct nfsnode *dnp = VTONFS(dvp); int error = 0, dattrflag; mtx_lock(&dnp->n_mtx); dnp->n_flag |= NREMOVEINPROG; mtx_unlock(&dnp->n_mtx); error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva, &dattrflag, NULL); mtx_lock(&dnp->n_mtx); if ((dnp->n_flag & NREMOVEWANT)) { dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG); mtx_unlock(&dnp->n_mtx); wakeup((caddr_t)dnp); } else { dnp->n_flag &= ~NREMOVEINPROG; mtx_unlock(&dnp->n_mtx); } if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (!dattrflag) { dnp->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } mtx_unlock(&dnp->n_mtx); if (error && NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs file rename call */ static int nfs_rename(struct vop_rename_args *ap) { struct vnode *fvp = ap->a_fvp; struct vnode *tvp = ap->a_tvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tdvp = ap->a_tdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct nfsnode *fnp = VTONFS(ap->a_fvp); struct nfsnode *tdnp = VTONFS(ap->a_tdvp); struct nfsv4node *newv4 = NULL; int error; KASSERT((tcnp->cn_flags & HASBUF) != 0 && (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name")); /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } if (fvp == tvp) { printf("nfs_rename: fvp == tvp (can't happen)\n"); error = 0; goto out; } if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0) goto out; /* * We have to flush B_DELWRI data prior to renaming * the file. If we don't, the delayed-write buffers * can be flushed out later after the file has gone stale * under NFSV3. NFSV2 does not have this problem because * ( as far as I can tell ) it flushes dirty buffers more * often. * * Skip the rename operation if the fsync fails, this can happen * due to the server's volume being full, when we pushed out data * that was written back to our cache earlier. Not checking for * this condition can result in potential (silent) data loss. */ error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); NFSVOPUNLOCK(fvp, 0); if (!error && tvp) error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); if (error) goto out; /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_thread); if (error == 0 && NFS_ISV4(tdvp)) { /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ - MALLOC(newv4, struct nfsv4node *, + newv4 = malloc( sizeof (struct nfsv4node) + tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1, M_NFSV4NODE, M_WAITOK); mtx_lock(&tdnp->n_mtx); mtx_lock(&fnp->n_mtx); if (fnp->n_v4 != NULL && fvp->v_type == VREG && (fnp->n_v4->n4_namelen != tcnp->cn_namelen || NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen) || tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen || NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, tdnp->n_fhp->nfh_len))) { #ifdef notdef { char nnn[100]; int nnnl; nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99; bcopy(tcnp->cn_nameptr, nnn, nnnl); nnn[nnnl] = '\0'; printf("ren replace=%s\n",nnn); } #endif - FREE((caddr_t)fnp->n_v4, M_NFSV4NODE); + free(fnp->n_v4, M_NFSV4NODE); fnp->n_v4 = newv4; newv4 = NULL; fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len; fnp->n_v4->n4_namelen = tcnp->cn_namelen; NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, tdnp->n_fhp->nfh_len); NFSBCOPY(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen); } mtx_unlock(&tdnp->n_mtx); mtx_unlock(&fnp->n_mtx); if (newv4 != NULL) - FREE((caddr_t)newv4, M_NFSV4NODE); + free(newv4, M_NFSV4NODE); } if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, struct sillyrename *sp) { return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). */ static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td) { struct nfsvattr fnfsva, tnfsva; struct nfsnode *fdnp = VTONFS(fdvp); struct nfsnode *tdnp = VTONFS(tdvp); int error = 0, fattrflag, tattrflag; error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp, tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag, &tattrflag, NULL, NULL); mtx_lock(&fdnp->n_mtx); fdnp->n_flag |= NMODIFIED; if (fattrflag != 0) { mtx_unlock(&fdnp->n_mtx); (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1); } else { fdnp->n_attrstamp = 0; mtx_unlock(&fdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); } mtx_lock(&tdnp->n_mtx); tdnp->n_flag |= NMODIFIED; if (tattrflag != 0) { mtx_unlock(&tdnp->n_mtx); (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1); } else { tdnp->n_attrstamp = 0; mtx_unlock(&tdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); } if (error && NFS_ISV4(fdvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs hard link create call */ static int nfs_link(struct vop_link_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *np, *tdnp; struct nfsvattr nfsva, dnfsva; int error = 0, attrflag, dattrflag; /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag, &dattrflag, NULL); tdnp = VTONFS(tdvp); mtx_lock(&tdnp->n_mtx); tdnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&tdnp->n_mtx); (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1); } else { tdnp->n_attrstamp = 0; mtx_unlock(&tdnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); } if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); else { np = VTONFS(vp); mtx_lock(&np->n_mtx); np->n_attrstamp = 0; mtx_unlock(&np->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL); } if (error && NFS_ISV4(vp)) error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, (gid_t)0); return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(struct vop_symlink_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsvattr nfsva, dnfsva; struct nfsfh *nfhp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; int error = 0, attrflag, dattrflag, ret; vap->va_type = VLNK; error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen, ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (nfhp) { ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); if (!ret) newvp = NFSTOV(np); else if (!error) error = ret; } if (newvp != NULL) { if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (!error) { /* * If we do not have an error and we could not extract the * newvp from the response due to the request being NFSv2, we * have to do a lookup in order to obtain a newvp to return. */ error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) newvp = NFSTOV(np); } if (error) { if (newvp) vput(newvp); if (NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } else { *ap->a_vpp = newvp; } dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) { cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL); } return (error); } /* * nfs make dir call */ static int nfs_mkdir(struct vop_mkdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = NULL, *dnp; struct vnode *newvp = NULL; struct vattr vattr; struct nfsfh *nfhp; struct nfsvattr nfsva, dnfsva; int error = 0, attrflag, dattrflag, ret; if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) return (error); vap->va_type = VDIR; error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } if (nfhp) { ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); if (!ret) { newvp = NFSTOV(np); if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } else if (!error) error = ret; } if (!error && newvp == NULL) { error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vput(newvp); if (NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, vap->va_gid); } else { /* * If negative lookup caching is enabled, I might as well * add an entry for this node. Not necessary for correctness, * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && dattrflag != 0) cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, &dnfsva.na_ctime); *ap->a_vpp = newvp; } return (error); } /* * nfs remove directory call */ static int nfs_rmdir(struct vop_rmdir_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *dnp; struct nfsvattr dnfsva; int error, dattrflag; if (dvp == vp) return (EINVAL); error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL); dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; if (dattrflag != 0) { mtx_unlock(&dnp->n_mtx); (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } cache_purge(dvp); cache_purge(vp); if (error && NFS_ISV4(dvp)) error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0, (gid_t)0); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(struct vop_readdir_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct uio *uio = ap->a_uio; ssize_t tresid, left; int error = 0; struct vattr vattr; if (ap->a_eofflag != NULL) *ap->a_eofflag = 0; if (vp->v_type != VDIR) return(EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { mtx_lock(&np->n_mtx); if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { mtx_unlock(&np->n_mtx); NFSINCRGLOBAL(nfsstatsv1.direofcache_hits); if (ap->a_eofflag != NULL) *ap->a_eofflag = 1; return (0); } else mtx_unlock(&np->n_mtx); } } /* * NFS always guarantees that directory entries don't straddle * DIRBLKSIZ boundaries. As such, we need to limit the size * to an exact multiple of DIRBLKSIZ, to avoid copying a partial * directory entry. */ left = uio->uio_resid % DIRBLKSIZ; if (left == uio->uio_resid) return (EINVAL); uio->uio_resid -= left; /* * Call ncl_bioread() to do the real work. */ tresid = uio->uio_resid; error = ncl_bioread(vp, uio, 0, ap->a_cred); if (!error && uio->uio_resid == tresid) { NFSINCRGLOBAL(nfsstatsv1.direofcache_misses); if (ap->a_eofflag != NULL) *ap->a_eofflag = 1; } /* Add the partial DIRBLKSIZ (left) back in. */ uio->uio_resid += left; return (error); } /* * Readdir rpc call. * Called from below the buffer cache by ncl_doio(). */ int ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; nfsuint64 *cookiep, cookie; struct nfsnode *dnp = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, eof, attrflag; KASSERT(uiop->uio_iovcnt == 1 && (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, ("nfs readdirrpc bad uio")); /* * If there is no cookie, assume directory was stale. */ ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; ncl_dircookie_unlock(dnp); } else { ncl_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) (void)ncl_fsinfo(nmp, vp, cred, td); error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, &attrflag, &eof, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { /* * We are now either at the end of the directory or have filled * the block. */ if (eof) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirrpc resid > 0\n"); ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; ncl_dircookie_unlock(dnp); } } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). */ int ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; nfsuint64 *cookiep, cookie; struct nfsnode *dnp = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, attrflag, eof; KASSERT(uiop->uio_iovcnt == 1 && (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, ("nfs readdirplusrpc bad uio")); /* * If there is no cookie, assume directory was stale. */ ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; ncl_dircookie_unlock(dnp); } else { ncl_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) (void)ncl_fsinfo(nmp, vp, cred, td); error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva, &attrflag, &eof, NULL); if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { /* * We are now either at end of the directory or have filled the * the block. */ if (eof) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirplusrpc resid > 0\n"); ncl_dircookie_lock(dnp); cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; ncl_dircookie_unlock(dnp); } } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... */ static int nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { struct sillyrename *sp; struct nfsnode *np; int error; short pid; unsigned int lticks; cache_purge(dvp); np = VTONFS(vp); KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); - MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), + sp = malloc(sizeof (struct sillyrename), M_NEWNFSREQ, M_WAITOK); sp->s_cred = crhold(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* * Fudge together a funny name. * Changing the format of the funny name to accommodate more * sillynames per directory. * The name is now changed to .nfs...4, where ticks is * CPU ticks since boot. */ pid = cnp->cn_thread->td_proc->p_pid; lticks = (unsigned int)ticks; for ( ; ; ) { sp->s_namlen = sprintf(sp->s_name, ".nfs.%08x.%04x4.4", lticks, pid); if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, NULL)) break; lticks++; } error = nfs_renameit(dvp, vp, cnp, sp); if (error) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); - free((caddr_t)sp, M_NEWNFSREQ); + free(sp, M_NEWNFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. * npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred, struct thread *td, struct nfsnode **npp) { struct vnode *newvp = NULL, *vp; struct nfsnode *np, *dnp = VTONFS(dvp); struct nfsfh *nfhp, *onfhp; struct nfsvattr nfsva, dnfsva; struct componentname cn; int error = 0, attrflag, dattrflag; u_int hash; error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, NULL); if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (npp && !error) { if (*npp != NULL) { np = *npp; vp = NFSTOV(np); /* * For NFSv4, check to see if it is the same name and * replace the name, if it is different. */ if (np->n_v4 != NULL && nfsva.na_type == VREG && (np->n_v4->n4_namelen != len || NFSBCMP(name, NFS4NODENAME(np->n_v4), len) || dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len))) { #ifdef notdef { char nnn[100]; int nnnl; nnnl = (len < 100) ? len : 99; bcopy(name, nnn, nnnl); nnn[nnnl] = '\0'; printf("replace=%s\n",nnn); } #endif - FREE((caddr_t)np->n_v4, M_NFSV4NODE); - MALLOC(np->n_v4, struct nfsv4node *, + free(np->n_v4, M_NFSV4NODE); + np->n_v4 = malloc( sizeof (struct nfsv4node) + dnp->n_fhp->nfh_len + len - 1, M_NFSV4NODE, M_WAITOK); np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; np->n_v4->n4_namelen = len; NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, dnp->n_fhp->nfh_len); NFSBCOPY(name, NFS4NODENAME(np->n_v4), len); } hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, FNV1_32_INIT); onfhp = np->n_fhp; /* * Rehash node for new file handle. */ vfs_hash_rehash(vp, hash); np->n_fhp = nfhp; if (onfhp != NULL) - FREE((caddr_t)onfhp, M_NFSFH); + free(onfhp, M_NFSFH); newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) { - FREE((caddr_t)nfhp, M_NFSFH); + free(nfhp, M_NFSFH); VREF(dvp); newvp = dvp; } else { cn.cn_nameptr = name; cn.cn_namelen = len; error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td, &np, NULL, LK_EXCLUSIVE); if (error) return (error); newvp = NFSTOV(np); } if (!attrflag && *npp == NULL) { if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } if (attrflag) (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 0, 1); } if (npp && *npp == NULL) { if (error) { if (newvp) { if (newvp == dvp) vrele(newvp); else vput(newvp); } } else *npp = np; } if (error && NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * Nfs Version 3 and 4 commit rpc */ int ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct thread *td) { struct nfsvattr nfsva; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *np; struct uio uio; int error, attrflag; np = VTONFS(vp); error = EIO; attrflag = 0; if (NFSHASPNFS(nmp) && (np->n_flag & NDSCOMMIT) != 0) { uio.uio_offset = offset; uio.uio_resid = cnt; error = nfscl_doiods(vp, &uio, NULL, NULL, NFSV4OPEN_ACCESSWRITE, 1, cred, td); if (error != 0) { mtx_lock(&np->n_mtx); np->n_flag &= ~NDSCOMMIT; mtx_unlock(&np->n_mtx); } } if (error != 0) { mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { mtx_unlock(&nmp->nm_mtx); return (0); } mtx_unlock(&nmp->nm_mtx); error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, &attrflag, NULL); } if (attrflag != 0) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (error != 0 && NFS_ISV4(vp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); return (error); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling ncl_asyncio(), otherwise just all ncl_doio() to do the * request. */ static int nfs_strategy(struct vop_strategy_args *ap) { struct buf *bp; struct vnode *vp; struct ucred *cr; bp = ap->a_bp; vp = ap->a_vp; KASSERT(bp->b_vp == vp, ("missing b_getvp")); KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); BUF_ASSERT_HELD(bp); if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno) bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize / DEV_BSIZE); if (bp->b_iocmd == BIO_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting * queue the request, wake it up and wait for completion * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread)) (void) ncl_doio(vp, bp, cr, curthread, 1); return (0); } /* * fsync vnode op. Just call ncl_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(struct vop_fsync_args *ap) { if (ap->a_vp->v_type != VREG) { /* * For NFS, metadata is changed synchronously on the server, * so there is nothing to flush. Also, ncl_flush() clears * the NMODIFIED flag and that shouldn't be done here for * directories. */ return (0); } return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. * If the called_from_renewthread argument is TRUE, it has been called * from the NFSv4 renew thread and, as such, cannot block indefinitely * waiting for a buffer write to complete. */ int ncl_flush(struct vnode *vp, int waitfor, struct thread *td, int commit, int called_from_renewthread) { struct nfsnode *np = VTONFS(vp); struct buf *bp; int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1, trycnt = 0; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; struct bufobj *bo; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; u_int bvecsize = 0, bveccount; if (called_from_renewthread != 0) slptimeo = hz; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; bo = &vp->v_bufobj; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but has not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, ncl_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV34(vp) && commit) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); /* * Count up how many buffers waiting for a commit. */ bveccount = 0; BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (!BUF_ISLOCKED(bp) && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. */ if (bveccount > NFS_COMMITBVECSIZ) { /* * Release the vnode interlock to avoid a lock * order reversal. */ BO_UNLOCK(bo); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); BO_LOCK(bo); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bvecpos >= bvecsize) break; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { nbp = TAILQ_NEXT(bp, b_bobufs); continue; } if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) { BUF_UNLOCK(bp); nbp = TAILQ_NEXT(bp, b_bobufs); continue; } BO_UNLOCK(bo); bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. * * NOTE: we are not clearing B_DONE here, so we have * to do it later on in this routine if we intend to * initiate I/O on the bp. * * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; vfs_busy_pages(bp, 1); BO_LOCK(bo); /* * bp is protected by being locked, but nbp is not * and vfs_busy_pages() may sleep. We have to * recalculate nbp. */ nbp = TAILQ_NEXT(bp, b_bobufs); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } BO_UNLOCK(bo); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = ncl_commit(vp, off, (int)(endoff - off), wcred, td); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = ncl_commit(vp, off, (int)size, bp->b_wcred, td); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) ncl_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); if (retv) { /* * Error, leave B_DELWRI intact */ vfs_unbusy_pages(bp); brelse(bp); } else { /* * Success, remove B_DELWRI ( bundirty() ). * * b_dirtyoff/b_dirtyend seem to be NFS * specific. We should probably move that * into bundirty(). XXX */ bufobj_wref(bo); bp->b_flags |= B_ASYNC; bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_dirtyoff = bp->b_dirtyend = 0; bufdone(bp); } } } /* * Start/do any write(s) that are required. */ loop: BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { if (waitfor != MNT_WAIT || passone) continue; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo); if (error == 0) { BUF_UNLOCK(bp); goto loop; } if (error == ENOLCK) { error = 0; goto loop; } if (called_from_renewthread != 0) { /* * Return EIO so the flush will be retried * later. */ error = EIO; goto done; } if (newnfs_sigintr(nmp, td)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { BUF_UNLOCK(bp); continue; } BO_UNLOCK(bo); bremfree(bp); if (passone || !commit) bp->b_flags |= B_ASYNC; else bp->b_flags |= B_ASYNC; bwrite(bp); if (newnfs_sigintr(nmp, td)) { error = EINTR; goto done; } goto loop; } if (passone) { passone = 0; BO_UNLOCK(bo); goto again; } if (waitfor == MNT_WAIT) { while (bo->bo_numoutput) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); if (called_from_renewthread != 0) { /* * Return EIO so that the flush will be * retried later. */ error = EIO; goto done; } error = newnfs_sigintr(nmp, td); if (error) goto done; if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } BO_LOCK(bo); } } if (bo->bo_dirty.bv_cnt != 0 && commit) { BO_UNLOCK(bo); goto loop; } /* * Wait for all the async IO requests to drain */ BO_UNLOCK(bo); mtx_lock(&np->n_mtx); while (np->n_directio_asyncwr > 0) { np->n_flag |= NFSYNCWAIT; error = newnfs_msleep(td, &np->n_directio_asyncwr, &np->n_mtx, slpflag | (PRIBIO + 1), "nfsfsync", 0); if (error) { if (newnfs_sigintr(nmp, td)) { mtx_unlock(&np->n_mtx); error = EINTR; goto done; } } } mtx_unlock(&np->n_mtx); } else BO_UNLOCK(bo); if (NFSHASPNFS(nmp)) { nfscl_layoutcommit(vp, td); /* * Invalidate the attribute cache, since writes to a DS * won't update the size attribute. */ mtx_lock(&np->n_mtx); np->n_attrstamp = 0; } else mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } if (commit && bo->bo_dirty.bv_cnt == 0 && bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; mtx_unlock(&np->n_mtx); done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); if (error == 0 && commit != 0 && waitfor == MNT_WAIT && (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 || np->n_directio_asyncwr != 0)) { if (trycnt++ < 5) { /* try, try again... */ passone = 1; wcred = NULL; bvec = NULL; bvecsize = 0; goto again; } vn_printf(vp, "ncl_flush failed"); error = called_from_renewthread != 0 ? EIO : EBUSY; } return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlock(struct vop_advlock_args *ap) { struct vnode *vp = ap->a_vp; struct ucred *cred; struct nfsnode *np = VTONFS(ap->a_vp); struct proc *p = (struct proc *)ap->a_id; struct thread *td = curthread; /* XXX */ struct vattr va; int ret, error = EOPNOTSUPP; u_quad_t size; if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) { if (vp->v_type != VREG) return (EINVAL); if ((ap->a_flags & F_POSIX) != 0) cred = p->p_ucred; else cred = td->td_ucred; NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { NFSVOPUNLOCK(vp, 0); return (EBADF); } /* * If this is unlocking a write locked region, flush and * commit them before unlocking. This is required by * RFC3530 Sec. 9.3.2. */ if (ap->a_op == F_UNLCK && nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id, ap->a_flags)) (void) ncl_flush(vp, MNT_WAIT, td, 1, 0); /* * Loop around doing the lock op, while a blocking lock * must wait for the lock op to succeed. */ do { ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && ap->a_op == F_SETLK) { NFSVOPUNLOCK(vp, 0); error = nfs_catnap(PZERO | PCATCH, ret, "ncladvl"); if (error) return (EINTR); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { NFSVOPUNLOCK(vp, 0); return (EBADF); } } } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && ap->a_op == F_SETLK); if (ret == NFSERR_DENIED) { NFSVOPUNLOCK(vp, 0); return (EAGAIN); } else if (ret == EINVAL || ret == EBADF || ret == EINTR) { NFSVOPUNLOCK(vp, 0); return (ret); } else if (ret != 0) { NFSVOPUNLOCK(vp, 0); return (EACCES); } /* * Now, if we just got a lock, invalidate data in the buffer * cache, as required, so that the coherency conforms with * RFC3530 Sec. 9.3.2. */ if (ap->a_op == F_SETLK) { if ((np->n_flag & NMODIFIED) == 0) { np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); ret = VOP_GETATTR(vp, &va, cred); } if ((np->n_flag & NMODIFIED) || ret || np->n_change != va.va_filerev) { (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); ret = VOP_GETATTR(vp, &va, cred); if (!ret) { np->n_mtime = va.va_mtime; np->n_change = va.va_filerev; } } /* Mark that a file lock has been acquired. */ mtx_lock(&np->n_mtx); np->n_flag |= NHASBEENLOCKED; mtx_unlock(&np->n_mtx); } NFSVOPUNLOCK(vp, 0); return (0); } else if (!NFS_ISV4(vp)) { error = NFSVOPLOCK(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; NFSVOPUNLOCK(vp, 0); error = lf_advlock(ap, &(vp->v_lockf), size); } else { if (nfs_advlock_p != NULL) error = nfs_advlock_p(ap); else { NFSVOPUNLOCK(vp, 0); error = ENOLCK; } } if (error == 0 && ap->a_op == F_SETLK) { error = NFSVOPLOCK(vp, LK_SHARED); if (error == 0) { /* Mark that a file lock has been acquired. */ mtx_lock(&np->n_mtx); np->n_flag |= NHASBEENLOCKED; mtx_unlock(&np->n_mtx); NFSVOPUNLOCK(vp, 0); } } } return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlockasync(struct vop_advlockasync_args *ap) { struct vnode *vp = ap->a_vp; u_quad_t size; int error; if (NFS_ISV4(vp)) return (EOPNOTSUPP); error = NFSVOPLOCK(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; NFSVOPUNLOCK(vp, 0); error = lf_advlockasync(ap, &(vp->v_lockf), size); } else { NFSVOPUNLOCK(vp, 0); error = EOPNOTSUPP; } return (error); } /* * Print out the contents of an nfsnode. */ static int nfs_print(struct vop_print_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid, (uintmax_t)np->n_vattr.na_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * This is the "real" nfs::bwrite(struct buf*). * We set B_CACHE if this is a VMIO buffer. */ int ncl_writebp(struct buf *bp, int force __unused, struct thread *td) { int oldflags, rtval; BUF_ASSERT_HELD(bp); if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } oldflags = bp->b_flags; bp->b_flags |= B_CACHE; /* * Undirty the bp. We will redirty it later if the I/O fails. */ bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_iocmd = BIO_WRITE; bufobj_wref(bp->b_bufobj); curthread->td_ru.ru_oublock++; /* * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ vfs_busy_pages(bp, 1); BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); if ((oldflags & B_ASYNC) != 0) return (0); rtval = bufwait(bp); if (oldflags & B_DELWRI) reassignbuf(bp); brelse(bp); return (rtval); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(struct vop_access_args *ap) { struct vattr *vap; struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; accmode_t accmode = ap->a_accmode; struct vattr vattr; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } vap = &vattr; error = VOP_GETATTR(vp, vap, cred); if (error) goto out; error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, accmode, cred, NULL); out: return error; } /* * Read wrapper for fifos. */ static int nfsfifo_read(struct vop_read_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); int error; /* * Set access flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NACC; vfs_timestamp(&np->n_atim); mtx_unlock(&np->n_mtx); error = fifo_specops.vop_read(ap); return error; } /* * Write wrapper for fifos. */ static int nfsfifo_write(struct vop_write_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NUPD; vfs_timestamp(&np->n_mtim); mtx_unlock(&np->n_mtx); return(fifo_specops.vop_write(ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; struct timespec ts; mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) { vfs_timestamp(&ts); if (np->n_flag & NACC) np->n_atim = ts; if (np->n_flag & NUPD) np->n_mtim = ts; np->n_flag |= NCHG; if (vrefcnt(vp) == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; mtx_unlock(&np->n_mtx); (void)VOP_SETATTR(vp, &vattr, ap->a_cred); goto out; } } mtx_unlock(&np->n_mtx); out: return (fifo_specops.vop_close(ap)); } /* * Just call ncl_writebp() with the force argument set to 1. * * NOTE: B_DONE may or may not be set in a_bp on call. */ static int nfs_bwrite(struct buf *bp) { return (ncl_writebp(bp, 1, curthread)); } struct buf_ops buf_ops_newnfs = { .bop_name = "buf_ops_nfs", .bop_write = nfs_bwrite, .bop_strategy = bufstrategy, .bop_sync = bufsync, .bop_bdflush = bufbdflush, }; static int nfs_getacl(struct vop_getacl_args *ap) { int error; if (ap->a_type != ACL_TYPE_NFS4) return (EOPNOTSUPP); error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, NULL); if (error > NFSERR_STALE) { (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); error = EPERM; } return (error); } static int nfs_setacl(struct vop_setacl_args *ap) { int error; if (ap->a_type != ACL_TYPE_NFS4) return (EOPNOTSUPP); error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, NULL); if (error > NFSERR_STALE) { (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); error = EPERM; } return (error); } static int nfs_set_text(struct vop_set_text_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np; /* * If the text file has been mmap'd, flush any dirty pages to the * buffer cache and then... * Make sure all writes are pushed to the NFS server. If this is not * done, the modify time of the file can change while the text * file is being executed. This will cause the process that is * executing the text file to be terminated. */ if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } /* Now, flush the buffer cache. */ ncl_flush(vp, MNT_WAIT, curthread, 0, 0); /* And, finally, make sure that n_mtime is up to date. */ np = VTONFS(vp); mtx_lock(&np->n_mtx); np->n_mtime = np->n_vattr.na_mtime; mtx_unlock(&np->n_mtx); vp->v_vflag |= VV_TEXT; return (0); } /* * Return POSIX pathconf information applicable to nfs filesystems. */ static int nfs_pathconf(struct vop_pathconf_args *ap) { struct nfsv3_pathconf pc; struct nfsvattr nfsva; struct vnode *vp = ap->a_vp; struct thread *td = curthread; int attrflag, error; if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || ap->a_name == _PC_NO_TRUNC)) || (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) { /* * Since only the above 4 a_names are returned by the NFSv3 * Pathconf RPC, there is no point in doing it for others. * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can * be used for _PC_NFS4_ACL as well. */ error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva, &attrflag, NULL); if (attrflag != 0) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (error != 0) return (error); } else { /* * For NFSv2 (or NFSv3 when not one of the above 4 a_names), * just fake them. */ pc.pc_linkmax = NFS_LINK_MAX; pc.pc_namemax = NFS_MAXNAMLEN; pc.pc_notrunc = 1; pc.pc_chownrestricted = 1; pc.pc_caseinsensitive = 0; pc.pc_casepreserving = 1; error = 0; } switch (ap->a_name) { case _PC_LINK_MAX: #ifdef _LP64 *ap->a_retval = pc.pc_linkmax; #else *ap->a_retval = MIN(LONG_MAX, pc.pc_linkmax); #endif break; case _PC_NAME_MAX: *ap->a_retval = pc.pc_namemax; break; case _PC_PIPE_BUF: if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) *ap->a_retval = PIPE_BUF; else error = EINVAL; break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = pc.pc_chownrestricted; break; case _PC_NO_TRUNC: *ap->a_retval = pc.pc_notrunc; break; case _PC_ACL_EXTENDED: *ap->a_retval = 0; break; case _PC_ACL_NFS4: if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) *ap->a_retval = 1; else *ap->a_retval = 0; break; case _PC_ACL_PATH_MAX: if (NFS_ISV4(vp)) *ap->a_retval = ACL_MAX_ENTRIES; else *ap->a_retval = 3; break; case _PC_MAC_PRESENT: *ap->a_retval = 0; break; case _PC_PRIO_IO: *ap->a_retval = 0; break; case _PC_SYNC_IO: *ap->a_retval = 0; break; case _PC_ALLOC_SIZE_MIN: *ap->a_retval = vp->v_mount->mnt_stat.f_bsize; break; case _PC_FILESIZEBITS: if (NFS_ISV34(vp)) *ap->a_retval = 64; else *ap->a_retval = 32; break; case _PC_REC_INCR_XFER_SIZE: *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_MAX_XFER_SIZE: *ap->a_retval = -1; /* means ``unlimited'' */ break; case _PC_REC_MIN_XFER_SIZE: *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_XFER_ALIGN: *ap->a_retval = PAGE_SIZE; break; case _PC_SYMLINK_MAX: *ap->a_retval = NFS_MAXPATHLEN; break; default: error = vop_stdpathconf(ap); break; } return (error); } Index: head/sys/fs/nfsclient/nfsnode.h =================================================================== --- head/sys/fs/nfsclient/nfsnode.h (revision 328416) +++ head/sys/fs/nfsclient/nfsnode.h (revision 328417) @@ -1,198 +1,198 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFSCLIENT_NFSNODE_H_ #define _NFSCLIENT_NFSNODE_H_ #include /* * Silly rename structure that hangs off the nfsnode until the name * can be removed by nfs_inactive() */ struct sillyrename { struct task s_task; struct ucred *s_cred; struct vnode *s_dvp; long s_namlen; char s_name[32]; }; /* * This structure is used to save the logical directory offset to * NFS cookie mappings. * The mappings are stored in a list headed * by n_cookies, as required. * There is one mapping for each NFS_DIRBLKSIZ bytes of directory information * stored in increasing logical offset byte order. */ #define NFSNUMCOOKIES 31 struct nfsdmap { LIST_ENTRY(nfsdmap) ndm_list; int ndm_eocookie; union { nfsuint64 ndmu3_cookies[NFSNUMCOOKIES]; uint64_t ndmu4_cookies[NFSNUMCOOKIES]; } ndm_un1; }; #define ndm_cookies ndm_un1.ndmu3_cookies #define ndm4_cookies ndm_un1.ndmu4_cookies struct nfs_accesscache { u_int32_t mode; /* ACCESS mode cache */ uid_t uid; /* credentials having mode */ time_t stamp; /* mode cache timestamp */ }; /* * The nfsnode is the nfs equivalent to ufs's inode. Any similarity * is purely coincidental. * There is a unique nfsnode allocated for each active file, * each current directory, each mounted-on file, text file, and the root. * An nfsnode is 'named' by its file handle. (nget/nfs_node.c) * If this structure exceeds 256 bytes (it is currently 256 using 4.4BSD-Lite * type definitions), file handles of > 32 bytes should probably be split out - * into a separate MALLOC()'d data structure. (Reduce the size of nfsfh_t by + * into a separate malloc()'d data structure. (Reduce the size of nfsfh_t by * changing the definition in nfsproto.h of NFS_SMALLFH.) * NB: Hopefully the current order of the fields is such that everything will * be well aligned and, therefore, tightly packed. */ struct nfsnode { struct mtx n_mtx; /* Protects all of these members */ struct lock n_excl; /* Exclusive helper for shared vnode lock */ u_quad_t n_size; /* Current size of file */ u_quad_t n_brev; /* Modify rev when cached */ u_quad_t n_lrev; /* Modify rev for lease */ struct nfsvattr n_vattr; /* Vnode attribute cache */ time_t n_attrstamp; /* Attr. cache timestamp */ struct nfs_accesscache n_accesscache[NFS_ACCESSCACHESIZE]; struct timespec n_mtime; /* Prev modify time. */ struct nfsfh *n_fhp; /* NFS File Handle */ struct vnode *n_vnode; /* associated vnode */ struct vnode *n_dvp; /* parent vnode */ struct lockf *n_lockf; /* Locking record of file */ int n_error; /* Save write error value */ union { struct timespec nf_atim; /* Special file times */ nfsuint64 nd_cookieverf; /* Cookie verifier (dir only) */ u_char nd4_cookieverf[NFSX_VERF]; } n_un1; union { struct timespec nf_mtim; off_t nd_direof; /* Dir. EOF offset cache */ } n_un2; union { struct sillyrename *nf_silly; /* Ptr to silly rename struct */ LIST_HEAD(, nfsdmap) nd_cook; /* cookies */ } n_un3; short n_fhsize; /* size in bytes, of fh */ u_int32_t n_flag; /* Flag for locking.. */ int n_directio_opens; int n_directio_asyncwr; u_int64_t n_change; /* old Change attribute */ struct nfsv4node *n_v4; /* extra V4 stuff */ struct ucred *n_writecred; /* Cred. for putpages */ }; #define n_atim n_un1.nf_atim #define n_mtim n_un2.nf_mtim #define n_sillyrename n_un3.nf_silly #define n_cookieverf n_un1.nd_cookieverf #define n4_cookieverf n_un1.nd4_cookieverf #define n_direofoffset n_un2.nd_direof #define n_cookies n_un3.nd_cook /* * Flags for n_flag */ #define NDIRCOOKIELK 0x00000001 /* Lock to serialize access to directory cookies */ #define NFSYNCWAIT 0x00000002 /* fsync waiting for all directio async writes to drain */ #define NMODIFIED 0x00000004 /* Might have a modified buffer in bio */ #define NWRITEERR 0x00000008 /* Flag write errors so close will know */ #define NCREATED 0x00000010 /* Opened by nfs_create() */ #define NTRUNCATE 0x00000020 /* Opened by nfs_setattr() */ #define NSIZECHANGED 0x00000040 /* File size has changed: need cache inval */ #define NNONCACHE 0x00000080 /* Node marked as noncacheable */ #define NACC 0x00000100 /* Special file accessed */ #define NUPD 0x00000200 /* Special file updated */ #define NCHG 0x00000400 /* Special file times changed */ #define NDELEGMOD 0x00000800 /* Modified delegation */ #define NDELEGRECALL 0x00001000 /* Recall in progress */ #define NREMOVEINPROG 0x00002000 /* Remove in progress */ #define NREMOVEWANT 0x00004000 /* Want notification that remove is done */ #define NLOCK 0x00008000 /* Sleep lock the node */ #define NLOCKWANT 0x00010000 /* Want the sleep lock */ #define NNOLAYOUT 0x00020000 /* Can't get a layout for this file */ #define NWRITEOPENED 0x00040000 /* Has been opened for writing */ #define NHASBEENLOCKED 0x00080000 /* Has been file locked. */ #define NDSCOMMIT 0x00100000 /* Commit is done via the DS. */ /* * Convert between nfsnode pointers and vnode pointers */ #define VTONFS(vp) ((struct nfsnode *)(vp)->v_data) #define NFSTOV(np) ((struct vnode *)(np)->n_vnode) #define NFS_TIMESPEC_COMPARE(T1, T2) (((T1)->tv_sec != (T2)->tv_sec) || ((T1)->tv_nsec != (T2)->tv_nsec)) #if defined(_KERNEL) /* * Prototypes for NFS vnode operations */ int ncl_getpages(struct vop_getpages_args *); int ncl_putpages(struct vop_putpages_args *); int ncl_write(struct vop_write_args *); int ncl_inactive(struct vop_inactive_args *); int ncl_reclaim(struct vop_reclaim_args *); /* other stuff */ int ncl_removeit(struct sillyrename *, struct vnode *); int ncl_nget(struct mount *, u_int8_t *, int, struct nfsnode **, int); nfsuint64 *ncl_getcookie(struct nfsnode *, off_t, int); void ncl_invaldir(struct vnode *); bool ncl_excl_start(struct vnode *); void ncl_excl_finish(struct vnode *, bool old_lock); void ncl_dircookie_lock(struct nfsnode *); void ncl_dircookie_unlock(struct nfsnode *); #endif /* _KERNEL */ #endif /* _NFSCLIENT_NFSNODE_H_ */ Index: head/sys/fs/nfsserver/nfs_nfsdcache.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdcache.c (revision 328416) +++ head/sys/fs/nfsserver/nfs_nfsdcache.c (revision 328417) @@ -1,1039 +1,1039 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Here is the basic algorithm: * First, some design criteria I used: * - I think a false hit is more serious than a false miss * - A false hit for an RPC that has Op(s) that order via seqid# must be * avoided at all cost * - A valid hit will probably happen a long time after the original reply * and the TCP socket that the original request was received on will no * longer be active * (The long time delay implies to me that LRU is not appropriate.) * - The mechanism will satisfy the requirements of ordering Ops with seqid#s * in them as well as minimizing the risk of redoing retried non-idempotent * Ops. * Because it is biased towards avoiding false hits, multiple entries with * the same xid are to be expected, especially for the case of the entry * in the cache being related to a seqid# sequenced Op. * * The basic algorithm I'm about to code up: * - Null RPCs bypass the cache and are just done * For TCP * - key on (as noted above, there can be several * entries with the same key) * When a request arrives: * For all that match key * - if RPC# != OR request_size != * - not a match with this one * - if NFSv4 and received on same TCP socket OR * received on a TCP connection created before the * entry was cached * - not a match with this one * (V2,3 clients might retry on same TCP socket) * - calculate checksum on first N bytes of NFS XDR * - if checksum != * - not a match for this one * If any of the remaining ones that match has a * seqid_refcnt > 0 * - not a match (go do RPC, using new cache entry) * If one match left * - a hit (reply from cache) * else * - miss (go do RPC, using new cache entry) * * During processing of NFSv4 request: * - set a flag when a non-idempotent Op is processed * - when an Op that uses a seqid# (Open,...) is processed * - if same seqid# as referenced entry in cache * - free new cache entry * - reply from referenced cache entry * else if next seqid# in order * - free referenced cache entry * - increment seqid_refcnt on new cache entry * - set pointer from Openowner/Lockowner to * new cache entry (aka reference it) * else if first seqid# in sequence * - increment seqid_refcnt on new cache entry * - set pointer from Openowner/Lockowner to * new cache entry (aka reference it) * * At end of RPC processing: * - if seqid_refcnt > 0 OR flagged non-idempotent on new * cache entry * - save reply in cache entry * - calculate checksum on first N bytes of NFS XDR * request * - note op and length of XDR request (in bytes) * - timestamp it * else * - free new cache entry * - Send reply (noting info for socket activity check, below) * * For cache entries saved above: * - if saved since seqid_refcnt was > 0 * - free when seqid_refcnt decrements to 0 * (when next one in sequence is processed above, or * when Openowner/Lockowner is discarded) * else { non-idempotent Op(s) } * - free when * - some further activity observed on same * socket * (I'm not yet sure how I'm going to do * this. Maybe look at the TCP connection * to see if the send_tcp_sequence# is well * past sent reply OR K additional RPCs * replied on same socket OR?) * OR * - when very old (hours, days, weeks?) * * For UDP (v2, 3 only), pretty much the old way: * - key on * (at most one entry for each key) * * When a Request arrives: * - if a match with entry via key * - if RPC marked In_progress * - discard request (don't send reply) * else * - reply from cache * - timestamp cache entry * else * - add entry to cache, marked In_progress * - do RPC * - when RPC done * - if RPC# non-idempotent * - mark entry Done (not In_progress) * - save reply * - timestamp cache entry * else * - free cache entry * - send reply * * Later, entries with saved replies are free'd a short time (few minutes) * after reply sent (timestamp). * Reference: Chet Juszczak, "Improving the Performance and Correctness * of an NFS Server", in Proc. Winter 1989 USENIX Conference, * pages 53-63. San Diego, February 1989. * for the UDP case. * nfsrc_floodlevel is set to the allowable upper limit for saved replies * for TCP. For V3, a reply won't be saved when the flood level is * hit. For V4, the non-idempotent Op will return NFSERR_RESOURCE in * that case. This level should be set high enough that this almost * never happens. */ #ifndef APPLEKEXT #include extern struct nfsstatsv1 nfsstatsv1; extern struct mtx nfsrc_udpmtx; extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; extern struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0; #endif /* !APPLEKEXT */ SYSCTL_DECL(_vfs_nfsd); static u_int nfsrc_tcphighwater = 0; static int sysctl_tcphighwater(SYSCTL_HANDLER_ARGS) { int error, newhighwater; newhighwater = nfsrc_tcphighwater; error = sysctl_handle_int(oidp, &newhighwater, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (newhighwater < 0) return (EINVAL); if (newhighwater >= nfsrc_floodlevel) nfsrc_floodlevel = newhighwater + newhighwater / 5; nfsrc_tcphighwater = newhighwater; return (0); } SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0, sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU", "High water mark for TCP cache entries"); static u_int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER; SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW, &nfsrc_udphighwater, 0, "High water mark for UDP cache entries"); static u_int nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT; SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW, &nfsrc_tcptimeout, 0, "Timeout for TCP entries in the DRC"); static u_int nfsrc_tcpnonidempotent = 1; SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW, &nfsrc_tcpnonidempotent, 0, "Enable the DRC for NFS over TCP"); static int nfsrc_udpcachesize = 0; static TAILQ_HEAD(, nfsrvcache) nfsrvudplru; static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE]; /* * and the reverse mapping from generic to Version 2 procedure numbers */ static int newnfsv2_procid[NFS_V3NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; #define nfsrc_hash(xid) (((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE) #define NFSRCUDPHASH(xid) \ (&nfsrvudphashtbl[nfsrc_hash(xid)]) #define NFSRCHASH(xid) \ (&nfsrchash_table[nfsrc_hash(xid)].tbl) #define NFSRCAHASH(xid) (&nfsrcahash_table[nfsrc_hash(xid)]) #define TRUE 1 #define FALSE 0 #define NFSRVCACHE_CHECKLEN 100 /* True iff the rpc reply is an nfs status ONLY! */ static int nfsv2_repstat[NFS_V3NPROCS] = { FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, }; /* * Will NFS want to work over IPv6 someday? */ #define NETFAMILY(rp) \ (((rp)->rc_flag & RC_INETIPV6) ? AF_INET6 : AF_INET) /* local functions */ static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp); static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp); static void nfsrc_lock(struct nfsrvcache *rp); static void nfsrc_unlock(struct nfsrvcache *rp); static void nfsrc_wanted(struct nfsrvcache *rp); static void nfsrc_freecache(struct nfsrvcache *rp); static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum); static void nfsrc_marksametcpconn(u_int64_t); /* * Return the correct mutex for this cache entry. */ static __inline struct mtx * nfsrc_cachemutex(struct nfsrvcache *rp) { if ((rp->rc_flag & RC_UDP) != 0) return (&nfsrc_udpmtx); return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx); } /* * Initialize the server request cache list */ APPLESTATIC void nfsrvd_initcache(void) { int i; static int inited = 0; if (inited) return; inited = 1; for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { LIST_INIT(&nfsrvudphashtbl[i]); LIST_INIT(&nfsrchash_table[i].tbl); LIST_INIT(&nfsrcahash_table[i].tbl); } TAILQ_INIT(&nfsrvudplru); nfsrc_tcpsavedreplies = 0; nfsrc_udpcachesize = 0; nfsstatsv1.srvcache_tcppeak = 0; nfsstatsv1.srvcache_size = 0; } /* * Get a cache entry for this request. Basically just malloc a new one * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest. */ APPLESTATIC int nfsrvd_getcache(struct nfsrv_descript *nd) { struct nfsrvcache *newrp; int ret; if (nd->nd_procnum == NFSPROC_NULL) panic("nfsd cache null"); - MALLOC(newrp, struct nfsrvcache *, sizeof (struct nfsrvcache), + newrp = malloc(sizeof (struct nfsrvcache), M_NFSRVCACHE, M_WAITOK); NFSBZERO((caddr_t)newrp, sizeof (struct nfsrvcache)); if (nd->nd_flag & ND_NFSV4) newrp->rc_flag = RC_NFSV4; else if (nd->nd_flag & ND_NFSV3) newrp->rc_flag = RC_NFSV3; else newrp->rc_flag = RC_NFSV2; newrp->rc_xid = nd->nd_retxid; newrp->rc_proc = nd->nd_procnum; newrp->rc_sockref = nd->nd_sockref; newrp->rc_cachetime = nd->nd_tcpconntime; if (nd->nd_flag & ND_SAMETCPCONN) newrp->rc_flag |= RC_SAMETCPCONN; if (nd->nd_nam2 != NULL) { newrp->rc_flag |= RC_UDP; ret = nfsrc_getudp(nd, newrp); } else { ret = nfsrc_gettcp(nd, newrp); } NFSEXITCODE2(0, nd); return (ret); } /* * For UDP (v2, v3): * - key on * (at most one entry for each key) */ static int nfsrc_getudp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) { struct nfsrvcache *rp; struct sockaddr_in *saddr; struct sockaddr_in6 *saddr6; struct nfsrvhashhead *hp; int ret = 0; struct mtx *mutex; mutex = nfsrc_cachemutex(newrp); hp = NFSRCUDPHASH(newrp->rc_xid); loop: mtx_lock(mutex); LIST_FOREACH(rp, hp, rc_hash) { if (newrp->rc_xid == rp->rc_xid && newrp->rc_proc == rp->rc_proc && (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) && nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) { if ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP, "nfsrc", 10 * hz); goto loop; } if (rp->rc_flag == 0) panic("nfs udp cache0"); rp->rc_flag |= RC_LOCKED; TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); if (rp->rc_flag & RC_INPROG) { nfsstatsv1.srvcache_inproghits++; mtx_unlock(mutex); ret = RC_DROPIT; } else if (rp->rc_flag & RC_REPSTATUS) { /* * V2 only. */ nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); nfsrvd_rephead(nd); *(nd->nd_errp) = rp->rc_status; ret = RC_REPLY; rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_UDPTIMEOUT; } else if (rp->rc_flag & RC_REPMBUF) { nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); nd->nd_mreq = m_copym(rp->rc_reply, 0, M_COPYALL, M_WAITOK); ret = RC_REPLY; rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_UDPTIMEOUT; } else { panic("nfs udp cache1"); } nfsrc_unlock(rp); - free((caddr_t)newrp, M_NFSRVCACHE); + free(newrp, M_NFSRVCACHE); goto out; } } nfsstatsv1.srvcache_misses++; atomic_add_int(&nfsstatsv1.srvcache_size, 1); nfsrc_udpcachesize++; newrp->rc_flag |= RC_INPROG; saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); if (saddr->sin_family == AF_INET) newrp->rc_inet = saddr->sin_addr.s_addr; else if (saddr->sin_family == AF_INET6) { saddr6 = (struct sockaddr_in6 *)saddr; NFSBCOPY((caddr_t)&saddr6->sin6_addr, (caddr_t)&newrp->rc_inet6, sizeof (struct in6_addr)); newrp->rc_flag |= RC_INETIPV6; } LIST_INSERT_HEAD(hp, newrp, rc_hash); TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru); mtx_unlock(mutex); nd->nd_rp = newrp; ret = RC_DOIT; out: NFSEXITCODE2(0, nd); return (ret); } /* * Update a request cache entry after the rpc has been done */ APPLESTATIC struct nfsrvcache * nfsrvd_updatecache(struct nfsrv_descript *nd) { struct nfsrvcache *rp; struct nfsrvcache *retrp = NULL; mbuf_t m; struct mtx *mutex; rp = nd->nd_rp; if (!rp) panic("nfsrvd_updatecache null rp"); nd->nd_rp = NULL; mutex = nfsrc_cachemutex(rp); mtx_lock(mutex); nfsrc_lock(rp); if (!(rp->rc_flag & RC_INPROG)) panic("nfsrvd_updatecache not inprog"); rp->rc_flag &= ~RC_INPROG; if (rp->rc_flag & RC_UDP) { TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); } /* * Reply from cache is a special case returned by nfsrv_checkseqid(). */ if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) { nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); nd->nd_repstat = 0; if (nd->nd_mreq) mbuf_freem(nd->nd_mreq); if (!(rp->rc_flag & RC_REPMBUF)) panic("reply from cache"); nd->nd_mreq = m_copym(rp->rc_reply, 0, M_COPYALL, M_WAITOK); rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; nfsrc_unlock(rp); goto out; } /* * If rc_refcnt > 0, save it * For UDP, save it if ND_SAVEREPLY is set * For TCP, save it if ND_SAVEREPLY and nfsrc_tcpnonidempotent is set */ if (nd->nd_repstat != NFSERR_DONTREPLY && (rp->rc_refcnt > 0 || ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) || ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) && nfsrc_tcpsavedreplies <= nfsrc_floodlevel && nfsrc_tcpnonidempotent))) { if (rp->rc_refcnt > 0) { if (!(rp->rc_flag & RC_NFSV4)) panic("update_cache refcnt"); rp->rc_flag |= RC_REFCNT; } if ((nd->nd_flag & ND_NFSV2) && nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) { rp->rc_status = nd->nd_repstat; rp->rc_flag |= RC_REPSTATUS; mtx_unlock(mutex); } else { if (!(rp->rc_flag & RC_UDP)) { atomic_add_int(&nfsrc_tcpsavedreplies, 1); if (nfsrc_tcpsavedreplies > nfsstatsv1.srvcache_tcppeak) nfsstatsv1.srvcache_tcppeak = nfsrc_tcpsavedreplies; } mtx_unlock(mutex); m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK); mtx_lock(mutex); rp->rc_reply = m; rp->rc_flag |= RC_REPMBUF; mtx_unlock(mutex); } if (rp->rc_flag & RC_UDP) { rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_UDPTIMEOUT; nfsrc_unlock(rp); } else { rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; if (rp->rc_refcnt > 0) nfsrc_unlock(rp); else retrp = rp; } } else { nfsrc_freecache(rp); mtx_unlock(mutex); } out: NFSEXITCODE2(0, nd); return (retrp); } /* * Invalidate and, if possible, free an in prog cache entry. * Must not sleep. */ APPLESTATIC void nfsrvd_delcache(struct nfsrvcache *rp) { struct mtx *mutex; mutex = nfsrc_cachemutex(rp); if (!(rp->rc_flag & RC_INPROG)) panic("nfsrvd_delcache not in prog"); mtx_lock(mutex); rp->rc_flag &= ~RC_INPROG; if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED)) nfsrc_freecache(rp); mtx_unlock(mutex); } /* * Called after nfsrvd_updatecache() once the reply is sent, to update * the entry's sequence number and unlock it. The argument is * the pointer returned by nfsrvd_updatecache(). */ APPLESTATIC void nfsrvd_sentcache(struct nfsrvcache *rp, int have_seq, uint32_t seq) { struct nfsrchash_bucket *hbp; KASSERT(rp->rc_flag & RC_LOCKED, ("nfsrvd_sentcache not locked")); if (have_seq) { hbp = NFSRCAHASH(rp->rc_sockref); mtx_lock(&hbp->mtx); rp->rc_tcpseq = seq; if (rp->rc_acked != RC_NO_ACK) LIST_INSERT_HEAD(&hbp->tbl, rp, rc_ahash); rp->rc_acked = RC_NO_ACK; mtx_unlock(&hbp->mtx); } nfsrc_unlock(rp); } /* * Get a cache entry for TCP * - key on * (allow multiple entries for a given key) */ static int nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *newrp) { struct nfsrvcache *rp, *nextrp; int i; struct nfsrvcache *hitrp; struct nfsrvhashhead *hp, nfsrc_templist; int hit, ret = 0; struct mtx *mutex; mutex = nfsrc_cachemutex(newrp); hp = NFSRCHASH(newrp->rc_xid); newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum); tryagain: mtx_lock(mutex); hit = 1; LIST_INIT(&nfsrc_templist); /* * Get all the matches and put them on the temp list. */ rp = LIST_FIRST(hp); while (rp != LIST_END(hp)) { nextrp = LIST_NEXT(rp, rc_hash); if (newrp->rc_xid == rp->rc_xid && (!(rp->rc_flag & RC_INPROG) || ((newrp->rc_flag & RC_SAMETCPCONN) && newrp->rc_sockref == rp->rc_sockref)) && (newrp->rc_flag & rp->rc_flag & RC_NFSVERS) && newrp->rc_proc == rp->rc_proc && ((newrp->rc_flag & RC_NFSV4) && newrp->rc_sockref != rp->rc_sockref && newrp->rc_cachetime >= rp->rc_cachetime) && newrp->rc_reqlen == rp->rc_reqlen && newrp->rc_cksum == rp->rc_cksum) { LIST_REMOVE(rp, rc_hash); LIST_INSERT_HEAD(&nfsrc_templist, rp, rc_hash); } rp = nextrp; } /* * Now, use nfsrc_templist to decide if there is a match. */ i = 0; LIST_FOREACH(rp, &nfsrc_templist, rc_hash) { i++; if (rp->rc_refcnt > 0) { hit = 0; break; } } /* * Can be a hit only if one entry left. * Note possible hit entry and put nfsrc_templist back on hash * list. */ if (i != 1) hit = 0; hitrp = rp = LIST_FIRST(&nfsrc_templist); while (rp != LIST_END(&nfsrc_templist)) { nextrp = LIST_NEXT(rp, rc_hash); LIST_REMOVE(rp, rc_hash); LIST_INSERT_HEAD(hp, rp, rc_hash); rp = nextrp; } if (LIST_FIRST(&nfsrc_templist) != LIST_END(&nfsrc_templist)) panic("nfs gettcp cache templist"); if (hit) { rp = hitrp; if ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP, "nfsrc", 10 * hz); goto tryagain; } if (rp->rc_flag == 0) panic("nfs tcp cache0"); rp->rc_flag |= RC_LOCKED; if (rp->rc_flag & RC_INPROG) { nfsstatsv1.srvcache_inproghits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); ret = RC_DROPIT; } else if (rp->rc_flag & RC_REPSTATUS) { /* * V2 only. */ nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); ret = RC_REPLY; nfsrvd_rephead(nd); *(nd->nd_errp) = rp->rc_status; rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; } else if (rp->rc_flag & RC_REPMBUF) { nfsstatsv1.srvcache_nonidemdonehits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); ret = RC_REPLY; nd->nd_mreq = m_copym(rp->rc_reply, 0, M_COPYALL, M_WAITOK); rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; } else { panic("nfs tcp cache1"); } nfsrc_unlock(rp); - free((caddr_t)newrp, M_NFSRVCACHE); + free(newrp, M_NFSRVCACHE); goto out; } nfsstatsv1.srvcache_misses++; atomic_add_int(&nfsstatsv1.srvcache_size, 1); /* * For TCP, multiple entries for a key are allowed, so don't * chain it into the hash table until done. */ newrp->rc_cachetime = NFSD_MONOSEC; newrp->rc_flag |= RC_INPROG; LIST_INSERT_HEAD(hp, newrp, rc_hash); mtx_unlock(mutex); nd->nd_rp = newrp; ret = RC_DOIT; out: NFSEXITCODE2(0, nd); return (ret); } /* * Lock a cache entry. */ static void nfsrc_lock(struct nfsrvcache *rp) { struct mtx *mutex; mutex = nfsrc_cachemutex(rp); mtx_assert(mutex, MA_OWNED); while ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; (void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0); } rp->rc_flag |= RC_LOCKED; } /* * Unlock a cache entry. */ static void nfsrc_unlock(struct nfsrvcache *rp) { struct mtx *mutex; mutex = nfsrc_cachemutex(rp); mtx_lock(mutex); rp->rc_flag &= ~RC_LOCKED; nfsrc_wanted(rp); mtx_unlock(mutex); } /* * Wakeup anyone wanting entry. */ static void nfsrc_wanted(struct nfsrvcache *rp) { if (rp->rc_flag & RC_WANTED) { rp->rc_flag &= ~RC_WANTED; wakeup((caddr_t)rp); } } /* * Free up the entry. * Must not sleep. */ static void nfsrc_freecache(struct nfsrvcache *rp) { struct nfsrchash_bucket *hbp; LIST_REMOVE(rp, rc_hash); if (rp->rc_flag & RC_UDP) { TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); nfsrc_udpcachesize--; } else if (rp->rc_acked != RC_NO_SEQ) { hbp = NFSRCAHASH(rp->rc_sockref); mtx_lock(&hbp->mtx); if (rp->rc_acked == RC_NO_ACK) LIST_REMOVE(rp, rc_ahash); mtx_unlock(&hbp->mtx); } nfsrc_wanted(rp); if (rp->rc_flag & RC_REPMBUF) { mbuf_freem(rp->rc_reply); if (!(rp->rc_flag & RC_UDP)) atomic_add_int(&nfsrc_tcpsavedreplies, -1); } - FREE((caddr_t)rp, M_NFSRVCACHE); + free(rp, M_NFSRVCACHE); atomic_add_int(&nfsstatsv1.srvcache_size, -1); } /* * Clean out the cache. Called when nfsserver module is unloaded. */ APPLESTATIC void nfsrvd_cleancache(void) { struct nfsrvcache *rp, *nextrp; int i; for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_lock(&nfsrchash_table[i].mtx); LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp) nfsrc_freecache(rp); mtx_unlock(&nfsrchash_table[i].mtx); } mtx_lock(&nfsrc_udpmtx); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) { nfsrc_freecache(rp); } } nfsstatsv1.srvcache_size = 0; mtx_unlock(&nfsrc_udpmtx); nfsrc_tcpsavedreplies = 0; } #define HISTSIZE 16 /* * The basic rule is to get rid of entries that are expired. */ void nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final) { struct nfsrchash_bucket *hbp; struct nfsrvcache *rp, *nextrp; int force, lastslot, i, j, k, tto, time_histo[HISTSIZE]; time_t thisstamp; static time_t udp_lasttrim = 0, tcp_lasttrim = 0; static int onethread = 0, oneslot = 0; if (sockref != 0) { hbp = NFSRCAHASH(sockref); mtx_lock(&hbp->mtx); LIST_FOREACH_SAFE(rp, &hbp->tbl, rc_ahash, nextrp) { if (sockref == rp->rc_sockref) { if (SEQ_GEQ(snd_una, rp->rc_tcpseq)) { rp->rc_acked = RC_ACK; LIST_REMOVE(rp, rc_ahash); } else if (final) { rp->rc_acked = RC_NACK; LIST_REMOVE(rp, rc_ahash); } } } mtx_unlock(&hbp->mtx); } if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0) return; if (NFSD_MONOSEC != udp_lasttrim || nfsrc_udpcachesize >= (nfsrc_udphighwater + nfsrc_udphighwater / 2)) { mtx_lock(&nfsrc_udpmtx); udp_lasttrim = NFSD_MONOSEC; TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) { if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) && rp->rc_refcnt == 0 && ((rp->rc_flag & RC_REFCNT) || udp_lasttrim > rp->rc_timestamp || nfsrc_udpcachesize > nfsrc_udphighwater)) nfsrc_freecache(rp); } mtx_unlock(&nfsrc_udpmtx); } if (NFSD_MONOSEC != tcp_lasttrim || nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) { force = nfsrc_tcphighwater / 4; if (force > 0 && nfsrc_tcpsavedreplies + force >= nfsrc_tcphighwater) { for (i = 0; i < HISTSIZE; i++) time_histo[i] = 0; i = 0; lastslot = NFSRVCACHE_HASHSIZE - 1; } else { force = 0; if (NFSD_MONOSEC != tcp_lasttrim) { i = 0; lastslot = NFSRVCACHE_HASHSIZE - 1; } else { lastslot = i = oneslot; if (++oneslot >= NFSRVCACHE_HASHSIZE) oneslot = 0; } } tto = nfsrc_tcptimeout; tcp_lasttrim = NFSD_MONOSEC; for (; i <= lastslot; i++) { mtx_lock(&nfsrchash_table[i].mtx); LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp) { if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) && rp->rc_refcnt == 0) { if ((rp->rc_flag & RC_REFCNT) || tcp_lasttrim > rp->rc_timestamp || rp->rc_acked == RC_ACK) { nfsrc_freecache(rp); continue; } if (force == 0) continue; /* * The timestamps range from roughly the * present (tcp_lasttrim) to the present * + nfsrc_tcptimeout. Generate a simple * histogram of where the timeouts fall. */ j = rp->rc_timestamp - tcp_lasttrim; if (j >= tto) j = HISTSIZE - 1; else if (j < 0) j = 0; else j = j * HISTSIZE / tto; time_histo[j]++; } } mtx_unlock(&nfsrchash_table[i].mtx); } if (force) { /* * Trim some more with a smaller timeout of as little * as 20% of nfsrc_tcptimeout to try and get below * 80% of the nfsrc_tcphighwater. */ k = 0; for (i = 0; i < (HISTSIZE - 2); i++) { k += time_histo[i]; if (k > force) break; } k = tto * (i + 1) / HISTSIZE; if (k < 1) k = 1; thisstamp = tcp_lasttrim + k; for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_lock(&nfsrchash_table[i].mtx); LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp) { if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) && rp->rc_refcnt == 0 && ((rp->rc_flag & RC_REFCNT) || thisstamp > rp->rc_timestamp || rp->rc_acked == RC_ACK)) nfsrc_freecache(rp); } mtx_unlock(&nfsrchash_table[i].mtx); } } } atomic_store_rel_int(&onethread, 0); } /* * Add a seqid# reference to the cache entry. */ APPLESTATIC void nfsrvd_refcache(struct nfsrvcache *rp) { struct mtx *mutex; if (rp == NULL) /* For NFSv4.1, there is no cache entry. */ return; mutex = nfsrc_cachemutex(rp); mtx_lock(mutex); if (rp->rc_refcnt < 0) panic("nfs cache refcnt"); rp->rc_refcnt++; mtx_unlock(mutex); } /* * Dereference a seqid# cache entry. */ APPLESTATIC void nfsrvd_derefcache(struct nfsrvcache *rp) { struct mtx *mutex; mutex = nfsrc_cachemutex(rp); mtx_lock(mutex); if (rp->rc_refcnt <= 0) panic("nfs cache derefcnt"); rp->rc_refcnt--; if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG))) nfsrc_freecache(rp); mtx_unlock(mutex); } /* * Calculate the length of the mbuf list and a checksum on the first up to * NFSRVCACHE_CHECKLEN bytes. */ static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum) { int len = 0, cklen; mbuf_t m; m = m1; while (m) { len += mbuf_len(m); m = mbuf_next(m); } cklen = (len > NFSRVCACHE_CHECKLEN) ? NFSRVCACHE_CHECKLEN : len; *cksum = in_cksum(m1, cklen); return (len); } /* * Mark a TCP connection that is seeing retries. Should never happen for * NFSv4. */ static void nfsrc_marksametcpconn(u_int64_t sockref) { } Index: head/sys/fs/nfsserver/nfs_nfsdport.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdport.c (revision 328416) +++ head/sys/fs/nfsserver/nfs_nfsdport.c (revision 328417) @@ -1,3441 +1,3441 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include /* * Functions that perform the vfs operations required by the routines in * nfsd_serv.c. It is hoped that this change will make the server more * portable. */ #include #include #include #include #include FEATURE(nfsd, "NFSv4 server"); extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern int nfsrv_useacl; extern int newnfs_numnfsd; extern struct mount nfsv4root_mnt; extern struct nfsrv_stablefirst nfsrv_stablefirst; extern void (*nfsd_call_servertimer)(void); extern SVCPOOL *nfsrvd_pool; extern struct nfsv4lock nfsd_suspend_lock; extern struct nfsclienthashhead *nfsclienthash; extern struct nfslockhashhead *nfslockhash; extern struct nfssessionhash *nfssessionhash; extern int nfsrv_sessionhashsize; extern struct nfsstatsv1 nfsstatsv1; struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; NFSDLOCKMUTEX; struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE]; struct mtx nfsrc_udpmtx; struct mtx nfs_v4root_mutex; struct nfsrvfh nfs_rootfh, nfs_pubfh; int nfs_pubfhset = 0, nfs_rootfhset = 0; struct proc *nfsd_master_proc = NULL; int nfsd_debuglevel = 0; static pid_t nfsd_master_pid = (pid_t)-1; static char nfsd_master_comm[MAXCOMLEN + 1]; static struct timeval nfsd_master_start; static uint32_t nfsv4_sysid = 0; static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, struct ucred *); int nfsrv_enable_crossmntpt = 1; static int nfs_commit_blks; static int nfs_commit_miss; extern int nfsrv_issuedelegs; extern int nfsrv_dolocallocks; extern int nfsd_enable_stringtouid; SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW, &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, ""); SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW, &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 0, "Debug level for NFS server"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW, &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names"); #define MAX_REORDERED_RPC 16 #define NUM_HEURISTIC 1031 #define NHUSE_INIT 64 #define NHUSE_INC 16 #define NHUSE_MAX 2048 static struct nfsheur { struct vnode *nh_vp; /* vp to match (unreferenced pointer) */ off_t nh_nextoff; /* next offset for sequential detection */ int nh_use; /* use count for selection */ int nh_seqcount; /* heuristic */ } nfsheur[NUM_HEURISTIC]; /* * Heuristic to detect sequential operation. */ static struct nfsheur * nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp) { struct nfsheur *nh; int hi, try; /* Locate best candidate. */ try = 32; hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC; nh = &nfsheur[hi]; while (try--) { if (nfsheur[hi].nh_vp == vp) { nh = &nfsheur[hi]; break; } if (nfsheur[hi].nh_use > 0) --nfsheur[hi].nh_use; hi = (hi + 1) % NUM_HEURISTIC; if (nfsheur[hi].nh_use < nh->nh_use) nh = &nfsheur[hi]; } /* Initialize hint if this is a new file. */ if (nh->nh_vp != vp) { nh->nh_vp = vp; nh->nh_nextoff = uio->uio_offset; nh->nh_use = NHUSE_INIT; if (uio->uio_offset == 0) nh->nh_seqcount = 4; else nh->nh_seqcount = 1; } /* Calculate heuristic. */ if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) || uio->uio_offset == nh->nh_nextoff) { /* See comments in vfs_vnops.c:sequential_heuristic(). */ nh->nh_seqcount += howmany(uio->uio_resid, 16384); if (nh->nh_seqcount > IO_SEQMAX) nh->nh_seqcount = IO_SEQMAX; } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC * imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) { /* Probably a reordered RPC, leave seqcount alone. */ } else if (nh->nh_seqcount > 1) { nh->nh_seqcount /= 2; } else { nh->nh_seqcount = 0; } nh->nh_use += NHUSE_INC; if (nh->nh_use > NHUSE_MAX) nh->nh_use = NHUSE_MAX; return (nh); } /* * Get attributes into nfsvattr structure. */ int nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p, int vpislocked) { int error, lockedit = 0; if (vpislocked == 0) { /* * When vpislocked == 0, the vnode is either exclusively * locked by this thread or not locked by this thread. * As such, shared lock it, if not exclusively locked. */ if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { lockedit = 1; NFSVOPLOCK(vp, LK_SHARED | LK_RETRY); } } error = VOP_GETATTR(vp, &nvap->na_vattr, cred); if (lockedit != 0) NFSVOPUNLOCK(vp, 0); NFSEXITCODE(error); return (error); } /* * Get a file handle for a vnode. */ int nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p) { int error; NFSBZERO((caddr_t)fhp, sizeof(fhandle_t)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fhp->fh_fid); NFSEXITCODE(error); return (error); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use * vn_writechk() and VOP_ACCESSX() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write * case. * 2 - The owner is to be given access irrespective of mode bits for some * operations, so that processes that chmod after opening a file don't * break. */ int nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred, struct nfsexstuff *exp, struct thread *p, int override, int vpislocked, u_int32_t *supportedtypep) { struct vattr vattr; int error = 0, getret = 0; if (vpislocked == 0) { if (NFSVOPLOCK(vp, LK_SHARED) != 0) { error = EPERM; goto out; } } if (accmode & VWRITE) { /* Just vn_writechk() changed to check rdonly */ /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character * device resident on the file system. */ if (NFSVNO_EXRDONLY(exp) || (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: error = EROFS; default: break; } } /* * If there's shared text associated with * the inode, try to free it up once. If * we fail, we can't allow writing. */ if (VOP_IS_TEXT(vp) && error == 0) error = ETXTBSY; } if (error != 0) { if (vpislocked == 0) NFSVOPUNLOCK(vp, 0); goto out; } /* * Should the override still be applied when ACLs are enabled? */ error = VOP_ACCESSX(vp, accmode, cred, p); if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) { /* * Try again with VEXPLICIT_DENY, to see if the test for * deletion is supported. */ error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p); if (error == 0) { if (vp->v_type == VDIR) { accmode &= ~(VDELETE | VDELETE_CHILD); accmode |= VWRITE; error = VOP_ACCESSX(vp, accmode, cred, p); } else if (supportedtypep != NULL) { *supportedtypep &= ~NFSACCESS_DELETE; } } } /* * Allow certain operations for the owner (reads and writes * on files that are already open). */ if (override != NFSACCCHK_NOOVERRIDE && (error == EPERM || error == EACCES)) { if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT)) error = 0; else if (override & NFSACCCHK_ALLOWOWNER) { getret = VOP_GETATTR(vp, &vattr, cred); if (getret == 0 && cred->cr_uid == vattr.va_uid) error = 0; } } if (vpislocked == 0) NFSVOPUNLOCK(vp, 0); out: NFSEXITCODE(error); return (error); } /* * Set attribute(s) vnop. */ int nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error; error = VOP_SETATTR(vp, &nvap->na_vattr, cred); NFSEXITCODE(error); return (error); } /* * Set up nameidata for a lookup() call and do it. */ int nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p, struct vnode **retdirp) { struct componentname *cnp = &ndp->ni_cnd; int i; struct iovec aiov; struct uio auio; int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen; int error = 0; char *cp; *retdirp = NULL; cnp->cn_nameptr = cnp->cn_pnbuf; ndp->ni_lcf = 0; /* * Extract and set starting directory. */ if (dp->v_type != VDIR) { if (islocked) vput(dp); else vrele(dp); nfsvno_relpathbuf(ndp); error = ENOTDIR; goto out1; } if (islocked) NFSVOPUNLOCK(dp, 0); VREF(dp); *retdirp = dp; if (NFSVNO_EXRDONLY(exp)) cnp->cn_flags |= RDONLY; ndp->ni_segflg = UIO_SYSSPACE; if (nd->nd_flag & ND_PUBLOOKUP) { ndp->ni_loopcnt = 0; if (cnp->cn_pnbuf[0] == '/') { vrele(dp); /* * Check for degenerate pathnames here, since lookup() * panics on them. */ for (i = 1; i < ndp->ni_pathlen; i++) if (cnp->cn_pnbuf[i] != '/') break; if (i == ndp->ni_pathlen) { error = NFSERR_ACCES; goto out; } dp = rootvnode; VREF(dp); } } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) || (nd->nd_flag & ND_NFSV4) == 0) { /* * Only cross mount points for NFSv4 when doing a * mount while traversing the file system above * the mount point, unless nfsrv_enable_crossmntpt is set. */ cnp->cn_flags |= NOCROSSMOUNT; } /* * Initialize for scan, set ni_startdir and bump ref on dp again * because lookup() will dereference ni_startdir. */ cnp->cn_thread = p; ndp->ni_startdir = dp; ndp->ni_rootdir = rootvnode; ndp->ni_topdir = NULL; if (!lockleaf) cnp->cn_flags |= LOCKLEAF; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* * Call lookup() to do the real work. If an error occurs, * ndp->ni_vp and ni_dvp are left uninitialized or NULL and * we do not have to dereference anything before returning. * In either case ni_startdir will be dereferenced and NULLed * out. */ error = lookup(ndp); if (error) break; /* * Check for encountering a symbolic link. Trivial * termination occurs if no symlink encountered. */ if ((cnp->cn_flags & ISSYMLINK) == 0) { if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) nfsvno_relpathbuf(ndp); if (ndp->ni_vp && !lockleaf) NFSVOPUNLOCK(ndp->ni_vp, 0); break; } /* * Validate symlink */ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) NFSVOPUNLOCK(ndp->ni_dvp, 0); if (!(nd->nd_flag & ND_PUBLOOKUP)) { error = EINVAL; goto badlink2; } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; goto badlink2; } if (ndp->ni_pathlen > 1) cp = uma_zalloc(namei_zone, M_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = NULL; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink1: if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); badlink2: vrele(ndp->ni_dvp); vput(ndp->ni_vp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink1; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink1; } /* * Adjust or replace path */ if (ndp->ni_pathlen > 1) { NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen); uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; /* * Cleanup refs for next loop and check if root directory * should replace current directory. Normally ni_dvp * becomes the new base directory and is cleaned up when * we loop. Explicitly null pointers after invalidation * to clarify operation. */ vput(ndp->ni_vp); ndp->ni_vp = NULL; if (cnp->cn_pnbuf[0] == '/') { vrele(ndp->ni_dvp); ndp->ni_dvp = ndp->ni_rootdir; VREF(ndp->ni_dvp); } ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } if (!lockleaf) cnp->cn_flags &= ~LOCKLEAF; out: if (error) { nfsvno_relpathbuf(ndp); ndp->ni_vp = NULL; ndp->ni_dvp = NULL; ndp->ni_startdir = NULL; } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } out1: NFSEXITCODE2(error, nd); return (error); } /* * Set up a pathname buffer and return a pointer to it and, optionally * set a hash pointer. */ void nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp) { struct componentname *cnp = &ndp->ni_cnd; cnp->cn_flags |= (NOMACCHECK | HASBUF); cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); if (hashpp != NULL) *hashpp = NULL; *bufpp = cnp->cn_pnbuf; } /* * Release the above path buffer, if not released by nfsvno_namei(). */ void nfsvno_relpathbuf(struct nameidata *ndp) { if ((ndp->ni_cnd.cn_flags & HASBUF) == 0) panic("nfsrelpath"); uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } /* * Readlink vnode op into an mbuf list. */ int nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp) { struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; struct iovec *ivp = iv; struct uio io, *uiop = &io; struct mbuf *mp, *mp2 = NULL, *mp3 = NULL; int i, len, tlen, error = 0; len = 0; i = 0; while (len < NFS_MAXPATHLEN) { NFSMGET(mp); MCLGET(mp, M_WAITOK); mp->m_len = M_SIZE(mp); if (len == 0) { mp3 = mp2 = mp; } else { mp2->m_next = mp; mp2 = mp; } if ((len + mp->m_len) > NFS_MAXPATHLEN) { mp->m_len = NFS_MAXPATHLEN - len; len = NFS_MAXPATHLEN; } else { len += mp->m_len; } ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; i++; ivp++; } uiop->uio_iov = iv; uiop->uio_iovcnt = i; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; error = VOP_READLINK(vp, uiop, cred); if (error) { m_freem(mp3); *lenp = 0; goto out; } if (uiop->uio_resid > 0) { len -= uiop->uio_resid; tlen = NFSM_RNDUP(len); nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len); } *lenp = len; *mpp = mp3; *mpendp = mp; out: NFSEXITCODE(error); return (error); } /* * Read vnode op call into mbuf list. */ int nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, struct thread *p, struct mbuf **mpp, struct mbuf **mpendp) { struct mbuf *m; int i; struct iovec *iv; struct iovec *iv2; int error = 0, len, left, siz, tlen, ioflag = 0; struct mbuf *m2 = NULL, *m3; struct uio io, *uiop = &io; struct nfsheur *nh; len = left = NFSM_RNDUP(cnt); m3 = NULL; /* * Generate the mbuf list with the uio_iov ref. to it. */ i = 0; while (left > 0) { NFSMGET(m); MCLGET(m, M_WAITOK); m->m_len = 0; siz = min(M_TRAILINGSPACE(m), left); left -= siz; i++; if (m3) m2->m_next = m; else m3 = m; m2 = m; } - MALLOC(iv, struct iovec *, i * sizeof (struct iovec), + iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv2 = iv; m = m3; left = len; i = 0; while (left > 0) { if (m == NULL) panic("nfsvno_read iov"); siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { iv->iov_base = mtod(m, caddr_t) + m->m_len; iv->iov_len = siz; m->m_len += siz; left -= siz; iv++; i++; } m = m->m_next; } uiop->uio_iovcnt = i; uiop->uio_offset = off; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = NULL; nh = nfsrv_sequential_heuristic(uiop, vp); ioflag |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); - FREE((caddr_t)iv2, M_TEMP); + free(iv2, M_TEMP); if (error) { m_freem(m3); *mpp = NULL; goto out; } nh->nh_nextoff = uiop->uio_offset; tlen = len - uiop->uio_resid; cnt = cnt < tlen ? cnt : tlen; tlen = NFSM_RNDUP(cnt); if (tlen == 0) { m_freem(m3); m3 = NULL; } else if (len != tlen || tlen != cnt) nfsrv_adj(m3, len - tlen, tlen - cnt); *mpp = m3; *mpendp = m2; out: NFSEXITCODE(error); return (error); } /* * Write vnode op from an mbuf list. */ int nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable, struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p) { struct iovec *ivp; int i, len; struct iovec *iv; int ioflags, error; struct uio io, *uiop = &io; struct nfsheur *nh; - MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, + ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv = ivp; uiop->uio_iovcnt = cnt; i = mtod(mp, caddr_t) + mp->m_len - cp; len = retlen; while (len > 0) { if (mp == NULL) panic("nfsvno_write"); if (i > 0) { i = min(i, len); ivp->iov_base = cp; ivp->iov_len = i; ivp++; len -= i; } mp = mp->m_next; if (mp) { i = mp->m_len; cp = mtod(mp, caddr_t); } } if (stable == NFSWRITE_UNSTABLE) ioflags = IO_NODELOCKED; else ioflags = (IO_SYNC | IO_NODELOCKED); uiop->uio_resid = retlen; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; NFSUIOPROC(uiop, p); uiop->uio_offset = off; nh = nfsrv_sequential_heuristic(uiop, vp); ioflags |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; error = VOP_WRITE(vp, uiop, ioflags, cred); if (error == 0) nh->nh_nextoff = uiop->uio_offset; - FREE((caddr_t)iv, M_TEMP); + free(iv, M_TEMP); NFSEXITCODE(error); return (error); } /* * Common code for creating a regular file (plus special files for V2). */ int nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp, int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp) { u_quad_t tempsize; int error; error = nd->nd_repstat; if (!error && ndp->ni_vp == NULL) { if (nvap->na_type == VREG || nvap->na_type == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); if (!error) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; error = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, nd->nd_cred); if (error != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; error = NFSERR_NOTSUPP; } } } /* * NFS V2 Only. nfsrvd_mknod() does this for V3. * (This implies, just get out on an error.) */ } else if (nvap->na_type == VCHR || nvap->na_type == VBLK || nvap->na_type == VFIFO) { if (nvap->na_type == VCHR && rdev == 0xffffffff) nvap->na_type = VFIFO; if (nvap->na_type != VFIFO && (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV, 0))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } nvap->na_rdev = rdev; error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); if (error) goto out; } else { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = ENXIO; goto out; } *vpp = ndp->ni_vp; } else { /* * Handle cases where error is already set and/or * the file exists. * 1 - clean up the lookup * 2 - iff !error and na_size set, truncate it */ vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); *vpp = ndp->ni_vp; if (ndp->ni_dvp == *vpp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (!error && nvap->na_size != VNOVAL) { error = nfsvno_accchk(*vpp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (!error) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; error = VOP_SETATTR(*vpp, &nvap->na_vattr, nd->nd_cred); } } if (error) vput(*vpp); } out: NFSEXITCODE(error); return (error); } /* * Do a mknod vnode op. */ int nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p) { int error = 0; enum vtype vtyp; vtyp = nvap->na_type; /* * Iff doesn't exist, create it. */ if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); error = NFSERR_BADTYPE; goto out; } if (vtyp == VSOCK) { vrele(ndp->ni_startdir); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); } else { if (nvap->na_type != VFIFO && (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vput(ndp->ni_dvp); goto out; } error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); vrele(ndp->ni_startdir); /* * Since VOP_MKNOD returns the ni_vp, I can't * see any reason to do the lookup. */ } out: NFSEXITCODE(error); return (error); } /* * Mkdir vnode op. */ int nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp != NULL) { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); nfsvno_relpathbuf(ndp); error = EEXIST; goto out; } error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); out: NFSEXITCODE(error); return (error); } /* * symlink vnode op. */ int nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp, int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { int error = 0; if (ndp->ni_vp) { vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); error = EEXIST; goto out; } error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr, pathcp); vput(ndp->ni_dvp); vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); /* * Although FreeBSD still had the lookup code in * it for 7/current, there doesn't seem to be any * point, since VOP_SYMLINK() returns the ni_vp. * Just vput it for v2. */ if (!not_v2 && !error) vput(ndp->ni_vp); out: NFSEXITCODE(error); return (error); } /* * Parse symbolic link arguments. - * This function has an ugly side effect. It will MALLOC() an area for + * This function has an ugly side effect. It will malloc() an area for * the symlink and set iov_base to point to it, only if it succeeds. * So, if it returns with uiop->uio_iov->iov_base != NULL, that must * be FREE'd later. */ int nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap, struct thread *p, char **pathcpp, int *lenp) { u_int32_t *tl; char *pathcp = NULL; int error = 0, len; struct nfsv2_sattr *sp; *pathcpp = NULL; *lenp = 0; if ((nd->nd_flag & ND_NFSV3) && (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p))) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); len = fxdr_unsigned(int, *tl); if (len > NFS_MAXPATHLEN || len <= 0) { error = EBADRPC; goto nfsmout; } - MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK); + pathcp = malloc(len + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, pathcp, len); if (error) goto nfsmout; if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode); } *pathcpp = pathcp; *lenp = len; NFSEXITCODE2(0, nd); return (0); nfsmout: if (pathcp) free(pathcp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Remove a non-directory object. */ int nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *vp; int error = 0; vp = ndp->ni_vp; if (vp->v_type == VDIR) error = NFSERR_ISDIR; else if (is_v4) error = nfsrv_checkremove(vp, 1, p); if (!error) error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Remove a directory. */ int nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *vp; int error = 0; vp = ndp->ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (ndp->ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_vflag & VV_ROOT) error = EBUSY; out: if (!error) error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd); if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vput(vp); if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0) nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Rename vnode op. */ int nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp, u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p) { struct vnode *fvp, *tvp, *tdvp; int error = 0; fvp = fromndp->ni_vp; if (ndstat) { vrele(fromndp->ni_dvp); vrele(fvp); error = ndstat; goto out1; } tdvp = tondp->ni_dvp; tvp = tondp->ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST; goto out; } if (tvp->v_type == VDIR && tvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } /* * A rename to '.' or '..' results in a prematurely * unlocked vnode on FreeBSD5, so I'm just going to fail that * here. */ if ((tondp->ni_cnd.cn_namelen == 1 && tondp->ni_cnd.cn_nameptr[0] == '.') || (tondp->ni_cnd.cn_namelen == 2 && tondp->ni_cnd.cn_nameptr[0] == '.' && tondp->ni_cnd.cn_nameptr[1] == '.')) { error = EINVAL; goto out; } } if (fvp->v_type == VDIR && fvp->v_mountedhere) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp->v_mount != tdvp->v_mount) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV; goto out; } if (fvp == tdvp) { error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL; goto out; } if (fvp == tvp) { /* * If source and destination are the same, there is nothing to * do. Set error to -1 to indicate this. */ error = -1; goto out; } if (ndflag & ND_NFSV4) { if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(fvp, 0, p); NFSVOPUNLOCK(fvp, 0); } else error = EPERM; if (tvp && !error) error = nfsrv_checkremove(tvp, 1, p); } else { /* * For NFSv2 and NFSv3, try to get rid of the delegation, so * that the NFSv4 client won't be confused by the rename. * Since nfsd_recalldelegation() can only be called on an * unlocked vnode at this point and fvp is the file that will * still exist after the rename, just do fvp. */ nfsd_recalldelegation(fvp, p); } out: if (!error) { error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp, &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp, &tondp->ni_cnd); } else { if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fromndp->ni_dvp); vrele(fvp); if (error == -1) error = 0; } vrele(tondp->ni_startdir); nfsvno_relpathbuf(tondp); out1: vrele(fromndp->ni_startdir); nfsvno_relpathbuf(fromndp); NFSEXITCODE(error); return (error); } /* * Link vnode op. */ int nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred, struct thread *p, struct nfsexstuff *exp) { struct vnode *xp; int error = 0; xp = ndp->ni_vp; if (xp != NULL) { error = EEXIST; } else { xp = ndp->ni_dvp; if (vp->v_mount != xp->v_mount) error = EXDEV; } if (!error) { NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) == 0) error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd); else error = EPERM; if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); NFSVOPUNLOCK(vp, 0); } else { if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vrele(ndp->ni_vp); } nfsvno_relpathbuf(ndp); NFSEXITCODE(error); return (error); } /* * Do the fsync() appropriate for the commit. */ int nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred, struct thread *td) { int error = 0; /* * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of * file is done. At this time VOP_FSYNC does not accept offset and * byte count parameters so call VOP_FSYNC the whole file for now. * The same is true for NFSv4: RFC 3530 Sec. 14.2.3. * File systems that do not use the buffer cache (as indicated * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC(). */ if (cnt == 0 || cnt > MAX_COMMIT_COUNT || (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) { /* * Give up and do the whole thing */ if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); } else { /* * Locate and synchronously write any buffers that fall * into the requested range. Note: we are assuming that * f_iosize is a power of 2. */ int iosize = vp->v_mount->mnt_stat.f_iosize; int iomask = iosize - 1; struct bufobj *bo; daddr_t lblkno; /* * Align to iosize boundary, super-align to page boundary. */ if (off & iomask) { cnt += off & iomask; off &= ~(u_quad_t)iomask; } if (off & PAGE_MASK) { cnt += off & PAGE_MASK; off &= ~(u_quad_t)PAGE_MASK; } lblkno = off / iosize; if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, off, off + cnt, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_object); } bo = &vp->v_bufobj; BO_LOCK(bo); while (cnt > 0) { struct buf *bp; /* * If we have a buffer and it is marked B_DELWRI we * have to lock and write it. Otherwise the prior * write is assumed to have already been committed. * * gbincore() can return invalid buffers now so we * have to check that bit as well (though B_DELWRI * should not be set if B_INVAL is set there could be * a race here since we haven't locked the buffer). */ if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); continue; /* retry */ } if ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI) { bremfree(bp); bp->b_flags &= ~B_ASYNC; bwrite(bp); ++nfs_commit_miss; } else BUF_UNLOCK(bp); BO_LOCK(bo); } ++nfs_commit_blks; if (cnt < iosize) break; cnt -= iosize; ++lblkno; } BO_UNLOCK(bo); } NFSEXITCODE(error); return (error); } /* * Statfs vnode op. */ int nfsvno_statfs(struct vnode *vp, struct statfs *sf) { int error; error = VFS_STATFS(vp->v_mount, sf); if (error == 0) { /* * Since NFS handles these values as unsigned on the * wire, there is no way to represent negative values, * so set them to 0. Without this, they will appear * to be very large positive values for clients like * Solaris10. */ if (sf->f_bavail < 0) sf->f_bavail = 0; if (sf->f_ffree < 0) sf->f_ffree = 0; } NFSEXITCODE(error); return (error); } /* * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but * must handle nfsrv_opencheck() calls after any other access checks. */ void nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp, nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp, int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create, NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, struct nfsexstuff *exp, struct vnode **vpp) { struct vnode *vp = NULL; u_quad_t tempsize; struct nfsexstuff nes; if (ndp->ni_vp == NULL) nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, NULL, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { if (ndp->ni_vp == NULL) { vrele(ndp->ni_startdir); nd->nd_repstat = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr); vput(ndp->ni_dvp); nfsvno_relpathbuf(ndp); if (!nd->nd_repstat) { if (*exclusive_flagp) { *exclusive_flagp = 0; NFSVNO_ATTRINIT(nvap); nvap->na_atime.tv_sec = cverf[0]; nvap->na_atime.tv_nsec = cverf[1]; nd->nd_repstat = VOP_SETATTR(ndp->ni_vp, &nvap->na_vattr, cred); if (nd->nd_repstat != 0) { vput(ndp->ni_vp); ndp->ni_vp = NULL; nd->nd_repstat = NFSERR_NOTSUPP; } else NFSSETBIT_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS); } else { nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp); } } vp = ndp->ni_vp; } else { if (ndp->ni_startdir) vrele(ndp->ni_startdir); nfsvno_relpathbuf(ndp); vp = ndp->ni_vp; if (create == NFSV4OPEN_CREATE) { if (ndp->ni_dvp == vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); } if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) { if (ndp->ni_cnd.cn_flags & RDONLY) NFSVNO_SETEXRDONLY(&nes); else NFSVNO_EXINIT(&nes); nd->nd_repstat = nfsvno_accchk(vp, VWRITE, cred, &nes, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); if (!nd->nd_repstat) { tempsize = nvap->na_size; NFSVNO_ATTRINIT(nvap); nvap->na_size = tempsize; nd->nd_repstat = VOP_SETATTR(vp, &nvap->na_vattr, cred); } } else if (vp->v_type == VREG) { nd->nd_repstat = nfsrv_opencheck(clientid, stateidp, stp, vp, nd, p, nd->nd_repstat); } } } else { if (ndp->ni_cnd.cn_flags & HASBUF) nfsvno_relpathbuf(ndp); if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) { vrele(ndp->ni_startdir); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); if (ndp->ni_vp) vput(ndp->ni_vp); } } *vpp = vp; NFSEXITCODE2(0, nd); } /* * Updates the file rev and sets the mtime and ctime * to the current clock time, returning the va_filerev and va_Xtime * values. * Return ESTALE to indicate the vnode is VI_DOOMED. */ int nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred, struct thread *p) { struct vattr va; VATTR_NULL(&va); vfs_timestamp(&va.va_mtime); if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) { NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) != 0) return (ESTALE); } (void) VOP_SETATTR(vp, &va, cred); (void) nfsvno_getattr(vp, nvap, cred, p, 1); return (0); } /* * Glue routine to nfsv4_fillattr(). */ int nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp, struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p, int isdgram, int reterr, int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno) { int error; error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror, attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root, mounted_on_fileno); NFSEXITCODE2(0, nd); return (error); } /* Since the Readdir vnode ops vary, put the entire functions in here. */ /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR * - calls VOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The NFSM_CLGET macro is used here so that the reply will be packed * tightly in mbuf clusters. * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but they might confuse clients * for other os'. * - it trims out records with d_type == DT_WHT * these cannot be seen through NFS (unless we extend the protocol) * The alternate call nfsrvd_readdirplus() does lookups as well. * PS: The NFS protocol spec. does not clarify what the "count" byte * argument is a count of.. just name strings and file id's or the * entire reply rpc or ... * I tried just file name and id sizes and it confused the Sun client, * so I am using the full rpc size now. The "paranoia.." comment refers * to including the status longwords that are not a part of the dir. * "entry" structures, but are in the rpc. */ int nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct thread *p, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct nfsvattr at; int nlen, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, ncookies; u_int64_t off, toff, verf; u_long *cookies = NULL, *cookiep; struct uio io; struct iovec iv; int is_ufs; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); off = fxdr_unsigned(u_quad_t, *tl++); } else { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; verf = fxdr_hyper(tl); tl += 2; } toff = off; cnt = fxdr_unsigned(int, *tl); if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); fullsiz = siz; if (nd->nd_flag & ND_NFSV3) { nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); #if 0 /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ if (!nd->nd_repstat && toff && verf != at.na_filerev) nd->nd_repstat = NFSERR_BAD_COOKIE; #endif } if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (nd->nd_repstat == 0 && cnt == 0) { if (nd->nd_flag & ND_NFSV2) /* NFSv2 does not have NFSERR_TOOSMALL */ nd->nd_repstat = EPERM; else nd->nd_repstat = NFSERR_TOOSMALL; } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; - MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); + rbuf = malloc(siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { - free((caddr_t)cookies, M_TEMP); + free(cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (nd->nd_flag & ND_NFSV3) { getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); if (!nd->nd_repstat) nd->nd_repstat = getret; } /* * Handles the failed cases. nd->nd_repstat == 0 past here. */ if (nd->nd_repstat) { vput(vp); - free((caddr_t)rbuf, M_TEMP); + free(rbuf, M_TEMP); if (cookies) - free((caddr_t)cookies, M_TEMP); + free(cookies, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); } else { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; } *tl++ = newnfs_false; *tl = newnfs_true; - FREE((caddr_t)rbuf, M_TEMP); - FREE((caddr_t)cookies, M_TEMP); + free(rbuf, M_TEMP); + free(cookies, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } vput(vp); /* * dirlen is the size of the reply, including all XDR and must * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate * if the XDR should be included in "count", but to be safe, we do. * (Include the two booleans at the end of the reply in dirlen now.) */ if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; } else { dirlen = 2 * NFSX_UNSIGNED; } /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN) { if (nd->nd_flag & ND_NFSV3) dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); else dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen)); if (dirlen > cnt) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry. */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; } else { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_true; } *tl = txdr_unsigned(dp->d_fileno); (void) nfsm_strtom(nd, dp->d_name, nlen); if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; } else NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(*cookiep); } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos < cend) eofflag = 0; NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; - FREE((caddr_t)rbuf, M_TEMP); - FREE((caddr_t)cookies, M_TEMP); + free(rbuf, M_TEMP); + free(cookies, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Readdirplus for V3 and Readdir for V4. */ int nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram, struct vnode *vp, struct thread *p, struct nfsexstuff *exp) { struct dirent *dp; u_int32_t *tl; int dirlen; char *cpos, *cend, *rbuf; struct vnode *nvp; fhandle_t nfh; struct nfsvattr nva, at, *nvap = &nva; struct mbuf *mb0, *mb1; struct nfsreferral *refp; int nlen, r, error = 0, getret = 1, usevget = 1; int siz, cnt, fullsiz, eofflag, ncookies, entrycnt; caddr_t bpos0, bpos1; u_int64_t off, toff, verf; u_long *cookies = NULL, *cookiep; nfsattrbit_t attrbits, rderrbits, savbits; struct uio io; struct iovec iv; struct componentname cn; int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls; struct mount *mp, *new_mp; uint64_t mounted_on_fileno; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); off = fxdr_hyper(tl); toff = off; tl += 2; verf = fxdr_hyper(tl); tl += 2; siz = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); /* * Use the server's maximum data transfer size as the upper bound * on reply datalen. */ if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0) cnt = NFS_SRVMAXDATA(nd); /* * siz is a "hint" of how much directory information (name, fileid, * cookie) should be in the reply. At least one client "hints" 0, * so I set it to cnt for that case. I also round it up to the * next multiple of DIRBLKSIZ. */ if (siz <= 0) siz = cnt; siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); if (nd->nd_flag & ND_NFSV4) { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) goto nfsmout; NFSSET_ATTRBIT(&savbits, &attrbits); NFSCLRNOTFILLABLE_ATTRBIT(&attrbits); NFSZERO_ATTRBIT(&rderrbits); NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR); } else { NFSZERO_ATTRBIT(&attrbits); } fullsiz = siz; nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); if (!nd->nd_repstat) { if (off && verf != at.na_filerev) { /* * va_filerev is not sufficient as a cookie verifier, * since it is not supposed to change when entries are * removed/added unless that offset cookies returned to * the client are no longer valid. */ #if 0 if (nd->nd_flag & ND_NFSV4) { nd->nd_repstat = NFSERR_NOTSAME; } else { nd->nd_repstat = NFSERR_BAD_COOKIE; } #endif } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) { nd->nd_repstat = NFSERR_BAD_COOKIE; } } if (!nd->nd_repstat && vp->v_type != VDIR) nd->nd_repstat = NFSERR_NOTDIR; if (!nd->nd_repstat && cnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (!nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0; is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0; - MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); + rbuf = malloc(siz, M_TEMP, M_WAITOK); again: eofflag = 0; if (cookies) { - free((caddr_t)cookies, M_TEMP); + free(cookies, M_TEMP); cookies = NULL; } iv.iov_base = rbuf; iv.iov_len = siz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = siz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_td = NULL; nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies, &cookies); off = (u_int64_t)io.uio_offset; if (io.uio_resid) siz -= io.uio_resid; getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); if (!cookies && !nd->nd_repstat) nd->nd_repstat = NFSERR_PERM; if (!nd->nd_repstat) nd->nd_repstat = getret; if (nd->nd_repstat) { vput(vp); if (cookies) - free((caddr_t)cookies, M_TEMP); - free((caddr_t)rbuf, M_TEMP); + free(cookies, M_TEMP); + free(rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); txdr_hyper(at.na_filerev, tl); tl += 2; *tl++ = newnfs_false; *tl = newnfs_true; - free((caddr_t)cookies, M_TEMP); - free((caddr_t)rbuf, M_TEMP); + free(cookies, M_TEMP); + free(rbuf, M_TEMP); goto out; } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || dp->d_type == DT_WHT || (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) || ((nd->nd_flag & ND_NFSV4) && ((dp->d_namlen == 1 && dp->d_name[0] == '.') || (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { siz = fullsiz; toff = off; goto again; } /* * Busy the file system so that the mount point won't go away * and, as such, VFS_VGET() can be used safely. */ mp = vp->v_mount; vfs_ref(mp); NFSVOPUNLOCK(vp, 0); nd->nd_repstat = vfs_busy(mp, 0); vfs_rel(mp); if (nd->nd_repstat != 0) { vrele(vp); free(cookies, M_TEMP); free(rbuf, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); goto out; } /* * Check to see if entries in this directory can be safely acquired * via VFS_VGET() or if a switch to VOP_LOOKUP() is required. * ZFS snapshot directories need VOP_LOOKUP(), so that any * automount of the snapshot directory that is required will * be done. * This needs to be done here for NFSv4, since NFSv4 never does * a VFS_VGET() for "." or "..". */ if (is_zfs == 1) { r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp); if (r == EOPNOTSUPP) { usevget = 0; cn.cn_nameiop = LOOKUP; cn.cn_lkflags = LK_SHARED | LK_RETRY; cn.cn_cred = nd->nd_cred; cn.cn_thread = p; } else if (r == 0) vput(nvp); } /* * Save this position, in case there is an error before one entry * is created. */ mb0 = nd->nd_mb; bpos0 = nd->nd_bpos; /* * Fill in the first part of the reply. * dirlen is the reply length in bytes and cannot exceed cnt. * (Include the two booleans at the end of the reply in dirlen now, * so we recognize when we have exceeded cnt.) */ if (nd->nd_flag & ND_NFSV3) { dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED; nfsrv_postopattr(nd, getret, &at); } else { dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED; } NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); txdr_hyper(at.na_filerev, tl); /* * Save this position, in case there is an empty reply needed. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; /* Loop through the records and build reply */ entrycnt = 0; while (cpos < cend && ncookies > 0 && dirlen < cnt) { nlen = dp->d_namlen; if (dp->d_fileno != 0 && dp->d_type != DT_WHT && nlen <= NFS_MAXNAMLEN && ((nd->nd_flag & ND_NFSV3) || nlen > 2 || (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.')) || (nlen == 1 && dp->d_name[0] != '.'))) { /* * Save the current position in the reply, in case * this entry exceeds cnt. */ mb1 = nd->nd_mb; bpos1 = nd->nd_bpos; /* * For readdir_and_lookup get the vnode using * the file number. */ nvp = NULL; refp = NULL; r = 0; at_root = 0; needs_unbusy = 0; new_mp = mp; mounted_on_fileno = (uint64_t)dp->d_fileno; if ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&savbits)) { if (nd->nd_flag & ND_NFSV4) refp = nfsv4root_getreferral(NULL, vp, dp->d_fileno); if (refp == NULL) { if (usevget) r = VFS_VGET(mp, dp->d_fileno, LK_SHARED, &nvp); else r = EOPNOTSUPP; if (r == EOPNOTSUPP) { if (usevget) { usevget = 0; cn.cn_nameiop = LOOKUP; cn.cn_lkflags = LK_SHARED | LK_RETRY; cn.cn_cred = nd->nd_cred; cn.cn_thread = p; } cn.cn_nameptr = dp->d_name; cn.cn_namelen = nlen; cn.cn_flags = ISLASTCN | NOFOLLOW | LOCKLEAF; if (nlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.') cn.cn_flags |= ISDOTDOT; if (NFSVOPLOCK(vp, LK_SHARED) != 0) { nd->nd_repstat = EPERM; break; } if ((vp->v_vflag & VV_ROOT) != 0 && (cn.cn_flags & ISDOTDOT) != 0) { vref(vp); nvp = vp; r = 0; } else { r = VOP_LOOKUP(vp, &nvp, &cn); if (vp != nvp) NFSVOPUNLOCK(vp, 0); } } /* * For NFSv4, check to see if nvp is * a mount point and get the mount * point vnode, as required. */ if (r == 0 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && nvp->v_mountedhere != NULL) { new_mp = nvp->v_mountedhere; r = vfs_busy(new_mp, 0); vput(nvp); nvp = NULL; if (r == 0) { r = VFS_ROOT(new_mp, LK_SHARED, &nvp); needs_unbusy = 1; if (r == 0) at_root = 1; } } } if (!r) { if (refp == NULL && ((nd->nd_flag & ND_NFSV3) || NFSNONZERO_ATTRBIT(&attrbits))) { r = nfsvno_getfh(nvp, &nfh, p); if (!r) r = nfsvno_getattr(nvp, nvap, nd->nd_cred, p, 1); if (r == 0 && is_zfs == 1 && nfsrv_enable_crossmntpt != 0 && (nd->nd_flag & ND_NFSV4) != 0 && nvp->v_type == VDIR && vp->v_mount != nvp->v_mount) { /* * For a ZFS snapshot, there is a * pseudo mount that does not set * v_mountedhere, so it needs to * be detected via a different * mount structure. */ at_root = 1; if (new_mp == mp) new_mp = nvp->v_mount; } } } else { nvp = NULL; } if (r) { if (!NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR)) { if (nvp != NULL) vput(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); nd->nd_repstat = r; break; } } } /* * Build the directory record xdr */ if (nd->nd_flag & ND_NFSV3) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(dp->d_fileno); dirlen += nfsm_strtom(nd, dp->d_name, nlen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = 0; *tl = txdr_unsigned(*cookiep); nfsrv_postopattr(nd, 0, nvap); dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1); dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR); if (nvp != NULL) vput(nvp); } else { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = newnfs_true; *tl++ = 0; *tl = txdr_unsigned(*cookiep); dirlen += nfsm_strtom(nd, dp->d_name, nlen); if (nvp != NULL) { supports_nfsv4acls = nfs_supportsnfsv4acls(nvp); NFSVOPUNLOCK(nvp, 0); } else supports_nfsv4acls = 0; if (refp != NULL) { dirlen += nfsrv_putreferralattr(nd, &savbits, refp, 0, &nd->nd_repstat); if (nd->nd_repstat) { if (nvp != NULL) vrele(nvp); if (needs_unbusy != 0) vfs_unbusy(new_mp); break; } } else if (r) { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &rderrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } else { dirlen += nfsvno_fillattr(nd, new_mp, nvp, nvap, &nfh, r, &attrbits, nd->nd_cred, p, isdgram, 0, supports_nfsv4acls, at_root, mounted_on_fileno); } if (nvp != NULL) vrele(nvp); dirlen += (3 * NFSX_UNSIGNED); } if (needs_unbusy != 0) vfs_unbusy(new_mp); if (dirlen <= cnt) entrycnt++; } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } vrele(vp); vfs_unbusy(mp); /* * If dirlen > cnt, we must strip off the last entry. If that * results in an empty reply, report NFSERR_TOOSMALL. */ if (dirlen > cnt || nd->nd_repstat) { if (!nd->nd_repstat && entrycnt == 0) nd->nd_repstat = NFSERR_TOOSMALL; if (nd->nd_repstat) { newnfs_trimtrailing(nd, mb0, bpos0); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); } else newnfs_trimtrailing(nd, mb1, bpos1); eofflag = 0; } else if (cpos < cend) eofflag = 0; if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = newnfs_false; if (eofflag) *tl = newnfs_true; else *tl = newnfs_false; } - FREE((caddr_t)cookies, M_TEMP); - FREE((caddr_t)rbuf, M_TEMP); + free(cookies, M_TEMP); + free(rbuf, M_TEMP); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * Get the settable attributes out of the mbuf list. * (Return 0 or EBADRPC) */ int nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; struct nfsv2_sattr *sp; int error = 0, toclient = 0; switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) { case ND_NFSV2: NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); /* * Some old clients didn't fill in the high order 16bits. * --> check the low order 2 bytes for 0xffff */ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) nvap->na_mode = nfstov_mode(sp->sa_mode); if (sp->sa_uid != newnfs_xdrneg1) nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid); if (sp->sa_gid != newnfs_xdrneg1) nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid); if (sp->sa_size != newnfs_xdrneg1) nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size); if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) { #ifdef notyet fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime); #else nvap->na_atime.tv_sec = fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec); nvap->na_atime.tv_nsec = 0; #endif } if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1) fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime); break; case ND_NFSV3: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_uid = fxdr_unsigned(uid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_gid = fxdr_unsigned(gid_t, *tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); nvap->na_size = fxdr_hyper(tl); } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_atime); toclient = 1; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; break; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; break; case NFSV3SATTRTIME_TOSERVER: vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; break; } break; case ND_NFSV4: error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Handle the setable attributes for V4. * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise. */ int nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p) { u_int32_t *tl; int attrsum = 0; int i, j; int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0; int toclient = 0; u_char *cp, namestr[NFSV4_SMALLSTR + 1]; uid_t uid; gid_t gid; error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsize = fxdr_unsigned(int, *tl); /* * Loop around getting the setable attributes. If an unsupported * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return. */ if (retnotsup) { nd->nd_repstat = NFSERR_ATTRNOTSUPP; bitpos = NFSATTRBIT_MAX; } else { bitpos = 0; } for (; bitpos < NFSATTRBIT_MAX; bitpos++) { if (attrsum > attrsize) { error = NFSERR_BADXDR; goto nfsmout; } if (NFSISSET_ATTRBIT(attrbitp, bitpos)) switch (bitpos) { case NFSATTRBIT_SIZE: NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); if (vp != NULL && vp->v_type != VREG) { error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL; goto nfsmout; } nvap->na_size = fxdr_hyper(tl); attrsum += NFSX_HYPER; break; case NFSATTRBIT_ACL: error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize, p); if (error) goto nfsmout; if (aceerr && !nd->nd_repstat) nd->nd_repstat = aceerr; attrsum += aclsize; break; case NFSATTRBIT_ARCHIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HIDDEN: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MIMETYPE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i)); break; case NFSATTRBIT_MODE: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nvap->na_mode = nfstov_mode(*tl); attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_OWNER: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid, p); if (!nd->nd_repstat) nvap->na_uid = uid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_OWNERGROUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); j = fxdr_unsigned(int, *tl); if (j < 0) { error = NFSERR_BADXDR; goto nfsmout; } if (j > NFSV4_SMALLSTR) cp = malloc(j + 1, M_NFSSTRING, M_WAITOK); else cp = namestr; error = nfsrv_mtostr(nd, cp, j); if (error) { if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); goto nfsmout; } if (!nd->nd_repstat) { nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid, p); if (!nd->nd_repstat) nvap->na_gid = gid; } if (j > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_SYSTEM: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESSSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_atime); toclient = 1; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_atime); nvap->na_vaflags |= VA_UTIMES_NULL; } break; case NFSATTRBIT_TIMEBACKUP: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMECREATE: NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); if (!nd->nd_repstat) nd->nd_repstat = NFSERR_ATTRNOTSUPP; attrsum += NFSX_V4TIME; break; case NFSATTRBIT_TIMEMODIFYSET: NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); attrsum += NFSX_UNSIGNED; if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) { NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME); fxdr_nfsv4time(tl, &nvap->na_mtime); nvap->na_vaflags &= ~VA_UTIMES_NULL; attrsum += NFSX_V4TIME; } else { vfs_timestamp(&nvap->na_mtime); if (!toclient) nvap->na_vaflags |= VA_UTIMES_NULL; } break; default: nd->nd_repstat = NFSERR_ATTRNOTSUPP; /* * set bitpos so we drop out of the loop. */ bitpos = NFSATTRBIT_MAX; break; } } /* * some clients pad the attrlist, so we need to skip over the * padding. */ if (attrsum > attrsize) { error = NFSERR_BADXDR; } else { attrsize = NFSM_RNDUP(attrsize); if (attrsum < attrsize) error = nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Check/setup export credentials. */ int nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp, struct ucred *credanon) { int error = 0; /* * Check/setup credentials. */ if (nd->nd_flag & ND_GSS) exp->nes_exflag &= ~MNT_EXPORTANON; /* * Check to see if the operation is allowed for this security flavor. * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS. * Also, allow Secinfo, so that it can acquire the correct flavor(s). */ if (nfsvno_testexp(nd, exp) && nd->nd_procnum != NFSV4OP_SECINFO && nd->nd_procnum != NFSPROC_FSINFO) { if (nd->nd_flag & ND_NFSV4) error = NFSERR_WRONGSEC; else error = (NFSERR_AUTHERR | AUTH_TOOWEAK); goto out; } /* * Check to see if the file system is exported V4 only. */ if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) { error = NFSERR_PROGNOTV4; goto out; } /* * Now, map the user credentials. * (Note that ND_AUTHNONE will only be set for an NFSv3 * Fsinfo RPC. If set for anything else, this code might need * to change.) */ if (NFSVNO_EXPORTED(exp)) { if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) || NFSVNO_EXPORTANON(exp) || (nd->nd_flag & ND_AUTHNONE) != 0) { nd->nd_cred->cr_uid = credanon->cr_uid; nd->nd_cred->cr_gid = credanon->cr_gid; crsetgroups(nd->nd_cred, credanon->cr_ngroups, credanon->cr_groups); } else if ((nd->nd_flag & ND_GSS) == 0) { /* * If using AUTH_SYS, call nfsrv_getgrpscred() to see * if there is a replacement credential with a group * list set up by "nfsuserd -manage-gids". * If there is no replacement, nfsrv_getgrpscred() * simply returns its argument. */ nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred); } } out: NFSEXITCODE2(error, nd); return (error); } /* * Check exports. */ int nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, struct ucred **credp) { int i, error, *secflavors; error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, &secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } } else { /* Copy the security flavors. */ for (i = 0; i < exp->nes_numsecflavor; i++) exp->nes_secflavors[i] = secflavors[i]; } NFSEXITCODE(error); return (error); } /* * Get a vnode for a file handle and export stuff. */ int nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct ucred **credp) { int i, error, *secflavors; *credp = NULL; exp->nes_numsecflavor = 0; error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp); if (error != 0) /* Make sure the server replies ESTALE to the client. */ error = ESTALE; if (nam && !error) { error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, &secflavors); if (error) { if (nfs_rootfhset) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; } else { vput(*vpp); } } else { /* Copy the security flavors. */ for (i = 0; i < exp->nes_numsecflavor; i++) exp->nes_secflavors[i] = secflavors[i]; } } NFSEXITCODE(error); return (error); } /* * nfsd_fhtovp() - convert a fh to a vnode ptr * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling nfsvno_fhtovp() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * for AUTH_SYS * - if mpp != NULL, return the mount point so that it can * be used for vn_finished_write() by the caller */ void nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype, struct vnode **vpp, struct nfsexstuff *exp, struct mount **mpp, int startwrite, struct thread *p) { struct mount *mp; struct ucred *credanon; fhandle_t *fhp; fhp = (fhandle_t *)nfp->nfsrvfh_data; /* * Check for the special case of the nfsv4root_fh. */ mp = vfs_busyfs(&fhp->fh_fsid); if (mpp != NULL) *mpp = mp; if (mp == NULL) { *vpp = NULL; nd->nd_repstat = ESTALE; goto out; } if (startwrite) { vn_start_write(NULL, mpp, V_WAIT); if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp))) lktype = LK_EXCLUSIVE; } nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp, &credanon); vfs_unbusy(mp); /* * For NFSv4 without a pseudo root fs, unexported file handles * can be returned, so that Lookup works everywhere. */ if (!nd->nd_repstat && exp->nes_exflag == 0 && !(nd->nd_flag & ND_NFSV4)) { vput(*vpp); nd->nd_repstat = EACCES; } /* * Personally, I've never seen any point in requiring a * reserved port#, since only in the rare case where the * clients are all boxes with secure system privileges, * does it provide any enhanced security, but... some people * believe it to be useful and keep putting this code back in. * (There is also some "security checker" out there that * complains if the nfs server doesn't enforce this.) * However, note the following: * RFC3530 (NFSv4) specifies that a reserved port# not be * required. * RFC2623 recommends that, if a reserved port# is checked for, * that there be a way to turn that off--> ifdef'd. */ #ifdef NFS_REQRSVPORT if (!nd->nd_repstat) { struct sockaddr_in *saddr; struct sockaddr_in6 *saddr6; saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *); if (!(nd->nd_flag & ND_NFSV4) && ((saddr->sin_family == AF_INET && ntohs(saddr->sin_port) >= IPPORT_RESERVED) || (saddr6->sin6_family == AF_INET6 && ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) { vput(*vpp); nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); } } #endif /* NFS_REQRSVPORT */ /* * Check/setup credentials. */ if (!nd->nd_repstat) { nd->nd_saveduid = nd->nd_cred->cr_uid; nd->nd_repstat = nfsd_excred(nd, exp, credanon); if (nd->nd_repstat) vput(*vpp); } if (credanon != NULL) crfree(credanon); if (nd->nd_repstat) { if (startwrite) vn_finished_write(mp); *vpp = NULL; if (mpp != NULL) *mpp = NULL; } out: NFSEXITCODE2(0, nd); } /* * glue for fp. */ static int fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) { struct filedesc *fdp; struct file *fp; int error = 0; fdp = p->td_proc->p_fd; if (fd < 0 || fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { error = EBADF; goto out; } *fpp = fp; out: NFSEXITCODE(error); return (error); } /* * Called from nfssvc() to update the exports list. Just call * vfs_export(). This has to be done, since the v4 root fake fs isn't * in the mount list. */ int nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) { struct nfsex_args *nfsexargp = (struct nfsex_args *)argp; int error = 0; struct nameidata nd; fhandle_t fh; error = vfs_export(&nfsv4root_mnt, &nfsexargp->export); if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) nfs_rootfhset = 0; else if (error == 0) { if (nfsexargp->fspec == NULL) { error = EPERM; goto out; } /* * If fspec != NULL, this is the v4root path. */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec, p); if ((error = namei(&nd)) != 0) goto out; error = nfsvno_getfh(nd.ni_vp, &fh, p); vrele(nd.ni_vp); if (!error) { nfs_rootfh.nfsrvfh_len = NFSX_MYFH; NFSBCOPY((caddr_t)&fh, nfs_rootfh.nfsrvfh_data, sizeof (fhandle_t)); nfs_rootfhset = 1; } } out: NFSEXITCODE(error); return (error); } /* * This function needs to test to see if the system is near its limit * for memory allocation via malloc() or mget() and return True iff * either of these resources are near their limit. * XXX (For now, this is just a stub.) */ int nfsrv_testmalloclimit = 0; int nfsrv_mallocmget_limit(void) { static int printmesg = 0; static int testval = 1; if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) { if ((printmesg++ % 100) == 0) printf("nfsd: malloc/mget near limit\n"); return (1); } return (0); } /* * BSD specific initialization of a mount point. */ void nfsd_mntinit(void) { static int inited = 0; if (inited) return; inited = 1; nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED); TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist); TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist); nfsv4root_mnt.mnt_export = NULL; TAILQ_INIT(&nfsv4root_opt); TAILQ_INIT(&nfsv4root_newopt); nfsv4root_mnt.mnt_opt = &nfsv4root_opt; nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt; nfsv4root_mnt.mnt_nvnodelistsize = 0; nfsv4root_mnt.mnt_activevnodelistsize = 0; } /* * Get a vnode for a file handle, without checking exports, etc. */ struct vnode * nfsvno_getvp(fhandle_t *fhp) { struct mount *mp; struct vnode *vp; int error; mp = vfs_busyfs(&fhp->fh_fsid); if (mp == NULL) return (NULL); error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error) return (NULL); return (vp); } /* * Do a local VOP_ADVLOCK(). */ int nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first, u_int64_t end, struct thread *td) { int error = 0; struct flock fl; u_int64_t tlen; if (nfsrv_dolocallocks == 0) goto out; ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked"); fl.l_whence = SEEK_SET; fl.l_type = ftype; fl.l_start = (off_t)first; if (end == NFS64BITSSET) { fl.l_len = 0; } else { tlen = end - first; fl.l_len = (off_t)tlen; } /* * For FreeBSD8, the l_pid and l_sysid must be set to the same * values for all calls, so that all locks will be held by the * nfsd server. (The nfsd server handles conflicts between the * various clients.) * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024 * bytes, so it can't be put in l_sysid. */ if (nfsv4_sysid == 0) nfsv4_sysid = nlm_acquire_next_sysid(); fl.l_pid = (pid_t)0; fl.l_sysid = (int)nfsv4_sysid; if (ftype == F_UNLCK) error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl, (F_POSIX | F_REMOTE)); else error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl, (F_POSIX | F_REMOTE)); out: NFSEXITCODE(error); return (error); } /* * Check the nfsv4 root exports. */ int nfsvno_v4rootexport(struct nfsrv_descript *nd) { struct ucred *credanon; int exflags, error = 0, numsecflavor, *secflavors, i; error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags, &credanon, &numsecflavor, &secflavors); if (error) { error = NFSERR_PROGUNAVAIL; goto out; } if (credanon != NULL) crfree(credanon); for (i = 0; i < numsecflavor; i++) { if (secflavors[i] == AUTH_SYS) nd->nd_flag |= ND_EXAUTHSYS; else if (secflavors[i] == RPCSEC_GSS_KRB5) nd->nd_flag |= ND_EXGSS; else if (secflavors[i] == RPCSEC_GSS_KRB5I) nd->nd_flag |= ND_EXGSSINTEGRITY; else if (secflavors[i] == RPCSEC_GSS_KRB5P) nd->nd_flag |= ND_EXGSSPRIVACY; } out: NFSEXITCODE(error); return (error); } /* * Nfs server pseudo system call for the nfsd's */ /* * MPSAFE */ static int nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) { struct file *fp; struct nfsd_addsock_args sockarg; struct nfsd_nfsd_args nfsdarg; cap_rights_t rights; int error; if (uap->flag & NFSSVC_NFSDADDSOCK) { error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); if (error) goto out; /* * Since we don't know what rights might be required, * pretend that we need them all. It is better to be too * careful than too reckless. */ error = fget(td, sockarg.sock, cap_rights_init(&rights, CAP_SOCK_SERVER), &fp); if (error != 0) goto out; if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); error = EPERM; goto out; } error = nfsrvd_addsock(fp); fdrop(fp, td); } else if (uap->flag & NFSSVC_NFSDNFSD) { if (uap->argp == NULL) { error = EINVAL; goto out; } error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof (nfsdarg)); if (error) goto out; error = nfsrvd_nfsd(td, &nfsdarg); } else { error = nfssvc_srvcall(td, uap, td->td_ucred); } out: NFSEXITCODE(error); return (error); } static int nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) { struct nfsex_args export; struct file *fp = NULL; int stablefd, len; struct nfsd_clid adminrevoke; struct nfsd_dumplist dumplist; struct nfsd_dumpclients *dumpclients; struct nfsd_dumplocklist dumplocklist; struct nfsd_dumplocks *dumplocks; struct nameidata nd; vnode_t vp; int error = EINVAL, igotlock; struct proc *procp; static int suspend_nfsd = 0; if (uap->flag & NFSSVC_PUBLICFH) { NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); error = copyin(uap->argp, &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t)); if (!error) nfs_pubfhset = 1; } else if (uap->flag & NFSSVC_V4ROOTEXPORT) { error = copyin(uap->argp,(caddr_t)&export, sizeof (struct nfsex_args)); if (!error) error = nfsrv_v4rootexport(&export, cred, p); } else if (uap->flag & NFSSVC_NOPUBLICFH) { nfs_pubfhset = 0; error = 0; } else if (uap->flag & NFSSVC_STABLERESTART) { error = copyin(uap->argp, (caddr_t)&stablefd, sizeof (int)); if (!error) error = fp_getfvp(p, stablefd, &fp, &vp); if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) error = EBADF; if (!error && newnfs_numnfsd != 0) error = EPERM; if (!error) { nfsrv_stablefirst.nsf_fp = fp; nfsrv_setupstable(p); } } else if (uap->flag & NFSSVC_ADMINREVOKE) { error = copyin(uap->argp, (caddr_t)&adminrevoke, sizeof (struct nfsd_clid)); if (!error) error = nfsrv_adminrevoke(&adminrevoke, p); } else if (uap->flag & NFSSVC_DUMPCLIENTS) { error = copyin(uap->argp, (caddr_t)&dumplist, sizeof (struct nfsd_dumplist)); if (!error && (dumplist.ndl_size < 1 || dumplist.ndl_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) { len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size; dumpclients = (struct nfsd_dumpclients *)malloc(len, M_TEMP, M_WAITOK); nfsrv_dumpclients(dumpclients, dumplist.ndl_size); error = copyout(dumpclients, CAST_USER_ADDR_T(dumplist.ndl_list), len); - free((caddr_t)dumpclients, M_TEMP); + free(dumpclients, M_TEMP); } } else if (uap->flag & NFSSVC_DUMPLOCKS) { error = copyin(uap->argp, (caddr_t)&dumplocklist, sizeof (struct nfsd_dumplocklist)); if (!error && (dumplocklist.ndllck_size < 1 || dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST)) error = EPERM; if (!error) error = nfsrv_lookupfilename(&nd, dumplocklist.ndllck_fname, p); if (!error) { len = sizeof (struct nfsd_dumplocks) * dumplocklist.ndllck_size; dumplocks = (struct nfsd_dumplocks *)malloc(len, M_TEMP, M_WAITOK); nfsrv_dumplocks(nd.ni_vp, dumplocks, dumplocklist.ndllck_size, p); vput(nd.ni_vp); error = copyout(dumplocks, CAST_USER_ADDR_T(dumplocklist.ndllck_list), len); - free((caddr_t)dumplocks, M_TEMP); + free(dumplocks, M_TEMP); } } else if (uap->flag & NFSSVC_BACKUPSTABLE) { procp = p->td_proc; PROC_LOCK(procp); nfsd_master_pid = procp->p_pid; bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1); nfsd_master_start = procp->p_stats->p_start; nfsd_master_proc = procp; PROC_UNLOCK(procp); } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd == 0) { /* Lock out all nfsd threads */ do { igotlock = nfsv4_lock(&nfsd_suspend_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0 && suspend_nfsd == 0); suspend_nfsd = 1; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { NFSLOCKV4ROOTMUTEX(); if (suspend_nfsd != 0) { nfsv4_unlock(&nfsd_suspend_lock, 0); suspend_nfsd = 0; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } NFSEXITCODE(error); return (error); } /* * Check exports. * Returns 0 if ok, 1 otherwise. */ int nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp) { int i; /* * This seems odd, but allow the case where the security flavor * list is empty. This happens when NFSv4 is traversing non-exported * file systems. Exported file systems should always have a non-empty * security flavor list. */ if (exp->nes_numsecflavor == 0) return (0); for (i = 0; i < exp->nes_numsecflavor; i++) { /* * The tests for privacy and integrity must be first, * since ND_GSS is set for everything but AUTH_SYS. */ if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P && (nd->nd_flag & ND_GSSPRIVACY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I && (nd->nd_flag & ND_GSSINTEGRITY)) return (0); if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 && (nd->nd_flag & ND_GSS)) return (0); if (exp->nes_secflavors[i] == AUTH_SYS && (nd->nd_flag & ND_GSS) == 0) return (0); } return (1); } /* * Calculate a hash value for the fid in a file handle. */ uint32_t nfsrv_hashfh(fhandle_t *fhp) { uint32_t hashval; hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0); return (hashval); } /* * Calculate a hash value for the sessionid. */ uint32_t nfsrv_hashsessionid(uint8_t *sessionid) { uint32_t hashval; hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0); return (hashval); } /* * Signal the userland master nfsd to backup the stable restart file. */ void nfsrv_backupstable(void) { struct proc *procp; if (nfsd_master_proc != NULL) { procp = pfind(nfsd_master_pid); /* Try to make sure it is the correct process. */ if (procp == nfsd_master_proc && procp->p_stats->p_start.tv_sec == nfsd_master_start.tv_sec && procp->p_stats->p_start.tv_usec == nfsd_master_start.tv_usec && strcmp(procp->p_comm, nfsd_master_comm) == 0) kern_psignal(procp, SIGUSR2); else nfsd_master_proc = NULL; if (procp != NULL) PROC_UNLOCK(procp); } } extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *); /* * Called once to initialize data structures... */ static int nfsd_modevent(module_t mod, int type, void *data) { int error = 0, i; static int loaded = 0; switch (type) { case MOD_LOAD: if (loaded) goto out; newnfs_portinit(); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL, MTX_DEF); mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL, MTX_DEF); } mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF); mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF); mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF); lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0); nfsrvd_initcache(); nfsd_init(); NFSD_LOCK(); nfsrvd_init(0); NFSD_UNLOCK(); nfsd_mntinit(); #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation; vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation; #endif nfsd_call_servertimer = nfsrv_servertimer; nfsd_call_nfsd = nfssvc_nfsd; loaded = 1; break; case MOD_UNLOAD: if (newnfs_numnfsd != 0) { error = EBUSY; break; } #ifdef VV_DISABLEDELEG vn_deleg_ops.vndeleg_recall = NULL; vn_deleg_ops.vndeleg_disable = NULL; #endif nfsd_call_servertimer = NULL; nfsd_call_nfsd = NULL; /* Clean out all NFSv4 state. */ nfsrv_throwawayallstate(curthread); /* Clean the NFS server reply cache */ nfsrvd_cleancache(); /* Free up the krpc server pool. */ if (nfsrvd_pool != NULL) svcpool_destroy(nfsrvd_pool); /* and get rid of the locks */ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { mtx_destroy(&nfsrchash_table[i].mtx); mtx_destroy(&nfsrcahash_table[i].mtx); } mtx_destroy(&nfsrc_udpmtx); mtx_destroy(&nfs_v4root_mutex); mtx_destroy(&nfsv4root_mnt.mnt_mtx); for (i = 0; i < nfsrv_sessionhashsize; i++) mtx_destroy(&nfssessionhash[i].mtx); lockdestroy(&nfsv4root_mnt.mnt_explock); free(nfsclienthash, M_NFSDCLIENT); free(nfslockhash, M_NFSDLOCKFILE); free(nfssessionhash, M_NFSDSESSION); loaded = 0; break; default: error = EOPNOTSUPP; break; } out: NFSEXITCODE(error); return (error); } static moduledata_t nfsd_mod = { "nfsd", nfsd_modevent, NULL, }; DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfsd, 1); MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1); MODULE_DEPEND(nfsd, nfslock, 1, 1, 1); MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1); MODULE_DEPEND(nfsd, krpc, 1, 1, 1); MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1); Index: head/sys/fs/nfsserver/nfs_nfsdserv.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdserv.c (revision 328416) +++ head/sys/fs/nfsserver/nfs_nfsdserv.c (revision 328417) @@ -1,4111 +1,4111 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * nfs version 2, 3 and 4 server calls to vnode ops * - these routines generally have 3 phases * 1 - break down and validate rpc request in mbuf list * 2 - do the vnode ops for the request, usually by calling a nfsvno_XXX() * function in nfsd_port.c * 3 - build the rpc reply in an mbuf list * For nfsv4, these functions are called for each Op within the Compound RPC. */ #ifndef APPLEKEXT #include /* Global vars */ extern u_int32_t newnfs_false, newnfs_true; extern enum vtype nv34tov_type[8]; extern struct timeval nfsboottime; extern int nfs_rootfhset; extern int nfsrv_enable_crossmntpt; extern int nfsrv_statehashsize; #endif /* !APPLEKEXT */ static int nfs_async = 0; SYSCTL_DECL(_vfs_nfsd); SYSCTL_INT(_vfs_nfsd, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "Tell client that writes were synced even though they were not"); /* * This list defines the GSS mechanisms supported. * (Don't ask me how you get these strings from the RFC stuff like * iso(1), org(3)... but someone did it, so I don't need to know.) */ static struct nfsgss_mechlist nfsgss_mechlist[] = { { 9, "\052\206\110\206\367\022\001\002\002", 11 }, { 0, "", 0 }, }; /* local functions */ static void nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp, int pathlen); static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp); /* * nfs access service (not a part of NFS V2) */ APPLESTATIC int nfsrvd_access(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int getret, error = 0; struct nfsvattr nva; u_int32_t testmode, nfsmode, supported = 0; accmode_t deletebit; if (nd->nd_repstat) { nfsrv_postopattr(nd, 1, &nva); goto out; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nfsmode = fxdr_unsigned(u_int32_t, *tl); if ((nd->nd_flag & ND_NFSV4) && (nfsmode & ~(NFSACCESS_READ | NFSACCESS_LOOKUP | NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE | NFSACCESS_EXECUTE))) { nd->nd_repstat = NFSERR_INVAL; vput(vp); goto out; } if (nfsmode & NFSACCESS_READ) { supported |= NFSACCESS_READ; if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_READ; } if (nfsmode & NFSACCESS_MODIFY) { supported |= NFSACCESS_MODIFY; if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_MODIFY; } if (nfsmode & NFSACCESS_EXTEND) { supported |= NFSACCESS_EXTEND; if (nfsvno_accchk(vp, VWRITE | VAPPEND, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_EXTEND; } if (nfsmode & NFSACCESS_DELETE) { supported |= NFSACCESS_DELETE; if (vp->v_type == VDIR) deletebit = VDELETE_CHILD; else deletebit = VDELETE; if (nfsvno_accchk(vp, deletebit, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~NFSACCESS_DELETE; } if (vnode_vtype(vp) == VDIR) testmode = NFSACCESS_LOOKUP; else testmode = NFSACCESS_EXECUTE; if (nfsmode & testmode) { supported |= (nfsmode & testmode); if (nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported)) nfsmode &= ~testmode; } nfsmode &= supported; if (nd->nd_flag & ND_NFSV3) { getret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); nfsrv_postopattr(nd, getret, &nva); } vput(vp); if (nd->nd_flag & ND_NFSV4) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(supported); } else NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsmode); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs getattr service */ APPLESTATIC int nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct nfsvattr nva; fhandle_t fh; int at_root = 0, error = 0, supports_nfsv4acls; struct nfsreferral *refp; nfsattrbit_t attrbits, tmpbits; struct mount *mp; struct vnode *tvp = NULL; struct vattr va; uint64_t mounted_on_fileno = 0; accmode_t accmode; if (nd->nd_repstat) goto out; if (nd->nd_flag & ND_NFSV4) { error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); if (error) { vput(vp); goto out; } /* * Check for a referral. */ refp = nfsv4root_getreferral(vp, NULL, 0); if (refp != NULL) { (void) nfsrv_putreferralattr(nd, &attrbits, refp, 1, &nd->nd_repstat); vput(vp); goto out; } if (nd->nd_repstat == 0) { accmode = 0; NFSSET_ATTRBIT(&tmpbits, &attrbits); /* * GETATTR with write-only attr time_access_set and time_modify_set * should return NFS4ERR_INVAL. */ if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEACCESSSET) || NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEMODIFYSET)){ error = NFSERR_INVAL; vput(vp); goto out; } if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_ACL)) { NFSCLRBIT_ATTRBIT(&tmpbits, NFSATTRBIT_ACL); accmode |= VREAD_ACL; } if (NFSNONZERO_ATTRBIT(&tmpbits)) accmode |= VREAD_ATTRIBUTES; if (accmode != 0) nd->nd_repstat = nfsvno_accchk(vp, accmode, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); } } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_FILEHANDLE)) nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) nd->nd_repstat = nfsrv_checkgetattr(nd, vp, &nva, &attrbits, nd->nd_cred, p); if (nd->nd_repstat == 0) { supports_nfsv4acls = nfs_supportsnfsv4acls(vp); mp = vp->v_mount; if (nfsrv_enable_crossmntpt != 0 && vp->v_type == VDIR && (vp->v_vflag & VV_ROOT) != 0 && vp != rootvnode) { tvp = mp->mnt_vnodecovered; VREF(tvp); at_root = 1; } else at_root = 0; vfs_ref(mp); NFSVOPUNLOCK(vp, 0); if (at_root != 0) { if ((nd->nd_repstat = NFSVOPLOCK(tvp, LK_SHARED)) == 0) { nd->nd_repstat = VOP_GETATTR( tvp, &va, nd->nd_cred); vput(tvp); } else vrele(tvp); if (nd->nd_repstat == 0) mounted_on_fileno = (uint64_t) va.va_fileid; else at_root = 0; } if (nd->nd_repstat == 0) nd->nd_repstat = vfs_busy(mp, 0); vfs_rel(mp); if (nd->nd_repstat == 0) { (void)nfsvno_fillattr(nd, mp, vp, &nva, &fh, 0, &attrbits, nd->nd_cred, p, isdgram, 1, supports_nfsv4acls, at_root, mounted_on_fileno); vfs_unbusy(mp); } vrele(vp); } else vput(vp); } else { nfsrv_fillattr(nd, &nva); vput(vp); } } else { vput(vp); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs setattr service */ APPLESTATIC int nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nfsvattr nva, nva2; u_int32_t *tl; int preat_ret = 1, postat_ret = 1, gcheck = 0, error = 0; struct timespec guard = { 0, 0 }; nfsattrbit_t attrbits, retbits; nfsv4stateid_t stateid; NFSACL_T *aclp = NULL; if (nd->nd_repstat) { nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); goto out; } #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif NFSVNO_ATTRINIT(&nva); NFSZERO_ATTRBIT(&retbits); if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl,(caddr_t)stateid.other,NFSX_STATEIDOTHER); } error = nfsrv_sattr(nd, vp, &nva, &attrbits, aclp, p); if (error) goto nfsmout; preat_ret = nfsvno_getattr(vp, &nva2, nd->nd_cred, p, 1); if (!nd->nd_repstat) nd->nd_repstat = preat_ret; if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); gcheck = fxdr_unsigned(int, *tl); if (gcheck) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &guard); } if (!nd->nd_repstat && gcheck && (nva2.na_ctime.tv_sec != guard.tv_sec || nva2.na_ctime.tv_nsec != guard.tv_nsec)) nd->nd_repstat = NFSERR_NOT_SYNC; if (nd->nd_repstat) { vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); goto out; } } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); /* * Now that we have all the fields, lets do it. * If the size is being changed write access is required, otherwise * just check for a read only file system. */ if (!nd->nd_repstat) { if (NFSVNO_NOTSETSIZE(&nva)) { if (NFSVNO_EXRDONLY(exp) || (vfs_flags(vnode_mount(vp)) & MNT_RDONLY)) nd->nd_repstat = EROFS; } else { if (vnode_vtype(vp) != VREG) nd->nd_repstat = EINVAL; else if (nva2.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp)) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, NULL); } } if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) nd->nd_repstat = nfsrv_checksetattr(vp, nd, &stateid, &nva, &attrbits, exp, p); if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) { /* * For V4, try setting the attrbutes in sets, so that the * reply bitmap will be correct for an error case. */ if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, uid, nva.na_uid); NFSVNO_SETATTRVAL(&nva2, gid, nva.na_gid); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNERGROUP); } } if (!nd->nd_repstat && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SIZE)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, size, nva.na_size); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SIZE); } if (!nd->nd_repstat && (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET) || NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET))) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, atime, nva.na_atime); NFSVNO_SETATTRVAL(&nva2, mtime, nva.na_mtime); if (nva.na_vaflags & VA_UTIMES_NULL) { nva2.na_vaflags |= VA_UTIMES_NULL; NFSVNO_SETACTIVE(&nva2, vaflags); } nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) { if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEACCESSSET); if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET)) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEMODIFYSET); } } if (!nd->nd_repstat && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODE)) { NFSVNO_ATTRINIT(&nva2); NFSVNO_SETATTRVAL(&nva2, mode, nva.na_mode); nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, exp); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODE); } #ifdef NFS4_ACL_EXTATTR_NAME if (!nd->nd_repstat && aclp->acl_cnt > 0 && NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_ACL)) { nd->nd_repstat = nfsrv_setacl(vp, aclp, nd->nd_cred, p); if (!nd->nd_repstat) NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_ACL); } #endif } else if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_setattr(vp, &nva, nd->nd_cred, p, exp); } if (nd->nd_flag & (ND_NFSV2 | ND_NFSV3)) { postat_ret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); if (!nd->nd_repstat) nd->nd_repstat = postat_ret; } vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva); else if (nd->nd_flag & ND_NFSV4) (void) nfsrv_putattrbit(nd, &retbits); else if (!nd->nd_repstat) nfsrv_fillattr(nd, &nva); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV4) { /* * For all nd_repstat, the V4 reply includes a bitmap, * even NFSERR_BADXDR, which is what this will end up * returning. */ (void) nfsrv_putattrbit(nd, &retbits); } NFSEXITCODE2(error, nd); return (error); } /* * nfs lookup rpc * (Also performs lookup parent for v4) */ APPLESTATIC int nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nameidata named; vnode_t vp, dirp = NULL; int error = 0, dattr_ret = 1; struct nfsvattr nva, dattr; char *bufp; u_long *hashp; if (nd->nd_repstat) { nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } /* * For some reason, if dp is a symlink, the error * returned is supposed to be NFSERR_SYMLINK and not NFSERR_NOTDIR. */ if (dp->v_type == VLNK && (nd->nd_flag & ND_NFSV4)) { nd->nd_repstat = NFSERR_SYMLINK; vrele(dp); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (nd->nd_repstat) { if (dirp) { if (nd->nd_flag & ND_NFSV3) dattr_ret = nfsvno_getattr(dirp, &dattr, nd->nd_cred, p, 0); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } if (named.ni_startdir) vrele(named.ni_startdir); nfsvno_relpathbuf(&named); vp = named.ni_vp; if ((nd->nd_flag & ND_NFSV4) != 0 && !NFSVNO_EXPORTED(exp) && vp->v_type != VDIR && vp->v_type != VLNK) /* * Only allow lookup of VDIR and VLNK for traversal of * non-exported volumes during NFSv4 mounting. */ nd->nd_repstat = ENOENT; if (nd->nd_repstat == 0) nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); if (vpp != NULL && nd->nd_repstat == 0) *vpp = vp; else vput(vp); if (dirp) { if (nd->nd_flag & ND_NFSV3) dattr_ret = nfsvno_getattr(dirp, &dattr, nd->nd_cred, p, 0); vrele(dirp); } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, dattr_ret, &dattr); goto out; } if (nd->nd_flag & ND_NFSV2) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_fillattr(nd, &nva); } else if (nd->nd_flag & ND_NFSV3) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_postopattr(nd, 0, &nva); nfsrv_postopattr(nd, dattr_ret, &dattr); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs readlink service */ APPLESTATIC int nfsrvd_readlink(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; mbuf_t mp = NULL, mpend = NULL; int getret = 1, len; struct nfsvattr nva; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &nva); goto out; } if (vnode_vtype(vp) != VLNK) { if (nd->nd_flag & ND_NFSV2) nd->nd_repstat = ENXIO; else nd->nd_repstat = EINVAL; } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_readlink(vp, nd->nd_cred, p, &mp, &mpend, &len); if (nd->nd_flag & ND_NFSV3) getret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); if (nd->nd_repstat) goto out; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(len); mbuf_setnext(nd->nd_mb, mp); nd->nd_mb = mpend; nd->nd_bpos = NFSMTOD(mpend, caddr_t) + mbuf_len(mpend); out: NFSEXITCODE2(0, nd); return (0); } /* * nfs read service */ APPLESTATIC int nfsrvd_read(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int error = 0, cnt, getret = 1, reqlen, eof = 0; mbuf_t m2, m3; struct nfsvattr nva; off_t off = 0x0; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &nva); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_int32_t, *tl++); reqlen = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 2; reqlen = fxdr_unsigned(int, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3*NFSX_UNSIGNED); reqlen = fxdr_unsigned(int, *(tl + 6)); } if (reqlen > NFS_SRVMAXDATA(nd)) { reqlen = NFS_SRVMAXDATA(nd); } else if (reqlen < 0) { error = EBADRPC; goto nfsmout; } if (nd->nd_flag & ND_NFSV4) { stp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS); lop->lo_flags = NFSLCK_READ; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK1 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; off = fxdr_hyper(tl); lop->lo_first = off; tl += 2; lop->lo_end = off + reqlen; /* * Paranoia, just in case it wraps around. */ if (lop->lo_end < off) lop->lo_end = NFS64BITSSET; } if (vnode_vtype(vp) != VREG) { if (nd->nd_flag & ND_NFSV3) nd->nd_repstat = EINVAL; else nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR : EINVAL; } getret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); if (!nd->nd_repstat) nd->nd_repstat = getret; if (!nd->nd_repstat && (nva.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); goto out; } if (off >= nva.na_size) { cnt = 0; eof = 1; } else if (reqlen == 0) cnt = 0; else if ((off + reqlen) >= nva.na_size) { cnt = nva.na_size - off; eof = 1; } else cnt = reqlen; m3 = NULL; if (cnt > 0) { nd->nd_repstat = nfsvno_read(vp, off, cnt, nd->nd_cred, p, &m3, &m2); if (!(nd->nd_flag & ND_NFSV4)) { getret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); if (!nd->nd_repstat) nd->nd_repstat = getret; } if (nd->nd_repstat) { vput(vp); if (m3) mbuf_freem(m3); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &nva); goto out; } } vput(vp); if (nd->nd_flag & ND_NFSV2) { nfsrv_fillattr(nd, &nva); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); } else { if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &nva); NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(cnt); } else NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (eof) *tl++ = newnfs_true; else *tl++ = newnfs_false; } *tl = txdr_unsigned(cnt); if (m3) { mbuf_setnext(nd->nd_mb, m3); nd->nd_mb = m2; nd->nd_bpos = NFSMTOD(m2, caddr_t) + mbuf_len(m2); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs write service */ APPLESTATIC int nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { int i, cnt; u_int32_t *tl; mbuf_t mp; struct nfsvattr nva, forat; int aftat_ret = 1, retlen, len, error = 0, forat_ret = 1; int stable = NFSWRITE_FILESYNC; off_t off; struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; nfsv4stateid_t stateid; nfsquad_t clientid; if (nd->nd_repstat) { nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); goto out; } if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_int32_t, *++tl); tl += 2; retlen = len = fxdr_unsigned(int32_t, *tl); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED); off = fxdr_hyper(tl); tl += 3; stable = fxdr_unsigned(int, *tl++); retlen = len = fxdr_unsigned(int32_t, *tl); } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 4 * NFSX_UNSIGNED); stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK2 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } stp->ls_stateid.other[2] = *tl++; off = fxdr_hyper(tl); lop->lo_first = off; tl += 2; stable = fxdr_unsigned(int, *tl++); retlen = len = fxdr_unsigned(int32_t, *tl); lop->lo_end = off + len; /* * Paranoia, just in case it wraps around, which shouldn't * ever happen anyhow. */ if (lop->lo_end < lop->lo_first) lop->lo_end = NFS64BITSSET; } /* * Loop through the mbuf chain, counting how many mbufs are a * part of this write operation, so the iovec size is known. */ cnt = 0; mp = nd->nd_md; i = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - nd->nd_dpos; while (len > 0) { if (i > 0) { len -= i; cnt++; } mp = mbuf_next(mp); if (!mp) { if (len > 0) { error = EBADRPC; goto nfsmout; } } else i = mbuf_len(mp); } if (retlen > NFS_SRVMAXIO || retlen < 0) nd->nd_repstat = EIO; if (vnode_vtype(vp) != VREG && !nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nd->nd_repstat = EINVAL; else nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR : EINVAL; } forat_ret = nfsvno_getattr(vp, &forat, nd->nd_cred, p, 1); if (!nd->nd_repstat) nd->nd_repstat = forat_ret; if (!nd->nd_repstat && (forat.na_uid != nd->nd_cred->cr_uid || NFSVNO_EXSTRICTACCESS(exp))) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); } if (nd->nd_repstat) { vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); goto out; } /* * For NFS Version 2, it is not obvious what a write of zero length * should do, but I might as well be consistent with Version 3, * which is to return ok so long as there are no permission problems. */ if (retlen > 0) { nd->nd_repstat = nfsvno_write(vp, off, retlen, cnt, stable, nd->nd_md, nd->nd_dpos, nd->nd_cred, p); error = nfsm_advance(nd, NFSM_RNDUP(retlen), -1); if (error) goto nfsmout; } if (nd->nd_flag & ND_NFSV4) aftat_ret = 0; else aftat_ret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); vput(vp); if (!nd->nd_repstat) nd->nd_repstat = aftat_ret; if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) { if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva); if (nd->nd_repstat) goto out; NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(retlen); /* * If nfs_async is set, then pretend the write was FILESYNC. * Warning: Doing this violates RFC1813 and runs a risk * of data written by a client being lost when the server * crashes/reboots. */ if (stable == NFSWRITE_UNSTABLE && nfs_async == 0) *tl++ = txdr_unsigned(stable); else *tl++ = txdr_unsigned(NFSWRITE_FILESYNC); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(nfsboottime.tv_usec); } else if (!nd->nd_repstat) nfsrv_fillattr(nd, &nva); out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs create service (creates regular files for V2 and V3. Spec. files for V2.) * now does a truncate to 0 length via. setattr if it already exists * The core creation routine has been extracted out into nfsrv_creatsub(), * so it can also be used by nfsrv_open() for V4. */ APPLESTATIC int nfsrvd_create(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nfsv2_sattr *sp; struct nameidata named; u_int32_t *tl; int error = 0, tsize, dirfor_ret = 1, diraft_ret = 1; int how = NFSCREATE_UNCHECKED, exclusive_flag = 0; NFSDEV_T rdev = 0; vnode_t vp = NULL, dirp = NULL; fhandle_t fh; char *bufp; u_long *hashp; enum vtype vtyp; int32_t cverf[2], tverf[2] = { 0, 0 }; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { NFSVNO_ATTRINIT(&nva); if (nd->nd_flag & ND_NFSV2) { NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vtyp = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode)); if (vtyp == VNON) vtyp = VREG; NFSVNO_SETATTRVAL(&nva, type, vtyp); NFSVNO_SETATTRVAL(&nva, mode, nfstov_mode(sp->sa_mode)); switch (nva.na_type) { case VREG: tsize = fxdr_unsigned(int32_t, sp->sa_size); if (tsize != -1) NFSVNO_SETATTRVAL(&nva, size, (u_quad_t)tsize); break; case VCHR: case VBLK: case VFIFO: rdev = fxdr_unsigned(NFSDEV_T, sp->sa_size); break; default: break; } } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSCREATE_GUARDED: case NFSCREATE_UNCHECKED: error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p); if (error) goto nfsmout; break; case NFSCREATE_EXCLUSIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; exclusive_flag = 1; break; } NFSVNO_SETATTRVAL(&nva, type, VREG); } } if (nd->nd_repstat) { nfsvno_relpathbuf(&named); if (nd->nd_flag & ND_NFSV3) { dirfor_ret = nfsvno_getattr(dp, &dirfor, nd->nd_cred, p, 1); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } vput(dp); goto out; } nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); if (dirp) { if (nd->nd_flag & ND_NFSV2) { vrele(dirp); dirp = NULL; } else { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); } } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) vrele(dirp); goto out; } if (!(nd->nd_flag & ND_NFSV2)) { switch (how) { case NFSCREATE_GUARDED: if (named.ni_vp) nd->nd_repstat = EEXIST; break; case NFSCREATE_UNCHECKED: break; case NFSCREATE_EXCLUSIVE: if (named.ni_vp == NULL) NFSVNO_SETATTRVAL(&nva, mode, 0); break; } } /* * Iff doesn't exist, create it * otherwise just truncate to 0 length * should I set the mode too ? */ nd->nd_repstat = nfsvno_createsub(nd, &named, &vp, &nva, &exclusive_flag, cverf, rdev, p, exp); if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); vput(vp); if (!nd->nd_repstat) { tverf[0] = nva.na_atime.tv_sec; tverf[1] = nva.na_atime.tv_nsec; } } if (nd->nd_flag & ND_NFSV2) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0); nfsrv_fillattr(nd, &nva); } } else { if (exclusive_flag && !nd->nd_repstat && (cverf[0] != tverf[0] || cverf[1] != tverf[1])) nd->nd_repstat = EEXIST; diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, 0); vrele(dirp); if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(dp); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } /* * nfs v3 mknod service (and v4 create) */ APPLESTATIC int nfsrvd_mknod(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; u_int32_t *tl; struct nameidata named; int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen; u_int32_t major, minor; enum vtype vtyp = VNON; nfstype nfs4type = NFNON; vnode_t vp, dirp = NULL; nfsattrbit_t attrbits; char *bufp = NULL, *pathcp = NULL; u_long *hashp, cnflags; NFSACL_T *aclp = NULL; NFSVNO_ATTRINIT(&nva); cnflags = (LOCKPARENT | SAVESTART); if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif /* * For V4, the creation stuff is here, Yuck! */ if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); vtyp = nfsv34tov_type(*tl); nfs4type = fxdr_unsigned(nfstype, *tl); switch (nfs4type) { case NFLNK: error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen); if (error) goto nfsmout; break; case NFCHR: case NFBLK: NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_int32_t, *tl++); minor = fxdr_unsigned(u_int32_t, *tl); nva.na_rdev = NFSMAKEDEV(major, minor); break; case NFSOCK: case NFFIFO: break; case NFDIR: cnflags = (LOCKPARENT | SAVENAME); break; default: nd->nd_repstat = NFSERR_BADTYPE; vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif goto out; } } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, cnflags | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); vtyp = nfsv34tov_type(*tl); } error = nfsrv_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error) goto nfsmout; nva.na_type = vtyp; if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV3) && (vtyp == VCHR || vtyp == VBLK)) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_int32_t, *tl++); minor = fxdr_unsigned(u_int32_t, *tl); nva.na_rdev = NFSMAKEDEV(major, minor); } } dirfor_ret = nfsvno_getattr(dp, &dirfor, nd->nd_cred, p, 0); if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) { if (!dirfor_ret && NFSVNO_ISSETGID(&nva) && dirfor.na_gid == nva.na_gid) NFSVNO_UNSET(&nva, gid); nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); } if (nd->nd_repstat) { vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif nfsvno_relpathbuf(&named); if (pathcp) - FREE(pathcp, M_TEMP); + free(pathcp, M_TEMP); if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } /* * Yuck! For V4, mkdir and link are here and some V4 clients don't fill * in va_mode, so we'll have to set a default here. */ if (NFSVNO_NOTSETMODE(&nva)) { if (vtyp == VLNK) nva.na_mode = 0755; else nva.na_mode = 0400; } if (vtyp == VDIR) named.ni_cnd.cn_flags |= WILLBEDIR; nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); if (nd->nd_repstat) { if (dirp) { if (nd->nd_flag & ND_NFSV3) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); vrele(dirp); } #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (dirp) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); if ((nd->nd_flag & ND_NFSV4) && (vtyp == VDIR || vtyp == VLNK)) { if (vtyp == VDIR) { nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p, exp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif goto out; } else if (vtyp == VLNK) { nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p, exp, pathcp, pathlen); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif - FREE(pathcp, M_TEMP); + free(pathcp, M_TEMP); goto out; } } nd->nd_repstat = nfsvno_mknod(&named, &nva, nd->nd_cred, p); if (!nd->nd_repstat) { vp = named.ni_vp; nfsrv_fixattr(nd, vp, &nva, aclp, p, &attrbits, exp); nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if ((nd->nd_flag & ND_NFSV3) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); if (vpp != NULL && nd->nd_repstat == 0) { NFSVOPUNLOCK(vp, 0); *vpp = vp; } else vput(vp); } diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, 0); vrele(dirp); if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } else { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); (void) nfsrv_putattrbit(nd, &attrbits); } } if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif out: NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (bufp) nfsvno_relpathbuf(&named); if (pathcp) - FREE(pathcp, M_TEMP); + free(pathcp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * nfs remove service */ APPLESTATIC int nfsrvd_remove(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1; vnode_t dirp = NULL; struct nfsvattr dirfor, diraft; char *bufp; u_long *hashp; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, DELETE, LOCKPARENT | LOCKLEAF); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vput(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); } else { vput(dp); nfsvno_relpathbuf(&named); } if (dirp) { if (!(nd->nd_flag & ND_NFSV2)) { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); } else { vrele(dirp); dirp = NULL; } } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { if (vnode_vtype(named.ni_vp) == VDIR) nd->nd_repstat = nfsvno_rmdirsub(&named, 1, nd->nd_cred, p, exp); else nd->nd_repstat = nfsvno_removesub(&named, 1, nd->nd_cred, p, exp); } else if (nd->nd_procnum == NFSPROC_RMDIR) { nd->nd_repstat = nfsvno_rmdirsub(&named, 0, nd->nd_cred, p, exp); } else { nd->nd_repstat = nfsvno_removesub(&named, 0, nd->nd_cred, p, exp); } } if (!(nd->nd_flag & ND_NFSV2)) { if (dirp) { diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, 0); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); } } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs rename service */ APPLESTATIC int nfsrvd_rename(struct nfsrv_descript *nd, int isdgram, vnode_t dp, vnode_t todp, NFSPROC_T *p, struct nfsexstuff *exp, struct nfsexstuff *toexp) { u_int32_t *tl; int error = 0, fdirfor_ret = 1, fdiraft_ret = 1; int tdirfor_ret = 1, tdiraft_ret = 1; struct nameidata fromnd, tond; vnode_t fdirp = NULL, tdirp = NULL, tdp = NULL; struct nfsvattr fdirfor, fdiraft, tdirfor, tdiraft; struct nfsexstuff tnes; struct nfsrvfh tfh; char *bufp, *tbufp = NULL; u_long *hashp; fhandle_t fh; if (nd->nd_repstat) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); goto out; } if (!(nd->nd_flag & ND_NFSV2)) fdirfor_ret = nfsvno_getattr(dp, &fdirfor, nd->nd_cred, p, 1); tond.ni_cnd.cn_nameiop = 0; tond.ni_startdir = NULL; NFSNAMEICNDSET(&fromnd.ni_cnd, nd->nd_cred, DELETE, WANTPARENT | SAVESTART); nfsvno_setpathbuf(&fromnd, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &fromnd.ni_pathlen); if (error) { vput(dp); if (todp) vrele(todp); nfsvno_relpathbuf(&fromnd); goto out; } /* * Unlock dp in this code section, so it is unlocked before * tdp gets locked. This avoids a potential LOR if tdp is the * parent directory of dp. */ if (nd->nd_flag & ND_NFSV4) { tdp = todp; tnes = *toexp; if (dp != tdp) { NFSVOPUNLOCK(dp, 0); tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd->nd_cred, p, 0); /* Might lock tdp. */ } else { tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd->nd_cred, p, 1); NFSVOPUNLOCK(dp, 0); } } else { tfh.nfsrvfh_len = 0; error = nfsrv_mtofh(nd, &tfh); if (error == 0) error = nfsvno_getfh(dp, &fh, p); if (error) { vput(dp); /* todp is always NULL except NFSv4 */ nfsvno_relpathbuf(&fromnd); goto out; } /* If this is the same file handle, just VREF() the vnode. */ if (tfh.nfsrvfh_len == NFSX_MYFH && !NFSBCMP(tfh.nfsrvfh_data, &fh, NFSX_MYFH)) { VREF(dp); tdp = dp; tnes = *exp; tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd->nd_cred, p, 1); NFSVOPUNLOCK(dp, 0); } else { NFSVOPUNLOCK(dp, 0); nd->nd_cred->cr_uid = nd->nd_saveduid; nfsd_fhtovp(nd, &tfh, LK_EXCLUSIVE, &tdp, &tnes, NULL, 0, p); /* Locks tdp. */ if (tdp) { tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd->nd_cred, p, 1); NFSVOPUNLOCK(tdp, 0); } } } NFSNAMEICNDSET(&tond.ni_cnd, nd->nd_cred, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART); nfsvno_setpathbuf(&tond, &tbufp, &hashp); if (!nd->nd_repstat) { error = nfsrv_parsename(nd, tbufp, hashp, &tond.ni_pathlen); if (error) { if (tdp) vrele(tdp); vrele(dp); nfsvno_relpathbuf(&fromnd); nfsvno_relpathbuf(&tond); goto out; } } if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } if (tdp) vrele(tdp); vrele(dp); nfsvno_relpathbuf(&fromnd); nfsvno_relpathbuf(&tond); goto out; } /* * Done parsing, now down to business. */ nd->nd_repstat = nfsvno_namei(nd, &fromnd, dp, 0, exp, p, &fdirp); if (nd->nd_repstat) { if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } if (fdirp) vrele(fdirp); if (tdp) vrele(tdp); nfsvno_relpathbuf(&tond); goto out; } if (vnode_vtype(fromnd.ni_vp) == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; nd->nd_repstat = nfsvno_namei(nd, &tond, tdp, 0, &tnes, p, &tdirp); nd->nd_repstat = nfsvno_rename(&fromnd, &tond, nd->nd_repstat, nd->nd_flag, nd->nd_cred, p); if (fdirp) fdiraft_ret = nfsvno_getattr(fdirp, &fdiraft, nd->nd_cred, p, 0); if (tdirp) tdiraft_ret = nfsvno_getattr(tdirp, &tdiraft, nd->nd_cred, p, 0); if (fdirp) vrele(fdirp); if (tdirp) vrele(tdirp); if (nd->nd_flag & ND_NFSV3) { nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 10 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(fdirfor.na_filerev, tl); tl += 2; txdr_hyper(fdiraft.na_filerev, tl); tl += 2; *tl++ = newnfs_false; txdr_hyper(tdirfor.na_filerev, tl); tl += 2; txdr_hyper(tdiraft.na_filerev, tl); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs link service */ APPLESTATIC int nfsrvd_link(struct nfsrv_descript *nd, int isdgram, vnode_t vp, vnode_t tovp, NFSPROC_T *p, struct nfsexstuff *exp, struct nfsexstuff *toexp) { struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1, getret = 1; vnode_t dirp = NULL, dp = NULL; struct nfsvattr dirfor, diraft, at; struct nfsexstuff tnes; struct nfsrvfh dfh; char *bufp; u_long *hashp; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSVOPUNLOCK(vp, 0); if (vnode_vtype(vp) == VDIR) { if (nd->nd_flag & ND_NFSV4) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; if (tovp) vrele(tovp); } if (!nd->nd_repstat) { if (nd->nd_flag & ND_NFSV4) { dp = tovp; tnes = *toexp; } else { error = nfsrv_mtofh(nd, &dfh); if (error) { vrele(vp); /* tovp is always NULL unless NFSv4 */ goto out; } nfsd_fhtovp(nd, &dfh, LK_EXCLUSIVE, &dp, &tnes, NULL, 0, p); if (dp) NFSVOPUNLOCK(dp, 0); } } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVENAME | NOCACHE); if (!nd->nd_repstat) { nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(vp); if (dp) vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, &tnes, p, &dirp); } else { if (dp) vrele(dp); nfsvno_relpathbuf(&named); } } if (dirp) { if (nd->nd_flag & ND_NFSV2) { vrele(dirp); dirp = NULL; } else { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); } } if (!nd->nd_repstat) nd->nd_repstat = nfsvno_link(&named, vp, nd->nd_cred, p, exp); if (nd->nd_flag & ND_NFSV3) getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 0); if (dirp) { diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, 0); vrele(dirp); } vrele(vp); if (nd->nd_flag & ND_NFSV3) { nfsrv_postopattr(nd, getret, &at); nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); } out: NFSEXITCODE2(error, nd); return (error); } /* * nfs symbolic link service */ APPLESTATIC int nfsrvd_symlink(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nameidata named; int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen; vnode_t dirp = NULL; char *bufp, *pathcp = NULL; u_long *hashp; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (vpp) *vpp = NULL; NFSVNO_ATTRINIT(&nva); NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVESTART | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (!error && !nd->nd_repstat) error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen); if (error) { vrele(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) { vrele(dirp); dirp = NULL; } /* * And call nfsrvd_symlinksub() to do the common code. It will * return EBADRPC upon a parsing error, 0 otherwise. */ if (!nd->nd_repstat) { if (dirp != NULL) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, NULL, NULL, p, exp, pathcp, pathlen); } else if (dirp != NULL) { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); vrele(dirp); } if (pathcp) - FREE(pathcp, M_TEMP); + free(pathcp, M_TEMP); if (nd->nd_flag & ND_NFSV3) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } out: NFSEXITCODE2(error, nd); return (error); } /* * Common code for creating a symbolic link. */ static void nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp, int pathlen) { u_int32_t *tl; nd->nd_repstat = nfsvno_symlink(ndp, nvap, pathcp, pathlen, !(nd->nd_flag & ND_NFSV2), nd->nd_saveduid, nd->nd_cred, p, exp); if (!nd->nd_repstat && !(nd->nd_flag & ND_NFSV2)) { nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp); if (nd->nd_flag & ND_NFSV3) { nd->nd_repstat = nfsvno_getfh(ndp->ni_vp, fhp, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(ndp->ni_vp, nvap, nd->nd_cred, p, 1); } if (vpp != NULL && nd->nd_repstat == 0) { NFSVOPUNLOCK(ndp->ni_vp, 0); *vpp = ndp->ni_vp; } else vput(ndp->ni_vp); } if (dirp) { *diraft_retp = nfsvno_getattr(dirp, diraftp, nd->nd_cred, p, 0); vrele(dirp); } if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirforp->na_filerev, tl); tl += 2; txdr_hyper(diraftp->na_filerev, tl); (void) nfsrv_putattrbit(nd, attrbitp); } NFSEXITCODE2(0, nd); } /* * nfs mkdir service */ APPLESTATIC int nfsrvd_mkdir(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p, struct nfsexstuff *exp) { struct nfsvattr nva, dirfor, diraft; struct nameidata named; u_int32_t *tl; int error = 0, dirfor_ret = 1, diraft_ret = 1; vnode_t dirp = NULL; char *bufp; u_long *hashp; if (nd->nd_repstat) { nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | SAVENAME | NOCACHE); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) goto nfsmout; if (!nd->nd_repstat) { NFSVNO_ATTRINIT(&nva); if (nd->nd_flag & ND_NFSV3) { error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p); if (error) goto nfsmout; } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nva.na_mode = nfstov_mode(*tl++); } } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) { vrele(dirp); dirp = NULL; } if (nd->nd_repstat) { if (dirp != NULL) { dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); vrele(dirp); } if (nd->nd_flag & ND_NFSV3) nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); goto out; } if (dirp != NULL) dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0); /* * Call nfsrvd_mkdirsub() for the code common to V4 as well. */ nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft, &diraft_ret, NULL, NULL, p, exp); if (nd->nd_flag & ND_NFSV3) { if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1); nfsrv_postopattr(nd, 0, &nva); } nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft); } else if (!nd->nd_repstat) { (void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0); nfsrv_fillattr(nd, &nva); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } /* * Code common to mkdir for V2,3 and 4. */ static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp, struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp, vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp, int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp) { vnode_t vp; u_int32_t *tl; NFSVNO_SETATTRVAL(nvap, type, VDIR); nd->nd_repstat = nfsvno_mkdir(ndp, nvap, nd->nd_saveduid, nd->nd_cred, p, exp); if (!nd->nd_repstat) { vp = ndp->ni_vp; nfsrv_fixattr(nd, vp, nvap, aclp, p, attrbitp, exp); nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, nvap, nd->nd_cred, p, 1); if (vpp && !nd->nd_repstat) { NFSVOPUNLOCK(vp, 0); *vpp = vp; } else { vput(vp); } } if (dirp) { *diraft_retp = nfsvno_getattr(dirp, diraftp, nd->nd_cred, p, 0); vrele(dirp); } if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = newnfs_false; txdr_hyper(dirforp->na_filerev, tl); tl += 2; txdr_hyper(diraftp->na_filerev, tl); (void) nfsrv_putattrbit(nd, attrbitp); } NFSEXITCODE2(0, nd); } /* * nfs commit service */ APPLESTATIC int nfsrvd_commit(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct nfsvattr bfor, aft; u_int32_t *tl; int error = 0, for_ret = 1, aft_ret = 1, cnt; u_int64_t off; if (nd->nd_repstat) { nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft); goto out; } /* Return NFSERR_ISDIR in NFSv4 when commit on a directory. */ if (vp->v_type != VREG) { if (nd->nd_flag & ND_NFSV3) error = NFSERR_NOTSUPP; else error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); /* * XXX At this time VOP_FSYNC() does not accept offset and byte * count parameters, so these arguments are useless (someday maybe). */ off = fxdr_hyper(tl); tl += 2; cnt = fxdr_unsigned(int, *tl); if (nd->nd_flag & ND_NFSV3) for_ret = nfsvno_getattr(vp, &bfor, nd->nd_cred, p, 1); nd->nd_repstat = nfsvno_fsync(vp, off, cnt, nd->nd_cred, p); if (nd->nd_flag & ND_NFSV3) { aft_ret = nfsvno_getattr(vp, &aft, nd->nd_cred, p, 1); nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft); } vput(vp); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); *tl++ = txdr_unsigned(nfsboottime.tv_sec); *tl = txdr_unsigned(nfsboottime.tv_usec); } out: NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfs statfs service */ APPLESTATIC int nfsrvd_statfs(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct statfs *sf; u_int32_t *tl; int getret = 1; struct nfsvattr at; u_quad_t tval; sf = NULL; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); nd->nd_repstat = nfsvno_statfs(vp, sf); getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); vput(vp); if (nd->nd_flag & ND_NFSV3) nfsrv_postopattr(nd, getret, &at); if (nd->nd_repstat) goto out; if (nd->nd_flag & ND_NFSV2) { NFSM_BUILD(tl, u_int32_t *, NFSX_V2STATFS); *tl++ = txdr_unsigned(NFS_V2MAXDATA); *tl++ = txdr_unsigned(sf->f_bsize); *tl++ = txdr_unsigned(sf->f_blocks); *tl++ = txdr_unsigned(sf->f_bfree); *tl = txdr_unsigned(sf->f_bavail); } else { NFSM_BUILD(tl, u_int32_t *, NFSX_V3STATFS); tval = (u_quad_t)sf->f_blocks; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_bfree; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_bavail; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_files; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_ffree; txdr_hyper(tval, tl); tl += 2; tval = (u_quad_t)sf->f_ffree; txdr_hyper(tval, tl); tl += 2; *tl = 0; } out: free(sf, M_STATFS); NFSEXITCODE2(0, nd); return (0); } /* * nfs fsinfo service */ APPLESTATIC int nfsrvd_fsinfo(struct nfsrv_descript *nd, int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsfsinfo fs; int getret = 1; struct nfsvattr at; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); nfsvno_getfs(&fs, isdgram); vput(vp); nfsrv_postopattr(nd, getret, &at); NFSM_BUILD(tl, u_int32_t *, NFSX_V3FSINFO); *tl++ = txdr_unsigned(fs.fs_rtmax); *tl++ = txdr_unsigned(fs.fs_rtpref); *tl++ = txdr_unsigned(fs.fs_rtmult); *tl++ = txdr_unsigned(fs.fs_wtmax); *tl++ = txdr_unsigned(fs.fs_wtpref); *tl++ = txdr_unsigned(fs.fs_wtmult); *tl++ = txdr_unsigned(fs.fs_dtpref); txdr_hyper(fs.fs_maxfilesize, tl); tl += 2; txdr_nfsv3time(&fs.fs_timedelta, tl); tl += 2; *tl = txdr_unsigned(fs.fs_properties); out: NFSEXITCODE2(0, nd); return (0); } /* * nfs pathconf service */ APPLESTATIC int nfsrvd_pathconf(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { struct nfsv3_pathconf *pc; int getret = 1; long linkmax, namemax, chownres, notrunc; struct nfsvattr at; if (nd->nd_repstat) { nfsrv_postopattr(nd, getret, &at); goto out; } nd->nd_repstat = nfsvno_pathconf(vp, _PC_LINK_MAX, &linkmax, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_pathconf(vp, _PC_NAME_MAX, &namemax, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat=nfsvno_pathconf(vp, _PC_CHOWN_RESTRICTED, &chownres, nd->nd_cred, p); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_pathconf(vp, _PC_NO_TRUNC, ¬runc, nd->nd_cred, p); getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1); vput(vp); nfsrv_postopattr(nd, getret, &at); if (!nd->nd_repstat) { NFSM_BUILD(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); pc->pc_linkmax = txdr_unsigned(linkmax); pc->pc_namemax = txdr_unsigned(namemax); pc->pc_notrunc = txdr_unsigned(notrunc); pc->pc_chownrestricted = txdr_unsigned(chownres); /* * These should probably be supported by VOP_PATHCONF(), but * until msdosfs is exportable (why would you want to?), the * Unix defaults should be ok. */ pc->pc_caseinsensitive = newnfs_false; pc->pc_casepreserving = newnfs_true; } out: NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 lock service */ APPLESTATIC int nfsrvd_lock(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp = NULL; struct nfslock *lop; struct nfslockconflict cf; int error = 0; u_short flags = NFSLCK_LOCK, lflags; u_int64_t offset, len; nfsv4stateid_t stateid; nfsquad_t clientid; NFSM_DISSECT(tl, u_int32_t *, 7 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: lflags = NFSLCK_READ; break; case NFSV4LOCKT_WRITEW: flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: lflags = NFSLCK_WRITE; break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } if (*tl++ == newnfs_true) flags |= NFSLCK_RECLAIM; offset = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); tl += 2; if (*tl == newnfs_true) flags |= NFSLCK_OPENTOLOCK; if (flags & NFSLCK_OPENTOLOCK) { NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED + NFSX_STATEID); i = fxdr_unsigned(int, *(tl+4+(NFSX_STATEID / NFSX_UNSIGNED))); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } - MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + i, + stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = nd->nd_rp; stp->ls_seq = fxdr_unsigned(int, *tl++); stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); stp->ls_opentolockseq = fxdr_unsigned(int, *tl++); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK3 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; } else { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); - MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate), + stp = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); stp->ls_seq = fxdr_unsigned(int, *tl); clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK4 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } } - MALLOC(lop, struct nfslock *, sizeof (struct nfslock), + lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); lop->lo_first = offset; if (len == NFS64BITSSET) { lop->lo_end = NFS64BITSSET; } else { lop->lo_end = offset + len; if (lop->lo_end <= lop->lo_first) nd->nd_repstat = NFSERR_INVAL; } lop->lo_flags = lflags; stp->ls_flags = flags; stp->ls_uid = nd->nd_cred->cr_uid; /* * Do basic access checking. */ if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } if (!nd->nd_repstat) { if (lflags & NFSLCK_WRITE) { nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } else { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } } /* * We call nfsrv_lockctrl() even if nd_repstat set, so that the * seqid# gets updated. nfsrv_lockctrl() will return the value * of nd_repstat, if it gets that far. */ nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, &stateid, exp, nd, p); if (lop) - FREE((caddr_t)lop, M_NFSDLOCK); + free(lop, M_NFSDLOCK); if (stp) - FREE((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } else if (nd->nd_repstat == NFSERR_DENIED) { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); txdr_hyper(cf.cl_first, tl); tl += 2; if (cf.cl_end == NFS64BITSSET) len = NFS64BITSSET; else len = cf.cl_end - cf.cl_first; txdr_hyper(len, tl); tl += 2; if (cf.cl_flags == NFSLCK_WRITE) *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); else *tl++ = txdr_unsigned(NFSV4LOCKT_READ); *tl++ = stateid.other[0]; *tl = stateid.other[1]; (void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen); } vput(vp); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); if (stp) - free((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 lock test service */ APPLESTATIC int nfsrvd_lockt(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp = NULL; struct nfslock lo, *lop = &lo; struct nfslockconflict cf; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; u_int64_t len; NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *(tl + 7)); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } - MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + i, + stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = NULL; stp->ls_flags = NFSLCK_TEST; stp->ls_uid = nd->nd_cred->cr_uid; i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: lo.lo_flags = NFSLCK_READ; break; case NFSV4LOCKT_WRITEW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: lo.lo_flags = NFSLCK_WRITE; break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } lo.lo_first = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); if (len == NFS64BITSSET) { lo.lo_end = NFS64BITSSET; } else { lo.lo_end = lo.lo_first + len; if (lo.lo_end <= lo.lo_first) nd->nd_repstat = NFSERR_INVAL; } tl += 2; clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK5 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } if (!nd->nd_repstat) nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, &stateid, exp, nd, p); if (nd->nd_repstat) { if (nd->nd_repstat == NFSERR_DENIED) { NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); txdr_hyper(cf.cl_first, tl); tl += 2; if (cf.cl_end == NFS64BITSSET) len = NFS64BITSSET; else len = cf.cl_end - cf.cl_first; txdr_hyper(len, tl); tl += 2; if (cf.cl_flags == NFSLCK_WRITE) *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE); else *tl++ = txdr_unsigned(NFSV4LOCKT_READ); *tl++ = stp->ls_stateid.other[0]; *tl = stp->ls_stateid.other[1]; (void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen); } } vput(vp); if (stp) - FREE((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); if (stp) - free((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 unlock service */ APPLESTATIC int nfsrvd_locku(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate *stp; struct nfslock *lop; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; u_int64_t len; NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED + NFSX_STATEID); - MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate), + stp = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); - MALLOC(lop, struct nfslock *, sizeof (struct nfslock), + lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); stp->ls_flags = NFSLCK_UNLOCK; lop->lo_flags = NFSLCK_UNLOCK; stp->ls_op = nd->nd_rp; i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4LOCKT_READW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_READ: break; case NFSV4LOCKT_WRITEW: stp->ls_flags |= NFSLCK_BLOCKING; case NFSV4LOCKT_WRITE: break; default: nd->nd_repstat = NFSERR_BADXDR; free(stp, M_NFSDSTATE); free(lop, M_NFSDLOCK); goto nfsmout; } stp->ls_ownerlen = 0; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_seq = fxdr_unsigned(int, *tl++); stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); lop->lo_first = fxdr_hyper(tl); tl += 2; len = fxdr_hyper(tl); if (len == NFS64BITSSET) { lop->lo_end = NFS64BITSSET; } else { lop->lo_end = lop->lo_first + len; if (lop->lo_end <= lop->lo_first) nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK6 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) nd->nd_repstat = NFSERR_ISDIR; else nd->nd_repstat = NFSERR_INVAL; } /* * Call nfsrv_lockctrl() even if nd_repstat is set, so that the * seqid# gets incremented. nfsrv_lockctrl() will return the * value of nd_repstat, if it gets that far. */ nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, &stateid, exp, nd, p); if (stp) - FREE((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); if (lop) - free((caddr_t)lop, M_NFSDLOCK); + free(lop, M_NFSDLOCK); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 open service */ APPLESTATIC int nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, __unused fhandle_t *fhp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int i, retext; struct nfsstate *stp = NULL; int error = 0, create, claim, exclusive_flag = 0; u_int32_t rflags = NFSV4OPEN_LOCKTYPEPOSIX, acemask; int how = NFSCREATE_UNCHECKED; int32_t cverf[2], tverf[2] = { 0, 0 }; vnode_t vp = NULL, dirp = NULL; struct nfsvattr nva, dirfor, diraft; struct nameidata named; nfsv4stateid_t stateid, delegstateid; nfsattrbit_t attrbits; nfsquad_t clientid; char *bufp = NULL; u_long *hashp; NFSACL_T *aclp = NULL; #ifdef NFS4_ACL_EXTATTR_NAME aclp = acl_alloc(M_WAITOK); aclp->acl_cnt = 0; #endif NFSZERO_ATTRBIT(&attrbits); named.ni_startdir = NULL; named.ni_cnd.cn_nameiop = 0; NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *(tl + 5)); if (i <= 0 || i > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } - MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + i, + stp = malloc(sizeof (struct nfsstate) + i, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = i; stp->ls_op = nd->nd_rp; stp->ls_flags = NFSLCK_OPEN; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); i = fxdr_unsigned(int, *tl++); retext = 0; if ((i & (NFSV4OPEN_WANTDELEGMASK | NFSV4OPEN_WANTSIGNALDELEG | NFSV4OPEN_WANTPUSHDELEG)) != 0 && (nd->nd_flag & ND_NFSV41) != 0) { retext = 1; /* For now, ignore these. */ i &= ~(NFSV4OPEN_WANTPUSHDELEG | NFSV4OPEN_WANTSIGNALDELEG); switch (i & NFSV4OPEN_WANTDELEGMASK) { case NFSV4OPEN_WANTANYDELEG: stp->ls_flags |= (NFSLCK_WANTRDELEG | NFSLCK_WANTWDELEG); i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTREADDELEG: stp->ls_flags |= NFSLCK_WANTRDELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTWRITEDELEG: stp->ls_flags |= NFSLCK_WANTWDELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTNODELEG: stp->ls_flags |= NFSLCK_WANTNODELEG; i &= ~NFSV4OPEN_WANTDELEGMASK; break; case NFSV4OPEN_WANTCANCEL: printf("NFSv4: ignore Open WantCancel\n"); i &= ~NFSV4OPEN_WANTDELEGMASK; break; default: /* nd_repstat will be set to NFSERR_INVAL below. */ break; } } switch (i) { case NFSV4OPEN_ACCESSREAD: stp->ls_flags |= NFSLCK_READACCESS; break; case NFSV4OPEN_ACCESSWRITE: stp->ls_flags |= NFSLCK_WRITEACCESS; break; case NFSV4OPEN_ACCESSBOTH: stp->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); break; default: nd->nd_repstat = NFSERR_INVAL; } i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4OPEN_DENYNONE: break; case NFSV4OPEN_DENYREAD: stp->ls_flags |= NFSLCK_READDENY; break; case NFSV4OPEN_DENYWRITE: stp->ls_flags |= NFSLCK_WRITEDENY; break; case NFSV4OPEN_DENYBOTH: stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY); break; default: nd->nd_repstat = NFSERR_INVAL; } clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK7 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen); if (error) goto nfsmout; NFSVNO_ATTRINIT(&nva); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); create = fxdr_unsigned(int, *tl); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(dp, &dirfor, nd->nd_cred, p, 0); if (create == NFSV4OPEN_CREATE) { nva.na_type = VREG; nva.na_mode = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSCREATE_UNCHECKED: case NFSCREATE_GUARDED: error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p); if (error) goto nfsmout; /* * If the na_gid being set is the same as that of * the directory it is going in, clear it, since * that is what will be set by default. This allows * a user that isn't in that group to do the create. */ if (!nd->nd_repstat && NFSVNO_ISSETGID(&nva) && nva.na_gid == dirfor.na_gid) NFSVNO_UNSET(&nva, gid); if (!nd->nd_repstat) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); break; case NFSCREATE_EXCLUSIVE: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; break; case NFSCREATE_EXCLUSIVE41: NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); cverf[0] = *tl++; cverf[1] = *tl; error = nfsv4_sattr(nd, vp, &nva, &attrbits, aclp, p); if (error != 0) goto nfsmout; if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET)) nd->nd_repstat = NFSERR_INVAL; /* * If the na_gid being set is the same as that of * the directory it is going in, clear it, since * that is what will be set by default. This allows * a user that isn't in that group to do the create. */ if (nd->nd_repstat == 0 && NFSVNO_ISSETGID(&nva) && nva.na_gid == dirfor.na_gid) NFSVNO_UNSET(&nva, gid); if (nd->nd_repstat == 0) nd->nd_repstat = nfsrv_checkuidgid(nd, &nva); break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } } else if (create != NFSV4OPEN_NOCREATE) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } /* * Now, handle the claim, which usually includes looking up a * name in the directory referenced by dp. The exception is * NFSV4OPEN_CLAIMPREVIOUS. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); claim = fxdr_unsigned(int, *tl); if (claim == NFSV4OPEN_CLAIMDELEGATECUR) { NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl,(caddr_t)stateid.other,NFSX_STATEIDOTHER); stp->ls_flags |= NFSLCK_DELEGCUR; } else if (claim == NFSV4OPEN_CLAIMDELEGATEPREV) { stp->ls_flags |= NFSLCK_DELEGPREV; } if (claim == NFSV4OPEN_CLAIMNULL || claim == NFSV4OPEN_CLAIMDELEGATECUR || claim == NFSV4OPEN_CLAIMDELEGATEPREV) { if (!nd->nd_repstat && create == NFSV4OPEN_CREATE && claim != NFSV4OPEN_CLAIMNULL) nd->nd_repstat = NFSERR_INVAL; if (nd->nd_repstat) { nd->nd_repstat = nfsrv_opencheck(clientid, &stateid, stp, NULL, nd, p, nd->nd_repstat); goto nfsmout; } if (create == NFSV4OPEN_CREATE) NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE); else NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif - FREE((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); nfsvno_relpathbuf(&named); NFSEXITCODE2(error, nd); return (error); } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp); } else { vrele(dp); nfsvno_relpathbuf(&named); } if (create == NFSV4OPEN_CREATE) { switch (how) { case NFSCREATE_UNCHECKED: if (named.ni_vp) { /* * Clear the setable attribute bits, except * for Size, if it is being truncated. */ NFSZERO_ATTRBIT(&attrbits); if (NFSVNO_ISSETSIZE(&nva)) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); } break; case NFSCREATE_GUARDED: if (named.ni_vp && !nd->nd_repstat) nd->nd_repstat = EEXIST; break; case NFSCREATE_EXCLUSIVE: exclusive_flag = 1; if (!named.ni_vp) nva.na_mode = 0; break; case NFSCREATE_EXCLUSIVE41: exclusive_flag = 1; break; } } nfsvno_open(nd, &named, clientid, &stateid, stp, &exclusive_flag, &nva, cverf, create, aclp, &attrbits, nd->nd_cred, p, exp, &vp); } else if (claim == NFSV4OPEN_CLAIMPREVIOUS || claim == NFSV4OPEN_CLAIMFH) { if (claim == NFSV4OPEN_CLAIMPREVIOUS) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); switch (i) { case NFSV4OPEN_DELEGATEREAD: stp->ls_flags |= NFSLCK_DELEGREAD; break; case NFSV4OPEN_DELEGATEWRITE: stp->ls_flags |= NFSLCK_DELEGWRITE; case NFSV4OPEN_DELEGATENONE: break; default: nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } stp->ls_flags |= NFSLCK_RECLAIM; } else { /* CLAIM_NULL_FH */ if (nd->nd_repstat == 0 && create == NFSV4OPEN_CREATE) nd->nd_repstat = NFSERR_INVAL; } vp = dp; NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) == 0) nd->nd_repstat = nfsrv_opencheck(clientid, &stateid, stp, vp, nd, p, nd->nd_repstat); else nd->nd_repstat = NFSERR_PERM; } else { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } /* * Do basic access checking. */ if (!nd->nd_repstat && vnode_vtype(vp) != VREG) { /* * The IETF working group decided that this is the correct * error return for all non-regular files. */ nd->nd_repstat = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_SYMLINK; } if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_WRITEACCESS)) nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_READACCESS)) { nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); if (nd->nd_repstat) nd->nd_repstat = nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL); } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); if (!nd->nd_repstat) { tverf[0] = nva.na_atime.tv_sec; tverf[1] = nva.na_atime.tv_nsec; } } if (!nd->nd_repstat && exclusive_flag && (cverf[0] != tverf[0] || cverf[1] != tverf[1])) nd->nd_repstat = EEXIST; /* * Do the open locking/delegation stuff. */ if (!nd->nd_repstat) nd->nd_repstat = nfsrv_openctrl(nd, vp, &stp, clientid, &stateid, &delegstateid, &rflags, exp, p, nva.na_filerev); /* * vp must be unlocked before the call to nfsvno_getattr(dirp,...) * below, to avoid a deadlock with the lookup in nfsvno_namei() above. * (ie: Leave the NFSVOPUNLOCK() about here.) */ if (vp) NFSVOPUNLOCK(vp, 0); if (stp) - FREE((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); if (!nd->nd_repstat && dirp) nd->nd_repstat = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, 0); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (claim == NFSV4OPEN_CLAIMPREVIOUS) { *tl++ = newnfs_true; *tl++ = 0; *tl++ = 0; *tl++ = 0; *tl++ = 0; } else { *tl++ = newnfs_false; /* Since dirp is not locked */ txdr_hyper(dirfor.na_filerev, tl); tl += 2; txdr_hyper(diraft.na_filerev, tl); tl += 2; } *tl = txdr_unsigned(rflags & NFSV4OPEN_RFLAGS); (void) nfsrv_putattrbit(nd, &attrbits); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (rflags & NFSV4OPEN_READDELEGATE) *tl = txdr_unsigned(NFSV4OPEN_DELEGATEREAD); else if (rflags & NFSV4OPEN_WRITEDELEGATE) *tl = txdr_unsigned(NFSV4OPEN_DELEGATEWRITE); else if (retext != 0) { *tl = txdr_unsigned(NFSV4OPEN_DELEGATENONEEXT); if ((rflags & NFSV4OPEN_WDCONTENTION) != 0) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CONTENTION); *tl = newnfs_false; } else if ((rflags & NFSV4OPEN_WDRESOURCE) != 0) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_RESOURCE); *tl = newnfs_false; } else { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OPEN_NOTWANTED); } } else *tl = txdr_unsigned(NFSV4OPEN_DELEGATENONE); if (rflags & (NFSV4OPEN_READDELEGATE|NFSV4OPEN_WRITEDELEGATE)) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID+NFSX_UNSIGNED); *tl++ = txdr_unsigned(delegstateid.seqid); NFSBCOPY((caddr_t)delegstateid.other, (caddr_t)tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (rflags & NFSV4OPEN_RECALL) *tl = newnfs_true; else *tl = newnfs_false; if (rflags & NFSV4OPEN_WRITEDELEGATE) { NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_LIMITSIZE); txdr_hyper(nva.na_size, tl); } NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4ACE_ALLOWEDTYPE); *tl++ = txdr_unsigned(0x0); acemask = NFSV4ACE_ALLFILESMASK; if (nva.na_mode & S_IRUSR) acemask |= NFSV4ACE_READMASK; if (nva.na_mode & S_IWUSR) acemask |= NFSV4ACE_WRITEMASK; if (nva.na_mode & S_IXUSR) acemask |= NFSV4ACE_EXECUTEMASK; *tl = txdr_unsigned(acemask); (void) nfsm_strtom(nd, "OWNER@", 6); } *vpp = vp; } else if (vp) { vrele(vp); } if (dirp) vrele(dirp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif NFSEXITCODE2(0, nd); return (0); nfsmout: vrele(dp); #ifdef NFS4_ACL_EXTATTR_NAME acl_free(aclp); #endif if (stp) - FREE((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 close service */ APPLESTATIC int nfsrvd_close(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate st, *stp = &st; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); stp->ls_flags = NFSLCK_CLOSE; clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK8 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p); vput(vp); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } NFSEXITCODE2(0, nd); return (0); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 delegpurge service */ APPLESTATIC int nfsrvd_delegpurge(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK9 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_delegupdate(nd, clientid, NULL, NULL, NFSV4OP_DELEGPURGE, nd->nd_cred, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 delegreturn service */ APPLESTATIC int nfsrvd_delegreturn(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other, NFSX_STATEIDOTHER); clientid.lval[0] = stateid.other[0]; clientid.lval[1] = stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK10 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_delegupdate(nd, clientid, &stateid, vp, NFSV4OP_DELEGRETURN, nd->nd_cred, p); nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 get file handle service */ APPLESTATIC int nfsrvd_getfh(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { fhandle_t fh; nd->nd_repstat = nfsvno_getfh(vp, &fh, p); vput(vp); if (!nd->nd_repstat) (void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0); NFSEXITCODE2(0, nd); return (0); } /* * nfsv4 open confirm service */ APPLESTATIC int nfsrvd_openconfirm(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate st, *stp = &st; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); stp->ls_seq = fxdr_unsigned(u_int32_t, *tl); stp->ls_flags = NFSLCK_CONFIRM; clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK11 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 open downgrade service */ APPLESTATIC int nfsrvd_opendowngrade(struct nfsrv_descript *nd, __unused int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int i; struct nfsstate st, *stp = &st; int error = 0; nfsv4stateid_t stateid; nfsquad_t clientid; /* opendowngrade can only work on a file object.*/ if (vp->v_type != VREG) { error = NFSERR_INVAL; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); stp->ls_ownerlen = 0; stp->ls_op = nd->nd_rp; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++); i = fxdr_unsigned(int, *tl++); switch (i) { case NFSV4OPEN_ACCESSREAD: stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_DOWNGRADE); break; case NFSV4OPEN_ACCESSWRITE: stp->ls_flags = (NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE); break; case NFSV4OPEN_ACCESSBOTH: stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE); break; default: nd->nd_repstat = NFSERR_BADXDR; } i = fxdr_unsigned(int, *tl); switch (i) { case NFSV4OPEN_DENYNONE: break; case NFSV4OPEN_DENYREAD: stp->ls_flags |= NFSLCK_READDENY; break; case NFSV4OPEN_DENYWRITE: stp->ls_flags |= NFSLCK_WRITEDENY; break; case NFSV4OPEN_DENYBOTH: stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY); break; default: nd->nd_repstat = NFSERR_BADXDR; } clientid.lval[0] = stp->ls_stateid.other[0]; clientid.lval[1] = stp->ls_stateid.other[1]; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK12 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } if (!nd->nd_repstat) nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p); if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); *tl++ = txdr_unsigned(stateid.seqid); NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER); } nfsmout: vput(vp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 renew lease service */ APPLESTATIC int nfsrvd_renew(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK13 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_RENEWOP|CLOPS_RENEW), NULL, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 security info service */ APPLESTATIC int nfsrvd_secinfo(struct nfsrv_descript *nd, int isdgram, vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp) { u_int32_t *tl; int len; struct nameidata named; vnode_t dirp = NULL, vp; struct nfsrvfh fh; struct nfsexstuff retnes; u_int32_t *sizp; int error = 0, savflag, i; char *bufp; u_long *hashp; /* * All this just to get the export flags for the name. */ NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP, LOCKLEAF | SAVESTART); nfsvno_setpathbuf(&named, &bufp, &hashp); error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen); if (error) { vput(dp); nfsvno_relpathbuf(&named); goto out; } if (!nd->nd_repstat) { nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp); } else { vput(dp); nfsvno_relpathbuf(&named); } if (dirp) vrele(dirp); if (nd->nd_repstat) goto out; vrele(named.ni_startdir); nfsvno_relpathbuf(&named); fh.nfsrvfh_len = NFSX_MYFH; vp = named.ni_vp; nd->nd_repstat = nfsvno_getfh(vp, (fhandle_t *)fh.nfsrvfh_data, p); vput(vp); savflag = nd->nd_flag; if (!nd->nd_repstat) { nfsd_fhtovp(nd, &fh, LK_SHARED, &vp, &retnes, NULL, 0, p); if (vp) vput(vp); } nd->nd_flag = savflag; if (nd->nd_repstat) goto out; /* * Finally have the export flags for name, so we can create * the security info. */ len = 0; NFSM_BUILD(sizp, u_int32_t *, NFSX_UNSIGNED); for (i = 0; i < retnes.nes_numsecflavor; i++) { if (retnes.nes_secflavors[i] == AUTH_SYS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(RPCAUTH_UNIX); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCNONE); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5I) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY); len++; } else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5P) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = txdr_unsigned(RPCAUTH_GSS); (void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str, nfsgss_mechlist[KERBV_MECH].len); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(GSS_KERBV_QOP); *tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY); len++; } } *sizp = txdr_unsigned(len); out: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 set client id service */ APPLESTATIC int nfsrvd_setclientid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int i; int error = 0, idlen; struct nfsclient *clp = NULL; struct sockaddr_in *rad; u_char *verf, *ucp, *ucp2, addrbuf[24]; nfsquad_t clientid, confirm; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto out; } NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED); verf = (u_char *)tl; tl += (NFSX_VERF / NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i > NFSV4_OPAQUELIMIT || i <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } idlen = i; if (nd->nd_flag & ND_GSS) i += nd->nd_princlen; clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK | M_ZERO); clp->lc_stateid = malloc(sizeof(struct nfsstatehead) * nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK); NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx); NFSSOCKADDRALLOC(clp->lc_req.nr_nam); NFSSOCKADDRSIZE(clp->lc_req.nr_nam, sizeof (struct sockaddr_in)); clp->lc_req.nr_cred = NULL; NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); if (error) goto nfsmout; if (nd->nd_flag & ND_GSS) { clp->lc_flags = LCL_GSS; if (nd->nd_flag & ND_GSSINTEGRITY) clp->lc_flags |= LCL_GSSINTEGRITY; else if (nd->nd_flag & ND_GSSPRIVACY) clp->lc_flags |= LCL_GSSPRIVACY; } else { clp->lc_flags = 0; } if ((nd->nd_flag & ND_GSS) && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; clp->lc_namelen = nd->nd_princlen; clp->lc_name = &clp->lc_id[idlen]; NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen); } else { clp->lc_uid = nd->nd_cred->cr_uid; clp->lc_gid = nd->nd_cred->cr_gid; } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); clp->lc_program = fxdr_unsigned(u_int32_t, *tl); error = nfsrv_getclientipaddr(nd, clp); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); clp->lc_callback = fxdr_unsigned(u_int32_t, *tl); /* * nfsrv_setclient() does the actual work of adding it to the * client list. If there is no error, the structure has been * linked into the client list and clp should no longer be used * here. When an error is returned, it has not been linked in, * so it should be free'd. */ nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p); if (nd->nd_repstat == NFSERR_CLIDINUSE) { if (clp->lc_flags & LCL_TCPCALLBACK) (void) nfsm_strtom(nd, "tcp", 3); else (void) nfsm_strtom(nd, "udp", 3); rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); ucp = (u_char *)&rad->sin_addr.s_addr; ucp2 = (u_char *)&rad->sin_port; sprintf(addrbuf, "%d.%d.%d.%d.%d.%d", ucp[0] & 0xff, ucp[1] & 0xff, ucp[2] & 0xff, ucp[3] & 0xff, ucp2[0] & 0xff, ucp2[1] & 0xff); (void) nfsm_strtom(nd, addrbuf, strlen(addrbuf)); } if (clp) { NFSSOCKADDRFREE(clp->lc_req.nr_nam); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } if (!nd->nd_repstat) { NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER); *tl++ = clientid.lval[0]; *tl++ = clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; } out: NFSEXITCODE2(0, nd); return (0); nfsmout: if (clp) { NFSSOCKADDRFREE(clp->lc_req.nr_nam); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 set client id confirm service */ APPLESTATIC int nfsrvd_setclientidcfrm(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0; nfsquad_t clientid, confirm; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; /* * nfsrv_getclient() searches the client list for a match and * returns the appropriate NFSERR status. */ nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_CONFIRM|CLOPS_RENEW), NULL, NULL, confirm, 0, nd, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 verify service */ APPLESTATIC int nfsrvd_verify(struct nfsrv_descript *nd, int isdgram, vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { int error = 0, ret, fhsize = NFSX_MYFH; struct nfsvattr nva; struct statfs *sf; struct nfsfsinfo fs; fhandle_t fh; sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_statfs(vp, sf); if (!nd->nd_repstat) nd->nd_repstat = nfsvno_getfh(vp, &fh, p); if (!nd->nd_repstat) { nfsvno_getfs(&fs, isdgram); error = nfsv4_loadattr(nd, vp, &nva, NULL, &fh, fhsize, NULL, sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, p, nd->nd_cred); if (!error) { if (nd->nd_procnum == NFSV4OP_NVERIFY) { if (ret == 0) nd->nd_repstat = NFSERR_SAME; else if (ret != NFSERR_NOTSAME) nd->nd_repstat = ret; } else if (ret) nd->nd_repstat = ret; } } vput(vp); free(sf, M_STATFS); NFSEXITCODE2(error, nd); return (error); } /* * nfs openattr rpc */ APPLESTATIC int nfsrvd_openattr(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, __unused vnode_t *vpp, __unused fhandle_t *fhp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; int error = 0, createdir; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); createdir = fxdr_unsigned(int, *tl); nd->nd_repstat = NFSERR_NOTSUPP; nfsmout: vrele(dp); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 release lock owner service */ APPLESTATIC int nfsrvd_releaselckown(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { u_int32_t *tl; struct nfsstate *stp = NULL; int error = 0, len; nfsquad_t clientid; if ((nd->nd_flag & ND_NFSV41) != 0) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFSV4_OPAQUELIMIT) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } - MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + len, + stp = malloc(sizeof (struct nfsstate) + len, M_NFSDSTATE, M_WAITOK); stp->ls_ownerlen = len; stp->ls_op = NULL; stp->ls_flags = NFSLCK_RELEASE; stp->ls_uid = nd->nd_cred->cr_uid; clientid.lval[0] = *tl++; clientid.lval[1] = *tl; if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { if ((nd->nd_flag & ND_NFSV41) != 0) clientid.qval = nd->nd_clientid.qval; else if (nd->nd_clientid.qval != clientid.qval) printf("EEK14 multiple clids\n"); } else { if ((nd->nd_flag & ND_NFSV41) != 0) printf("EEK! no clientid from session\n"); nd->nd_flag |= ND_IMPLIEDCLID; nd->nd_clientid.qval = clientid.qval; } error = nfsrv_mtostr(nd, stp->ls_owner, len); if (error) goto nfsmout; nd->nd_repstat = nfsrv_releaselckown(stp, clientid, p); - FREE((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); NFSEXITCODE2(0, nd); return (0); nfsmout: if (stp) - free((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 exchange_id service */ APPLESTATIC int nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0, i, idlen; struct nfsclient *clp = NULL; nfsquad_t clientid, confirm; uint8_t *verf; uint32_t sp4type, v41flags; uint64_t owner_minor; struct timespec verstime; struct sockaddr_in *sad, *rad; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED); verf = (uint8_t *)tl; tl += (NFSX_VERF / NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i > NFSV4_OPAQUELIMIT || i <= 0) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } idlen = i; if (nd->nd_flag & ND_GSS) i += nd->nd_princlen; clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK | M_ZERO); clp->lc_stateid = malloc(sizeof(struct nfsstatehead) * nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK); NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx); NFSSOCKADDRALLOC(clp->lc_req.nr_nam); NFSSOCKADDRSIZE(clp->lc_req.nr_nam, sizeof (struct sockaddr_in)); sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); rad->sin_family = AF_INET; rad->sin_addr.s_addr = 0; rad->sin_port = 0; if (sad->sin_family == AF_INET) rad->sin_addr.s_addr = sad->sin_addr.s_addr; clp->lc_req.nr_cred = NULL; NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); if (error != 0) goto nfsmout; if ((nd->nd_flag & ND_GSS) != 0) { clp->lc_flags = LCL_GSS | LCL_NFSV41; if ((nd->nd_flag & ND_GSSINTEGRITY) != 0) clp->lc_flags |= LCL_GSSINTEGRITY; else if ((nd->nd_flag & ND_GSSPRIVACY) != 0) clp->lc_flags |= LCL_GSSPRIVACY; } else clp->lc_flags = LCL_NFSV41; if ((nd->nd_flag & ND_GSS) != 0 && nd->nd_princlen > 0) { clp->lc_flags |= LCL_NAME; clp->lc_namelen = nd->nd_princlen; clp->lc_name = &clp->lc_id[idlen]; NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen); } else { clp->lc_uid = nd->nd_cred->cr_uid; clp->lc_gid = nd->nd_cred->cr_gid; } NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); v41flags = fxdr_unsigned(uint32_t, *tl++); if ((v41flags & ~(NFSV4EXCH_SUPPMOVEDREFER | NFSV4EXCH_SUPPMOVEDMIGR | NFSV4EXCH_BINDPRINCSTATEID | NFSV4EXCH_MASKPNFS | NFSV4EXCH_UPDCONFIRMEDRECA)) != 0) { nd->nd_repstat = NFSERR_INVAL; goto nfsmout; } if ((v41flags & NFSV4EXCH_UPDCONFIRMEDRECA) != 0) confirm.lval[1] = 1; else confirm.lval[1] = 0; v41flags = NFSV4EXCH_USENONPNFS; sp4type = fxdr_unsigned(uint32_t, *tl); if (sp4type != NFSV4EXCH_SP4NONE) { nd->nd_repstat = NFSERR_NOTSUPP; goto nfsmout; } /* * nfsrv_setclient() does the actual work of adding it to the * client list. If there is no error, the structure has been * linked into the client list and clp should no longer be used * here. When an error is returned, it has not been linked in, * so it should be free'd. */ nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p); if (clp != NULL) { NFSSOCKADDRFREE(clp->lc_req.nr_nam); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } if (nd->nd_repstat == 0) { if (confirm.lval[1] != 0) v41flags |= NFSV4EXCH_CONFIRMEDR; NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + 3 * NFSX_UNSIGNED); *tl++ = clientid.lval[0]; /* ClientID */ *tl++ = clientid.lval[1]; *tl++ = txdr_unsigned(confirm.lval[0]); /* SequenceID */ *tl++ = txdr_unsigned(v41flags); /* Exch flags */ *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); /* No SSV */ owner_minor = 0; /* Owner */ txdr_hyper(owner_minor, tl); /* Minor */ (void)nfsm_strtom(nd, nd->nd_cred->cr_prison->pr_hostuuid, strlen(nd->nd_cred->cr_prison->pr_hostuuid)); /* Major */ NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSX_UNSIGNED); *tl++ = time_uptime; /* Make scope a unique value. */ *tl = txdr_unsigned(1); (void)nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); (void)nfsm_strtom(nd, version, strlen(version)); NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ verstime.tv_nsec = 0; txdr_nfsv4time(&verstime, tl); } NFSEXITCODE2(0, nd); return (0); nfsmout: if (clp != NULL) { NFSSOCKADDRFREE(clp->lc_req.nr_nam); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 create session service */ APPLESTATIC int nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0; nfsquad_t clientid, confirm; struct nfsdsession *sep = NULL; uint32_t rdmacnt; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } sep = (struct nfsdsession *)malloc(sizeof(struct nfsdsession), M_NFSDSESSION, M_WAITOK | M_ZERO); sep->sess_refcnt = 1; mtx_init(&sep->sess_cbsess.nfsess_mtx, "nfscbsession", NULL, MTX_DEF); NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl++; confirm.lval[0] = fxdr_unsigned(uint32_t, *tl++); sep->sess_crflags = fxdr_unsigned(uint32_t, *tl); /* Persistent sessions and RDMA are not supported. */ sep->sess_crflags &= NFSV4CRSESS_CONNBACKCHAN; /* Fore channel attributes. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Header pad always 0. */ sep->sess_maxreq = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxresp = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxrespcached = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxops = fxdr_unsigned(uint32_t, *tl++); sep->sess_maxslots = fxdr_unsigned(uint32_t, *tl++); if (sep->sess_maxslots > NFSV4_SLOTS) sep->sess_maxslots = NFSV4_SLOTS; rdmacnt = fxdr_unsigned(uint32_t, *tl); if (rdmacnt > 1) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } else if (rdmacnt == 1) NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); /* Back channel attributes. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl++; /* Header pad always 0. */ sep->sess_cbmaxreq = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxresp = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxrespcached = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbmaxops = fxdr_unsigned(uint32_t, *tl++); sep->sess_cbsess.nfsess_foreslots = fxdr_unsigned(uint32_t, *tl++); rdmacnt = fxdr_unsigned(uint32_t, *tl); if (rdmacnt > 1) { nd->nd_repstat = NFSERR_BADXDR; goto nfsmout; } else if (rdmacnt == 1) NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); sep->sess_cbprogram = fxdr_unsigned(uint32_t, *tl); /* * nfsrv_getclient() searches the client list for a match and * returns the appropriate NFSERR status. */ nd->nd_repstat = nfsrv_getclient(clientid, CLOPS_CONFIRM | CLOPS_RENEW, NULL, sep, confirm, sep->sess_cbprogram, nd, p); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(sep->sess_sessionid, tl, NFSX_V4SESSIONID); NFSM_BUILD(tl, uint32_t *, 18 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(confirm.lval[0]); /* sequenceid */ *tl++ = txdr_unsigned(sep->sess_crflags); /* Fore channel attributes. */ *tl++ = 0; *tl++ = txdr_unsigned(sep->sess_maxreq); *tl++ = txdr_unsigned(sep->sess_maxresp); *tl++ = txdr_unsigned(sep->sess_maxrespcached); *tl++ = txdr_unsigned(sep->sess_maxops); *tl++ = txdr_unsigned(sep->sess_maxslots); *tl++ = txdr_unsigned(1); *tl++ = txdr_unsigned(0); /* No RDMA. */ /* Back channel attributes. */ *tl++ = 0; *tl++ = txdr_unsigned(sep->sess_cbmaxreq); *tl++ = txdr_unsigned(sep->sess_cbmaxresp); *tl++ = txdr_unsigned(sep->sess_cbmaxrespcached); *tl++ = txdr_unsigned(sep->sess_cbmaxops); *tl++ = txdr_unsigned(sep->sess_cbsess.nfsess_foreslots); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(0); /* No RDMA. */ } nfsmout: if (nd->nd_repstat != 0 && sep != NULL) free(sep, M_NFSDSESSION); NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 sequence service */ APPLESTATIC int nfsrvd_sequence(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; uint32_t highest_slotid, sequenceid, sflags, target_highest_slotid; int cache_this, error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(tl, nd->nd_sessionid, NFSX_V4SESSIONID); NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); sequenceid = fxdr_unsigned(uint32_t, *tl++); nd->nd_slotid = fxdr_unsigned(uint32_t, *tl++); highest_slotid = fxdr_unsigned(uint32_t, *tl++); if (*tl == newnfs_true) cache_this = 1; else cache_this = 0; nd->nd_flag |= ND_HASSEQUENCE; nd->nd_repstat = nfsrv_checksequence(nd, sequenceid, &highest_slotid, &target_highest_slotid, cache_this, &sflags, p); if (nd->nd_repstat == 0) { NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); NFSBCOPY(nd->nd_sessionid, tl, NFSX_V4SESSIONID); NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(sequenceid); *tl++ = txdr_unsigned(nd->nd_slotid); *tl++ = txdr_unsigned(highest_slotid); *tl++ = txdr_unsigned(target_highest_slotid); *tl = txdr_unsigned(sflags); } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 reclaim complete service */ APPLESTATIC int nfsrvd_reclaimcomplete(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); if (*tl == newnfs_true) nd->nd_repstat = NFSERR_NOTSUPP; else nd->nd_repstat = nfsrv_checkreclaimcomplete(nd); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 destroy clientid service */ APPLESTATIC int nfsrvd_destroyclientid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsquad_t clientid; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); clientid.lval[0] = *tl++; clientid.lval[1] = *tl; nd->nd_repstat = nfsrv_destroyclient(clientid, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 destroy session service */ APPLESTATIC int nfsrvd_destroysession(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint8_t *cp, sessid[NFSX_V4SESSIONID]; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(cp, uint8_t *, NFSX_V4SESSIONID); NFSBCOPY(cp, sessid, NFSX_V4SESSIONID); nd->nd_repstat = nfsrv_destroysession(nd, sessid); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 free stateid service */ APPLESTATIC int nfsrvd_freestateid(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp) { uint32_t *tl; nfsv4stateid_t stateid; int error = 0; if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) { nd->nd_repstat = NFSERR_WRONGSEC; goto nfsmout; } NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID); stateid.seqid = fxdr_unsigned(uint32_t, *tl++); NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); nd->nd_repstat = nfsrv_freestateid(nd, &stateid, p); nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * nfsv4 service not supported */ APPLESTATIC int nfsrvd_notsupp(struct nfsrv_descript *nd, __unused int isdgram, __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp) { nd->nd_repstat = NFSERR_NOTSUPP; NFSEXITCODE2(0, nd); return (0); } Index: head/sys/fs/nfsserver/nfs_nfsdstate.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdstate.c (revision 328416) +++ head/sys/fs/nfsserver/nfs_nfsdstate.c (revision 328417) @@ -1,6138 +1,6138 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include struct nfsrv_stablefirst nfsrv_stablefirst; int nfsrv_issuedelegs = 0; int nfsrv_dolocallocks = 0; struct nfsv4lock nfsv4rootfs_lock; extern int newnfs_numnfsd; extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_lease; extern struct timeval nfsboottime; extern u_int32_t newnfs_true, newnfs_false; NFSV4ROOTLOCKMUTEX; NFSSTATESPINLOCK; SYSCTL_DECL(_vfs_nfsd); int nfsrv_statehashsize = NFSSTATEHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN, &nfsrv_statehashsize, 0, "Size of state hash table set via loader.conf"); int nfsrv_clienthashsize = NFSCLIENTHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN, &nfsrv_clienthashsize, 0, "Size of client hash table set via loader.conf"); int nfsrv_lockhashsize = NFSLOCKHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN, &nfsrv_lockhashsize, 0, "Size of file handle hash table set via loader.conf"); int nfsrv_sessionhashsize = NFSSESSIONHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN, &nfsrv_sessionhashsize, 0, "Size of session hash table set via loader.conf"); static int nfsrv_v4statelimit = NFSRV_V4STATELIMIT; SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN, &nfsrv_v4statelimit, 0, "High water limit for NFSv4 opens+locks+delegations"); static int nfsrv_writedelegifpos = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW, &nfsrv_writedelegifpos, 0, "Issue a write delegation for read opens if possible"); static int nfsrv_allowreadforwriteopen = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW, &nfsrv_allowreadforwriteopen, 0, "Allow Reads to be done with Write Access StateIDs"); /* * Hash lists for nfs V4. */ struct nfsclienthashhead *nfsclienthash; struct nfslockhashhead *nfslockhash; struct nfssessionhash *nfssessionhash; #endif /* !APPLEKEXT */ static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0; static time_t nfsrvboottime; static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0; static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER; static int nfsrv_nogsscallback = 0; static volatile int nfsrv_writedelegcnt = 0; /* local functions */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp); static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p); static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freenfslock(struct nfslock *lop); static void nfsrv_freenfslockfile(struct nfslockfile *lfp); static void nfsrv_freedeleg(struct nfsstate *); static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, u_int32_t flags, struct nfsstate **stpp); static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp); static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p); static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit); static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp); static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp); static int nfsrv_getipnumber(u_char *cp); static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid); static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags); static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p); static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, uint32_t callback, int op, const char *optag, struct nfsdsession **sepp); static u_int32_t nfsrv_nextclientindex(void); static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp); static void nfsrv_markstable(struct nfsclient *clp); static int nfsrv_checkstable(struct nfsclient *clp); static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct vnode *vp, NFSPROC_T *p); static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp); static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p); static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp); static time_t nfsrv_leaseexpiry(void); static void nfsrv_delaydelegtimeout(struct nfsstate *stp); static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op); static int nfsrv_nootherstate(struct nfsstate *stp); static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p); static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p); static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end); static void nfsrv_locklf(struct nfslockfile *lfp); static void nfsrv_unlocklf(struct nfslockfile *lfp); static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid); static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid); static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp, int dont_replycache, struct nfsdsession **sepp); static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp); /* * Scan the client list for a match and either return the current one, * create a new entry or return an error. * If returning a non-error, the clp structure must either be linked into * the client list or free'd. */ APPLESTATIC int nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p) { struct nfsclient *clp = NULL, *new_clp = *new_clpp; int i, error = 0; struct nfsstate *stp, *tstp; struct sockaddr_in *sad, *rad; int zapit = 0, gotit, hasstate = 0, igotlock; static u_int64_t confirm_index = 0; /* * Check for state resource limit exceeded. */ if (nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } if (nfsrv_issuedelegs == 0 || ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0)) /* * Don't do callbacks when delegations are disabled or * for AUTH_GSS unless enabled via nfsrv_nogsscallback. * If establishing a callback connection is attempted * when a firewall is blocking the callback path, the * server may wait too long for the connect attempt to * succeed during the Open. Some clients, such as Linux, * may timeout and give up on the Open before the server * replies. Also, since AUTH_GSS callbacks are not * yet interoperability tested, they might cause the * server to crap out, if they get past the Init call to * the client. */ new_clp->lc_program = 0; /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (new_clp->lc_idlen == clp->lc_idlen && !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } if (gotit == 0) i++; } if (!gotit || (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) { if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) { /* * For NFSv4.1, if confirmp->lval[1] is non-zero, the * client is trying to update a confirmed clientid. */ NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); confirmp->lval[1] = 0; error = NFSERR_NOENT; goto out; } /* * Get rid of the old one. */ if (i != nfsrv_clienthashsize) { LIST_REMOVE(clp, lc_hash); nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); zapit = 1; } /* * Add it after assigning a client id to it. */ new_clp->lc_flags |= LCL_NEEDSCONFIRM; if ((nd->nd_flag & ND_NFSV41) != 0) new_clp->lc_confirm.lval[0] = confirmp->lval[0] = ++confirm_index; else confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = (u_int32_t)nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); LIST_INIT(&new_clp->lc_open); LIST_INIT(&new_clp->lc_deleg); LIST_INIT(&new_clp->lc_olddeleg); LIST_INIT(&new_clp->lc_session); for (i = 0; i < nfsrv_statehashsize; i++) LIST_INIT(&new_clp->lc_stateid[i]); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); if (zapit) nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* * Now, handle the cases where the id is already issued. */ if (nfsrv_notsamecredname(nd, clp)) { /* * Check to see if there is expired state that should go away. */ if (clp->lc_expiry < NFSD_MONOSEC && (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); } /* * If there is outstanding state, then reply NFSERR_CLIDINUSE per * RFC3530 Sec. 8.1.2 last para. */ if (!LIST_EMPTY(&clp->lc_deleg)) { hasstate = 1; } else if (LIST_EMPTY(&clp->lc_open)) { hasstate = 0; } else { hasstate = 0; /* Look for an Open on the OpenOwner */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { if (!LIST_EMPTY(&stp->ls_open)) { hasstate = 1; break; } } } if (hasstate) { /* * If the uid doesn't match, return NFSERR_CLIDINUSE after * filling out the correct ipaddr and portnum. */ sad = NFSSOCKADDR(new_clp->lc_req.nr_nam, struct sockaddr_in *); rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); sad->sin_addr.s_addr = rad->sin_addr.s_addr; sad->sin_port = rad->sin_port; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIDINUSE; goto out; } } if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) { /* * If the verifier has changed, the client has rebooted * and a new client id is issued. The old state info * can be thrown away once the SETCLIENTID_CONFIRM occurs. */ LIST_REMOVE(clp, lc_hash); new_clp->lc_flags |= LCL_NEEDSCONFIRM; if ((nd->nd_flag & ND_NFSV41) != 0) new_clp->lc_confirm.lval[0] = confirmp->lval[0] = ++confirm_index; else confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); /* * Save the state until confirmed. */ LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsd clp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* For NFSv4.1, mark that we found a confirmed clientid. */ if ((nd->nd_flag & ND_NFSV41) != 0) { clientidp->lval[0] = clp->lc_clientid.lval[0]; clientidp->lval[1] = clp->lc_clientid.lval[1]; confirmp->lval[0] = 0; /* Ignored by client */ confirmp->lval[1] = 1; } else { /* * id and verifier match, so update the net address info * and get rid of any existing callback authentication * handle, so a new one will be acquired. */ LIST_REMOVE(clp, lc_hash); new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN); new_clp->lc_expiry = nfsrv_leaseexpiry(); confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = clp->lc_clientid.lval[0]; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = clp->lc_clientid.lval[1]; new_clp->lc_delegtime = clp->lc_delegtime; new_clp->lc_stateindex = clp->lc_stateindex; new_clp->lc_statemaxindex = clp->lc_statemaxindex; new_clp->lc_cbref = 0; LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; } NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); if ((nd->nd_flag & ND_NFSV41) == 0) { /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsdclp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; } out: NFSEXITCODE2(error, nd); return (error); } /* * Check to see if the client id exists and optionally confirm it. */ APPLESTATIC int nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp, struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int i; struct nfsclienthashhead *hp; int error = 0, igotlock, doneok; struct nfssessionhash *shp; struct nfsdsession *sep; uint64_t sessid[2]; static uint64_t next_sess = 0; if (clpp) *clpp = NULL; if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 || opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } /* * If called with opflags == CLOPS_RENEW, the State Lock is * already held. Otherwise, we need to get either that or, * for the case of Confirm, lock out the nfsd threads. */ if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); /* * Create a new sessionid here, since we need to do it where * there is a mutex held to serialize update of next_sess. */ if ((nd->nd_flag & ND_NFSV41) != 0) { sessid[0] = ++next_sess; sessid[1] = clientid.qval; } NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSLOCKSTATE(); } /* For NFSv4.1, the clp is acquired from the associated session. */ if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 && opflags == CLOPS_RENEW) { clp = NULL; if ((nd->nd_flag & ND_HASSEQUENCE) != 0) { shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep != NULL) clp = sep->sess_clp; NFSUNLOCKSESSION(shp); } } else { hp = NFSCLIENTHASH(clientid); LIST_FOREACH(clp, hp, lc_hash) { if (clp->lc_clientid.lval[1] == clientid.lval[1]) break; } } if (clp == NULL) { if (opflags & CLOPS_CONFIRM) error = NFSERR_STALECLIENTID; else error = NFSERR_EXPIRED; } else if (clp->lc_flags & LCL_ADMINREVOKED) { /* * If marked admin revoked, just return the error. */ error = NFSERR_ADMINREVOKED; } if (error) { if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } goto out; } /* * Perform any operations specified by the opflags. */ if (opflags & CLOPS_CONFIRM) { if (((nd->nd_flag & ND_NFSV41) != 0 && clp->lc_confirm.lval[0] != confirm.lval[0]) || ((nd->nd_flag & ND_NFSV41) == 0 && clp->lc_confirm.qval != confirm.qval)) error = NFSERR_STALECLIENTID; else if (nfsrv_notsamecredname(nd, clp)) error = NFSERR_CLIDINUSE; if (!error) { if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) == LCL_NEEDSCONFIRM) { /* * Hang onto the delegations (as old delegations) * for an Open with CLAIM_DELEGATE_PREV unless in * grace, but get rid of the rest of the state. */ nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_olddeleg); if (nfsrv_checkgrace(nd, clp, 0)) { /* In grace, so just delete delegations */ nfsrv_freedeleglist(&clp->lc_deleg); } else { LIST_FOREACH(stp, &clp->lc_deleg, ls_list) stp->ls_flags |= NFSLCK_OLDDELEG; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg, ls_list); } if ((nd->nd_flag & ND_NFSV41) != 0) clp->lc_program = cbprogram; } clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN); if (clp->lc_program) clp->lc_flags |= LCL_NEEDSCBNULL; /* For NFSv4.1, link the session onto the client. */ if (nsep != NULL) { /* Hold a reference on the xprt for a backchannel. */ if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0 && clp->lc_req.nr_client == NULL) { clp->lc_req.nr_client = (struct __rpc_client *) clnt_bck_create(nd->nd_xprt->xp_socket, cbprogram, NFSV4_CBVERS); if (clp->lc_req.nr_client != NULL) { SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = clp->lc_req.nr_client->cl_private; /* Disable idle timeout. */ nd->nd_xprt->xp_idletimeout = 0; nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt; } else nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN; } NFSBCOPY(sessid, nsep->sess_sessionid, NFSX_V4SESSIONID); NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid, NFSX_V4SESSIONID); shp = NFSSESSIONHASH(nsep->sess_sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); LIST_INSERT_HEAD(&shp->list, nsep, sess_hash); LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list); nsep->sess_clp = clp; NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); } } } else if (clp->lc_flags & LCL_NEEDSCONFIRM) { error = NFSERR_EXPIRED; } /* * If called by the Renew Op, we must check the principal. */ if (!error && (opflags & CLOPS_RENEWOP)) { if (nfsrv_notsamecredname(nd, clp)) { doneok = 0; for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if ((stp->ls_flags & NFSLCK_OPEN) && stp->ls_uid == nd->nd_cred->cr_uid) { doneok = 1; break; } } } if (!doneok) error = NFSERR_ACCES; } if (!error && (clp->lc_flags & LCL_CBDOWN)) error = NFSERR_CBPATHDOWN; } if ((!error || error == NFSERR_CBPATHDOWN) && (opflags & CLOPS_RENEW)) { clp->lc_expiry = nfsrv_leaseexpiry(); } if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } if (clpp) *clpp = clp; out: NFSEXITCODE2(error, nd); return (error); } /* * Perform the NFSv4.1 destroy clientid. */ int nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p) { struct nfsclient *clp; struct nfsclienthashhead *hp; int error = 0, i, igotlock; if (nfsrvboottime != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0); NFSUNLOCKV4ROOTMUTEX(); hp = NFSCLIENTHASH(clientid); LIST_FOREACH(clp, hp, lc_hash) { if (clp->lc_clientid.lval[1] == clientid.lval[1]) break; } if (clp == NULL) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* Just return ok, since it is gone. */ goto out; } /* Scan for state on the clientid. */ for (i = 0; i < nfsrv_statehashsize; i++) if (!LIST_EMPTY(&clp->lc_stateid[i])) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIENTIDBUSY; goto out; } if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIENTIDBUSY; goto out; } /* Destroy the clientid and return ok. */ nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); nfsrv_zapclient(clp, p); out: NFSEXITCODE2(error, nd); return (error); } /* * Called from the new nfssvc syscall to admin revoke a clientid. * Returns 0 for success, error otherwise. */ APPLESTATIC int nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p) { struct nfsclient *clp = NULL; int i, error = 0; int gotit, igotlock; /* * First, lock out the nfsd so that state won't change while the * revocation record is being written to the stable storage restart * file. */ NFSLOCKV4ROOTMUTEX(); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (revokep->nclid_idlen == clp->lc_idlen && !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } i++; } if (!gotit) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); error = EPERM; goto out; } /* * Now, write out the revocation record */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); /* * and clear out the state, marking the clientid revoked. */ clp->lc_flags &= ~LCL_CALLBACKSON; clp->lc_flags |= LCL_ADMINREVOKED; nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); out: NFSEXITCODE(error); return (error); } /* * Dump out stats for all clients. Called from nfssvc(2), that is used * nfsstatsv1. */ APPLESTATIC void nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt) { struct nfsclient *clp; int i = 0, cnt = 0; /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired while dumping the clients. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); /* * Rattle through the client lists until done. */ while (i < nfsrv_clienthashsize && cnt < maxcnt) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) { nfsrv_dumpaclient(clp, &dumpp[cnt]); cnt++; clp = LIST_NEXT(clp, lc_hash); } i++; } if (cnt < maxcnt) dumpp[cnt].ndcl_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd. */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp) { struct nfsstate *stp, *openstp, *lckownstp; struct nfslock *lop; struct sockaddr *sad; struct sockaddr_in *rad; struct sockaddr_in6 *rad6; dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0; dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0; dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0; dumpp->ndcl_flags = clp->lc_flags; dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen; NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen); sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *); dumpp->ndcl_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr; } /* * Now, scan the state lists and total up the opens and locks. */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { dumpp->ndcl_nopenowners++; LIST_FOREACH(openstp, &stp->ls_open, ls_list) { dumpp->ndcl_nopens++; LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) { dumpp->ndcl_nlockowners++; LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) { dumpp->ndcl_nlocks++; } } } } /* * and the delegation lists. */ LIST_FOREACH(stp, &clp->lc_deleg, ls_list) { dumpp->ndcl_ndelegs++; } LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { dumpp->ndcl_nolddelegs++; } } /* * Dump out lock stats for a file. */ APPLESTATIC void nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt, NFSPROC_T *p) { struct nfsstate *stp; struct nfslock *lop; int cnt = 0; struct nfslockfile *lfp; struct sockaddr *sad; struct sockaddr_in *rad; struct sockaddr_in6 *rad6; int ret; fhandle_t nfh; ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock on it cannot be acquired while dumping the locks. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); if (!ret) ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0); if (ret) { ldumpp[0].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); return; } /* * For each open share on file, dump it out. */ stp = LIST_FIRST(&lfp->lf_open); while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_openowner->ls_ownerlen; NFSBCOPY(stp->ls_openowner->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_openowner->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *); ldumpp[cnt].ndlck_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr; } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * and all locks. */ lop = LIST_FIRST(&lfp->lf_lock); while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) { stp = lop->lo_stp; ldumpp[cnt].ndlck_flags = lop->lo_flags; ldumpp[cnt].ndlck_first = lop->lo_first; ldumpp[cnt].ndlck_end = lop->lo_end; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen; NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *); ldumpp[cnt].ndlck_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr; } lop = LIST_NEXT(lop, lo_lckfile); cnt++; } /* * and the delegations. */ stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = 0; ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *); ldumpp[cnt].ndlck_addrfam = sad->sa_family; if (sad->sa_family == AF_INET) { rad = (struct sockaddr_in *)sad; ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr; } else { rad6 = (struct sockaddr_in6 *)sad; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr; } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * If list isn't full, mark end of list by setting the client name * to zero length. */ if (cnt < maxcnt) ldumpp[cnt].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Server timer routine. It can scan any linked list, so long * as it holds the spin/mutex lock and there is no exclusive lock on * nfsv4rootfs_lock. * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok * to do this from a callout, since the spin locks work. For * Darwin, I'm not sure what will work correctly yet.) * Should be called once per second. */ APPLESTATIC void nfsrv_servertimer(void) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int got_ref, i; /* * Make sure nfsboottime is set. This is used by V3 as well * as V4. Note that nfsboottime is not nfsrvboottime, which is * only used by the V4 server for leases. */ if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); /* * If server hasn't started yet, just return. */ NFSLOCKSTATE(); if (nfsrv_stablefirst.nsf_eograce == 0) { NFSUNLOCKSTATE(); return; } if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) { if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) && NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); NFSUNLOCKSTATE(); return; } /* * Try and get a reference count on the nfsv4rootfs_lock so that * no nfsd thread can acquire an exclusive lock on it before this * call is done. If it is already exclusively locked, just return. */ NFSLOCKV4ROOTMUTEX(); got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); if (got_ref == 0) { NFSUNLOCKSTATE(); return; } /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i])) { nclp = LIST_NEXT(clp, lc_hash); if (!(clp->lc_flags & LCL_EXPIREIT)) { if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC && ((LIST_EMPTY(&clp->lc_deleg) && LIST_EMPTY(&clp->lc_open)) || nfsrv_clients > nfsrv_clienthighwater)) || (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC || (clp->lc_expiry < NFSD_MONOSEC && (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) { /* * Lease has expired several nfsrv_lease times ago: * PLUS * - no state is associated with it * OR * - above high water mark for number of clients * (nfsrv_clienthighwater should be large enough * that this only occurs when clients fail to * use the same nfs_client_id4.id. Maybe somewhat * higher that the maximum number of clients that * will mount this server?) * OR * Lease has expired a very long time ago * OR * Lease has expired PLUS the number of opens + locks * has exceeded 90% of capacity * * --> Mark for expiry. The actual expiry will be done * by an nfsd sometime soon. */ clp->lc_flags |= LCL_EXPIREIT; nfsrv_stablefirst.nsf_flags |= (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT); } else { /* * If there are no opens, increment no open tick cnt * If time exceeds NFSNOOPEN, mark it to be thrown away * otherwise, if there is an open, reset no open time * Hopefully, this will avoid excessive re-creation * of open owners and subsequent open confirms. */ stp = LIST_FIRST(&clp->lc_open); while (stp != LIST_END(&clp->lc_open)) { nstp = LIST_NEXT(stp, ls_list); if (LIST_EMPTY(&stp->ls_open)) { stp->ls_noopens++; if (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit) nfsrv_stablefirst.nsf_flags |= NFSNSF_NOOPENS; } else { stp->ls_noopens = 0; } stp = nstp; } } } clp = nclp; } } NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * The following set of functions free up the various data structures. */ /* * Clear out all open/lock state related to this nfsclient. * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that * there are no other active nfsd threads. */ APPLESTATIC void nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; struct nfsdsession *sep, *nsep; LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) nfsrv_freeopenowner(stp, 1, p); if ((clp->lc_flags & LCL_ADMINREVOKED) == 0) LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) (void)nfsrv_freesession(sep, NULL); } /* * Free a client that has been cleaned. It should also already have been * removed from the lists. * (Just to be safe w.r.t. newnfs_disconnect(), call this function when * softclock interrupts are enabled.) */ APPLESTATIC void nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p) { #ifdef notyet if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) == (LCL_GSS | LCL_CALLBACKSON) && (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) && clp->lc_handlelen > 0) { clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE; clp->lc_hand.nfsh_flag |= NFSG_DESTROYED; (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL, NULL, 0, NULL, NULL, NULL, p); } #endif newnfs_disconnect(&clp->lc_req); NFSSOCKADDRFREE(clp->lc_req.nr_nam); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); NFSLOCKSTATE(); nfsstatsv1.srvclients--; nfsrv_openpluslock--; nfsrv_clients--; NFSUNLOCKSTATE(); } /* * Free a list of delegation state structures. * (This function will also free all nfslockfile structures that no * longer have associated state.) */ APPLESTATIC void nfsrv_freedeleglist(struct nfsstatehead *sthp) { struct nfsstate *stp, *nstp; LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) { nfsrv_freedeleg(stp); } LIST_INIT(sthp); } /* * Free up a delegation. */ static void nfsrv_freedeleg(struct nfsstate *stp) { struct nfslockfile *lfp; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0) nfsrv_writedelegcnt--; lfp = stp->ls_lfp; if (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && nfsv4_testlock(&lfp->lf_locallock_lck) == 0) nfsrv_freenfslockfile(lfp); - FREE((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); nfsstatsv1.srvdelegates--; nfsrv_openpluslock--; nfsrv_delegatecnt--; } /* * This function frees an open owner and all associated opens. */ static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; LIST_REMOVE(stp, ls_list); /* * Now, free all associated opens. */ nstp = LIST_FIRST(&stp->ls_open); while (nstp != LIST_END(&stp->ls_open)) { tstp = nstp; nstp = LIST_NEXT(nstp, ls_list); (void) nfsrv_freeopen(tstp, NULL, cansleep, p); } if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); - FREE((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); nfsstatsv1.srvopenowners--; nfsrv_openpluslock--; } /* * This function frees an open (nfsstate open structure) with all associated * lock_owners and locks. It also frees the nfslockfile structure iff there * are no other opens on the file. * Returns 1 if it free'd the nfslockfile, 0 otherwise. */ static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; struct nfslockfile *lfp; int ret; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); lfp = stp->ls_lfp; /* * Now, free all lockowners associated with this open. */ LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp) nfsrv_freelockowner(tstp, vp, cansleep, p); /* * The nfslockfile is freed here if there are no locks * associated with the open. * If there are locks associated with the open, the * nfslockfile structure can be freed via nfsrv_freelockowner(). * Acquire the state mutex to avoid races with calls to * nfsrv_getlockfile(). */ if (cansleep != 0) NFSLOCKSTATE(); if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) { nfsrv_freenfslockfile(lfp); ret = 1; } else ret = 0; if (cansleep != 0) NFSUNLOCKSTATE(); - FREE((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); nfsstatsv1.srvopens--; nfsrv_openpluslock--; return (ret); } /* * Frees a lockowner and all associated locks. */ static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); nfsrv_freeallnfslocks(stp, vp, cansleep, p); if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); - FREE((caddr_t)stp, M_NFSDSTATE); + free(stp, M_NFSDSTATE); nfsstatsv1.srvlockowners--; nfsrv_openpluslock--; } /* * Free all the nfs locks on a lockowner. */ static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfslock *lop, *nlop; struct nfsrollback *rlp, *nrlp; struct nfslockfile *lfp = NULL; int gottvp = 0; vnode_t tvp = NULL; uint64_t first, end; if (vp != NULL) ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked"); lop = LIST_FIRST(&stp->ls_lock); while (lop != LIST_END(&stp->ls_lock)) { nlop = LIST_NEXT(lop, lo_lckowner); /* * Since all locks should be for the same file, lfp should * not change. */ if (lfp == NULL) lfp = lop->lo_lfp; else if (lfp != lop->lo_lfp) panic("allnfslocks"); /* * If vp is NULL and cansleep != 0, a vnode must be acquired * from the file handle. This only occurs when called from * nfsrv_cleanclient(). */ if (gottvp == 0) { if (nfsrv_dolocallocks == 0) tvp = NULL; else if (vp == NULL && cansleep != 0) { tvp = nfsvno_getvp(&lfp->lf_fh); NFSVOPUNLOCK(tvp, 0); } else tvp = vp; gottvp = 1; } if (tvp != NULL) { if (cansleep == 0) panic("allnfs2"); first = lop->lo_first; end = lop->lo_end; nfsrv_freenfslock(lop); nfsrv_localunlock(tvp, lfp, first, end, p); LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } else nfsrv_freenfslock(lop); lop = nlop; } if (vp == NULL && tvp != NULL) vrele(tvp); } /* * Free an nfslock structure. */ static void nfsrv_freenfslock(struct nfslock *lop) { if (lop->lo_lckfile.le_prev != NULL) { LIST_REMOVE(lop, lo_lckfile); nfsstatsv1.srvlocks--; nfsrv_openpluslock--; } LIST_REMOVE(lop, lo_lckowner); - FREE((caddr_t)lop, M_NFSDLOCK); + free(lop, M_NFSDLOCK); } /* * This function frees an nfslockfile structure. */ static void nfsrv_freenfslockfile(struct nfslockfile *lfp) { LIST_REMOVE(lfp, lf_hash); - FREE((caddr_t)lfp, M_NFSDLOCKFILE); + free(lfp, M_NFSDLOCKFILE); } /* * This function looks up an nfsstate structure via stateid. */ static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags, struct nfsstate **stpp) { struct nfsstate *stp; struct nfsstatehead *hp; int error = 0; *stpp = NULL; hp = NFSSTATEHASH(clp, *stateidp); LIST_FOREACH(stp, hp, ls_hash) { if (!NFSBCMP(stp->ls_stateid.other, stateidp->other, NFSX_STATEIDOTHER)) break; } /* * If no state id in list, return NFSERR_BADSTATEID. */ if (stp == LIST_END(hp)) { error = NFSERR_BADSTATEID; goto out; } *stpp = stp; out: NFSEXITCODE(error); return (error); } /* * This function gets an nfsstate structure via owner string. */ static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp) { struct nfsstate *stp; *stpp = NULL; LIST_FOREACH(stp, hp, ls_list) { if (new_stp->ls_ownerlen == stp->ls_ownerlen && !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) { *stpp = stp; return; } } } /* * Lock control function called to update lock status. * Returns 0 upon success, -1 if there is no lock and the flags indicate * that one isn't to be created and an NFSERR_xxx for other errors. * The structures new_stp and new_lop are passed in as pointers that should * be set to NULL if the structure is used and shouldn't be free'd. * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are * never used and can safely be allocated on the stack. For all other * cases, *new_stpp and *new_lopp should be malloc'd before the call, * in case they are used. */ APPLESTATIC int nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp, struct nfslock **new_lopp, struct nfslockconflict *cfp, nfsquad_t clientid, nfsv4stateid_t *stateidp, __unused struct nfsexstuff *exp, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfslock *lop; struct nfsstate *new_stp = *new_stpp; struct nfslock *new_lop = *new_lopp; struct nfsstate *tstp, *mystp, *nstp; int specialid = 0; struct nfslockfile *lfp; struct nfslock *other_lop = NULL; struct nfsstate *stp, *lckstp = NULL; struct nfsclient *clp = NULL; u_int32_t bits; int error = 0, haslock = 0, ret, reterr; int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0; fhandle_t nfh; uint64_t first, end; uint32_t lock_flags; if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Note the special cases of "all 1s" or "all 0s" stateids and * let reads with all 1s go ahead. */ if (new_stp->ls_stateid.seqid == 0x0 && new_stp->ls_stateid.other[0] == 0x0 && new_stp->ls_stateid.other[1] == 0x0 && new_stp->ls_stateid.other[2] == 0x0) specialid = 1; else if (new_stp->ls_stateid.seqid == 0xffffffff && new_stp->ls_stateid.other[0] == 0xffffffff && new_stp->ls_stateid.other[1] == 0xffffffff && new_stp->ls_stateid.other[2] == 0xffffffff) specialid = 2; } /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, specialid); if (error) goto out; /* * Check for state resource limit exceeded. */ if ((new_stp->ls_flags & NFSLCK_LOCK) && nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } /* * For the lock case, get another nfslock structure, * just in case we need it. * Malloc now, before we start sifting through the linked lists, * in case we have to wait for memory. */ tryagain: if (new_stp->ls_flags & NFSLCK_LOCK) - MALLOC(other_lop, struct nfslock *, sizeof (struct nfslock), + other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); filestruct_locked = 0; reterr = 0; lfp = NULL; /* * Get the lockfile structure for CFH now, so we can do a sanity * check against the stateid, before incrementing the seqid#, since * we want to return NFSERR_BADSTATEID on failure and the seqid# * shouldn't be incremented for this case. * If nfsrv_getlockfile() returns -1, it means "not found", which * will be handled later. * If we are doing Lock/LockU and local locking is enabled, sleep * lock the nfslockfile structure. */ getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p); NFSLOCKSTATE(); if (getlckret == 0) { if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 && nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) { getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 1); if (getlckret == 0) filestruct_locked = 1; } else getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 0); } if (getlckret != 0 && getlckret != -1) reterr = getlckret; if (filestruct_locked != 0) { LIST_INIT(&lfp->lf_rollback); if ((new_stp->ls_flags & NFSLCK_LOCK)) { /* * For local locking, do the advisory locking now, so * that any conflict can be detected. A failure later * can be rolled back locally. If an error is returned, * struct nfslockfile has been unlocked and any local * locking rolled back. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1"); vnode_unlocked = 1; NFSVOPUNLOCK(vp, 0); } reterr = nfsrv_locallock(vp, lfp, (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)), new_lop->lo_first, new_lop->lo_end, cfp, p); NFSLOCKSTATE(); } } if (specialid == 0) { if (new_stp->ls_flags & NFSLCK_TEST) { /* * RFC 3530 does not list LockT as an op that renews a * lease, but the consensus seems to be that it is ok * for a server to do so. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); /* * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid * error returns for LockT, just go ahead and test for a lock, * since there are no locks for this client, but other locks * can conflict. (ie. same client will always be false) */ if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED) error = 0; lckstp = new_stp; } else { error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) /* * Look up the stateid */ error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * do some sanity checks for an unconfirmed open or a * stateid that refers to the wrong file, for an open stateid */ if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) && ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) || (getlckret == 0 && stp->ls_lfp != lfp))){ /* * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID * The only exception is using SETATTR with SIZE. * */ if ((new_stp->ls_flags & (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR) error = NFSERR_BADSTATEID; } if (error == 0 && (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) && getlckret == 0 && stp->ls_lfp != lfp) error = NFSERR_BADSTATEID; /* * If the lockowner stateid doesn't refer to the same file, * I believe that is considered ok, since some clients will * only create a single lockowner and use that for all locks * on all files. * For now, log it as a diagnostic, instead of considering it * a BadStateid. */ if (error == 0 && (stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 && getlckret == 0 && stp->ls_lfp != lfp) { #ifdef DIAGNOSTIC printf("Got a lock statid for different file open\n"); #endif /* error = NFSERR_BADSTATEID; */ } if (error == 0) { if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) { /* * If haslock set, we've already checked the seqid. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); else error = NFSERR_BADSTATEID; } if (!error) nfsrv_getowner(&stp->ls_open, new_stp, &lckstp); if (lckstp) /* * I believe this should be an error, but it * isn't obvious what NFSERR_xxx would be * appropriate, so I'll use NFSERR_INVAL for now. */ error = NFSERR_INVAL; else lckstp = new_stp; } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) { /* * If haslock set, ditto above. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = NFSERR_BADSTATEID; else error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp, new_stp->ls_op); } lckstp = stp; } else { lckstp = stp; } } /* * If the seqid part of the stateid isn't the same, return * NFSERR_OLDSTATEID for cases other than I/O Ops. * For I/O Ops, only return NFSERR_OLDSTATEID if * nfsrv_returnoldstateid is set. (The consensus on the email * list was that most clients would prefer to not receive * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that * is what will happen, so I use the nfsrv_returnoldstateid to * allow for either server configuration.) */ if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid && (((nd->nd_flag & ND_NFSV41) == 0 && (!(new_stp->ls_flags & NFSLCK_CHECK) || nfsrv_returnoldstateid)) || ((nd->nd_flag & ND_NFSV41) != 0 && new_stp->ls_stateid.seqid != 0))) error = NFSERR_OLDSTATEID; } } /* * Now we can check for grace. */ if (!error) error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If we successfully Reclaimed state, note that. */ if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error) nfsrv_markstable(clp); /* * At this point, either error == NFSERR_BADSTATEID or the * seqid# has been updated, so we can return any error. * If error == 0, there may be an error in: * nd_repstat - Set by the calling function. * reterr - Set above, if getting the nfslockfile structure * or acquiring the local lock failed. * (If both of these are set, nd_repstat should probably be * returned, since that error was detected before this * function call.) */ if (error != 0 || nd->nd_repstat != 0 || reterr != 0) { if (error == 0) { if (nd->nd_repstat != 0) error = nd->nd_repstat; else error = reterr; } if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2"); vnode_unlocked = 1; NFSVOPUNLOCK(vp, 0); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Check the nfsrv_getlockfile return. * Returned -1 if no structure found. */ if (getlckret == -1) { error = NFSERR_EXPIRED; /* * Called from lockt, so no lock is OK. */ if (new_stp->ls_flags & NFSLCK_TEST) { error = 0; } else if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Called to check for a lock, OK if the stateid is all * 1s or all 0s, but there should be an nfsstate * otherwise. * (ie. If there is no open, I'll assume no share * deny bits.) */ if (specialid) error = 0; else error = NFSERR_BADSTATEID; } NFSUNLOCKSTATE(); goto out; } /* * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict. * For NFSLCK_CHECK, allow a read if write access is granted, * but check for a deny. For NFSLCK_LOCK, require correct access, * which implies a conflicting deny can't exist. */ if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) { /* * Four kinds of state id: * - specialid (all 0s or all 1s), only for NFSLCK_CHECK * - stateid for an open * - stateid for a delegation * - stateid for a lock owner */ if (!specialid) { if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { delegation = 1; mystp = stp; nfsrv_delaydelegtimeout(stp); } else if (stp->ls_flags & NFSLCK_OPEN) { mystp = stp; } else { mystp = stp->ls_openstp; } /* * If locking or checking, require correct access * bit set. */ if (((new_stp->ls_flags & NFSLCK_LOCK) && !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) & mystp->ls_flags & NFSLCK_ACCESSBITS)) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) == (NFSLCK_CHECK | NFSLCK_READACCESS) && !(mystp->ls_flags & NFSLCK_READACCESS) && nfsrv_allowreadforwriteopen == 0) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) == (NFSLCK_CHECK | NFSLCK_WRITEACCESS) && !(mystp->ls_flags & NFSLCK_WRITEACCESS))) { if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl3"); vnode_unlocked = 1; NFSVOPUNLOCK(vp, 0); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); error = NFSERR_OPENMODE; goto out; } } else mystp = NULL; if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) { /* * Check for a conflicting deny bit. */ LIST_FOREACH(tstp, &lfp->lf_open, ls_file) { if (tstp != mystp) { bits = tstp->ls_flags; bits >>= NFSLCK_SHIFT; if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) { KASSERT(vnode_unlocked == 0, ("nfsrv_lockctrl: vnode unlocked1")); ret = nfsrv_clientconflict(tstp->ls_clp, &haslock, vp, p); if (ret == 1) { /* * nfsrv_clientconflict unlocks state * when it returns non-zero. */ lckstp = NULL; goto tryagain; } if (ret == 0) NFSUNLOCKSTATE(); if (ret == 2) error = NFSERR_PERM; else error = NFSERR_OPENMODE; goto out; } } } /* We're outta here */ NFSUNLOCKSTATE(); goto out; } } /* * For setattr, just get rid of all the Delegations for other clients. */ if (new_stp->ls_flags & NFSLCK_SETATTR) { KASSERT(vnode_unlocked == 0, ("nfsrv_lockctrl: vnode unlocked2")); ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p); if (ret) { /* * nfsrv_cleandeleg() unlocks state when it * returns non-zero. */ if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } if (!(new_stp->ls_flags & NFSLCK_CHECK) || (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg))) { NFSUNLOCKSTATE(); goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * I currently believe the conflict algorithm to be: * For Lock Ops (Lock/LockT/LockU) * - there is a conflict iff a different client has a write delegation * For Reading (Read Op) * - there is a conflict iff a different client has a write delegation * (the specialids are always a different client) * For Writing (Write/Setattr of size) * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (I don't understand why this isn't allowed, but that seems to be * the current consensus?) */ tstp = LIST_FIRST(&lfp->lf_deleg); while (tstp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(tstp, ls_file); if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))|| ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_READ))) && clp != tstp->ls_clp && (tstp->ls_flags & NFSLCK_DELEGWRITE)) || ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_WRITE) && (clp != tstp->ls_clp || (tstp->ls_flags & NFSLCK_DELEGREAD)))) { ret = 0; if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4"); NFSVOPUNLOCK(vp, 0); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); vnode_unlocked = 0; if ((vp->v_iflag & VI_DOOMED) != 0) ret = NFSERR_SERVERFAULT; NFSLOCKSTATE(); } if (ret == 0) ret = nfsrv_delegconflict(tstp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict unlocks state when it * returns non-zero, which it always does. */ if (other_lop) { - FREE((caddr_t)other_lop, M_NFSDLOCK); + free(other_lop, M_NFSDLOCK); other_lop = NULL; } if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } /* Never gets here. */ } tstp = nstp; } /* * Handle the unlock case by calling nfsrv_updatelock(). * (Should I have done some access checking above for unlock? For now, * just let it happen.) */ if (new_stp->ls_flags & NFSLCK_UNLOCK) { first = new_lop->lo_first; end = new_lop->lo_end; nfsrv_updatelock(stp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(stp->ls_stateid.seqid); if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = stp->ls_stateid.seqid = 1; stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; if (filestruct_locked != 0) { NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5"); vnode_unlocked = 1; NFSVOPUNLOCK(vp, 0); } /* Update the local locks. */ nfsrv_localunlock(vp, lfp, first, end, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Search for a conflicting lock. A lock conflicts if: * - the lock range overlaps and * - at least one lock is a write lock and * - it is not owned by the same lock owner */ if (!delegation) { LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { if (new_lop->lo_end > lop->lo_first && new_lop->lo_first < lop->lo_end && (new_lop->lo_flags == NFSLCK_WRITE || lop->lo_flags == NFSLCK_WRITE) && lckstp != lop->lo_stp && (clp != lop->lo_stp->ls_clp || lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen || NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner, lckstp->ls_ownerlen))) { if (other_lop) { - FREE((caddr_t)other_lop, M_NFSDLOCK); + free(other_lop, M_NFSDLOCK); other_lop = NULL; } if (vnode_unlocked != 0) ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock, NULL, p); else ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock, vp, p); if (ret == 1) { if (filestruct_locked != 0) { if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6"); NFSVOPUNLOCK(vp, 0); } /* Roll back local locks. */ nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); vnode_unlocked = 0; if ((vp->v_iflag & VI_DOOMED) != 0) { error = NFSERR_SERVERFAULT; goto out; } } /* * nfsrv_clientconflict() unlocks state when it * returns non-zero. */ lckstp = NULL; goto tryagain; } /* * Found a conflicting lock, so record the conflict and * return the error. */ if (cfp != NULL && ret == 0) { cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0]; cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1]; cfp->cl_first = lop->lo_first; cfp->cl_end = lop->lo_end; cfp->cl_flags = lop->lo_flags; cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen; NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner, cfp->cl_ownerlen); } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else if (new_stp->ls_flags & NFSLCK_CHECK) error = NFSERR_LOCKED; else error = NFSERR_DENIED; if (filestruct_locked != 0 && ret == 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7"); vnode_unlocked = 1; NFSVOPUNLOCK(vp, 0); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } if (ret == 0) NFSUNLOCKSTATE(); goto out; } } } /* * We only get here if there was no lock that conflicted. */ if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) { NFSUNLOCKSTATE(); goto out; } /* * We only get here when we are creating or modifying a lock. * There are two variants: * - exist_lock_owner where lock_owner exists * - open_to_lock_owner with new lock_owner */ first = new_lop->lo_first; end = new_lop->lo_end; lock_flags = new_lop->lo_flags; if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) { nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(lckstp->ls_stateid.seqid); if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = lckstp->ls_stateid.seqid = 1; stateidp->other[0] = lckstp->ls_stateid.other[0]; stateidp->other[1] = lckstp->ls_stateid.other[1]; stateidp->other[2] = lckstp->ls_stateid.other[2]; } else { /* * The new open_to_lock_owner case. * Link the new nfsstate into the lists. */ new_stp->ls_seq = new_stp->ls_opentolockseq; nfsrvd_refcache(new_stp->ls_op); stateidp->seqid = new_stp->ls_stateid.seqid = 1; stateidp->other[0] = new_stp->ls_stateid.other[0] = clp->lc_clientid.lval[0]; stateidp->other[1] = new_stp->ls_stateid.other[1] = clp->lc_clientid.lval[1]; stateidp->other[2] = new_stp->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_stp->ls_clp = clp; LIST_INIT(&new_stp->ls_lock); new_stp->ls_openstp = stp; new_stp->ls_lfp = lfp; nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp, lfp); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid), new_stp, ls_hash); LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list); *new_lopp = NULL; *new_stpp = NULL; nfsstatsv1.srvlockowners++; nfsrv_openpluslock++; } if (filestruct_locked != 0) { NFSUNLOCKSTATE(); nfsrv_locallock_commit(lfp, lock_flags, first, end); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); out: if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (vnode_unlocked != 0) { NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) error = NFSERR_SERVERFAULT; } if (other_lop) - FREE((caddr_t)other_lop, M_NFSDLOCK); + free(other_lop, M_NFSDLOCK); NFSEXITCODE2(error, nd); return (error); } /* * Check for state errors for Open. * repstat is passed back out as an error if more critical errors * are not detected. */ APPLESTATIC int nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd, NFSPROC_T *p, int repstat) { struct nfsstate *stp, *nstp; struct nfsclient *clp; struct nfsstate *ownerstp; struct nfslockfile *lfp, *new_lfp; int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; /* * Check for state resource limit exceeded. * Technically this should be SMP protected, but the worst * case error is "out by one or two" on the count when it * returns NFSERR_RESOURCE and the limit is just a rather * arbitrary high water mark, so no harm is done. */ if (nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } tryagain: - MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile), + new_lfp = malloc(sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); if (vp) getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the nfsclient structure. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (!error && ownerstp) { error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp, new_stp->ls_op); /* * If the OpenOwner hasn't been confirmed, assume the * old one was a replay and this one is ok. * See: RFC3530 Sec. 14.2.18. */ if (error == NFSERR_BADSEQID && (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM)) error = 0; } /* * Check for grace. */ if (!error) error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If none of the above errors occurred, let repstat be * returned. */ if (repstat && !error) error = repstat; if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } - free((caddr_t)new_lfp, M_NFSDLOCKFILE); + free(new_lfp, M_NFSDLOCKFILE); goto out; } /* * If vp == NULL, the file doesn't exist yet, so return ok. * (This always happens on the first pass, so haslock must be 0.) */ if (vp == NULL) { NFSUNLOCKSTATE(); - FREE((caddr_t)new_lfp, M_NFSDLOCKFILE); + free(new_lfp, M_NFSDLOCKFILE); goto out; } /* * Get the structure for the underlying file. */ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) - FREE((caddr_t)new_lfp, M_NFSDLOCKFILE); + free(new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && (((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) || stateidp->seqid == stp->ls_stateid.seqid) && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } } /* * Check for access/deny bit conflicts. I check for the same * owner as well, in case the client didn't bother. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) && (((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks * state when it returns non-zero. */ goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) * I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current consensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) * Don't check for a Reclaim, since that will be dealt with * by nfsrv_openctrl(). */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ if (ret == -1) goto tryagain; error = ret; goto out; } } stp = nstp; } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE2(error, nd); return (error); } /* * Open control function to create/update open state for an open. */ APPLESTATIC int nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp, nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp, NFSPROC_T *p, u_quad_t filerev) { struct nfsstate *new_stp = *new_stpp; struct nfsstate *stp, *nstp; struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg; struct nfslockfile *lfp, *new_lfp; struct nfsclient *clp; int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1; int readonly = 0, cbret = 1, getfhret = 0; int gotstate = 0, len = 0; u_char *clidp = NULL; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). * (Paranoia, should have been detected by nfsrv_opencheck().) * If an error does show up, return NFSERR_EXPIRED, since the * the seqid# has already been incremented. */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) { printf("Nfsd: openctrl unexpected restart err=%d\n", error); error = NFSERR_EXPIRED; goto out; } clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK); tryagain: - MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile), + new_lfp = malloc(sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); - MALLOC(new_open, struct nfsstate *, sizeof (struct nfsstate), + new_open = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); - MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate), + new_deleg = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the client structure. Since the linked lists could be changed * by other nfsd processes if this process does a tsleep(), one of * two things must be done. * 1 - don't tsleep() * or * 2 - get the nfsv4_lock() { indicated by haslock == 1 } * before using the lists, since this lock stops the other * nfsd. This should only be used for rare cases, since it * essentially single threads the nfsd. * At this time, it is only done for cases where the stable * storage file must be written prior to completion of state * expiration. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) && clp->lc_program) { /* * This happens on the first open for a client * that supports callbacks. */ NFSUNLOCKSTATE(); /* * Although nfsrv_docallback() will sleep, clp won't * go away, since they are only removed when the * nfsv4_lock() has blocked the nfsd threads. The * fields in clp can change, but having multiple * threads do this Null callback RPC should be * harmless. */ cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL, NULL, 0, NULL, NULL, NULL, p); NFSLOCKSTATE(); clp->lc_flags &= ~LCL_NEEDSCBNULL; if (!cbret) clp->lc_flags |= LCL_CALLBACKSON; } /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (error) { NFSUNLOCKSTATE(); printf("Nfsd: openctrl unexpected state err=%d\n", error); - free((caddr_t)new_lfp, M_NFSDLOCKFILE); - free((caddr_t)new_open, M_NFSDSTATE); - free((caddr_t)new_deleg, M_NFSDSTATE); + free(new_lfp, M_NFSDLOCKFILE); + free(new_open, M_NFSDSTATE); + free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } if (new_stp->ls_flags & NFSLCK_RECLAIM) nfsrv_markstable(clp); /* * Get the structure for the underlying file. */ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) - FREE((caddr_t)new_lfp, M_NFSDLOCKFILE); + free(new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected getlockfile err=%d\n", error); - free((caddr_t)new_open, M_NFSDSTATE); - free((caddr_t)new_deleg, M_NFSDSTATE); + free(new_open, M_NFSDSTATE); + free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && (((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) || stateidp->seqid == stp->ls_stateid.seqid) && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected expiry\n"); - free((caddr_t)new_open, M_NFSDSTATE); - free((caddr_t)new_deleg, M_NFSDSTATE); + free(new_open, M_NFSDSTATE); + free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } /* * Don't issue a Delegation, since one already exists and * delay delegation timeout, as required. */ delegate = 0; nfsrv_delaydelegtimeout(stp); } /* * Check for access/deny bit conflicts. I also check for the * same owner, since the client might not have bothered to check. * Also, note an open for the same file and owner, if found, * which is all we do here for Delegate_Cur, since conflict * checking is already done. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (ownerstp && stp->ls_openowner == ownerstp) openstp = stp; if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) { /* * If another client has the file open, the only * delegation that can be issued is a Read delegation * and only if it is a Read open with Deny none. */ if (clp != stp->ls_clp) { if ((stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) writedeleg = 0; else delegate = 0; } if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks state * when it returns non-zero. */ - free((caddr_t)new_open, M_NFSDSTATE); - free((caddr_t)new_deleg, M_NFSDSTATE); + free(new_open, M_NFSDSTATE); + free(new_deleg, M_NFSDSTATE); openstp = NULL; goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } - free((caddr_t)new_open, M_NFSDSTATE); - free((caddr_t)new_deleg, M_NFSDSTATE); + free(new_open, M_NFSDSTATE); + free(new_deleg, M_NFSDSTATE); printf("nfsd openctrl unexpected client cnfl\n"); goto out; } } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) * I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current consensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD)) writedeleg = 0; else delegate = 0; if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { if (new_stp->ls_flags & NFSLCK_RECLAIM) { delegate = 2; } else { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ printf("Nfsd openctrl unexpected deleg cnfl\n"); - free((caddr_t)new_open, M_NFSDSTATE); - free((caddr_t)new_deleg, M_NFSDSTATE); + free(new_open, M_NFSDSTATE); + free(new_deleg, M_NFSDSTATE); if (ret == -1) { openstp = NULL; goto tryagain; } error = ret; goto out; } } } stp = nstp; } } /* * We only get here if there was no open that conflicted. * If an open for the owner exists, or in the access/deny bits. * Otherwise it is a new open. If the open_owner hasn't been * confirmed, replace the open with the new one needing confirmation, * otherwise add the open. */ if (new_stp->ls_flags & NFSLCK_DELEGPREV) { /* * Handle NFSLCK_DELEGPREV by searching the old delegations for * a match. If found, just move the old delegation to the current * delegation list and issue open. If not found, return * NFSERR_EXPIRED. */ LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { if (stp->ls_lfp == lfp) { /* Found it */ if (stp->ls_clp != clp) panic("olddeleg clp"); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_hash); stp->ls_flags &= ~NFSLCK_OLDDELEG; stp->ls_stateid.seqid = delegstateidp->seqid = 1; stp->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; stp->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; stp->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); stp->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, stp->ls_stateid), stp, ls_hash); if (stp->ls_flags & NFSLCK_DELEGWRITE) *rflagsp |= NFSV4OPEN_WRITEDELEGATE; else *rflagsp |= NFSV4OPEN_READDELEGATE; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; /* * Now, do the associated open. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)| NFSLCK_OPEN; if (stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; break; } } if (stp == LIST_END(&clp->lc_olddeleg)) error = NFSERR_EXPIRED; } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { /* * Scan to see that no delegation for this client and file * doesn't already exist. * There also shouldn't yet be an Open for this file and * openowner. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_clp == clp) break; } if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) { /* * This is the Claim_Previous case with a delegation * type != Delegate_None. */ /* * First, add the delegation. (Although we must issue the * delegation, we can also ask for an immediate return.) */ new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (new_stp->ls_flags & NFSLCK_DELEGWRITE) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; if (delegate == 2 || nfsrv_issuedelegs == 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) || !NFSVNO_DELEGOK(vp)) *rflagsp |= NFSV4OPEN_RECALL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; /* * Now, do the associated open. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) | NFSLCK_OPEN; if (new_stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; } else { error = NFSERR_RECLAIMCONFLICT; } } else if (ownerstp) { if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) { /* Replace the open */ if (ownerstp->ls_op) nfsrvd_derefcache(ownerstp->ls_op); ownerstp->ls_op = new_stp->ls_op; nfsrvd_refcache(ownerstp->ls_op); ownerstp->ls_seq = new_stp->ls_seq; *rflagsp |= NFSV4OPEN_RESULTCONFIRM; stp = LIST_FIRST(&ownerstp->ls_open); stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; stp->ls_stateid.seqid = 1; stp->ls_uid = new_stp->ls_uid; if (lfp != stp->ls_lfp) { LIST_REMOVE(stp, ls_file); LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file); stp->ls_lfp = lfp; } openstp = stp; } else if (openstp) { openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS); openstp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && openstp->ls_stateid.seqid == 0) openstp->ls_stateid.seqid = 1; /* * This is where we can choose to issue a delegation. */ if (delegate == 0 || writedeleg == 0 || NFSVNO_EXRDONLY(exp) || (readonly != 0 && nfsrv_writedelegifpos == 0) || !NFSVNO_DELEGOK(vp) || (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else if (nfsrv_issuedelegs == 0 || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; nfsrv_writedelegcnt++; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } else { new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)| NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; new_open->ls_openowner = ownerstp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; /* * This is where we can choose to issue a delegation. */ if (delegate == 0 || (writedeleg == 0 && readonly == 0) || !NFSVNO_DELEGOK(vp) || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else if (nfsrv_issuedelegs == 0 || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } } else { /* * New owner case. Start the open_owner sequence with a * Needs confirmation (unless a reclaim) and hang the * new open off it. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; LIST_INIT(&new_open->ls_open); new_open->ls_openowner = new_stp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); if (new_stp->ls_flags & NFSLCK_RECLAIM) { new_stp->ls_flags = 0; } else if ((nd->nd_flag & ND_NFSV41) != 0) { /* NFSv4.1 never needs confirmation. */ new_stp->ls_flags = 0; /* * This is where we can choose to issue a delegation. */ if (delegate && nfsrv_issuedelegs && (writedeleg || readonly) && (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) == LCL_CALLBACKSON && !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) && NFSVNO_DELEGOK(vp) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0)) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } /* * Since NFSv4.1 never does an OpenConfirm, the first * open state will be acquired here. */ if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, clidp, len); gotstate = 1; } } else { *rflagsp |= NFSV4OPEN_RESULTCONFIRM; new_stp->ls_flags = NFSLCK_NEEDSCONFIRM; } nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; *new_stpp = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } if (!error) { stateidp->seqid = openstp->ls_stateid.seqid; stateidp->other[0] = openstp->ls_stateid.other[0]; stateidp->other[1] = openstp->ls_stateid.other[1]; stateidp->other[2] = openstp->ls_stateid.other[2]; } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (new_open) - FREE((caddr_t)new_open, M_NFSDSTATE); + free(new_open, M_NFSDSTATE); if (new_deleg) - FREE((caddr_t)new_deleg, M_NFSDSTATE); + free(new_deleg, M_NFSDSTATE); /* * If the NFSv4.1 client just acquired its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: free(clidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Open update. Does the confirm, downgrade and close. */ APPLESTATIC int nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfsstate *stp; struct nfsclient *clp; struct nfslockfile *lfp; u_int32_t bits; int error = 0, gotstate = 0, len = 0; u_char *clidp = NULL; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK); NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error) error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * Sanity check the open. */ if (!error && (!(stp->ls_flags & NFSLCK_OPEN) || (!(new_stp->ls_flags & NFSLCK_CONFIRM) && (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) || ((new_stp->ls_flags & NFSLCK_CONFIRM) && (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))))) error = NFSERR_BADSTATEID; if (!error) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid && (((nd->nd_flag & ND_NFSV41) == 0 && !(new_stp->ls_flags & NFSLCK_CONFIRM)) || ((nd->nd_flag & ND_NFSV41) != 0 && new_stp->ls_stateid.seqid != 0))) error = NFSERR_OLDSTATEID; if (!error && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) error = NFSERR_ISDIR; else error = NFSERR_INVAL; } if (error) { /* * If a client tries to confirm an Open with a bad * seqid# and there are no byte range locks or other Opens * on the openowner, just throw it away, so the next use of the * openowner will start a fresh seq#. */ if (error == NFSERR_BADSEQID && (new_stp->ls_flags & NFSLCK_CONFIRM) && nfsrv_nootherstate(stp)) nfsrv_freeopenowner(stp->ls_openowner, 0, p); NFSUNLOCKSTATE(); goto out; } /* * Set the return stateid. */ stateidp->seqid = stp->ls_stateid.seqid + 1; if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = 1; stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; /* * Now, handle the three cases. */ if (new_stp->ls_flags & NFSLCK_CONFIRM) { /* * If the open doesn't need confirmation, it seems to me that * there is a client error, but I'll just log it and keep going? */ if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) printf("Nfsv4d: stray open confirm\n"); stp->ls_openowner->ls_flags = 0; stp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 0) stp->ls_stateid.seqid = 1; if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, clidp, len); gotstate = 1; } NFSUNLOCKSTATE(); } else if (new_stp->ls_flags & NFSLCK_CLOSE) { lfp = stp->ls_lfp; if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) { /* Get the lf lock */ nfsrv_locklf(lfp); NFSUNLOCKSTATE(); ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate"); NFSVOPUNLOCK(vp, 0); if (nfsrv_freeopen(stp, vp, 1, p) == 0) { NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); } NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); } else { (void) nfsrv_freeopen(stp, NULL, 0, p); NFSUNLOCKSTATE(); } } else { /* * Update the share bits, making sure that the new set are a * subset of the old ones. */ bits = (new_stp->ls_flags & NFSLCK_SHAREBITS); if (~(stp->ls_flags) & bits) { NFSUNLOCKSTATE(); error = NFSERR_INVAL; goto out; } stp->ls_flags = (bits | NFSLCK_OPEN); stp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 0) stp->ls_stateid.seqid = 1; NFSUNLOCKSTATE(); } /* * If the client just confirmed its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: free(clidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Delegation update. Does the purge and return. */ APPLESTATIC int nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid, nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred, NFSPROC_T *p) { struct nfsstate *stp; struct nfsclient *clp; int error = 0; fhandle_t fh; /* * Do a sanity check against the file handle for DelegReturn. */ if (vp) { error = nfsvno_getfh(vp, &fh, p); if (error) goto out; } /* * Check for restart conditions (client and server). */ if (op == NFSV4OP_DELEGRETURN) error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN, stateidp, 0); else error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE, stateidp, 0); NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ if (!error) error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error) { if (error == NFSERR_CBPATHDOWN) error = 0; if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN) error = NFSERR_STALESTATEID; } if (!error && op == NFSV4OP_DELEGRETURN) { error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp); if (!error && stp->ls_stateid.seqid != stateidp->seqid && ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0)) error = NFSERR_OLDSTATEID; } /* * NFSERR_EXPIRED means that the state has gone away, * so Delegations have been purged. Just return ok. */ if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) { NFSUNLOCKSTATE(); error = 0; goto out; } if (error) { NFSUNLOCKSTATE(); goto out; } if (op == NFSV4OP_DELEGRETURN) { if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh, sizeof (fhandle_t))) { NFSUNLOCKSTATE(); error = NFSERR_BADSTATEID; goto out; } nfsrv_freedeleg(stp); } else { nfsrv_freedeleglist(&clp->lc_olddeleg); } NFSUNLOCKSTATE(); error = 0; out: NFSEXITCODE(error); return (error); } /* * Release lock owner. */ APPLESTATIC int nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid, NFSPROC_T *p) { struct nfsstate *stp, *nstp, *openstp, *ownstp; struct nfsclient *clp; int error = 0; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; NFSLOCKSTATE(); /* * Get the lock owner by name. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, NULL, p); if (error) { NFSUNLOCKSTATE(); goto out; } LIST_FOREACH(ownstp, &clp->lc_open, ls_list) { LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) { stp = LIST_FIRST(&openstp->ls_open); while (stp != LIST_END(&openstp->ls_open)) { nstp = LIST_NEXT(stp, ls_list); /* * If the owner matches, check for locks and * then free or return an error. */ if (stp->ls_ownerlen == new_stp->ls_ownerlen && !NFSBCMP(stp->ls_owner, new_stp->ls_owner, stp->ls_ownerlen)){ if (LIST_EMPTY(&stp->ls_lock)) { nfsrv_freelockowner(stp, NULL, 0, p); } else { NFSUNLOCKSTATE(); error = NFSERR_LOCKSHELD; goto out; } } stp = nstp; } } } NFSUNLOCKSTATE(); out: NFSEXITCODE(error); return (error); } /* * Get the file handle for a lock structure. */ static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p) { fhandle_t *fhp = NULL; int error; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL")); fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfh"); } error = nfsvno_getfh(vp, fhp, p); NFSEXITCODE(error); return (error); } /* * Get an nfs lock structure. Allocate one, as required, and return a * pointer to it. * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock. */ static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit) { struct nfslockfile *lfp; fhandle_t *fhp = NULL, *tfhp; struct nfslockhashhead *hp; struct nfslockfile *new_lfp = NULL; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { new_lfp = *new_lfpp; fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfile"); } hp = NFSLOCKHASH(fhp); LIST_FOREACH(lfp, hp, lf_hash) { tfhp = &lfp->lf_fh; if (NFSVNO_CMPFH(fhp, tfhp)) { if (lockit) nfsrv_locklf(lfp); *lfpp = lfp; return (0); } } if (!(flags & NFSLCK_OPEN)) return (-1); /* * No match, so chain the new one into the list. */ LIST_INIT(&new_lfp->lf_open); LIST_INIT(&new_lfp->lf_lock); LIST_INIT(&new_lfp->lf_deleg); LIST_INIT(&new_lfp->lf_locallock); LIST_INIT(&new_lfp->lf_rollback); new_lfp->lf_locallock_lck.nfslock_usecnt = 0; new_lfp->lf_locallock_lck.nfslock_lock = 0; new_lfp->lf_usecount = 0; LIST_INSERT_HEAD(hp, new_lfp, lf_hash); *lfpp = new_lfp; *new_lfpp = NULL; return (0); } /* * This function adds a nfslock lock structure to the list for the associated * nfsstate and nfslockfile structures. It will be inserted after the * entry pointed at by insert_lop. */ static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp) { struct nfslock *lop, *nlop; new_lop->lo_stp = stp; new_lop->lo_lfp = lfp; if (stp != NULL) { /* Insert in increasing lo_first order */ lop = LIST_FIRST(&lfp->lf_lock); if (lop == LIST_END(&lfp->lf_lock) || new_lop->lo_first <= lop->lo_first) { LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile); } else { nlop = LIST_NEXT(lop, lo_lckfile); while (nlop != LIST_END(&lfp->lf_lock) && nlop->lo_first < new_lop->lo_first) { lop = nlop; nlop = LIST_NEXT(lop, lo_lckfile); } LIST_INSERT_AFTER(lop, new_lop, lo_lckfile); } } else { new_lop->lo_lckfile.le_prev = NULL; /* list not used */ } /* * Insert after insert_lop, which is overloaded as stp or lfp for * an empty list. */ if (stp == NULL && (struct nfslockfile *)insert_lop == lfp) LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner); else if ((struct nfsstate *)insert_lop == stp) LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner); else LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner); if (stp != NULL) { nfsstatsv1.srvlocks++; nfsrv_openpluslock++; } } /* * This function updates the locking for a lock owner and given file. It * maintains a list of lock ranges ordered on increasing file offset that * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style). * It always adds new_lop to the list and sometimes uses the one pointed * at by other_lopp. */ static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp) { struct nfslock *new_lop = *new_lopp; struct nfslock *lop, *tlop, *ilop; struct nfslock *other_lop = *other_lopp; int unlock = 0, myfile = 0; u_int64_t tmp; /* * Work down the list until the lock is merged. */ if (new_lop->lo_flags & NFSLCK_UNLOCK) unlock = 1; if (stp != NULL) { ilop = (struct nfslock *)stp; lop = LIST_FIRST(&stp->ls_lock); } else { ilop = (struct nfslock *)lfp; lop = LIST_FIRST(&lfp->lf_locallock); } while (lop != NULL) { /* * Only check locks for this file that aren't before the start of * new lock's range. */ if (lop->lo_lfp == lfp) { myfile = 1; if (lop->lo_end >= new_lop->lo_first) { if (new_lop->lo_end < lop->lo_first) { /* * If the new lock ends before the start of the * current lock's range, no merge, just insert * the new lock. */ break; } if (new_lop->lo_flags == lop->lo_flags || (new_lop->lo_first <= lop->lo_first && new_lop->lo_end >= lop->lo_end)) { /* * This lock can be absorbed by the new lock/unlock. * This happens when it covers the entire range * of the old lock or is contiguous * with the old lock and is of the same type or an * unlock. */ if (lop->lo_first < new_lop->lo_first) new_lop->lo_first = lop->lo_first; if (lop->lo_end > new_lop->lo_end) new_lop->lo_end = lop->lo_end; tlop = lop; lop = LIST_NEXT(lop, lo_lckowner); nfsrv_freenfslock(tlop); continue; } /* * All these cases are for contiguous locks that are not the * same type, so they can't be merged. */ if (new_lop->lo_first <= lop->lo_first) { /* * This case is where the new lock overlaps with the * first part of the old lock. Move the start of the * old lock to just past the end of the new lock. The * new lock will be inserted in front of the old, since * ilop hasn't been updated. (We are done now.) */ lop->lo_first = new_lop->lo_end; break; } if (new_lop->lo_end >= lop->lo_end) { /* * This case is where the new lock overlaps with the * end of the old lock's range. Move the old lock's * end to just before the new lock's first and insert * the new lock after the old lock. * Might not be done yet, since the new lock could * overlap further locks with higher ranges. */ lop->lo_end = new_lop->lo_first; ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); continue; } /* * The final case is where the new lock's range is in the * middle of the current lock's and splits the current lock * up. Use *other_lopp to handle the second part of the * split old lock range. (We are done now.) * For unlock, we use new_lop as other_lop and tmp, since * other_lop and new_lop are the same for this case. * We noted the unlock case above, so we don't need * new_lop->lo_flags any longer. */ tmp = new_lop->lo_first; if (other_lop == NULL) { if (!unlock) panic("nfsd srv update unlock"); other_lop = new_lop; *new_lopp = NULL; } other_lop->lo_first = new_lop->lo_end; other_lop->lo_end = lop->lo_end; other_lop->lo_flags = lop->lo_flags; other_lop->lo_stp = stp; other_lop->lo_lfp = lfp; lop->lo_end = tmp; nfsrv_insertlock(other_lop, lop, stp, lfp); *other_lopp = NULL; ilop = lop; break; } } ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); if (myfile && (lop == NULL || lop->lo_lfp != lfp)) break; } /* * Insert the new lock in the list at the appropriate place. */ if (!unlock) { nfsrv_insertlock(new_lop, ilop, stp, lfp); *new_lopp = NULL; } } /* * This function handles sequencing of locks, etc. * It returns an error that indicates what the caller should do. */ static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op) { int error = 0; if ((nd->nd_flag & ND_NFSV41) != 0) /* NFSv4.1 ignores the open_seqid and lock_seqid. */ goto out; if (op != nd->nd_rp) panic("nfsrvstate checkseqid"); if (!(op->rc_flag & RC_INPROG)) panic("nfsrvstate not inprog"); if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) { printf("refcnt=%d\n", stp->ls_op->rc_refcnt); panic("nfsrvstate op refcnt"); } if ((stp->ls_seq + 1) == seqid) { if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); stp->ls_op = op; nfsrvd_refcache(op); stp->ls_seq = seqid; goto out; } else if (stp->ls_seq == seqid && stp->ls_op && op->rc_xid == stp->ls_op->rc_xid && op->rc_refcnt == 0 && op->rc_reqlen == stp->ls_op->rc_reqlen && op->rc_cksum == stp->ls_op->rc_cksum) { if (stp->ls_op->rc_flag & RC_INPROG) { error = NFSERR_DONTREPLY; goto out; } nd->nd_rp = stp->ls_op; nd->nd_rp->rc_flag |= RC_INPROG; nfsrvd_delcache(op); error = NFSERR_REPLYFROMCACHE; goto out; } error = NFSERR_BADSEQID; out: NFSEXITCODE2(error, nd); return (error); } /* * Get the client ip address for callbacks. If the strings can't be parsed, * just set lc_program to 0 to indicate no callbacks are possible. * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set * the address to the client's transport address. This won't be used * for callbacks, but can be printed out by nfsstats for info.) * Return error if the xdr can't be parsed, 0 otherwise. */ APPLESTATIC int nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp) { u_int32_t *tl; u_char *cp, *cp2; int i, j; struct sockaddr_in *rad, *sad; u_char protocol[5], addr[24]; int error = 0, cantparse = 0; union { in_addr_t ival; u_char cval[4]; } ip; union { in_port_t sval; u_char cval[2]; } port; rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); rad->sin_family = AF_INET; rad->sin_len = sizeof (struct sockaddr_in); rad->sin_addr.s_addr = 0; rad->sin_port = 0; clp->lc_req.nr_client = NULL; clp->lc_req.nr_lock = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i >= 3 && i <= 4) { error = nfsrv_mtostr(nd, protocol, i); if (error) goto nfsmout; if (!strcmp(protocol, "tcp")) { clp->lc_flags |= LCL_TCPCALLBACK; clp->lc_req.nr_sotype = SOCK_STREAM; clp->lc_req.nr_soproto = IPPROTO_TCP; } else if (!strcmp(protocol, "udp")) { clp->lc_req.nr_sotype = SOCK_DGRAM; clp->lc_req.nr_soproto = IPPROTO_UDP; } else { cantparse = 1; } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (i == 0) { cantparse = 1; } else if (!cantparse && i <= 23 && i >= 11) { error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; /* * Parse out the address fields. We expect 6 decimal numbers * separated by '.'s. */ cp = addr; i = 0; while (*cp && i < 6) { cp2 = cp; while (*cp2 && *cp2 != '.') cp2++; if (*cp2) *cp2++ = '\0'; else if (i != 5) { cantparse = 1; break; } j = nfsrv_getipnumber(cp); if (j >= 0) { if (i < 4) ip.cval[3 - i] = j; else port.cval[5 - i] = j; } else { cantparse = 1; break; } cp = cp2; i++; } if (!cantparse) { if (ip.ival != 0x0) { rad->sin_addr.s_addr = htonl(ip.ival); rad->sin_port = htons(port.sval); } else { cantparse = 1; } } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } if (cantparse) { sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); if (sad->sin_family == AF_INET) { rad->sin_addr.s_addr = sad->sin_addr.s_addr; rad->sin_port = 0x0; } clp->lc_program = 0; } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Turn a string of up to three decimal digits into a number. Return -1 upon * error. */ static int nfsrv_getipnumber(u_char *cp) { int i = 0, j = 0; while (*cp) { if (j > 2 || *cp < '0' || *cp > '9') return (-1); i *= 10; i += (*cp - '0'); cp++; j++; } if (i < 256) return (i); return (-1); } /* * This function checks for restart conditions. */ static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid) { int ret = 0; /* * First check for a server restart. Open, LockT, ReleaseLockOwner * and DelegPurge have a clientid, the rest a stateid. */ if (flags & (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) { if (clientid.lval[0] != nfsrvboottime) { ret = NFSERR_STALECLIENTID; goto out; } } else if (stateidp->other[0] != nfsrvboottime && specialid == 0) { ret = NFSERR_STALESTATEID; goto out; } /* * Read, Write, Setattr and LockT can return NFSERR_GRACE and do * not use a lock/open owner seqid#, so the check can be done now. * (The others will be checked, as required, later.) */ if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST))) goto out; NFSLOCKSTATE(); ret = nfsrv_checkgrace(NULL, NULL, flags); NFSUNLOCKSTATE(); out: NFSEXITCODE(ret); return (ret); } /* * Check for grace. */ static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags) { int error = 0; if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) { if (flags & NFSLCK_RECLAIM) { error = NFSERR_NOGRACE; goto out; } } else { if (!(flags & NFSLCK_RECLAIM)) { error = NFSERR_GRACE; goto out; } if (nd != NULL && clp != NULL && (nd->nd_flag & ND_NFSV41) != 0 && (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) { error = NFSERR_NOGRACE; goto out; } /* * If grace is almost over and we are still getting Reclaims, * extend grace a bit. */ if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; } out: NFSEXITCODE(error); return (error); } /* * Do a server callback. */ static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p) { mbuf_t m; u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct ucred *cred; int error = 0; u_int32_t callback; struct nfsdsession *sep = NULL; cred = newnfs_getcred(); NFSLOCKSTATE(); /* mostly for lc_cbref++ */ if (clp->lc_flags & LCL_NEEDSCONFIRM) { NFSUNLOCKSTATE(); panic("docallb"); } clp->lc_cbref++; /* * Fill the callback program# and version into the request * structure for newnfs_connect() to use. */ clp->lc_req.nr_prog = clp->lc_program; #ifdef notnow if ((clp->lc_flags & LCL_NFSV41) != 0) clp->lc_req.nr_vers = NFSV41_CBVERS; else #endif clp->lc_req.nr_vers = NFSV4_CBVERS; /* * First, fill in some of the fields of nd and cr. */ nd->nd_flag = ND_NFSV4; if (clp->lc_flags & LCL_GSS) nd->nd_flag |= ND_KERBV; if ((clp->lc_flags & LCL_NFSV41) != 0) nd->nd_flag |= ND_NFSV41; nd->nd_repstat = 0; cred->cr_uid = clp->lc_uid; cred->cr_gid = clp->lc_gid; callback = clp->lc_callback; NFSUNLOCKSTATE(); cred->cr_ngroups = 1; /* * Get the first mbuf for the request. */ MGET(m, M_WAITOK, MT_DATA); mbuf_setlen(m, 0); nd->nd_mreq = nd->nd_mb = m; nd->nd_bpos = NFSMTOD(m, caddr_t); /* * and build the callback request. */ if (procnum == NFSV4OP_CBGETATTR) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR, "CB Getattr", &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); (void)nfsrv_putattrbit(nd, attrbitp); } else if (procnum == NFSV4OP_CBRECALL) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL, "CB Recall", &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(stateidp->seqid); NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (trunc) *tl = newnfs_true; else *tl = newnfs_false; (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); } else if (procnum == NFSV4PROC_CBNULL) { nd->nd_procnum = NFSV4PROC_CBNULL; if ((clp->lc_flags & LCL_NFSV41) != 0) { error = nfsv4_getcbsession(clp, &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } } } else { error = NFSERR_SERVERFAULT; mbuf_freem(nd->nd_mreq); goto errout; } /* * Call newnfs_connect(), as required, and then newnfs_request(). */ (void) newnfs_sndlock(&clp->lc_req.nr_lock); if (clp->lc_req.nr_client == NULL) { if ((clp->lc_flags & LCL_NFSV41) != 0) error = ECONNREFUSED; else if (nd->nd_procnum == NFSV4PROC_CBNULL) error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 1); else error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 3); } newnfs_sndunlock(&clp->lc_req.nr_lock); if (!error) { if ((nd->nd_flag & ND_NFSV41) != 0) { KASSERT(sep != NULL, ("sep NULL")); if (sep->sess_cbsess.nfsess_xprt != NULL) error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, NULL, cred, clp->lc_program, clp->lc_req.nr_vers, NULL, 1, NULL, &sep->sess_cbsess); else { /* * This should probably never occur, but if a * client somehow does an RPC without a * SequenceID Op that causes a callback just * after the nfsd threads have been terminated * and restared we could conceivably get here * without a backchannel xprt. */ printf("nfsrv_docallback: no xprt\n"); error = ECONNREFUSED; } nfsrv_freesession(sep, NULL); } else error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, NULL, cred, clp->lc_program, clp->lc_req.nr_vers, NULL, 1, NULL, NULL); } errout: NFSFREECRED(cred); /* * If error is set here, the Callback path isn't working * properly, so twiddle the appropriate LCL_ flags. * (nd_repstat != 0 indicates the Callback path is working, * but the callback failed on the client.) */ if (error) { /* * Mark the callback pathway down, which disabled issuing * of delegations and gets Renew to return NFSERR_CBPATHDOWN. */ NFSLOCKSTATE(); clp->lc_flags |= LCL_CBDOWN; NFSUNLOCKSTATE(); } else { /* * Callback worked. If the callback path was down, disable * callbacks, so no more delegations will be issued. (This * is done on the assumption that the callback pathway is * flakey.) */ NFSLOCKSTATE(); if (clp->lc_flags & LCL_CBDOWN) clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON); NFSUNLOCKSTATE(); if (nd->nd_repstat) error = nd->nd_repstat; else if (error == 0 && procnum == NFSV4OP_CBGETATTR) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, NULL); mbuf_freem(nd->nd_mrep); } NFSLOCKSTATE(); clp->lc_cbref--; if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) { clp->lc_flags &= ~LCL_WAKEUPWANTED; wakeup(clp); } NFSUNLOCKSTATE(); NFSEXITCODE(error); return (error); } /* * Set up the compound RPC for the callback. */ static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, uint32_t callback, int op, const char *optag, struct nfsdsession **sepp) { uint32_t *tl; int error, len; len = strlen(optag); (void)nfsm_strtom(nd, optag, len); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if ((nd->nd_flag & ND_NFSV41) != 0) { *tl++ = txdr_unsigned(NFSV41_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(2); *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE); error = nfsv4_setcbsequence(nd, clp, 1, sepp); if (error != 0) return (error); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(op); } else { *tl++ = txdr_unsigned(NFSV4_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(op); } return (0); } /* * Return the next index# for a clientid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around, * it should be rebooted. * At an average rate of one new client per second, it will wrap around in * approximately 136 years. (I think the server will have been shut * down or rebooted before then.) */ static u_int32_t nfsrv_nextclientindex(void) { static u_int32_t client_index = 0; client_index++; if (client_index != 0) return (client_index); printf("%s: out of clientids\n", __func__); return (client_index); } /* * Return the next index# for a stateid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around * (will a BSD server stay up that long?), find * new start and end values. */ static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp) { struct nfsstate *stp; int i; u_int32_t canuse, min_index, max_index; if (!(clp->lc_flags & LCL_INDEXNOTOK)) { clp->lc_stateindex++; if (clp->lc_stateindex != clp->lc_statemaxindex) return (clp->lc_stateindex); } /* * Yuck, we've hit the end. * Look for a new min and max. */ min_index = 0; max_index = 0xffffffff; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if (stp->ls_stateid.other[2] > 0x80000000) { if (stp->ls_stateid.other[2] < max_index) max_index = stp->ls_stateid.other[2]; } else { if (stp->ls_stateid.other[2] > min_index) min_index = stp->ls_stateid.other[2]; } } } /* * Yikes, highly unlikely, but I'll handle it anyhow. */ if (min_index == 0x80000000 && max_index == 0x80000001) { canuse = 0; /* * Loop around until we find an unused entry. Return that * and set LCL_INDEXNOTOK, so the search will continue next time. * (This is one of those rare cases where a goto is the * cleanest way to code the loop.) */ tryagain: for (i = 0; i < nfsrv_statehashsize; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if (stp->ls_stateid.other[2] == canuse) { canuse++; goto tryagain; } } } clp->lc_flags |= LCL_INDEXNOTOK; return (canuse); } /* * Ok to start again from min + 1. */ clp->lc_stateindex = min_index + 1; clp->lc_statemaxindex = max_index; clp->lc_flags &= ~LCL_INDEXNOTOK; return (clp->lc_stateindex); } /* * The following functions handle the stable storage file that deals with * the edge conditions described in RFC3530 Sec. 8.6.3. * The file is as follows: * - a single record at the beginning that has the lease time of the * previous server instance (before the last reboot) and the nfsrvboottime * values for the previous server boots. * These previous boot times are used to ensure that the current * nfsrvboottime does not, somehow, get set to a previous one. * (This is important so that Stale ClientIDs and StateIDs can * be recognized.) * The number of previous nfsvrboottime values precedes the list. * - followed by some number of appended records with: * - client id string * - flag that indicates it is a record revoking state via lease * expiration or similar * OR has successfully acquired state. * These structures vary in length, with the client string at the end, up * to NFSV4_OPAQUELIMIT in size. * * At the end of the grace period, the file is truncated, the first * record is rewritten with updated information and any acquired state * records for successful reclaims of state are written. * * Subsequent records are appended when the first state is issued to * a client and when state is revoked for a client. * * When reading the file in, state issued records that come later in * the file override older ones, since the append log is in cronological order. * If, for some reason, the file can't be read, the grace period is * immediately terminated and all reclaims get NFSERR_NOGRACE. */ /* * Read in the stable storage file. Called by nfssvc() before the nfsd * processes start servicing requests. */ APPLESTATIC void nfsrv_setupstable(NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfsrv_stable *sp, *nsp; struct nfst_rec *tsp; int error, i, tryagain; off_t off = 0; ssize_t aresid, len; /* * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without * a reboot, so state has not been lost. */ if (sf->nsf_flags & NFSNSF_UPDATEDONE) return; /* * Set Grace over just until the file reads successfully. */ nfsrvboottime = time_second; LIST_INIT(&sf->nsf_head); sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; if (sf->nsf_fp == NULL) return; error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid || sf->nsf_numboots == 0 || sf->nsf_numboots > NFSNSF_MAXNUMBOOTS) return; /* * Now, read in the boottimes. */ sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) * sizeof (time_t), M_TEMP, M_WAITOK); off = sizeof (struct nfsf_rec); error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid) { - free((caddr_t)sf->nsf_bootvals, M_TEMP); + free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } /* * Make sure this nfsrvboottime is different from all recorded * previous ones. */ do { tryagain = 0; for (i = 0; i < sf->nsf_numboots; i++) { if (nfsrvboottime == sf->nsf_bootvals[i]) { nfsrvboottime++; tryagain = 1; break; } } } while (tryagain); sf->nsf_flags |= NFSNSF_OK; off += (sf->nsf_numboots * sizeof (time_t)); /* * Read through the file, building a list of records for grace * checking. * Each record is between sizeof (struct nfst_rec) and * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1 * and is actually sizeof (struct nfst_rec) + nst_len - 1. */ tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK); do { error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid; if (error || (len > 0 && (len < sizeof (struct nfst_rec) || len < (sizeof (struct nfst_rec) + tsp->len - 1)))) { /* * Yuck, the file has been corrupted, so just return * after clearing out any restart state, so the grace period * is over. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { LIST_REMOVE(sp, nst_list); - free((caddr_t)sp, M_TEMP); + free(sp, M_TEMP); } - free((caddr_t)tsp, M_TEMP); + free(tsp, M_TEMP); sf->nsf_flags &= ~NFSNSF_OK; - free((caddr_t)sf->nsf_bootvals, M_TEMP); + free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } if (len > 0) { off += sizeof (struct nfst_rec) + tsp->len - 1; /* * Search the list for a matching client. */ LIST_FOREACH(sp, &sf->nsf_head, nst_list) { if (tsp->len == sp->nst_len && !NFSBCMP(tsp->client, sp->nst_client, tsp->len)) break; } if (sp == LIST_END(&sf->nsf_head)) { sp = (struct nfsrv_stable *)malloc(tsp->len + sizeof (struct nfsrv_stable) - 1, M_TEMP, M_WAITOK); NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec, sizeof (struct nfst_rec) + tsp->len - 1); LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list); } else { if (tsp->flag == NFSNST_REVOKE) sp->nst_flag |= NFSNST_REVOKE; else /* * A subsequent timestamp indicates the client * did a setclientid/confirm and any previous * revoke is no longer relevant. */ sp->nst_flag &= ~NFSNST_REVOKE; } } } while (len > 0); - free((caddr_t)tsp, M_TEMP); + free(tsp, M_TEMP); sf->nsf_flags = NFSNSF_OK; sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease + NFSRV_LEASEDELTA; } /* * Update the stable storage file, now that the grace period is over. */ APPLESTATIC void nfsrv_updatestable(NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfsrv_stable *sp, *nsp; int i; struct nfsvattr nva; vnode_t vp; #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000) mount_t mp = NULL; #endif int error; if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE)) return; sf->nsf_flags |= NFSNSF_UPDATEDONE; /* * Ok, we need to rewrite the stable storage file. * - truncate to 0 length * - write the new first structure * - loop through the data structures, writing out any that * have timestamps older than the old boot */ if (sf->nsf_bootvals) { sf->nsf_numboots++; for (i = sf->nsf_numboots - 2; i >= 0; i--) sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i]; } else { sf->nsf_numboots = 1; sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t), M_TEMP, M_WAITOK); } sf->nsf_bootvals[0] = nfsrvboottime; sf->nsf_lease = nfsrv_lease; NFSVNO_ATTRINIT(&nva); NFSVNO_SETATTRVAL(&nva, size, 0); vp = NFSFPVNODE(sf->nsf_fp); vn_start_write(vp, &mp, V_WAIT); if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p, NULL); NFSVOPUNLOCK(vp, 0); } else error = EPERM; vn_finished_write(mp); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0, UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), (off_t)(sizeof (struct nfsf_rec)), UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); - free((caddr_t)sf->nsf_bootvals, M_TEMP); + free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); return; } sf->nsf_flags |= NFSNSF_OK; /* * Loop through the list and write out timestamp records for * any clients that successfully reclaimed state. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { if (sp->nst_flag & NFSNST_GOTSTATE) { nfsrv_writestable(sp->nst_client, sp->nst_len, NFSNST_NEWSTATE, p); sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE; } LIST_REMOVE(sp, nst_list); - free((caddr_t)sp, M_TEMP); + free(sp, M_TEMP); } nfsrv_backupstable(); } /* * Append a record to the stable storage file. */ APPLESTATIC void nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfst_rec *sp; int error; if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL) return; sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + len - 1, M_TEMP, M_WAITOK); sp->len = len; NFSBCOPY(client, sp->client, len); sp->flag = flag; error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp), (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0, UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p); - free((caddr_t)sp, M_TEMP); + free(sp, M_TEMP); if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); } } /* * This function is called during the grace period to mark a client * that successfully reclaimed state. */ static void nfsrv_markstable(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First find the client structure. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } if (sp == LIST_END(&nfsrv_stablefirst.nsf_head)) return; /* * Now, just mark it and set the nfsclient back pointer. */ sp->nst_flag |= NFSNST_GOTSTATE; sp->nst_clp = clp; } /* * This function is called for a reclaim, to see if it gets grace. * It returns 0 if a reclaim is allowed, 1 otherwise. */ static int nfsrv_checkstable(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First, find the entry for the client. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } /* * If not in the list, state was revoked or no state was issued * since the previous reboot, a reclaim is denied. */ if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) || (sp->nst_flag & NFSNST_REVOKE) || !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK)) return (1); return (0); } /* * Test for and try to clear out a conflicting client. This is called by * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients * a found. * The trick here is that it can't revoke a conflicting client with an * expired lease unless it holds the v4root lock, so... * If no v4root lock, get the lock and return 1 to indicate "try again". * Return 0 to indicate the conflict can't be revoked and 1 to indicate * the revocation worked and the conflicting client is "bye, bye", so it * can be tried again. * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK(). * Unlocks State before a non-zero value is returned. */ static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp, NFSPROC_T *p) { int gotlock, lktype = 0; /* * If lease hasn't expired, we can't fix it. */ if (clp->lc_expiry >= NFSD_MONOSEC || !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) return (0); if (*haslockp == 0) { NFSUNLOCKSTATE(); if (vp != NULL) { lktype = NFSVOPISLOCKED(vp); NFSVOPUNLOCK(vp, 0); } NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; if (vp != NULL) { NFSVOPLOCK(vp, lktype | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) != 0) return (2); } return (1); } NFSUNLOCKSTATE(); /* * Ok, we can expire the conflicting client. */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); nfsrv_zapclient(clp, p); return (1); } /* * Resolve a delegation conflict. * Returns 0 to indicate the conflict was resolved without sleeping. * Return -1 to indicate that the caller should check for conflicts again. * Return > 0 for an error that should be returned, normally NFSERR_DELAY. * * Also, manipulate the nfsv4root_lock, as required. It isn't changed * for a return of 0, since there was no sleep and it could be required * later. It is released for a return of NFSERR_DELAY, since the caller * will return that error. It is released when a sleep was done waiting * for the delegation to be returned or expire (so that other nfsds can * handle ops). Then, it must be acquired for the write to stable storage. * (This function is somewhat similar to nfsrv_clientconflict(), but * the semantics differ in a couple of subtle ways. The return of 0 * indicates the conflict was resolved without sleeping here, not * that the conflict can't be resolved and the handling of nfsv4root_lock * differs, as noted above.) * Unlocks State before returning a non-zero value. */ static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp) { struct nfsclient *clp = stp->ls_clp; int gotlock, error, lktype = 0, retrycnt, zapped_clp; nfsv4stateid_t tstateid; fhandle_t tfh; /* * If the conflict is with an old delegation... */ if (stp->ls_flags & NFSLCK_OLDDELEG) { /* * You can delete it, if it has expired. */ if (clp->lc_delegtime < NFSD_MONOSEC) { nfsrv_freedeleg(stp); NFSUNLOCKSTATE(); error = -1; goto out; } NFSUNLOCKSTATE(); /* * During this delay, the old delegation could expire or it * could be recovered by the client via an Open with * CLAIM_DELEGATE_PREV. * Release the nfsv4root_lock, if held. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * It's a current delegation, so: * - check to see if the delegation has expired * - if so, get the v4root lock and then expire it */ if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) { /* * - do a recall callback, since not yet done * For now, never allow truncate to be set. To use * truncate safely, it must be guaranteed that the * Remove, Rename or Setattr with size of 0 will * succeed and that would require major changes to * the VFS/Vnode OPs. * Set the expiry time large enough so that it won't expire * until after the callback, then set it correctly, once * the callback is done. (The delegation will now time * out whether or not the Recall worked ok. The timeout * will be extended when ops are done on the delegation * stateid, up to the timelimit.) */ stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_flags |= NFSLCK_DELEGRECALL; /* * Loop NFSRV_CBRETRYCNT times while the CBRecall replies * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done * in order to try and avoid a race that could happen * when a CBRecall request passed the Open reply with * the delegation in it when transitting the network. * Since nfsrv_docallback will sleep, don't use stp after * the call. */ NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid, sizeof (tstateid)); NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh, sizeof (tfh)); NFSUNLOCKSTATE(); if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } retrycnt = 0; do { error = nfsrv_docallback(clp, NFSV4OP_CBRECALL, &tstateid, 0, &tfh, NULL, NULL, p); retrycnt++; } while ((error == NFSERR_BADSTATEID || error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT); error = NFSERR_DELAY; goto out; } if (clp->lc_expiry >= NFSD_MONOSEC && stp->ls_delegtime >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); /* * A recall has been done, but it has not yet expired. * So, RETURN_DELAY. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * If we don't yet have the lock, just get it and then return, * since we need that before deleting expired state, such as * this delegation. * When getting the lock, unlock the vnode, so other nfsds that * are in progress, won't get stuck waiting for the vnode lock. */ if (*haslockp == 0) { NFSUNLOCKSTATE(); if (vp != NULL) { lktype = NFSVOPISLOCKED(vp); NFSVOPUNLOCK(vp, 0); } NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; if (vp != NULL) { NFSVOPLOCK(vp, lktype | LK_RETRY); if ((vp->v_iflag & VI_DOOMED) != 0) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_PERM; goto out; } } error = -1; goto out; } NFSUNLOCKSTATE(); /* * Ok, we can delete the expired delegation. * First, write the Revoke record to stable storage and then * clear out the conflict. * Since all other nfsd threads are now blocked, we can safely * sleep without the state changing. */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); if (clp->lc_expiry < NFSD_MONOSEC) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); zapped_clp = 1; } else { nfsrv_freedeleg(stp); zapped_clp = 0; } if (zapped_clp) nfsrv_zapclient(clp, p); error = -1; out: NFSEXITCODE(error); return (error); } /* * Check for a remove allowed, if remove is set to 1 and get rid of * delegations. */ APPLESTATIC int nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p) { struct nfsstate *stp; struct nfslockfile *lfp; int error, haslock = 0; fhandle_t nfh; /* * First, get the lock file structure. * (A return of -1 means no associated state, so remove ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); tryagain: NFSLOCKSTATE(); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (error == -1) error = 0; goto out; } /* * Now, we must Recall any delegations. */ error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p); if (error) { /* * nfsrv_cleandeleg() unlocks state for non-zero * return. */ if (error == -1) goto tryagain; if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Now, look for a conflicting open share. */ if (remove) { /* * If the entry in the directory was the last reference to the * corresponding filesystem object, the object can be destroyed * */ if(lfp->lf_usecount>1) LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (stp->ls_flags & NFSLCK_WRITEDENY) { error = NFSERR_FILEOPEN; break; } } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE(error); return (error); } /* * Clear out all delegations for the file referred to by lfp. * May return NFSERR_DELAY, if there will be a delay waiting for * delegations to expire. * Returns -1 to indicate it slept while recalling a delegation. * This function has the side effect of deleting the nfslockfile structure, * if it no longer has associated state and didn't have to sleep. * Unlocks State before a non-zero value is returned. */ static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; int ret = 0; stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp) { ret = nfsrv_delegconflict(stp, haslockp, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ goto out; } } stp = nstp; } out: NFSEXITCODE(ret); return (ret); } /* * There are certain operations that, when being done outside of NFSv4, * require that any NFSv4 delegation for the file be recalled. * This function is to be called for those cases: * VOP_RENAME() - When a delegation is being recalled for any reason, * the client may have to do Opens against the server, using the file's * final component name. If the file has been renamed on the server, * that component name will be incorrect and the Open will fail. * VOP_REMOVE() - Theoretically, a client could Open a file after it has * been removed on the server, if there is a delegation issued to * that client for the file. I say "theoretically" since clients * normally do an Access Op before the Open and that Access Op will * fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so * they will detect the file's removal in the same manner. (There is * one case where RFC3530 allows a client to do an Open without first * doing an Access Op, which is passage of a check against the ACE * returned with a Write delegation, but current practice is to ignore * the ACE and always do an Access Op.) * Since the functions can only be called with an unlocked vnode, this * can't be done at this time. * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range * locks locally in the client, which are not visible to the server. To * deal with this, issuing of delegations for a vnode must be disabled * and all delegations for the vnode recalled. This is done via the * second function, using the VV_DISABLEDELEG vflag on the vnode. */ APPLESTATIC void nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p) { time_t starttime; int error; /* * First, check to see if the server is currently running and it has * been called for a regular file when issuing delegations. */ if (newnfs_numnfsd == 0 || vp->v_type != VREG || nfsrv_issuedelegs == 0) return; KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp)); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired by another thread. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); /* * Now, call nfsrv_checkremove() in a loop while it returns * NFSERR_DELAY. Return upon any other error or when timed out. */ starttime = NFSD_MONOSEC; do { if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(vp, 0, p); NFSVOPUNLOCK(vp, 0); } else error = EPERM; if (error == NFSERR_DELAY) { if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO) break; /* Sleep for a short period of time */ (void) nfs_catnap(PZERO, 0, "nfsremove"); } } while (error == NFSERR_DELAY); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } APPLESTATIC void nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p) { #ifdef VV_DISABLEDELEG /* * First, flag issuance of delegations disabled. */ atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG); #endif /* * Then call nfsd_recalldelegation() to get rid of all extant * delegations. */ nfsd_recalldelegation(vp, p); } /* * Check for conflicting locks, etc. and then get rid of delegations. * (At one point I thought that I should get rid of delegations for any * Setattr, since it could potentially disallow the I/O op (read or write) * allowed by the delegation. However, Setattr Ops that aren't changing * the size get a stateid of all 0s, so you can't tell if it is a delegation * for the same client or a different one, so I decided to only get rid * of delegations for other clients when the size is being changed.) * In general, a Setattr can disable NFS I/O Ops that are outstanding, such * as Write backs, even if there is no delegation, so it really isn't any * different?) */ APPLESTATIC int nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct nfsexstuff *exp, NFSPROC_T *p) { struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; int error = 0; nfsquad_t clientid; if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) { stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_first = nvap->na_size; } else { stp->ls_flags = 0; lop->lo_first = 0; } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL)) stp->ls_flags |= NFSLCK_SETATTR; if (stp->ls_flags == 0) goto out; lop->lo_end = NFS64BITSSET; lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = stateidp->seqid; clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0]; clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1]; stp->ls_stateid.other[2] = stateidp->other[2]; error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, stateidp, exp, nd, p); out: NFSEXITCODE2(error, nd); return (error); } /* * Check for a write delegation and do a CBGETATTR if there is one, updating * the attributes, as required. * Should I return an error if I can't get the attributes? (For now, I'll * just return ok. */ APPLESTATIC int nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred, NFSPROC_T *p) { struct nfsstate *stp; struct nfslockfile *lfp; struct nfsclient *clp; struct nfsvattr nva; fhandle_t nfh; int error = 0; nfsattrbit_t cbbits; u_quad_t delegfilerev; NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits); if (!NFSNONZERO_ATTRBIT(&cbbits)) goto out; if (nfsrv_writedelegcnt == 0) goto out; /* * Get the lock file structure. * (A return of -1 means no associated state, so return ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); NFSLOCKSTATE(); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (error == -1) error = 0; goto out; } /* * Now, look for a write delegation. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_flags & NFSLCK_DELEGWRITE) break; } if (stp == LIST_END(&lfp->lf_deleg)) { NFSUNLOCKSTATE(); goto out; } clp = stp->ls_clp; delegfilerev = stp->ls_filerev; /* * If the Write delegation was issued as a part of this Compound RPC * or if we have an Implied Clientid (used in a previous Op in this * compound) and it is the client the delegation was issued to, * just return ok. * I also assume that it is from the same client iff the network * host IP address is the same as the callback address. (Not * exactly correct by the RFC, but avoids a lot of Getattr * callbacks.) */ if (nd->nd_compref == stp->ls_compref || ((nd->nd_flag & ND_IMPLIEDCLID) && clp->lc_clientid.qval == nd->nd_clientid.qval) || nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) { NFSUNLOCKSTATE(); goto out; } /* * We are now done with the delegation state structure, * so the statelock can be released and we can now tsleep(). */ /* * Now, we must do the CB Getattr callback, to see if Change or Size * has changed. */ if (clp->lc_expiry >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); NFSVNO_ATTRINIT(&nva); nva.na_filerev = NFS64BITSSET; error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL, 0, &nfh, &nva, &cbbits, p); if (!error) { if ((nva.na_filerev != NFS64BITSSET && nva.na_filerev > delegfilerev) || (NFSVNO_ISSETSIZE(&nva) && nva.na_size != nvap->na_size)) { error = nfsvno_updfilerev(vp, nvap, cred, p); if (NFSVNO_ISSETSIZE(&nva)) nvap->na_size = nva.na_size; } } else error = 0; /* Ignore callback errors for now. */ } else { NFSUNLOCKSTATE(); } out: NFSEXITCODE2(error, nd); return (error); } /* * This function looks for openowners that haven't had any opens for * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS * is set. */ APPLESTATIC void nfsrv_throwawayopens(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int i; NFSLOCKSTATE(); nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS; /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) { if (LIST_EMPTY(&stp->ls_open) && (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit)) nfsrv_freeopenowner(stp, 0, p); } } } NFSUNLOCKSTATE(); } /* * This function checks to see if the credentials are the same. * Returns 1 for not same, 0 otherwise. */ static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp) { if (nd->nd_flag & ND_GSS) { if (!(clp->lc_flags & LCL_GSS)) return (1); if (clp->lc_flags & LCL_NAME) { if (nd->nd_princlen != clp->lc_namelen || NFSBCMP(nd->nd_principal, clp->lc_name, clp->lc_namelen)) return (1); else return (0); } if (nd->nd_cred->cr_uid == clp->lc_uid) return (0); else return (1); } else if (clp->lc_flags & LCL_GSS) return (1); /* * For AUTH_SYS, allow the same uid or root. (This is underspecified * in RFC3530, which talks about principals, but doesn't say anything * about uids for AUTH_SYS.) */ if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0) return (0); else return (1); } /* * Calculate the lease expiry time. */ static time_t nfsrv_leaseexpiry(void) { if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC) return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA)); return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA); } /* * Delay the delegation timeout as far as ls_delegtimelimit, as required. */ static void nfsrv_delaydelegtimeout(struct nfsstate *stp) { if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) return; if ((stp->ls_delegtime + 15) > NFSD_MONOSEC && stp->ls_delegtime < stp->ls_delegtimelimit) { stp->ls_delegtime += nfsrv_lease; if (stp->ls_delegtime > stp->ls_delegtimelimit) stp->ls_delegtime = stp->ls_delegtimelimit; } } /* * This function checks to see if there is any other state associated * with the openowner for this Open. * It returns 1 if there is no other state, 0 otherwise. */ static int nfsrv_nootherstate(struct nfsstate *stp) { struct nfsstate *tstp; LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) { if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock)) return (0); } return (1); } /* * Create a list of lock deltas (changes to local byte range locking * that can be rolled back using the list) and apply the changes via * nfsvno_advlock(). Optionally, lock the list. It is expected that either * the rollback or update function will be called after this. * It returns an error (and rolls back, as required), if any nfsvno_advlock() * call fails. If it returns an error, it will unlock the list. */ static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfslock *lop, *nlop; int error = 0; /* Loop through the list of locks. */ lop = LIST_FIRST(&lfp->lf_locallock); while (first < end && lop != NULL) { nlop = LIST_NEXT(lop, lo_lckowner); if (first >= lop->lo_end) { /* not there yet */ lop = nlop; } else if (first < lop->lo_first) { /* new one starts before entry in list */ if (end <= lop->lo_first) { /* no overlap between old and new */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, lop->lo_first, cfp, p); if (error != 0) break; first = lop->lo_first; } } else { /* new one overlaps this entry in list */ if (end <= lop->lo_end) { /* overlaps all of new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, lop->lo_end, cfp, p); if (error != 0) break; first = lop->lo_end; lop = nlop; } } } if (first < end && error == 0) /* handle fragment past end of list */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); NFSEXITCODE(error); return (error); } /* * Local lock unlock. Unlock all byte ranges that are no longer locked * by NFSv4. To do this, unlock any subranges of first-->end that * do not overlap with the byte ranges of any lock in the lfp->lf_lock * list. This list has all locks for the file held by other * tuples. The list is ordered by increasing * lo_first value, but may have entries that overlap each other, for * the case of read locks. */ static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p) { struct nfslock *lop; uint64_t first, end, prevfirst; first = init_first; end = init_end; while (first < init_end) { /* Loop through all nfs locks, adjusting first and end */ prevfirst = 0; LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { KASSERT(prevfirst <= lop->lo_first, ("nfsv4 locks out of order")); KASSERT(lop->lo_first < lop->lo_end, ("nfsv4 bogus lock")); prevfirst = lop->lo_first; if (first >= lop->lo_first && first < lop->lo_end) /* * Overlaps with initial part, so trim * off that initial part by moving first past * it. */ first = lop->lo_end; else if (end > lop->lo_first && lop->lo_first > first) { /* * This lock defines the end of the * segment to unlock, so set end to the * start of it and break out of the loop. */ end = lop->lo_first; break; } if (first >= end) /* * There is no segment left to do, so * break out of this loop and then exit * the outer while() since first will be set * to end, which must equal init_end here. */ break; } if (first < end) { /* Unlock this segment */ (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK, NFSLCK_READ, first, end, NULL, p); nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK, first, end); } /* * Now move past this segment and look for any further * segment in the range, if there is one. */ first = end; end = init_end; } } /* * Do the local lock operation and update the rollback list, as required. * Perform the rollback and return the error if nfsvno_advlock() fails. */ static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfsrollback *rlp; int error = 0, ltype, oldltype; if (flags & NFSLCK_WRITE) ltype = F_WRLCK; else if (flags & NFSLCK_READ) ltype = F_RDLCK; else ltype = F_UNLCK; if (oldflags & NFSLCK_WRITE) oldltype = F_WRLCK; else if (oldflags & NFSLCK_READ) oldltype = F_RDLCK; else oldltype = F_UNLCK; if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK)) /* nothing to do */ goto out; error = nfsvno_advlock(vp, ltype, first, end, p); if (error != 0) { if (cfp != NULL) { cfp->cl_clientid.lval[0] = 0; cfp->cl_clientid.lval[1] = 0; cfp->cl_first = 0; cfp->cl_end = NFS64BITSSET; cfp->cl_flags = NFSLCK_WRITE; cfp->cl_ownerlen = 5; NFSBCOPY("LOCAL", cfp->cl_owner, 5); } nfsrv_locallock_rollback(vp, lfp, p); } else if (ltype != F_UNLCK) { rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK, M_WAITOK); rlp->rlck_first = first; rlp->rlck_end = end; rlp->rlck_type = oldltype; LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list); } out: NFSEXITCODE(error); return (error); } /* * Roll back local lock changes and free up the rollback list. */ static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p) { struct nfsrollback *rlp, *nrlp; LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) { (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first, rlp->rlck_end, p); free(rlp, M_NFSDROLLBACK); } LIST_INIT(&lfp->lf_rollback); } /* * Update local lock list and delete rollback list (ie now committed to the * local locks). Most of the work is done by the internal function. */ static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end) { struct nfsrollback *rlp, *nrlp; struct nfslock *new_lop, *other_lop; new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); if (flags & (NFSLCK_READ | NFSLCK_WRITE)) other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); else other_lop = NULL; new_lop->lo_flags = flags; new_lop->lo_first = first; new_lop->lo_end = end; nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp); if (new_lop != NULL) free(new_lop, M_NFSDLOCK); if (other_lop != NULL) free(other_lop, M_NFSDLOCK); /* and get rid of the rollback list */ LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } /* * Lock the struct nfslockfile for local lock updating. */ static void nfsrv_locklf(struct nfslockfile *lfp) { int gotlock; /* lf_usecount ensures *lfp won't be free'd */ lfp->lf_usecount++; do { gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL, NFSSTATEMUTEXPTR, NULL); } while (gotlock == 0); lfp->lf_usecount--; } /* * Unlock the struct nfslockfile after local lock updating. */ static void nfsrv_unlocklf(struct nfslockfile *lfp) { nfsv4_unlock(&lfp->lf_locallock_lck, 0); } /* * Clear out all state for the NFSv4 server. * Must be called by a thread that can sleep when no nfsds are running. */ void nfsrv_throwawayallstate(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfslockfile *lfp, *nlfp; int i; /* * For each client, clean out the state and then free the structure. */ for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } } /* * Also, free up any remaining lock file structures. */ for (i = 0; i < nfsrv_lockhashsize; i++) { LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) nfsrv_freenfslockfile(lfp); } } /* * Check the sequence# for the session and slot provided as an argument. * Also, renew the lease if the session will return NFS_OK. */ int nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid, uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this, uint32_t *sflagsp, NFSPROC_T *p) { struct nfsdsession *sep; struct nfssessionhash *shp; int error; SVCXPRT *savxprt; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); return (NFSERR_BADSESSION); } error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp, sep->sess_slots, NULL, NFSV4_SLOTS - 1); if (error != 0) { NFSUNLOCKSESSION(shp); return (error); } if (cache_this != 0) nd->nd_flag |= ND_SAVEREPLY; /* Renew the lease. */ sep->sess_clp->lc_expiry = nfsrv_leaseexpiry(); nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval; nd->nd_flag |= ND_IMPLIEDCLID; /* * If this session handles the backchannel, save the nd_xprt for this * RPC, since this is the one being used. */ if (sep->sess_clp->lc_req.nr_client != NULL && (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) { savxprt = sep->sess_cbsess.nfsess_xprt; SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = sep->sess_clp->lc_req.nr_client->cl_private; nd->nd_xprt->xp_idletimeout = 0; /* Disable timeout. */ sep->sess_cbsess.nfsess_xprt = nd->nd_xprt; if (savxprt != NULL) SVC_RELEASE(savxprt); } *sflagsp = 0; if (sep->sess_clp->lc_req.nr_client == NULL) *sflagsp |= NFSV4SEQ_CBPATHDOWN; NFSUNLOCKSESSION(shp); if (error == NFSERR_EXPIRED) { *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED; error = 0; } else if (error == NFSERR_ADMINREVOKED) { *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED; error = 0; } *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1; return (0); } /* * Check/set reclaim complete for this session/clientid. */ int nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd) { struct nfsdsession *sep; struct nfssessionhash *shp; int error = 0; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } /* Check to see if reclaim complete has already happened. */ if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) error = NFSERR_COMPLETEALREADY; else sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE; NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (error); } /* * Cache the reply in a session slot. */ void nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat, struct mbuf **m) { struct nfsdsession *sep; struct nfssessionhash *shp; shp = NFSSESSIONHASH(sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); printf("nfsrv_cache_session: no session\n"); m_freem(*m); return; } nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m); NFSUNLOCKSESSION(shp); } /* * Search for a session that matches the sessionid. */ static struct nfsdsession * nfsrv_findsession(uint8_t *sessionid) { struct nfsdsession *sep; struct nfssessionhash *shp; shp = NFSSESSIONHASH(sessionid); LIST_FOREACH(sep, &shp->list, sess_hash) { if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID)) break; } return (sep); } /* * Destroy a session. */ int nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid) { int error, samesess; samesess = 0; if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID)) { samesess = 1; if ((nd->nd_flag & ND_LASTOP) == 0) return (NFSERR_BADSESSION); } error = nfsrv_freesession(NULL, sessionid); if (error == 0 && samesess != 0) nd->nd_flag &= ~ND_HASSEQUENCE; return (error); } /* * Free up a session structure. */ static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid) { struct nfssessionhash *shp; int i; NFSLOCKSTATE(); if (sep == NULL) { shp = NFSSESSIONHASH(sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); } else { shp = NFSSESSIONHASH(sep->sess_sessionid); NFSLOCKSESSION(shp); } if (sep != NULL) { sep->sess_refcnt--; if (sep->sess_refcnt > 0) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (0); } LIST_REMOVE(sep, sess_hash); LIST_REMOVE(sep, sess_list); } NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); if (sep == NULL) return (NFSERR_BADSESSION); for (i = 0; i < NFSV4_SLOTS; i++) if (sep->sess_slots[i].nfssl_reply != NULL) m_freem(sep->sess_slots[i].nfssl_reply); if (sep->sess_cbsess.nfsess_xprt != NULL) SVC_RELEASE(sep->sess_cbsess.nfsess_xprt); free(sep, M_NFSDSESSION); return (0); } /* * Free a stateid. * RFC5661 says that it should fail when there are associated opens, locks * or delegations. Since stateids represent opens, I don't see how you can * free an open stateid (it will be free'd when closed), so this function * only works for lock stateids (freeing the lock_owner) or delegations. */ int nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int error; NFSLOCKSTATE(); /* * Look up the stateid */ error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) { /* First, check for a delegation. */ LIST_FOREACH(stp, &clp->lc_deleg, ls_list) { if (!NFSBCMP(stp->ls_stateid.other, stateidp->other, NFSX_STATEIDOTHER)) break; } if (stp != NULL) { nfsrv_freedeleg(stp); NFSUNLOCKSTATE(); return (error); } } /* Not a delegation, try for a lock_owner. */ if (error == 0) error = nfsrv_getstate(clp, stateidp, 0, &stp); if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0)) /* Not a lock_owner stateid. */ error = NFSERR_LOCKSHELD; if (error == 0 && !LIST_EMPTY(&stp->ls_lock)) error = NFSERR_LOCKSHELD; if (error == 0) nfsrv_freelockowner(stp, NULL, 0, p); NFSUNLOCKSTATE(); return (error); } /* * Generate the xdr for an NFSv4.1 CBSequence Operation. */ static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp, int dont_replycache, struct nfsdsession **sepp) { struct nfsdsession *sep; uint32_t *tl, slotseq = 0; int maxslot, slotpos; uint8_t sessionid[NFSX_V4SESSIONID]; int error; error = nfsv4_getcbsession(clp, sepp); if (error != 0) return (error); sep = *sepp; (void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot, &slotseq, sessionid); KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot")); /* Build the Sequence arguments. */ NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED); bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; nd->nd_slotseq = tl; *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl++ = txdr_unsigned(maxslot); if (dont_replycache == 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl = 0; /* No referring call list, for now. */ nd->nd_flag |= ND_HASSEQUENCE; return (0); } /* * Get a session for the callback. */ static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp) { struct nfsdsession *sep; NFSLOCKSTATE(); LIST_FOREACH(sep, &clp->lc_session, sess_list) { if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) break; } if (sep == NULL) { NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } sep->sess_refcnt++; *sepp = sep; NFSUNLOCKSTATE(); return (0); } /* * Free up all backchannel xprts. This needs to be done when the nfsd threads * exit, since those transports will all be going away. * This is only called after all the nfsd threads are done performing RPCs, * so locking shouldn't be an issue. */ APPLESTATIC void nfsrv_freeallbackchannel_xprts(void) { struct nfsdsession *sep; struct nfsclient *clp; SVCXPRT *xprt; int i; for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { LIST_FOREACH(sep, &clp->lc_session, sess_list) { xprt = sep->sess_cbsess.nfsess_xprt; sep->sess_cbsess.nfsess_xprt = NULL; if (xprt != NULL) SVC_RELEASE(xprt); } } } } Index: head/sys/netinet/ip_mroute.c =================================================================== --- head/sys/netinet/ip_mroute.c (revision 328416) +++ head/sys/netinet/ip_mroute.c (revision 328417) @@ -1,2952 +1,2952 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989 Stephen Deering * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 */ /* * IP multicast forwarding procedures * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. * Modified by Mark J. Steiglitz, Stanford, May, 1991 * Modified by Van Jacobson, LBL, January 1993 * Modified by Ajit Thyagarajan, PARC, August 1993 * Modified by Bill Fenner, PARC, April 1995 * Modified by Ahmed Helmy, SGI, June 1996 * Modified by George Edmond Eddy (Rusty), ISI, February 1998 * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000 * Modified by Hitoshi Asaeda, WIDE, August 2000 * Modified by Pavlin Radoslavov, ICSI, October 2002 * * MROUTING Revision: 3.5 * and PIM-SMv2 and PIM-DM support, advanced API support, * bandwidth metering and signaling */ /* * TODO: Prefix functions with ipmf_. * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol * domain attachment (if_afdata) so we can track consumers of that service. * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT, * move it to socket options. * TODO: Cleanup LSRR removal further. * TODO: Push RSVP stubs into raw_ip.c. * TODO: Use bitstring.h for vif set. * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded. * TODO: Sync ip6_mroute.c with this file. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_mrouting.h" #define _PIM_VT 1 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef KTR_IPMF #define KTR_IPMF KTR_INET #endif #define VIFI_INVALID ((vifi_t) -1) static VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */ #define V_last_tv_sec VNET(last_tv_sec) static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache"); /* * Locking. We use two locks: one for the virtual interface table and * one for the forwarding table. These locks may be nested in which case * the VIF lock must always be taken first. Note that each lock is used * to cover not only the specific data structure but also related data * structures. */ static struct mtx mrouter_mtx; #define MROUTER_LOCK() mtx_lock(&mrouter_mtx) #define MROUTER_UNLOCK() mtx_unlock(&mrouter_mtx) #define MROUTER_LOCK_ASSERT() mtx_assert(&mrouter_mtx, MA_OWNED) #define MROUTER_LOCK_INIT() \ mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF) #define MROUTER_LOCK_DESTROY() mtx_destroy(&mrouter_mtx) static int ip_mrouter_cnt; /* # of vnets with active mrouters */ static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */ static VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat); VNET_PCPUSTAT_SYSINIT(mrtstat); VNET_PCPUSTAT_SYSUNINIT(mrtstat); SYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat, mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, " "netinet/ip_mroute.h)"); static VNET_DEFINE(u_long, mfchash); #define V_mfchash VNET(mfchash) #define MFCHASH(a, g) \ ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \ ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & V_mfchash) #define MFCHASHSIZE 256 static u_long mfchashsize; /* Hash size */ static VNET_DEFINE(u_char *, nexpire); /* 0..mfchashsize-1 */ #define V_nexpire VNET(nexpire) static VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl); #define V_mfchashtbl VNET(mfchashtbl) static struct mtx mfc_mtx; #define MFC_LOCK() mtx_lock(&mfc_mtx) #define MFC_UNLOCK() mtx_unlock(&mfc_mtx) #define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED) #define MFC_LOCK_INIT() \ mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF) #define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx) static VNET_DEFINE(vifi_t, numvifs); #define V_numvifs VNET(numvifs) static VNET_DEFINE(struct vif, viftable[MAXVIFS]); #define V_viftable VNET(viftable) SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]", "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); static struct mtx vif_mtx; #define VIF_LOCK() mtx_lock(&vif_mtx) #define VIF_UNLOCK() mtx_unlock(&vif_mtx) #define VIF_LOCK_ASSERT() mtx_assert(&vif_mtx, MA_OWNED) #define VIF_LOCK_INIT() \ mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF) #define VIF_LOCK_DESTROY() mtx_destroy(&vif_mtx) static eventhandler_tag if_detach_event_tag = NULL; static VNET_DEFINE(struct callout, expire_upcalls_ch); #define V_expire_upcalls_ch VNET(expire_upcalls_ch) #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ #define UPCALL_EXPIRE 6 /* number of timeouts */ /* * Bandwidth meter variables and constants */ static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters"); /* * Pending timeouts are stored in a hash table, the key being the * expiration time. Periodically, the entries are analysed and processed. */ #define BW_METER_BUCKETS 1024 static VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]); #define V_bw_meter_timers VNET(bw_meter_timers) static VNET_DEFINE(struct callout, bw_meter_ch); #define V_bw_meter_ch VNET(bw_meter_ch) #define BW_METER_PERIOD (hz) /* periodical handling of bw meters */ /* * Pending upcalls are stored in a vector which is flushed when * full, or periodically */ static VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]); #define V_bw_upcalls VNET(bw_upcalls) static VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */ #define V_bw_upcalls_n VNET(bw_upcalls_n) static VNET_DEFINE(struct callout, bw_upcalls_ch); #define V_bw_upcalls_ch VNET(bw_upcalls_ch) #define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */ static VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat); VNET_PCPUSTAT_SYSINIT(pimstat); VNET_PCPUSTAT_SYSUNINIT(pimstat); SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM"); SYSCTL_VNET_PCPUSTAT(_net_inet_pim, PIMCTL_STATS, stats, struct pimstat, pimstat, "PIM Statistics (struct pimstat, netinet/pim_var.h)"); static u_long pim_squelch_wholepkt = 0; SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW, &pim_squelch_wholepkt, 0, "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified"); extern struct domain inetdomain; static const struct protosw in_pim_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_PIM, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = pim_input, .pr_output = rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; static const struct encaptab *pim_encap_cookie; static int pim_encapcheck(const struct mbuf *, int, int, void *); /* * Note: the PIM Register encapsulation adds the following in front of a * data packet: * * struct pim_encap_hdr { * struct ip ip; * struct pim_encap_pimhdr pim; * } * */ struct pim_encap_pimhdr { struct pim pim; uint32_t flags; }; #define PIM_ENCAP_TTL 64 static struct ip pim_encap_iphdr = { #if BYTE_ORDER == LITTLE_ENDIAN sizeof(struct ip) >> 2, IPVERSION, #else IPVERSION, sizeof(struct ip) >> 2, #endif 0, /* tos */ sizeof(struct ip), /* total length */ 0, /* id */ 0, /* frag offset */ PIM_ENCAP_TTL, IPPROTO_PIM, 0, /* checksum */ }; static struct pim_encap_pimhdr pim_encap_pimhdr = { { PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */ 0, /* reserved */ 0, /* checksum */ }, 0 /* flags */ }; static VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID; #define V_reg_vif_num VNET(reg_vif_num) static VNET_DEFINE(struct ifnet, multicast_register_if); #define V_multicast_register_if VNET(multicast_register_if) /* * Private variables. */ static u_long X_ip_mcast_src(int); static int X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); static int X_ip_mrouter_done(void); static int X_ip_mrouter_get(struct socket *, struct sockopt *); static int X_ip_mrouter_set(struct socket *, struct sockopt *); static int X_legal_vif_num(int); static int X_mrt_ioctl(u_long, caddr_t, int); static int add_bw_upcall(struct bw_upcall *); static int add_mfc(struct mfcctl2 *); static int add_vif(struct vifctl *); static void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *); static void bw_meter_process(void); static void bw_meter_receive_packet(struct bw_meter *, int, struct timeval *); static void bw_upcalls_send(void); static int del_bw_upcall(struct bw_upcall *); static int del_mfc(struct mfcctl2 *); static int del_vif(vifi_t); static int del_vif_locked(vifi_t); static void expire_bw_meter_process(void *); static void expire_bw_upcalls_send(void *); static void expire_mfc(struct mfc *); static void expire_upcalls(void *); static void free_bw_list(struct bw_meter *); static int get_sg_cnt(struct sioc_sg_req *); static int get_vif_cnt(struct sioc_vif_req *); static void if_detached_event(void *, struct ifnet *); static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); static int ip_mrouter_init(struct socket *, int); static __inline struct mfc * mfc_find(struct in_addr *, struct in_addr *); static void phyint_send(struct ip *, struct vif *, struct mbuf *); static struct mbuf * pim_register_prepare(struct ip *, struct mbuf *); static int pim_register_send(struct ip *, struct vif *, struct mbuf *, struct mfc *); static int pim_register_send_rp(struct ip *, struct vif *, struct mbuf *, struct mfc *); static int pim_register_send_upcall(struct ip *, struct vif *, struct mbuf *, struct mfc *); static void schedule_bw_meter(struct bw_meter *, struct timeval *); static void send_packet(struct vif *, struct mbuf *); static int set_api_config(uint32_t *); static int set_assert(int); static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); static void unschedule_bw_meter(struct bw_meter *); /* * Kernel multicast forwarding API capabilities and setup. * If more API capabilities are added to the kernel, they should be * recorded in `mrt_api_support'. */ #define MRT_API_VERSION 0x0305 static const int mrt_api_version = MRT_API_VERSION; static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | MRT_MFC_FLAGS_BORDER_VIF | MRT_MFC_RP | MRT_MFC_BW_UPCALL); static VNET_DEFINE(uint32_t, mrt_api_config); #define V_mrt_api_config VNET(mrt_api_config) static VNET_DEFINE(int, pim_assert_enabled); #define V_pim_assert_enabled VNET(pim_assert_enabled) static struct timeval pim_assert_interval = { 3, 0 }; /* Rate limit */ /* * Find a route for a given origin IP address and multicast group address. * Statistics must be updated by the caller. */ static __inline struct mfc * mfc_find(struct in_addr *o, struct in_addr *g) { struct mfc *rt; MFC_LOCK_ASSERT(); LIST_FOREACH(rt, &V_mfchashtbl[MFCHASH(*o, *g)], mfc_hash) { if (in_hosteq(rt->mfc_origin, *o) && in_hosteq(rt->mfc_mcastgrp, *g) && TAILQ_EMPTY(&rt->mfc_stall)) break; } return (rt); } /* * Handle MRT setsockopt commands to modify the multicast forwarding tables. */ static int X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) { int error, optval; vifi_t vifi; struct vifctl vifc; struct mfcctl2 mfc; struct bw_upcall bw_upcall; uint32_t i; if (so != V_ip_mrouter && sopt->sopt_name != MRT_INIT) return EPERM; error = 0; switch (sopt->sopt_name) { case MRT_INIT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; error = ip_mrouter_init(so, optval); break; case MRT_DONE: error = ip_mrouter_done(); break; case MRT_ADD_VIF: error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); if (error) break; error = add_vif(&vifc); break; case MRT_DEL_VIF: error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); if (error) break; error = del_vif(vifi); break; case MRT_ADD_MFC: case MRT_DEL_MFC: /* * select data size depending on API version. */ if (sopt->sopt_name == MRT_ADD_MFC && V_mrt_api_config & MRT_API_FLAGS_ALL) { error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2), sizeof(struct mfcctl2)); } else { error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl), sizeof(struct mfcctl)); bzero((caddr_t)&mfc + sizeof(struct mfcctl), sizeof(mfc) - sizeof(struct mfcctl)); } if (error) break; if (sopt->sopt_name == MRT_ADD_MFC) error = add_mfc(&mfc); else error = del_mfc(&mfc); break; case MRT_ASSERT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; set_assert(optval); break; case MRT_API_CONFIG: error = sooptcopyin(sopt, &i, sizeof i, sizeof i); if (!error) error = set_api_config(&i); if (!error) error = sooptcopyout(sopt, &i, sizeof i); break; case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall, sizeof bw_upcall); if (error) break; if (sopt->sopt_name == MRT_ADD_BW_UPCALL) error = add_bw_upcall(&bw_upcall); else error = del_bw_upcall(&bw_upcall); break; default: error = EOPNOTSUPP; break; } return error; } /* * Handle MRT getsockopt commands */ static int X_ip_mrouter_get(struct socket *so, struct sockopt *sopt) { int error; switch (sopt->sopt_name) { case MRT_VERSION: error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version); break; case MRT_ASSERT: error = sooptcopyout(sopt, &V_pim_assert_enabled, sizeof V_pim_assert_enabled); break; case MRT_API_SUPPORT: error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support); break; case MRT_API_CONFIG: error = sooptcopyout(sopt, &V_mrt_api_config, sizeof V_mrt_api_config); break; default: error = EOPNOTSUPP; break; } return error; } /* * Handle ioctl commands to obtain information from the cache */ static int X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused) { int error = 0; /* * Currently the only function calling this ioctl routine is rtioctl_fib(). * Typically, only root can create the raw socket in order to execute * this ioctl method, however the request might be coming from a prison */ error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error) return (error); switch (cmd) { case (SIOCGETVIFCNT): error = get_vif_cnt((struct sioc_vif_req *)data); break; case (SIOCGETSGCNT): error = get_sg_cnt((struct sioc_sg_req *)data); break; default: error = EINVAL; break; } return error; } /* * returns the packet, byte, rpf-failure count for the source group provided */ static int get_sg_cnt(struct sioc_sg_req *req) { struct mfc *rt; MFC_LOCK(); rt = mfc_find(&req->src, &req->grp); if (rt == NULL) { MFC_UNLOCK(); req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; return EADDRNOTAVAIL; } req->pktcnt = rt->mfc_pkt_cnt; req->bytecnt = rt->mfc_byte_cnt; req->wrong_if = rt->mfc_wrong_if; MFC_UNLOCK(); return 0; } /* * returns the input and output packet and byte counts on the vif provided */ static int get_vif_cnt(struct sioc_vif_req *req) { vifi_t vifi = req->vifi; VIF_LOCK(); if (vifi >= V_numvifs) { VIF_UNLOCK(); return EINVAL; } req->icount = V_viftable[vifi].v_pkt_in; req->ocount = V_viftable[vifi].v_pkt_out; req->ibytes = V_viftable[vifi].v_bytes_in; req->obytes = V_viftable[vifi].v_bytes_out; VIF_UNLOCK(); return 0; } static void if_detached_event(void *arg __unused, struct ifnet *ifp) { vifi_t vifi; u_long i; MROUTER_LOCK(); if (V_ip_mrouter == NULL) { MROUTER_UNLOCK(); return; } VIF_LOCK(); MFC_LOCK(); /* * Tear down multicast forwarder state associated with this ifnet. * 1. Walk the vif list, matching vifs against this ifnet. * 2. Walk the multicast forwarding cache (mfc) looking for * inner matches with this vif's index. * 3. Expire any matching multicast forwarding cache entries. * 4. Free vif state. This should disable ALLMULTI on the interface. */ for (vifi = 0; vifi < V_numvifs; vifi++) { if (V_viftable[vifi].v_ifp != ifp) continue; for (i = 0; i < mfchashsize; i++) { struct mfc *rt, *nrt; LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) { if (rt->mfc_parent == vifi) { expire_mfc(rt); } } } del_vif_locked(vifi); } MFC_UNLOCK(); VIF_UNLOCK(); MROUTER_UNLOCK(); } /* * Enable multicast forwarding. */ static int ip_mrouter_init(struct socket *so, int version) { CTR3(KTR_IPMF, "%s: so_type %d, pr_protocol %d", __func__, so->so_type, so->so_proto->pr_protocol); if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; if (version != 1) return ENOPROTOOPT; MROUTER_LOCK(); if (ip_mrouter_unloading) { MROUTER_UNLOCK(); return ENOPROTOOPT; } if (V_ip_mrouter != NULL) { MROUTER_UNLOCK(); return EADDRINUSE; } V_mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &V_mfchash, HASH_NOWAIT); callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, curvnet); callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send, curvnet); callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, curvnet); V_ip_mrouter = so; ip_mrouter_cnt++; MROUTER_UNLOCK(); CTR1(KTR_IPMF, "%s: done", __func__); return 0; } /* * Disable multicast forwarding. */ static int X_ip_mrouter_done(void) { struct ifnet *ifp; u_long i; vifi_t vifi; MROUTER_LOCK(); if (V_ip_mrouter == NULL) { MROUTER_UNLOCK(); return EINVAL; } /* * Detach/disable hooks to the reset of the system. */ V_ip_mrouter = NULL; ip_mrouter_cnt--; V_mrt_api_config = 0; VIF_LOCK(); /* * For each phyint in use, disable promiscuous reception of all IP * multicasts. */ for (vifi = 0; vifi < V_numvifs; vifi++) { if (!in_nullhost(V_viftable[vifi].v_lcl_addr) && !(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { ifp = V_viftable[vifi].v_ifp; if_allmulti(ifp, 0); } } bzero((caddr_t)V_viftable, sizeof(V_viftable)); V_numvifs = 0; V_pim_assert_enabled = 0; VIF_UNLOCK(); callout_stop(&V_expire_upcalls_ch); callout_stop(&V_bw_upcalls_ch); callout_stop(&V_bw_meter_ch); MFC_LOCK(); /* * Free all multicast forwarding cache entries. * Do not use hashdestroy(), as we must perform other cleanup. */ for (i = 0; i < mfchashsize; i++) { struct mfc *rt, *nrt; LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) { expire_mfc(rt); } } free(V_mfchashtbl, M_MRTABLE); V_mfchashtbl = NULL; bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize); V_bw_upcalls_n = 0; bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers)); MFC_UNLOCK(); V_reg_vif_num = VIFI_INVALID; MROUTER_UNLOCK(); CTR1(KTR_IPMF, "%s: done", __func__); return 0; } /* * Set PIM assert processing global */ static int set_assert(int i) { if ((i != 1) && (i != 0)) return EINVAL; V_pim_assert_enabled = i; return 0; } /* * Configure API capabilities */ int set_api_config(uint32_t *apival) { u_long i; /* * We can set the API capabilities only if it is the first operation * after MRT_INIT. I.e.: * - there are no vifs installed * - pim_assert is not enabled * - the MFC table is empty */ if (V_numvifs > 0) { *apival = 0; return EPERM; } if (V_pim_assert_enabled) { *apival = 0; return EPERM; } MFC_LOCK(); for (i = 0; i < mfchashsize; i++) { if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) { MFC_UNLOCK(); *apival = 0; return EPERM; } } MFC_UNLOCK(); V_mrt_api_config = *apival & mrt_api_support; *apival = V_mrt_api_config; return 0; } /* * Add a vif to the vif table */ static int add_vif(struct vifctl *vifcp) { struct vif *vifp = V_viftable + vifcp->vifc_vifi; struct sockaddr_in sin = {sizeof sin, AF_INET}; struct ifaddr *ifa; struct ifnet *ifp; int error; VIF_LOCK(); if (vifcp->vifc_vifi >= MAXVIFS) { VIF_UNLOCK(); return EINVAL; } /* rate limiting is no longer supported by this code */ if (vifcp->vifc_rate_limit != 0) { log(LOG_ERR, "rate limiting is no longer supported\n"); VIF_UNLOCK(); return EINVAL; } if (!in_nullhost(vifp->v_lcl_addr)) { VIF_UNLOCK(); return EADDRINUSE; } if (in_nullhost(vifcp->vifc_lcl_addr)) { VIF_UNLOCK(); return EADDRNOTAVAIL; } /* Find the interface with an address in AF_INET family */ if (vifcp->vifc_flags & VIFF_REGISTER) { /* * XXX: Because VIFF_REGISTER does not really need a valid * local interface (e.g. it could be 127.0.0.2), we don't * check its address. */ ifp = NULL; } else { sin.sin_addr = vifcp->vifc_lcl_addr; ifa = ifa_ifwithaddr((struct sockaddr *)&sin); if (ifa == NULL) { VIF_UNLOCK(); return EADDRNOTAVAIL; } ifp = ifa->ifa_ifp; ifa_free(ifa); } if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) { CTR1(KTR_IPMF, "%s: tunnels are no longer supported", __func__); VIF_UNLOCK(); return EOPNOTSUPP; } else if (vifcp->vifc_flags & VIFF_REGISTER) { ifp = &V_multicast_register_if; CTR2(KTR_IPMF, "%s: add register vif for ifp %p", __func__, ifp); if (V_reg_vif_num == VIFI_INVALID) { if_initname(&V_multicast_register_if, "register_vif", 0); V_multicast_register_if.if_flags = IFF_LOOPBACK; V_reg_vif_num = vifcp->vifc_vifi; } } else { /* Make sure the interface supports multicast */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { VIF_UNLOCK(); return EOPNOTSUPP; } /* Enable promiscuous reception of all IP multicasts from the if */ error = if_allmulti(ifp, 1); if (error) { VIF_UNLOCK(); return error; } } vifp->v_flags = vifcp->vifc_flags; vifp->v_threshold = vifcp->vifc_threshold; vifp->v_lcl_addr = vifcp->vifc_lcl_addr; vifp->v_rmt_addr = vifcp->vifc_rmt_addr; vifp->v_ifp = ifp; /* initialize per vif pkt counters */ vifp->v_pkt_in = 0; vifp->v_pkt_out = 0; vifp->v_bytes_in = 0; vifp->v_bytes_out = 0; /* Adjust numvifs up if the vifi is higher than numvifs */ if (V_numvifs <= vifcp->vifc_vifi) V_numvifs = vifcp->vifc_vifi + 1; VIF_UNLOCK(); CTR4(KTR_IPMF, "%s: add vif %d laddr 0x%08x thresh %x", __func__, (int)vifcp->vifc_vifi, ntohl(vifcp->vifc_lcl_addr.s_addr), (int)vifcp->vifc_threshold); return 0; } /* * Delete a vif from the vif table */ static int del_vif_locked(vifi_t vifi) { struct vif *vifp; VIF_LOCK_ASSERT(); if (vifi >= V_numvifs) { return EINVAL; } vifp = &V_viftable[vifi]; if (in_nullhost(vifp->v_lcl_addr)) { return EADDRNOTAVAIL; } if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) if_allmulti(vifp->v_ifp, 0); if (vifp->v_flags & VIFF_REGISTER) V_reg_vif_num = VIFI_INVALID; bzero((caddr_t)vifp, sizeof (*vifp)); CTR2(KTR_IPMF, "%s: delete vif %d", __func__, (int)vifi); /* Adjust numvifs down */ for (vifi = V_numvifs; vifi > 0; vifi--) if (!in_nullhost(V_viftable[vifi-1].v_lcl_addr)) break; V_numvifs = vifi; return 0; } static int del_vif(vifi_t vifi) { int cc; VIF_LOCK(); cc = del_vif_locked(vifi); VIF_UNLOCK(); return cc; } /* * update an mfc entry without resetting counters and S,G addresses. */ static void update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) { int i; rt->mfc_parent = mfccp->mfcc_parent; for (i = 0; i < V_numvifs; i++) { rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; rt->mfc_flags[i] = mfccp->mfcc_flags[i] & V_mrt_api_config & MRT_MFC_FLAGS_ALL; } /* set the RP address */ if (V_mrt_api_config & MRT_MFC_RP) rt->mfc_rp = mfccp->mfcc_rp; else rt->mfc_rp.s_addr = INADDR_ANY; } /* * fully initialize an mfc entry from the parameter. */ static void init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) { rt->mfc_origin = mfccp->mfcc_origin; rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; update_mfc_params(rt, mfccp); /* initialize pkt counters per src-grp */ rt->mfc_pkt_cnt = 0; rt->mfc_byte_cnt = 0; rt->mfc_wrong_if = 0; timevalclear(&rt->mfc_last_assert); } static void expire_mfc(struct mfc *rt) { struct rtdetq *rte, *nrte; MFC_LOCK_ASSERT(); free_bw_list(rt->mfc_bw_meter); TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { m_freem(rte->m); TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); free(rte, M_MRTABLE); } LIST_REMOVE(rt, mfc_hash); free(rt, M_MRTABLE); } /* * Add an mfc entry */ static int add_mfc(struct mfcctl2 *mfccp) { struct mfc *rt; struct rtdetq *rte, *nrte; u_long hash = 0; u_short nstl; VIF_LOCK(); MFC_LOCK(); rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp); /* If an entry already exists, just update the fields */ if (rt) { CTR4(KTR_IPMF, "%s: update mfc orig 0x%08x group %lx parent %x", __func__, ntohl(mfccp->mfcc_origin.s_addr), (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent); update_mfc_params(rt, mfccp); MFC_UNLOCK(); VIF_UNLOCK(); return (0); } /* * Find the entry for which the upcall was made and update */ nstl = 0; hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp); LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) && !TAILQ_EMPTY(&rt->mfc_stall)) { CTR5(KTR_IPMF, "%s: add mfc orig 0x%08x group %lx parent %x qh %p", __func__, ntohl(mfccp->mfcc_origin.s_addr), (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent, TAILQ_FIRST(&rt->mfc_stall)); if (nstl++) CTR1(KTR_IPMF, "%s: multiple matches", __func__); init_mfc_params(rt, mfccp); rt->mfc_expire = 0; /* Don't clean this guy up */ V_nexpire[hash]--; /* Free queued packets, but attempt to forward them first. */ TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { if (rte->ifp != NULL) ip_mdq(rte->m, rte->ifp, rt, -1); m_freem(rte->m); TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); rt->mfc_nstall--; free(rte, M_MRTABLE); } } } /* * It is possible that an entry is being inserted without an upcall */ if (nstl == 0) { CTR1(KTR_IPMF, "%s: adding mfc w/o upcall", __func__); LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) { init_mfc_params(rt, mfccp); if (rt->mfc_expire) V_nexpire[hash]--; rt->mfc_expire = 0; break; /* XXX */ } } if (rt == NULL) { /* no upcall, so make a new entry */ rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); if (rt == NULL) { MFC_UNLOCK(); VIF_UNLOCK(); return (ENOBUFS); } init_mfc_params(rt, mfccp); TAILQ_INIT(&rt->mfc_stall); rt->mfc_nstall = 0; rt->mfc_expire = 0; rt->mfc_bw_meter = NULL; /* insert new entry at head of hash chain */ LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash); } } MFC_UNLOCK(); VIF_UNLOCK(); return (0); } /* * Delete an mfc entry */ static int del_mfc(struct mfcctl2 *mfccp) { struct in_addr origin; struct in_addr mcastgrp; struct mfc *rt; origin = mfccp->mfcc_origin; mcastgrp = mfccp->mfcc_mcastgrp; CTR3(KTR_IPMF, "%s: delete mfc orig 0x%08x group %lx", __func__, ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr)); MFC_LOCK(); rt = mfc_find(&origin, &mcastgrp); if (rt == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; } /* * free the bw_meter entries */ free_bw_list(rt->mfc_bw_meter); rt->mfc_bw_meter = NULL; LIST_REMOVE(rt, mfc_hash); free(rt, M_MRTABLE); MFC_UNLOCK(); return (0); } /* * Send a message to the routing daemon on the multicast routing socket. */ static int socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) { if (s) { SOCKBUF_LOCK(&s->so_rcv); if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm, NULL) != 0) { sorwakeup_locked(s); return 0; } SOCKBUF_UNLOCK(&s->so_rcv); } m_freem(mm); return -1; } /* * IP multicast forwarding function. This function assumes that the packet * pointed to by "ip" has arrived on (or is about to be sent to) the interface * pointed to by "ifp", and the packet is to be relayed to other networks * that have members of the packet's destination IP multicast group. * * The packet is returned unscathed to the caller, unless it is * erroneous, in which case a non-zero return value tells the caller to * discard it. */ #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo) { struct mfc *rt; int error; vifi_t vifi; CTR3(KTR_IPMF, "ip_mforward: delete mfc orig 0x%08x group %lx ifp %p", ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr), ifp); if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 || ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) { /* * Packet arrived via a physical interface or * an encapsulated tunnel or a register_vif. */ } else { /* * Packet arrived through a source-route tunnel. * Source-route tunnels are no longer supported. */ return (1); } VIF_LOCK(); MFC_LOCK(); if (imo && ((vifi = imo->imo_multicast_vif) < V_numvifs)) { if (ip->ip_ttl < MAXTTL) ip->ip_ttl++; /* compensate for -1 in *_send routines */ error = ip_mdq(m, ifp, NULL, vifi); MFC_UNLOCK(); VIF_UNLOCK(); return error; } /* * Don't forward a packet with time-to-live of zero or one, * or a packet destined to a local-only group. */ if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) { MFC_UNLOCK(); VIF_UNLOCK(); return 0; } /* * Determine forwarding vifs from the forwarding cache table */ MRTSTAT_INC(mrts_mfc_lookups); rt = mfc_find(&ip->ip_src, &ip->ip_dst); /* Entry exists, so forward if necessary */ if (rt != NULL) { error = ip_mdq(m, ifp, rt, -1); MFC_UNLOCK(); VIF_UNLOCK(); return error; } else { /* * If we don't have a route for packet's origin, * Make a copy of the packet & send message to routing daemon */ struct mbuf *mb0; struct rtdetq *rte; u_long hash; int hlen = ip->ip_hl << 2; MRTSTAT_INC(mrts_mfc_misses); MRTSTAT_INC(mrts_no_route); CTR2(KTR_IPMF, "ip_mforward: no mfc for (0x%08x,%lx)", ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr)); /* * Allocate mbufs early so that we don't do extra work if we are * just going to fail anyway. Make sure to pullup the header so * that other people can't step on it. */ rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT|M_ZERO); if (rte == NULL) { MFC_UNLOCK(); VIF_UNLOCK(); return ENOBUFS; } mb0 = m_copypacket(m, M_NOWAIT); if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < hlen)) mb0 = m_pullup(mb0, hlen); if (mb0 == NULL) { free(rte, M_MRTABLE); MFC_UNLOCK(); VIF_UNLOCK(); return ENOBUFS; } /* is there an upcall waiting for this flow ? */ hash = MFCHASH(ip->ip_src, ip->ip_dst); LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { if (in_hosteq(ip->ip_src, rt->mfc_origin) && in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) && !TAILQ_EMPTY(&rt->mfc_stall)) break; } if (rt == NULL) { int i; struct igmpmsg *im; struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; struct mbuf *mm; /* * Locate the vifi for the incoming interface for this packet. * If none found, drop packet. */ for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp; vifi++) ; if (vifi >= V_numvifs) /* vif not found, drop packet */ goto non_fatal; /* no upcall, so make a new entry */ rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); if (rt == NULL) goto fail; /* Make a copy of the header to send to the user level process */ mm = m_copym(mb0, 0, hlen, M_NOWAIT); if (mm == NULL) goto fail1; /* * Send message to routing daemon to install * a route into the kernel table */ im = mtod(mm, struct igmpmsg *); im->im_msgtype = IGMPMSG_NOCACHE; im->im_mbz = 0; im->im_vif = vifi; MRTSTAT_INC(mrts_upcalls); k_igmpsrc.sin_addr = ip->ip_src; if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) { CTR0(KTR_IPMF, "ip_mforward: socket queue full"); MRTSTAT_INC(mrts_upq_sockfull); fail1: free(rt, M_MRTABLE); fail: free(rte, M_MRTABLE); m_freem(mb0); MFC_UNLOCK(); VIF_UNLOCK(); return ENOBUFS; } /* insert new entry at head of hash chain */ rt->mfc_origin.s_addr = ip->ip_src.s_addr; rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; rt->mfc_expire = UPCALL_EXPIRE; V_nexpire[hash]++; for (i = 0; i < V_numvifs; i++) { rt->mfc_ttls[i] = 0; rt->mfc_flags[i] = 0; } rt->mfc_parent = -1; /* clear the RP address */ rt->mfc_rp.s_addr = INADDR_ANY; rt->mfc_bw_meter = NULL; /* initialize pkt counters per src-grp */ rt->mfc_pkt_cnt = 0; rt->mfc_byte_cnt = 0; rt->mfc_wrong_if = 0; timevalclear(&rt->mfc_last_assert); TAILQ_INIT(&rt->mfc_stall); rt->mfc_nstall = 0; /* link into table */ LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash); TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link); rt->mfc_nstall++; } else { /* determine if queue has overflowed */ if (rt->mfc_nstall > MAX_UPQ) { MRTSTAT_INC(mrts_upq_ovflw); non_fatal: free(rte, M_MRTABLE); m_freem(mb0); MFC_UNLOCK(); VIF_UNLOCK(); return (0); } TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link); rt->mfc_nstall++; } rte->m = mb0; rte->ifp = ifp; MFC_UNLOCK(); VIF_UNLOCK(); return 0; } } /* * Clean up the cache entry if upcall is not serviced */ static void expire_upcalls(void *arg) { u_long i; CURVNET_SET((struct vnet *) arg); MFC_LOCK(); for (i = 0; i < mfchashsize; i++) { struct mfc *rt, *nrt; if (V_nexpire[i] == 0) continue; LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) { if (TAILQ_EMPTY(&rt->mfc_stall)) continue; if (rt->mfc_expire == 0 || --rt->mfc_expire > 0) continue; /* * free the bw_meter entries */ while (rt->mfc_bw_meter != NULL) { struct bw_meter *x = rt->mfc_bw_meter; rt->mfc_bw_meter = x->bm_mfc_next; free(x, M_BWMETER); } MRTSTAT_INC(mrts_cache_cleanups); CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__, (u_long)ntohl(rt->mfc_origin.s_addr), (u_long)ntohl(rt->mfc_mcastgrp.s_addr)); expire_mfc(rt); } } MFC_UNLOCK(); callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, curvnet); CURVNET_RESTORE(); } /* * Packet forwarding routine once entry in the cache is made */ static int ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) { struct ip *ip = mtod(m, struct ip *); vifi_t vifi; int plen = ntohs(ip->ip_len); VIF_LOCK_ASSERT(); /* * If xmt_vif is not -1, send on only the requested vif. * * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) */ if (xmt_vif < V_numvifs) { if (V_viftable[xmt_vif].v_flags & VIFF_REGISTER) pim_register_send(ip, V_viftable + xmt_vif, m, rt); else phyint_send(ip, V_viftable + xmt_vif, m); return 1; } /* * Don't forward if it didn't arrive from the parent vif for its origin. */ vifi = rt->mfc_parent; if ((vifi >= V_numvifs) || (V_viftable[vifi].v_ifp != ifp)) { CTR4(KTR_IPMF, "%s: rx on wrong ifp %p (vifi %d, v_ifp %p)", __func__, ifp, (int)vifi, V_viftable[vifi].v_ifp); MRTSTAT_INC(mrts_wrong_if); ++rt->mfc_wrong_if; /* * If we are doing PIM assert processing, send a message * to the routing daemon. * * XXX: A PIM-SM router needs the WRONGVIF detection so it * can complete the SPT switch, regardless of the type * of the iif (broadcast media, GRE tunnel, etc). */ if (V_pim_assert_enabled && (vifi < V_numvifs) && V_viftable[vifi].v_ifp) { if (ifp == &V_multicast_register_if) PIMSTAT_INC(pims_rcv_registers_wrongiif); /* Get vifi for the incoming packet */ for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp; vifi++) ; if (vifi >= V_numvifs) return 0; /* The iif is not found: ignore the packet. */ if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF) return 0; /* WRONGVIF disabled: ignore the packet */ if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) { struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; struct igmpmsg *im; int hlen = ip->ip_hl << 2; struct mbuf *mm = m_copym(m, 0, hlen, M_NOWAIT); if (mm && (!M_WRITABLE(mm) || mm->m_len < hlen)) mm = m_pullup(mm, hlen); if (mm == NULL) return ENOBUFS; im = mtod(mm, struct igmpmsg *); im->im_msgtype = IGMPMSG_WRONGVIF; im->im_mbz = 0; im->im_vif = vifi; MRTSTAT_INC(mrts_upcalls); k_igmpsrc.sin_addr = im->im_src; if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) { CTR1(KTR_IPMF, "%s: socket queue full", __func__); MRTSTAT_INC(mrts_upq_sockfull); return ENOBUFS; } } } return 0; } /* If I sourced this packet, it counts as output, else it was input. */ if (in_hosteq(ip->ip_src, V_viftable[vifi].v_lcl_addr)) { V_viftable[vifi].v_pkt_out++; V_viftable[vifi].v_bytes_out += plen; } else { V_viftable[vifi].v_pkt_in++; V_viftable[vifi].v_bytes_in += plen; } rt->mfc_pkt_cnt++; rt->mfc_byte_cnt += plen; /* * For each vif, decide if a copy of the packet should be forwarded. * Forward if: * - the ttl exceeds the vif's threshold * - there are group members downstream on interface */ for (vifi = 0; vifi < V_numvifs; vifi++) if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) { V_viftable[vifi].v_pkt_out++; V_viftable[vifi].v_bytes_out += plen; if (V_viftable[vifi].v_flags & VIFF_REGISTER) pim_register_send(ip, V_viftable + vifi, m, rt); else phyint_send(ip, V_viftable + vifi, m); } /* * Perform upcall-related bw measuring. */ if (rt->mfc_bw_meter != NULL) { struct bw_meter *x; struct timeval now; microtime(&now); MFC_LOCK_ASSERT(); for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) bw_meter_receive_packet(x, plen, &now); } return 0; } /* * Check if a vif number is legal/ok. This is used by in_mcast.c. */ static int X_legal_vif_num(int vif) { int ret; ret = 0; if (vif < 0) return (ret); VIF_LOCK(); if (vif < V_numvifs) ret = 1; VIF_UNLOCK(); return (ret); } /* * Return the local address used by this vif */ static u_long X_ip_mcast_src(int vifi) { in_addr_t addr; addr = INADDR_ANY; if (vifi < 0) return (addr); VIF_LOCK(); if (vifi < V_numvifs) addr = V_viftable[vifi].v_lcl_addr.s_addr; VIF_UNLOCK(); return (addr); } static void phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) { struct mbuf *mb_copy; int hlen = ip->ip_hl << 2; VIF_LOCK_ASSERT(); /* * Make a new reference to the packet; make sure that * the IP header is actually copied, not just referenced, * so that ip_output() only scribbles on the copy. */ mb_copy = m_copypacket(m, M_NOWAIT); if (mb_copy && (!M_WRITABLE(mb_copy) || mb_copy->m_len < hlen)) mb_copy = m_pullup(mb_copy, hlen); if (mb_copy == NULL) return; send_packet(vifp, mb_copy); } static void send_packet(struct vif *vifp, struct mbuf *m) { struct ip_moptions imo; struct in_multi *imm[2]; int error; VIF_LOCK_ASSERT(); imo.imo_multicast_ifp = vifp->v_ifp; imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; imo.imo_multicast_loop = 1; imo.imo_multicast_vif = -1; imo.imo_num_memberships = 0; imo.imo_max_memberships = 2; imo.imo_membership = &imm[0]; /* * Re-entrancy should not be a problem here, because * the packets that we send out and are looped back at us * should get rejected because they appear to come from * the loopback interface, thus preventing looping. */ error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL); CTR3(KTR_IPMF, "%s: vif %td err %d", __func__, (ptrdiff_t)(vifp - V_viftable), error); } /* * Stubs for old RSVP socket shim implementation. */ static int X_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused) { return (EOPNOTSUPP); } static void X_ip_rsvp_force_done(struct socket *so __unused) { } static int X_rsvp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m; m = *mp; *mp = NULL; if (!V_rsvp_on) m_freem(m); return (IPPROTO_DONE); } /* * Code for bandwidth monitors */ /* * Define common interface for timeval-related methods */ #define BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp) #define BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp)) #define BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp)) static uint32_t compute_bw_meter_flags(struct bw_upcall *req) { uint32_t flags = 0; if (req->bu_flags & BW_UPCALL_UNIT_PACKETS) flags |= BW_METER_UNIT_PACKETS; if (req->bu_flags & BW_UPCALL_UNIT_BYTES) flags |= BW_METER_UNIT_BYTES; if (req->bu_flags & BW_UPCALL_GEQ) flags |= BW_METER_GEQ; if (req->bu_flags & BW_UPCALL_LEQ) flags |= BW_METER_LEQ; return flags; } /* * Add a bw_meter entry */ static int add_bw_upcall(struct bw_upcall *req) { struct mfc *mfc; struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC, BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC }; struct timeval now; struct bw_meter *x; uint32_t flags; if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL)) return EOPNOTSUPP; /* Test if the flags are valid */ if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES))) return EINVAL; if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))) return EINVAL; if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) == (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) return EINVAL; /* Test if the threshold time interval is valid */ if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <)) return EINVAL; flags = compute_bw_meter_flags(req); /* * Find if we have already same bw_meter entry */ MFC_LOCK(); mfc = mfc_find(&req->bu_src, &req->bu_dst); if (mfc == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; } for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) { if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, &req->bu_threshold.b_time, ==)) && (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && (x->bm_flags & BW_METER_USER_FLAGS) == flags) { MFC_UNLOCK(); return 0; /* XXX Already installed */ } } /* Allocate the new bw_meter entry */ x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT); if (x == NULL) { MFC_UNLOCK(); return ENOBUFS; } /* Set the new bw_meter entry */ x->bm_threshold.b_time = req->bu_threshold.b_time; microtime(&now); x->bm_start_time = now; x->bm_threshold.b_packets = req->bu_threshold.b_packets; x->bm_threshold.b_bytes = req->bu_threshold.b_bytes; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags = flags; x->bm_time_next = NULL; x->bm_time_hash = BW_METER_BUCKETS; /* Add the new bw_meter entry to the front of entries for this MFC */ x->bm_mfc = mfc; x->bm_mfc_next = mfc->mfc_bw_meter; mfc->mfc_bw_meter = x; schedule_bw_meter(x, &now); MFC_UNLOCK(); return 0; } static void free_bw_list(struct bw_meter *list) { while (list != NULL) { struct bw_meter *x = list; list = list->bm_mfc_next; unschedule_bw_meter(x); free(x, M_BWMETER); } } /* * Delete one or multiple bw_meter entries */ static int del_bw_upcall(struct bw_upcall *req) { struct mfc *mfc; struct bw_meter *x; if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL)) return EOPNOTSUPP; MFC_LOCK(); /* Find the corresponding MFC entry */ mfc = mfc_find(&req->bu_src, &req->bu_dst); if (mfc == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) { /* * Delete all bw_meter entries for this mfc */ struct bw_meter *list; list = mfc->mfc_bw_meter; mfc->mfc_bw_meter = NULL; free_bw_list(list); MFC_UNLOCK(); return 0; } else { /* Delete a single bw_meter entry */ struct bw_meter *prev; uint32_t flags = 0; flags = compute_bw_meter_flags(req); /* Find the bw_meter entry to delete */ for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL; prev = x, x = x->bm_mfc_next) { if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, &req->bu_threshold.b_time, ==)) && (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && (x->bm_flags & BW_METER_USER_FLAGS) == flags) break; } if (x != NULL) { /* Delete entry from the list for this MFC */ if (prev != NULL) prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/ else x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */ unschedule_bw_meter(x); MFC_UNLOCK(); /* Free the bw_meter entry */ free(x, M_BWMETER); return 0; } else { MFC_UNLOCK(); return EINVAL; } } /* NOTREACHED */ } /* * Perform bandwidth measurement processing that may result in an upcall */ static void bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp) { struct timeval delta; MFC_LOCK_ASSERT(); delta = *nowp; BW_TIMEVALDECR(&delta, &x->bm_start_time); if (x->bm_flags & BW_METER_GEQ) { /* * Processing for ">=" type of bw_meter entry */ if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { /* Reset the bw_meter entry */ x->bm_start_time = *nowp; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; } /* Record that a packet is received */ x->bm_measured.b_packets++; x->bm_measured.b_bytes += plen; /* * Test if we should deliver an upcall */ if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) { if (((x->bm_flags & BW_METER_UNIT_PACKETS) && (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) || ((x->bm_flags & BW_METER_UNIT_BYTES) && (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) { /* Prepare an upcall for delivery */ bw_meter_prepare_upcall(x, nowp); x->bm_flags |= BW_METER_UPCALL_DELIVERED; } } } else if (x->bm_flags & BW_METER_LEQ) { /* * Processing for "<=" type of bw_meter entry */ if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { /* * We are behind time with the multicast forwarding table * scanning for "<=" type of bw_meter entries, so test now * if we should deliver an upcall. */ if (((x->bm_flags & BW_METER_UNIT_PACKETS) && (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || ((x->bm_flags & BW_METER_UNIT_BYTES) && (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { /* Prepare an upcall for delivery */ bw_meter_prepare_upcall(x, nowp); } /* Reschedule the bw_meter entry */ unschedule_bw_meter(x); schedule_bw_meter(x, nowp); } /* Record that a packet is received */ x->bm_measured.b_packets++; x->bm_measured.b_bytes += plen; /* * Test if we should restart the measuring interval */ if ((x->bm_flags & BW_METER_UNIT_PACKETS && x->bm_measured.b_packets <= x->bm_threshold.b_packets) || (x->bm_flags & BW_METER_UNIT_BYTES && x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) { /* Don't restart the measuring interval */ } else { /* Do restart the measuring interval */ /* * XXX: note that we don't unschedule and schedule, because this * might be too much overhead per packet. Instead, when we process * all entries for a given timer hash bin, we check whether it is * really a timeout. If not, we reschedule at that time. */ x->bm_start_time = *nowp; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; } } } /* * Prepare a bandwidth-related upcall */ static void bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp) { struct timeval delta; struct bw_upcall *u; MFC_LOCK_ASSERT(); /* * Compute the measured time interval */ delta = *nowp; BW_TIMEVALDECR(&delta, &x->bm_start_time); /* * If there are too many pending upcalls, deliver them now */ if (V_bw_upcalls_n >= BW_UPCALLS_MAX) bw_upcalls_send(); /* * Set the bw_upcall entry */ u = &V_bw_upcalls[V_bw_upcalls_n++]; u->bu_src = x->bm_mfc->mfc_origin; u->bu_dst = x->bm_mfc->mfc_mcastgrp; u->bu_threshold.b_time = x->bm_threshold.b_time; u->bu_threshold.b_packets = x->bm_threshold.b_packets; u->bu_threshold.b_bytes = x->bm_threshold.b_bytes; u->bu_measured.b_time = delta; u->bu_measured.b_packets = x->bm_measured.b_packets; u->bu_measured.b_bytes = x->bm_measured.b_bytes; u->bu_flags = 0; if (x->bm_flags & BW_METER_UNIT_PACKETS) u->bu_flags |= BW_UPCALL_UNIT_PACKETS; if (x->bm_flags & BW_METER_UNIT_BYTES) u->bu_flags |= BW_UPCALL_UNIT_BYTES; if (x->bm_flags & BW_METER_GEQ) u->bu_flags |= BW_UPCALL_GEQ; if (x->bm_flags & BW_METER_LEQ) u->bu_flags |= BW_UPCALL_LEQ; } /* * Send the pending bandwidth-related upcalls */ static void bw_upcalls_send(void) { struct mbuf *m; int len = V_bw_upcalls_n * sizeof(V_bw_upcalls[0]); struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; static struct igmpmsg igmpmsg = { 0, /* unused1 */ 0, /* unused2 */ IGMPMSG_BW_UPCALL,/* im_msgtype */ 0, /* im_mbz */ 0, /* im_vif */ 0, /* unused3 */ { 0 }, /* im_src */ { 0 } }; /* im_dst */ MFC_LOCK_ASSERT(); if (V_bw_upcalls_n == 0) return; /* No pending upcalls */ V_bw_upcalls_n = 0; /* * Allocate a new mbuf, initialize it with the header and * the payload for the pending calls. */ m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n"); return; } m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg); m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]); /* * Send the upcalls * XXX do we need to set the address in k_igmpsrc ? */ MRTSTAT_INC(mrts_upcalls); if (socket_send(V_ip_mrouter, m, &k_igmpsrc) < 0) { log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n"); MRTSTAT_INC(mrts_upq_sockfull); } } /* * Compute the timeout hash value for the bw_meter entries */ #define BW_METER_TIMEHASH(bw_meter, hash) \ do { \ struct timeval next_timeval = (bw_meter)->bm_start_time; \ \ BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \ (hash) = next_timeval.tv_sec; \ if (next_timeval.tv_usec) \ (hash)++; /* XXX: make sure we don't timeout early */ \ (hash) %= BW_METER_BUCKETS; \ } while (0) /* * Schedule a timer to process periodically bw_meter entry of type "<=" * by linking the entry in the proper hash bucket. */ static void schedule_bw_meter(struct bw_meter *x, struct timeval *nowp) { int time_hash; MFC_LOCK_ASSERT(); if (!(x->bm_flags & BW_METER_LEQ)) return; /* XXX: we schedule timers only for "<=" entries */ /* * Reset the bw_meter entry */ x->bm_start_time = *nowp; x->bm_measured.b_packets = 0; x->bm_measured.b_bytes = 0; x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; /* * Compute the timeout hash value and insert the entry */ BW_METER_TIMEHASH(x, time_hash); x->bm_time_next = V_bw_meter_timers[time_hash]; V_bw_meter_timers[time_hash] = x; x->bm_time_hash = time_hash; } /* * Unschedule the periodic timer that processes bw_meter entry of type "<=" * by removing the entry from the proper hash bucket. */ static void unschedule_bw_meter(struct bw_meter *x) { int time_hash; struct bw_meter *prev, *tmp; MFC_LOCK_ASSERT(); if (!(x->bm_flags & BW_METER_LEQ)) return; /* XXX: we schedule timers only for "<=" entries */ /* * Compute the timeout hash value and delete the entry */ time_hash = x->bm_time_hash; if (time_hash >= BW_METER_BUCKETS) return; /* Entry was not scheduled */ for (prev = NULL, tmp = V_bw_meter_timers[time_hash]; tmp != NULL; prev = tmp, tmp = tmp->bm_time_next) if (tmp == x) break; if (tmp == NULL) panic("unschedule_bw_meter: bw_meter entry not found"); if (prev != NULL) prev->bm_time_next = x->bm_time_next; else V_bw_meter_timers[time_hash] = x->bm_time_next; x->bm_time_next = NULL; x->bm_time_hash = BW_METER_BUCKETS; } /* * Process all "<=" type of bw_meter that should be processed now, * and for each entry prepare an upcall if necessary. Each processed * entry is rescheduled again for the (periodic) processing. * * This is run periodically (once per second normally). On each round, * all the potentially matching entries are in the hash slot that we are * looking at. */ static void bw_meter_process() { uint32_t loops; int i; struct timeval now, process_endtime; microtime(&now); if (V_last_tv_sec == now.tv_sec) return; /* nothing to do */ loops = now.tv_sec - V_last_tv_sec; V_last_tv_sec = now.tv_sec; if (loops > BW_METER_BUCKETS) loops = BW_METER_BUCKETS; MFC_LOCK(); /* * Process all bins of bw_meter entries from the one after the last * processed to the current one. On entry, i points to the last bucket * visited, so we need to increment i at the beginning of the loop. */ for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) { struct bw_meter *x, *tmp_list; if (++i >= BW_METER_BUCKETS) i = 0; /* Disconnect the list of bw_meter entries from the bin */ tmp_list = V_bw_meter_timers[i]; V_bw_meter_timers[i] = NULL; /* Process the list of bw_meter entries */ while (tmp_list != NULL) { x = tmp_list; tmp_list = tmp_list->bm_time_next; /* Test if the time interval is over */ process_endtime = x->bm_start_time; BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time); if (BW_TIMEVALCMP(&process_endtime, &now, >)) { /* Not yet: reschedule, but don't reset */ int time_hash; BW_METER_TIMEHASH(x, time_hash); if (time_hash == i && process_endtime.tv_sec == now.tv_sec) { /* * XXX: somehow the bin processing is a bit ahead of time. * Put the entry in the next bin. */ if (++time_hash >= BW_METER_BUCKETS) time_hash = 0; } x->bm_time_next = V_bw_meter_timers[time_hash]; V_bw_meter_timers[time_hash] = x; x->bm_time_hash = time_hash; continue; } /* * Test if we should deliver an upcall */ if (((x->bm_flags & BW_METER_UNIT_PACKETS) && (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || ((x->bm_flags & BW_METER_UNIT_BYTES) && (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { /* Prepare an upcall for delivery */ bw_meter_prepare_upcall(x, &now); } /* * Reschedule for next processing */ schedule_bw_meter(x, &now); } } /* Send all upcalls that are pending delivery */ bw_upcalls_send(); MFC_UNLOCK(); } /* * A periodic function for sending all upcalls that are pending delivery */ static void expire_bw_upcalls_send(void *arg) { CURVNET_SET((struct vnet *) arg); MFC_LOCK(); bw_upcalls_send(); MFC_UNLOCK(); callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send, curvnet); CURVNET_RESTORE(); } /* * A periodic function for periodic scanning of the multicast forwarding * table for processing all "<=" bw_meter entries. */ static void expire_bw_meter_process(void *arg) { CURVNET_SET((struct vnet *) arg); if (V_mrt_api_config & MRT_MFC_BW_UPCALL) bw_meter_process(); callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, curvnet); CURVNET_RESTORE(); } /* * End of bandwidth monitoring code */ /* * Send the packet up to the user daemon, or eventually do kernel encapsulation * */ static int pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m, struct mfc *rt) { struct mbuf *mb_copy, *mm; /* * Do not send IGMP_WHOLEPKT notifications to userland, if the * rendezvous point was unspecified, and we were told not to. */ if (pim_squelch_wholepkt != 0 && (V_mrt_api_config & MRT_MFC_RP) && in_nullhost(rt->mfc_rp)) return 0; mb_copy = pim_register_prepare(ip, m); if (mb_copy == NULL) return ENOBUFS; /* * Send all the fragments. Note that the mbuf for each fragment * is freed by the sending machinery. */ for (mm = mb_copy; mm; mm = mb_copy) { mb_copy = mm->m_nextpkt; mm->m_nextpkt = 0; mm = m_pullup(mm, sizeof(struct ip)); if (mm != NULL) { ip = mtod(mm, struct ip *); if ((V_mrt_api_config & MRT_MFC_RP) && !in_nullhost(rt->mfc_rp)) { pim_register_send_rp(ip, vifp, mm, rt); } else { pim_register_send_upcall(ip, vifp, mm, rt); } } } return 0; } /* * Return a copy of the data packet that is ready for PIM Register * encapsulation. * XXX: Note that in the returned copy the IP header is a valid one. */ static struct mbuf * pim_register_prepare(struct ip *ip, struct mbuf *m) { struct mbuf *mb_copy = NULL; int mtu; /* Take care of delayed checksums */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } /* * Copy the old packet & pullup its IP header into the * new mbuf so we can modify it. */ mb_copy = m_copypacket(m, M_NOWAIT); if (mb_copy == NULL) return NULL; mb_copy = m_pullup(mb_copy, ip->ip_hl << 2); if (mb_copy == NULL) return NULL; /* take care of the TTL */ ip = mtod(mb_copy, struct ip *); --ip->ip_ttl; /* Compute the MTU after the PIM Register encapsulation */ mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr); if (ntohs(ip->ip_len) <= mtu) { /* Turn the IP header into a valid one */ ip->ip_sum = 0; ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); } else { /* Fragment the packet */ mb_copy->m_pkthdr.csum_flags |= CSUM_IP; if (ip_fragment(ip, &mb_copy, mtu, 0) != 0) { m_freem(mb_copy); return NULL; } } return mb_copy; } /* * Send an upcall with the data packet to the user-level process. */ static int pim_register_send_upcall(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, struct mfc *rt) { struct mbuf *mb_first; int len = ntohs(ip->ip_len); struct igmpmsg *im; struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; VIF_LOCK_ASSERT(); /* * Add a new mbuf with an upcall header */ mb_first = m_gethdr(M_NOWAIT, MT_DATA); if (mb_first == NULL) { m_freem(mb_copy); return ENOBUFS; } mb_first->m_data += max_linkhdr; mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg); mb_first->m_len = sizeof(struct igmpmsg); mb_first->m_next = mb_copy; /* Send message to routing daemon */ im = mtod(mb_first, struct igmpmsg *); im->im_msgtype = IGMPMSG_WHOLEPKT; im->im_mbz = 0; im->im_vif = vifp - V_viftable; im->im_src = ip->ip_src; im->im_dst = ip->ip_dst; k_igmpsrc.sin_addr = ip->ip_src; MRTSTAT_INC(mrts_upcalls); if (socket_send(V_ip_mrouter, mb_first, &k_igmpsrc) < 0) { CTR1(KTR_IPMF, "%s: socket queue full", __func__); MRTSTAT_INC(mrts_upq_sockfull); return ENOBUFS; } /* Keep statistics */ PIMSTAT_INC(pims_snd_registers_msgs); PIMSTAT_ADD(pims_snd_registers_bytes, len); return 0; } /* * Encapsulate the data packet in PIM Register message and send it to the RP. */ static int pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, struct mfc *rt) { struct mbuf *mb_first; struct ip *ip_outer; struct pim_encap_pimhdr *pimhdr; int len = ntohs(ip->ip_len); vifi_t vifi = rt->mfc_parent; VIF_LOCK_ASSERT(); if ((vifi >= V_numvifs) || in_nullhost(V_viftable[vifi].v_lcl_addr)) { m_freem(mb_copy); return EADDRNOTAVAIL; /* The iif vif is invalid */ } /* * Add a new mbuf with the encapsulating header */ mb_first = m_gethdr(M_NOWAIT, MT_DATA); if (mb_first == NULL) { m_freem(mb_copy); return ENOBUFS; } mb_first->m_data += max_linkhdr; mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); mb_first->m_next = mb_copy; mb_first->m_pkthdr.len = len + mb_first->m_len; /* * Fill in the encapsulating IP and PIM header */ ip_outer = mtod(mb_first, struct ip *); *ip_outer = pim_encap_iphdr; ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr)); ip_outer->ip_src = V_viftable[vifi].v_lcl_addr; ip_outer->ip_dst = rt->mfc_rp; /* * Copy the inner header TOS to the outer header, and take care of the * IP_DF bit. */ ip_outer->ip_tos = ip->ip_tos; if (ip->ip_off & htons(IP_DF)) ip_outer->ip_off |= htons(IP_DF); ip_fillid(ip_outer); pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer + sizeof(pim_encap_iphdr)); *pimhdr = pim_encap_pimhdr; /* If the iif crosses a border, set the Border-bit */ if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & V_mrt_api_config) pimhdr->flags |= htonl(PIM_BORDER_REGISTER); mb_first->m_data += sizeof(pim_encap_iphdr); pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr)); mb_first->m_data -= sizeof(pim_encap_iphdr); send_packet(vifp, mb_first); /* Keep statistics */ PIMSTAT_INC(pims_snd_registers_msgs); PIMSTAT_ADD(pims_snd_registers_bytes, len); return 0; } /* * pim_encapcheck() is called by the encap4_input() path at runtime to * determine if a packet is for PIM; allowing PIM to be dynamically loaded * into the kernel. */ static int pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { #ifdef DIAGNOSTIC KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM")); #endif if (proto != IPPROTO_PIM) return 0; /* not for us; reject the datagram. */ return 64; /* claim the datagram. */ } /* * PIM-SMv2 and PIM-DM messages processing. * Receives and verifies the PIM control messages, and passes them * up to the listening socket, using rip_input(). * The only message with special processing is the PIM_REGISTER message * (used by PIM-SM): the PIM header is stripped off, and the inner packet * is passed to if_simloop(). */ int pim_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct pim *pim; int iphlen = *offp; int minlen; int datalen = ntohs(ip->ip_len) - iphlen; int ip_tos; *mp = NULL; /* Keep statistics */ PIMSTAT_INC(pims_rcv_total_msgs); PIMSTAT_ADD(pims_rcv_total_bytes, datalen); /* * Validate lengths */ if (datalen < PIM_MINLEN) { PIMSTAT_INC(pims_rcv_tooshort); CTR3(KTR_IPMF, "%s: short packet (%d) from 0x%08x", __func__, datalen, ntohl(ip->ip_src.s_addr)); m_freem(m); return (IPPROTO_DONE); } /* * If the packet is at least as big as a REGISTER, go agead * and grab the PIM REGISTER header size, to avoid another * possible m_pullup() later. * * PIM_MINLEN == pimhdr + u_int32_t == 4 + 4 = 8 * PIM_REG_MINLEN == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28 */ minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN); /* * Get the IP and PIM headers in contiguous memory, and * possibly the PIM REGISTER header. */ if (m->m_len < minlen && (m = m_pullup(m, minlen)) == NULL) { CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__); return (IPPROTO_DONE); } /* m_pullup() may have given us a new mbuf so reset ip. */ ip = mtod(m, struct ip *); ip_tos = ip->ip_tos; /* adjust mbuf to point to the PIM header */ m->m_data += iphlen; m->m_len -= iphlen; pim = mtod(m, struct pim *); /* * Validate checksum. If PIM REGISTER, exclude the data packet. * * XXX: some older PIMv2 implementations don't make this distinction, * so for compatibility reason perform the checksum over part of the * message, and if error, then over the whole message. */ if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) { /* do nothing, checksum okay */ } else if (in_cksum(m, datalen)) { PIMSTAT_INC(pims_rcv_badsum); CTR1(KTR_IPMF, "%s: invalid checksum", __func__); m_freem(m); return (IPPROTO_DONE); } /* PIM version check */ if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) { PIMSTAT_INC(pims_rcv_badversion); CTR3(KTR_IPMF, "%s: bad version %d expect %d", __func__, (int)PIM_VT_V(pim->pim_vt), PIM_VERSION); m_freem(m); return (IPPROTO_DONE); } /* restore mbuf back to the outer IP */ m->m_data -= iphlen; m->m_len += iphlen; if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) { /* * Since this is a REGISTER, we'll make a copy of the register * headers ip + pim + u_int32 + encap_ip, to be passed up to the * routing daemon. */ struct sockaddr_in dst = { sizeof(dst), AF_INET }; struct mbuf *mcp; struct ip *encap_ip; u_int32_t *reghdr; struct ifnet *vifp; VIF_LOCK(); if ((V_reg_vif_num >= V_numvifs) || (V_reg_vif_num == VIFI_INVALID)) { VIF_UNLOCK(); CTR2(KTR_IPMF, "%s: register vif not set: %d", __func__, (int)V_reg_vif_num); m_freem(m); return (IPPROTO_DONE); } /* XXX need refcnt? */ vifp = V_viftable[V_reg_vif_num].v_ifp; VIF_UNLOCK(); /* * Validate length */ if (datalen < PIM_REG_MINLEN) { PIMSTAT_INC(pims_rcv_tooshort); PIMSTAT_INC(pims_rcv_badregisters); CTR1(KTR_IPMF, "%s: register packet size too small", __func__); m_freem(m); return (IPPROTO_DONE); } reghdr = (u_int32_t *)(pim + 1); encap_ip = (struct ip *)(reghdr + 1); CTR3(KTR_IPMF, "%s: register: encap ip src 0x%08x len %d", __func__, ntohl(encap_ip->ip_src.s_addr), ntohs(encap_ip->ip_len)); /* verify the version number of the inner packet */ if (encap_ip->ip_v != IPVERSION) { PIMSTAT_INC(pims_rcv_badregisters); CTR1(KTR_IPMF, "%s: bad encap ip version", __func__); m_freem(m); return (IPPROTO_DONE); } /* verify the inner packet is destined to a mcast group */ if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) { PIMSTAT_INC(pims_rcv_badregisters); CTR2(KTR_IPMF, "%s: bad encap ip dest 0x%08x", __func__, ntohl(encap_ip->ip_dst.s_addr)); m_freem(m); return (IPPROTO_DONE); } /* If a NULL_REGISTER, pass it to the daemon */ if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) goto pim_input_to_daemon; /* * Copy the TOS from the outer IP header to the inner IP header. */ if (encap_ip->ip_tos != ip_tos) { /* Outer TOS -> inner TOS */ encap_ip->ip_tos = ip_tos; /* Recompute the inner header checksum. Sigh... */ /* adjust mbuf to point to the inner IP header */ m->m_data += (iphlen + PIM_MINLEN); m->m_len -= (iphlen + PIM_MINLEN); encap_ip->ip_sum = 0; encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2); /* restore mbuf to point back to the outer IP header */ m->m_data -= (iphlen + PIM_MINLEN); m->m_len += (iphlen + PIM_MINLEN); } /* * Decapsulate the inner IP packet and loopback to forward it * as a normal multicast packet. Also, make a copy of the * outer_iphdr + pimhdr + reghdr + encap_iphdr * to pass to the daemon later, so it can take the appropriate * actions (e.g., send back PIM_REGISTER_STOP). * XXX: here m->m_data points to the outer IP header. */ mcp = m_copym(m, 0, iphlen + PIM_REG_MINLEN, M_NOWAIT); if (mcp == NULL) { CTR1(KTR_IPMF, "%s: m_copym() failed", __func__); m_freem(m); return (IPPROTO_DONE); } /* Keep statistics */ /* XXX: registers_bytes include only the encap. mcast pkt */ PIMSTAT_INC(pims_rcv_registers_msgs); PIMSTAT_ADD(pims_rcv_registers_bytes, ntohs(encap_ip->ip_len)); /* * forward the inner ip packet; point m_data at the inner ip. */ m_adj(m, iphlen + PIM_MINLEN); CTR4(KTR_IPMF, "%s: forward decap'd REGISTER: src %lx dst %lx vif %d", __func__, (u_long)ntohl(encap_ip->ip_src.s_addr), (u_long)ntohl(encap_ip->ip_dst.s_addr), (int)V_reg_vif_num); /* NB: vifp was collected above; can it change on us? */ if_simloop(vifp, m, dst.sin_family, 0); /* prepare the register head to send to the mrouting daemon */ m = mcp; } pim_input_to_daemon: /* * Pass the PIM message up to the daemon; if it is a Register message, * pass the 'head' only up to the daemon. This includes the * outer IP header, PIM header, PIM-Register header and the * inner IP header. * XXX: the outer IP header pkt size of a Register is not adjust to * reflect the fact that the inner multicast data is truncated. */ *mp = m; rip_input(mp, offp, proto); return (IPPROTO_DONE); } static int sysctl_mfctable(SYSCTL_HANDLER_ARGS) { struct mfc *rt; int error, i; if (req->newptr) return (EPERM); if (V_mfchashtbl == NULL) /* XXX unlocked */ return (0); error = sysctl_wire_old_buffer(req, 0); if (error) return (error); MFC_LOCK(); for (i = 0; i < mfchashsize; i++) { LIST_FOREACH(rt, &V_mfchashtbl[i], mfc_hash) { error = SYSCTL_OUT(req, rt, sizeof(struct mfc)); if (error) goto out_locked; } } out_locked: MFC_UNLOCK(); return (error); } static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, sysctl_mfctable, "IPv4 Multicast Forwarding Table " "(struct *mfc[mfchashsize], netinet/ip_mroute.h)"); static void vnet_mroute_init(const void *unused __unused) { - MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO); + V_nexpire = malloc(mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO); bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers)); callout_init(&V_expire_upcalls_ch, 1); callout_init(&V_bw_upcalls_ch, 1); callout_init(&V_bw_meter_ch, 1); } VNET_SYSINIT(vnet_mroute_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mroute_init, NULL); static void vnet_mroute_uninit(const void *unused __unused) { - FREE(V_nexpire, M_MRTABLE); + free(V_nexpire, M_MRTABLE); V_nexpire = NULL; } VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, vnet_mroute_uninit, NULL); static int ip_mroute_modevent(module_t mod, int type, void *unused) { switch (type) { case MOD_LOAD: MROUTER_LOCK_INIT(); if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, if_detached_event, NULL, EVENTHANDLER_PRI_ANY); if (if_detach_event_tag == NULL) { printf("ip_mroute: unable to register " "ifnet_departure_event handler\n"); MROUTER_LOCK_DESTROY(); return (EINVAL); } MFC_LOCK_INIT(); VIF_LOCK_INIT(); mfchashsize = MFCHASHSIZE; if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) && !powerof2(mfchashsize)) { printf("WARNING: %s not a power of 2; using default\n", "net.inet.ip.mfchashsize"); mfchashsize = MFCHASHSIZE; } pim_squelch_wholepkt = 0; TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt", &pim_squelch_wholepkt); pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM, pim_encapcheck, &in_pim_protosw, NULL); if (pim_encap_cookie == NULL) { printf("ip_mroute: unable to attach pim encap\n"); VIF_LOCK_DESTROY(); MFC_LOCK_DESTROY(); MROUTER_LOCK_DESTROY(); return (EINVAL); } ip_mcast_src = X_ip_mcast_src; ip_mforward = X_ip_mforward; ip_mrouter_done = X_ip_mrouter_done; ip_mrouter_get = X_ip_mrouter_get; ip_mrouter_set = X_ip_mrouter_set; ip_rsvp_force_done = X_ip_rsvp_force_done; ip_rsvp_vif = X_ip_rsvp_vif; legal_vif_num = X_legal_vif_num; mrt_ioctl = X_mrt_ioctl; rsvp_input_p = X_rsvp_input; break; case MOD_UNLOAD: /* * Typically module unload happens after the user-level * process has shutdown the kernel services (the check * below insures someone can't just yank the module out * from under a running process). But if the module is * just loaded and then unloaded w/o starting up a user * process we still need to cleanup. */ MROUTER_LOCK(); if (ip_mrouter_cnt != 0) { MROUTER_UNLOCK(); return (EINVAL); } ip_mrouter_unloading = 1; MROUTER_UNLOCK(); EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); if (pim_encap_cookie) { encap_detach(pim_encap_cookie); pim_encap_cookie = NULL; } ip_mcast_src = NULL; ip_mforward = NULL; ip_mrouter_done = NULL; ip_mrouter_get = NULL; ip_mrouter_set = NULL; ip_rsvp_force_done = NULL; ip_rsvp_vif = NULL; legal_vif_num = NULL; mrt_ioctl = NULL; rsvp_input_p = NULL; VIF_LOCK_DESTROY(); MFC_LOCK_DESTROY(); MROUTER_LOCK_DESTROY(); break; default: return EOPNOTSUPP; } return 0; } static moduledata_t ip_mroutemod = { "ip_mroute", ip_mroute_modevent, 0 }; DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE); Index: head/sys/nfsclient/nfsnode.h =================================================================== --- head/sys/nfsclient/nfsnode.h (revision 328416) +++ head/sys/nfsclient/nfsnode.h (revision 328417) @@ -1,215 +1,215 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfsnode.h 8.9 (Berkeley) 5/14/95 * $FreeBSD$ */ #ifndef _NFSCLIENT_NFSNODE_H_ #define _NFSCLIENT_NFSNODE_H_ #include #if !defined(_NFSCLIENT_NFS_H_) && !defined(_KERNEL) #include #endif /* * Silly rename structure that hangs off the nfsnode until the name * can be removed by nfs_inactive() */ struct sillyrename { struct task s_task; struct ucred *s_cred; struct vnode *s_dvp; int (*s_removeit)(struct sillyrename *sp); long s_namlen; char s_name[32]; }; /* * This structure is used to save the logical directory offset to * NFS cookie mappings. * The mappings are stored in a list headed * by n_cookies, as required. * There is one mapping for each NFS_DIRBLKSIZ bytes of directory information * stored in increasing logical offset byte order. */ #define NFSNUMCOOKIES 31 struct nfsdmap { LIST_ENTRY(nfsdmap) ndm_list; int ndm_eocookie; union { nfsuint64 ndmu3_cookies[NFSNUMCOOKIES]; uint64_t ndmu4_cookies[NFSNUMCOOKIES]; } ndm_un1; }; #define ndm_cookies ndm_un1.ndmu3_cookies #define ndm4_cookies ndm_un1.ndmu4_cookies struct nfs_accesscache { u_int32_t mode; /* ACCESS mode cache */ uid_t uid; /* credentials having mode */ time_t stamp; /* mode cache timestamp */ }; /* * The nfsnode is the nfs equivalent to ufs's inode. Any similarity * is purely coincidental. * There is a unique nfsnode allocated for each active file, * each current directory, each mounted-on file, text file, and the root. * An nfsnode is 'named' by its file handle. (nget/nfs_node.c) * If this structure exceeds 256 bytes (it is currently 256 using 4.4BSD-Lite * type definitions), file handles of > 32 bytes should probably be split out - * into a separate MALLOC()'d data structure. (Reduce the size of nfsfh_t by + * into a separate malloc()'d data structure. (Reduce the size of nfsfh_t by * changing the definition in nfsproto.h of NFS_SMALLFH.) * NB: Hopefully the current order of the fields is such that everything will * be well aligned and, therefore, tightly packed. */ struct nfsnode { struct mtx n_mtx; /* Protects all of these members */ u_quad_t n_size; /* Current size of file */ u_quad_t n_brev; /* Modify rev when cached */ u_quad_t n_lrev; /* Modify rev for lease */ struct vattr n_vattr; /* Vnode attribute cache */ time_t n_attrstamp; /* Attr. cache timestamp */ struct nfs_accesscache n_accesscache[NFS_ACCESSCACHESIZE]; struct timespec n_mtime; /* Prev modify time. */ nfsfh_t *n_fhp; /* NFS File Handle */ struct vnode *n_vnode; /* associated vnode */ struct vnode *n_dvp; /* parent vnode */ int n_error; /* Save write error value */ union { struct timespec nf_atim; /* Special file times */ nfsuint64 nd_cookieverf; /* Cookie verifier (dir only) */ u_char nd4_cookieverf[NFSX_V4VERF]; } n_un1; union { struct timespec nf_mtim; off_t nd_direof; /* Dir. EOF offset cache */ } n_un2; union { struct sillyrename *nf_silly; /* Ptr to silly rename struct */ LIST_HEAD(, nfsdmap) nd_cook; /* cookies */ } n_un3; short n_fhsize; /* size in bytes, of fh */ short n_flag; /* Flag for locking.. */ nfsfh_t n_fh; /* Small File Handle */ u_char *n_name; /* leaf name, for v4 OPEN op */ uint32_t n_namelen; int n_directio_opens; int n_directio_asyncwr; struct ucred *n_writecred; /* Cred. for putpages */ }; #define n_atim n_un1.nf_atim #define n_mtim n_un2.nf_mtim #define n_sillyrename n_un3.nf_silly #define n_cookieverf n_un1.nd_cookieverf #define n4_cookieverf n_un1.nd4_cookieverf #define n_direofoffset n_un2.nd_direof #define n_cookies n_un3.nd_cook /* * Flags for n_flag */ #define NFSYNCWAIT 0x0002 /* fsync waiting for all directio async writes to drain */ #define NMODIFIED 0x0004 /* Might have a modified buffer in bio */ #define NWRITEERR 0x0008 /* Flag write errors so close will know */ /* 0x20, 0x40, 0x80 free */ #define NACC 0x0100 /* Special file accessed */ #define NUPD 0x0200 /* Special file updated */ #define NCHG 0x0400 /* Special file times changed */ #define NCREATED 0x0800 /* Opened by nfs_create() */ #define NTRUNCATE 0x1000 /* Opened by nfs_setattr() */ #define NSIZECHANGED 0x2000 /* File size has changed: need cache inval */ #define NNONCACHE 0x4000 /* Node marked as noncacheable */ #define NDIRCOOKIELK 0x8000 /* Lock to serialize access to directory cookies */ /* * Convert between nfsnode pointers and vnode pointers */ #define VTONFS(vp) ((struct nfsnode *)(vp)->v_data) #define NFSTOV(np) ((struct vnode *)(np)->n_vnode) #define NFS_TIMESPEC_COMPARE(T1, T2) (((T1)->tv_sec != (T2)->tv_sec) || ((T1)->tv_nsec != (T2)->tv_nsec)) /* * NFS iod threads can be in one of these two states once spawned. * NFSIOD_NOT_AVAILABLE - Cannot be assigned an I/O operation at this time. * NFSIOD_AVAILABLE - Available to be assigned an I/O operation. */ enum nfsiod_state { NFSIOD_NOT_AVAILABLE = 0, NFSIOD_AVAILABLE = 1, }; /* * Queue head for nfsiod's */ extern TAILQ_HEAD(nfs_bufq, buf) nfs_bufq; extern enum nfsiod_state nfs_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; #if defined(_KERNEL) extern struct vop_vector nfs_fifoops; extern struct vop_vector nfs_vnodeops; extern struct buf_ops buf_ops_nfs; /* * Prototypes for NFS vnode operations */ int nfs_getpages(struct vop_getpages_args *); int nfs_putpages(struct vop_putpages_args *); int nfs_write(struct vop_write_args *); int nfs_inactive(struct vop_inactive_args *); int nfs_reclaim(struct vop_reclaim_args *); /* other stuff */ int nfs_removeit(struct sillyrename *); int nfs_nget(struct mount *, nfsfh_t *, int, struct nfsnode **, int flags); nfsuint64 *nfs_getcookie(struct nfsnode *, off_t, int); void nfs_invaldir(struct vnode *); int nfs_upgrade_vnlock(struct vnode *vp); void nfs_downgrade_vnlock(struct vnode *vp, int old_lock); void nfs_printf(const char *fmt, ...); void nfs_dircookie_lock(struct nfsnode *np); void nfs_dircookie_unlock(struct nfsnode *np); #endif /* _KERNEL */ #endif Index: head/sys/sys/malloc.h =================================================================== --- head/sys/sys/malloc.h (revision 328416) +++ head/sys/sys/malloc.h (revision 328417) @@ -1,219 +1,212 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1987, 1993 * The Regents of the University of California. * Copyright (c) 2005, 2009 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)malloc.h 8.5 (Berkeley) 5/3/95 * $FreeBSD$ */ #ifndef _SYS_MALLOC_H_ #define _SYS_MALLOC_H_ #include #include #include #include #include #define MINALLOCSIZE UMA_SMALLEST_UNIT /* * flags to malloc. */ #define M_NOWAIT 0x0001 /* do not block */ #define M_WAITOK 0x0002 /* ok to block */ #define M_ZERO 0x0100 /* bzero the allocation */ #define M_NOVM 0x0200 /* don't ask VM for pages */ #define M_USE_RESERVE 0x0400 /* can alloc out of reserve memory */ #define M_NODUMP 0x0800 /* don't dump pages in this allocation */ #define M_FIRSTFIT 0x1000 /* Only for vmem, fast fit. */ #define M_BESTFIT 0x2000 /* Only for vmem, low fragmentation. */ #define M_MAGIC 877983977 /* time when first defined :-) */ /* * Two malloc type structures are present: malloc_type, which is used by a * type owner to declare the type, and malloc_type_internal, which holds * malloc-owned statistics and other ABI-sensitive fields, such as the set of * malloc statistics indexed by the compile-time MAXCPU constant. * Applications should avoid introducing dependence on the allocator private * data layout and size. * * The malloc_type ks_next field is protected by malloc_mtx. Other fields in * malloc_type are static after initialization so unsynchronized. * * Statistics in malloc_type_stats are written only when holding a critical * section and running on the CPU associated with the index into the stat * array, but read lock-free resulting in possible (minor) races, which the * monitoring app should take into account. */ struct malloc_type_stats { uint64_t mts_memalloced; /* Bytes allocated on CPU. */ uint64_t mts_memfreed; /* Bytes freed on CPU. */ uint64_t mts_numallocs; /* Number of allocates on CPU. */ uint64_t mts_numfrees; /* number of frees on CPU. */ uint64_t mts_size; /* Bitmask of sizes allocated on CPU. */ uint64_t _mts_reserved1; /* Reserved field. */ uint64_t _mts_reserved2; /* Reserved field. */ uint64_t _mts_reserved3; /* Reserved field. */ }; /* * Index definitions for the mti_probes[] array. */ #define DTMALLOC_PROBE_MALLOC 0 #define DTMALLOC_PROBE_FREE 1 #define DTMALLOC_PROBE_MAX 2 struct malloc_type_internal { uint32_t mti_probes[DTMALLOC_PROBE_MAX]; /* DTrace probe ID array. */ u_char mti_zone; struct malloc_type_stats mti_stats[MAXCPU]; }; /* * Public data structure describing a malloc type. Private data is hung off * of ks_handle to avoid encoding internal malloc(9) data structures in * modules, which will statically allocate struct malloc_type. */ struct malloc_type { struct malloc_type *ks_next; /* Next in global chain. */ u_long ks_magic; /* Detect programmer error. */ const char *ks_shortdesc; /* Printable type name. */ void *ks_handle; /* Priv. data, was lo_class. */ }; /* * Statistics structure headers for user space. The kern.malloc sysctl * exposes a structure stream consisting of a stream header, then a series of * malloc type headers and statistics structures (quantity maxcpus). For * convenience, the kernel will provide the current value of maxcpus at the * head of the stream. */ #define MALLOC_TYPE_STREAM_VERSION 0x00000001 struct malloc_type_stream_header { uint32_t mtsh_version; /* Stream format version. */ uint32_t mtsh_maxcpus; /* Value of MAXCPU for stream. */ uint32_t mtsh_count; /* Number of records. */ uint32_t _mtsh_pad; /* Pad/reserved field. */ }; #define MALLOC_MAX_NAME 32 struct malloc_type_header { char mth_name[MALLOC_MAX_NAME]; }; #ifdef _KERNEL #define MALLOC_DEFINE(type, shortdesc, longdesc) \ struct malloc_type type[1] = { \ { NULL, M_MAGIC, shortdesc, NULL } \ }; \ SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_THIRD, malloc_init, \ type); \ SYSUNINIT(type##_uninit, SI_SUB_KMEM, SI_ORDER_ANY, \ malloc_uninit, type) #define MALLOC_DECLARE(type) \ extern struct malloc_type type[1] MALLOC_DECLARE(M_CACHE); MALLOC_DECLARE(M_DEVBUF); MALLOC_DECLARE(M_TEMP); /* - * Deprecated macro versions of not-quite-malloc() and free(). - */ -#define MALLOC(space, cast, size, type, flags) \ - ((space) = (cast)malloc((u_long)(size), (type), (flags))) -#define FREE(addr, type) free((addr), (type)) - -/* * XXX this should be declared in , but that tends to fail * because is included in a header before the source file * has a chance to include to get MALLOC_DECLARE() defined. */ MALLOC_DECLARE(M_IOV); extern struct mtx malloc_mtx; /* * Function type used when iterating over the list of malloc types. */ typedef void malloc_type_list_func_t(struct malloc_type *, void *); void contigfree(void *addr, unsigned long size, struct malloc_type *type); void *contigmalloc(unsigned long size, struct malloc_type *type, int flags, vm_paddr_t low, vm_paddr_t high, unsigned long alignment, vm_paddr_t boundary) __malloc_like __result_use_check __alloc_size(1) __alloc_align(6); void *contigmalloc_domain(unsigned long size, struct malloc_type *type, int domain, int flags, vm_paddr_t low, vm_paddr_t high, unsigned long alignment, vm_paddr_t boundary) __malloc_like __result_use_check __alloc_size(1) __alloc_align(6); void free(void *addr, struct malloc_type *type); void free_domain(void *addr, struct malloc_type *type); void *malloc(size_t size, struct malloc_type *type, int flags) __malloc_like __result_use_check __alloc_size(1); void *malloc_domain(size_t size, struct malloc_type *type, int domain, int flags) __malloc_like __result_use_check __alloc_size(1); void *mallocarray(size_t nmemb, size_t size, struct malloc_type *type, int flags) __malloc_like __result_use_check __alloc_size2(1, 2); void malloc_init(void *); int malloc_last_fail(void); void malloc_type_allocated(struct malloc_type *type, unsigned long size); void malloc_type_freed(struct malloc_type *type, unsigned long size); void malloc_type_list(malloc_type_list_func_t *, void *); void malloc_uninit(void *); void *realloc(void *addr, size_t size, struct malloc_type *type, int flags) __result_use_check __alloc_size(2); void *reallocf(void *addr, size_t size, struct malloc_type *type, int flags) __result_use_check __alloc_size(2); struct malloc_type *malloc_desc2type(const char *desc); /* * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW */ #define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 8 / 2)) static inline bool WOULD_OVERFLOW(size_t nmemb, size_t size) { return ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && nmemb > 0 && __SIZE_T_MAX / nmemb < size); } #undef MUL_NO_OVERFLOW #endif /* _KERNEL */ #endif /* !_SYS_MALLOC_H_ */