Index: head/sys/nfsclient/bootp_subr.c =================================================================== --- head/sys/nfsclient/bootp_subr.c (revision 130553) +++ head/sys/nfsclient/bootp_subr.c (revision 130554) @@ -1,1896 +1,1896 @@ /* * Copyright (c) 1995 Gordon Ross, Adam Glass * Copyright (c) 1992 Regents of the University of California. * All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * based on: * nfs/krpc_subr.c * $NetBSD: krpc_subr.c,v 1.10 1995/08/08 20:43:43 gwr Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define BOOTP_MIN_LEN 300 /* Minimum size of bootp udp packet */ #ifndef BOOTP_SETTLE_DELAY #define BOOTP_SETTLE_DELAY 3 #endif /* * What is the longest we will wait before re-sending a request? * Note this is also the frequency of "RPC timeout" messages. * The re-send loop count sup linearly to this maximum, so the * first complaint will happen after (1+2+3+4+5)=15 seconds. */ #define MAX_RESEND_DELAY 5 /* seconds */ /* Definitions from RFC951 */ struct bootp_packet { u_int8_t op; u_int8_t htype; u_int8_t hlen; u_int8_t hops; u_int32_t xid; u_int16_t secs; u_int16_t flags; struct in_addr ciaddr; struct in_addr yiaddr; struct in_addr siaddr; struct in_addr giaddr; unsigned char chaddr[16]; char sname[64]; char file[128]; unsigned char vend[1222]; }; struct bootpc_ifcontext { struct bootpc_ifcontext *next; struct bootp_packet call; struct bootp_packet reply; int replylen; int overload; struct socket *so; struct ifreq ireq; struct ifnet *ifp; struct sockaddr_dl *sdl; struct sockaddr_in myaddr; struct sockaddr_in netmask; struct sockaddr_in gw; struct sockaddr_in broadcast; /* Different for each interface */ int gotgw; int gotnetmask; int gotrootpath; int outstanding; int sentmsg; u_int32_t xid; enum { IF_BOOTP_UNRESOLVED, IF_BOOTP_RESOLVED, IF_BOOTP_FAILED, IF_DHCP_UNRESOLVED, IF_DHCP_OFFERED, IF_DHCP_RESOLVED, IF_DHCP_FAILED, } state; int dhcpquerytype; /* dhcp type sent */ struct in_addr dhcpserver; int gotdhcpserver; }; #define TAG_MAXLEN 1024 struct bootpc_tagcontext { char buf[TAG_MAXLEN + 1]; int overload; int badopt; int badtag; int foundopt; int taglen; }; struct bootpc_globalcontext { struct bootpc_ifcontext *interfaces; struct bootpc_ifcontext *lastinterface; u_int32_t xid; int gotrootpath; int gotgw; int ifnum; int secs; int starttime; struct bootp_packet reply; int replylen; struct bootpc_ifcontext *setrootfs; struct bootpc_ifcontext *sethostname; struct bootpc_tagcontext tmptag; struct bootpc_tagcontext tag; }; #define IPPORT_BOOTPC 68 #define IPPORT_BOOTPS 67 #define BOOTP_REQUEST 1 #define BOOTP_REPLY 2 /* Common tags */ #define TAG_PAD 0 /* Pad option, implicit length 1 */ #define TAG_SUBNETMASK 1 /* RFC 950 subnet mask */ #define TAG_ROUTERS 3 /* Routers (in order of preference) */ #define TAG_HOSTNAME 12 /* Client host name */ #define TAG_ROOT 17 /* Root path */ /* DHCP specific tags */ #define TAG_OVERLOAD 52 /* Option Overload */ #define TAG_MAXMSGSIZE 57 /* Maximum DHCP Message Size */ #define TAG_END 255 /* End Option (i.e. no more options) */ /* Overload values */ #define OVERLOAD_FILE 1 #define OVERLOAD_SNAME 2 /* Site specific tags: */ #define TAG_ROOTOPTS 130 #define TAG_COOKIE 134 /* ascii info for userland, via sysctl */ #define TAG_DHCP_MSGTYPE 53 #define TAG_DHCP_REQ_ADDR 50 #define TAG_DHCP_SERVERID 54 #define TAG_DHCP_LEASETIME 51 #define TAG_VENDOR_INDENTIFIER 60 #define DHCP_NOMSG 0 #define DHCP_DISCOVER 1 #define DHCP_OFFER 2 #define DHCP_REQUEST 3 #define DHCP_ACK 5 static char bootp_cookie[128]; SYSCTL_STRING(_kern, OID_AUTO, bootp_cookie, CTLFLAG_RD, bootp_cookie, 0, "Cookie (T134) supplied by bootp server"); /* mountd RPC */ static int md_mount(struct sockaddr_in *mdsin, char *path, u_char *fhp, int *fhsizep, struct nfs_args *args, struct thread *td); static int setfs(struct sockaddr_in *addr, char *path, char *p, const struct in_addr *siaddr); static int getdec(char **ptr); static int getip(char **ptr, struct in_addr *ip); static char *substr(char *a, char *b); static void mountopts(struct nfs_args *args, char *p); static int xdr_opaque_decode(struct mbuf **ptr, u_char *buf, int len); static int xdr_int_decode(struct mbuf **ptr, int *iptr); static void print_in_addr(struct in_addr addr); static void print_sin_addr(struct sockaddr_in *addr); static void clear_sinaddr(struct sockaddr_in *sin); static void allocifctx(struct bootpc_globalcontext *gctx); static void bootpc_compose_query(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td); static unsigned char *bootpc_tag(struct bootpc_tagcontext *tctx, struct bootp_packet *bp, int len, int tag); static void bootpc_tag_helper(struct bootpc_tagcontext *tctx, unsigned char *start, int len, int tag); #ifdef BOOTP_DEBUG void bootpboot_p_sa(struct sockaddr *sa, struct sockaddr *ma); void bootpboot_p_ma(struct sockaddr *ma); void bootpboot_p_rtentry(struct rtentry *rt); void bootpboot_p_tree(struct radix_node *rn); void bootpboot_p_rtlist(void); void bootpboot_p_if(struct ifnet *ifp, struct ifaddr *ifa); void bootpboot_p_iflist(void); #endif static int bootpc_call(struct bootpc_globalcontext *gctx, struct thread *td); static int bootpc_fakeup_interface(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td); static int bootpc_adjust_interface(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td); static void bootpc_decode_reply(struct nfsv3_diskless *nd, struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx); static int bootpc_received(struct bootpc_globalcontext *gctx, struct bootpc_ifcontext *ifctx); static __inline int bootpc_ifctx_isresolved(struct bootpc_ifcontext *ifctx); static __inline int bootpc_ifctx_isunresolved(struct bootpc_ifcontext *ifctx); static __inline int bootpc_ifctx_isfailed(struct bootpc_ifcontext *ifctx); /* * In order to have multiple active interfaces with address 0.0.0.0 * and be able to send data to a selected interface, we perform * some tricks: * * - The 'broadcast' address is different for each interface. * * - We temporarily add routing pointing 255.255.255.255 to the * selected interface broadcast address, thus the packet sent * goes to that interface. */ #ifdef BOOTP_DEBUG void bootpboot_p_sa(struct sockaddr *sa, struct sockaddr *ma) { if (sa == NULL) { printf("(sockaddr *) "); return; } switch (sa->sa_family) { case AF_INET: { struct sockaddr_in *sin; sin = (struct sockaddr_in *) sa; printf("inet "); print_sin_addr(sin); if (ma != NULL) { sin = (struct sockaddr_in *) ma; printf(" mask "); print_sin_addr(sin); } } break; case AF_LINK: { struct sockaddr_dl *sli; int i; sli = (struct sockaddr_dl *) sa; printf("link %.*s ", sli->sdl_nlen, sli->sdl_data); for (i = 0; i < sli->sdl_alen; i++) { if (i > 0) printf(":"); printf("%x", ((unsigned char *) LLADDR(sli))[i]); } } break; default: printf("af%d", sa->sa_family); } } void bootpboot_p_ma(struct sockaddr *ma) { if (ma == NULL) { printf(""); return; } printf("%x", *(int *)ma); } void bootpboot_p_rtentry(struct rtentry *rt) { bootpboot_p_sa(rt_key(rt), rt_mask(rt)); printf(" "); bootpboot_p_ma(rt->rt_genmask); printf(" "); bootpboot_p_sa(rt->rt_gateway, NULL); printf(" "); printf("flags %x", (unsigned short) rt->rt_flags); printf(" %d", (int) rt->rt_rmx.rmx_expire); printf(" %s\n", rt->rt_ifp->if_xname); } void bootpboot_p_tree(struct radix_node *rn) { while (rn != NULL) { if (rn->rn_bit < 0) { if ((rn->rn_flags & RNF_ROOT) != 0) { } else { bootpboot_p_rtentry((struct rtentry *) rn); } rn = rn->rn_dupedkey; } else { bootpboot_p_tree(rn->rn_left); bootpboot_p_tree(rn->rn_right); return; } } } void bootpboot_p_rtlist(void) { printf("Routing table:\n"); RADIX_NODE_LOCK(rt_tables[AF_INET]); /* could sleep XXX */ bootpboot_p_tree(rt_tables[AF_INET]->rnh_treetop); RADIX_NODE_UNLOCK(rt_tables[AF_INET]); } void bootpboot_p_if(struct ifnet *ifp, struct ifaddr *ifa) { printf("%s flags %x, addr ", ifp->if_xname, ifp->if_flags); print_sin_addr((struct sockaddr_in *) ifa->ifa_addr); printf(", broadcast "); print_sin_addr((struct sockaddr_in *) ifa->ifa_dstaddr); printf(", netmask "); print_sin_addr((struct sockaddr_in *) ifa->ifa_netmask); printf("\n"); } void bootpboot_p_iflist(void) { struct ifnet *ifp; struct ifaddr *ifa; printf("Interface list:\n"); IFNET_RLOCK(); /* could sleep, but okay for debugging XXX */ for (ifp = TAILQ_FIRST(&ifnet); ifp != NULL; ifp = TAILQ_NEXT(ifp, if_link)) { for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa != NULL; ifa = TAILQ_NEXT(ifa, ifa_link)) if (ifa->ifa_addr->sa_family == AF_INET) bootpboot_p_if(ifp, ifa); } IFNET_RUNLOCK(); } #endif /* defined(BOOTP_DEBUG) */ static void clear_sinaddr(struct sockaddr_in *sin) { bzero(sin, sizeof(*sin)); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; /* XXX: htonl(INAADDR_ANY) ? */ sin->sin_port = 0; } static void allocifctx(struct bootpc_globalcontext *gctx) { struct bootpc_ifcontext *ifctx; ifctx = (struct bootpc_ifcontext *) malloc(sizeof(*ifctx), M_TEMP, M_WAITOK | M_ZERO); if (ifctx == NULL) panic("Failed to allocate bootp interface context structure"); ifctx->xid = gctx->xid; #ifdef BOOTP_NO_DHCP ifctx->state = IF_BOOTP_UNRESOLVED; #else ifctx->state = IF_DHCP_UNRESOLVED; #endif gctx->xid += 0x100; if (gctx->interfaces != NULL) gctx->lastinterface->next = ifctx; else gctx->interfaces = ifctx; gctx->lastinterface = ifctx; } static __inline int bootpc_ifctx_isresolved(struct bootpc_ifcontext *ifctx) { if (ifctx->state == IF_BOOTP_RESOLVED || ifctx->state == IF_DHCP_RESOLVED) return 1; return 0; } static __inline int bootpc_ifctx_isunresolved(struct bootpc_ifcontext *ifctx) { if (ifctx->state == IF_BOOTP_UNRESOLVED || ifctx->state == IF_DHCP_UNRESOLVED) return 1; return 0; } static __inline int bootpc_ifctx_isfailed(struct bootpc_ifcontext *ifctx) { if (ifctx->state == IF_BOOTP_FAILED || ifctx->state == IF_DHCP_FAILED) return 1; return 0; } static int bootpc_received(struct bootpc_globalcontext *gctx, struct bootpc_ifcontext *ifctx) { unsigned char dhcpreplytype; char *p; /* * Need timeout for fallback to less * desirable alternative. */ /* This call used for the side effect (badopt flag) */ (void) bootpc_tag(&gctx->tmptag, &gctx->reply, gctx->replylen, TAG_END); /* If packet is invalid, ignore it */ if (gctx->tmptag.badopt != 0) return 0; p = bootpc_tag(&gctx->tmptag, &gctx->reply, gctx->replylen, TAG_DHCP_MSGTYPE); if (p != NULL) dhcpreplytype = *p; else dhcpreplytype = DHCP_NOMSG; switch (ifctx->dhcpquerytype) { case DHCP_DISCOVER: if (dhcpreplytype != DHCP_OFFER /* Normal DHCP offer */ #ifndef BOOTP_FORCE_DHCP && dhcpreplytype != DHCP_NOMSG /* Fallback to BOOTP */ #endif ) return 0; break; case DHCP_REQUEST: if (dhcpreplytype != DHCP_ACK) return 0; case DHCP_NOMSG: break; } /* Ignore packet unless it gives us a root tag we didn't have */ if ((ifctx->state == IF_BOOTP_RESOLVED || (ifctx->dhcpquerytype == DHCP_DISCOVER && (ifctx->state == IF_DHCP_OFFERED || ifctx->state == IF_DHCP_RESOLVED))) && (bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_ROOT) != NULL || bootpc_tag(&gctx->tmptag, &gctx->reply, gctx->replylen, TAG_ROOT) == NULL)) return 0; bcopy(&gctx->reply, &ifctx->reply, gctx->replylen); ifctx->replylen = gctx->replylen; /* XXX: Only reset if 'perfect' response */ if (ifctx->state == IF_BOOTP_UNRESOLVED) ifctx->state = IF_BOOTP_RESOLVED; else if (ifctx->state == IF_DHCP_UNRESOLVED && ifctx->dhcpquerytype == DHCP_DISCOVER) { if (dhcpreplytype == DHCP_OFFER) ifctx->state = IF_DHCP_OFFERED; else ifctx->state = IF_BOOTP_RESOLVED; /* Fallback */ } else if (ifctx->state == IF_DHCP_OFFERED && ifctx->dhcpquerytype == DHCP_REQUEST) ifctx->state = IF_DHCP_RESOLVED; if (ifctx->dhcpquerytype == DHCP_DISCOVER && ifctx->state != IF_BOOTP_RESOLVED) { p = bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_DHCP_SERVERID); if (p != NULL && gctx->tmptag.taglen == 4) { memcpy(&ifctx->dhcpserver, p, 4); ifctx->gotdhcpserver = 1; } else ifctx->gotdhcpserver = 0; return 1; } ifctx->gotrootpath = (bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_ROOT) != NULL); ifctx->gotgw = (bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_ROUTERS) != NULL); ifctx->gotnetmask = (bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_SUBNETMASK) != NULL); return 1; } static int bootpc_call(struct bootpc_globalcontext *gctx, struct thread *td) { struct socket *so; struct sockaddr_in *sin, dst; struct uio auio; struct sockopt sopt; struct iovec aio; int error, on, rcvflg, timo, len; time_t atimo; time_t rtimo; struct timeval tv; struct bootpc_ifcontext *ifctx; int outstanding; int gotrootpath; int retry; const char *s; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); /* * Create socket and set its recieve timeout. */ error = socreate(AF_INET, &so, SOCK_DGRAM, 0, td->td_ucred, td); if (error != 0) goto out0; tv.tv_sec = 1; tv.tv_usec = 0; bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_RCVTIMEO; sopt.sopt_val = &tv; sopt.sopt_valsize = sizeof tv; error = sosetopt(so, &sopt); if (error != 0) goto out; /* * Enable broadcast. */ on = 1; sopt.sopt_name = SO_BROADCAST; sopt.sopt_val = &on; sopt.sopt_valsize = sizeof on; error = sosetopt(so, &sopt); if (error != 0) goto out; /* * Disable routing. */ on = 1; sopt.sopt_name = SO_DONTROUTE; sopt.sopt_val = &on; sopt.sopt_valsize = sizeof on; error = sosetopt(so, &sopt); if (error != 0) goto out; /* * Bind the local endpoint to a bootp client port. */ sin = &dst; clear_sinaddr(sin); sin->sin_port = htons(IPPORT_BOOTPC); error = sobind(so, (struct sockaddr *)sin, td); if (error != 0) { printf("bind failed\n"); goto out; } /* * Setup socket address for the server. */ sin = &dst; clear_sinaddr(sin); sin->sin_addr.s_addr = INADDR_BROADCAST; sin->sin_port = htons(IPPORT_BOOTPS); /* * Send it, repeatedly, until a reply is received, * but delay each re-send by an increasing amount. * If the delay hits the maximum, start complaining. */ timo = 0; rtimo = 0; for (;;) { outstanding = 0; gotrootpath = 0; for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (bootpc_ifctx_isresolved(ifctx) != 0 && bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_ROOT) != NULL) gotrootpath = 1; } for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { ifctx->outstanding = 0; if (bootpc_ifctx_isresolved(ifctx) != 0 && gotrootpath != 0) { continue; } if (bootpc_ifctx_isfailed(ifctx) != 0) continue; outstanding++; ifctx->outstanding = 1; /* Proceed to next step in DHCP negotiation */ if ((ifctx->state == IF_DHCP_OFFERED && ifctx->dhcpquerytype != DHCP_REQUEST) || (ifctx->state == IF_DHCP_UNRESOLVED && ifctx->dhcpquerytype != DHCP_DISCOVER) || (ifctx->state == IF_BOOTP_UNRESOLVED && ifctx->dhcpquerytype != DHCP_NOMSG)) { ifctx->sentmsg = 0; bootpc_compose_query(ifctx, gctx, td); } /* Send BOOTP request (or re-send). */ if (ifctx->sentmsg == 0) { switch(ifctx->dhcpquerytype) { case DHCP_DISCOVER: s = "DHCP Discover"; break; case DHCP_REQUEST: s = "DHCP Request"; break; case DHCP_NOMSG: default: s = "BOOTP Query"; break; } printf("Sending %s packet from " "interface %s (%*D)\n", s, ifctx->ireq.ifr_name, ifctx->sdl->sdl_alen, (unsigned char *) LLADDR(ifctx->sdl), ":"); ifctx->sentmsg = 1; } aio.iov_base = (caddr_t) &ifctx->call; aio.iov_len = sizeof(ifctx->call); auio.uio_iov = &aio; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_offset = 0; auio.uio_resid = sizeof(ifctx->call); auio.uio_td = td; /* Set netmask to 0.0.0.0 */ sin = (struct sockaddr_in *) &ifctx->ireq.ifr_addr; clear_sinaddr(sin); error = ifioctl(ifctx->so, SIOCSIFNETMASK, (caddr_t) &ifctx->ireq, td); if (error != 0) panic("bootpc_call:" "set if netmask, error=%d", error); error = sosend(so, (struct sockaddr *) &dst, &auio, NULL, NULL, 0, td); if (error != 0) { printf("bootpc_call: sosend: %d state %08x\n", error, (int) so->so_state); } /* XXX: Is this needed ? */ tsleep(&error, PZERO + 8, "bootpw", 10); /* Set netmask to 255.0.0.0 */ sin = (struct sockaddr_in *) &ifctx->ireq.ifr_addr; clear_sinaddr(sin); sin->sin_addr.s_addr = htonl(0xff000000u); error = ifioctl(ifctx->so, SIOCSIFNETMASK, (caddr_t) &ifctx->ireq, td); if (error != 0) panic("bootpc_call:" "set if netmask, error=%d", error); } if (outstanding == 0 && (rtimo == 0 || time_second >= rtimo)) { error = 0; goto gotreply; } /* Determine new timeout. */ if (timo < MAX_RESEND_DELAY) timo++; else { printf("DHCP/BOOTP timeout for server "); print_sin_addr(&dst); printf("\n"); } /* * Wait for up to timo seconds for a reply. * The socket receive timeout was set to 1 second. */ atimo = timo + time_second; while (time_second < atimo) { aio.iov_base = (caddr_t) &gctx->reply; aio.iov_len = sizeof(gctx->reply); auio.uio_iov = &aio; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_offset = 0; auio.uio_resid = sizeof(gctx->reply); auio.uio_td = td; rcvflg = 0; error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg); gctx->secs = time_second - gctx->starttime; for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (bootpc_ifctx_isresolved(ifctx) != 0 || bootpc_ifctx_isfailed(ifctx) != 0) continue; ifctx->call.secs = htons(gctx->secs); } if (error == EWOULDBLOCK) continue; if (error != 0) goto out; len = sizeof(gctx->reply) - auio.uio_resid; /* Do we have the required number of bytes ? */ if (len < BOOTP_MIN_LEN) continue; gctx->replylen = len; /* Is it a reply? */ if (gctx->reply.op != BOOTP_REPLY) continue; /* Is this an answer to our query */ for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (gctx->reply.xid != ifctx->call.xid) continue; /* Same HW address size ? */ if (gctx->reply.hlen != ifctx->call.hlen) continue; /* Correct HW address ? */ if (bcmp(gctx->reply.chaddr, ifctx->call.chaddr, ifctx->call.hlen) != 0) continue; break; } if (ifctx != NULL) { s = bootpc_tag(&gctx->tmptag, &gctx->reply, gctx->replylen, TAG_DHCP_MSGTYPE); if (s != NULL) { switch (*s) { case DHCP_OFFER: s = "DHCP Offer"; break; case DHCP_ACK: s = "DHCP Ack"; break; default: s = "DHCP (unexpected)"; break; } } else s = "BOOTP Reply"; printf("Received %s packet" " on %s from ", s, ifctx->ireq.ifr_name); print_in_addr(gctx->reply.siaddr); if (gctx->reply.giaddr.s_addr != htonl(INADDR_ANY)) { printf(" via "); print_in_addr(gctx->reply.giaddr); } if (bootpc_received(gctx, ifctx) != 0) { printf(" (accepted)"); if (ifctx->outstanding) { ifctx->outstanding = 0; outstanding--; } /* Network settle delay */ if (outstanding == 0) atimo = time_second + BOOTP_SETTLE_DELAY; } else printf(" (ignored)"); if (ifctx->gotrootpath) { gotrootpath = 1; rtimo = time_second + BOOTP_SETTLE_DELAY; printf(" (got root path)"); } else printf(" (no root path)"); printf("\n"); } } /* while secs */ #ifdef BOOTP_TIMEOUT if (gctx->secs > BOOTP_TIMEOUT && BOOTP_TIMEOUT > 0) break; #endif /* Force a retry if halfway in DHCP negotiation */ retry = 0; for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (ifctx->state == IF_DHCP_OFFERED) { if (ifctx->dhcpquerytype == DHCP_DISCOVER) retry = 1; else ifctx->state = IF_DHCP_UNRESOLVED; } } if (retry != 0) continue; if (gotrootpath != 0) { gctx->gotrootpath = gotrootpath; if (rtimo != 0 && time_second >= rtimo) break; } } /* forever send/receive */ /* * XXX: These are errors of varying seriousness being silently * ignored */ for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (bootpc_ifctx_isresolved(ifctx) == 0) { printf("%s timeout for interface %s\n", ifctx->dhcpquerytype != DHCP_NOMSG ? "DHCP" : "BOOTP", ifctx->ireq.ifr_name); } } if (gctx->gotrootpath != 0) { #if 0 printf("Got a root path, ignoring remaining timeout\n"); #endif error = 0; goto out; } #ifndef BOOTP_NFSROOT for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (bootpc_ifctx_isresolved(ifctx) != 0) { error = 0; goto out; } } #endif error = ETIMEDOUT; goto out; gotreply: out: soclose(so); out0: return error; } static int bootpc_fakeup_interface(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td) { struct sockaddr_in *sin; int error; struct ifreq *ireq; struct socket *so; struct ifaddr *ifa; struct sockaddr_dl *sdl; GIANT_REQUIRED; /* XXX until socket locking done */ error = socreate(AF_INET, &ifctx->so, SOCK_DGRAM, 0, td->td_ucred, td); if (error != 0) panic("nfs_boot: socreate, error=%d", error); ireq = &ifctx->ireq; so = ifctx->so; /* * Bring up the interface. * * Get the old interface flags and or IFF_UP into them; if * IFF_UP set blindly, interface selection can be clobbered. */ error = ifioctl(so, SIOCGIFFLAGS, (caddr_t)ireq, td); if (error != 0) panic("bootpc_fakeup_interface: GIFFLAGS, error=%d", error); ireq->ifr_flags |= IFF_UP; error = ifioctl(so, SIOCSIFFLAGS, (caddr_t)ireq, td); if (error != 0) panic("bootpc_fakeup_interface: SIFFLAGS, error=%d", error); /* * Do enough of ifconfig(8) so that the chosen interface * can talk to the servers. (just set the address) */ /* addr is 0.0.0.0 */ sin = (struct sockaddr_in *) &ireq->ifr_addr; clear_sinaddr(sin); error = ifioctl(so, SIOCSIFADDR, (caddr_t) ireq, td); if (error != 0 && (error != EEXIST || ifctx == gctx->interfaces)) panic("bootpc_fakeup_interface: " "set if addr, error=%d", error); /* netmask is 255.0.0.0 */ sin = (struct sockaddr_in *) &ireq->ifr_addr; clear_sinaddr(sin); sin->sin_addr.s_addr = htonl(0xff000000u); error = ifioctl(so, SIOCSIFNETMASK, (caddr_t)ireq, td); if (error != 0) panic("bootpc_fakeup_interface: set if netmask, error=%d", error); /* Broadcast is 255.255.255.255 */ sin = (struct sockaddr_in *)&ireq->ifr_addr; clear_sinaddr(sin); clear_sinaddr(&ifctx->broadcast); sin->sin_addr.s_addr = htonl(INADDR_BROADCAST); ifctx->broadcast.sin_addr.s_addr = sin->sin_addr.s_addr; error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t)ireq, td); if (error != 0) panic("bootpc_fakeup_interface: " "set if broadcast addr, error=%d", error); /* Get HW address */ sdl = NULL; for (ifa = TAILQ_FIRST(&ifctx->ifp->if_addrhead); ifa != NULL; ifa = TAILQ_NEXT(ifa, ifa_link)) if (ifa->ifa_addr->sa_family == AF_LINK && (sdl = ((struct sockaddr_dl *) ifa->ifa_addr)) != NULL && sdl->sdl_type == IFT_ETHER) break; if (sdl == NULL) panic("bootpc: Unable to find HW address for %s", ifctx->ireq.ifr_name); ifctx->sdl = sdl; return error; } static int bootpc_adjust_interface(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td) { int error; struct sockaddr_in defdst; struct sockaddr_in defmask; struct sockaddr_in *sin; struct ifreq *ireq; struct socket *so; struct sockaddr_in *myaddr; struct sockaddr_in *netmask; struct sockaddr_in *gw; ireq = &ifctx->ireq; so = ifctx->so; myaddr = &ifctx->myaddr; netmask = &ifctx->netmask; gw = &ifctx->gw; if (bootpc_ifctx_isresolved(ifctx) == 0) { /* Shutdown interfaces where BOOTP failed */ printf("Shutdown interface %s\n", ifctx->ireq.ifr_name); error = ifioctl(so, SIOCGIFFLAGS, (caddr_t)ireq, td); if (error != 0) panic("bootpc_adjust_interface: " "SIOCGIFFLAGS, error=%d", error); ireq->ifr_flags &= ~IFF_UP; error = ifioctl(so, SIOCSIFFLAGS, (caddr_t)ireq, td); if (error != 0) panic("bootpc_adjust_interface: " "SIOCSIFFLAGS, error=%d", error); sin = (struct sockaddr_in *) &ireq->ifr_addr; clear_sinaddr(sin); error = ifioctl(so, SIOCDIFADDR, (caddr_t) ireq, td); if (error != 0 && (error != EEXIST || ifctx == gctx->interfaces)) panic("bootpc_adjust_interface: " "SIOCDIFADDR, error=%d", error); return 0; } printf("Adjusted interface %s\n", ifctx->ireq.ifr_name); /* * Do enough of ifconfig(8) so that the chosen interface * can talk to the servers. (just set the address) */ bcopy(netmask, &ireq->ifr_addr, sizeof(*netmask)); error = ifioctl(so, SIOCSIFNETMASK, (caddr_t) ireq, td); if (error != 0) panic("bootpc_adjust_interface: " "set if netmask, error=%d", error); /* Broadcast is with host part of IP address all 1's */ sin = (struct sockaddr_in *) &ireq->ifr_addr; clear_sinaddr(sin); sin->sin_addr.s_addr = myaddr->sin_addr.s_addr | ~ netmask->sin_addr.s_addr; error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t) ireq, td); if (error != 0) panic("bootpc_adjust_interface: " "set if broadcast addr, error=%d", error); bcopy(myaddr, &ireq->ifr_addr, sizeof(*myaddr)); error = ifioctl(so, SIOCSIFADDR, (caddr_t) ireq, td); if (error != 0 && (error != EEXIST || ifctx == gctx->interfaces)) panic("bootpc_adjust_interface: " "set if addr, error=%d", error); /* Add new default route */ if (ifctx->gotgw != 0 || gctx->gotgw == 0) { clear_sinaddr(&defdst); clear_sinaddr(&defmask); error = rtrequest(RTM_ADD, (struct sockaddr *) &defdst, (struct sockaddr *) gw, (struct sockaddr *) &defmask, (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL); if (error != 0) { printf("bootpc_adjust_interface: " "add net route, error=%d\n", error); return error; } } return 0; } static int setfs(struct sockaddr_in *addr, char *path, char *p, const struct in_addr *siaddr) { if (getip(&p, &addr->sin_addr) == 0) { if (siaddr != NULL && *p == '/') bcopy(siaddr, &addr->sin_addr, sizeof(struct in_addr)); else return 0; } else { if (*p != ':') return 0; p++; } addr->sin_len = sizeof(struct sockaddr_in); addr->sin_family = AF_INET; strlcpy(path, p, MNAMELEN); return 1; } static int getip(char **ptr, struct in_addr *addr) { char *p; unsigned int ip; int val; p = *ptr; ip = 0; if (((val = getdec(&p)) < 0) || (val > 255)) return 0; ip = val << 24; if (*p != '.') return 0; p++; if (((val = getdec(&p)) < 0) || (val > 255)) return 0; ip |= (val << 16); if (*p != '.') return 0; p++; if (((val = getdec(&p)) < 0) || (val > 255)) return 0; ip |= (val << 8); if (*p != '.') return 0; p++; if (((val = getdec(&p)) < 0) || (val > 255)) return 0; ip |= val; addr->s_addr = htonl(ip); *ptr = p; return 1; } static int getdec(char **ptr) { char *p; int ret; p = *ptr; ret = 0; if ((*p < '0') || (*p > '9')) return -1; while ((*p >= '0') && (*p <= '9')) { ret = ret * 10 + (*p - '0'); p++; } *ptr = p; return ret; } static char * substr(char *a, char *b) { char *loc1; char *loc2; while (*a != '\0') { loc1 = a; loc2 = b; while (*loc1 == *loc2++) { if (*loc1 == '\0') return 0; loc1++; if (*loc2 == '\0') return loc1; } a++; } return 0; } static void mountopts(struct nfs_args *args, char *p) { char *tmp; args->version = NFS_ARGSVERSION; args->rsize = 8192; args->wsize = 8192; args->flags = NFSMNT_RSIZE | NFSMNT_WSIZE | NFSMNT_RESVPORT; args->sotype = SOCK_DGRAM; if (p == NULL) return; if ((tmp = (char *)substr(p, "rsize="))) args->rsize = getdec(&tmp); if ((tmp = (char *)substr(p, "wsize="))) args->wsize = getdec(&tmp); if ((tmp = (char *)substr(p, "intr"))) args->flags |= NFSMNT_INT; if ((tmp = (char *)substr(p, "soft"))) args->flags |= NFSMNT_SOFT; if ((tmp = (char *)substr(p, "noconn"))) args->flags |= NFSMNT_NOCONN; if ((tmp = (char *)substr(p, "tcp"))) args->sotype = SOCK_STREAM; } static int xdr_opaque_decode(struct mbuf **mptr, u_char *buf, int len) { struct mbuf *m; int alignedlen; m = *mptr; alignedlen = ( len + 3 ) & ~3; if (m->m_len < alignedlen) { m = m_pullup(m, alignedlen); if (m == NULL) { *mptr = NULL; return EBADRPC; } } bcopy(mtod(m, u_char *), buf, len); m_adj(m, alignedlen); *mptr = m; return 0; } static int xdr_int_decode(struct mbuf **mptr, int *iptr) { u_int32_t i; if (xdr_opaque_decode(mptr, (u_char *) &i, sizeof(u_int32_t)) != 0) return EBADRPC; *iptr = fxdr_unsigned(u_int32_t, i); return 0; } static void print_sin_addr(struct sockaddr_in *sin) { print_in_addr(sin->sin_addr); } static void print_in_addr(struct in_addr addr) { unsigned int ip; ip = ntohl(addr.s_addr); printf("%d.%d.%d.%d", ip >> 24, (ip >> 16) & 255, (ip >> 8) & 255, ip & 255); } static void bootpc_compose_query(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td) { unsigned char *vendp; unsigned char vendor_client[64]; uint32_t leasetime; uint8_t vendor_client_len; ifctx->gotrootpath = 0; bzero((caddr_t) &ifctx->call, sizeof(ifctx->call)); /* bootpc part */ ifctx->call.op = BOOTP_REQUEST; /* BOOTREQUEST */ ifctx->call.htype = 1; /* 10mb ethernet */ ifctx->call.hlen = ifctx->sdl->sdl_alen;/* Hardware address length */ ifctx->call.hops = 0; if (bootpc_ifctx_isunresolved(ifctx) != 0) ifctx->xid++; ifctx->call.xid = txdr_unsigned(ifctx->xid); bcopy(LLADDR(ifctx->sdl), &ifctx->call.chaddr, ifctx->sdl->sdl_alen); vendp = ifctx->call.vend; *vendp++ = 99; /* RFC1048 cookie */ *vendp++ = 130; *vendp++ = 83; *vendp++ = 99; *vendp++ = TAG_MAXMSGSIZE; *vendp++ = 2; *vendp++ = (sizeof(struct bootp_packet) >> 8) & 255; *vendp++ = sizeof(struct bootp_packet) & 255; snprintf(vendor_client, sizeof(vendor_client), "%s:%s:%s", ostype, MACHINE, osrelease); vendor_client_len = strlen(vendor_client); *vendp++ = TAG_VENDOR_INDENTIFIER; *vendp++ = vendor_client_len; memcpy(vendp, vendor_client, vendor_client_len); vendp += vendor_client_len;; ifctx->dhcpquerytype = DHCP_NOMSG; switch (ifctx->state) { case IF_DHCP_UNRESOLVED: *vendp++ = TAG_DHCP_MSGTYPE; *vendp++ = 1; *vendp++ = DHCP_DISCOVER; ifctx->dhcpquerytype = DHCP_DISCOVER; ifctx->gotdhcpserver = 0; break; case IF_DHCP_OFFERED: *vendp++ = TAG_DHCP_MSGTYPE; *vendp++ = 1; *vendp++ = DHCP_REQUEST; ifctx->dhcpquerytype = DHCP_REQUEST; *vendp++ = TAG_DHCP_REQ_ADDR; *vendp++ = 4; memcpy(vendp, &ifctx->reply.yiaddr, 4); vendp += 4; if (ifctx->gotdhcpserver != 0) { *vendp++ = TAG_DHCP_SERVERID; *vendp++ = 4; memcpy(vendp, &ifctx->dhcpserver, 4); vendp += 4; } *vendp++ = TAG_DHCP_LEASETIME; *vendp++ = 4; leasetime = htonl(300); memcpy(vendp, &leasetime, 4); vendp += 4; break; default: break; } *vendp = TAG_END; ifctx->call.secs = 0; ifctx->call.flags = htons(0x8000); /* We need a broadcast answer */ } static int bootpc_hascookie(struct bootp_packet *bp) { return (bp->vend[0] == 99 && bp->vend[1] == 130 && bp->vend[2] == 83 && bp->vend[3] == 99); } static void bootpc_tag_helper(struct bootpc_tagcontext *tctx, unsigned char *start, int len, int tag) { unsigned char *j; unsigned char *ej; unsigned char code; if (tctx->badtag != 0 || tctx->badopt != 0) return; j = start; ej = j + len; while (j < ej) { code = *j++; if (code == TAG_PAD) continue; if (code == TAG_END) return; if (j >= ej || j + *j + 1 > ej) { tctx->badopt = 1; return; } len = *j++; if (code == tag) { if (tctx->taglen + len > TAG_MAXLEN) { tctx->badtag = 1; return; } tctx->foundopt = 1; if (len > 0) memcpy(tctx->buf + tctx->taglen, j, len); tctx->taglen += len; } if (code == TAG_OVERLOAD) tctx->overload = *j; j += len; } } static unsigned char * bootpc_tag(struct bootpc_tagcontext *tctx, struct bootp_packet *bp, int len, int tag) { tctx->overload = 0; tctx->badopt = 0; tctx->badtag = 0; tctx->foundopt = 0; tctx->taglen = 0; if (bootpc_hascookie(bp) == 0) return NULL; bootpc_tag_helper(tctx, &bp->vend[4], (unsigned char *) bp + len - &bp->vend[4], tag); if ((tctx->overload & OVERLOAD_FILE) != 0) bootpc_tag_helper(tctx, (unsigned char *) bp->file, sizeof(bp->file), tag); if ((tctx->overload & OVERLOAD_SNAME) != 0) bootpc_tag_helper(tctx, (unsigned char *) bp->sname, sizeof(bp->sname), tag); if (tctx->badopt != 0 || tctx->badtag != 0 || tctx->foundopt == 0) return NULL; tctx->buf[tctx->taglen] = '\0'; return tctx->buf; } static void bootpc_decode_reply(struct nfsv3_diskless *nd, struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx) { char *p; unsigned int ip; ifctx->gotgw = 0; ifctx->gotnetmask = 0; clear_sinaddr(&ifctx->myaddr); clear_sinaddr(&ifctx->netmask); clear_sinaddr(&ifctx->gw); ifctx->myaddr.sin_addr = ifctx->reply.yiaddr; ip = ntohl(ifctx->myaddr.sin_addr.s_addr); printf("%s at ", ifctx->ireq.ifr_name); print_sin_addr(&ifctx->myaddr); printf(" server "); print_in_addr(ifctx->reply.siaddr); ifctx->gw.sin_addr = ifctx->reply.giaddr; if (ifctx->reply.giaddr.s_addr != htonl(INADDR_ANY)) { printf(" via gateway "); print_in_addr(ifctx->reply.giaddr); } /* This call used for the side effect (overload flag) */ (void) bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_END); if ((gctx->tmptag.overload & OVERLOAD_SNAME) == 0) if (ifctx->reply.sname[0] != '\0') printf(" server name %s", ifctx->reply.sname); if ((gctx->tmptag.overload & OVERLOAD_FILE) == 0) if (ifctx->reply.file[0] != '\0') printf(" boot file %s", ifctx->reply.file); printf("\n"); p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_SUBNETMASK); if (p != NULL) { if (gctx->tag.taglen != 4) panic("bootpc: subnet mask len is %d", gctx->tag.taglen); bcopy(p, &ifctx->netmask.sin_addr, 4); ifctx->gotnetmask = 1; printf("subnet mask "); print_sin_addr(&ifctx->netmask); printf(" "); } p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_ROUTERS); if (p != NULL) { /* Routers */ if (gctx->tag.taglen % 4) panic("bootpc: Router Len is %d", gctx->tag.taglen); if (gctx->tag.taglen > 0) { bcopy(p, &ifctx->gw.sin_addr, 4); printf("router "); print_sin_addr(&ifctx->gw); printf(" "); ifctx->gotgw = 1; gctx->gotgw = 1; } } p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_ROOT); if (p != NULL) { if (gctx->setrootfs != NULL) { printf("rootfs %s (ignored) ", p); } else if (setfs(&nd->root_saddr, nd->root_hostnam, p, &ifctx->reply.siaddr)) { if (*p == '/') { printf("root_server "); print_sin_addr(&nd->root_saddr); printf(" "); } printf("rootfs %s ", p); gctx->gotrootpath = 1; ifctx->gotrootpath = 1; gctx->setrootfs = ifctx; p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_ROOTOPTS); if (p != NULL) { mountopts(&nd->root_args, p); printf("rootopts %s ", p); } } else panic("Failed to set rootfs to %s", p); } p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_HOSTNAME); if (p != NULL) { if (gctx->tag.taglen >= MAXHOSTNAMELEN) panic("bootpc: hostname >= %d bytes", MAXHOSTNAMELEN); if (gctx->sethostname != NULL) { printf("hostname %s (ignored) ", p); } else { strcpy(nd->my_hostnam, p); strcpy(hostname, p); printf("hostname %s ", hostname); gctx->sethostname = ifctx; } } p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_COOKIE); if (p != NULL) { /* store in a sysctl variable */ int i, l = sizeof(bootp_cookie) - 1; for (i = 0; i < l && p[i] != '\0'; i++) bootp_cookie[i] = p[i]; p[i] = '\0'; } printf("\n"); if (ifctx->gotnetmask == 0) { if (IN_CLASSA(ntohl(ifctx->myaddr.sin_addr.s_addr))) ifctx->netmask.sin_addr.s_addr = htonl(IN_CLASSA_NET); else if (IN_CLASSB(ntohl(ifctx->myaddr.sin_addr.s_addr))) ifctx->netmask.sin_addr.s_addr = htonl(IN_CLASSB_NET); else ifctx->netmask.sin_addr.s_addr = htonl(IN_CLASSC_NET); } if (ifctx->gotgw == 0) { /* Use proxyarp */ ifctx->gw.sin_addr.s_addr = ifctx->myaddr.sin_addr.s_addr; } } void bootpc_init(void) { struct bootpc_ifcontext *ifctx, *nctx; /* Interface BOOTP contexts */ struct bootpc_globalcontext *gctx; /* Global BOOTP context */ struct ifnet *ifp; int error; #ifndef BOOTP_WIRED_TO int ifcnt; #endif struct nfsv3_diskless *nd; struct thread *td; nd = &nfsv3_diskless; td = curthread; /* * If already filled in, don't touch it here */ if (nfs_diskless_valid != 0) return; gctx = malloc(sizeof(*gctx), M_TEMP, M_WAITOK | M_ZERO); if (gctx == NULL) panic("Failed to allocate bootp global context structure"); gctx->xid = ~0xFFFF; gctx->starttime = time_second; /* * Find a network interface. */ #ifdef BOOTP_WIRED_TO printf("bootpc_init: wired to interface '%s'\n", __XSTRING(BOOTP_WIRED_TO)); allocifctx(gctx); #else /* * Preallocate interface context storage, if another interface * attaches and wins the race, it won't be eligible for bootp. */ IFNET_RLOCK(); for (ifp = TAILQ_FIRST(&ifnet), ifcnt = 0; ifp != NULL; ifp = TAILQ_NEXT(ifp, if_link)) { if ((ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT | IFF_BROADCAST)) != IFF_BROADCAST) continue; ifcnt++; } IFNET_RUNLOCK(); if (ifcnt == 0) panic("bootpc_init: no eligible interfaces"); for (; ifcnt > 0; ifcnt--) allocifctx(gctx); #endif IFNET_RLOCK(); for (ifp = TAILQ_FIRST(&ifnet), ifctx = gctx->interfaces; ifp != NULL && ifctx != NULL; ifp = TAILQ_NEXT(ifp, if_link)) { strlcpy(ifctx->ireq.ifr_name, ifp->if_xname, sizeof(ifctx->ireq.ifr_name)); #ifdef BOOTP_WIRED_TO if (strcmp(ifctx->ireq.ifr_name, __XSTRING(BOOTP_WIRED_TO)) != 0) continue; #else if ((ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT | IFF_BROADCAST)) != IFF_BROADCAST) continue; #endif ifctx->ifp = ifp; ifctx = ifctx->next; } IFNET_RUNLOCK(); if (gctx->interfaces == NULL || gctx->interfaces->ifp == NULL) { #ifdef BOOTP_WIRED_TO panic("bootpc_init: Could not find interface specified " "by BOOTP_WIRED_TO: " __XSTRING(BOOTP_WIRED_TO)); #else panic("bootpc_init: no suitable interface"); #endif } for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) bootpc_fakeup_interface(ifctx, gctx, td); for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) bootpc_compose_query(ifctx, gctx, td); error = bootpc_call(gctx, td); if (error != 0) { #ifdef BOOTP_NFSROOT panic("BOOTP call failed"); #else printf("BOOTP call failed\n"); #endif } mountopts(&nd->root_args, NULL); for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) if (bootpc_ifctx_isresolved(ifctx) != 0) bootpc_decode_reply(nd, ifctx, gctx); #ifdef BOOTP_NFSROOT if (gctx->gotrootpath == 0) panic("bootpc: No root path offered"); #endif for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { bootpc_adjust_interface(ifctx, gctx, td); soclose(ifctx->so); } for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) if (ifctx->gotrootpath != 0) break; if (ifctx == NULL) { for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) if (bootpc_ifctx_isresolved(ifctx) != 0) break; } if (ifctx == NULL) goto out; if (gctx->gotrootpath != 0) { error = md_mount(&nd->root_saddr, nd->root_hostnam, nd->root_fh, &nd->root_fhsize, &nd->root_args, td); if (error != 0) panic("nfs_boot: mountd root, error=%d", error); nfs_diskless_valid = 3; } strcpy(nd->myif.ifra_name, ifctx->ireq.ifr_name); bcopy(&ifctx->myaddr, &nd->myif.ifra_addr, sizeof(ifctx->myaddr)); bcopy(&ifctx->myaddr, &nd->myif.ifra_broadaddr, sizeof(ifctx->myaddr)); ((struct sockaddr_in *) &nd->myif.ifra_broadaddr)->sin_addr.s_addr = ifctx->myaddr.sin_addr.s_addr | ~ ifctx->netmask.sin_addr.s_addr; bcopy(&ifctx->netmask, &nd->myif.ifra_mask, sizeof(ifctx->netmask)); out: for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = nctx) { nctx = ifctx->next; free(ifctx, M_TEMP); } free(gctx, M_TEMP); } /* * RPC: mountd/mount * Given a server pathname, get an NFS file handle. * Also, sets sin->sin_port to the NFS service port. */ static int md_mount(struct sockaddr_in *mdsin, char *path, u_char *fhp, int *fhsizep, struct nfs_args *args, struct thread *td) { struct mbuf *m; int error; int authunixok; int authcount; int authver; #ifdef BOOTP_NFSV3 /* First try NFS v3 */ /* Get port number for MOUNTD. */ error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER3, &mdsin->sin_port, td); if (error == 0) { m = xdr_string_encode(path, strlen(path)); /* Do RPC to mountd. */ error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER3, RPCMNT_MOUNT, &m, NULL, td); } if (error == 0) { args->flags |= NFSMNT_NFSV3; } else { #endif /* Fallback to NFS v2 */ /* Get port number for MOUNTD. */ error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER1, &mdsin->sin_port, td); if (error != 0) return error; m = xdr_string_encode(path, strlen(path)); /* Do RPC to mountd. */ error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER1, RPCMNT_MOUNT, &m, NULL, td); if (error != 0) return error; /* message already freed */ #ifdef BOOTP_NFSV3 } #endif if (xdr_int_decode(&m, &error) != 0 || error != 0) goto bad; if ((args->flags & NFSMNT_NFSV3) != 0) { if (xdr_int_decode(&m, fhsizep) != 0 || *fhsizep > NFSX_V3FHMAX || *fhsizep <= 0) goto bad; } else *fhsizep = NFSX_V2FH; if (xdr_opaque_decode(&m, fhp, *fhsizep) != 0) goto bad; if (args->flags & NFSMNT_NFSV3) { if (xdr_int_decode(&m, &authcount) != 0) goto bad; authunixok = 0; if (authcount < 0 || authcount > 100) goto bad; while (authcount > 0) { if (xdr_int_decode(&m, &authver) != 0) goto bad; if (authver == RPCAUTH_UNIX) authunixok = 1; authcount--; } if (authunixok == 0) goto bad; } /* Set port number for NFS use. */ error = krpc_portmap(mdsin, NFS_PROG, (args->flags & NFSMNT_NFSV3) ? NFS_VER3 : NFS_VER2, &mdsin->sin_port, td); goto out; bad: error = EBADRPC; out: m_freem(m); return error; } Index: head/sys/nfsclient/krpc_subr.c =================================================================== --- head/sys/nfsclient/krpc_subr.c (revision 130553) +++ head/sys/nfsclient/krpc_subr.c (revision 130554) @@ -1,486 +1,486 @@ /* $NetBSD: krpc_subr.c,v 1.12.4.1 1996/06/07 00:52:26 cgd Exp $ */ /* * Copyright (c) 1995 Gordon Ross, Adam Glass * Copyright (c) 1992 Regents of the University of California. * All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * partially based on: * libnetboot/rpc.c * @(#) Header: rpc.c,v 1.12 93/09/28 08:31:56 leres Exp (LBL) */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Kernel support for Sun RPC * * Used currently for bootstrapping in nfs diskless configurations. */ /* * Generic RPC headers */ struct auth_info { u_int32_t authtype; /* auth type */ u_int32_t authlen; /* auth length */ }; struct auth_unix { int32_t ua_time; int32_t ua_hostname; /* null */ int32_t ua_uid; int32_t ua_gid; int32_t ua_gidlist; /* null */ }; struct krpc_call { u_int32_t rp_xid; /* request transaction id */ int32_t rp_direction; /* call direction (0) */ u_int32_t rp_rpcvers; /* rpc version (2) */ u_int32_t rp_prog; /* program */ u_int32_t rp_vers; /* version */ u_int32_t rp_proc; /* procedure */ struct auth_info rpc_auth; struct auth_unix rpc_unix; struct auth_info rpc_verf; }; struct krpc_reply { u_int32_t rp_xid; /* request transaction id */ int32_t rp_direction; /* call direction (1) */ int32_t rp_astatus; /* accept status (0: accepted) */ union { u_int32_t rpu_errno; struct { struct auth_info rok_auth; u_int32_t rok_status; } rpu_rok; } rp_u; }; #define rp_errno rp_u.rpu_errno #define rp_auth rp_u.rpu_rok.rok_auth #define rp_status rp_u.rpu_rok.rok_status #define MIN_REPLY_HDR 16 /* xid, dir, astat, errno */ /* * What is the longest we will wait before re-sending a request? * Note this is also the frequency of "RPC timeout" messages. * The re-send loop count sup linearly to this maximum, so the * first complaint will happen after (1+2+3+4+5)=15 seconds. */ #define MAX_RESEND_DELAY 5 /* seconds */ /* * Call portmap to lookup a port number for a particular rpc program * Returns non-zero error on failure. */ int krpc_portmap(struct sockaddr_in *sin, u_int prog, u_int vers, u_int16_t *portp, struct thread *td) { struct sdata { u_int32_t prog; /* call program */ u_int32_t vers; /* call version */ u_int32_t proto; /* call protocol */ u_int32_t port; /* call port (unused) */ } *sdata; struct rdata { u_int16_t pad; u_int16_t port; } *rdata; struct mbuf *m; int error; /* The portmapper port is fixed. */ if (prog == PMAPPROG) { *portp = htons(PMAPPORT); return 0; } m = m_get(M_TRYWAIT, MT_DATA); if (m == NULL) return ENOBUFS; sdata = mtod(m, struct sdata *); m->m_len = sizeof(*sdata); /* Do the RPC to get it. */ sdata->prog = txdr_unsigned(prog); sdata->vers = txdr_unsigned(vers); sdata->proto = txdr_unsigned(IPPROTO_UDP); sdata->port = 0; sin->sin_port = htons(PMAPPORT); error = krpc_call(sin, PMAPPROG, PMAPVERS, PMAPPROC_GETPORT, &m, NULL, td); if (error) return error; if (m->m_len < sizeof(*rdata)) { m = m_pullup(m, sizeof(*rdata)); if (m == NULL) return ENOBUFS; } rdata = mtod(m, struct rdata *); *portp = rdata->port; m_freem(m); return 0; } /* * Do a remote procedure call (RPC) and wait for its reply. * If from_p is non-null, then we are doing broadcast, and * the address from whence the response came is saved there. */ int krpc_call(struct sockaddr_in *sa, u_int prog, u_int vers, u_int func, struct mbuf **data, struct sockaddr **from_p, struct thread *td) { struct socket *so; struct sockaddr_in *sin, ssin; struct sockaddr *from; struct mbuf *m, *nam, *mhead; struct krpc_call *call; struct krpc_reply *reply; struct sockopt sopt; struct timeval tv; struct uio auio; int error, rcvflg, timo, secs, len; static u_int32_t xid = ~0xFF; u_int16_t tport; u_int32_t saddr; /* * Validate address family. * Sorry, this is INET specific... */ if (sa->sin_family != AF_INET) return (EAFNOSUPPORT); /* Free at end if not null. */ nam = mhead = NULL; from = NULL; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); /* * Create socket and set its recieve timeout. */ if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0, td->td_ucred, td))) goto out; tv.tv_sec = 1; tv.tv_usec = 0; bzero(&sopt, sizeof sopt); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_RCVTIMEO; sopt.sopt_val = &tv; sopt.sopt_valsize = sizeof tv; if ((error = sosetopt(so, &sopt)) != 0) goto out; /* * Enable broadcast if necessary. */ if (from_p) { int on = 1; sopt.sopt_name = SO_BROADCAST; sopt.sopt_val = &on; sopt.sopt_valsize = sizeof on; if ((error = sosetopt(so, &sopt)) != 0) goto out; } /* * Bind the local endpoint to a reserved port, * because some NFS servers refuse requests from * non-reserved (non-privileged) ports. */ sin = &ssin; bzero(sin, sizeof *sin); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; tport = IPPORT_RESERVED; do { tport--; sin->sin_port = htons(tport); error = sobind(so, (struct sockaddr *)sin, td); } while (error == EADDRINUSE && tport > IPPORT_RESERVED / 2); if (error) { printf("bind failed\n"); goto out; } /* * Setup socket address for the server. */ /* * Prepend RPC message header. */ mhead = m_gethdr(M_TRYWAIT, MT_DATA); mhead->m_next = *data; call = mtod(mhead, struct krpc_call *); mhead->m_len = sizeof(*call); bzero((caddr_t)call, sizeof(*call)); /* rpc_call part */ xid++; call->rp_xid = txdr_unsigned(xid); /* call->rp_direction = 0; */ call->rp_rpcvers = txdr_unsigned(2); call->rp_prog = txdr_unsigned(prog); call->rp_vers = txdr_unsigned(vers); call->rp_proc = txdr_unsigned(func); /* rpc_auth part (auth_unix as root) */ call->rpc_auth.authtype = txdr_unsigned(RPCAUTH_UNIX); call->rpc_auth.authlen = txdr_unsigned(sizeof(struct auth_unix)); /* rpc_verf part (auth_null) */ call->rpc_verf.authtype = 0; call->rpc_verf.authlen = 0; /* * Setup packet header */ len = 0; m = mhead; while (m) { len += m->m_len; m = m->m_next; } mhead->m_pkthdr.len = len; mhead->m_pkthdr.rcvif = NULL; /* * Send it, repeatedly, until a reply is received, * but delay each re-send by an increasing amount. * If the delay hits the maximum, start complaining. */ timo = 0; for (;;) { /* Send RPC request (or re-send). */ m = m_copym(mhead, 0, M_COPYALL, M_TRYWAIT); if (m == NULL) { error = ENOBUFS; goto out; } error = sosend(so, (struct sockaddr *)sa, NULL, m, NULL, 0, td); if (error) { printf("krpc_call: sosend: %d\n", error); goto out; } m = NULL; /* Determine new timeout. */ if (timo < MAX_RESEND_DELAY) timo++; else { saddr = ntohl(sa->sin_addr.s_addr); printf("RPC timeout for server %d.%d.%d.%d\n", (saddr >> 24) & 255, (saddr >> 16) & 255, (saddr >> 8) & 255, saddr & 255); } /* * Wait for up to timo seconds for a reply. * The socket receive timeout was set to 1 second. */ secs = timo; while (secs > 0) { if (from) { FREE(from, M_SONAME); from = NULL; } if (m) { m_freem(m); m = NULL; } bzero(&auio, sizeof(auio)); auio.uio_resid = len = 1<<16; rcvflg = 0; error = soreceive(so, &from, &auio, &m, NULL, &rcvflg); if (error == EWOULDBLOCK) { secs--; continue; } if (error) goto out; len -= auio.uio_resid; /* Does the reply contain at least a header? */ if (len < MIN_REPLY_HDR) continue; if (m->m_len < MIN_REPLY_HDR) continue; reply = mtod(m, struct krpc_reply *); /* Is it the right reply? */ if (reply->rp_direction != txdr_unsigned(RPC_REPLY)) continue; if (reply->rp_xid != txdr_unsigned(xid)) continue; /* Was RPC accepted? (authorization OK) */ if (reply->rp_astatus != 0) { error = fxdr_unsigned(u_int32_t, reply->rp_errno); printf("rpc denied, error=%d\n", error); continue; } /* Did the call succeed? */ if (reply->rp_status != 0) { error = fxdr_unsigned(u_int32_t, reply->rp_status); if (error == RPC_PROGMISMATCH) { error = EBADRPC; goto out; } printf("rpc denied, status=%d\n", error); continue; } goto gotreply; /* break two levels */ } /* while secs */ } /* forever send/receive */ error = ETIMEDOUT; goto out; gotreply: /* * Get RPC reply header into first mbuf, * get its length, then strip it off. */ len = sizeof(*reply); if (m->m_len < len) { m = m_pullup(m, len); if (m == NULL) { error = ENOBUFS; goto out; } } reply = mtod(m, struct krpc_reply *); if (reply->rp_auth.authtype != 0) { len += fxdr_unsigned(u_int32_t, reply->rp_auth.authlen); len = (len + 3) & ~3; /* XXX? */ } m_adj(m, len); /* result */ *data = m; if (from_p) { *from_p = from; from = NULL; } out: if (mhead) m_freem(mhead); if (from) free(from, M_SONAME); soclose(so); return error; } /* * eXternal Data Representation routines. * (but with non-standard args...) */ /* * String representation for RPC. */ struct xdr_string { u_int32_t len; /* length without null or padding */ char data[4]; /* data (longer, of course) */ /* data is padded to a long-word boundary */ }; struct mbuf * xdr_string_encode(char *str, int len) { struct mbuf *m; struct xdr_string *xs; int dlen; /* padded string length */ int mlen; /* message length */ dlen = (len + 3) & ~3; mlen = dlen + 4; if (mlen > MCLBYTES) /* If too big, we just can't do it. */ return (NULL); m = m_get(M_TRYWAIT, MT_DATA); if (mlen > MLEN) { MCLGET(m, M_TRYWAIT); if ((m->m_flags & M_EXT) == 0) { (void) m_free(m); /* There can be only one. */ return (NULL); } } xs = mtod(m, struct xdr_string *); m->m_len = mlen; xs->len = txdr_unsigned(len); bcopy(str, xs->data, len); return (m); } Index: head/sys/nfsclient/nfs_socket.c =================================================================== --- head/sys/nfsclient/nfs_socket.c (revision 130553) +++ head/sys/nfsclient/nfs_socket.c (revision 130554) @@ -1,1439 +1,1439 @@ /* * Copyright (c) 1989, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 */ #include __FBSDID("$FreeBSD$"); /* * Socket operations for use by nfs */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TRUE 1 #define FALSE 0 /* * Estimate rto for an nfs rpc sent via. an unreliable datagram. * Use the mean and mean deviation of rtt for the appropriate type of rpc * for the frequent rpcs and a default for the others. * The justification for doing "other" this way is that these rpcs * happen so infrequently that timer est. would probably be stale. * Also, since many of these rpcs are * non-idempotent, a conservative timeout is desired. * getattr, lookup - A+2D * read, write - A+4D * other - nm_timeo */ #define NFS_RTO(n, t) \ ((t) == 0 ? (n)->nm_timeo : \ ((t) < 3 ? \ (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] /* * Defines which timer to use for the procnum. * 0 - default * 1 - getattr * 2 - lookup * 3 - read * 4 - write */ static int proct[NFS_NPROCS] = { 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, }; static int nfs_realign_test; static int nfs_realign_count; static int nfs_bufpackets = 4; SYSCTL_DECL(_vfs_nfs); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, ""); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, ""); SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, ""); /* * There is a congestion window for outstanding rpcs maintained per mount * point. The cwnd size is adjusted in roughly the way that: * Van Jacobson, Congestion avoidance and Control, In "Proceedings of * SIGCOMM '88". ACM, August 1988. * describes for TCP. The cwnd size is chopped in half on a retransmit timeout * and incremented by 1/cwnd when each rpc reply is received and a full cwnd * of rpcs is in progress. * (The sent count and cwnd are scaled for integer arith.) * Variants of "slow start" were tried and were found to be too much of a * performance hit (ave. rtt 3 times larger), * I suspect due to the large rtt that nfs rpcs have. */ #define NFS_CWNDSCALE 256 #define NFS_MAXCWND (NFS_CWNDSCALE * 32) #define NFS_NBACKOFF 8 static int nfs_backoff[NFS_NBACKOFF] = { 2, 4, 8, 16, 32, 64, 128, 256, }; struct callout nfs_callout; static int nfs_msg(struct thread *, char *, char *); static int nfs_rcvlock(struct nfsreq *); static void nfs_rcvunlock(struct nfsreq *); static void nfs_realign(struct mbuf **pm, int hsiz); static int nfs_receive(struct nfsreq *rep, struct sockaddr **aname, struct mbuf **mp); static int nfs_reply(struct nfsreq *); static void nfs_softterm(struct nfsreq *rep); static int nfs_reconnect(struct nfsreq *rep); /* * Initialize sockets and congestion for a new NFS connection. * We do not free the sockaddr if error. */ int nfs_connect(struct nfsmount *nmp, struct nfsreq *rep) { struct socket *so; int s, error, rcvreserve, sndreserve; int pktscale; struct sockaddr *saddr; struct thread *td = &thread0; /* only used for socreate and sobind */ - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); nmp->nm_so = NULL; saddr = nmp->nm_nam; error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto, nmp->nm_mountp->mnt_cred, td); if (error) goto bad; so = nmp->nm_so; nmp->nm_soflags = so->so_proto->pr_flags; /* * Some servers require that the client port be a reserved port number. */ if (nmp->nm_flag & NFSMNT_RESVPORT) { struct sockopt sopt; int ip, ip2, len; struct sockaddr_in6 ssin; struct sockaddr *sa; bzero(&sopt, sizeof sopt); switch(saddr->sa_family) { case AF_INET: sopt.sopt_level = IPPROTO_IP; sopt.sopt_name = IP_PORTRANGE; ip = IP_PORTRANGE_LOW; ip2 = IP_PORTRANGE_DEFAULT; len = sizeof (struct sockaddr_in); break; #ifdef INET6 case AF_INET6: sopt.sopt_level = IPPROTO_IPV6; sopt.sopt_name = IPV6_PORTRANGE; ip = IPV6_PORTRANGE_LOW; ip2 = IPV6_PORTRANGE_DEFAULT; len = sizeof (struct sockaddr_in6); break; #endif default: goto noresvport; } sa = (struct sockaddr *)&ssin; bzero(sa, len); sa->sa_len = len; sa->sa_family = saddr->sa_family; sopt.sopt_dir = SOPT_SET; sopt.sopt_val = (void *)&ip; sopt.sopt_valsize = sizeof(ip); error = sosetopt(so, &sopt); if (error) goto bad; error = sobind(so, sa, td); if (error) goto bad; ip = ip2; error = sosetopt(so, &sopt); if (error) goto bad; noresvport: ; } /* * Protocols that do not require connections may be optionally left * unconnected for servers that reply from a port other than NFS_PORT. */ if (nmp->nm_flag & NFSMNT_NOCONN) { if (nmp->nm_soflags & PR_CONNREQUIRED) { error = ENOTCONN; goto bad; } } else { error = soconnect(so, nmp->nm_nam, td); if (error) goto bad; /* * Wait for the connection to complete. Cribbed from the * connect system call but with the wait timing out so * that interruptible mounts don't hang here for a long time. */ s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { (void) tsleep(&so->so_timeo, PSOCK, "nfscon", 2 * hz); if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0 && rep && (error = nfs_sigintr(nmp, rep, rep->r_td)) != 0) { so->so_state &= ~SS_ISCONNECTING; splx(s); goto bad; } } if (so->so_error) { error = so->so_error; so->so_error = 0; splx(s); goto bad; } splx(s); } so->so_rcv.sb_timeo = 5 * hz; so->so_snd.sb_timeo = 5 * hz; /* * Get buffer reservation size from sysctl, but impose reasonable * limits. */ pktscale = nfs_bufpackets; if (pktscale < 2) pktscale = 2; if (pktscale > 64) pktscale = 64; if (nmp->nm_sotype == SOCK_DGRAM) { sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + NFS_MAXPKTHDR) * pktscale; } else if (nmp->nm_sotype == SOCK_SEQPACKET) { sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale; rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + NFS_MAXPKTHDR) * pktscale; } else { if (nmp->nm_sotype != SOCK_STREAM) panic("nfscon sotype"); if (so->so_proto->pr_flags & PR_CONNREQUIRED) { struct sockopt sopt; int val; bzero(&sopt, sizeof sopt); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_KEEPALIVE; sopt.sopt_val = &val; sopt.sopt_valsize = sizeof val; val = 1; sosetopt(so, &sopt); } if (so->so_proto->pr_protocol == IPPROTO_TCP) { struct sockopt sopt; int val; bzero(&sopt, sizeof sopt); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; sopt.sopt_val = &val; sopt.sopt_valsize = sizeof val; val = 1; sosetopt(so, &sopt); } sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_int32_t)) * pktscale; rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_int32_t)) * pktscale; } error = soreserve(so, sndreserve, rcvreserve); if (error) goto bad; so->so_rcv.sb_flags |= SB_NOINTR; so->so_snd.sb_flags |= SB_NOINTR; /* Initialize other non-zero congestion variables */ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = (NFS_TIMEO << 3); nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = nmp->nm_sdrtt[3] = 0; nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ nmp->nm_sent = 0; nmp->nm_timeouts = 0; return (0); bad: nfs_disconnect(nmp); return (error); } /* * Reconnect routine: * Called when a connection is broken on a reliable protocol. * - clean up the old socket * - nfs_connect() again * - set R_MUSTRESEND for all outstanding requests on mount point * If this fails the mount point is DEAD! * nb: Must be called with the nfs_sndlock() set on the mount point. */ static int nfs_reconnect(struct nfsreq *rep) { struct nfsreq *rp; struct nfsmount *nmp = rep->r_nmp; int error; nfs_disconnect(nmp); while ((error = nfs_connect(nmp, rep)) != 0) { if (error == EINTR || error == ERESTART) return (EINTR); (void) tsleep(&lbolt, PSOCK, "nfscon", 0); } /* * Loop through outstanding request list and fix up all requests * on old socket. */ TAILQ_FOREACH(rp, &nfs_reqq, r_chain) { if (rp->r_nmp == nmp) rp->r_flags |= R_MUSTRESEND; } return (0); } /* * NFS disconnect. Clean up and unlink. */ void nfs_disconnect(struct nfsmount *nmp) { struct socket *so; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); if (nmp->nm_so) { so = nmp->nm_so; nmp->nm_so = NULL; soshutdown(so, SHUT_RDWR); soclose(so); } } void nfs_safedisconnect(struct nfsmount *nmp) { struct nfsreq dummyreq; bzero(&dummyreq, sizeof(dummyreq)); dummyreq.r_nmp = nmp; nfs_rcvlock(&dummyreq); nfs_disconnect(nmp); nfs_rcvunlock(&dummyreq); } /* * This is the nfs send routine. For connection based socket types, it * must be called with an nfs_sndlock() on the socket. * - return EINTR if the RPC is terminated, 0 otherwise * - set R_MUSTRESEND if the send fails for any reason * - do any cleanup required by recoverable socket errors (?) */ int nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top, struct nfsreq *rep) { struct sockaddr *sendnam; int error, soflags, flags; - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); KASSERT(rep, ("nfs_send: called with rep == NULL")); if (rep->r_flags & R_SOFTTERM) { m_freem(top); return (EINTR); } if ((so = rep->r_nmp->nm_so) == NULL) { rep->r_flags |= R_MUSTRESEND; m_freem(top); return (0); } rep->r_flags &= ~R_MUSTRESEND; soflags = rep->r_nmp->nm_soflags; if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) sendnam = NULL; else sendnam = nam; if (so->so_type == SOCK_SEQPACKET) flags = MSG_EOR; else flags = 0; error = so->so_proto->pr_usrreqs->pru_sosend(so, sendnam, 0, top, 0, flags, curthread /*XXX*/); if (error == ENOBUFS && so->so_type == SOCK_DGRAM) { error = 0; rep->r_flags |= R_MUSTRESEND; } if (error) { /* * Don't report EPIPE errors on nfs sockets. * These can be due to idle tcp mounts which will be closed by * netapp, solaris, etc. if left idle too long. */ if (error != EPIPE) { log(LOG_INFO, "nfs send error %d for server %s\n", error, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); } /* * Deal with errors for the client side. */ if (rep->r_flags & R_SOFTTERM) error = EINTR; else rep->r_flags |= R_MUSTRESEND; /* * Handle any recoverable (soft) socket errors here. (?) */ if (error != EINTR && error != ERESTART && error != EWOULDBLOCK && error != EPIPE) error = 0; } return (error); } /* * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all * done by soreceive(), but for SOCK_STREAM we must deal with the Record * Mark and consolidate the data into a new mbuf list. * nb: Sometimes TCP passes the data up to soreceive() in long lists of * small mbufs. * For SOCK_STREAM we must be very careful to read an entire record once * we have read any of it, even if the system call has been interrupted. */ static int nfs_receive(struct nfsreq *rep, struct sockaddr **aname, struct mbuf **mp) { struct socket *so; struct uio auio; struct iovec aio; struct mbuf *m; struct mbuf *control; u_int32_t len; struct sockaddr **getnam; int error, sotype, rcvflg; struct thread *td = curthread; /* XXX */ - GIANT_REQUIRED; /* XXX until socket locking done */ + NET_ASSERT_GIANT(); /* * Set up arguments for soreceive() */ *mp = NULL; *aname = NULL; sotype = rep->r_nmp->nm_sotype; /* * For reliable protocols, lock against other senders/receivers * in case a reconnect is necessary. * For SOCK_STREAM, first get the Record Mark to find out how much * more there is to get. * We must lock the socket against other receivers * until we have an entire rpc request/reply. */ if (sotype != SOCK_DGRAM) { error = nfs_sndlock(rep); if (error) return (error); tryagain: /* * Check for fatal errors and resending request. */ /* * Ugh: If a reconnect attempt just happened, nm_so * would have changed. NULL indicates a failed * attempt that has essentially shut down this * mount point. */ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { nfs_sndunlock(rep); return (EINTR); } so = rep->r_nmp->nm_so; if (!so) { error = nfs_reconnect(rep); if (error) { nfs_sndunlock(rep); return (error); } goto tryagain; } while (rep->r_flags & R_MUSTRESEND) { m = m_copym(rep->r_mreq, 0, M_COPYALL, M_TRYWAIT); nfsstats.rpcretries++; error = nfs_send(so, rep->r_nmp->nm_nam, m, rep); if (error) { if (error == EINTR || error == ERESTART || (error = nfs_reconnect(rep)) != 0) { nfs_sndunlock(rep); return (error); } goto tryagain; } } nfs_sndunlock(rep); if (sotype == SOCK_STREAM) { aio.iov_base = (caddr_t) &len; aio.iov_len = sizeof(u_int32_t); auio.uio_iov = &aio; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_offset = 0; auio.uio_resid = sizeof(u_int32_t); auio.uio_td = td; do { rcvflg = MSG_WAITALL; error = so->so_proto->pr_usrreqs->pru_soreceive (so, NULL, &auio, NULL, NULL, &rcvflg); if (error == EWOULDBLOCK && rep) { if (rep->r_flags & R_SOFTTERM) return (EINTR); } } while (error == EWOULDBLOCK); if (!error && auio.uio_resid > 0) { /* * Don't log a 0 byte receive; it means * that the socket has been closed, and * can happen during normal operation * (forcible unmount or Solaris server). */ if (auio.uio_resid != sizeof (u_int32_t)) log(LOG_INFO, "short receive (%d/%d) from nfs server %s\n", (int)(sizeof(u_int32_t) - auio.uio_resid), (int)sizeof(u_int32_t), rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EPIPE; } if (error) goto errout; len = ntohl(len) & ~0x80000000; /* * This is SERIOUS! We are out of sync with the sender * and forcing a disconnect/reconnect is all I can do. */ if (len > NFS_MAXPACKET) { log(LOG_ERR, "%s (%d) from nfs server %s\n", "impossible packet length", len, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EFBIG; goto errout; } auio.uio_resid = len; do { rcvflg = MSG_WAITALL; error = so->so_proto->pr_usrreqs->pru_soreceive (so, NULL, &auio, mp, NULL, &rcvflg); } while (error == EWOULDBLOCK || error == EINTR || error == ERESTART); if (!error && auio.uio_resid > 0) { if (len != auio.uio_resid) log(LOG_INFO, "short receive (%d/%d) from nfs server %s\n", len - auio.uio_resid, len, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EPIPE; } } else { /* * NB: Since uio_resid is big, MSG_WAITALL is ignored * and soreceive() will return when it has either a * control msg or a data msg. * We have no use for control msg., but must grab them * and then throw them away so we know what is going * on. */ auio.uio_resid = len = 100000000; /* Anything Big */ auio.uio_td = td; do { rcvflg = 0; error = so->so_proto->pr_usrreqs->pru_soreceive (so, NULL, &auio, mp, &control, &rcvflg); if (control) m_freem(control); if (error == EWOULDBLOCK && rep) { if (rep->r_flags & R_SOFTTERM) return (EINTR); } } while (error == EWOULDBLOCK || (!error && *mp == NULL && control)); if ((rcvflg & MSG_EOR) == 0) printf("Egad!!\n"); if (!error && *mp == NULL) error = EPIPE; len -= auio.uio_resid; } errout: if (error && error != EINTR && error != ERESTART) { m_freem(*mp); *mp = NULL; if (error != EPIPE) log(LOG_INFO, "receive error %d from nfs server %s\n", error, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = nfs_sndlock(rep); if (!error) { error = nfs_reconnect(rep); if (!error) goto tryagain; else nfs_sndunlock(rep); } } } else { if ((so = rep->r_nmp->nm_so) == NULL) return (EACCES); if (so->so_state & SS_ISCONNECTED) getnam = NULL; else getnam = aname; auio.uio_resid = len = 1000000; auio.uio_td = td; do { rcvflg = 0; error = so->so_proto->pr_usrreqs->pru_soreceive (so, getnam, &auio, mp, NULL, &rcvflg); if (error == EWOULDBLOCK && (rep->r_flags & R_SOFTTERM)) return (EINTR); } while (error == EWOULDBLOCK); len -= auio.uio_resid; } if (error) { m_freem(*mp); *mp = NULL; } /* * Search for any mbufs that are not a multiple of 4 bytes long * or with m_data not longword aligned. * These could cause pointer alignment problems, so copy them to * well aligned mbufs. */ nfs_realign(mp, 5 * NFSX_UNSIGNED); return (error); } /* * Implement receipt of reply on a socket. * We must search through the list of received datagrams matching them * with outstanding requests using the xid, until ours is found. */ /* ARGSUSED */ static int nfs_reply(struct nfsreq *myrep) { struct nfsreq *rep; struct nfsmount *nmp = myrep->r_nmp; int32_t t1; struct mbuf *mrep, *md; struct sockaddr *nam; u_int32_t rxid, *tl; caddr_t dpos; int error; /* * Loop around until we get our own reply */ for (;;) { /* * Lock against other receivers so that I don't get stuck in * sbwait() after someone else has received my reply for me. * Also necessary for connection based protocols to avoid * race conditions during a reconnect. * If nfs_rcvlock() returns EALREADY, that means that * the reply has already been recieved by another * process and we can return immediately. In this * case, the lock is not taken to avoid races with * other processes. */ error = nfs_rcvlock(myrep); if (error == EALREADY) return (0); if (error) return (error); /* * Get the next Rpc reply off the socket */ error = nfs_receive(myrep, &nam, &mrep); nfs_rcvunlock(myrep); if (error) { /* * Ignore routing errors on connectionless protocols?? */ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { nmp->nm_so->so_error = 0; if (myrep->r_flags & R_GETONEREP) return (0); continue; } return (error); } if (nam) FREE(nam, M_SONAME); /* * Get the xid and check that it is an rpc reply */ md = mrep; dpos = mtod(md, caddr_t); tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); rxid = *tl++; if (*tl != rpc_reply) { nfsstats.rpcinvalid++; m_freem(mrep); nfsmout: if (myrep->r_flags & R_GETONEREP) return (0); continue; } /* * Loop through the request list to match up the reply * Iff no match, just drop the datagram */ TAILQ_FOREACH(rep, &nfs_reqq, r_chain) { if (rep->r_mrep == NULL && rxid == rep->r_xid) { /* Found it.. */ rep->r_mrep = mrep; rep->r_md = md; rep->r_dpos = dpos; /* * Update congestion window. * Do the additive increase of * one rpc/rtt. */ if (nmp->nm_cwnd <= nmp->nm_sent) { nmp->nm_cwnd += (NFS_CWNDSCALE * NFS_CWNDSCALE + (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; if (nmp->nm_cwnd > NFS_MAXCWND) nmp->nm_cwnd = NFS_MAXCWND; } if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_SENT; nmp->nm_sent -= NFS_CWNDSCALE; } /* * Update rtt using a gain of 0.125 on the mean * and a gain of 0.25 on the deviation. */ if (rep->r_flags & R_TIMING) { /* * Since the timer resolution of * NFS_HZ is so course, it can often * result in r_rtt == 0. Since * r_rtt == N means that the actual * rtt is between N+dt and N+2-dt ticks, * add 1. */ t1 = rep->r_rtt + 1; t1 -= (NFS_SRTT(rep) >> 3); NFS_SRTT(rep) += t1; if (t1 < 0) t1 = -t1; t1 -= (NFS_SDRTT(rep) >> 2); NFS_SDRTT(rep) += t1; } nmp->nm_timeouts = 0; break; } } /* * If not matched to a request, drop it. * If it's mine, get out. */ if (rep == 0) { nfsstats.rpcunexpected++; m_freem(mrep); } else if (rep == myrep) { if (rep->r_mrep == NULL) panic("nfsreply nil"); return (0); } if (myrep->r_flags & R_GETONEREP) return (0); } } /* * nfs_request - goes something like this * - fill in request struct * - links it into list * - calls nfs_send() for first transmit * - calls nfs_receive() to get reply * - break down rpc header and return with nfs reply pointed to * by mrep or error * nb: always frees up mreq mbuf list */ /* XXX overloaded before */ #define NQ_TRYLATERDEL 15 /* Initial try later delay (sec) */ int nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum, struct thread *td, struct ucred *cred, struct mbuf **mrp, struct mbuf **mdp, caddr_t *dposp) { struct mbuf *mrep, *m2; struct nfsreq *rep; u_int32_t *tl; int i; struct nfsmount *nmp; struct mbuf *m, *md, *mheadend; time_t waituntil; caddr_t dpos; int s, error = 0, mrest_len, auth_len, auth_type; int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0; u_int32_t xid; /* Reject requests while attempting a forced unmount. */ if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) { m_freem(mrest); return (ESTALE); } nmp = VFSTONFS(vp->v_mount); if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) return nfs4_request(vp, mrest, procnum, td, cred, mrp, mdp, dposp); MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); rep->r_nmp = nmp; rep->r_vp = vp; rep->r_td = td; rep->r_procnum = procnum; mrest_len = m_length(mrest, NULL); /* * Get the RPC header with authorization. */ auth_type = RPCAUTH_UNIX; if (cred->cr_ngroups < 1) panic("nfsreq nogrps"); auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + 5 * NFSX_UNSIGNED; m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, mrest, mrest_len, &mheadend, &xid); /* * For stream protocols, insert a Sun RPC Record Mark. */ if (nmp->nm_sotype == SOCK_STREAM) { M_PREPEND(m, NFSX_UNSIGNED, M_TRYWAIT); *mtod(m, u_int32_t *) = htonl(0x80000000 | (m->m_pkthdr.len - NFSX_UNSIGNED)); } rep->r_mreq = m; rep->r_xid = xid; tryagain: if (nmp->nm_flag & NFSMNT_SOFT) rep->r_retry = nmp->nm_retry; else rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ rep->r_rtt = rep->r_rexmit = 0; if (proct[procnum] > 0) rep->r_flags = R_TIMING; else rep->r_flags = 0; rep->r_mrep = NULL; /* * Do the client side RPC. */ nfsstats.rpcrequests++; /* * Chain request into list of outstanding requests. Be sure * to put it LAST so timer finds oldest requests first. */ s = splsoftclock(); if (TAILQ_EMPTY(&nfs_reqq)) callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL); TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); /* * If backing off another request or avoiding congestion, don't * send this one now but let timer do it. If not timing a request, * do it now. */ if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) { splx(s); if (nmp->nm_soflags & PR_CONNREQUIRED) error = nfs_sndlock(rep); if (!error) { m2 = m_copym(m, 0, M_COPYALL, M_TRYWAIT); error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep); if (nmp->nm_soflags & PR_CONNREQUIRED) nfs_sndunlock(rep); } if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { nmp->nm_sent += NFS_CWNDSCALE; rep->r_flags |= R_SENT; } } else { splx(s); rep->r_rtt = -1; } /* * Wait for the reply from our send or the timer's. */ if (!error || error == EPIPE) error = nfs_reply(rep); /* * RPC done, unlink the request. */ s = splsoftclock(); TAILQ_REMOVE(&nfs_reqq, rep, r_chain); if (TAILQ_EMPTY(&nfs_reqq)) callout_stop(&nfs_callout); splx(s); /* * Decrement the outstanding request count. */ if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_SENT; /* paranoia */ nmp->nm_sent -= NFS_CWNDSCALE; } /* * If there was a successful reply and a tprintf msg. * tprintf a response. */ if (!error && (rep->r_flags & R_TPRINTFMSG)) nfs_msg(rep->r_td, nmp->nm_mountp->mnt_stat.f_mntfromname, "is alive again"); mrep = rep->r_mrep; md = rep->r_md; dpos = rep->r_dpos; if (error) { m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * break down the rpc header and check if ok */ tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); if (*tl++ == rpc_msgdenied) { if (*tl == rpc_mismatch) error = EOPNOTSUPP; else error = EACCES; m_freem(mrep); m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * Just throw away any verifyer (ie: kerberos etc). */ i = fxdr_unsigned(int, *tl++); /* verf type */ i = fxdr_unsigned(int32_t, *tl); /* len */ if (i > 0) nfsm_adv(nfsm_rndup(i)); tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); /* 0 == ok */ if (*tl == 0) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); if (*tl != 0) { error = fxdr_unsigned(int, *tl); if ((nmp->nm_flag & NFSMNT_NFSV3) && error == NFSERR_TRYLATER) { m_freem(mrep); error = 0; waituntil = time_second + trylater_delay; while (time_second < waituntil) (void) tsleep(&lbolt, PSOCK, "nqnfstry", 0); trylater_delay *= nfs_backoff[trylater_cnt]; if (trylater_cnt < NFS_NBACKOFF - 1) trylater_cnt++; goto tryagain; } /* * If the File Handle was stale, invalidate the * lookup cache, just in case. */ if (error == ESTALE) cache_purge(vp); if (nmp->nm_flag & NFSMNT_NFSV3) { *mrp = mrep; *mdp = md; *dposp = dpos; error |= NFSERR_RETERR; } else m_freem(mrep); m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } *mrp = mrep; *mdp = md; *dposp = dpos; m_freem(rep->r_mreq); FREE((caddr_t)rep, M_NFSREQ); return (0); } m_freem(mrep); error = EPROTONOSUPPORT; nfsmout: m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * Nfs timer routine * Scan the nfsreq list and retranmit any requests that have timed out * To avoid retransmission attempts on STREAM sockets (in the future) make * sure to set the r_retry field to 0 (implies nm_retry == 0). */ void nfs_timer(void *arg) { struct nfsreq *rep; struct mbuf *m; struct socket *so; struct nfsmount *nmp; int timeo; int s, error; struct thread *td; td = &thread0; /* XXX for credentials, may break if sleep */ s = splnet(); TAILQ_FOREACH(rep, &nfs_reqq, r_chain) { nmp = rep->r_nmp; if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) continue; if (nfs_sigintr(nmp, rep, rep->r_td)) { nfs_softterm(rep); continue; } if (rep->r_rtt >= 0) { rep->r_rtt++; if (nmp->nm_flag & NFSMNT_DUMBTIMR) timeo = nmp->nm_timeo; else timeo = NFS_RTO(nmp, proct[rep->r_procnum]); if (nmp->nm_timeouts > 0) timeo *= nfs_backoff[nmp->nm_timeouts - 1]; if (rep->r_rtt <= timeo) continue; if (nmp->nm_timeouts < NFS_NBACKOFF) nmp->nm_timeouts++; } /* * Check for server not responding */ if ((rep->r_flags & R_TPRINTFMSG) == 0 && rep->r_rexmit > nmp->nm_deadthresh) { nfs_msg(rep->r_td, nmp->nm_mountp->mnt_stat.f_mntfromname, "not responding"); rep->r_flags |= R_TPRINTFMSG; } if (rep->r_rexmit >= rep->r_retry) { /* too many */ nfsstats.rpctimeouts++; nfs_softterm(rep); continue; } if (nmp->nm_sotype != SOCK_DGRAM) { if (++rep->r_rexmit > NFS_MAXREXMIT) rep->r_rexmit = NFS_MAXREXMIT; continue; } if ((so = nmp->nm_so) == NULL) continue; /* * If there is enough space and the window allows.. * Resend it * Set r_rtt to -1 in case we fail to send it now. */ rep->r_rtt = -1; if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && ((nmp->nm_flag & NFSMNT_DUMBTIMR) || (rep->r_flags & R_SENT) || nmp->nm_sent < nmp->nm_cwnd) && (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m, NULL, NULL, td); else error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m, nmp->nm_nam, NULL, td); if (error) { if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) so->so_error = 0; } else { /* * Iff first send, start timing * else turn timing off, backoff timer * and divide congestion window by 2. */ if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_TIMING; if (++rep->r_rexmit > NFS_MAXREXMIT) rep->r_rexmit = NFS_MAXREXMIT; nmp->nm_cwnd >>= 1; if (nmp->nm_cwnd < NFS_CWNDSCALE) nmp->nm_cwnd = NFS_CWNDSCALE; nfsstats.rpcretries++; } else { rep->r_flags |= R_SENT; nmp->nm_sent += NFS_CWNDSCALE; } rep->r_rtt = 0; } } } splx(s); callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL); } /* * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and * wait for all requests to complete. This is used by forced unmounts * to terminate any outstanding RPCs. */ int nfs_nmcancelreqs(nmp) struct nfsmount *nmp; { struct nfsreq *req; int i, s; s = splnet(); TAILQ_FOREACH(req, &nfs_reqq, r_chain) { if (nmp != req->r_nmp || req->r_mrep != NULL || (req->r_flags & R_SOFTTERM)) continue; nfs_softterm(req); } splx(s); for (i = 0; i < 30; i++) { s = splnet(); TAILQ_FOREACH(req, &nfs_reqq, r_chain) { if (nmp == req->r_nmp) break; } splx(s); if (req == NULL) return (0); tsleep(&lbolt, PSOCK, "nfscancel", 0); } return (EBUSY); } /* * Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT). * The nm_send count is decremented now to avoid deadlocks when the process in * soreceive() hasn't yet managed to send its own request. */ static void nfs_softterm(struct nfsreq *rep) { rep->r_flags |= R_SOFTTERM; if (rep->r_flags & R_SENT) { rep->r_nmp->nm_sent -= NFS_CWNDSCALE; rep->r_flags &= ~R_SENT; } } /* * Test for a termination condition pending on the process. * This is used for NFSMNT_INT mounts. */ int nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td) { struct proc *p; sigset_t tmpset; if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) return nfs4_sigintr(nmp, rep, td); if (rep && (rep->r_flags & R_SOFTTERM)) return (EINTR); /* Terminate all requests while attempting a forced unmount. */ if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) return (EINTR); if (!(nmp->nm_flag & NFSMNT_INT)) return (0); if (td == NULL) return (0); p = td->td_proc; PROC_LOCK(p); tmpset = p->p_siglist; SIGSETNAND(tmpset, td->td_sigmask); mtx_lock(&p->p_sigacts->ps_mtx); SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); mtx_unlock(&p->p_sigacts->ps_mtx); if (SIGNOTEMPTY(p->p_siglist) && NFSINT_SIGMASK(tmpset)) { PROC_UNLOCK(p); return (EINTR); } PROC_UNLOCK(p); return (0); } /* * Lock a socket against others. * Necessary for STREAM sockets to ensure you get an entire rpc request/reply * and also to avoid race conditions between the processes with nfs requests * in progress when a reconnect is necessary. */ int nfs_sndlock(struct nfsreq *rep) { int *statep = &rep->r_nmp->nm_state; struct thread *td; int slpflag = 0, slptimeo = 0; td = rep->r_td; if (rep->r_nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; while (*statep & NFSSTA_SNDLOCK) { if (nfs_sigintr(rep->r_nmp, rep, td)) return (EINTR); *statep |= NFSSTA_WANTSND; (void) tsleep(statep, slpflag | (PZERO - 1), "nfsndlck", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } *statep |= NFSSTA_SNDLOCK; return (0); } /* * Unlock the stream socket for others. */ void nfs_sndunlock(struct nfsreq *rep) { int *statep = &rep->r_nmp->nm_state; if ((*statep & NFSSTA_SNDLOCK) == 0) panic("nfs sndunlock"); *statep &= ~NFSSTA_SNDLOCK; if (*statep & NFSSTA_WANTSND) { *statep &= ~NFSSTA_WANTSND; wakeup(statep); } } static int nfs_rcvlock(struct nfsreq *rep) { int *statep = &rep->r_nmp->nm_state; int slpflag, slptimeo = 0; if (rep->r_nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; else slpflag = 0; while (*statep & NFSSTA_RCVLOCK) { if (nfs_sigintr(rep->r_nmp, rep, rep->r_td)) return (EINTR); *statep |= NFSSTA_WANTRCV; (void) tsleep(statep, slpflag | (PZERO - 1), "nfsrcvlk", slptimeo); /* * If our reply was recieved while we were sleeping, * then just return without taking the lock to avoid a * situation where a single iod could 'capture' the * recieve lock. */ if (rep->r_mrep != NULL) return (EALREADY); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } /* Always fail if our request has been cancelled. */ if (rep != NULL && (rep->r_flags & R_SOFTTERM)) return (EINTR); *statep |= NFSSTA_RCVLOCK; return (0); } /* * Unlock the stream socket for others. */ static void nfs_rcvunlock(struct nfsreq *rep) { int *statep = &rep->r_nmp->nm_state; if ((*statep & NFSSTA_RCVLOCK) == 0) panic("nfs rcvunlock"); *statep &= ~NFSSTA_RCVLOCK; if (*statep & NFSSTA_WANTRCV) { *statep &= ~NFSSTA_WANTRCV; wakeup(statep); } } /* * nfs_realign: * * Check for badly aligned mbuf data and realign by copying the unaligned * portion of the data into a new mbuf chain and freeing the portions * of the old chain that were replaced. * * We cannot simply realign the data within the existing mbuf chain * because the underlying buffers may contain other rpc commands and * we cannot afford to overwrite them. * * We would prefer to avoid this situation entirely. The situation does * not occur with NFS/UDP and is supposed to only occassionally occur * with TCP. Use vfs.nfs.realign_count and realign_test to check this. */ static void nfs_realign(struct mbuf **pm, int hsiz) { struct mbuf *m; struct mbuf *n = NULL; int off = 0; ++nfs_realign_test; while ((m = *pm) != NULL) { if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { MGET(n, M_TRYWAIT, MT_DATA); if (m->m_len >= MINCLSIZE) { MCLGET(n, M_TRYWAIT); } n->m_len = 0; break; } pm = &m->m_next; } /* * If n is non-NULL, loop on m copying data, then replace the * portion of the chain that had to be realigned. */ if (n != NULL) { ++nfs_realign_count; while (m) { m_copyback(n, off, m->m_len, mtod(m, caddr_t)); off += m->m_len; m = m->m_next; } m_freem(*pm); *pm = n; } } static int nfs_msg(struct thread *td, char *server, char *msg) { tprintf(td ? td->td_proc : NULL, LOG_INFO, "nfs server %s: %s\n", server, msg); return (0); }