Index: head/sys/netinet/in_pcb.c =================================================================== --- head/sys/netinet/in_pcb.c (revision 127503) +++ head/sys/netinet/in_pcb.c (revision 127504) @@ -1,1182 +1,1181 @@ /* * Copyright (c) 1982, 1986, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 * $FreeBSD$ */ #include "opt_ipsec.h" #include "opt_inet6.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif /* INET6 */ #ifdef IPSEC #include #include #endif /* IPSEC */ #ifdef FAST_IPSEC #if defined(IPSEC) || defined(IPSEC_ESP) #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" #endif #include #include #endif /* FAST_IPSEC */ /* * These configure the range of local port addresses assigned to * "unspecified" outgoing connections/packets/whatever. */ int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ /* * Reserved ports accessible only to root. There are significant * security considerations that must be accounted for when changing these, * but the security benefits can be great. Please be careful. */ int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */ int ipport_reservedlow = 0; #define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ else if ((var) > (max)) { (var) = (max); } static int sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error) { RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); } return error; } #undef RANGECHK SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, ""); /* * in_pcb.c: manage the Protocol Control Blocks. * * NOTE: It is assumed that most of these functions will be called at * splnet(). XXX - There are, unfortunately, a few exceptions to this * rule that should be fixed. */ /* * Allocate a PCB and associate it with the socket. */ int -in_pcballoc(so, pcbinfo, td, type) +in_pcballoc(so, pcbinfo, type) struct socket *so; struct inpcbinfo *pcbinfo; - struct thread *td; const char *type; { register struct inpcb *inp; int error; INP_INFO_WLOCK_ASSERT(pcbinfo); error = 0; inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO); if (inp == NULL) return (ENOBUFS); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; #ifdef MAC error = mac_init_inpcb(inp, M_NOWAIT); if (error != 0) goto out; mac_create_inpcb_from_socket(so, inp); #endif #if defined(IPSEC) || defined(FAST_IPSEC) #ifdef FAST_IPSEC error = ipsec_init_policy(so, &inp->inp_sp); #else error = ipsec_init_pcbpolicy(so, &inp->inp_sp); #endif if (error != 0) goto out; #endif /*IPSEC*/ #if defined(INET6) if (INP_SOCKAF(so) == AF_INET6) { inp->inp_vflag |= INP_IPV6PROTO; if (ip6_v6only) inp->inp_flags |= IN6P_IPV6_V6ONLY; } #endif LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); pcbinfo->ipi_count++; so->so_pcb = (caddr_t)inp; INP_LOCK_INIT(inp, "inp", type); #ifdef INET6 if (ip6_auto_flowlabel) inp->inp_flags |= IN6P_AUTOFLOWLABEL; #endif #if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC) out: if (error != 0) uma_zfree(pcbinfo->ipi_zone, inp); #endif return (error); } int in_pcbbind(inp, nam, td) register struct inpcb *inp; struct sockaddr *nam; struct thread *td; { int anonport, error; INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_LOCK_ASSERT(inp); if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) return (EINVAL); anonport = inp->inp_lport == 0 && (nam == NULL || ((struct sockaddr_in *)nam)->sin_port == 0); error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, &inp->inp_lport, td); if (error) return (error); if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); } /* * Set up a bind operation on a PCB, performing port allocation * as required, but do not actually modify the PCB. Callers can * either complete the bind by setting inp_laddr/inp_lport and * calling in_pcbinshash(), or they can just use the resulting * port and address to authorise the sending of a once-off packet. * * On error, the values of *laddrp and *lportp are not changed. */ int in_pcbbind_setup(inp, nam, laddrp, lportp, td) struct inpcb *inp; struct sockaddr *nam; in_addr_t *laddrp; u_short *lportp; struct thread *td; { struct socket *so = inp->inp_socket; unsigned short *lastport; struct sockaddr_in *sin; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct in_addr laddr; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error, prison = 0; INP_INFO_WLOCK_ASSERT(pcbinfo); INP_LOCK_ASSERT(inp); if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); laddr.s_addr = *laddrp; if (nam != NULL && laddr.s_addr != INADDR_ANY) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; if (nam) { sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sin)) return (EINVAL); #ifdef notdef /* * We should check the family, but old programs * incorrectly fail to initialize it. */ if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); #endif if (sin->sin_addr.s_addr != INADDR_ANY) if (prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) return(EINVAL); if (sin->sin_port != *lportp) { /* Don't allow the port to change. */ if (*lportp != 0) return (EINVAL); lport = sin->sin_port; } /* NB: lport is left as 0 if the port isn't being changed. */ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (sin->sin_addr.s_addr != INADDR_ANY) { sin->sin_port = 0; /* yech... */ bzero(&sin->sin_zero, sizeof(sin->sin_zero)); if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) return (EADDRNOTAVAIL); } laddr = sin->sin_addr; if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) <= ipport_reservedhigh && ntohs(lport) >= ipport_reservedlow && td && suser_cred(td->td_ucred, PRISON_ROOT)) return (EACCES); if (td && jailed(td->td_ucred)) prison = 1; if (so->so_cred->cr_uid != 0 && !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { t = in_pcblookup_local(inp->inp_pcbinfo, sin->sin_addr, lport, prison ? 0 : INPLOOKUP_WILDCARD); /* * XXX * This entire block sorely needs a rewrite. */ if (t && (t->inp_vflag & INP_TIMEWAIT)) { if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid)) return (EADDRINUSE); } else if (t && (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (t->inp_socket->so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid)) { #if defined(INET6) if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) #endif /* defined(INET6) */ return (EADDRINUSE); } } if (prison && prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) return (EADDRNOTAVAIL); t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, prison ? 0 : wild); if (t && (t->inp_vflag & INP_TIMEWAIT)) { if ((reuseport & intotw(t)->tw_so_options) == 0) return (EADDRINUSE); } else if (t && (reuseport & t->inp_socket->so_options) == 0) { #if defined(INET6) if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) #endif /* defined(INET6) */ return (EADDRINUSE); } } } if (*lportp != 0) lport = *lportp; if (lport == 0) { u_short first, last; int count; if (laddr.s_addr != INADDR_ANY) if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) return (EINVAL); if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { if (td && (error = suser_cred(td->td_ucred, PRISON_ROOT)) != 0) return error; first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; lastport = &pcbinfo->lastport; } /* * Simple check to ensure all ports are not used up causing * a deadlock here. * * We split the two cases (up and down) so that the direction * is not being tested on each round of the loop. */ if (first > last) { /* * counting down */ count = first - last; do { if (count-- < 0) /* completely used? */ return (EADDRNOTAVAIL); --*lastport; if (*lastport > first || *lastport < last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, laddr, lport, wild)); } else { /* * counting up */ count = last - first; do { if (count-- < 0) /* completely used? */ return (EADDRNOTAVAIL); ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, laddr, lport, wild)); } } if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) return (EINVAL); *laddrp = laddr.s_addr; *lportp = lport; return (0); } /* * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int in_pcbconnect(inp, nam, td) register struct inpcb *inp; struct sockaddr *nam; struct thread *td; { u_short lport, fport; in_addr_t laddr, faddr; int anonport, error; lport = inp->inp_lport; laddr = inp->inp_laddr.s_addr; anonport = (lport == 0); error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, NULL, td); if (error) return (error); /* Do the initial binding of the local address if required. */ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { inp->inp_lport = lport; inp->inp_laddr.s_addr = laddr; if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } } /* Commit the remaining changes. */ inp->inp_lport = lport; inp->inp_laddr.s_addr = laddr; inp->inp_faddr.s_addr = faddr; inp->inp_fport = fport; in_pcbrehash(inp); #ifdef IPSEC if (inp->inp_socket->so_type == SOCK_STREAM) ipsec_pcbconn(inp->inp_sp); #endif if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); } /* * Set up for a connect from a socket to the specified address. * On entry, *laddrp and *lportp should contain the current local * address and port for the PCB; these are updated to the values * that should be placed in inp_laddr and inp_lport to complete * the connect. * * On success, *faddrp and *fportp will be set to the remote address * and port. These are not updated in the error case. * * If the operation fails because the connection already exists, * *oinpp will be set to the PCB of that connection so that the * caller can decide to override it. In all other cases, *oinpp * is set to NULL. */ int in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td) register struct inpcb *inp; struct sockaddr *nam; in_addr_t *laddrp; u_short *lportp; in_addr_t *faddrp; u_short *fportp; struct inpcb **oinpp; struct thread *td; { struct sockaddr_in *sin = (struct sockaddr_in *)nam; struct in_ifaddr *ia; struct sockaddr_in sa; struct ucred *cred; struct inpcb *oinp; struct in_addr laddr, faddr; u_short lport, fport; int error; if (oinpp != NULL) *oinpp = NULL; if (nam->sa_len != sizeof (*sin)) return (EINVAL); if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); if (sin->sin_port == 0) return (EADDRNOTAVAIL); laddr.s_addr = *laddrp; lport = *lportp; faddr = sin->sin_addr; fport = sin->sin_port; cred = inp->inp_socket->so_cred; if (laddr.s_addr == INADDR_ANY && jailed(cred)) { bzero(&sa, sizeof(sa)); sa.sin_addr.s_addr = htonl(prison_getip(cred)); sa.sin_len = sizeof(sa); sa.sin_family = AF_INET; error = in_pcbbind_setup(inp, (struct sockaddr *)&sa, &laddr.s_addr, &lport, td); if (error) return (error); } if (!TAILQ_EMPTY(&in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, * use the primary local address. * If the supplied address is INADDR_BROADCAST, * and the primary interface supports broadcast, * choose the broadcast address for that interface. */ if (faddr.s_addr == INADDR_ANY) faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr; else if (faddr.s_addr == (u_long)INADDR_BROADCAST && (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST)) faddr = satosin(&TAILQ_FIRST( &in_ifaddrhead)->ia_broadaddr)->sin_addr; } if (laddr.s_addr == INADDR_ANY) { struct route sro; bzero(&sro, sizeof(sro)); ia = (struct in_ifaddr *)0; /* * If route is known our src addr is taken from the i/f, * else punt. */ if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) { /* Find out route to destination */ sro.ro_dst.sa_family = AF_INET; sro.ro_dst.sa_len = sizeof(struct sockaddr_in); ((struct sockaddr_in *)&sro.ro_dst)->sin_addr = faddr; rtalloc_ign(&sro, RTF_CLONING); } /* * If we found a route, use the address * corresponding to the outgoing interface * unless it is the loopback (in case a route * to our address on another net goes to loopback). */ if (sro.ro_rt && !(sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) ia = ifatoia(sro.ro_rt->rt_ifa); if (sro.ro_rt) RTFREE(sro.ro_rt); if (ia == 0) { bzero(&sa, sizeof(sa)); sa.sin_addr = faddr; sa.sin_len = sizeof(sa); sa.sin_family = AF_INET; ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa))); if (ia == 0) ia = ifatoia(ifa_ifwithnet(sintosa(&sa))); if (ia == 0) ia = TAILQ_FIRST(&in_ifaddrhead); if (ia == 0) return (EADDRNOTAVAIL); } /* * If the destination address is multicast and an outgoing * interface has been set as a multicast option, use the * address of that interface as our source address. */ if (IN_MULTICAST(ntohl(faddr.s_addr)) && inp->inp_moptions != NULL) { struct ip_moptions *imo; struct ifnet *ifp; imo = inp->inp_moptions; if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) if (ia->ia_ifp == ifp) break; if (ia == 0) return (EADDRNOTAVAIL); } } laddr = ia->ia_addr.sin_addr; } oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, 0, NULL); if (oinp != NULL) { if (oinpp != NULL) *oinpp = oinp; return (EADDRINUSE); } if (lport == 0) { error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, td); if (error) return (error); } *laddrp = laddr.s_addr; *lportp = lport; *faddrp = faddr.s_addr; *fportp = fport; return (0); } void in_pcbdisconnect(inp) struct inpcb *inp; { INP_LOCK_ASSERT(inp); inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; in_pcbrehash(inp); #ifdef IPSEC ipsec_pcbdisconn(inp->inp_sp); #endif if (inp->inp_socket->so_state & SS_NOFDREF) in_pcbdetach(inp); } void in_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; struct inpcbinfo *ipi = inp->inp_pcbinfo; INP_LOCK_ASSERT(inp); #if defined(IPSEC) || defined(FAST_IPSEC) ipsec4_delete_pcbpolicy(inp); #endif /*IPSEC*/ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); if (so) { so->so_pcb = 0; sotryfree(so); } if (inp->inp_options) (void)m_free(inp->inp_options); ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; INP_LOCK_DESTROY(inp); #ifdef MAC mac_destroy_inpcb(inp); #endif uma_zfree(ipi->ipi_zone, inp); } struct sockaddr * in_sockaddr(port, addr_p) in_port_t port; struct in_addr *addr_p; { struct sockaddr_in *sin; MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = *addr_p; sin->sin_port = port; return (struct sockaddr *)sin; } /* * The wrapper function will pass down the pcbinfo for this function to lock. * The socket must have a valid * (i.e., non-nil) PCB, but it should be impossible to get an invalid one * except through a kernel programming error, so it is acceptable to panic * (or in this case trap) if the PCB is invalid. (Actually, we don't trap * because there actually /is/ a programming error somewhere... XXX) */ int in_setsockaddr(so, nam, pcbinfo) struct socket *so; struct sockaddr **nam; struct inpcbinfo *pcbinfo; { int s; register struct inpcb *inp; struct in_addr addr; in_port_t port; s = splnet(); INP_INFO_RLOCK(pcbinfo); inp = sotoinpcb(so); if (!inp) { INP_INFO_RUNLOCK(pcbinfo); splx(s); return ECONNRESET; } INP_LOCK(inp); port = inp->inp_lport; addr = inp->inp_laddr; INP_UNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); splx(s); *nam = in_sockaddr(port, &addr); return 0; } /* * The wrapper function will pass down the pcbinfo for this function to lock. */ int in_setpeeraddr(so, nam, pcbinfo) struct socket *so; struct sockaddr **nam; struct inpcbinfo *pcbinfo; { int s; register struct inpcb *inp; struct in_addr addr; in_port_t port; s = splnet(); INP_INFO_RLOCK(pcbinfo); inp = sotoinpcb(so); if (!inp) { INP_INFO_RUNLOCK(pcbinfo); splx(s); return ECONNRESET; } INP_LOCK(inp); port = inp->inp_fport; addr = inp->inp_faddr; INP_UNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); splx(s); *nam = in_sockaddr(port, &addr); return 0; } void in_pcbnotifyall(pcbinfo, faddr, errno, notify) struct inpcbinfo *pcbinfo; struct in_addr faddr; int errno; struct inpcb *(*notify)(struct inpcb *, int); { struct inpcb *inp, *ninp; struct inpcbhead *head; int s; s = splnet(); INP_INFO_WLOCK(pcbinfo); head = pcbinfo->listhead; for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { INP_LOCK(inp); ninp = LIST_NEXT(inp, inp_list); #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) { INP_UNLOCK(inp); continue; } #endif if (inp->inp_faddr.s_addr != faddr.s_addr || inp->inp_socket == NULL) { INP_UNLOCK(inp); continue; } if ((*notify)(inp, errno)) INP_UNLOCK(inp); } INP_INFO_WUNLOCK(pcbinfo); splx(s); } void in_pcbpurgeif0(pcbinfo, ifp) struct inpcbinfo *pcbinfo; struct ifnet *ifp; { struct inpcb *inp; struct ip_moptions *imo; int i, gap; /* why no splnet here? XXX */ INP_INFO_RLOCK(pcbinfo); LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { INP_LOCK(inp); imo = inp->inp_moptions; if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { /* * Unselect the outgoing interface if it is being * detached. */ if (imo->imo_multicast_ifp == ifp) imo->imo_multicast_ifp = NULL; /* * Drop multicast group membership if we joined * through the interface being detached. */ for (i = 0, gap = 0; i < imo->imo_num_memberships; i++) { if (imo->imo_membership[i]->inm_ifp == ifp) { in_delmulti(imo->imo_membership[i]); gap++; } else if (gap != 0) imo->imo_membership[i - gap] = imo->imo_membership[i]; } imo->imo_num_memberships -= gap; } INP_UNLOCK(inp); } INP_INFO_RUNLOCK(pcbinfo); } /* * Lookup a PCB based on the local address and port. */ struct inpcb * in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) struct inpcbinfo *pcbinfo; struct in_addr laddr; u_int lport_arg; int wild_okay; { register struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; INP_INFO_WLOCK_ASSERT(pcbinfo); if (!wild_okay) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_lport == lport) { /* * Found. */ return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ retrylookup: porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif /* * Clean out old time_wait sockets if they * are clogging up needed local ports. */ if ((inp->inp_vflag & INP_TIMEWAIT) != 0) { if (tcp_twrecycleable((struct tcptw *)inp->inp_ppcb)) { INP_LOCK(inp); tcp_twclose((struct tcptw *)inp->inp_ppcb, 0); match = NULL; goto retrylookup; } } if (inp->inp_faddr.s_addr != INADDR_ANY) wildcard++; if (inp->inp_laddr.s_addr != INADDR_ANY) { if (laddr.s_addr == INADDR_ANY) wildcard++; else if (inp->inp_laddr.s_addr != laddr.s_addr) continue; } else { if (laddr.s_addr != INADDR_ANY) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) { break; } } } } return (match); } } /* * Lookup PCB in hash list. */ struct inpcb * in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) struct inpcbinfo *pcbinfo; struct in_addr faddr, laddr; u_int fport_arg, lport_arg; int wildcard; struct ifnet *ifp; { struct inpcbhead *head; register struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; INP_INFO_RLOCK_ASSERT(pcbinfo); /* * First look for an exact match. */ head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == faddr.s_addr && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_fport == fport && inp->inp_lport == lport) { /* * Found. */ return (inp); } } if (wildcard) { struct inpcb *local_wild = NULL; #if defined(INET6) struct inpcb *local_wild_mapped = NULL; #endif /* defined(INET6) */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_lport == lport) { if (ifp && ifp->if_type == IFT_FAITH && (inp->inp_flags & INP_FAITH) == 0) continue; if (inp->inp_laddr.s_addr == laddr.s_addr) return (inp); else if (inp->inp_laddr.s_addr == INADDR_ANY) { #if defined(INET6) if (INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) local_wild_mapped = inp; else #endif /* defined(INET6) */ local_wild = inp; } } } #if defined(INET6) if (local_wild == NULL) return (local_wild_mapped); #endif /* defined(INET6) */ return (local_wild); } /* * Not found. */ return (NULL); } /* * Insert PCB onto various hash lists. */ int in_pcbinshash(inp) struct inpcb *inp; { struct inpcbhead *pcbhash; struct inpcbporthead *pcbporthash; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbport *phd; u_int32_t hashkey_faddr; INP_INFO_WLOCK_ASSERT(pcbinfo); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; /* * Go through port list and look for a head for this lport. */ LIST_FOREACH(phd, pcbporthash, phd_hash) { if (phd->phd_port == inp->inp_lport) break; } /* * If none exists, malloc one and tack it on. */ if (phd == NULL) { MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); if (phd == NULL) { return (ENOBUFS); /* XXX */ } phd->phd_port = inp->inp_lport; LIST_INIT(&phd->phd_pcblist); LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); } inp->inp_phd = phd; LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); return (0); } /* * Move PCB to the proper hash bucket when { faddr, fport } have been * changed. NOTE: This does not handle the case of the lport changing (the * hashed port list would have to be updated as well), so the lport must * not change after in_pcbinshash() has been called. */ void in_pcbrehash(inp) struct inpcb *inp; { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *head; u_int32_t hashkey_faddr; INP_INFO_WLOCK_ASSERT(pcbinfo); /* XXX? INP_LOCK_ASSERT(inp); */ #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; head = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); } /* * Remove PCB from various lists. */ void in_pcbremlists(inp) struct inpcb *inp; { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; INP_INFO_WLOCK_ASSERT(pcbinfo); INP_LOCK_ASSERT(inp); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; if (inp->inp_lport) { struct inpcbport *phd = inp->inp_phd; LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (LIST_FIRST(&phd->phd_pcblist) == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } } LIST_REMOVE(inp, inp_list); pcbinfo->ipi_count--; } /* * A set label operation has occurred at the socket layer, propagate the * label change into the in_pcb for the socket. */ void in_pcbsosetlabel(so) struct socket *so; { #ifdef MAC struct inpcb *inp; /* XXX: Will assert socket lock when we have them. */ inp = (struct inpcb *)so->so_pcb; INP_LOCK(inp); mac_inpcb_sosetlabel(so, inp); INP_UNLOCK(inp); #endif } Index: head/sys/netinet/in_pcb.h =================================================================== --- head/sys/netinet/in_pcb.h (revision 127503) +++ head/sys/netinet/in_pcb.h (revision 127504) @@ -1,370 +1,369 @@ /* * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NETINET_IN_PCB_H_ #define _NETINET_IN_PCB_H_ #include #include #include #include #define in6pcb inpcb /* for KAME src sync over BSD*'s */ #define in6p_sp inp_sp /* for KAME src sync over BSD*'s */ struct inpcbpolicy; /* * Common structure pcb for internet protocol implementation. * Here are stored pointers to local and foreign host table * entries, local and foreign socket numbers, and pointers * up (to a socket structure) and down (to a protocol-specific) * control block. */ LIST_HEAD(inpcbhead, inpcb); LIST_HEAD(inpcbporthead, inpcbport); typedef u_quad_t inp_gen_t; /* * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet. * So, AF_INET6 null laddr is also used as AF_INET null laddr, * by utilize following structure. (At last, same as INRIA) */ struct in_addr_4in6 { u_int32_t ia46_pad32[3]; struct in_addr ia46_addr4; }; /* * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. * in_conninfo has some extra padding to accomplish this. */ struct in_endpoints { u_int16_t ie_fport; /* foreign port */ u_int16_t ie_lport; /* local port */ /* protocol dependent part, local and foreign addr */ union { /* foreign host table entry */ struct in_addr_4in6 ie46_foreign; struct in6_addr ie6_foreign; } ie_dependfaddr; union { /* local host table entry */ struct in_addr_4in6 ie46_local; struct in6_addr ie6_local; } ie_dependladdr; #define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4 #define ie_laddr ie_dependladdr.ie46_local.ia46_addr4 #define ie6_faddr ie_dependfaddr.ie6_foreign #define ie6_laddr ie_dependladdr.ie6_local }; /* * XXX * the defines for inc_* are hacks and should be changed to direct references */ struct in_conninfo { u_int8_t inc_flags; u_int8_t inc_len; u_int16_t inc_pad; /* XXX alignment for in_endpoints */ /* protocol dependent part */ struct in_endpoints inc_ie; }; #define inc_isipv6 inc_flags /* temp compatability */ #define inc_fport inc_ie.ie_fport #define inc_lport inc_ie.ie_lport #define inc_faddr inc_ie.ie_faddr #define inc_laddr inc_ie.ie_laddr #define inc6_faddr inc_ie.ie6_faddr #define inc6_laddr inc_ie.ie6_laddr struct icmp6_filter; struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* hash list */ LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */ u_int32_t inp_flow; /* local and foreign ports, local and foreign addr */ struct in_conninfo inp_inc; caddr_t inp_ppcb; /* pointer to per-protocol pcb */ struct inpcbinfo *inp_pcbinfo; /* PCB list info */ struct socket *inp_socket; /* back pointer to socket */ /* list for this PCB's local port */ struct label *inp_label; /* MAC label */ int inp_flags; /* generic IP/datagram flags */ struct inpcbpolicy *inp_sp; /* for IPSEC */ u_char inp_vflag; /* IP version flag (v4/v6) */ #define INP_IPV4 0x1 #define INP_IPV6 0x2 #define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */ #define INP_TIMEWAIT 0x8 /* .. probably doesn't go here */ #define INP_ONESBCAST 0x10 /* send all-ones broadcast */ u_char inp_ip_ttl; /* time to live proto */ u_char inp_ip_p; /* protocol proto */ /* protocol dependent part; options */ struct { u_char inp4_ip_tos; /* type of service proto */ struct mbuf *inp4_options; /* IP options */ struct ip_moptions *inp4_moptions; /* IP multicast options */ } inp_depend4; #define inp_fport inp_inc.inc_fport #define inp_lport inp_inc.inc_lport #define inp_faddr inp_inc.inc_faddr #define inp_laddr inp_inc.inc_laddr #define inp_ip_tos inp_depend4.inp4_ip_tos #define inp_options inp_depend4.inp4_options #define inp_moptions inp_depend4.inp4_moptions struct { /* IP options */ struct mbuf *inp6_options; /* IP6 options for outgoing packets */ struct ip6_pktopts *inp6_outputopts; /* IP multicast options */ struct ip6_moptions *inp6_moptions; /* ICMPv6 code type filter */ struct icmp6_filter *inp6_icmp6filt; /* IPV6_CHECKSUM setsockopt */ int inp6_cksum; u_short inp6_ifindex; short inp6_hops; } inp_depend6; LIST_ENTRY(inpcb) inp_portlist; struct inpcbport *inp_phd; /* head of this list */ inp_gen_t inp_gencnt; /* generation count of this instance */ struct mtx inp_mtx; #define in6p_faddr inp_inc.inc6_faddr #define in6p_laddr inp_inc.inc6_laddr #define in6p_ip6_hlim inp_depend6.inp6_hlim #define in6p_hops inp_depend6.inp6_hops /* default hop limit */ #define in6p_ip6_nxt inp_ip_p #define in6p_flowinfo inp_flow #define in6p_vflag inp_vflag #define in6p_options inp_depend6.inp6_options #define in6p_outputopts inp_depend6.inp6_outputopts #define in6p_moptions inp_depend6.inp6_moptions #define in6p_icmp6filt inp_depend6.inp6_icmp6filt #define in6p_cksum inp_depend6.inp6_cksum #define inp6_ifindex inp_depend6.inp6_ifindex #define in6p_flags inp_flags /* for KAME src sync over BSD*'s */ #define in6p_socket inp_socket /* for KAME src sync over BSD*'s */ #define in6p_lport inp_lport /* for KAME src sync over BSD*'s */ #define in6p_fport inp_fport /* for KAME src sync over BSD*'s */ #define in6p_ppcb inp_ppcb /* for KAME src sync over BSD*'s */ }; /* * The range of the generation count, as used in this implementation, * is 9e19. We would have to create 300 billion connections per * second for this number to roll over in a year. This seems sufficiently * unlikely that we simply don't concern ourselves with that possibility. */ /* * Interface exported to userland by various protocols which use * inpcbs. Hack alert -- only define if struct xsocket is in scope. */ #ifdef _SYS_SOCKETVAR_H_ struct xinpcb { size_t xi_len; /* length of this structure */ struct inpcb xi_inp; struct xsocket xi_socket; u_quad_t xi_alignment_hack; }; struct xinpgen { size_t xig_len; /* length of this structure */ u_int xig_count; /* number of PCBs at this time */ inp_gen_t xig_gen; /* generation count at this time */ so_gen_t xig_sogen; /* socket generation count at this time */ }; #endif /* _SYS_SOCKETVAR_H_ */ struct inpcbport { LIST_ENTRY(inpcbport) phd_hash; struct inpcbhead phd_pcblist; u_short phd_port; }; struct inpcbinfo { /* XXX documentation, prefixes */ struct inpcbhead *hashbase; u_long hashmask; struct inpcbporthead *porthashbase; u_long porthashmask; struct inpcbhead *listhead; u_short lastport; u_short lastlow; u_short lasthi; struct uma_zone *ipi_zone; /* zone to allocate pcbs from */ u_int ipi_count; /* number of pcbs in this list */ u_quad_t ipi_gencnt; /* current generation count */ struct mtx ipi_mtx; }; /* * NB: We cannot enable assertions when IPv6 is configured as * this code is shared by both IPv4 and IPv6 and IPv6 is * not properly locked. */ #define INP_LOCK_INIT(inp, d, t) \ mtx_init(&(inp)->inp_mtx, (d), (t), MTX_DEF | MTX_RECURSE | MTX_DUPOK) #define INP_LOCK_DESTROY(inp) mtx_destroy(&(inp)->inp_mtx) #define INP_LOCK(inp) mtx_lock(&(inp)->inp_mtx) #define INP_UNLOCK(inp) mtx_unlock(&(inp)->inp_mtx) #ifndef INET6 #define INP_LOCK_ASSERT(inp) mtx_assert(&(inp)->inp_mtx, MA_OWNED) #else #define INP_LOCK_ASSERT(inp) #endif #define INP_INFO_LOCK_INIT(ipi, d) \ mtx_init(&(ipi)->ipi_mtx, (d), NULL, MTX_DEF | MTX_RECURSE) #define INP_INFO_RLOCK(ipi) mtx_lock(&(ipi)->ipi_mtx) #define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_mtx) #define INP_INFO_RUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx) #define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx) #ifndef INET6 #define INP_INFO_RLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED) #define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED) #else #define INP_INFO_RLOCK_ASSERT(ipi) #define INP_INFO_WLOCK_ASSERT(ipi) #endif #define INP_PCBHASH(faddr, lport, fport, mask) \ (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) #define INP_PCBPORTHASH(lport, mask) \ (ntohs((lport)) & (mask)) /* flags in inp_flags: */ #define INP_RECVOPTS 0x01 /* receive incoming IP options */ #define INP_RECVRETOPTS 0x02 /* receive IP options for reply */ #define INP_RECVDSTADDR 0x04 /* receive IP dst address */ #define INP_HDRINCL 0x08 /* user supplies entire IP header */ #define INP_HIGHPORT 0x10 /* user wants "high" port binding */ #define INP_LOWPORT 0x20 /* user wants "low" port binding */ #define INP_ANONPORT 0x40 /* port chosen for user */ #define INP_RECVIF 0x80 /* receive incoming interface */ #define INP_MTUDISC 0x100 /* user can do MTU discovery */ #define INP_FAITH 0x200 /* accept FAITH'ed connections */ #define INP_RECVTTL 0x400 /* receive incoming IP TTL */ #define IN6P_IPV6_V6ONLY 0x008000 /* restrict AF_INET6 socket for v6 */ #define IN6P_PKTINFO 0x010000 /* receive IP6 dst and I/F */ #define IN6P_HOPLIMIT 0x020000 /* receive hoplimit */ #define IN6P_HOPOPTS 0x040000 /* receive hop-by-hop options */ #define IN6P_DSTOPTS 0x080000 /* receive dst options after rthdr */ #define IN6P_RTHDR 0x100000 /* receive routing header */ #define IN6P_RTHDRDSTOPTS 0x200000 /* receive dstoptions before rthdr */ #define IN6P_TCLASS 0x400000 /* receive traffic class value */ #define IN6P_AUTOFLOWLABEL 0x800000 /* attach flowlabel automatically */ #define IN6P_RFC2292 0x40000000 /* used RFC2292 API on the socket */ #define IN6P_MTU 0x80000000 /* receive path MTU */ #define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\ INP_RECVIF|INP_RECVTTL|\ IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\ IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\ IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\ IN6P_MTU) #define INP_UNMAPPABLEOPTS (IN6P_HOPOPTS|IN6P_DSTOPTS|IN6P_RTHDR|\ IN6P_TCLASS|IN6P_AUTOFLOWLABEL) /* for KAME src sync over BSD*'s */ #define IN6P_HIGHPORT INP_HIGHPORT #define IN6P_LOWPORT INP_LOWPORT #define IN6P_ANONPORT INP_ANONPORT #define IN6P_RECVIF INP_RECVIF #define IN6P_MTUDISC INP_MTUDISC #define IN6P_FAITH INP_FAITH #define IN6P_CONTROLOPTS INP_CONTROLOPTS /* * socket AF version is {newer than,or include} * actual datagram AF version */ #define INPLOOKUP_WILDCARD 1 #define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) #define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */ #define INP_SOCKAF(so) so->so_proto->pr_domain->dom_family #define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af) #ifdef _KERNEL extern int ipport_lowfirstauto; extern int ipport_lowlastauto; extern int ipport_firstauto; extern int ipport_lastauto; extern int ipport_hifirstauto; extern int ipport_hilastauto; void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); -int in_pcballoc(struct socket *, struct inpcbinfo *, struct thread *, - const char *); +int in_pcballoc(struct socket *, struct inpcbinfo *, const char *); int in_pcbbind(struct inpcb *, struct sockaddr *, struct thread *); int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, u_short *, struct thread *); int in_pcbconnect(struct inpcb *, struct sockaddr *, struct thread *); int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *, u_short *, in_addr_t *, u_short *, struct inpcb **, struct thread *); void in_pcbdetach(struct inpcb *); void in_pcbdisconnect(struct inpcb *); int in_pcbinshash(struct inpcb *); struct inpcb * in_pcblookup_local(struct inpcbinfo *, struct in_addr, u_int, int); struct inpcb * in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int, int, struct ifnet *); void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr, int, struct inpcb *(*)(struct inpcb *, int)); void in_pcbrehash(struct inpcb *); void in_pcbsetsolabel(struct socket *so); int in_setpeeraddr(struct socket *so, struct sockaddr **nam, struct inpcbinfo *pcbinfo); int in_setsockaddr(struct socket *so, struct sockaddr **nam, struct inpcbinfo *pcbinfo);; struct sockaddr * in_sockaddr(in_port_t port, struct in_addr *addr); void in_pcbsosetlabel(struct socket *so); void in_pcbremlists(struct inpcb *inp); #endif /* _KERNEL */ #endif /* !_NETINET_IN_PCB_H_ */ Index: head/sys/netinet/ip_divert.c =================================================================== --- head/sys/netinet/ip_divert.c (revision 127503) +++ head/sys/netinet/ip_divert.c (revision 127504) @@ -1,683 +1,683 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_inet.h" #include "opt_ipfw.h" #include "opt_ipdivert.h" #include "opt_ipsec.h" #include "opt_mac.h" #ifndef INET #error "IPDIVERT requires INET." #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Divert sockets */ /* * Allocate enough space to hold a full IP packet */ #define DIVSNDQ (65536 + 100) #define DIVRCVQ (65536 + 100) /* * Divert sockets work in conjunction with ipfw, see the divert(4) * manpage for features. * Internally, packets selected by ipfw in ip_input() or ip_output(), * and never diverted before, are passed to the input queue of the * divert socket with a given 'divert_port' number (as specified in * the matching ipfw rule), and they are tagged with a 16 bit cookie * (representing the rule number of the matching ipfw rule), which * is passed to process reading from the socket. * * Packets written to the divert socket are again tagged with a cookie * (usually the same as above) and a destination address. * If the destination address is INADDR_ANY then the packet is * treated as outgoing and sent to ip_output(), otherwise it is * treated as incoming and sent to ip_input(). * In both cases, the packet is tagged with the cookie. * * On reinjection, processing in ip_input() and ip_output() * will be exactly the same as for the original packet, except that * ipfw processing will start at the rule number after the one * written in the cookie (so, tagging a packet with a cookie of 0 * will cause it to be effectively considered as a standard packet). */ /* Internal variables */ static struct inpcbhead divcb; static struct inpcbinfo divcbinfo; static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ /* * Initialize divert connection block queue. */ void div_init(void) { INP_INFO_LOCK_INIT(&divcbinfo, "div"); LIST_INIT(&divcb); divcbinfo.listhead = &divcb; /* * XXX We don't use the hash list for divert IP, but it's easier * to allocate a one entry hash list than it is to check all * over the place for hashbase == NULL. */ divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask); divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask); divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(divcbinfo.ipi_zone, maxsockets); } /* * IPPROTO_DIVERT is not in the real IP protocol number space; this * function should never be called. Just in case, drop any packets. */ void div_input(struct mbuf *m, int off) { ipstat.ips_noproto++; m_freem(m); } /* * Divert a packet by passing it up to the divert socket at port 'port'. * * Setup generic address and protocol structures for div_input routine, * then pass them along with mbuf chain. */ void divert_packet(struct mbuf *m, int incoming) { struct ip *ip; struct inpcb *inp; struct socket *sa; u_int16_t nport; struct sockaddr_in divsrc; struct m_tag *mtag; mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); if (mtag == NULL) { printf("%s: no divert tag\n", __func__); m_freem(m); return; } /* Assure header */ if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == 0) return; ip = mtod(m, struct ip *); /* * Record receive interface address, if any. * But only for incoming packets. */ bzero(&divsrc, sizeof(divsrc)); divsrc.sin_len = sizeof(divsrc); divsrc.sin_family = AF_INET; divsrc.sin_port = divert_cookie(mtag); /* record matching rule */ if (incoming) { struct ifaddr *ifa; /* Sanity check */ M_ASSERTPKTHDR(m); /* Find IP address for receive interface */ TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { if (ifa->ifa_addr == NULL) continue; if (ifa->ifa_addr->sa_family != AF_INET) continue; divsrc.sin_addr = ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr; break; } } /* * Record the incoming interface name whenever we have one. */ if (m->m_pkthdr.rcvif) { /* * Hide the actual interface name in there in the * sin_zero array. XXX This needs to be moved to a * different sockaddr type for divert, e.g. * sockaddr_div with multiple fields like * sockaddr_dl. Presently we have only 7 bytes * but that will do for now as most interfaces * are 4 or less + 2 or less bytes for unit. * There is probably a faster way of doing this, * possibly taking it from the sockaddr_dl on the iface. * This solves the problem of a P2P link and a LAN interface * having the same address, which can result in the wrong * interface being assigned to the packet when fed back * into the divert socket. Theoretically if the daemon saves * and re-uses the sockaddr_in as suggested in the man pages, * this iface name will come along for the ride. * (see div_output for the other half of this.) */ strlcpy(divsrc.sin_zero, m->m_pkthdr.rcvif->if_xname, sizeof(divsrc.sin_zero)); } /* * XXX sbappendaddr must be protected by Giant until * we have locking at the socket layer. When entered * from below we come in w/o Giant and must take it * here. Unfortunately we cannot tell whether we're * entering from above (already holding Giant), * below (potentially without Giant), or otherwise * (e.g. from tcp_syncache through a timeout) so we * have to grab it regardless. This causes a LOR with * the tcp lock, at least, and possibly others. For * the moment we're ignoring this. Once sockets are * locked this cruft can be removed. */ mtx_lock(&Giant); /* Put packet on socket queue, if any */ sa = NULL; nport = htons((u_int16_t)divert_info(mtag)); INP_INFO_RLOCK(&divcbinfo); LIST_FOREACH(inp, &divcb, inp_list) { INP_LOCK(inp); /* XXX why does only one socket match? */ if (inp->inp_lport == nport) { sa = inp->inp_socket; if (sbappendaddr(&sa->so_rcv, (struct sockaddr *)&divsrc, m, (struct mbuf *)0) == 0) sa = NULL; /* force mbuf reclaim below */ else sorwakeup(sa); INP_UNLOCK(inp); break; } INP_UNLOCK(inp); } INP_INFO_RUNLOCK(&divcbinfo); mtx_unlock(&Giant); if (sa == NULL) { m_freem(m); ipstat.ips_noproto++; ipstat.ips_delivered--; } } /* * Deliver packet back into the IP processing machinery. * * If no address specified, or address is 0.0.0.0, send to ip_output(); * otherwise, send to ip_input() and mark as having been received on * the interface with that address. */ static int div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, struct mbuf *control) { int error = 0; KASSERT(m->m_pkthdr.rcvif == NULL, ("rcvif not null")); #ifdef MAC mac_create_mbuf_from_socket(so, m); #endif if (control) m_freem(control); /* XXX */ /* Loopback avoidance and state recovery */ if (sin) { struct m_tag *mtag; struct divert_tag *dt; int i; mtag = m_tag_get(PACKET_TAG_DIVERT, sizeof(struct divert_tag), M_NOWAIT); if (mtag == NULL) { error = ENOBUFS; goto cantsend; } dt = (struct divert_tag *)(mtag+1); dt->info = 0; dt->cookie = sin->sin_port; m_tag_prepend(m, mtag); /* * Find receive interface with the given name, stuffed * (if it exists) in the sin_zero[] field. * The name is user supplied data so don't trust its size * or that it is zero terminated. */ for (i = 0; i < sizeof(sin->sin_zero) && sin->sin_zero[i]; i++) ; if ( i > 0 && i < sizeof(sin->sin_zero)) m->m_pkthdr.rcvif = ifunit(sin->sin_zero); } /* Reinject packet into the system as incoming or outgoing */ if (!sin || sin->sin_addr.s_addr == 0) { struct ip *const ip = mtod(m, struct ip *); struct inpcb *inp; INP_INFO_WLOCK(&divcbinfo); inp = sotoinpcb(so); INP_LOCK(inp); /* * Don't allow both user specified and setsockopt options, * and don't allow packet length sizes that will crash */ if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) || ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) { error = EINVAL; m_freem(m); } else { /* Convert fields to host order for ip_output() */ ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); /* Send packet to output processing */ ipstat.ips_rawout++; /* XXX */ error = ip_output(m, inp->inp_options, NULL, (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST | IP_RAWOUTPUT, inp->inp_moptions, NULL); } INP_UNLOCK(inp); INP_INFO_WUNLOCK(&divcbinfo); } else { if (m->m_pkthdr.rcvif == NULL) { /* * No luck with the name, check by IP address. * Clear the port and the ifname to make sure * there are no distractions for ifa_ifwithaddr. */ struct ifaddr *ifa; bzero(sin->sin_zero, sizeof(sin->sin_zero)); sin->sin_port = 0; ifa = ifa_ifwithaddr((struct sockaddr *) sin); if (ifa == NULL) { error = EADDRNOTAVAIL; goto cantsend; } m->m_pkthdr.rcvif = ifa->ifa_ifp; } /* Send packet to input processing */ ip_input(m); } return error; cantsend: m_freem(m); return error; } /* * Return a copy of the specified packet, but without * the divert tag. This is used when packets are ``tee'd'' * and we want the cloned copy to not have divert processing. */ struct mbuf * divert_clone(struct mbuf *m) { struct mbuf *clone; struct m_tag *mtag; clone = m_dup(m, M_DONTWAIT); if (clone != NULL) { /* strip divert tag from copy */ mtag = m_tag_find(clone, PACKET_TAG_DIVERT, NULL); if (mtag != NULL) m_tag_delete(clone, mtag); } return clone; } static int div_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error; INP_INFO_WLOCK(&divcbinfo); inp = sotoinpcb(so); if (inp != 0) { INP_INFO_WUNLOCK(&divcbinfo); return EINVAL; } if (td && (error = suser(td)) != 0) { INP_INFO_WUNLOCK(&divcbinfo); return error; } error = soreserve(so, div_sendspace, div_recvspace); if (error) { INP_INFO_WUNLOCK(&divcbinfo); return error; } - error = in_pcballoc(so, &divcbinfo, td, "divinp"); + error = in_pcballoc(so, &divcbinfo, "divinp"); if (error) { INP_INFO_WUNLOCK(&divcbinfo); return error; } inp = (struct inpcb *)so->so_pcb; INP_LOCK(inp); INP_INFO_WUNLOCK(&divcbinfo); inp->inp_ip_p = proto; inp->inp_vflag |= INP_IPV4; inp->inp_flags |= INP_HDRINCL; /* The socket is always "connected" because we always know "where" to send the packet */ INP_UNLOCK(inp); so->so_state |= SS_ISCONNECTED; return 0; } static int div_detach(struct socket *so) { struct inpcb *inp; INP_INFO_WLOCK(&divcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&divcbinfo); return EINVAL; } INP_LOCK(inp); in_pcbdetach(inp); INP_INFO_WUNLOCK(&divcbinfo); return 0; } static int div_abort(struct socket *so) { struct inpcb *inp; INP_INFO_WLOCK(&divcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&divcbinfo); return EINVAL; /* ??? possible? panic instead? */ } INP_LOCK(inp); soisdisconnected(so); in_pcbdetach(inp); INP_INFO_WUNLOCK(&divcbinfo); return 0; } static int div_disconnect(struct socket *so) { if ((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN; return div_abort(so); } static int div_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int error; INP_INFO_WLOCK(&divcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&divcbinfo); return EINVAL; } /* in_pcbbind assumes that nam is a sockaddr_in * and in_pcbbind requires a valid address. Since divert * sockets don't we need to make sure the address is * filled in properly. * XXX -- divert should not be abusing in_pcbind * and should probably have its own family. */ if (nam->sa_family != AF_INET) error = EAFNOSUPPORT; else { ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY; INP_LOCK(inp); error = in_pcbbind(inp, nam, td); INP_UNLOCK(inp); } INP_INFO_WUNLOCK(&divcbinfo); return error; } static int div_shutdown(struct socket *so) { struct inpcb *inp; INP_INFO_RLOCK(&divcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_RUNLOCK(&divcbinfo); return EINVAL; } INP_LOCK(inp); INP_INFO_RUNLOCK(&divcbinfo); socantsendmore(so); INP_UNLOCK(inp); return 0; } static int div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { /* Packet must have a header (but that's about it) */ if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == 0) { ipstat.ips_toosmall++; m_freem(m); return EINVAL; } /* Send packet */ return div_output(so, m, (struct sockaddr_in *)nam, control); } void div_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct in_addr faddr; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (PRC_IS_REDIRECT(cmd)) return; } static int div_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = divcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); return 0; } if (req->newptr != 0) return EPERM; /* * OK, now we're committed to doing something. */ INP_INFO_RLOCK(&divcbinfo); gencnt = divcbinfo.ipi_gencnt; n = divcbinfo.ipi_count; INP_INFO_RUNLOCK(&divcbinfo); error = sysctl_wire_old_buffer(req, 2 * sizeof(xig) + n*sizeof(struct xinpcb)); if (error != 0) return (error); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) return ENOMEM; INP_INFO_RLOCK(&divcbinfo); for (inp = LIST_FIRST(divcbinfo.listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_LOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) inp_list[i++] = inp; INP_UNLOCK(inp); } INP_INFO_RUNLOCK(&divcbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); error = SYSCTL_OUT(req, &xi, sizeof xi); } } if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ INP_INFO_RLOCK(&divcbinfo); xig.xig_gen = divcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = divcbinfo.ipi_count; INP_INFO_RUNLOCK(&divcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return error; } /* * This is the wrapper function for in_setsockaddr. We just pass down * the pcbinfo for in_setpeeraddr to lock. */ static int div_sockaddr(struct socket *so, struct sockaddr **nam) { return (in_setsockaddr(so, nam, &divcbinfo)); } /* * This is the wrapper function for in_setpeeraddr. We just pass down * the pcbinfo for in_setpeeraddr to lock. */ static int div_peeraddr(struct socket *so, struct sockaddr **nam) { return (in_setpeeraddr(so, nam, &divcbinfo)); } SYSCTL_DECL(_net_inet_divert); SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLFLAG_RD, 0, 0, div_pcblist, "S,xinpcb", "List of active divert sockets"); struct pr_usrreqs div_usrreqs = { div_abort, pru_accept_notsupp, div_attach, div_bind, pru_connect_notsupp, pru_connect2_notsupp, in_control, div_detach, div_disconnect, pru_listen_notsupp, div_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, div_send, pru_sense_null, div_shutdown, div_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel }; Index: head/sys/netinet/raw_ip.c =================================================================== --- head/sys/netinet/raw_ip.c (revision 127503) +++ head/sys/netinet/raw_ip.c (revision 127504) @@ -1,839 +1,839 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 * $FreeBSD$ */ #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_mac.h" #include "opt_random_ip_id.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FAST_IPSEC #include #endif /*FAST_IPSEC*/ #ifdef IPSEC #include #endif /*IPSEC*/ struct inpcbhead ripcb; struct inpcbinfo ripcbinfo; /* control hooks for ipfw and dummynet */ ip_fw_ctl_t *ip_fw_ctl_ptr; ip_dn_ctl_t *ip_dn_ctl_ptr; /* * hooks for multicast routing. They all default to NULL, * so leave them not initialized and rely on BSS being set to 0. */ /* The socket used to communicate with the multicast routing daemon. */ struct socket *ip_mrouter; /* The various mrouter and rsvp functions */ int (*ip_mrouter_set)(struct socket *, struct sockopt *); int (*ip_mrouter_get)(struct socket *, struct sockopt *); int (*ip_mrouter_done)(void); int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); int (*mrt_ioctl)(int, caddr_t); int (*legal_vif_num)(int); u_long (*ip_mcast_src)(int); void (*rsvp_input_p)(struct mbuf *m, int off); int (*ip_rsvp_vif)(struct socket *, struct sockopt *); void (*ip_rsvp_force_done)(struct socket *); /* * Nominal space allocated to a raw ip socket. */ #define RIPSNDQ 8192 #define RIPRCVQ 8192 /* * Raw interface to IP protocol. */ /* * Initialize raw connection block q. */ void rip_init() { INP_INFO_LOCK_INIT(&ripcbinfo, "rip"); LIST_INIT(&ripcb); ripcbinfo.listhead = &ripcb; /* * XXX We don't use the hash list for raw IP, but it's easier * to allocate a one entry hash list than it is to check all * over the place for hashbase == NULL. */ ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask); ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask); ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets); } static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; static int raw_append(struct inpcb *last, struct ip *ip, struct mbuf *n) { int policyfail = 0; #if defined(IPSEC) || defined(FAST_IPSEC) /* check AH/ESP integrity. */ if (ipsec4_in_reject(n, last)) { policyfail = 1; #ifdef IPSEC ipsecstat.in_polvio++; #endif /*IPSEC*/ /* do not inject data to pcb */ } #endif /*IPSEC || FAST_IPSEC*/ #ifdef MAC if (!policyfail && mac_check_inpcb_deliver(last, n) != 0) policyfail = 1; #endif if (!policyfail) { struct mbuf *opts = NULL; if ((last->inp_flags & INP_CONTROLOPTS) || (last->inp_socket->so_options & SO_TIMESTAMP)) ip_savecontrol(last, &opts, ip, n); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&ripsrc, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) m_freem(opts); } else sorwakeup(last->inp_socket); } else m_freem(n); return policyfail; } /* * Setup generic address and protocol structures * for raw_input routine, then pass them along with * mbuf chain. */ void rip_input(struct mbuf *m, int off) { struct ip *ip = mtod(m, struct ip *); int proto = ip->ip_p; struct inpcb *inp, *last; INP_INFO_RLOCK(&ripcbinfo); ripsrc.sin_addr = ip->ip_src; last = NULL; LIST_FOREACH(inp, &ripcb, inp_list) { INP_LOCK(inp); if (inp->inp_ip_p && inp->inp_ip_p != proto) { docontinue: INP_UNLOCK(inp); continue; } #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) goto docontinue; #endif if (inp->inp_laddr.s_addr && inp->inp_laddr.s_addr != ip->ip_dst.s_addr) goto docontinue; if (inp->inp_faddr.s_addr && inp->inp_faddr.s_addr != ip->ip_src.s_addr) goto docontinue; if (last) { struct mbuf *n; n = m_copy(m, 0, (int)M_COPYALL); if (n != NULL) (void) raw_append(last, ip, n); /* XXX count dropped packet */ INP_UNLOCK(last); } last = inp; } if (last != NULL) { if (raw_append(last, ip, m) != 0) ipstat.ips_delivered--; INP_UNLOCK(last); } else { m_freem(m); ipstat.ips_noproto++; ipstat.ips_delivered--; } INP_INFO_RUNLOCK(&ripcbinfo); } /* * Generate IP header and pass packet to ip_output. * Tack on options user may have setup with control call. */ int rip_output(struct mbuf *m, struct socket *so, u_long dst) { struct ip *ip; struct inpcb *inp = sotoinpcb(so); int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST; #ifdef MAC mac_create_mbuf_from_socket(so, m); #endif /* * If the user handed us a complete IP packet, use it. * Otherwise, allocate an mbuf for a header and fill it in. */ if ((inp->inp_flags & INP_HDRINCL) == 0) { if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } M_PREPEND(m, sizeof(struct ip), M_TRYWAIT); if (m == NULL) return(ENOBUFS); ip = mtod(m, struct ip *); ip->ip_tos = inp->inp_ip_tos; ip->ip_off = 0; ip->ip_p = inp->inp_ip_p; ip->ip_len = m->m_pkthdr.len; ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; ip->ip_ttl = inp->inp_ip_ttl; } else { if (m->m_pkthdr.len > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } ip = mtod(m, struct ip *); /* don't allow both user specified and setsockopt options, and don't allow packet length sizes that will crash */ if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) || (ip->ip_len > m->m_pkthdr.len) || (ip->ip_len < (ip->ip_hl << 2))) { m_freem(m); return EINVAL; } if (ip->ip_id == 0) #ifdef RANDOM_IP_ID ip->ip_id = ip_randomid(); #else ip->ip_id = htons(ip_id++); #endif /* XXX prevent ip_output from overwriting header fields */ flags |= IP_RAWOUTPUT; ipstat.ips_rawout++; } if (inp->inp_flags & INP_ONESBCAST) flags |= IP_SENDONES; return (ip_output(m, inp->inp_options, NULL, flags, inp->inp_moptions, inp)); } /* * Raw IP socket option processing. * * Note that access to all of the IP administrative functions here is * implicitly protected by suser() as gaining access to a raw socket * requires either that the thread pass a suser() check, or that it be * passed a raw socket by another thread that has passed a suser() check. * If FreeBSD moves to a more fine-grained access control mechanism, * additional checks will need to be placed here if the raw IP attachment * check is not equivilent the the check required for these * administrative operations; in some cases, these checks are already * present. */ int rip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error, optval; if (sopt->sopt_level != IPPROTO_IP) return (EINVAL); error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case IP_HDRINCL: optval = inp->inp_flags & INP_HDRINCL; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IP_FW_ADD: /* ADD actually returns the body... */ case IP_FW_GET: if (IPFW_LOADED) error = ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET_GET: if (DUMMYNET_LOADED) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT; break ; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = ip_mrouter_get ? ip_mrouter_get(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case IP_HDRINCL: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; break; case IP_FW_ADD: case IP_FW_DEL: case IP_FW_FLUSH: case IP_FW_ZERO: case IP_FW_RESETLOG: if (IPFW_LOADED) error = ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET_CONFIGURE: case IP_DUMMYNET_DEL: case IP_DUMMYNET_FLUSH: if (DUMMYNET_LOADED) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT ; break ; case IP_RSVP_ON: error = ip_rsvp_init(so); break; case IP_RSVP_OFF: error = ip_rsvp_done(); break; case IP_RSVP_VIF_ON: case IP_RSVP_VIF_OFF: error = ip_rsvp_vif ? ip_rsvp_vif(so, sopt) : EINVAL; break; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; } return (error); } /* * This function exists solely to receive the PRC_IFDOWN messages which * are sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, * and calls in_ifadown() to remove all routes corresponding to that address. * It also receives the PRC_IFUP messages from if_up() and reinstalls the * interface routes. */ void rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct in_ifaddr *ia; struct ifnet *ifp; int err; int flags; switch (cmd) { case PRC_IFDOWN: TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa && (ia->ia_flags & IFA_ROUTE)) { /* * in_ifscrub kills the interface route. */ in_ifscrub(ia->ia_ifp, ia); /* * in_ifadown gets rid of all the rest of * the routes. This is not quite the right * thing to do, but at least if we are running * a routing process they will come back. */ in_ifadown(&ia->ia_ifa, 0); break; } } break; case PRC_IFUP: TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa) break; } if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) return; flags = RTF_UP; ifp = ia->ia_ifa.ifa_ifp; if ((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_flags & IFF_POINTOPOINT)) flags |= RTF_HOST; err = rtinit(&ia->ia_ifa, RTM_ADD, flags); if (err == 0) ia->ia_flags |= IFA_ROUTE; break; } } u_long rip_sendspace = RIPSNDQ; u_long rip_recvspace = RIPRCVQ; SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); static int rip_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error; /* XXX why not lower? */ INP_INFO_WLOCK(&ripcbinfo); inp = sotoinpcb(so); if (inp) { /* XXX counter, printf */ INP_INFO_WUNLOCK(&ripcbinfo); return EINVAL; } if (td && (error = suser(td)) != 0) { INP_INFO_WUNLOCK(&ripcbinfo); return error; } if (proto >= IPPROTO_MAX || proto < 0) { INP_INFO_WUNLOCK(&ripcbinfo); return EPROTONOSUPPORT; } error = soreserve(so, rip_sendspace, rip_recvspace); if (error) { INP_INFO_WUNLOCK(&ripcbinfo); return error; } - error = in_pcballoc(so, &ripcbinfo, td, "rawinp"); + error = in_pcballoc(so, &ripcbinfo, "rawinp"); if (error) { INP_INFO_WUNLOCK(&ripcbinfo); return error; } inp = (struct inpcb *)so->so_pcb; INP_LOCK(inp); INP_INFO_WUNLOCK(&ripcbinfo); inp->inp_vflag |= INP_IPV4; inp->inp_ip_p = proto; inp->inp_ip_ttl = ip_defttl; INP_UNLOCK(inp); return 0; } static void rip_pcbdetach(struct socket *so, struct inpcb *inp) { INP_INFO_WLOCK_ASSERT(&ripcbinfo); INP_LOCK_ASSERT(inp); if (so == ip_mrouter && ip_mrouter_done) ip_mrouter_done(); if (ip_rsvp_force_done) ip_rsvp_force_done(so); if (so == ip_rsvpd) ip_rsvp_done(); in_pcbdetach(inp); } static int rip_detach(struct socket *so) { struct inpcb *inp; INP_INFO_WLOCK(&ripcbinfo); inp = sotoinpcb(so); if (inp == 0) { /* XXX counter, printf */ INP_INFO_WUNLOCK(&ripcbinfo); return EINVAL; } INP_LOCK(inp); rip_pcbdetach(so, inp); INP_INFO_WUNLOCK(&ripcbinfo); return 0; } static int rip_abort(struct socket *so) { struct inpcb *inp; INP_INFO_WLOCK(&ripcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&ripcbinfo); return EINVAL; /* ??? possible? panic instead? */ } INP_LOCK(inp); soisdisconnected(so); if (so->so_state & SS_NOFDREF) rip_pcbdetach(so, inp); else INP_UNLOCK(inp); INP_INFO_WUNLOCK(&ripcbinfo); return 0; } static int rip_disconnect(struct socket *so) { if ((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN; return rip_abort(so); } static int rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; if (nam->sa_len != sizeof(*addr)) return EINVAL; if (TAILQ_EMPTY(&ifnet) || (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || (addr->sin_addr.s_addr && ifa_ifwithaddr((struct sockaddr *)addr) == 0)) return EADDRNOTAVAIL; INP_INFO_WLOCK(&ripcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&ripcbinfo); return EINVAL; } INP_LOCK(inp); inp->inp_laddr = addr->sin_addr; INP_UNLOCK(inp); INP_INFO_WUNLOCK(&ripcbinfo); return 0; } static int rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; if (nam->sa_len != sizeof(*addr)) return EINVAL; if (TAILQ_EMPTY(&ifnet)) return EADDRNOTAVAIL; if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) return EAFNOSUPPORT; INP_INFO_WLOCK(&ripcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&ripcbinfo); return EINVAL; } INP_LOCK(inp); inp->inp_faddr = addr->sin_addr; soisconnected(so); INP_UNLOCK(inp); INP_INFO_WUNLOCK(&ripcbinfo); return 0; } static int rip_shutdown(struct socket *so) { struct inpcb *inp; INP_INFO_RLOCK(&ripcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_RUNLOCK(&ripcbinfo); return EINVAL; } INP_LOCK(inp); INP_INFO_RUNLOCK(&ripcbinfo); socantsendmore(so); INP_UNLOCK(inp); return 0; } static int rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; u_long dst; int ret; INP_INFO_WLOCK(&ripcbinfo); inp = sotoinpcb(so); if (so->so_state & SS_ISCONNECTED) { if (nam) { INP_INFO_WUNLOCK(&ripcbinfo); m_freem(m); return EISCONN; } dst = inp->inp_faddr.s_addr; } else { if (nam == NULL) { INP_INFO_WUNLOCK(&ripcbinfo); m_freem(m); return ENOTCONN; } dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; } INP_LOCK(inp); ret = rip_output(m, so, dst); INP_UNLOCK(inp); INP_INFO_WUNLOCK(&ripcbinfo); return ret; } static int rip_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = ripcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); return 0; } if (req->newptr != 0) return EPERM; /* * OK, now we're committed to doing something. */ INP_INFO_RLOCK(&ripcbinfo); gencnt = ripcbinfo.ipi_gencnt; n = ripcbinfo.ipi_count; INP_INFO_RUNLOCK(&ripcbinfo); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) return ENOMEM; INP_INFO_RLOCK(&ripcbinfo); for (inp = LIST_FIRST(ripcbinfo.listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_LOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) { /* XXX held references? */ inp_list[i++] = inp; } INP_UNLOCK(inp); } INP_INFO_RUNLOCK(&ripcbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); error = SYSCTL_OUT(req, &xi, sizeof xi); } } if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ INP_INFO_RLOCK(&ripcbinfo); xig.xig_gen = ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = ripcbinfo.ipi_count; INP_INFO_RUNLOCK(&ripcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return error; } /* * This is the wrapper function for in_setsockaddr. We just pass down * the pcbinfo for in_setpeeraddr to lock. */ static int rip_sockaddr(struct socket *so, struct sockaddr **nam) { return (in_setsockaddr(so, nam, &ripcbinfo)); } /* * This is the wrapper function for in_setpeeraddr. We just pass down * the pcbinfo for in_setpeeraddr to lock. */ static int rip_peeraddr(struct socket *so, struct sockaddr **nam) { return (in_setpeeraddr(so, nam, &ripcbinfo)); } SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); struct pr_usrreqs rip_usrreqs = { rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect, pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, pru_listen_notsupp, rip_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown, rip_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel }; Index: head/sys/netinet/tcp_usrreq.c =================================================================== --- head/sys/netinet/tcp_usrreq.c (revision 127503) +++ head/sys/netinet/tcp_usrreq.c (revision 127504) @@ -1,1309 +1,1309 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 * $FreeBSD$ */ #include "opt_ipsec.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #ifdef TCPDEBUG #include #endif #ifdef IPSEC #include #endif /*IPSEC*/ /* * TCP protocol interface to socket abstraction. */ extern char *tcpstates[]; /* XXX ??? */ static int tcp_attach(struct socket *, struct thread *td); static int tcp_connect(struct tcpcb *, struct sockaddr *, struct thread *td); #ifdef INET6 static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct thread *td); #endif /* INET6 */ static struct tcpcb * tcp_disconnect(struct tcpcb *); static struct tcpcb * tcp_usrclosed(struct tcpcb *); #ifdef TCPDEBUG #define TCPDEBUG0 int ostate = 0 #define TCPDEBUG1() ostate = tp ? tp->t_state : 0 #define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ tcp_trace(TA_USER, ostate, tp, 0, 0, req) #else #define TCPDEBUG0 #define TCPDEBUG1() #define TCPDEBUG2(req) #endif /* * TCP attaches to socket via pru_attach(), reserving space, * and an internet control block. */ static int tcp_usr_attach(struct socket *so, int proto, struct thread *td) { int s = splnet(); int error; struct inpcb *inp; struct tcpcb *tp = 0; TCPDEBUG0; INP_INFO_WLOCK(&tcbinfo); TCPDEBUG1(); inp = sotoinpcb(so); if (inp) { error = EISCONN; goto out; } error = tcp_attach(so, td); if (error) goto out; if ((so->so_options & SO_LINGER) && so->so_linger == 0) so->so_linger = TCP_LINGERTIME; inp = sotoinpcb(so); tp = intotcpcb(inp); out: TCPDEBUG2(PRU_ATTACH); INP_INFO_WUNLOCK(&tcbinfo); splx(s); return error; } /* * pru_detach() detaches the TCP protocol from the socket. * If the protocol state is non-embryonic, then can't * do this directly: have to initiate a pru_disconnect(), * which may finish later; embryonic TCB's can just * be discarded here. */ static int tcp_usr_detach(struct socket *so) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; TCPDEBUG0; INP_INFO_WLOCK(&tcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&tcbinfo); splx(s); return EINVAL; /* XXX */ } INP_LOCK(inp); tp = intotcpcb(inp); TCPDEBUG1(); tp = tcp_disconnect(tp); TCPDEBUG2(PRU_DETACH); if (tp) INP_UNLOCK(inp); INP_INFO_WUNLOCK(&tcbinfo); splx(s); return error; } #define INI_NOLOCK 0 #define INI_READ 1 #define INI_WRITE 2 #define COMMON_START() \ TCPDEBUG0; \ do { \ if (inirw == INI_READ) \ INP_INFO_RLOCK(&tcbinfo); \ else if (inirw == INI_WRITE) \ INP_INFO_WLOCK(&tcbinfo); \ inp = sotoinpcb(so); \ if (inp == 0) { \ if (inirw == INI_READ) \ INP_INFO_RUNLOCK(&tcbinfo); \ else if (inirw == INI_WRITE) \ INP_INFO_WUNLOCK(&tcbinfo); \ splx(s); \ return EINVAL; \ } \ INP_LOCK(inp); \ if (inirw == INI_READ) \ INP_INFO_RUNLOCK(&tcbinfo); \ tp = intotcpcb(inp); \ TCPDEBUG1(); \ } while(0) #define COMMON_END(req) \ out: TCPDEBUG2(req); \ do { \ if (tp) \ INP_UNLOCK(inp); \ if (inirw == INI_WRITE) \ INP_INFO_WUNLOCK(&tcbinfo); \ splx(s); \ return error; \ goto out; \ } while(0) /* * Give the socket an address. */ static int tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; const int inirw = INI_WRITE; COMMON_START(); /* * Must check for multicast addresses and disallow binding * to them. */ sinp = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sinp)) return (EINVAL); if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } error = in_pcbbind(inp, nam, td); if (error) goto out; COMMON_END(PRU_BIND); } #ifdef INET6 static int tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6p; const int inirw = INI_WRITE; COMMON_START(); /* * Must check for multicast addresses and disallow binding * to them. */ sin6p = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof (*sin6p)) return (EINVAL); if (sin6p->sin6_family == AF_INET6 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { error = EAFNOSUPPORT; goto out; } inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) inp->inp_vflag |= INP_IPV4; else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, (struct sockaddr *)&sin, td); goto out; } } error = in6_pcbbind(inp, nam, td); if (error) goto out; COMMON_END(PRU_BIND); } #endif /* INET6 */ /* * Prepare to accept connections. */ static int tcp_usr_listen(struct socket *so, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; COMMON_START(); if (inp->inp_lport == 0) error = in_pcbbind(inp, (struct sockaddr *)0, td); if (error == 0) tp->t_state = TCPS_LISTEN; COMMON_END(PRU_LISTEN); } #ifdef INET6 static int tcp6_usr_listen(struct socket *so, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; COMMON_START(); if (inp->inp_lport == 0) { inp->inp_vflag &= ~INP_IPV4; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; error = in6_pcbbind(inp, (struct sockaddr *)0, td); } if (error == 0) tp->t_state = TCPS_LISTEN; COMMON_END(PRU_LISTEN); } #endif /* INET6 */ /* * Initiate connection to peer. * Create a template for use in transmissions on this connection. * Enter SYN_SENT state, and mark socket as connecting. * Start keep-alive timer, and seed output sequence space. * Send initial segment on connection. */ static int tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; const int inirw = INI_WRITE; COMMON_START(); /* * Must disallow TCP ``connections'' to multicast addresses. */ sinp = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sinp)) return (EINVAL); if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } if (td && jailed(td->td_ucred)) prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr); if ((error = tcp_connect(tp, nam, td)) != 0) goto out; error = tcp_output(tp); COMMON_END(PRU_CONNECT); } #ifdef INET6 static int tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6p; const int inirw = INI_WRITE; COMMON_START(); /* * Must disallow TCP ``connections'' to multicast addresses. */ sin6p = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof (*sin6p)) return (EINVAL); if (sin6p->sin6_family == AF_INET6 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { error = EAFNOSUPPORT; goto out; } if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { struct sockaddr_in sin; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } in6_sin6_2_sin(&sin, sin6p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) goto out; error = tcp_output(tp); goto out; } inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; inp->inp_inc.inc_isipv6 = 1; if ((error = tcp6_connect(tp, nam, td)) != 0) goto out; error = tcp_output(tp); COMMON_END(PRU_CONNECT); } #endif /* INET6 */ /* * Initiate disconnect from peer. * If connection never passed embryonic stage, just drop; * else if don't need to let data drain, then can just drop anyways, * else have to begin TCP shutdown process: mark socket disconnecting, * drain unread data, state switch to reflect user close, and * send segment (e.g. FIN) to peer. Socket will be really disconnected * when peer sends FIN and acks ours. * * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. */ static int tcp_usr_disconnect(struct socket *so) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; COMMON_START(); tp = tcp_disconnect(tp); COMMON_END(PRU_DISCONNECT); } /* * Accept a connection. Essentially all the work is * done at higher levels; just return the address * of the peer, storing through addr. */ static int tcp_usr_accept(struct socket *so, struct sockaddr **nam) { int s; int error = 0; struct inpcb *inp = NULL; struct tcpcb *tp = NULL; struct in_addr addr; in_port_t port = 0; TCPDEBUG0; if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; goto out; } s = splnet(); INP_INFO_RLOCK(&tcbinfo); inp = sotoinpcb(so); if (!inp) { INP_INFO_RUNLOCK(&tcbinfo); splx(s); return (EINVAL); } INP_LOCK(inp); INP_INFO_RUNLOCK(&tcbinfo); tp = intotcpcb(inp); TCPDEBUG1(); /* * We inline in_setpeeraddr and COMMON_END here, so that we can * copy the data of interest and defer the malloc until after we * release the lock. */ port = inp->inp_fport; addr = inp->inp_faddr; out: TCPDEBUG2(PRU_ACCEPT); if (tp) INP_UNLOCK(inp); splx(s); if (error == 0) *nam = in_sockaddr(port, &addr); return error; } #ifdef INET6 static int tcp6_usr_accept(struct socket *so, struct sockaddr **nam) { int s; struct inpcb *inp = NULL; int error = 0; struct tcpcb *tp = NULL; struct in_addr addr; struct in6_addr addr6; in_port_t port = 0; int v4 = 0; TCPDEBUG0; if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; goto out; } s = splnet(); INP_INFO_RLOCK(&tcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_RUNLOCK(&tcbinfo); splx(s); return (EINVAL); } INP_LOCK(inp); INP_INFO_RUNLOCK(&tcbinfo); tp = intotcpcb(inp); TCPDEBUG1(); /* * We inline in6_mapped_peeraddr and COMMON_END here, so that we can * copy the data of interest and defer the malloc until after we * release the lock. */ if (inp->inp_vflag & INP_IPV4) { v4 = 1; port = inp->inp_fport; addr = inp->inp_faddr; } else { port = inp->inp_fport; addr6 = inp->in6p_faddr; } out: TCPDEBUG2(PRU_ACCEPT); if (tp) INP_UNLOCK(inp); splx(s); if (error == 0) { if (v4) *nam = in6_v4mapsin6_sockaddr(port, &addr); else *nam = in6_sockaddr(port, &addr6); } return error; } #endif /* INET6 */ /* * This is the wrapper function for in_setsockaddr. We just pass down * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking * here because in_setsockaddr will call malloc and can block. */ static int tcp_sockaddr(struct socket *so, struct sockaddr **nam) { return (in_setsockaddr(so, nam, &tcbinfo)); } /* * This is the wrapper function for in_setpeeraddr. We just pass down * the pcbinfo for in_setpeeraddr to lock. */ static int tcp_peeraddr(struct socket *so, struct sockaddr **nam) { return (in_setpeeraddr(so, nam, &tcbinfo)); } /* * Mark the connection as being incapable of further output. */ static int tcp_usr_shutdown(struct socket *so) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; COMMON_START(); socantsendmore(so); tp = tcp_usrclosed(tp); if (tp) error = tcp_output(tp); COMMON_END(PRU_SHUTDOWN); } /* * After a receive, possibly send window update to peer. */ static int tcp_usr_rcvd(struct socket *so, int flags) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_READ; COMMON_START(); tcp_output(tp); COMMON_END(PRU_RCVD); } /* * Do a send by putting data in output queue and updating urgent * marker if URG set. Possibly send more data. Unlike the other * pru_*() routines, the mbuf chains are our responsibility. We * must either enqueue them or free them. The other pru_* routines * generally are caller-frees. */ static int tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; #ifdef INET6 int isipv6; #endif TCPDEBUG0; /* * Need write lock here because this function might call * tcp_connect or tcp_usrclosed. * We really want to have to this function upgrade from read lock * to write lock. XXX */ INP_INFO_WLOCK(&tcbinfo); inp = sotoinpcb(so); if (inp == NULL) { /* * OOPS! we lost a race, the TCP session got reset after * we checked SS_CANTSENDMORE, eg: while doing uiomove or a * network interrupt in the non-splnet() section of sosend(). */ if (m) m_freem(m); if (control) m_freem(control); error = ECONNRESET; /* XXX EPIPE? */ tp = NULL; TCPDEBUG1(); goto out; } INP_LOCK(inp); #ifdef INET6 isipv6 = nam && nam->sa_family == AF_INET6; #endif /* INET6 */ tp = intotcpcb(inp); TCPDEBUG1(); if (control) { /* TCP doesn't do control messages (rights, creds, etc) */ if (control->m_len) { m_freem(control); if (m) m_freem(m); error = EINVAL; goto out; } m_freem(control); /* empty control, just free it */ } if (!(flags & PRUS_OOB)) { sbappendstream(&so->so_snd, m); if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg/maxopd using peer's cached * MSS. */ #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); else #endif /* INET6 */ error = tcp_connect(tp, nam, td); if (error) goto out; tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } if (flags & PRUS_EOF) { /* * Close the send side of the connection after * the data is sent. */ socantsendmore(so); tp = tcp_usrclosed(tp); } if (tp != NULL) { if (flags & PRUS_MORETOCOME) tp->t_flags |= TF_MORETOCOME; error = tcp_output(tp); if (flags & PRUS_MORETOCOME) tp->t_flags &= ~TF_MORETOCOME; } } else { if (sbspace(&so->so_snd) < -512) { m_freem(m); error = ENOBUFS; goto out; } /* * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. We continue, however, * to consider it to indicate the first octet * of data past the urgent section. * Otherwise, snd_up should be one lower. */ sbappendstream(&so->so_snd, m); if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg/maxopd using peer's cached * MSS. */ #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); else #endif /* INET6 */ error = tcp_connect(tp, nam, td); if (error) goto out; tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } tp->snd_up = tp->snd_una + so->so_snd.sb_cc; tp->t_force = 1; error = tcp_output(tp); tp->t_force = 0; } COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); } /* * Abort the TCP. */ static int tcp_usr_abort(struct socket *so) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; COMMON_START(); tp = tcp_drop(tp, ECONNABORTED); COMMON_END(PRU_ABORT); } /* * Receive out-of-band data. */ static int tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_READ; COMMON_START(); if ((so->so_oobmark == 0 && (so->so_state & SS_RCVATMARK) == 0) || so->so_options & SO_OOBINLINE || tp->t_oobflags & TCPOOB_HADDATA) { error = EINVAL; goto out; } if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { error = EWOULDBLOCK; goto out; } m->m_len = 1; *mtod(m, caddr_t) = tp->t_iobc; if ((flags & MSG_PEEK) == 0) tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); COMMON_END(PRU_RCVOOB); } /* xxx - should be const */ struct pr_usrreqs tcp_usrreqs = { tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind, tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach, tcp_usr_disconnect, tcp_usr_listen, tcp_peeraddr, tcp_usr_rcvd, tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, tcp_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel }; #ifdef INET6 struct pr_usrreqs tcp6_usrreqs = { tcp_usr_abort, tcp6_usr_accept, tcp_usr_attach, tcp6_usr_bind, tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach, tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd, tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, in6_mapped_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel }; #endif /* INET6 */ /* * Common subroutine to open a TCP connection to remote host specified * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local * port number if needed. Call in_pcbconnect_setup to do the routing and * to choose a local host address (interface). If there is an existing * incarnation of the same connection in TIME-WAIT state and if the remote * host was sending CC options and if the connection duration was < MSL, then * truncate the previous TIME-WAIT state and proceed. * Initialize connection parameters and enter SYN-SENT state. */ static int tcp_connect(tp, nam, td) register struct tcpcb *tp; struct sockaddr *nam; struct thread *td; { struct inpcb *inp = tp->t_inpcb, *oinp; struct socket *so = inp->inp_socket; struct tcptw *otw; struct rmxp_tao tao; struct in_addr laddr; u_short lport; int error; bzero(&tao, sizeof(tao)); if (inp->inp_lport == 0) { error = in_pcbbind(inp, (struct sockaddr *)0, td); if (error) return error; } /* * Cannot simply call in_pcbconnect, because there might be an * earlier incarnation of this same connection still in * TIME_WAIT state, creating an ADDRINUSE error. */ laddr = inp->inp_laddr; lport = inp->inp_lport; error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td); if (error && oinp == NULL) return error; if (oinp) { if (oinp != inp && (oinp->inp_vflag & INP_TIMEWAIT) && (ticks - (otw = intotw(oinp))->t_starttime) < tcp_msl && otw->cc_recv != 0) { inp->inp_faddr = oinp->inp_faddr; inp->inp_fport = oinp->inp_fport; (void) tcp_twclose(otw, 0); } else return EADDRINUSE; } inp->inp_laddr = laddr; in_pcbrehash(inp); /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) tp->request_r_scale++; soisconnecting(so); tcpstat.tcps_connattempt++; tp->t_state = TCPS_SYN_SENT; callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); tp->iss = tcp_new_isn(tp); tp->t_bw_rtseq = tp->iss; tcp_sendseqinit(tp); /* * Generate a CC value for this connection and * check whether CC or CCnew should be used. */ if (tcp_do_rfc1644) tcp_hc_gettao(&inp->inp_inc, &tao); tp->cc_send = CC_INC(tcp_ccgen); if (tao.tao_ccsent != 0 && CC_GEQ(tp->cc_send, tao.tao_ccsent)) { tao.tao_ccsent = tp->cc_send; } else { tao.tao_ccsent = 0; tp->t_flags |= TF_SENDCCNEW; } if (tcp_do_rfc1644) tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT, tao.tao_ccsent, 0); return 0; } #ifdef INET6 static int tcp6_connect(tp, nam, td) register struct tcpcb *tp; struct sockaddr *nam; struct thread *td; { struct inpcb *inp = tp->t_inpcb, *oinp; struct socket *so = inp->inp_socket; struct tcptw *otw; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; struct in6_addr *addr6; struct rmxp_tao tao; int error; bzero(&tao, sizeof(tao)); if (inp->inp_lport == 0) { error = in6_pcbbind(inp, (struct sockaddr *)0, td); if (error) return error; } /* * Cannot simply call in_pcbconnect, because there might be an * earlier incarnation of this same connection still in * TIME_WAIT state, creating an ADDRINUSE error. */ error = in6_pcbladdr(inp, nam, &addr6); if (error) return error; oinp = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL); if (oinp) { if (oinp != inp && (oinp->inp_vflag & INP_TIMEWAIT) && (ticks - (otw = intotw(oinp))->t_starttime) < tcp_msl && otw->cc_recv != 0) { inp->inp_faddr = oinp->inp_faddr; inp->inp_fport = oinp->inp_fport; (void) tcp_twclose(otw, 0); } else return EADDRINUSE; } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) inp->in6p_laddr = *addr6; inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0) inp->in6p_flowinfo = sin6->sin6_flowinfo; in_pcbrehash(inp); /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) tp->request_r_scale++; soisconnecting(so); tcpstat.tcps_connattempt++; tp->t_state = TCPS_SYN_SENT; callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); tp->iss = tcp_new_isn(tp); tp->t_bw_rtseq = tp->iss; tcp_sendseqinit(tp); /* * Generate a CC value for this connection and * check whether CC or CCnew should be used. */ if (tcp_do_rfc1644) tcp_hc_gettao(&inp->inp_inc, &tao); tp->cc_send = CC_INC(tcp_ccgen); if (tao.tao_ccsent != 0 && CC_GEQ(tp->cc_send, tao.tao_ccsent)) { tao.tao_ccsent = tp->cc_send; } else { tao.tao_ccsent = 0; tp->t_flags |= TF_SENDCCNEW; } if (tcp_do_rfc1644) tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT, tao.tao_ccsent, 0); return 0; } #endif /* INET6 */ /* * The new sockopt interface makes it possible for us to block in the * copyin/out step (if we take a page fault). Taking a page fault at * splnet() is probably a Bad Thing. (Since sockets and pcbs both now * use TSM, there probably isn't any need for this function to run at * splnet() any more. This needs more examination.) */ int tcp_ctloutput(so, sopt) struct socket *so; struct sockopt *sopt; { int error, opt, optval, s; struct inpcb *inp; struct tcpcb *tp; error = 0; s = splnet(); /* XXX */ INP_INFO_RLOCK(&tcbinfo); inp = sotoinpcb(so); if (inp == NULL) { INP_INFO_RUNLOCK(&tcbinfo); splx(s); return (ECONNRESET); } INP_LOCK(inp); INP_INFO_RUNLOCK(&tcbinfo); if (sopt->sopt_level != IPPROTO_TCP) { #ifdef INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) error = ip6_ctloutput(so, sopt); else #endif /* INET6 */ error = ip_ctloutput(so, sopt); INP_UNLOCK(inp); splx(s); return (error); } tp = intotcpcb(inp); switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { #ifdef TCP_SIGNATURE case TCP_MD5SIG: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval > 0) tp->t_flags |= TF_SIGNATURE; else tp->t_flags &= ~TF_SIGNATURE; break; #endif /* TCP_SIGNATURE */ case TCP_NODELAY: case TCP_NOOPT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (sopt->sopt_name) { case TCP_NODELAY: opt = TF_NODELAY; break; case TCP_NOOPT: opt = TF_NOOPT; break; default: opt = 0; /* dead code to fool gcc */ break; } if (optval) tp->t_flags |= opt; else tp->t_flags &= ~opt; break; case TCP_NOPUSH: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval) tp->t_flags |= TF_NOPUSH; else { tp->t_flags &= ~TF_NOPUSH; error = tcp_output(tp); } break; case TCP_MAXSEG: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval > 0 && optval <= tp->t_maxseg && optval + 40 >= tcp_minmss) tp->t_maxseg = optval; else error = EINVAL; break; default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (sopt->sopt_name) { #ifdef TCP_SIGNATURE case TCP_MD5SIG: optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; break; #endif case TCP_NODELAY: optval = tp->t_flags & TF_NODELAY; break; case TCP_MAXSEG: optval = tp->t_maxseg; break; case TCP_NOOPT: optval = tp->t_flags & TF_NOOPT; break; case TCP_NOPUSH: optval = tp->t_flags & TF_NOPUSH; break; default: error = ENOPROTOOPT; break; } if (error == 0) error = sooptcopyout(sopt, &optval, sizeof optval); break; } INP_UNLOCK(inp); splx(s); return (error); } /* * tcp_sendspace and tcp_recvspace are the default send and receive window * sizes, respectively. These are obsolescent (this information should * be set by the route). */ u_long tcp_sendspace = 1024*32; SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); u_long tcp_recvspace = 1024*64; SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); /* * Attach TCP protocol to socket, allocating * internet protocol control block, tcp control block, * bufer space, and entering LISTEN state if to accept connections. */ static int tcp_attach(so, td) struct socket *so; struct thread *td; { register struct tcpcb *tp; struct inpcb *inp; int error; #ifdef INET6 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; #endif if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, tcp_sendspace, tcp_recvspace); if (error) return (error); } - error = in_pcballoc(so, &tcbinfo, td, "tcpinp"); + error = in_pcballoc(so, &tcbinfo, "tcpinp"); if (error) return (error); inp = sotoinpcb(so); #ifdef INET6 if (isipv6) { inp->inp_vflag |= INP_IPV6; inp->in6p_hops = -1; /* use kernel default */ } else #endif inp->inp_vflag |= INP_IPV4; tp = tcp_newtcpcb(inp); if (tp == 0) { int nofd = so->so_state & SS_NOFDREF; /* XXX */ so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ #ifdef INET6 if (isipv6) in6_pcbdetach(inp); else #endif in_pcbdetach(inp); so->so_state |= nofd; return (ENOBUFS); } tp->t_state = TCPS_CLOSED; return (0); } /* * Initiate (or continue) disconnect. * If embryonic state, just send reset (once). * If in ``let data drain'' option and linger null, just drop. * Otherwise (hard), mark socket disconnecting and drop * current input data; switch states based on user close, and * send segment to peer (with FIN). */ static struct tcpcb * tcp_disconnect(tp) register struct tcpcb *tp; { struct socket *so = tp->t_inpcb->inp_socket; if (tp->t_state < TCPS_ESTABLISHED) tp = tcp_close(tp); else if ((so->so_options & SO_LINGER) && so->so_linger == 0) tp = tcp_drop(tp, 0); else { soisdisconnecting(so); sbflush(&so->so_rcv); tp = tcp_usrclosed(tp); if (tp) (void) tcp_output(tp); } return (tp); } /* * User issued close, and wish to trail through shutdown states: * if never received SYN, just forget it. If got a SYN from peer, * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. * If already got a FIN from peer, then almost done; go to LAST_ACK * state. In all other cases, have already sent FIN to peer (e.g. * after PRU_SHUTDOWN), and just have to play tedious game waiting * for peer to send FIN or not respond to keep-alives, etc. * We can let the user exit from the close as soon as the FIN is acked. */ static struct tcpcb * tcp_usrclosed(tp) register struct tcpcb *tp; { switch (tp->t_state) { case TCPS_CLOSED: case TCPS_LISTEN: tp->t_state = TCPS_CLOSED; tp = tcp_close(tp); break; case TCPS_SYN_SENT: case TCPS_SYN_RECEIVED: tp->t_flags |= TF_NEEDFIN; break; case TCPS_ESTABLISHED: tp->t_state = TCPS_FIN_WAIT_1; break; case TCPS_CLOSE_WAIT: tp->t_state = TCPS_LAST_ACK; break; } if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { soisdisconnected(tp->t_inpcb->inp_socket); /* To prevent the connection hanging in FIN_WAIT_2 forever. */ if (tp->t_state == TCPS_FIN_WAIT_2) callout_reset(tp->tt_2msl, tcp_maxidle, tcp_timer_2msl, tp); } return (tp); } Index: head/sys/netinet/udp_usrreq.c =================================================================== --- head/sys/netinet/udp_usrreq.c (revision 127503) +++ head/sys/netinet/udp_usrreq.c (revision 127504) @@ -1,1127 +1,1127 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 * $FreeBSD$ */ #include "opt_ipsec.h" #include "opt_inet6.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #ifdef INET6 #include #endif #include #include #ifdef FAST_IPSEC #include #endif /*FAST_IPSEC*/ #ifdef IPSEC #include #endif /*IPSEC*/ #include /* * UDP protocol implementation. * Per RFC 768, August, 1980. */ #ifndef COMPAT_42 static int udpcksum = 1; #else static int udpcksum = 0; /* XXX */ #endif SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udpcksum, 0, ""); int log_in_vain = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, &log_in_vain, 0, "Log all incoming UDP packets"); static int blackhole = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, &blackhole, 0, "Do not send port unreachables for refused connects"); static int strict_mcast_mship = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW, &strict_mcast_mship, 0, "Only send multicast to member sockets"); struct inpcbhead udb; /* from udp_var.h */ #define udb6 udb /* for KAME src sync over BSD*'s */ struct inpcbinfo udbinfo; #ifndef UDBHASHSIZE #define UDBHASHSIZE 16 #endif struct udpstat udpstat; /* from udp_var.h */ SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW, &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)"); static struct sockaddr_in udp_in = { sizeof(udp_in), AF_INET }; #ifdef INET6 struct udp_in6 { struct sockaddr_in6 uin6_sin; u_char uin6_init_done : 1; } udp_in6 = { { sizeof(udp_in6.uin6_sin), AF_INET6 }, 0 }; struct udp_ip6 { struct ip6_hdr uip6_ip6; u_char uip6_init_done : 1; } udp_ip6; #endif /* INET6 */ static void udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off); #ifdef INET6 static void ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip); #endif static int udp_detach(struct socket *so); static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, struct mbuf *, struct thread *); void udp_init() { INP_INFO_LOCK_INIT(&udbinfo, "udp"); LIST_INIT(&udb); udbinfo.listhead = &udb; udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask); udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.porthashmask); udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(udbinfo.ipi_zone, maxsockets); } void udp_input(m, off) register struct mbuf *m; int off; { int iphlen = off; register struct ip *ip; register struct udphdr *uh; register struct inpcb *inp; struct mbuf *opts = 0; int len; struct ip save_ip; udpstat.udps_ipackets++; /* * Strip IP options, if any; should skip this, * make available to user, and use on returned packets, * but we don't yet have a way to check the checksum * with options still present. */ if (iphlen > sizeof (struct ip)) { ip_stripoptions(m, (struct mbuf *)0); iphlen = sizeof(struct ip); } /* * Get IP and UDP header together in first mbuf. */ ip = mtod(m, struct ip *); if (m->m_len < iphlen + sizeof(struct udphdr)) { if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) { udpstat.udps_hdrops++; return; } ip = mtod(m, struct ip *); } uh = (struct udphdr *)((caddr_t)ip + iphlen); /* destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto badunlocked; /* * Construct sockaddr format source address. * Stuff source address and datagram in user buffer. */ udp_in.sin_port = uh->uh_sport; udp_in.sin_addr = ip->ip_src; #ifdef INET6 udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0; #endif /* * Make mbuf data length reflect UDP length. * If not enough data to reflect UDP length, drop. */ len = ntohs((u_short)uh->uh_ulen); if (ip->ip_len != len) { if (len > ip->ip_len || len < sizeof(struct udphdr)) { udpstat.udps_badlen++; goto badunlocked; } m_adj(m, len - ip->ip_len); /* ip->ip_len = len; */ } /* * Save a copy of the IP header in case we want restore it * for sending an ICMP error message in response. */ if (!blackhole) save_ip = *ip; /* * Checksum extended UDP header and data. */ if (uh->uh_sum) { if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh->uh_sum = m->m_pkthdr.csum_data; else uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + IPPROTO_UDP)); uh->uh_sum ^= 0xffff; } else { char b[9]; bcopy(((struct ipovly *)ip)->ih_x1, b, 9); bzero(((struct ipovly *)ip)->ih_x1, 9); ((struct ipovly *)ip)->ih_len = uh->uh_ulen; uh->uh_sum = in_cksum(m, len + sizeof (struct ip)); bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); } if (uh->uh_sum) { udpstat.udps_badsum++; m_freem(m); return; } } else udpstat.udps_nosum++; INP_INFO_RLOCK(&udbinfo); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { struct inpcb *last; /* * Deliver a multicast or broadcast datagram to *all* sockets * for which the local and remote addresses and ports match * those of the incoming datagram. This allows more than * one process to receive multi/broadcasts on the same port. * (This really ought to be done for unicast datagrams as * well, but that would cause problems with existing * applications that open both address-specific sockets and * a wildcard socket listening to the same port -- they would * end up receiving duplicates of every unicast datagram. * Those applications open the multiple sockets to overcome an * inadequacy of the UDP socket interface, but for backwards * compatibility we avoid the problem here rather than * fixing the interface. Maybe 4.5BSD will remedy this?) */ /* * Locate pcb(s) for datagram. * (Algorithm copied from raw_intr().) */ last = NULL; LIST_FOREACH(inp, &udb, inp_list) { INP_LOCK(inp); if (inp->inp_lport != uh->uh_dport) { docontinue: INP_UNLOCK(inp); continue; } #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) goto docontinue; #endif if (inp->inp_laddr.s_addr != INADDR_ANY) { if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) goto docontinue; } if (inp->inp_faddr.s_addr != INADDR_ANY) { if (inp->inp_faddr.s_addr != ip->ip_src.s_addr || inp->inp_fport != uh->uh_sport) goto docontinue; } /* * Check multicast packets to make sure they are only * sent to sockets with multicast memberships for the * packet's destination address and arrival interface */ #define MSHIP(_inp, n) ((_inp)->inp_moptions->imo_membership[(n)]) #define NMSHIPS(_inp) ((_inp)->inp_moptions->imo_num_memberships) if (strict_mcast_mship && inp->inp_moptions != NULL) { int mship, foundmship = 0; for (mship = 0; mship < NMSHIPS(inp); mship++) { if (MSHIP(inp, mship)->inm_addr.s_addr == ip->ip_dst.s_addr && MSHIP(inp, mship)->inm_ifp == m->m_pkthdr.rcvif) { foundmship = 1; break; } } if (foundmship == 0) goto docontinue; } #undef NMSHIPS #undef MSHIP if (last != NULL) { struct mbuf *n; n = m_copy(m, 0, M_COPYALL); if (n != NULL) udp_append(last, ip, n, iphlen + sizeof(struct udphdr)); INP_UNLOCK(last); } last = inp; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids searching * through all pcbs in the common case of a non-shared * port. It * assumes that an application will never * clear these options after setting them. */ if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. * (No need to send an ICMP Port Unreachable * for a broadcast or multicast datgram.) */ udpstat.udps_noportbcast++; goto badheadlocked; } INP_INFO_RUNLOCK(&udbinfo); udp_append(last, ip, m, iphlen + sizeof(struct udphdr)); INP_UNLOCK(last); return; } /* * Locate pcb for datagram. */ inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); if (inp == NULL) { if (log_in_vain) { char buf[4*sizeof "123"]; strcpy(buf, inet_ntoa(ip->ip_dst)); log(LOG_INFO, "Connection attempt to UDP %s:%d from %s:%d\n", buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), ntohs(uh->uh_sport)); } udpstat.udps_noport++; if (m->m_flags & (M_BCAST | M_MCAST)) { udpstat.udps_noportbcast++; goto badheadlocked; } if (blackhole) goto badheadlocked; if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) goto badheadlocked; *ip = save_ip; ip->ip_len += iphlen; icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); INP_INFO_RUNLOCK(&udbinfo); return; } INP_LOCK(inp); INP_INFO_RUNLOCK(&udbinfo); udp_append(inp, ip, m, iphlen + sizeof(struct udphdr)); INP_UNLOCK(inp); return; badheadlocked: INP_INFO_RUNLOCK(&udbinfo); if (inp) INP_UNLOCK(inp); badunlocked: m_freem(m); if (opts) m_freem(opts); return; } #ifdef INET6 static void ip_2_ip6_hdr(ip6, ip) struct ip6_hdr *ip6; struct ip *ip; { bzero(ip6, sizeof(*ip6)); ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_plen = ip->ip_len; ip6->ip6_nxt = ip->ip_p; ip6->ip6_hlim = ip->ip_ttl; ip6->ip6_src.s6_addr32[2] = ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_SMP; ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr; ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr; } #endif /* * subroutine of udp_input(), mainly for source code readability. * caller must properly init udp_ip6 and udp_in6 beforehand. */ static void udp_append(last, ip, n, off) struct inpcb *last; struct ip *ip; struct mbuf *n; int off; { struct sockaddr *append_sa; struct mbuf *opts = 0; #if defined(IPSEC) || defined(FAST_IPSEC) /* check AH/ESP integrity. */ if (ipsec4_in_reject(n, last)) { #ifdef IPSEC ipsecstat.in_polvio++; #endif /*IPSEC*/ m_freem(n); return; } #endif /*IPSEC || FAST_IPSEC*/ #ifdef MAC if (mac_check_inpcb_deliver(last, n) != 0) { m_freem(n); return; } #endif if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) { #ifdef INET6 if (last->inp_vflag & INP_IPV6) { int savedflags; if (udp_ip6.uip6_init_done == 0) { ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip); udp_ip6.uip6_init_done = 1; } savedflags = last->inp_flags; last->inp_flags &= ~INP_UNMAPPABLEOPTS; ip6_savecontrol(last, n, &opts); last->inp_flags = savedflags; } else #endif ip_savecontrol(last, &opts, ip, n); } #ifdef INET6 if (last->inp_vflag & INP_IPV6) { if (udp_in6.uin6_init_done == 0) { in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin); udp_in6.uin6_init_done = 1; } append_sa = (struct sockaddr *)&udp_in6.uin6_sin; } else #endif append_sa = (struct sockaddr *)&udp_in; m_adj(n, off); if (sbappendaddr(&last->inp_socket->so_rcv, append_sa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); udpstat.udps_fullsock++; } else sorwakeup(last->inp_socket); } /* * Notify a udp user of an asynchronous error; * just wake up so that he can collect error status. */ struct inpcb * udp_notify(inp, errno) register struct inpcb *inp; int errno; { inp->inp_socket->so_error = errno; sorwakeup(inp->inp_socket); sowwakeup(inp->inp_socket); return inp; } void udp_ctlinput(cmd, sa, vip) int cmd; struct sockaddr *sa; void *vip; { struct ip *ip = vip; struct udphdr *uh; struct inpcb *(*notify)(struct inpcb *, int) = udp_notify; struct in_addr faddr; struct inpcb *inp; int s; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; /* * Redirects don't need to be handled up here. */ if (PRC_IS_REDIRECT(cmd)) return; /* * Hostdead is ugly because it goes linearly through all PCBs. * XXX: We never get this from ICMP, otherwise it makes an * excellent DoS attack on machines with many connections. */ if (cmd == PRC_HOSTDEAD) ip = 0; else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip) { s = splnet(); uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); INP_INFO_RLOCK(&udbinfo); inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport, ip->ip_src, uh->uh_sport, 0, NULL); if (inp != NULL) { INP_LOCK(inp); if (inp->inp_socket != NULL) { (*notify)(inp, inetctlerrmap[cmd]); } INP_UNLOCK(inp); } INP_INFO_RUNLOCK(&udbinfo); splx(s); } else in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify); } static int udp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n, s; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = udbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); return 0; } if (req->newptr != 0) return EPERM; /* * OK, now we're committed to doing something. */ s = splnet(); INP_INFO_RLOCK(&udbinfo); gencnt = udbinfo.ipi_gencnt; n = udbinfo.ipi_count; INP_INFO_RUNLOCK(&udbinfo); splx(s); error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + n * sizeof(struct xinpcb)); if (error != 0) return (error); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) return ENOMEM; s = splnet(); INP_INFO_RLOCK(&udbinfo); for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_LOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) inp_list[i++] = inp; INP_UNLOCK(inp); } INP_INFO_RUNLOCK(&udbinfo); splx(s); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); xi.xi_inp.inp_gencnt = inp->inp_gencnt; error = SYSCTL_OUT(req, &xi, sizeof xi); } } if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ s = splnet(); INP_INFO_RLOCK(&udbinfo); xig.xig_gen = udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = udbinfo.ipi_count; INP_INFO_RUNLOCK(&udbinfo); splx(s); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return error; } SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); static int udp_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in addrs[2]; struct inpcb *inp; int error, s; error = suser_cred(req->td->td_ucred, PRISON_ROOT); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); s = splnet(); INP_INFO_RLOCK(&udbinfo); inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, 1, NULL); if (inp == NULL || inp->inp_socket == NULL) { error = ENOENT; goto out; } error = cr_canseesocket(req->td->td_ucred, inp->inp_socket); if (error) goto out; cru2x(inp->inp_socket->so_cred, &xuc); out: INP_INFO_RUNLOCK(&udbinfo); splx(s); if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, udp_getcred, "S,xucred", "Get the xucred of a UDP connection"); static int udp_output(inp, m, addr, control, td) register struct inpcb *inp; struct mbuf *m; struct sockaddr *addr; struct mbuf *control; struct thread *td; { register struct udpiphdr *ui; register int len = m->m_pkthdr.len; struct in_addr faddr, laddr; struct cmsghdr *cm; struct sockaddr_in *sin, src; int error = 0; int ipflags; u_short fport, lport; INP_LOCK_ASSERT(inp); #ifdef MAC mac_create_mbuf_from_socket(inp->inp_socket, m); #endif if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { error = EMSGSIZE; if (control) m_freem(control); goto release; } src.sin_addr.s_addr = INADDR_ANY; if (control != NULL) { /* * XXX: Currently, we assume all the optional information * is stored in a single mbuf. */ if (control->m_next) { error = EINVAL; m_freem(control); goto release; } for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { cm = mtod(control, struct cmsghdr *); if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) { error = EINVAL; break; } if (cm->cmsg_level != IPPROTO_IP) continue; switch (cm->cmsg_type) { case IP_SENDSRCADDR: if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_addr))) { error = EINVAL; break; } bzero(&src, sizeof(src)); src.sin_family = AF_INET; src.sin_len = sizeof(src); src.sin_port = inp->inp_lport; src.sin_addr = *(struct in_addr *)CMSG_DATA(cm); break; default: error = ENOPROTOOPT; break; } if (error) break; } m_freem(control); } if (error) goto release; laddr = inp->inp_laddr; lport = inp->inp_lport; if (src.sin_addr.s_addr != INADDR_ANY) { if (lport == 0) { error = EINVAL; goto release; } error = in_pcbbind_setup(inp, (struct sockaddr *)&src, &laddr.s_addr, &lport, td); if (error) goto release; } if (addr) { sin = (struct sockaddr_in *)addr; if (td && jailed(td->td_ucred)) prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr); if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto release; } error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport, &faddr.s_addr, &fport, NULL, td); if (error) goto release; /* Commit the local port if newly assigned. */ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->inp_lport = 0; error = EAGAIN; goto release; } inp->inp_flags |= INP_ANONPORT; } } else { faddr = inp->inp_faddr; fport = inp->inp_fport; if (faddr.s_addr == INADDR_ANY) { error = ENOTCONN; goto release; } } /* * Calculate data length and get a mbuf * for UDP and IP headers. */ M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT); if (m == 0) { error = ENOBUFS; goto release; } /* * Fill in mbuf with extended UDP header * and addresses and length put into network format. */ ui = mtod(m, struct udpiphdr *); bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */ ui->ui_pr = IPPROTO_UDP; ui->ui_src = laddr; ui->ui_dst = faddr; ui->ui_sport = lport; ui->ui_dport = fport; ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); ipflags = inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST); if (inp->inp_flags & INP_ONESBCAST) ipflags |= IP_SENDONES; /* * Set up checksum and output datagram. */ if (udpcksum) { if (inp->inp_flags & INP_ONESBCAST) faddr.s_addr = INADDR_BROADCAST; ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr, htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } else { ui->ui_sum = 0; } ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len; ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ udpstat.udps_opackets++; error = ip_output(m, inp->inp_options, NULL, ipflags, inp->inp_moptions, inp); return (error); release: m_freem(m); return (error); } u_long udp_sendspace = 9216; /* really max datagram size */ /* 40 1K datagrams */ SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); u_long udp_recvspace = 40 * (1024 + #ifdef INET6 sizeof(struct sockaddr_in6) #else sizeof(struct sockaddr_in) #endif ); SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, &udp_recvspace, 0, "Maximum space for incoming UDP datagrams"); static int udp_abort(struct socket *so) { struct inpcb *inp; int s; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; /* ??? possible? panic instead? */ } INP_LOCK(inp); soisdisconnected(so); s = splnet(); in_pcbdetach(inp); INP_INFO_WUNLOCK(&udbinfo); splx(s); return 0; } static int udp_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int s, error; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp != 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; } error = soreserve(so, udp_sendspace, udp_recvspace); if (error) { INP_INFO_WUNLOCK(&udbinfo); return error; } s = splnet(); - error = in_pcballoc(so, &udbinfo, td, "udpinp"); + error = in_pcballoc(so, &udbinfo, "udpinp"); splx(s); if (error) { INP_INFO_WUNLOCK(&udbinfo); return error; } inp = (struct inpcb *)so->so_pcb; INP_LOCK(inp); INP_INFO_WUNLOCK(&udbinfo); inp->inp_vflag |= INP_IPV4; inp->inp_ip_ttl = ip_defttl; INP_UNLOCK(inp); return 0; } static int udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int s, error; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; } INP_LOCK(inp); s = splnet(); error = in_pcbbind(inp, nam, td); splx(s); INP_UNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); return error; } static int udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int s, error; struct sockaddr_in *sin; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; } INP_LOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_UNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); return EISCONN; } s = splnet(); sin = (struct sockaddr_in *)nam; if (td && jailed(td->td_ucred)) prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr); error = in_pcbconnect(inp, nam, td); splx(s); if (error == 0) soisconnected(so); INP_UNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); return error; } static int udp_detach(struct socket *so) { struct inpcb *inp; int s; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; } INP_LOCK(inp); s = splnet(); in_pcbdetach(inp); INP_INFO_WUNLOCK(&udbinfo); splx(s); return 0; } static int udp_disconnect(struct socket *so) { struct inpcb *inp; int s; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; } INP_LOCK(inp); if (inp->inp_faddr.s_addr == INADDR_ANY) { INP_INFO_WUNLOCK(&udbinfo); INP_UNLOCK(inp); return ENOTCONN; } s = splnet(); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; INP_UNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); splx(s); so->so_state &= ~SS_ISCONNECTED; /* XXX */ return 0; } static int udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; int ret; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); m_freem(m); return EINVAL; } INP_LOCK(inp); ret = udp_output(inp, m, addr, control, td); INP_UNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); return ret; } int udp_shutdown(struct socket *so) { struct inpcb *inp; INP_INFO_RLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_RUNLOCK(&udbinfo); return EINVAL; } INP_LOCK(inp); INP_INFO_RUNLOCK(&udbinfo); socantsendmore(so); INP_UNLOCK(inp); return 0; } /* * This is the wrapper function for in_setsockaddr. We just pass down * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking * here because in_setsockaddr will call malloc and might block. */ static int udp_sockaddr(struct socket *so, struct sockaddr **nam) { return (in_setsockaddr(so, nam, &udbinfo)); } /* * This is the wrapper function for in_setpeeraddr. We just pass down * the pcbinfo for in_setpeeraddr to lock. */ static int udp_peeraddr(struct socket *so, struct sockaddr **nam) { return (in_setpeeraddr(so, nam, &udbinfo)); } struct pr_usrreqs udp_usrreqs = { udp_abort, pru_accept_notsupp, udp_attach, udp_bind, udp_connect, pru_connect2_notsupp, in_control, udp_detach, udp_disconnect, pru_listen_notsupp, udp_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, udp_send, pru_sense_null, udp_shutdown, udp_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel }; Index: head/sys/netinet6/raw_ip6.c =================================================================== --- head/sys/netinet6/raw_ip6.c (revision 127503) +++ head/sys/netinet6/raw_ip6.c (revision 127504) @@ -1,732 +1,732 @@ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 */ #include "opt_ipsec.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ENABLE_DEFAULT_SCOPE #include #endif #ifdef IPSEC #include #include #endif /*IPSEC*/ #ifdef FAST_IPSEC #include #include #endif /* FAST_IPSEC */ #include #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) /* * Raw interface to IP6 protocol. */ extern struct inpcbhead ripcb; extern struct inpcbinfo ripcbinfo; extern u_long rip_sendspace; extern u_long rip_recvspace; struct rip6stat rip6stat; /* * Setup generic address and protocol structures * for raw_input routine, then pass them along with * mbuf chain. */ int rip6_input(mp, offp, proto) struct mbuf **mp; int *offp, proto; { struct mbuf *m = *mp; register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); register struct inpcb *in6p; struct inpcb *last = 0; struct mbuf *opts = NULL; struct sockaddr_in6 fromsa; rip6stat.rip6s_ipackets++; if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { /* XXX send icmp6 host/port unreach? */ m_freem(m); return IPPROTO_DONE; } init_sin6(&fromsa, m); /* general init */ LIST_FOREACH(in6p, &ripcb, inp_list) { if ((in6p->in6p_vflag & INP_IPV6) == 0) continue; if (in6p->in6p_ip6_nxt && in6p->in6p_ip6_nxt != proto) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; if (in6p->in6p_cksum != -1) { rip6stat.rip6s_isum++; if (in6_cksum(m, ip6->ip6_nxt, *offp, m->m_pkthdr.len - *offp)) { rip6stat.rip6s_badsum++; continue; } } if (last) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); #if defined(IPSEC) || defined(FAST_IPSEC) /* * Check AH/ESP integrity. */ if (n && ipsec6_in_reject(n, last)) { m_freem(n); #ifdef IPSEC ipsec6stat.in_polvio++; #endif /*IPSEC*/ /* do not inject data into pcb */ } else #endif /*IPSEC || FAST_IPSEC*/ if (n) { if (last->in6p_flags & IN6P_CONTROLOPTS || last->in6p_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, *offp); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); rip6stat.rip6s_fullsock++; } else sorwakeup(last->in6p_socket); opts = NULL; } } last = in6p; } #if defined(IPSEC) || defined(FAST_IPSEC) /* * Check AH/ESP integrity. */ if (last && ipsec6_in_reject(m, last)) { m_freem(m); #ifdef IPSEC ipsec6stat.in_polvio++; #endif /*IPSEC*/ ip6stat.ip6s_delivered--; /* do not inject data into pcb */ } else #endif /*IPSEC || FAST_IPSEC*/ if (last) { if (last->in6p_flags & IN6P_CONTROLOPTS || last->in6p_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, m, &opts); /* strip intermediate headers */ m_adj(m, *offp); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); rip6stat.rip6s_fullsock++; } else sorwakeup(last->in6p_socket); } else { rip6stat.rip6s_nosock++; if (m->m_flags & M_MCAST) rip6stat.rip6s_nosockmcast++; if (proto == IPPROTO_NONE) m_freem(m); else { char *prvnxtp = ip6_get_prevhdr(m, *offp); /* XXX */ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, prvnxtp - mtod(m, char *)); } ip6stat.ip6s_delivered--; } return IPPROTO_DONE; } void rip6_ctlinput(cmd, sa, d) int cmd; struct sockaddr *sa; void *d; { struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify) __P((struct inpcb *, int)) = in6_rtchange; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } (void) in6_pcbnotify(&ripcb, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } /* * Generate IPv6 header and pass packet to ip6_output. * Tack on options user may have setup with control call. */ int #if __STDC__ rip6_output(struct mbuf *m, ...) #else rip6_output(m, va_alist) struct mbuf *m; va_dcl #endif { struct mbuf *control; struct socket *so; struct sockaddr_in6 *dstsock; struct in6_addr *dst; struct ip6_hdr *ip6; struct inpcb *in6p; u_int plen = m->m_pkthdr.len; int error = 0; struct ip6_pktopts opt, *stickyopt = NULL; struct ifnet *oifp = NULL; int type = 0, code = 0; /* for ICMPv6 output statistics only */ int priv = 0; struct in6_addr *in6a; va_list ap; va_start(ap, m); so = va_arg(ap, struct socket *); dstsock = va_arg(ap, struct sockaddr_in6 *); control = va_arg(ap, struct mbuf *); va_end(ap); in6p = sotoin6pcb(so); stickyopt = in6p->in6p_outputopts; priv = 0; if (so->so_cred->cr_uid == 0) priv = 1; dst = &dstsock->sin6_addr; if (control) { if ((error = ip6_setpktoptions(control, &opt, stickyopt, priv, 0, so->so_proto->pr_protocol)) != 0) { goto bad; } in6p->in6p_outputopts = &opt; } /* * For an ICMPv6 packet, we should know its type and code * to update statistics. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { struct icmp6_hdr *icmp6; if (m->m_len < sizeof(struct icmp6_hdr) && (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) { error = ENOBUFS; goto bad; } icmp6 = mtod(m, struct icmp6_hdr *); type = icmp6->icmp6_type; code = icmp6->icmp6_code; } M_PREPEND(m, sizeof(*ip6), M_TRYWAIT); ip6 = mtod(m, struct ip6_hdr *); /* * Next header might not be ICMP6 but use its pseudo header anyway. */ ip6->ip6_dst = *dst; /* * If the scope of the destination is link-local, embed the interface * index in the address. * * XXX advanced-api value overrides sin6_scope_id */ if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { struct in6_pktinfo *pi; /* * XXX Boundary check is assumed to be already done in * ip6_setpktoptions(). */ if (in6p->in6p_outputopts && (pi = in6p->in6p_outputopts->ip6po_pktinfo) && pi->ipi6_ifindex) { ip6->ip6_dst.s6_addr16[1] = htons(pi->ipi6_ifindex); oifp = ifnet_byindex(pi->ipi6_ifindex); } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && in6p->in6p_moptions && in6p->in6p_moptions->im6o_multicast_ifp) { oifp = in6p->in6p_moptions->im6o_multicast_ifp; ip6->ip6_dst.s6_addr16[1] = htons(oifp->if_index); } else if (dstsock->sin6_scope_id) { /* boundary check */ if (dstsock->sin6_scope_id < 0 || if_index < dstsock->sin6_scope_id) { error = ENXIO; /* XXX EINVAL? */ goto bad; } ip6->ip6_dst.s6_addr16[1] = htons(dstsock->sin6_scope_id & 0xffff); /* XXX */ } } /* * Source address selection. */ if ((in6a = in6_selectsrc(dstsock, in6p->in6p_outputopts, in6p->in6p_moptions, NULL, &in6p->in6p_laddr, &error)) == 0) { if (error == 0) error = EADDRNOTAVAIL; goto bad; } ip6->ip6_src = *in6a; ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); /* ip6_plen will be filled in ip6_output, so not fill it here. */ ip6->ip6_nxt = in6p->in6p_ip6_nxt; ip6->ip6_hlim = in6_selecthlim(in6p, oifp); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { struct mbuf *n; int off; u_int16_t *p; /* compute checksum */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) off = offsetof(struct icmp6_hdr, icmp6_cksum); else off = in6p->in6p_cksum; if (plen < off + 1) { error = EINVAL; goto bad; } off += sizeof(struct ip6_hdr); n = m; while (n && n->m_len <= off) { off -= n->m_len; n = n->m_next; } if (!n) goto bad; p = (u_int16_t *)(mtod(n, caddr_t) + off); *p = 0; *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen); } error = ip6_output(m, in6p->in6p_outputopts, NULL, 0, in6p->in6p_moptions, &oifp, in6p); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (oifp) icmp6_ifoutstat_inc(oifp, type, code); icmp6stat.icp6s_outhist[type]++; } else rip6stat.rip6s_opackets++; goto freectl; bad: if (m) m_freem(m); freectl: if (control) { ip6_clearpktopts(in6p->in6p_outputopts, -1); in6p->in6p_outputopts = stickyopt; m_freem(control); } return (error); } /* * Raw IPv6 socket option processing. */ int rip6_ctloutput(so, sopt) struct socket *so; struct sockopt *sopt; { int error; if (sopt->sopt_level == IPPROTO_ICMPV6) /* * XXX: is it better to call icmp6_ctloutput() directly * from protosw? */ return (icmp6_ctloutput(so, sopt)); else if (sopt->sopt_level != IPPROTO_IPV6) return (EINVAL); error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_get(so, sopt); break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_set(so, sopt); break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; } return (error); } static int rip6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error, s; inp = sotoinpcb(so); if (inp) panic("rip6_attach"); if (td && (error = suser(td)) != 0) return error; error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return error; s = splnet(); - error = in_pcballoc(so, &ripcbinfo, td, "raw6inp"); + error = in_pcballoc(so, &ripcbinfo, "raw6inp"); splx(s); if (error) return error; inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; inp->in6p_ip6_nxt = (long)proto; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; MALLOC(inp->in6p_icmp6filt, struct icmp6_filter *, sizeof(struct icmp6_filter), M_PCB, M_NOWAIT); ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt); return 0; } static int rip6_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); if (inp == 0) panic("rip6_detach"); /* xxx: RSVP */ if (so == ip6_mrouter) ip6_mrouter_done(); if (inp->in6p_icmp6filt) { FREE(inp->in6p_icmp6filt, M_PCB); inp->in6p_icmp6filt = NULL; } in6_pcbdetach(inp); return 0; } static int rip6_abort(struct socket *so) { soisdisconnected(so); return rip6_detach(so); } static int rip6_disconnect(struct socket *so) { struct inpcb *inp = sotoinpcb(so); if ((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN; inp->in6p_faddr = in6addr_any; return rip6_abort(so); } static int rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp = sotoinpcb(so); struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct ifaddr *ia = NULL; if (nam->sa_len != sizeof(*addr)) return EINVAL; if (TAILQ_EMPTY(&ifnet) || addr->sin6_family != AF_INET6) return EADDRNOTAVAIL; #ifdef ENABLE_DEFAULT_SCOPE if (addr->sin6_scope_id == 0) { /* not change if specified */ addr->sin6_scope_id = scope6_addr2default(&addr->sin6_addr); } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) && (ia = ifa_ifwithaddr((struct sockaddr *)addr)) == 0) return EADDRNOTAVAIL; if (ia && ((struct in6_ifaddr *)ia)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { return (EADDRNOTAVAIL); } inp->in6p_laddr = addr->sin6_addr; return 0; } static int rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp = sotoinpcb(so); struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct in6_addr *in6a = NULL; int error = 0; #ifdef ENABLE_DEFAULT_SCOPE struct sockaddr_in6 tmp; #endif if (nam->sa_len != sizeof(*addr)) return EINVAL; if (TAILQ_EMPTY(&ifnet)) return EADDRNOTAVAIL; if (addr->sin6_family != AF_INET6) return EAFNOSUPPORT; #ifdef ENABLE_DEFAULT_SCOPE if (addr->sin6_scope_id == 0) { /* not change if specified */ /* avoid overwrites */ tmp = *addr; addr = &tmp; addr->sin6_scope_id = scope6_addr2default(&addr->sin6_addr); } #endif /* Source address selection. XXX: need pcblookup? */ in6a = in6_selectsrc(addr, inp->in6p_outputopts, inp->in6p_moptions, NULL, &inp->in6p_laddr, &error); if (in6a == NULL) return (error ? error : EADDRNOTAVAIL); inp->in6p_laddr = *in6a; inp->in6p_faddr = addr->sin6_addr; soisconnected(so); return 0; } static int rip6_shutdown(struct socket *so) { socantsendmore(so); return 0; } static int rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp = sotoinpcb(so); struct sockaddr_in6 tmp; struct sockaddr_in6 *dst; /* always copy sockaddr to avoid overwrites */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return EISCONN; } /* XXX */ bzero(&tmp, sizeof(tmp)); tmp.sin6_family = AF_INET6; tmp.sin6_len = sizeof(struct sockaddr_in6); bcopy(&inp->in6p_faddr, &tmp.sin6_addr, sizeof(struct in6_addr)); dst = &tmp; } else { if (nam == NULL) { m_freem(m); return ENOTCONN; } tmp = *(struct sockaddr_in6 *)nam; dst = &tmp; } #ifdef ENABLE_DEFAULT_SCOPE if (dst->sin6_scope_id == 0) { /* not change if specified */ dst->sin6_scope_id = scope6_addr2default(&dst->sin6_addr); } #endif return rip6_output(m, so, dst, control); } struct pr_usrreqs rip6_usrreqs = { rip6_abort, pru_accept_notsupp, rip6_attach, rip6_bind, rip6_connect, pru_connect2_notsupp, in6_control, rip6_detach, rip6_disconnect, pru_listen_notsupp, in6_setpeeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, rip6_send, pru_sense_null, rip6_shutdown, in6_setsockaddr, sosend, soreceive, sopoll, pru_sosetlabel_null }; Index: head/sys/netinet6/udp6_usrreq.c =================================================================== --- head/sys/netinet6/udp6_usrreq.c (revision 127503) +++ head/sys/netinet6/udp6_usrreq.c (revision 127504) @@ -1,760 +1,760 @@ /* $FreeBSD$ */ /* $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_var.h 8.1 (Berkeley) 6/10/93 */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif /* IPSEC */ #ifdef FAST_IPSEC #include #include #endif /* FAST_IPSEC */ /* * UDP protocol inplementation. * Per RFC 768, August, 1980. */ extern struct protosw inetsw[]; static int udp6_detach __P((struct socket *so)); int udp6_input(mp, offp, proto) struct mbuf **mp; int *offp, proto; { struct mbuf *m = *mp, *opts; register struct ip6_hdr *ip6; register struct udphdr *uh; register struct inpcb *in6p; int off = *offp; int plen, ulen; struct sockaddr_in6 fromsa; opts = NULL; ip6 = mtod(m, struct ip6_hdr *); if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { /* XXX send icmp6 host/port unreach? */ m_freem(m); return IPPROTO_DONE; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); uh = (struct udphdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh)); if (!uh) return IPPROTO_DONE; #endif udpstat.udps_ipackets++; plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); ulen = ntohs((u_short)uh->uh_ulen); if (plen != ulen) { udpstat.udps_badlen++; goto bad; } /* * Checksum extended UDP header and data. */ if (uh->uh_sum == 0) udpstat.udps_nosum++; else if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) { udpstat.udps_badsum++; goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct inpcb *last; /* * Deliver a multicast datagram to all sockets * for which the local and remote addresses and ports match * those of the incoming datagram. This allows more than * one process to receive multicasts on the same port. * (This really ought to be done for unicast datagrams as * well, but that would cause problems with existing * applications that open both address-specific sockets and * a wildcard socket listening to the same port -- they would * end up receiving duplicates of every unicast datagram. * Those applications open the multiple sockets to overcome an * inadequacy of the UDP socket interface, but for backwards * compatibility we avoid the problem here rather than * fixing the interface. Maybe 4.5BSD will remedy this?) */ /* * In a case that laddr should be set to the link-local * address (this happens in RIPng), the multicast address * specified in the received packet does not match with * laddr. To cure this situation, the matching is relaxed * if the receiving interface is the same as one specified * in the socket and if the destination multicast address * matches one of the multicast groups specified in the socket. */ /* * Construct sockaddr format source address. */ init_sin6(&fromsa, m); fromsa.sin6_port = uh->uh_sport; /* * KAME note: traditionally we dropped udpiphdr from mbuf here. * We need udphdr for IPsec processing so we do that later. */ /* * Locate pcb(s) for datagram. * (Algorithm copied from raw_intr().) */ last = NULL; LIST_FOREACH(in6p, &udb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->in6p_lport != uh->uh_dport) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; } if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src) || in6p->in6p_fport != uh->uh_sport) continue; } if (last != NULL) { struct mbuf *n; #if defined(IPSEC) || defined(FAST_IPSEC) /* * Check AH/ESP integrity. */ if (ipsec6_in_reject(m, last)) { #ifdef IPSEC ipsec6stat.in_polvio++; #endif /* IPSEC */ /* do not inject data into pcb */ } else #endif /*IPSEC || FAST_IPSEC*/ if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { /* * KAME NOTE: do not * m_copy(m, offset, ...) above. * sbappendaddr() expects M_PKTHDR, * and m_copy() will copy M_PKTHDR * only if offset is 0. */ if (last->in6p_flags & IN6P_CONTROLOPTS || last->in6p_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, n, &opts); m_adj(n, off + sizeof(struct udphdr)); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); udpstat.udps_fullsock++; } else sorwakeup(last->in6p_socket); opts = NULL; } } last = in6p; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids searching * through all pcbs in the common case of a non-shared * port. It assumes that an application will never * clear these options after setting them. */ if ((last->in6p_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. * (No need to send an ICMP Port Unreachable * for a broadcast or multicast datgram.) */ udpstat.udps_noport++; udpstat.udps_noportmcast++; goto bad; } #if defined(IPSEC) || defined(FAST_IPSEC) /* * Check AH/ESP integrity. */ if (ipsec6_in_reject(m, last)) { #ifdef IPSEC ipsec6stat.in_polvio++; #endif /* IPSEC */ goto bad; } #endif /*IPSEC || FAST_IPSEC*/ if (last->in6p_flags & IN6P_CONTROLOPTS || last->in6p_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, m, &opts); m_adj(m, off + sizeof(struct udphdr)); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { udpstat.udps_fullsock++; goto bad; } sorwakeup(last->in6p_socket); return IPPROTO_DONE; } /* * Locate pcb for datagram. */ in6p = in6_pcblookup_hash(&udbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); if (in6p == 0) { if (log_in_vain) { char buf[INET6_ADDRSTRLEN]; strcpy(buf, ip6_sprintf(&ip6->ip6_dst)); log(LOG_INFO, "Connection attempt to UDP [%s]:%d from [%s]:%d\n", buf, ntohs(uh->uh_dport), ip6_sprintf(&ip6->ip6_src), ntohs(uh->uh_sport)); } udpstat.udps_noport++; if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); udpstat.udps_noportmcast++; goto bad; } icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); return IPPROTO_DONE; } #if defined(IPSEC) || defined(FAST_IPSEC) /* * Check AH/ESP integrity. */ if (ipsec6_in_reject(m, in6p)) { #ifdef IPSEC ipsec6stat.in_polvio++; #endif /* IPSEC */ goto bad; } #endif /*IPSEC || FAST_IPSEC*/ /* * Construct sockaddr format source address. * Stuff source address and datagram in user buffer. */ init_sin6(&fromsa, m); fromsa.sin6_port = uh->uh_sport; if (in6p->in6p_flags & IN6P_CONTROLOPTS || in6p->in6p_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(in6p, m, &opts); m_adj(m, off + sizeof(struct udphdr)); if (sbappendaddr(&in6p->in6p_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { udpstat.udps_fullsock++; goto bad; } sorwakeup(in6p->in6p_socket); return IPPROTO_DONE; bad: if (m) m_freem(m); if (opts) m_freem(opts); return IPPROTO_DONE; } void udp6_ctlinput(cmd, sa, d) int cmd; struct sockaddr *sa; void *d; { struct udphdr uh; struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify) __P((struct inpcb *, int)) = udp_notify; struct udp_portonly { u_int16_t uh_sport; u_int16_t uh_dport; } *uhp; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } if (ip6) { /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. */ /* check if we can safely examine src and dst ports */ if (m->m_pkthdr.len < off + sizeof(*uhp)) return; bzero(&uh, sizeof(uh)); m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); (void) in6_pcbnotify(&udb, sa, uh.uh_dport, (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd, cmdarg, notify); } else (void) in6_pcbnotify(&udb, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } static int udp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error, s; error = suser(req->td); if (error) return (error); if (req->newlen != sizeof(addrs)) return (EINVAL); if (req->oldlen != sizeof(struct xucred)) return (EINVAL); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); s = splnet(); inp = in6_pcblookup_hash(&udbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 1, NULL); if (!inp || !inp->inp_socket) { error = ENOENT; goto out; } cru2x(inp->inp_socket->so_cred, &xuc); error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); out: splx(s); return (error); } SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection"); static int udp6_abort(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; /* ??? possible? panic instead? */ soisdisconnected(so); s = splnet(); in6_pcbdetach(inp); splx(s); return 0; } static int udp6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int s, error; inp = sotoinpcb(so); if (inp != 0) return EINVAL; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return error; } s = splnet(); - error = in_pcballoc(so, &udbinfo, td, "udp6inp"); + error = in_pcballoc(so, &udbinfo, "udp6inp"); splx(s); if (error) return error; inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; if (!ip6_v6only) inp->inp_vflag |= INP_IPV4; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; /* just to be sure */ /* * XXX: ugly!! * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = ip_defttl; return 0; } static int udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int s, error; inp = sotoinpcb(so); if (inp == 0) return EINVAL; inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) inp->inp_vflag |= INP_IPV4; else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; s = splnet(); error = in_pcbbind(inp, (struct sockaddr *)&sin, td); splx(s); return error; } } s = splnet(); error = in6_pcbbind(inp, nam, td); splx(s); return error; } static int udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int s, error; inp = sotoinpcb(so); if (inp == 0) return EINVAL; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; if (inp->inp_faddr.s_addr != INADDR_ANY) return EISCONN; in6_sin6_2_sin(&sin, sin6_p); s = splnet(); error = in_pcbconnect(inp, (struct sockaddr *)&sin, td); splx(s); if (error == 0) { inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; soisconnected(so); } return error; } } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) return EISCONN; s = splnet(); error = in6_pcbconnect(inp, nam, td); splx(s); if (error == 0) { if (!ip6_v6only) { /* should be non mapped addr */ inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; } soisconnected(so); } return error; } static int udp6_detach(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; s = splnet(); in6_pcbdetach(inp); splx(s); return 0; } static int udp6_disconnect(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; return ((*pru->pru_disconnect)(so)); } #endif if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) return ENOTCONN; s = splnet(); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; splx(s); so->so_state &= ~SS_ISCONNECTED; /* XXX */ return 0; } static int udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; int error = 0; inp = sotoinpcb(so); if (inp == 0) { error = EINVAL; goto bad; } if (addr) { if (addr->sa_len != sizeof(struct sockaddr_in6)) { error = EINVAL; goto bad; } if (addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto bad; } } #ifdef INET if (!ip6_v6only) { int hasv4addr; struct sockaddr_in6 *sin6 = 0; if (addr == 0) hasv4addr = (inp->inp_vflag & INP_IPV4); else { sin6 = (struct sockaddr_in6 *)addr; hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? 1 : 0; } if (hasv4addr) { struct pr_usrreqs *pru; if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * since a user of this socket set the * IPV6_V6ONLY flag, we discard this * datagram destined to a v4 addr. */ return EINVAL; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { /* * when remote addr is IPv4-mapped * address, local addr should not be * an IPv6 address; since you cannot * determine how to map IPv6 source * address to IPv4. */ return EINVAL; } if (sin6) in6_sin6_2_sin_in_sock(addr); pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; error = ((*pru->pru_send)(so, flags, m, addr, control, td)); /* addr will just be freed in sendit(). */ return error; } } #endif return udp6_output(inp, m, addr, control, td); bad: m_freem(m); return (error); } struct pr_usrreqs udp6_usrreqs = { udp6_abort, pru_accept_notsupp, udp6_attach, udp6_bind, udp6_connect, pru_connect2_notsupp, in6_control, udp6_detach, udp6_disconnect, pru_listen_notsupp, in6_mapped_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, udp6_send, pru_sense_null, udp_shutdown, in6_mapped_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel };