Index: head/sys/netinet/in_pcb.c =================================================================== --- head/sys/netinet/in_pcb.c (revision 127504) +++ head/sys/netinet/in_pcb.c (revision 127505) @@ -1,1181 +1,1180 @@ /* * Copyright (c) 1982, 1986, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 * $FreeBSD$ */ #include "opt_ipsec.h" #include "opt_inet6.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif /* INET6 */ #ifdef IPSEC #include #include #endif /* IPSEC */ #ifdef FAST_IPSEC #if defined(IPSEC) || defined(IPSEC_ESP) #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" #endif #include #include #endif /* FAST_IPSEC */ /* * These configure the range of local port addresses assigned to * "unspecified" outgoing connections/packets/whatever. */ int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ /* * Reserved ports accessible only to root. There are significant * security considerations that must be accounted for when changing these, * but the security benefits can be great. Please be careful. */ int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */ int ipport_reservedlow = 0; #define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ else if ((var) > (max)) { (var) = (max); } static int sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error) { RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); } return error; } #undef RANGECHK SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, ""); SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, ""); /* * in_pcb.c: manage the Protocol Control Blocks. * * NOTE: It is assumed that most of these functions will be called at * splnet(). XXX - There are, unfortunately, a few exceptions to this * rule that should be fixed. */ /* * Allocate a PCB and associate it with the socket. */ int in_pcballoc(so, pcbinfo, type) struct socket *so; struct inpcbinfo *pcbinfo; const char *type; { register struct inpcb *inp; int error; INP_INFO_WLOCK_ASSERT(pcbinfo); error = 0; inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO); if (inp == NULL) return (ENOBUFS); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; #ifdef MAC error = mac_init_inpcb(inp, M_NOWAIT); if (error != 0) goto out; mac_create_inpcb_from_socket(so, inp); #endif #if defined(IPSEC) || defined(FAST_IPSEC) #ifdef FAST_IPSEC error = ipsec_init_policy(so, &inp->inp_sp); #else error = ipsec_init_pcbpolicy(so, &inp->inp_sp); #endif if (error != 0) goto out; #endif /*IPSEC*/ #if defined(INET6) if (INP_SOCKAF(so) == AF_INET6) { inp->inp_vflag |= INP_IPV6PROTO; if (ip6_v6only) inp->inp_flags |= IN6P_IPV6_V6ONLY; } #endif LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); pcbinfo->ipi_count++; so->so_pcb = (caddr_t)inp; INP_LOCK_INIT(inp, "inp", type); #ifdef INET6 if (ip6_auto_flowlabel) inp->inp_flags |= IN6P_AUTOFLOWLABEL; #endif #if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC) out: if (error != 0) uma_zfree(pcbinfo->ipi_zone, inp); #endif return (error); } int -in_pcbbind(inp, nam, td) +in_pcbbind(inp, nam, cred) register struct inpcb *inp; struct sockaddr *nam; - struct thread *td; + struct ucred *cred; { int anonport, error; INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_LOCK_ASSERT(inp); if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) return (EINVAL); anonport = inp->inp_lport == 0 && (nam == NULL || ((struct sockaddr_in *)nam)->sin_port == 0); error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, - &inp->inp_lport, td); + &inp->inp_lport, cred); if (error) return (error); if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); } /* * Set up a bind operation on a PCB, performing port allocation * as required, but do not actually modify the PCB. Callers can * either complete the bind by setting inp_laddr/inp_lport and * calling in_pcbinshash(), or they can just use the resulting * port and address to authorise the sending of a once-off packet. * * On error, the values of *laddrp and *lportp are not changed. */ int -in_pcbbind_setup(inp, nam, laddrp, lportp, td) +in_pcbbind_setup(inp, nam, laddrp, lportp, cred) struct inpcb *inp; struct sockaddr *nam; in_addr_t *laddrp; u_short *lportp; - struct thread *td; + struct ucred *cred; { struct socket *so = inp->inp_socket; unsigned short *lastport; struct sockaddr_in *sin; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct in_addr laddr; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error, prison = 0; INP_INFO_WLOCK_ASSERT(pcbinfo); INP_LOCK_ASSERT(inp); if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); laddr.s_addr = *laddrp; if (nam != NULL && laddr.s_addr != INADDR_ANY) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; if (nam) { sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sin)) return (EINVAL); #ifdef notdef /* * We should check the family, but old programs * incorrectly fail to initialize it. */ if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); #endif if (sin->sin_addr.s_addr != INADDR_ANY) - if (prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) + if (prison_ip(cred, 0, &sin->sin_addr.s_addr)) return(EINVAL); if (sin->sin_port != *lportp) { /* Don't allow the port to change. */ if (*lportp != 0) return (EINVAL); lport = sin->sin_port; } /* NB: lport is left as 0 if the port isn't being changed. */ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (sin->sin_addr.s_addr != INADDR_ANY) { sin->sin_port = 0; /* yech... */ bzero(&sin->sin_zero, sizeof(sin->sin_zero)); if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) return (EADDRNOTAVAIL); } laddr = sin->sin_addr; if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) <= ipport_reservedhigh && ntohs(lport) >= ipport_reservedlow && - td && suser_cred(td->td_ucred, PRISON_ROOT)) + suser_cred(cred, PRISON_ROOT)) return (EACCES); - if (td && jailed(td->td_ucred)) + if (jailed(cred)) prison = 1; if (so->so_cred->cr_uid != 0 && !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { t = in_pcblookup_local(inp->inp_pcbinfo, sin->sin_addr, lport, prison ? 0 : INPLOOKUP_WILDCARD); /* * XXX * This entire block sorely needs a rewrite. */ if (t && (t->inp_vflag & INP_TIMEWAIT)) { if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid)) return (EADDRINUSE); } else if (t && (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (t->inp_socket->so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid)) { #if defined(INET6) if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) #endif /* defined(INET6) */ return (EADDRINUSE); } } - if (prison && - prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) + if (prison && prison_ip(cred, 0, &sin->sin_addr.s_addr)) return (EADDRNOTAVAIL); t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, prison ? 0 : wild); if (t && (t->inp_vflag & INP_TIMEWAIT)) { if ((reuseport & intotw(t)->tw_so_options) == 0) return (EADDRINUSE); } else if (t && (reuseport & t->inp_socket->so_options) == 0) { #if defined(INET6) if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) #endif /* defined(INET6) */ return (EADDRINUSE); } } } if (*lportp != 0) lport = *lportp; if (lport == 0) { u_short first, last; int count; if (laddr.s_addr != INADDR_ANY) - if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) + if (prison_ip(cred, 0, &laddr.s_addr)) return (EINVAL); if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { - if (td && (error = suser_cred(td->td_ucred, - PRISON_ROOT)) != 0) + if ((error = suser_cred(cred, PRISON_ROOT)) != 0) return error; first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; lastport = &pcbinfo->lastport; } /* * Simple check to ensure all ports are not used up causing * a deadlock here. * * We split the two cases (up and down) so that the direction * is not being tested on each round of the loop. */ if (first > last) { /* * counting down */ count = first - last; do { if (count-- < 0) /* completely used? */ return (EADDRNOTAVAIL); --*lastport; if (*lastport > first || *lastport < last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, laddr, lport, wild)); } else { /* * counting up */ count = last - first; do { if (count-- < 0) /* completely used? */ return (EADDRNOTAVAIL); ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, laddr, lport, wild)); } } - if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) + if (prison_ip(cred, 0, &laddr.s_addr)) return (EINVAL); *laddrp = laddr.s_addr; *lportp = lport; return (0); } /* * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int -in_pcbconnect(inp, nam, td) +in_pcbconnect(inp, nam, cred) register struct inpcb *inp; struct sockaddr *nam; - struct thread *td; + struct ucred *cred; { u_short lport, fport; in_addr_t laddr, faddr; int anonport, error; lport = inp->inp_lport; laddr = inp->inp_laddr.s_addr; anonport = (lport == 0); error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, - NULL, td); + NULL, cred); if (error) return (error); /* Do the initial binding of the local address if required. */ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { inp->inp_lport = lport; inp->inp_laddr.s_addr = laddr; if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } } /* Commit the remaining changes. */ inp->inp_lport = lport; inp->inp_laddr.s_addr = laddr; inp->inp_faddr.s_addr = faddr; inp->inp_fport = fport; in_pcbrehash(inp); #ifdef IPSEC if (inp->inp_socket->so_type == SOCK_STREAM) ipsec_pcbconn(inp->inp_sp); #endif if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); } /* * Set up for a connect from a socket to the specified address. * On entry, *laddrp and *lportp should contain the current local * address and port for the PCB; these are updated to the values * that should be placed in inp_laddr and inp_lport to complete * the connect. * * On success, *faddrp and *fportp will be set to the remote address * and port. These are not updated in the error case. * * If the operation fails because the connection already exists, * *oinpp will be set to the PCB of that connection so that the * caller can decide to override it. In all other cases, *oinpp * is set to NULL. */ int -in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td) +in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, cred) register struct inpcb *inp; struct sockaddr *nam; in_addr_t *laddrp; u_short *lportp; in_addr_t *faddrp; u_short *fportp; struct inpcb **oinpp; - struct thread *td; + struct ucred *cred; { struct sockaddr_in *sin = (struct sockaddr_in *)nam; struct in_ifaddr *ia; struct sockaddr_in sa; - struct ucred *cred; + struct ucred *socred; struct inpcb *oinp; struct in_addr laddr, faddr; u_short lport, fport; int error; if (oinpp != NULL) *oinpp = NULL; if (nam->sa_len != sizeof (*sin)) return (EINVAL); if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); if (sin->sin_port == 0) return (EADDRNOTAVAIL); laddr.s_addr = *laddrp; lport = *lportp; faddr = sin->sin_addr; fport = sin->sin_port; - cred = inp->inp_socket->so_cred; - if (laddr.s_addr == INADDR_ANY && jailed(cred)) { + socred = inp->inp_socket->so_cred; + if (laddr.s_addr == INADDR_ANY && jailed(socred)) { bzero(&sa, sizeof(sa)); - sa.sin_addr.s_addr = htonl(prison_getip(cred)); + sa.sin_addr.s_addr = htonl(prison_getip(socred)); sa.sin_len = sizeof(sa); sa.sin_family = AF_INET; error = in_pcbbind_setup(inp, (struct sockaddr *)&sa, - &laddr.s_addr, &lport, td); + &laddr.s_addr, &lport, cred); if (error) return (error); } if (!TAILQ_EMPTY(&in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, * use the primary local address. * If the supplied address is INADDR_BROADCAST, * and the primary interface supports broadcast, * choose the broadcast address for that interface. */ if (faddr.s_addr == INADDR_ANY) faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr; else if (faddr.s_addr == (u_long)INADDR_BROADCAST && (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST)) faddr = satosin(&TAILQ_FIRST( &in_ifaddrhead)->ia_broadaddr)->sin_addr; } if (laddr.s_addr == INADDR_ANY) { struct route sro; bzero(&sro, sizeof(sro)); ia = (struct in_ifaddr *)0; /* * If route is known our src addr is taken from the i/f, * else punt. */ if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) { /* Find out route to destination */ sro.ro_dst.sa_family = AF_INET; sro.ro_dst.sa_len = sizeof(struct sockaddr_in); ((struct sockaddr_in *)&sro.ro_dst)->sin_addr = faddr; rtalloc_ign(&sro, RTF_CLONING); } /* * If we found a route, use the address * corresponding to the outgoing interface * unless it is the loopback (in case a route * to our address on another net goes to loopback). */ if (sro.ro_rt && !(sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) ia = ifatoia(sro.ro_rt->rt_ifa); if (sro.ro_rt) RTFREE(sro.ro_rt); if (ia == 0) { bzero(&sa, sizeof(sa)); sa.sin_addr = faddr; sa.sin_len = sizeof(sa); sa.sin_family = AF_INET; ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa))); if (ia == 0) ia = ifatoia(ifa_ifwithnet(sintosa(&sa))); if (ia == 0) ia = TAILQ_FIRST(&in_ifaddrhead); if (ia == 0) return (EADDRNOTAVAIL); } /* * If the destination address is multicast and an outgoing * interface has been set as a multicast option, use the * address of that interface as our source address. */ if (IN_MULTICAST(ntohl(faddr.s_addr)) && inp->inp_moptions != NULL) { struct ip_moptions *imo; struct ifnet *ifp; imo = inp->inp_moptions; if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) if (ia->ia_ifp == ifp) break; if (ia == 0) return (EADDRNOTAVAIL); } } laddr = ia->ia_addr.sin_addr; } oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, 0, NULL); if (oinp != NULL) { if (oinpp != NULL) *oinpp = oinp; return (EADDRINUSE); } if (lport == 0) { - error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, td); + error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, + cred); if (error) return (error); } *laddrp = laddr.s_addr; *lportp = lport; *faddrp = faddr.s_addr; *fportp = fport; return (0); } void in_pcbdisconnect(inp) struct inpcb *inp; { INP_LOCK_ASSERT(inp); inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; in_pcbrehash(inp); #ifdef IPSEC ipsec_pcbdisconn(inp->inp_sp); #endif if (inp->inp_socket->so_state & SS_NOFDREF) in_pcbdetach(inp); } void in_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; struct inpcbinfo *ipi = inp->inp_pcbinfo; INP_LOCK_ASSERT(inp); #if defined(IPSEC) || defined(FAST_IPSEC) ipsec4_delete_pcbpolicy(inp); #endif /*IPSEC*/ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); if (so) { so->so_pcb = 0; sotryfree(so); } if (inp->inp_options) (void)m_free(inp->inp_options); ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; INP_LOCK_DESTROY(inp); #ifdef MAC mac_destroy_inpcb(inp); #endif uma_zfree(ipi->ipi_zone, inp); } struct sockaddr * in_sockaddr(port, addr_p) in_port_t port; struct in_addr *addr_p; { struct sockaddr_in *sin; MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = *addr_p; sin->sin_port = port; return (struct sockaddr *)sin; } /* * The wrapper function will pass down the pcbinfo for this function to lock. * The socket must have a valid * (i.e., non-nil) PCB, but it should be impossible to get an invalid one * except through a kernel programming error, so it is acceptable to panic * (or in this case trap) if the PCB is invalid. (Actually, we don't trap * because there actually /is/ a programming error somewhere... XXX) */ int in_setsockaddr(so, nam, pcbinfo) struct socket *so; struct sockaddr **nam; struct inpcbinfo *pcbinfo; { int s; register struct inpcb *inp; struct in_addr addr; in_port_t port; s = splnet(); INP_INFO_RLOCK(pcbinfo); inp = sotoinpcb(so); if (!inp) { INP_INFO_RUNLOCK(pcbinfo); splx(s); return ECONNRESET; } INP_LOCK(inp); port = inp->inp_lport; addr = inp->inp_laddr; INP_UNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); splx(s); *nam = in_sockaddr(port, &addr); return 0; } /* * The wrapper function will pass down the pcbinfo for this function to lock. */ int in_setpeeraddr(so, nam, pcbinfo) struct socket *so; struct sockaddr **nam; struct inpcbinfo *pcbinfo; { int s; register struct inpcb *inp; struct in_addr addr; in_port_t port; s = splnet(); INP_INFO_RLOCK(pcbinfo); inp = sotoinpcb(so); if (!inp) { INP_INFO_RUNLOCK(pcbinfo); splx(s); return ECONNRESET; } INP_LOCK(inp); port = inp->inp_fport; addr = inp->inp_faddr; INP_UNLOCK(inp); INP_INFO_RUNLOCK(pcbinfo); splx(s); *nam = in_sockaddr(port, &addr); return 0; } void in_pcbnotifyall(pcbinfo, faddr, errno, notify) struct inpcbinfo *pcbinfo; struct in_addr faddr; int errno; struct inpcb *(*notify)(struct inpcb *, int); { struct inpcb *inp, *ninp; struct inpcbhead *head; int s; s = splnet(); INP_INFO_WLOCK(pcbinfo); head = pcbinfo->listhead; for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { INP_LOCK(inp); ninp = LIST_NEXT(inp, inp_list); #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) { INP_UNLOCK(inp); continue; } #endif if (inp->inp_faddr.s_addr != faddr.s_addr || inp->inp_socket == NULL) { INP_UNLOCK(inp); continue; } if ((*notify)(inp, errno)) INP_UNLOCK(inp); } INP_INFO_WUNLOCK(pcbinfo); splx(s); } void in_pcbpurgeif0(pcbinfo, ifp) struct inpcbinfo *pcbinfo; struct ifnet *ifp; { struct inpcb *inp; struct ip_moptions *imo; int i, gap; /* why no splnet here? XXX */ INP_INFO_RLOCK(pcbinfo); LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { INP_LOCK(inp); imo = inp->inp_moptions; if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { /* * Unselect the outgoing interface if it is being * detached. */ if (imo->imo_multicast_ifp == ifp) imo->imo_multicast_ifp = NULL; /* * Drop multicast group membership if we joined * through the interface being detached. */ for (i = 0, gap = 0; i < imo->imo_num_memberships; i++) { if (imo->imo_membership[i]->inm_ifp == ifp) { in_delmulti(imo->imo_membership[i]); gap++; } else if (gap != 0) imo->imo_membership[i - gap] = imo->imo_membership[i]; } imo->imo_num_memberships -= gap; } INP_UNLOCK(inp); } INP_INFO_RUNLOCK(pcbinfo); } /* * Lookup a PCB based on the local address and port. */ struct inpcb * in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) struct inpcbinfo *pcbinfo; struct in_addr laddr; u_int lport_arg; int wild_okay; { register struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; INP_INFO_WLOCK_ASSERT(pcbinfo); if (!wild_okay) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_lport == lport) { /* * Found. */ return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ retrylookup: porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif /* * Clean out old time_wait sockets if they * are clogging up needed local ports. */ if ((inp->inp_vflag & INP_TIMEWAIT) != 0) { if (tcp_twrecycleable((struct tcptw *)inp->inp_ppcb)) { INP_LOCK(inp); tcp_twclose((struct tcptw *)inp->inp_ppcb, 0); match = NULL; goto retrylookup; } } if (inp->inp_faddr.s_addr != INADDR_ANY) wildcard++; if (inp->inp_laddr.s_addr != INADDR_ANY) { if (laddr.s_addr == INADDR_ANY) wildcard++; else if (inp->inp_laddr.s_addr != laddr.s_addr) continue; } else { if (laddr.s_addr != INADDR_ANY) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) { break; } } } } return (match); } } /* * Lookup PCB in hash list. */ struct inpcb * in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) struct inpcbinfo *pcbinfo; struct in_addr faddr, laddr; u_int fport_arg, lport_arg; int wildcard; struct ifnet *ifp; { struct inpcbhead *head; register struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; INP_INFO_RLOCK_ASSERT(pcbinfo); /* * First look for an exact match. */ head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == faddr.s_addr && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_fport == fport && inp->inp_lport == lport) { /* * Found. */ return (inp); } } if (wildcard) { struct inpcb *local_wild = NULL; #if defined(INET6) struct inpcb *local_wild_mapped = NULL; #endif /* defined(INET6) */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_lport == lport) { if (ifp && ifp->if_type == IFT_FAITH && (inp->inp_flags & INP_FAITH) == 0) continue; if (inp->inp_laddr.s_addr == laddr.s_addr) return (inp); else if (inp->inp_laddr.s_addr == INADDR_ANY) { #if defined(INET6) if (INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) local_wild_mapped = inp; else #endif /* defined(INET6) */ local_wild = inp; } } } #if defined(INET6) if (local_wild == NULL) return (local_wild_mapped); #endif /* defined(INET6) */ return (local_wild); } /* * Not found. */ return (NULL); } /* * Insert PCB onto various hash lists. */ int in_pcbinshash(inp) struct inpcb *inp; { struct inpcbhead *pcbhash; struct inpcbporthead *pcbporthash; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbport *phd; u_int32_t hashkey_faddr; INP_INFO_WLOCK_ASSERT(pcbinfo); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; /* * Go through port list and look for a head for this lport. */ LIST_FOREACH(phd, pcbporthash, phd_hash) { if (phd->phd_port == inp->inp_lport) break; } /* * If none exists, malloc one and tack it on. */ if (phd == NULL) { MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); if (phd == NULL) { return (ENOBUFS); /* XXX */ } phd->phd_port = inp->inp_lport; LIST_INIT(&phd->phd_pcblist); LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); } inp->inp_phd = phd; LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); return (0); } /* * Move PCB to the proper hash bucket when { faddr, fport } have been * changed. NOTE: This does not handle the case of the lport changing (the * hashed port list would have to be updated as well), so the lport must * not change after in_pcbinshash() has been called. */ void in_pcbrehash(inp) struct inpcb *inp; { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *head; u_int32_t hashkey_faddr; INP_INFO_WLOCK_ASSERT(pcbinfo); /* XXX? INP_LOCK_ASSERT(inp); */ #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; head = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); } /* * Remove PCB from various lists. */ void in_pcbremlists(inp) struct inpcb *inp; { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; INP_INFO_WLOCK_ASSERT(pcbinfo); INP_LOCK_ASSERT(inp); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; if (inp->inp_lport) { struct inpcbport *phd = inp->inp_phd; LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (LIST_FIRST(&phd->phd_pcblist) == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } } LIST_REMOVE(inp, inp_list); pcbinfo->ipi_count--; } /* * A set label operation has occurred at the socket layer, propagate the * label change into the in_pcb for the socket. */ void in_pcbsosetlabel(so) struct socket *so; { #ifdef MAC struct inpcb *inp; /* XXX: Will assert socket lock when we have them. */ inp = (struct inpcb *)so->so_pcb; INP_LOCK(inp); mac_inpcb_sosetlabel(so, inp); INP_UNLOCK(inp); #endif } Index: head/sys/netinet/in_pcb.h =================================================================== --- head/sys/netinet/in_pcb.h (revision 127504) +++ head/sys/netinet/in_pcb.h (revision 127505) @@ -1,369 +1,369 @@ /* * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NETINET_IN_PCB_H_ #define _NETINET_IN_PCB_H_ #include #include #include #include #define in6pcb inpcb /* for KAME src sync over BSD*'s */ #define in6p_sp inp_sp /* for KAME src sync over BSD*'s */ struct inpcbpolicy; /* * Common structure pcb for internet protocol implementation. * Here are stored pointers to local and foreign host table * entries, local and foreign socket numbers, and pointers * up (to a socket structure) and down (to a protocol-specific) * control block. */ LIST_HEAD(inpcbhead, inpcb); LIST_HEAD(inpcbporthead, inpcbport); typedef u_quad_t inp_gen_t; /* * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet. * So, AF_INET6 null laddr is also used as AF_INET null laddr, * by utilize following structure. (At last, same as INRIA) */ struct in_addr_4in6 { u_int32_t ia46_pad32[3]; struct in_addr ia46_addr4; }; /* * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. * in_conninfo has some extra padding to accomplish this. */ struct in_endpoints { u_int16_t ie_fport; /* foreign port */ u_int16_t ie_lport; /* local port */ /* protocol dependent part, local and foreign addr */ union { /* foreign host table entry */ struct in_addr_4in6 ie46_foreign; struct in6_addr ie6_foreign; } ie_dependfaddr; union { /* local host table entry */ struct in_addr_4in6 ie46_local; struct in6_addr ie6_local; } ie_dependladdr; #define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4 #define ie_laddr ie_dependladdr.ie46_local.ia46_addr4 #define ie6_faddr ie_dependfaddr.ie6_foreign #define ie6_laddr ie_dependladdr.ie6_local }; /* * XXX * the defines for inc_* are hacks and should be changed to direct references */ struct in_conninfo { u_int8_t inc_flags; u_int8_t inc_len; u_int16_t inc_pad; /* XXX alignment for in_endpoints */ /* protocol dependent part */ struct in_endpoints inc_ie; }; #define inc_isipv6 inc_flags /* temp compatability */ #define inc_fport inc_ie.ie_fport #define inc_lport inc_ie.ie_lport #define inc_faddr inc_ie.ie_faddr #define inc_laddr inc_ie.ie_laddr #define inc6_faddr inc_ie.ie6_faddr #define inc6_laddr inc_ie.ie6_laddr struct icmp6_filter; struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* hash list */ LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */ u_int32_t inp_flow; /* local and foreign ports, local and foreign addr */ struct in_conninfo inp_inc; caddr_t inp_ppcb; /* pointer to per-protocol pcb */ struct inpcbinfo *inp_pcbinfo; /* PCB list info */ struct socket *inp_socket; /* back pointer to socket */ /* list for this PCB's local port */ struct label *inp_label; /* MAC label */ int inp_flags; /* generic IP/datagram flags */ struct inpcbpolicy *inp_sp; /* for IPSEC */ u_char inp_vflag; /* IP version flag (v4/v6) */ #define INP_IPV4 0x1 #define INP_IPV6 0x2 #define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */ #define INP_TIMEWAIT 0x8 /* .. probably doesn't go here */ #define INP_ONESBCAST 0x10 /* send all-ones broadcast */ u_char inp_ip_ttl; /* time to live proto */ u_char inp_ip_p; /* protocol proto */ /* protocol dependent part; options */ struct { u_char inp4_ip_tos; /* type of service proto */ struct mbuf *inp4_options; /* IP options */ struct ip_moptions *inp4_moptions; /* IP multicast options */ } inp_depend4; #define inp_fport inp_inc.inc_fport #define inp_lport inp_inc.inc_lport #define inp_faddr inp_inc.inc_faddr #define inp_laddr inp_inc.inc_laddr #define inp_ip_tos inp_depend4.inp4_ip_tos #define inp_options inp_depend4.inp4_options #define inp_moptions inp_depend4.inp4_moptions struct { /* IP options */ struct mbuf *inp6_options; /* IP6 options for outgoing packets */ struct ip6_pktopts *inp6_outputopts; /* IP multicast options */ struct ip6_moptions *inp6_moptions; /* ICMPv6 code type filter */ struct icmp6_filter *inp6_icmp6filt; /* IPV6_CHECKSUM setsockopt */ int inp6_cksum; u_short inp6_ifindex; short inp6_hops; } inp_depend6; LIST_ENTRY(inpcb) inp_portlist; struct inpcbport *inp_phd; /* head of this list */ inp_gen_t inp_gencnt; /* generation count of this instance */ struct mtx inp_mtx; #define in6p_faddr inp_inc.inc6_faddr #define in6p_laddr inp_inc.inc6_laddr #define in6p_ip6_hlim inp_depend6.inp6_hlim #define in6p_hops inp_depend6.inp6_hops /* default hop limit */ #define in6p_ip6_nxt inp_ip_p #define in6p_flowinfo inp_flow #define in6p_vflag inp_vflag #define in6p_options inp_depend6.inp6_options #define in6p_outputopts inp_depend6.inp6_outputopts #define in6p_moptions inp_depend6.inp6_moptions #define in6p_icmp6filt inp_depend6.inp6_icmp6filt #define in6p_cksum inp_depend6.inp6_cksum #define inp6_ifindex inp_depend6.inp6_ifindex #define in6p_flags inp_flags /* for KAME src sync over BSD*'s */ #define in6p_socket inp_socket /* for KAME src sync over BSD*'s */ #define in6p_lport inp_lport /* for KAME src sync over BSD*'s */ #define in6p_fport inp_fport /* for KAME src sync over BSD*'s */ #define in6p_ppcb inp_ppcb /* for KAME src sync over BSD*'s */ }; /* * The range of the generation count, as used in this implementation, * is 9e19. We would have to create 300 billion connections per * second for this number to roll over in a year. This seems sufficiently * unlikely that we simply don't concern ourselves with that possibility. */ /* * Interface exported to userland by various protocols which use * inpcbs. Hack alert -- only define if struct xsocket is in scope. */ #ifdef _SYS_SOCKETVAR_H_ struct xinpcb { size_t xi_len; /* length of this structure */ struct inpcb xi_inp; struct xsocket xi_socket; u_quad_t xi_alignment_hack; }; struct xinpgen { size_t xig_len; /* length of this structure */ u_int xig_count; /* number of PCBs at this time */ inp_gen_t xig_gen; /* generation count at this time */ so_gen_t xig_sogen; /* socket generation count at this time */ }; #endif /* _SYS_SOCKETVAR_H_ */ struct inpcbport { LIST_ENTRY(inpcbport) phd_hash; struct inpcbhead phd_pcblist; u_short phd_port; }; struct inpcbinfo { /* XXX documentation, prefixes */ struct inpcbhead *hashbase; u_long hashmask; struct inpcbporthead *porthashbase; u_long porthashmask; struct inpcbhead *listhead; u_short lastport; u_short lastlow; u_short lasthi; struct uma_zone *ipi_zone; /* zone to allocate pcbs from */ u_int ipi_count; /* number of pcbs in this list */ u_quad_t ipi_gencnt; /* current generation count */ struct mtx ipi_mtx; }; /* * NB: We cannot enable assertions when IPv6 is configured as * this code is shared by both IPv4 and IPv6 and IPv6 is * not properly locked. */ #define INP_LOCK_INIT(inp, d, t) \ mtx_init(&(inp)->inp_mtx, (d), (t), MTX_DEF | MTX_RECURSE | MTX_DUPOK) #define INP_LOCK_DESTROY(inp) mtx_destroy(&(inp)->inp_mtx) #define INP_LOCK(inp) mtx_lock(&(inp)->inp_mtx) #define INP_UNLOCK(inp) mtx_unlock(&(inp)->inp_mtx) #ifndef INET6 #define INP_LOCK_ASSERT(inp) mtx_assert(&(inp)->inp_mtx, MA_OWNED) #else #define INP_LOCK_ASSERT(inp) #endif #define INP_INFO_LOCK_INIT(ipi, d) \ mtx_init(&(ipi)->ipi_mtx, (d), NULL, MTX_DEF | MTX_RECURSE) #define INP_INFO_RLOCK(ipi) mtx_lock(&(ipi)->ipi_mtx) #define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_mtx) #define INP_INFO_RUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx) #define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx) #ifndef INET6 #define INP_INFO_RLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED) #define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED) #else #define INP_INFO_RLOCK_ASSERT(ipi) #define INP_INFO_WLOCK_ASSERT(ipi) #endif #define INP_PCBHASH(faddr, lport, fport, mask) \ (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) #define INP_PCBPORTHASH(lport, mask) \ (ntohs((lport)) & (mask)) /* flags in inp_flags: */ #define INP_RECVOPTS 0x01 /* receive incoming IP options */ #define INP_RECVRETOPTS 0x02 /* receive IP options for reply */ #define INP_RECVDSTADDR 0x04 /* receive IP dst address */ #define INP_HDRINCL 0x08 /* user supplies entire IP header */ #define INP_HIGHPORT 0x10 /* user wants "high" port binding */ #define INP_LOWPORT 0x20 /* user wants "low" port binding */ #define INP_ANONPORT 0x40 /* port chosen for user */ #define INP_RECVIF 0x80 /* receive incoming interface */ #define INP_MTUDISC 0x100 /* user can do MTU discovery */ #define INP_FAITH 0x200 /* accept FAITH'ed connections */ #define INP_RECVTTL 0x400 /* receive incoming IP TTL */ #define IN6P_IPV6_V6ONLY 0x008000 /* restrict AF_INET6 socket for v6 */ #define IN6P_PKTINFO 0x010000 /* receive IP6 dst and I/F */ #define IN6P_HOPLIMIT 0x020000 /* receive hoplimit */ #define IN6P_HOPOPTS 0x040000 /* receive hop-by-hop options */ #define IN6P_DSTOPTS 0x080000 /* receive dst options after rthdr */ #define IN6P_RTHDR 0x100000 /* receive routing header */ #define IN6P_RTHDRDSTOPTS 0x200000 /* receive dstoptions before rthdr */ #define IN6P_TCLASS 0x400000 /* receive traffic class value */ #define IN6P_AUTOFLOWLABEL 0x800000 /* attach flowlabel automatically */ #define IN6P_RFC2292 0x40000000 /* used RFC2292 API on the socket */ #define IN6P_MTU 0x80000000 /* receive path MTU */ #define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\ INP_RECVIF|INP_RECVTTL|\ IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\ IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\ IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\ IN6P_MTU) #define INP_UNMAPPABLEOPTS (IN6P_HOPOPTS|IN6P_DSTOPTS|IN6P_RTHDR|\ IN6P_TCLASS|IN6P_AUTOFLOWLABEL) /* for KAME src sync over BSD*'s */ #define IN6P_HIGHPORT INP_HIGHPORT #define IN6P_LOWPORT INP_LOWPORT #define IN6P_ANONPORT INP_ANONPORT #define IN6P_RECVIF INP_RECVIF #define IN6P_MTUDISC INP_MTUDISC #define IN6P_FAITH INP_FAITH #define IN6P_CONTROLOPTS INP_CONTROLOPTS /* * socket AF version is {newer than,or include} * actual datagram AF version */ #define INPLOOKUP_WILDCARD 1 #define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) #define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */ #define INP_SOCKAF(so) so->so_proto->pr_domain->dom_family #define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af) #ifdef _KERNEL extern int ipport_lowfirstauto; extern int ipport_lowlastauto; extern int ipport_firstauto; extern int ipport_lastauto; extern int ipport_hifirstauto; extern int ipport_hilastauto; void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); int in_pcballoc(struct socket *, struct inpcbinfo *, const char *); -int in_pcbbind(struct inpcb *, struct sockaddr *, struct thread *); +int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *); int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, - u_short *, struct thread *); -int in_pcbconnect(struct inpcb *, struct sockaddr *, struct thread *); + u_short *, struct ucred *); +int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *); int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *, u_short *, in_addr_t *, u_short *, struct inpcb **, - struct thread *); + struct ucred *); void in_pcbdetach(struct inpcb *); void in_pcbdisconnect(struct inpcb *); int in_pcbinshash(struct inpcb *); struct inpcb * in_pcblookup_local(struct inpcbinfo *, struct in_addr, u_int, int); struct inpcb * in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int, int, struct ifnet *); void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr, int, struct inpcb *(*)(struct inpcb *, int)); void in_pcbrehash(struct inpcb *); void in_pcbsetsolabel(struct socket *so); int in_setpeeraddr(struct socket *so, struct sockaddr **nam, struct inpcbinfo *pcbinfo); int in_setsockaddr(struct socket *so, struct sockaddr **nam, struct inpcbinfo *pcbinfo);; struct sockaddr * in_sockaddr(in_port_t port, struct in_addr *addr); void in_pcbsosetlabel(struct socket *so); void in_pcbremlists(struct inpcb *inp); #endif /* _KERNEL */ #endif /* !_NETINET_IN_PCB_H_ */ Index: head/sys/netinet/ip_divert.c =================================================================== --- head/sys/netinet/ip_divert.c (revision 127504) +++ head/sys/netinet/ip_divert.c (revision 127505) @@ -1,683 +1,683 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_inet.h" #include "opt_ipfw.h" #include "opt_ipdivert.h" #include "opt_ipsec.h" #include "opt_mac.h" #ifndef INET #error "IPDIVERT requires INET." #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Divert sockets */ /* * Allocate enough space to hold a full IP packet */ #define DIVSNDQ (65536 + 100) #define DIVRCVQ (65536 + 100) /* * Divert sockets work in conjunction with ipfw, see the divert(4) * manpage for features. * Internally, packets selected by ipfw in ip_input() or ip_output(), * and never diverted before, are passed to the input queue of the * divert socket with a given 'divert_port' number (as specified in * the matching ipfw rule), and they are tagged with a 16 bit cookie * (representing the rule number of the matching ipfw rule), which * is passed to process reading from the socket. * * Packets written to the divert socket are again tagged with a cookie * (usually the same as above) and a destination address. * If the destination address is INADDR_ANY then the packet is * treated as outgoing and sent to ip_output(), otherwise it is * treated as incoming and sent to ip_input(). * In both cases, the packet is tagged with the cookie. * * On reinjection, processing in ip_input() and ip_output() * will be exactly the same as for the original packet, except that * ipfw processing will start at the rule number after the one * written in the cookie (so, tagging a packet with a cookie of 0 * will cause it to be effectively considered as a standard packet). */ /* Internal variables */ static struct inpcbhead divcb; static struct inpcbinfo divcbinfo; static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ /* * Initialize divert connection block queue. */ void div_init(void) { INP_INFO_LOCK_INIT(&divcbinfo, "div"); LIST_INIT(&divcb); divcbinfo.listhead = &divcb; /* * XXX We don't use the hash list for divert IP, but it's easier * to allocate a one entry hash list than it is to check all * over the place for hashbase == NULL. */ divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask); divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask); divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(divcbinfo.ipi_zone, maxsockets); } /* * IPPROTO_DIVERT is not in the real IP protocol number space; this * function should never be called. Just in case, drop any packets. */ void div_input(struct mbuf *m, int off) { ipstat.ips_noproto++; m_freem(m); } /* * Divert a packet by passing it up to the divert socket at port 'port'. * * Setup generic address and protocol structures for div_input routine, * then pass them along with mbuf chain. */ void divert_packet(struct mbuf *m, int incoming) { struct ip *ip; struct inpcb *inp; struct socket *sa; u_int16_t nport; struct sockaddr_in divsrc; struct m_tag *mtag; mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); if (mtag == NULL) { printf("%s: no divert tag\n", __func__); m_freem(m); return; } /* Assure header */ if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == 0) return; ip = mtod(m, struct ip *); /* * Record receive interface address, if any. * But only for incoming packets. */ bzero(&divsrc, sizeof(divsrc)); divsrc.sin_len = sizeof(divsrc); divsrc.sin_family = AF_INET; divsrc.sin_port = divert_cookie(mtag); /* record matching rule */ if (incoming) { struct ifaddr *ifa; /* Sanity check */ M_ASSERTPKTHDR(m); /* Find IP address for receive interface */ TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { if (ifa->ifa_addr == NULL) continue; if (ifa->ifa_addr->sa_family != AF_INET) continue; divsrc.sin_addr = ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr; break; } } /* * Record the incoming interface name whenever we have one. */ if (m->m_pkthdr.rcvif) { /* * Hide the actual interface name in there in the * sin_zero array. XXX This needs to be moved to a * different sockaddr type for divert, e.g. * sockaddr_div with multiple fields like * sockaddr_dl. Presently we have only 7 bytes * but that will do for now as most interfaces * are 4 or less + 2 or less bytes for unit. * There is probably a faster way of doing this, * possibly taking it from the sockaddr_dl on the iface. * This solves the problem of a P2P link and a LAN interface * having the same address, which can result in the wrong * interface being assigned to the packet when fed back * into the divert socket. Theoretically if the daemon saves * and re-uses the sockaddr_in as suggested in the man pages, * this iface name will come along for the ride. * (see div_output for the other half of this.) */ strlcpy(divsrc.sin_zero, m->m_pkthdr.rcvif->if_xname, sizeof(divsrc.sin_zero)); } /* * XXX sbappendaddr must be protected by Giant until * we have locking at the socket layer. When entered * from below we come in w/o Giant and must take it * here. Unfortunately we cannot tell whether we're * entering from above (already holding Giant), * below (potentially without Giant), or otherwise * (e.g. from tcp_syncache through a timeout) so we * have to grab it regardless. This causes a LOR with * the tcp lock, at least, and possibly others. For * the moment we're ignoring this. Once sockets are * locked this cruft can be removed. */ mtx_lock(&Giant); /* Put packet on socket queue, if any */ sa = NULL; nport = htons((u_int16_t)divert_info(mtag)); INP_INFO_RLOCK(&divcbinfo); LIST_FOREACH(inp, &divcb, inp_list) { INP_LOCK(inp); /* XXX why does only one socket match? */ if (inp->inp_lport == nport) { sa = inp->inp_socket; if (sbappendaddr(&sa->so_rcv, (struct sockaddr *)&divsrc, m, (struct mbuf *)0) == 0) sa = NULL; /* force mbuf reclaim below */ else sorwakeup(sa); INP_UNLOCK(inp); break; } INP_UNLOCK(inp); } INP_INFO_RUNLOCK(&divcbinfo); mtx_unlock(&Giant); if (sa == NULL) { m_freem(m); ipstat.ips_noproto++; ipstat.ips_delivered--; } } /* * Deliver packet back into the IP processing machinery. * * If no address specified, or address is 0.0.0.0, send to ip_output(); * otherwise, send to ip_input() and mark as having been received on * the interface with that address. */ static int div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, struct mbuf *control) { int error = 0; KASSERT(m->m_pkthdr.rcvif == NULL, ("rcvif not null")); #ifdef MAC mac_create_mbuf_from_socket(so, m); #endif if (control) m_freem(control); /* XXX */ /* Loopback avoidance and state recovery */ if (sin) { struct m_tag *mtag; struct divert_tag *dt; int i; mtag = m_tag_get(PACKET_TAG_DIVERT, sizeof(struct divert_tag), M_NOWAIT); if (mtag == NULL) { error = ENOBUFS; goto cantsend; } dt = (struct divert_tag *)(mtag+1); dt->info = 0; dt->cookie = sin->sin_port; m_tag_prepend(m, mtag); /* * Find receive interface with the given name, stuffed * (if it exists) in the sin_zero[] field. * The name is user supplied data so don't trust its size * or that it is zero terminated. */ for (i = 0; i < sizeof(sin->sin_zero) && sin->sin_zero[i]; i++) ; if ( i > 0 && i < sizeof(sin->sin_zero)) m->m_pkthdr.rcvif = ifunit(sin->sin_zero); } /* Reinject packet into the system as incoming or outgoing */ if (!sin || sin->sin_addr.s_addr == 0) { struct ip *const ip = mtod(m, struct ip *); struct inpcb *inp; INP_INFO_WLOCK(&divcbinfo); inp = sotoinpcb(so); INP_LOCK(inp); /* * Don't allow both user specified and setsockopt options, * and don't allow packet length sizes that will crash */ if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) || ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) { error = EINVAL; m_freem(m); } else { /* Convert fields to host order for ip_output() */ ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); /* Send packet to output processing */ ipstat.ips_rawout++; /* XXX */ error = ip_output(m, inp->inp_options, NULL, (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST | IP_RAWOUTPUT, inp->inp_moptions, NULL); } INP_UNLOCK(inp); INP_INFO_WUNLOCK(&divcbinfo); } else { if (m->m_pkthdr.rcvif == NULL) { /* * No luck with the name, check by IP address. * Clear the port and the ifname to make sure * there are no distractions for ifa_ifwithaddr. */ struct ifaddr *ifa; bzero(sin->sin_zero, sizeof(sin->sin_zero)); sin->sin_port = 0; ifa = ifa_ifwithaddr((struct sockaddr *) sin); if (ifa == NULL) { error = EADDRNOTAVAIL; goto cantsend; } m->m_pkthdr.rcvif = ifa->ifa_ifp; } /* Send packet to input processing */ ip_input(m); } return error; cantsend: m_freem(m); return error; } /* * Return a copy of the specified packet, but without * the divert tag. This is used when packets are ``tee'd'' * and we want the cloned copy to not have divert processing. */ struct mbuf * divert_clone(struct mbuf *m) { struct mbuf *clone; struct m_tag *mtag; clone = m_dup(m, M_DONTWAIT); if (clone != NULL) { /* strip divert tag from copy */ mtag = m_tag_find(clone, PACKET_TAG_DIVERT, NULL); if (mtag != NULL) m_tag_delete(clone, mtag); } return clone; } static int div_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error; INP_INFO_WLOCK(&divcbinfo); inp = sotoinpcb(so); if (inp != 0) { INP_INFO_WUNLOCK(&divcbinfo); return EINVAL; } if (td && (error = suser(td)) != 0) { INP_INFO_WUNLOCK(&divcbinfo); return error; } error = soreserve(so, div_sendspace, div_recvspace); if (error) { INP_INFO_WUNLOCK(&divcbinfo); return error; } error = in_pcballoc(so, &divcbinfo, "divinp"); if (error) { INP_INFO_WUNLOCK(&divcbinfo); return error; } inp = (struct inpcb *)so->so_pcb; INP_LOCK(inp); INP_INFO_WUNLOCK(&divcbinfo); inp->inp_ip_p = proto; inp->inp_vflag |= INP_IPV4; inp->inp_flags |= INP_HDRINCL; /* The socket is always "connected" because we always know "where" to send the packet */ INP_UNLOCK(inp); so->so_state |= SS_ISCONNECTED; return 0; } static int div_detach(struct socket *so) { struct inpcb *inp; INP_INFO_WLOCK(&divcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&divcbinfo); return EINVAL; } INP_LOCK(inp); in_pcbdetach(inp); INP_INFO_WUNLOCK(&divcbinfo); return 0; } static int div_abort(struct socket *so) { struct inpcb *inp; INP_INFO_WLOCK(&divcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&divcbinfo); return EINVAL; /* ??? possible? panic instead? */ } INP_LOCK(inp); soisdisconnected(so); in_pcbdetach(inp); INP_INFO_WUNLOCK(&divcbinfo); return 0; } static int div_disconnect(struct socket *so) { if ((so->so_state & SS_ISCONNECTED) == 0) return ENOTCONN; return div_abort(so); } static int div_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int error; INP_INFO_WLOCK(&divcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&divcbinfo); return EINVAL; } /* in_pcbbind assumes that nam is a sockaddr_in * and in_pcbbind requires a valid address. Since divert * sockets don't we need to make sure the address is * filled in properly. * XXX -- divert should not be abusing in_pcbind * and should probably have its own family. */ if (nam->sa_family != AF_INET) error = EAFNOSUPPORT; else { ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY; INP_LOCK(inp); - error = in_pcbbind(inp, nam, td); + error = in_pcbbind(inp, nam, td->td_ucred); INP_UNLOCK(inp); } INP_INFO_WUNLOCK(&divcbinfo); return error; } static int div_shutdown(struct socket *so) { struct inpcb *inp; INP_INFO_RLOCK(&divcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_RUNLOCK(&divcbinfo); return EINVAL; } INP_LOCK(inp); INP_INFO_RUNLOCK(&divcbinfo); socantsendmore(so); INP_UNLOCK(inp); return 0; } static int div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { /* Packet must have a header (but that's about it) */ if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == 0) { ipstat.ips_toosmall++; m_freem(m); return EINVAL; } /* Send packet */ return div_output(so, m, (struct sockaddr_in *)nam, control); } void div_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct in_addr faddr; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (PRC_IS_REDIRECT(cmd)) return; } static int div_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = divcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); return 0; } if (req->newptr != 0) return EPERM; /* * OK, now we're committed to doing something. */ INP_INFO_RLOCK(&divcbinfo); gencnt = divcbinfo.ipi_gencnt; n = divcbinfo.ipi_count; INP_INFO_RUNLOCK(&divcbinfo); error = sysctl_wire_old_buffer(req, 2 * sizeof(xig) + n*sizeof(struct xinpcb)); if (error != 0) return (error); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) return ENOMEM; INP_INFO_RLOCK(&divcbinfo); for (inp = LIST_FIRST(divcbinfo.listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_LOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) inp_list[i++] = inp; INP_UNLOCK(inp); } INP_INFO_RUNLOCK(&divcbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); error = SYSCTL_OUT(req, &xi, sizeof xi); } } if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ INP_INFO_RLOCK(&divcbinfo); xig.xig_gen = divcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = divcbinfo.ipi_count; INP_INFO_RUNLOCK(&divcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return error; } /* * This is the wrapper function for in_setsockaddr. We just pass down * the pcbinfo for in_setpeeraddr to lock. */ static int div_sockaddr(struct socket *so, struct sockaddr **nam) { return (in_setsockaddr(so, nam, &divcbinfo)); } /* * This is the wrapper function for in_setpeeraddr. We just pass down * the pcbinfo for in_setpeeraddr to lock. */ static int div_peeraddr(struct socket *so, struct sockaddr **nam) { return (in_setpeeraddr(so, nam, &divcbinfo)); } SYSCTL_DECL(_net_inet_divert); SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLFLAG_RD, 0, 0, div_pcblist, "S,xinpcb", "List of active divert sockets"); struct pr_usrreqs div_usrreqs = { div_abort, pru_accept_notsupp, div_attach, div_bind, pru_connect_notsupp, pru_connect2_notsupp, in_control, div_detach, div_disconnect, pru_listen_notsupp, div_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, div_send, pru_sense_null, div_shutdown, div_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel }; Index: head/sys/netinet/tcp_syncache.c =================================================================== --- head/sys/netinet/tcp_syncache.c (revision 127504) +++ head/sys/netinet/tcp_syncache.c (revision 127505) @@ -1,1451 +1,1453 @@ /*- * Copyright (c) 2001 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jonathan Lemon * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_mac.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #include #include #include /* for proc0 declaration */ #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #include #endif #include #ifdef TCPDEBUG #include #endif #include #include #include #include #ifdef TCPDEBUG #include #endif #ifdef INET6 #include #endif #ifdef IPSEC #include #ifdef INET6 #include #endif #endif /*IPSEC*/ #ifdef FAST_IPSEC #include #ifdef INET6 #include #endif #include #endif /*FAST_IPSEC*/ #include #include static int tcp_syncookies = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW, &tcp_syncookies, 0, "Use TCP SYN cookies if the syncache overflows"); static void syncache_drop(struct syncache *, struct syncache_head *); static void syncache_free(struct syncache *); static void syncache_insert(struct syncache *, struct syncache_head *); struct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **); #ifdef TCPDEBUG static int syncache_respond(struct syncache *, struct mbuf *, struct socket *); #else static int syncache_respond(struct syncache *, struct mbuf *); #endif static struct socket *syncache_socket(struct syncache *, struct socket *, struct mbuf *m); static void syncache_timer(void *); static u_int32_t syncookie_generate(struct syncache *); static struct syncache *syncookie_lookup(struct in_conninfo *, struct tcphdr *, struct socket *); /* * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies. * 3 retransmits corresponds to a timeout of (1 + 2 + 4 + 8 == 15) seconds, * the odds are that the user has given up attempting to connect by then. */ #define SYNCACHE_MAXREXMTS 3 /* Arbitrary values */ #define TCP_SYNCACHE_HASHSIZE 512 #define TCP_SYNCACHE_BUCKETLIMIT 30 struct tcp_syncache { struct syncache_head *hashbase; uma_zone_t zone; u_int hashsize; u_int hashmask; u_int bucket_limit; u_int cache_count; u_int cache_limit; u_int rexmt_limit; u_int hash_secret; TAILQ_HEAD(, syncache) timerq[SYNCACHE_MAXREXMTS + 1]; struct callout tt_timerq[SYNCACHE_MAXREXMTS + 1]; }; static struct tcp_syncache tcp_syncache; SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache"); SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN, &tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache"); SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN, &tcp_syncache.cache_limit, 0, "Overall entry limit for syncache"); SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD, &tcp_syncache.cache_count, 0, "Current number of entries in syncache"); SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN, &tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable"); SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW, &tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions"); static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache"); #define SYNCACHE_HASH(inc, mask) \ ((tcp_syncache.hash_secret ^ \ (inc)->inc_faddr.s_addr ^ \ ((inc)->inc_faddr.s_addr >> 16) ^ \ (inc)->inc_fport ^ (inc)->inc_lport) & mask) #define SYNCACHE_HASH6(inc, mask) \ ((tcp_syncache.hash_secret ^ \ (inc)->inc6_faddr.s6_addr32[0] ^ \ (inc)->inc6_faddr.s6_addr32[3] ^ \ (inc)->inc_fport ^ (inc)->inc_lport) & mask) #define ENDPTS_EQ(a, b) ( \ (a)->ie_fport == (b)->ie_fport && \ (a)->ie_lport == (b)->ie_lport && \ (a)->ie_faddr.s_addr == (b)->ie_faddr.s_addr && \ (a)->ie_laddr.s_addr == (b)->ie_laddr.s_addr \ ) #define ENDPTS6_EQ(a, b) (memcmp(a, b, sizeof(*a)) == 0) #define SYNCACHE_TIMEOUT(sc, slot) do { \ sc->sc_rxtslot = (slot); \ sc->sc_rxttime = ticks + TCPTV_RTOBASE * tcp_backoff[(slot)]; \ TAILQ_INSERT_TAIL(&tcp_syncache.timerq[(slot)], sc, sc_timerq); \ if (!callout_active(&tcp_syncache.tt_timerq[(slot)])) \ callout_reset(&tcp_syncache.tt_timerq[(slot)], \ TCPTV_RTOBASE * tcp_backoff[(slot)], \ syncache_timer, (void *)((intptr_t)(slot))); \ } while (0) static void syncache_free(struct syncache *sc) { if (sc->sc_ipopts) (void) m_free(sc->sc_ipopts); uma_zfree(tcp_syncache.zone, sc); } void syncache_init(void) { int i; tcp_syncache.cache_count = 0; tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT; tcp_syncache.cache_limit = tcp_syncache.hashsize * tcp_syncache.bucket_limit; tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS; tcp_syncache.hash_secret = arc4random(); TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize", &tcp_syncache.hashsize); TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit", &tcp_syncache.cache_limit); TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit", &tcp_syncache.bucket_limit); if (!powerof2(tcp_syncache.hashsize)) { printf("WARNING: syncache hash size is not a power of 2.\n"); tcp_syncache.hashsize = 512; /* safe default */ } tcp_syncache.hashmask = tcp_syncache.hashsize - 1; /* Allocate the hash table. */ MALLOC(tcp_syncache.hashbase, struct syncache_head *, tcp_syncache.hashsize * sizeof(struct syncache_head), M_SYNCACHE, M_WAITOK); /* Initialize the hash buckets. */ for (i = 0; i < tcp_syncache.hashsize; i++) { TAILQ_INIT(&tcp_syncache.hashbase[i].sch_bucket); tcp_syncache.hashbase[i].sch_length = 0; } /* Initialize the timer queues. */ for (i = 0; i <= SYNCACHE_MAXREXMTS; i++) { TAILQ_INIT(&tcp_syncache.timerq[i]); callout_init(&tcp_syncache.tt_timerq[i], debug_mpsafenet ? CALLOUT_MPSAFE : 0); } /* * Allocate the syncache entries. Allow the zone to allocate one * more entry than cache limit, so a new entry can bump out an * older one. */ tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(tcp_syncache.zone, tcp_syncache.cache_limit); tcp_syncache.cache_limit -= 1; } static void syncache_insert(sc, sch) struct syncache *sc; struct syncache_head *sch; { struct syncache *sc2; int i; INP_INFO_WLOCK_ASSERT(&tcbinfo); /* * Make sure that we don't overflow the per-bucket * limit or the total cache size limit. */ if (sch->sch_length >= tcp_syncache.bucket_limit) { /* * The bucket is full, toss the oldest element. */ sc2 = TAILQ_FIRST(&sch->sch_bucket); sc2->sc_tp->ts_recent = ticks; syncache_drop(sc2, sch); tcpstat.tcps_sc_bucketoverflow++; } else if (tcp_syncache.cache_count >= tcp_syncache.cache_limit) { /* * The cache is full. Toss the oldest entry in the * entire cache. This is the front entry in the * first non-empty timer queue with the largest * timeout value. */ for (i = SYNCACHE_MAXREXMTS; i >= 0; i--) { sc2 = TAILQ_FIRST(&tcp_syncache.timerq[i]); if (sc2 != NULL) break; } sc2->sc_tp->ts_recent = ticks; syncache_drop(sc2, NULL); tcpstat.tcps_sc_cacheoverflow++; } /* Initialize the entry's timer. */ SYNCACHE_TIMEOUT(sc, 0); /* Put it into the bucket. */ TAILQ_INSERT_TAIL(&sch->sch_bucket, sc, sc_hash); sch->sch_length++; tcp_syncache.cache_count++; tcpstat.tcps_sc_added++; } static void syncache_drop(sc, sch) struct syncache *sc; struct syncache_head *sch; { INP_INFO_WLOCK_ASSERT(&tcbinfo); if (sch == NULL) { #ifdef INET6 if (sc->sc_inc.inc_isipv6) { sch = &tcp_syncache.hashbase[ SYNCACHE_HASH6(&sc->sc_inc, tcp_syncache.hashmask)]; } else #endif { sch = &tcp_syncache.hashbase[ SYNCACHE_HASH(&sc->sc_inc, tcp_syncache.hashmask)]; } } TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); sch->sch_length--; tcp_syncache.cache_count--; TAILQ_REMOVE(&tcp_syncache.timerq[sc->sc_rxtslot], sc, sc_timerq); if (TAILQ_EMPTY(&tcp_syncache.timerq[sc->sc_rxtslot])) callout_stop(&tcp_syncache.tt_timerq[sc->sc_rxtslot]); syncache_free(sc); } /* * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted. * If we have retransmitted an entry the maximum number of times, expire it. */ static void syncache_timer(xslot) void *xslot; { intptr_t slot = (intptr_t)xslot; struct syncache *sc, *nsc; struct inpcb *inp; INP_INFO_WLOCK(&tcbinfo); if (callout_pending(&tcp_syncache.tt_timerq[slot]) || !callout_active(&tcp_syncache.tt_timerq[slot])) { /* XXX can this happen? */ INP_INFO_WUNLOCK(&tcbinfo); return; } callout_deactivate(&tcp_syncache.tt_timerq[slot]); nsc = TAILQ_FIRST(&tcp_syncache.timerq[slot]); while (nsc != NULL) { if (ticks < nsc->sc_rxttime) break; sc = nsc; inp = sc->sc_tp->t_inpcb; if (slot == SYNCACHE_MAXREXMTS || slot >= tcp_syncache.rexmt_limit || inp == NULL || inp->inp_gencnt != sc->sc_inp_gencnt) { nsc = TAILQ_NEXT(sc, sc_timerq); syncache_drop(sc, NULL); tcpstat.tcps_sc_stale++; continue; } /* * syncache_respond() may call back into the syncache to * to modify another entry, so do not obtain the next * entry on the timer chain until it has completed. */ #ifdef TCPDEBUG (void) syncache_respond(sc, NULL, NULL); #else (void) syncache_respond(sc, NULL); #endif nsc = TAILQ_NEXT(sc, sc_timerq); tcpstat.tcps_sc_retransmitted++; TAILQ_REMOVE(&tcp_syncache.timerq[slot], sc, sc_timerq); SYNCACHE_TIMEOUT(sc, slot + 1); } if (nsc != NULL) callout_reset(&tcp_syncache.tt_timerq[slot], nsc->sc_rxttime - ticks, syncache_timer, (void *)(slot)); INP_INFO_WUNLOCK(&tcbinfo); } /* * Find an entry in the syncache. */ struct syncache * syncache_lookup(inc, schp) struct in_conninfo *inc; struct syncache_head **schp; { struct syncache *sc; struct syncache_head *sch; INP_INFO_WLOCK_ASSERT(&tcbinfo); #ifdef INET6 if (inc->inc_isipv6) { sch = &tcp_syncache.hashbase[ SYNCACHE_HASH6(inc, tcp_syncache.hashmask)]; *schp = sch; TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) { if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) return (sc); } } else #endif { sch = &tcp_syncache.hashbase[ SYNCACHE_HASH(inc, tcp_syncache.hashmask)]; *schp = sch; TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) { #ifdef INET6 if (sc->sc_inc.inc_isipv6) continue; #endif if (ENDPTS_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) return (sc); } } return (NULL); } /* * This function is called when we get a RST for a * non-existent connection, so that we can see if the * connection is in the syn cache. If it is, zap it. */ void syncache_chkrst(inc, th) struct in_conninfo *inc; struct tcphdr *th; { struct syncache *sc; struct syncache_head *sch; INP_INFO_WLOCK_ASSERT(&tcbinfo); sc = syncache_lookup(inc, &sch); if (sc == NULL) return; /* * If the RST bit is set, check the sequence number to see * if this is a valid reset segment. * RFC 793 page 37: * In all states except SYN-SENT, all reset (RST) segments * are validated by checking their SEQ-fields. A reset is * valid if its sequence number is in the window. * * The sequence number in the reset segment is normally an * echo of our outgoing acknowlegement numbers, but some hosts * send a reset with the sequence number at the rightmost edge * of our receive window, and we have to handle this case. */ if (SEQ_GEQ(th->th_seq, sc->sc_irs) && SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) { syncache_drop(sc, sch); tcpstat.tcps_sc_reset++; } } void syncache_badack(inc) struct in_conninfo *inc; { struct syncache *sc; struct syncache_head *sch; INP_INFO_WLOCK_ASSERT(&tcbinfo); sc = syncache_lookup(inc, &sch); if (sc != NULL) { syncache_drop(sc, sch); tcpstat.tcps_sc_badack++; } } void syncache_unreach(inc, th) struct in_conninfo *inc; struct tcphdr *th; { struct syncache *sc; struct syncache_head *sch; INP_INFO_WLOCK_ASSERT(&tcbinfo); /* we are called at splnet() here */ sc = syncache_lookup(inc, &sch); if (sc == NULL) return; /* If the sequence number != sc_iss, then it's a bogus ICMP msg */ if (ntohl(th->th_seq) != sc->sc_iss) return; /* * If we've rertransmitted 3 times and this is our second error, * we remove the entry. Otherwise, we allow it to continue on. * This prevents us from incorrectly nuking an entry during a * spurious network outage. * * See tcp_notify(). */ if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtslot < 3) { sc->sc_flags |= SCF_UNREACH; return; } syncache_drop(sc, sch); tcpstat.tcps_sc_unreach++; } /* * Build a new TCP socket structure from a syncache entry. */ static struct socket * syncache_socket(sc, lso, m) struct syncache *sc; struct socket *lso; struct mbuf *m; { struct inpcb *inp = NULL; struct socket *so; struct tcpcb *tp; GIANT_REQUIRED; /* XXX until socket locking */ INP_INFO_WLOCK_ASSERT(&tcbinfo); /* * Ok, create the full blown connection, and set things up * as they would have been set up if we had created the * connection when the SYN arrived. If we can't create * the connection, abort it. */ so = sonewconn(lso, SS_ISCONNECTED); if (so == NULL) { /* * Drop the connection; we will send a RST if the peer * retransmits the ACK, */ tcpstat.tcps_listendrop++; goto abort2; } #ifdef MAC mac_set_socket_peer_from_mbuf(m, so); #endif inp = sotoinpcb(so); INP_LOCK(inp); /* * Insert new socket into hash list. */ inp->inp_inc.inc_isipv6 = sc->sc_inc.inc_isipv6; #ifdef INET6 if (sc->sc_inc.inc_isipv6) { inp->in6p_laddr = sc->sc_inc.inc6_laddr; } else { inp->inp_vflag &= ~INP_IPV6; inp->inp_vflag |= INP_IPV4; #endif inp->inp_laddr = sc->sc_inc.inc_laddr; #ifdef INET6 } #endif inp->inp_lport = sc->sc_inc.inc_lport; if (in_pcbinshash(inp) != 0) { /* * Undo the assignments above if we failed to * put the PCB on the hash lists. */ #ifdef INET6 if (sc->sc_inc.inc_isipv6) inp->in6p_laddr = in6addr_any; else #endif inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; goto abort; } #ifdef IPSEC /* copy old policy into new socket's */ if (ipsec_copy_pcbpolicy(sotoinpcb(lso)->inp_sp, inp->inp_sp)) printf("syncache_expand: could not copy policy\n"); #endif #ifdef FAST_IPSEC /* copy old policy into new socket's */ if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp)) printf("syncache_expand: could not copy policy\n"); #endif #ifdef INET6 if (sc->sc_inc.inc_isipv6) { struct inpcb *oinp = sotoinpcb(lso); struct in6_addr laddr6; struct sockaddr_in6 sin6; /* * Inherit socket options from the listening socket. * Note that in6p_inputopts are not (and should not be) * copied, since it stores previously received options and is * used to detect if each new option is different than the * previous one and hence should be passed to a user. * If we copied in6p_inputopts, a user would not be able to * receive options just after calling the accept system call. */ inp->inp_flags |= oinp->inp_flags & INP_CONTROLOPTS; if (oinp->in6p_outputopts) inp->in6p_outputopts = ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(sin6); sin6.sin6_addr = sc->sc_inc.inc6_faddr; sin6.sin6_port = sc->sc_inc.inc_fport; sin6.sin6_flowinfo = sin6.sin6_scope_id = 0; laddr6 = inp->in6p_laddr; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) inp->in6p_laddr = sc->sc_inc.inc6_laddr; - if (in6_pcbconnect(inp, (struct sockaddr *)&sin6, &thread0)) { + if (in6_pcbconnect(inp, (struct sockaddr *)&sin6, + thread0.td_ucred)) { inp->in6p_laddr = laddr6; goto abort; } } else #endif { struct in_addr laddr; struct sockaddr_in sin; inp->inp_options = ip_srcroute(); if (inp->inp_options == NULL) { inp->inp_options = sc->sc_ipopts; sc->sc_ipopts = NULL; } sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_addr = sc->sc_inc.inc_faddr; sin.sin_port = sc->sc_inc.inc_fport; bzero((caddr_t)sin.sin_zero, sizeof(sin.sin_zero)); laddr = inp->inp_laddr; if (inp->inp_laddr.s_addr == INADDR_ANY) inp->inp_laddr = sc->sc_inc.inc_laddr; - if (in_pcbconnect(inp, (struct sockaddr *)&sin, &thread0)) { + if (in_pcbconnect(inp, (struct sockaddr *)&sin, + thread0.td_ucred)) { inp->inp_laddr = laddr; goto abort; } } tp = intotcpcb(inp); tp->t_state = TCPS_SYN_RECEIVED; tp->iss = sc->sc_iss; tp->irs = sc->sc_irs; tcp_rcvseqinit(tp); tcp_sendseqinit(tp); tp->snd_wl1 = sc->sc_irs; tp->rcv_up = sc->sc_irs + 1; tp->rcv_wnd = sc->sc_wnd; tp->rcv_adv += tp->rcv_wnd; tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY); if (sc->sc_flags & SCF_NOOPT) tp->t_flags |= TF_NOOPT; if (sc->sc_flags & SCF_WINSCALE) { tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE; tp->requested_s_scale = sc->sc_requested_s_scale; tp->request_r_scale = sc->sc_request_r_scale; } if (sc->sc_flags & SCF_TIMESTAMP) { tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP; tp->ts_recent = sc->sc_tsrecent; tp->ts_recent_age = ticks; } if (sc->sc_flags & SCF_CC) { /* * Initialization of the tcpcb for transaction; * set SND.WND = SEG.WND, * initialize CCsend and CCrecv. */ tp->t_flags |= TF_REQ_CC|TF_RCVD_CC; tp->cc_send = sc->sc_cc_send; tp->cc_recv = sc->sc_cc_recv; } #ifdef TCP_SIGNATURE if (sc->sc_flags & SCF_SIGNATURE) tp->t_flags |= TF_SIGNATURE; #endif /* * Set up MSS and get cached values from tcp_hostcache. * This might overwrite some of the defaults we just set. */ tcp_mss(tp, sc->sc_peer_mss); /* * If the SYN,ACK was retransmitted, reset cwnd to 1 segment. */ if (sc->sc_rxtslot != 0) tp->snd_cwnd = tp->t_maxseg; callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); INP_UNLOCK(inp); tcpstat.tcps_accepts++; return (so); abort: INP_UNLOCK(inp); abort2: if (so != NULL) (void) soabort(so); return (NULL); } /* * This function gets called when we receive an ACK for a * socket in the LISTEN state. We look up the connection * in the syncache, and if its there, we pull it out of * the cache and turn it into a full-blown connection in * the SYN-RECEIVED state. */ int syncache_expand(inc, th, sop, m) struct in_conninfo *inc; struct tcphdr *th; struct socket **sop; struct mbuf *m; { struct syncache *sc; struct syncache_head *sch; struct socket *so; INP_INFO_WLOCK_ASSERT(&tcbinfo); sc = syncache_lookup(inc, &sch); if (sc == NULL) { /* * There is no syncache entry, so see if this ACK is * a returning syncookie. To do this, first: * A. See if this socket has had a syncache entry dropped in * the past. We don't want to accept a bogus syncookie * if we've never received a SYN. * B. check that the syncookie is valid. If it is, then * cobble up a fake syncache entry, and return. */ if (!tcp_syncookies) return (0); sc = syncookie_lookup(inc, th, *sop); if (sc == NULL) return (0); sch = NULL; tcpstat.tcps_sc_recvcookie++; } /* * If seg contains an ACK, but not for our SYN/ACK, send a RST. */ if (th->th_ack != sc->sc_iss + 1) return (0); so = syncache_socket(sc, *sop, m); if (so == NULL) { #if 0 resetandabort: /* XXXjlemon check this - is this correct? */ (void) tcp_respond(NULL, m, m, th, th->th_seq + tlen, (tcp_seq)0, TH_RST|TH_ACK); #endif m_freem(m); /* XXX only needed for above */ tcpstat.tcps_sc_aborted++; } else tcpstat.tcps_sc_completed++; if (sch == NULL) syncache_free(sc); else syncache_drop(sc, sch); *sop = so; return (1); } /* * Given a LISTEN socket and an inbound SYN request, add * this to the syn cache, and send back a segment: * * to the source. * * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN. * Doing so would require that we hold onto the data and deliver it * to the application. However, if we are the target of a SYN-flood * DoS attack, an attacker could send data which would eventually * consume all available buffer space if it were ACKed. By not ACKing * the data, we avoid this DoS scenario. */ int syncache_add(inc, to, th, sop, m) struct in_conninfo *inc; struct tcpopt *to; struct tcphdr *th; struct socket **sop; struct mbuf *m; { struct tcpcb *tp; struct socket *so; struct syncache *sc = NULL; struct syncache_head *sch; struct mbuf *ipopts = NULL; struct rmxp_tao tao; int i, win; INP_INFO_WLOCK_ASSERT(&tcbinfo); so = *sop; tp = sototcpcb(so); bzero(&tao, sizeof(tao)); /* * Remember the IP options, if any. */ #ifdef INET6 if (!inc->inc_isipv6) #endif ipopts = ip_srcroute(); /* * See if we already have an entry for this connection. * If we do, resend the SYN,ACK, and reset the retransmit timer. * * XXX * should the syncache be re-initialized with the contents * of the new SYN here (which may have different options?) */ sc = syncache_lookup(inc, &sch); if (sc != NULL) { tcpstat.tcps_sc_dupsyn++; if (ipopts) { /* * If we were remembering a previous source route, * forget it and use the new one we've been given. */ if (sc->sc_ipopts) (void) m_free(sc->sc_ipopts); sc->sc_ipopts = ipopts; } /* * Update timestamp if present. */ if (sc->sc_flags & SCF_TIMESTAMP) sc->sc_tsrecent = to->to_tsval; /* * PCB may have changed, pick up new values. */ sc->sc_tp = tp; sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt; #ifdef TCPDEBUG if (syncache_respond(sc, m, so) == 0) { #else if (syncache_respond(sc, m) == 0) { #endif /* NB: guarded by INP_INFO_WLOCK(&tcbinfo) */ TAILQ_REMOVE(&tcp_syncache.timerq[sc->sc_rxtslot], sc, sc_timerq); SYNCACHE_TIMEOUT(sc, sc->sc_rxtslot); tcpstat.tcps_sndacks++; tcpstat.tcps_sndtotal++; } *sop = NULL; return (1); } sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT); if (sc == NULL) { /* * The zone allocator couldn't provide more entries. * Treat this as if the cache was full; drop the oldest * entry and insert the new one. */ /* NB: guarded by INP_INFO_WLOCK(&tcbinfo) */ for (i = SYNCACHE_MAXREXMTS; i >= 0; i--) { sc = TAILQ_FIRST(&tcp_syncache.timerq[i]); if (sc != NULL) break; } sc->sc_tp->ts_recent = ticks; syncache_drop(sc, NULL); tcpstat.tcps_sc_zonefail++; sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT); if (sc == NULL) { if (ipopts) (void) m_free(ipopts); return (0); } } /* * Fill in the syncache values. */ bzero(sc, sizeof(*sc)); sc->sc_tp = tp; sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt; sc->sc_ipopts = ipopts; sc->sc_inc.inc_fport = inc->inc_fport; sc->sc_inc.inc_lport = inc->inc_lport; #ifdef INET6 sc->sc_inc.inc_isipv6 = inc->inc_isipv6; if (inc->inc_isipv6) { sc->sc_inc.inc6_faddr = inc->inc6_faddr; sc->sc_inc.inc6_laddr = inc->inc6_laddr; } else #endif { sc->sc_inc.inc_faddr = inc->inc_faddr; sc->sc_inc.inc_laddr = inc->inc_laddr; } sc->sc_irs = th->th_seq; sc->sc_flags = 0; sc->sc_peer_mss = to->to_flags & TOF_MSS ? to->to_mss : 0; if (tcp_syncookies) sc->sc_iss = syncookie_generate(sc); else sc->sc_iss = arc4random(); /* Initial receive window: clip sbspace to [0 .. TCP_MAXWIN] */ win = sbspace(&so->so_rcv); win = imax(win, 0); win = imin(win, TCP_MAXWIN); sc->sc_wnd = win; if (tcp_do_rfc1323) { /* * A timestamp received in a SYN makes * it ok to send timestamp requests and replies. */ if (to->to_flags & TOF_TS) { sc->sc_tsrecent = to->to_tsval; sc->sc_flags |= SCF_TIMESTAMP; } if (to->to_flags & TOF_SCALE) { int wscale = 0; /* Compute proper scaling value from buffer space */ while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < so->so_rcv.sb_hiwat) wscale++; sc->sc_request_r_scale = wscale; sc->sc_requested_s_scale = to->to_requested_s_scale; sc->sc_flags |= SCF_WINSCALE; } } if (tcp_do_rfc1644) { /* * A CC or CC.new option received in a SYN makes * it ok to send CC in subsequent segments. */ if (to->to_flags & (TOF_CC|TOF_CCNEW)) { sc->sc_cc_recv = to->to_cc; sc->sc_cc_send = CC_INC(tcp_ccgen); sc->sc_flags |= SCF_CC; } } if (tp->t_flags & TF_NOOPT) sc->sc_flags = SCF_NOOPT; #ifdef TCP_SIGNATURE /* * If listening socket requested TCP digests, and received SYN * contains the option, flag this in the syncache so that * syncache_respond() will do the right thing with the SYN+ACK. * XXX Currently we always record the option by default and will * attempt to use it in syncache_respond(). */ if (to->to_flags & TOF_SIGNATURE) sc->sc_flags = SCF_SIGNATURE; #endif /* * XXX * We have the option here of not doing TAO (even if the segment * qualifies) and instead fall back to a normal 3WHS via the syncache. * This allows us to apply synflood protection to TAO-qualifying SYNs * also. However, there should be a hueristic to determine when to * do this, and is not present at the moment. */ /* * Perform TAO test on incoming CC (SEG.CC) option, if any. * - compare SEG.CC against cached CC from the same host, if any. * - if SEG.CC > chached value, SYN must be new and is accepted * immediately: save new CC in the cache, mark the socket * connected, enter ESTABLISHED state, turn on flag to * send a SYN in the next segment. * A virtual advertised window is set in rcv_adv to * initialize SWS prevention. Then enter normal segment * processing: drop SYN, process data and FIN. * - otherwise do a normal 3-way handshake. */ if (tcp_do_rfc1644) tcp_hc_gettao(&sc->sc_inc, &tao); if ((to->to_flags & TOF_CC) != 0) { if (((tp->t_flags & TF_NOPUSH) != 0) && sc->sc_flags & SCF_CC && tao.tao_cc != 0 && CC_GT(to->to_cc, tao.tao_cc)) { sc->sc_rxtslot = 0; so = syncache_socket(sc, *sop, m); if (so != NULL) { tao.tao_cc = to->to_cc; tcp_hc_updatetao(&sc->sc_inc, TCP_HC_TAO_CC, tao.tao_cc, 0); *sop = so; } syncache_free(sc); return (so != NULL); } } else { /* * No CC option, but maybe CC.NEW: invalidate cached value. */ if (tcp_do_rfc1644) { tao.tao_cc = 0; tcp_hc_updatetao(&sc->sc_inc, TCP_HC_TAO_CC, tao.tao_cc, 0); } } /* * TAO test failed or there was no CC option, * do a standard 3-way handshake. */ #ifdef TCPDEBUG if (syncache_respond(sc, m, so) == 0) { #else if (syncache_respond(sc, m) == 0) { #endif syncache_insert(sc, sch); tcpstat.tcps_sndacks++; tcpstat.tcps_sndtotal++; } else { syncache_free(sc); tcpstat.tcps_sc_dropped++; } *sop = NULL; return (1); } #ifdef TCPDEBUG static int syncache_respond(sc, m, so) struct syncache *sc; struct mbuf *m; struct socket *so; #else static int syncache_respond(sc, m) struct syncache *sc; struct mbuf *m; #endif { u_int8_t *optp; int optlen, error; u_int16_t tlen, hlen, mssopt; struct ip *ip = NULL; struct tcphdr *th; struct inpcb *inp; #ifdef INET6 struct ip6_hdr *ip6 = NULL; #endif hlen = #ifdef INET6 (sc->sc_inc.inc_isipv6) ? sizeof(struct ip6_hdr) : #endif sizeof(struct ip); KASSERT((&sc->sc_inc) != NULL, ("syncache_respond with NULL in_conninfo pointer")); /* Determine MSS we advertize to other end of connection */ mssopt = tcp_mssopt(&sc->sc_inc); /* Compute the size of the TCP options. */ if (sc->sc_flags & SCF_NOOPT) { optlen = 0; } else { optlen = TCPOLEN_MAXSEG + ((sc->sc_flags & SCF_WINSCALE) ? 4 : 0) + ((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0) + ((sc->sc_flags & SCF_CC) ? TCPOLEN_CC_APPA * 2 : 0); #ifdef TCP_SIGNATURE optlen += (sc->sc_flags & SCF_SIGNATURE) ? TCPOLEN_SIGNATURE + 2 : 0; #endif } tlen = hlen + sizeof(struct tcphdr) + optlen; /* * XXX * assume that the entire packet will fit in a header mbuf */ KASSERT(max_linkhdr + tlen <= MHLEN, ("syncache: mbuf too small")); /* * XXX shouldn't this reuse the mbuf if possible ? * Create the IP+TCP header from scratch. */ if (m) m_freem(m); m = m_gethdr(M_DONTWAIT, MT_HEADER); if (m == NULL) return (ENOBUFS); m->m_data += max_linkhdr; m->m_len = tlen; m->m_pkthdr.len = tlen; m->m_pkthdr.rcvif = NULL; inp = sc->sc_tp->t_inpcb; INP_LOCK(inp); #ifdef MAC mac_create_mbuf_from_socket(inp->inp_socket, m); #endif #ifdef INET6 if (sc->sc_inc.inc_isipv6) { ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_src = sc->sc_inc.inc6_laddr; ip6->ip6_dst = sc->sc_inc.inc6_faddr; ip6->ip6_plen = htons(tlen - hlen); /* ip6_hlim is set after checksum */ /* ip6_flow = ??? */ th = (struct tcphdr *)(ip6 + 1); } else #endif { ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(struct ip) >> 2; ip->ip_len = tlen; ip->ip_id = 0; ip->ip_off = 0; ip->ip_sum = 0; ip->ip_p = IPPROTO_TCP; ip->ip_src = sc->sc_inc.inc_laddr; ip->ip_dst = sc->sc_inc.inc_faddr; ip->ip_ttl = inp->inp_ip_ttl; /* XXX */ ip->ip_tos = inp->inp_ip_tos; /* XXX */ /* * See if we should do MTU discovery. Route lookups are * expensive, so we will only unset the DF bit if: * * 1) path_mtu_discovery is disabled * 2) the SCF_UNREACH flag has been set */ if (path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0)) ip->ip_off |= IP_DF; th = (struct tcphdr *)(ip + 1); } th->th_sport = sc->sc_inc.inc_lport; th->th_dport = sc->sc_inc.inc_fport; th->th_seq = htonl(sc->sc_iss); th->th_ack = htonl(sc->sc_irs + 1); th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; th->th_x2 = 0; th->th_flags = TH_SYN|TH_ACK; th->th_win = htons(sc->sc_wnd); th->th_urp = 0; /* Tack on the TCP options. */ if (optlen != 0) { optp = (u_int8_t *)(th + 1); *optp++ = TCPOPT_MAXSEG; *optp++ = TCPOLEN_MAXSEG; *optp++ = (mssopt >> 8) & 0xff; *optp++ = mssopt & 0xff; if (sc->sc_flags & SCF_WINSCALE) { *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 | TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 | sc->sc_request_r_scale); optp += 4; } if (sc->sc_flags & SCF_TIMESTAMP) { u_int32_t *lp = (u_int32_t *)(optp); /* Form timestamp option per appendix A of RFC 1323. */ *lp++ = htonl(TCPOPT_TSTAMP_HDR); *lp++ = htonl(ticks); *lp = htonl(sc->sc_tsrecent); optp += TCPOLEN_TSTAMP_APPA; } /* * Send CC and CC.echo if we received CC from our peer. */ if (sc->sc_flags & SCF_CC) { u_int32_t *lp = (u_int32_t *)(optp); *lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC)); *lp++ = htonl(sc->sc_cc_send); *lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CCECHO)); *lp = htonl(sc->sc_cc_recv); optp += TCPOLEN_CC_APPA * 2; } #ifdef TCP_SIGNATURE /* * Handle TCP-MD5 passive opener response. */ if (sc->sc_flags & SCF_SIGNATURE) { u_int8_t *bp = optp; int i; *bp++ = TCPOPT_SIGNATURE; *bp++ = TCPOLEN_SIGNATURE; for (i = 0; i < TCP_SIGLEN; i++) *bp++ = 0; tcp_signature_compute(m, sizeof(struct ip), 0, optlen, optp + 2, IPSEC_DIR_OUTBOUND); *bp++ = TCPOPT_NOP; *bp++ = TCPOPT_EOL; optp += TCPOLEN_SIGNATURE + 2; } #endif /* TCP_SIGNATURE */ } #ifdef INET6 if (sc->sc_inc.inc_isipv6) { th->th_sum = 0; th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen); ip6->ip6_hlim = in6_selecthlim(NULL, NULL); error = ip6_output(m, NULL, NULL, 0, NULL, NULL, inp); } else #endif { th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(tlen - hlen + IPPROTO_TCP)); m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef TCPDEBUG /* * Trace. */ if (so != NULL && so->so_options & SO_DEBUG) { struct tcpcb *tp = sototcpcb(so); tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, void *), th, 0); } #endif error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, inp); } INP_UNLOCK(inp); return (error); } /* * cookie layers: * * |. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .| * | peer iss | * | MD5(laddr,faddr,secret,lport,fport) |. . . . . . .| * | 0 |(A)| | * (A): peer mss index */ /* * The values below are chosen to minimize the size of the tcp_secret * table, as well as providing roughly a 16 second lifetime for the cookie. */ #define SYNCOOKIE_WNDBITS 5 /* exposed bits for window indexing */ #define SYNCOOKIE_TIMESHIFT 1 /* scale ticks to window time units */ #define SYNCOOKIE_WNDMASK ((1 << SYNCOOKIE_WNDBITS) - 1) #define SYNCOOKIE_NSECRETS (1 << SYNCOOKIE_WNDBITS) #define SYNCOOKIE_TIMEOUT \ (hz * (1 << SYNCOOKIE_WNDBITS) / (1 << SYNCOOKIE_TIMESHIFT)) #define SYNCOOKIE_DATAMASK ((3 << SYNCOOKIE_WNDBITS) | SYNCOOKIE_WNDMASK) static struct { u_int32_t ts_secbits[4]; u_int ts_expire; } tcp_secret[SYNCOOKIE_NSECRETS]; static int tcp_msstab[] = { 0, 536, 1460, 8960 }; static MD5_CTX syn_ctx; #define MD5Add(v) MD5Update(&syn_ctx, (u_char *)&v, sizeof(v)) struct md5_add { u_int32_t laddr, faddr; u_int32_t secbits[4]; u_int16_t lport, fport; }; #ifdef CTASSERT CTASSERT(sizeof(struct md5_add) == 28); #endif /* * Consider the problem of a recreated (and retransmitted) cookie. If the * original SYN was accepted, the connection is established. The second * SYN is inflight, and if it arrives with an ISN that falls within the * receive window, the connection is killed. * * However, since cookies have other problems, this may not be worth * worrying about. */ static u_int32_t syncookie_generate(struct syncache *sc) { u_int32_t md5_buffer[4]; u_int32_t data; int idx, i; struct md5_add add; /* NB: single threaded; could add INP_INFO_WLOCK_ASSERT(&tcbinfo) */ idx = ((ticks << SYNCOOKIE_TIMESHIFT) / hz) & SYNCOOKIE_WNDMASK; if (tcp_secret[idx].ts_expire < ticks) { for (i = 0; i < 4; i++) tcp_secret[idx].ts_secbits[i] = arc4random(); tcp_secret[idx].ts_expire = ticks + SYNCOOKIE_TIMEOUT; } for (data = sizeof(tcp_msstab) / sizeof(int) - 1; data > 0; data--) if (tcp_msstab[data] <= sc->sc_peer_mss) break; data = (data << SYNCOOKIE_WNDBITS) | idx; data ^= sc->sc_irs; /* peer's iss */ MD5Init(&syn_ctx); #ifdef INET6 if (sc->sc_inc.inc_isipv6) { MD5Add(sc->sc_inc.inc6_laddr); MD5Add(sc->sc_inc.inc6_faddr); add.laddr = 0; add.faddr = 0; } else #endif { add.laddr = sc->sc_inc.inc_laddr.s_addr; add.faddr = sc->sc_inc.inc_faddr.s_addr; } add.lport = sc->sc_inc.inc_lport; add.fport = sc->sc_inc.inc_fport; add.secbits[0] = tcp_secret[idx].ts_secbits[0]; add.secbits[1] = tcp_secret[idx].ts_secbits[1]; add.secbits[2] = tcp_secret[idx].ts_secbits[2]; add.secbits[3] = tcp_secret[idx].ts_secbits[3]; MD5Add(add); MD5Final((u_char *)&md5_buffer, &syn_ctx); data ^= (md5_buffer[0] & ~SYNCOOKIE_WNDMASK); return (data); } static struct syncache * syncookie_lookup(inc, th, so) struct in_conninfo *inc; struct tcphdr *th; struct socket *so; { u_int32_t md5_buffer[4]; struct syncache *sc; u_int32_t data; int wnd, idx; struct md5_add add; /* NB: single threaded; could add INP_INFO_WLOCK_ASSERT(&tcbinfo) */ data = (th->th_ack - 1) ^ (th->th_seq - 1); /* remove ISS */ idx = data & SYNCOOKIE_WNDMASK; if (tcp_secret[idx].ts_expire < ticks || sototcpcb(so)->ts_recent + SYNCOOKIE_TIMEOUT < ticks) return (NULL); MD5Init(&syn_ctx); #ifdef INET6 if (inc->inc_isipv6) { MD5Add(inc->inc6_laddr); MD5Add(inc->inc6_faddr); add.laddr = 0; add.faddr = 0; } else #endif { add.laddr = inc->inc_laddr.s_addr; add.faddr = inc->inc_faddr.s_addr; } add.lport = inc->inc_lport; add.fport = inc->inc_fport; add.secbits[0] = tcp_secret[idx].ts_secbits[0]; add.secbits[1] = tcp_secret[idx].ts_secbits[1]; add.secbits[2] = tcp_secret[idx].ts_secbits[2]; add.secbits[3] = tcp_secret[idx].ts_secbits[3]; MD5Add(add); MD5Final((u_char *)&md5_buffer, &syn_ctx); data ^= md5_buffer[0]; if ((data & ~SYNCOOKIE_DATAMASK) != 0) return (NULL); data = data >> SYNCOOKIE_WNDBITS; sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT); if (sc == NULL) return (NULL); /* * Fill in the syncache values. * XXX duplicate code from syncache_add */ sc->sc_ipopts = NULL; sc->sc_inc.inc_fport = inc->inc_fport; sc->sc_inc.inc_lport = inc->inc_lport; #ifdef INET6 sc->sc_inc.inc_isipv6 = inc->inc_isipv6; if (inc->inc_isipv6) { sc->sc_inc.inc6_faddr = inc->inc6_faddr; sc->sc_inc.inc6_laddr = inc->inc6_laddr; } else #endif { sc->sc_inc.inc_faddr = inc->inc_faddr; sc->sc_inc.inc_laddr = inc->inc_laddr; } sc->sc_irs = th->th_seq - 1; sc->sc_iss = th->th_ack - 1; wnd = sbspace(&so->so_rcv); wnd = imax(wnd, 0); wnd = imin(wnd, TCP_MAXWIN); sc->sc_wnd = wnd; sc->sc_flags = 0; sc->sc_rxtslot = 0; sc->sc_peer_mss = tcp_msstab[data]; return (sc); } Index: head/sys/netinet/tcp_usrreq.c =================================================================== --- head/sys/netinet/tcp_usrreq.c (revision 127504) +++ head/sys/netinet/tcp_usrreq.c (revision 127505) @@ -1,1309 +1,1310 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 * $FreeBSD$ */ #include "opt_ipsec.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #ifdef TCPDEBUG #include #endif #ifdef IPSEC #include #endif /*IPSEC*/ /* * TCP protocol interface to socket abstraction. */ extern char *tcpstates[]; /* XXX ??? */ static int tcp_attach(struct socket *, struct thread *td); static int tcp_connect(struct tcpcb *, struct sockaddr *, struct thread *td); #ifdef INET6 static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct thread *td); #endif /* INET6 */ static struct tcpcb * tcp_disconnect(struct tcpcb *); static struct tcpcb * tcp_usrclosed(struct tcpcb *); #ifdef TCPDEBUG #define TCPDEBUG0 int ostate = 0 #define TCPDEBUG1() ostate = tp ? tp->t_state : 0 #define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ tcp_trace(TA_USER, ostate, tp, 0, 0, req) #else #define TCPDEBUG0 #define TCPDEBUG1() #define TCPDEBUG2(req) #endif /* * TCP attaches to socket via pru_attach(), reserving space, * and an internet control block. */ static int tcp_usr_attach(struct socket *so, int proto, struct thread *td) { int s = splnet(); int error; struct inpcb *inp; struct tcpcb *tp = 0; TCPDEBUG0; INP_INFO_WLOCK(&tcbinfo); TCPDEBUG1(); inp = sotoinpcb(so); if (inp) { error = EISCONN; goto out; } error = tcp_attach(so, td); if (error) goto out; if ((so->so_options & SO_LINGER) && so->so_linger == 0) so->so_linger = TCP_LINGERTIME; inp = sotoinpcb(so); tp = intotcpcb(inp); out: TCPDEBUG2(PRU_ATTACH); INP_INFO_WUNLOCK(&tcbinfo); splx(s); return error; } /* * pru_detach() detaches the TCP protocol from the socket. * If the protocol state is non-embryonic, then can't * do this directly: have to initiate a pru_disconnect(), * which may finish later; embryonic TCB's can just * be discarded here. */ static int tcp_usr_detach(struct socket *so) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; TCPDEBUG0; INP_INFO_WLOCK(&tcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&tcbinfo); splx(s); return EINVAL; /* XXX */ } INP_LOCK(inp); tp = intotcpcb(inp); TCPDEBUG1(); tp = tcp_disconnect(tp); TCPDEBUG2(PRU_DETACH); if (tp) INP_UNLOCK(inp); INP_INFO_WUNLOCK(&tcbinfo); splx(s); return error; } #define INI_NOLOCK 0 #define INI_READ 1 #define INI_WRITE 2 #define COMMON_START() \ TCPDEBUG0; \ do { \ if (inirw == INI_READ) \ INP_INFO_RLOCK(&tcbinfo); \ else if (inirw == INI_WRITE) \ INP_INFO_WLOCK(&tcbinfo); \ inp = sotoinpcb(so); \ if (inp == 0) { \ if (inirw == INI_READ) \ INP_INFO_RUNLOCK(&tcbinfo); \ else if (inirw == INI_WRITE) \ INP_INFO_WUNLOCK(&tcbinfo); \ splx(s); \ return EINVAL; \ } \ INP_LOCK(inp); \ if (inirw == INI_READ) \ INP_INFO_RUNLOCK(&tcbinfo); \ tp = intotcpcb(inp); \ TCPDEBUG1(); \ } while(0) #define COMMON_END(req) \ out: TCPDEBUG2(req); \ do { \ if (tp) \ INP_UNLOCK(inp); \ if (inirw == INI_WRITE) \ INP_INFO_WUNLOCK(&tcbinfo); \ splx(s); \ return error; \ goto out; \ } while(0) /* * Give the socket an address. */ static int tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; const int inirw = INI_WRITE; COMMON_START(); /* * Must check for multicast addresses and disallow binding * to them. */ sinp = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sinp)) return (EINVAL); if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } - error = in_pcbbind(inp, nam, td); + error = in_pcbbind(inp, nam, td->td_ucred); if (error) goto out; COMMON_END(PRU_BIND); } #ifdef INET6 static int tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6p; const int inirw = INI_WRITE; COMMON_START(); /* * Must check for multicast addresses and disallow binding * to them. */ sin6p = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof (*sin6p)) return (EINVAL); if (sin6p->sin6_family == AF_INET6 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { error = EAFNOSUPPORT; goto out; } inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) inp->inp_vflag |= INP_IPV4; else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; - error = in_pcbbind(inp, (struct sockaddr *)&sin, td); + error = in_pcbbind(inp, (struct sockaddr *)&sin, + td->td_ucred); goto out; } } - error = in6_pcbbind(inp, nam, td); + error = in6_pcbbind(inp, nam, td->td_ucred); if (error) goto out; COMMON_END(PRU_BIND); } #endif /* INET6 */ /* * Prepare to accept connections. */ static int tcp_usr_listen(struct socket *so, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; COMMON_START(); if (inp->inp_lport == 0) - error = in_pcbbind(inp, (struct sockaddr *)0, td); + error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); if (error == 0) tp->t_state = TCPS_LISTEN; COMMON_END(PRU_LISTEN); } #ifdef INET6 static int tcp6_usr_listen(struct socket *so, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; COMMON_START(); if (inp->inp_lport == 0) { inp->inp_vflag &= ~INP_IPV4; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; - error = in6_pcbbind(inp, (struct sockaddr *)0, td); + error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); } if (error == 0) tp->t_state = TCPS_LISTEN; COMMON_END(PRU_LISTEN); } #endif /* INET6 */ /* * Initiate connection to peer. * Create a template for use in transmissions on this connection. * Enter SYN_SENT state, and mark socket as connecting. * Start keep-alive timer, and seed output sequence space. * Send initial segment on connection. */ static int tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; const int inirw = INI_WRITE; COMMON_START(); /* * Must disallow TCP ``connections'' to multicast addresses. */ sinp = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sinp)) return (EINVAL); if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } if (td && jailed(td->td_ucred)) prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr); if ((error = tcp_connect(tp, nam, td)) != 0) goto out; error = tcp_output(tp); COMMON_END(PRU_CONNECT); } #ifdef INET6 static int tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6p; const int inirw = INI_WRITE; COMMON_START(); /* * Must disallow TCP ``connections'' to multicast addresses. */ sin6p = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof (*sin6p)) return (EINVAL); if (sin6p->sin6_family == AF_INET6 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { error = EAFNOSUPPORT; goto out; } if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { struct sockaddr_in sin; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } in6_sin6_2_sin(&sin, sin6p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) goto out; error = tcp_output(tp); goto out; } inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; inp->inp_inc.inc_isipv6 = 1; if ((error = tcp6_connect(tp, nam, td)) != 0) goto out; error = tcp_output(tp); COMMON_END(PRU_CONNECT); } #endif /* INET6 */ /* * Initiate disconnect from peer. * If connection never passed embryonic stage, just drop; * else if don't need to let data drain, then can just drop anyways, * else have to begin TCP shutdown process: mark socket disconnecting, * drain unread data, state switch to reflect user close, and * send segment (e.g. FIN) to peer. Socket will be really disconnected * when peer sends FIN and acks ours. * * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. */ static int tcp_usr_disconnect(struct socket *so) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; COMMON_START(); tp = tcp_disconnect(tp); COMMON_END(PRU_DISCONNECT); } /* * Accept a connection. Essentially all the work is * done at higher levels; just return the address * of the peer, storing through addr. */ static int tcp_usr_accept(struct socket *so, struct sockaddr **nam) { int s; int error = 0; struct inpcb *inp = NULL; struct tcpcb *tp = NULL; struct in_addr addr; in_port_t port = 0; TCPDEBUG0; if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; goto out; } s = splnet(); INP_INFO_RLOCK(&tcbinfo); inp = sotoinpcb(so); if (!inp) { INP_INFO_RUNLOCK(&tcbinfo); splx(s); return (EINVAL); } INP_LOCK(inp); INP_INFO_RUNLOCK(&tcbinfo); tp = intotcpcb(inp); TCPDEBUG1(); /* * We inline in_setpeeraddr and COMMON_END here, so that we can * copy the data of interest and defer the malloc until after we * release the lock. */ port = inp->inp_fport; addr = inp->inp_faddr; out: TCPDEBUG2(PRU_ACCEPT); if (tp) INP_UNLOCK(inp); splx(s); if (error == 0) *nam = in_sockaddr(port, &addr); return error; } #ifdef INET6 static int tcp6_usr_accept(struct socket *so, struct sockaddr **nam) { int s; struct inpcb *inp = NULL; int error = 0; struct tcpcb *tp = NULL; struct in_addr addr; struct in6_addr addr6; in_port_t port = 0; int v4 = 0; TCPDEBUG0; if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; goto out; } s = splnet(); INP_INFO_RLOCK(&tcbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_RUNLOCK(&tcbinfo); splx(s); return (EINVAL); } INP_LOCK(inp); INP_INFO_RUNLOCK(&tcbinfo); tp = intotcpcb(inp); TCPDEBUG1(); /* * We inline in6_mapped_peeraddr and COMMON_END here, so that we can * copy the data of interest and defer the malloc until after we * release the lock. */ if (inp->inp_vflag & INP_IPV4) { v4 = 1; port = inp->inp_fport; addr = inp->inp_faddr; } else { port = inp->inp_fport; addr6 = inp->in6p_faddr; } out: TCPDEBUG2(PRU_ACCEPT); if (tp) INP_UNLOCK(inp); splx(s); if (error == 0) { if (v4) *nam = in6_v4mapsin6_sockaddr(port, &addr); else *nam = in6_sockaddr(port, &addr6); } return error; } #endif /* INET6 */ /* * This is the wrapper function for in_setsockaddr. We just pass down * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking * here because in_setsockaddr will call malloc and can block. */ static int tcp_sockaddr(struct socket *so, struct sockaddr **nam) { return (in_setsockaddr(so, nam, &tcbinfo)); } /* * This is the wrapper function for in_setpeeraddr. We just pass down * the pcbinfo for in_setpeeraddr to lock. */ static int tcp_peeraddr(struct socket *so, struct sockaddr **nam) { return (in_setpeeraddr(so, nam, &tcbinfo)); } /* * Mark the connection as being incapable of further output. */ static int tcp_usr_shutdown(struct socket *so) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; COMMON_START(); socantsendmore(so); tp = tcp_usrclosed(tp); if (tp) error = tcp_output(tp); COMMON_END(PRU_SHUTDOWN); } /* * After a receive, possibly send window update to peer. */ static int tcp_usr_rcvd(struct socket *so, int flags) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_READ; COMMON_START(); tcp_output(tp); COMMON_END(PRU_RCVD); } /* * Do a send by putting data in output queue and updating urgent * marker if URG set. Possibly send more data. Unlike the other * pru_*() routines, the mbuf chains are our responsibility. We * must either enqueue them or free them. The other pru_* routines * generally are caller-frees. */ static int tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; #ifdef INET6 int isipv6; #endif TCPDEBUG0; /* * Need write lock here because this function might call * tcp_connect or tcp_usrclosed. * We really want to have to this function upgrade from read lock * to write lock. XXX */ INP_INFO_WLOCK(&tcbinfo); inp = sotoinpcb(so); if (inp == NULL) { /* * OOPS! we lost a race, the TCP session got reset after * we checked SS_CANTSENDMORE, eg: while doing uiomove or a * network interrupt in the non-splnet() section of sosend(). */ if (m) m_freem(m); if (control) m_freem(control); error = ECONNRESET; /* XXX EPIPE? */ tp = NULL; TCPDEBUG1(); goto out; } INP_LOCK(inp); #ifdef INET6 isipv6 = nam && nam->sa_family == AF_INET6; #endif /* INET6 */ tp = intotcpcb(inp); TCPDEBUG1(); if (control) { /* TCP doesn't do control messages (rights, creds, etc) */ if (control->m_len) { m_freem(control); if (m) m_freem(m); error = EINVAL; goto out; } m_freem(control); /* empty control, just free it */ } if (!(flags & PRUS_OOB)) { sbappendstream(&so->so_snd, m); if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg/maxopd using peer's cached * MSS. */ #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); else #endif /* INET6 */ error = tcp_connect(tp, nam, td); if (error) goto out; tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } if (flags & PRUS_EOF) { /* * Close the send side of the connection after * the data is sent. */ socantsendmore(so); tp = tcp_usrclosed(tp); } if (tp != NULL) { if (flags & PRUS_MORETOCOME) tp->t_flags |= TF_MORETOCOME; error = tcp_output(tp); if (flags & PRUS_MORETOCOME) tp->t_flags &= ~TF_MORETOCOME; } } else { if (sbspace(&so->so_snd) < -512) { m_freem(m); error = ENOBUFS; goto out; } /* * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. We continue, however, * to consider it to indicate the first octet * of data past the urgent section. * Otherwise, snd_up should be one lower. */ sbappendstream(&so->so_snd, m); if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg/maxopd using peer's cached * MSS. */ #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); else #endif /* INET6 */ error = tcp_connect(tp, nam, td); if (error) goto out; tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } tp->snd_up = tp->snd_una + so->so_snd.sb_cc; tp->t_force = 1; error = tcp_output(tp); tp->t_force = 0; } COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); } /* * Abort the TCP. */ static int tcp_usr_abort(struct socket *so) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_WRITE; COMMON_START(); tp = tcp_drop(tp, ECONNABORTED); COMMON_END(PRU_ABORT); } /* * Receive out-of-band data. */ static int tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) { int s = splnet(); int error = 0; struct inpcb *inp; struct tcpcb *tp; const int inirw = INI_READ; COMMON_START(); if ((so->so_oobmark == 0 && (so->so_state & SS_RCVATMARK) == 0) || so->so_options & SO_OOBINLINE || tp->t_oobflags & TCPOOB_HADDATA) { error = EINVAL; goto out; } if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { error = EWOULDBLOCK; goto out; } m->m_len = 1; *mtod(m, caddr_t) = tp->t_iobc; if ((flags & MSG_PEEK) == 0) tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); COMMON_END(PRU_RCVOOB); } /* xxx - should be const */ struct pr_usrreqs tcp_usrreqs = { tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind, tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach, tcp_usr_disconnect, tcp_usr_listen, tcp_peeraddr, tcp_usr_rcvd, tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, tcp_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel }; #ifdef INET6 struct pr_usrreqs tcp6_usrreqs = { tcp_usr_abort, tcp6_usr_accept, tcp_usr_attach, tcp6_usr_bind, tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach, tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd, tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, in6_mapped_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel }; #endif /* INET6 */ /* * Common subroutine to open a TCP connection to remote host specified * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local * port number if needed. Call in_pcbconnect_setup to do the routing and * to choose a local host address (interface). If there is an existing * incarnation of the same connection in TIME-WAIT state and if the remote * host was sending CC options and if the connection duration was < MSL, then * truncate the previous TIME-WAIT state and proceed. * Initialize connection parameters and enter SYN-SENT state. */ static int tcp_connect(tp, nam, td) register struct tcpcb *tp; struct sockaddr *nam; struct thread *td; { struct inpcb *inp = tp->t_inpcb, *oinp; struct socket *so = inp->inp_socket; struct tcptw *otw; struct rmxp_tao tao; struct in_addr laddr; u_short lport; int error; bzero(&tao, sizeof(tao)); if (inp->inp_lport == 0) { - error = in_pcbbind(inp, (struct sockaddr *)0, td); + error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); if (error) return error; } /* * Cannot simply call in_pcbconnect, because there might be an * earlier incarnation of this same connection still in * TIME_WAIT state, creating an ADDRINUSE error. */ laddr = inp->inp_laddr; lport = inp->inp_lport; error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, - &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td); + &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); if (error && oinp == NULL) return error; if (oinp) { if (oinp != inp && (oinp->inp_vflag & INP_TIMEWAIT) && (ticks - (otw = intotw(oinp))->t_starttime) < tcp_msl && otw->cc_recv != 0) { inp->inp_faddr = oinp->inp_faddr; inp->inp_fport = oinp->inp_fport; (void) tcp_twclose(otw, 0); } else return EADDRINUSE; } inp->inp_laddr = laddr; in_pcbrehash(inp); /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) tp->request_r_scale++; soisconnecting(so); tcpstat.tcps_connattempt++; tp->t_state = TCPS_SYN_SENT; callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); tp->iss = tcp_new_isn(tp); tp->t_bw_rtseq = tp->iss; tcp_sendseqinit(tp); /* * Generate a CC value for this connection and * check whether CC or CCnew should be used. */ if (tcp_do_rfc1644) tcp_hc_gettao(&inp->inp_inc, &tao); tp->cc_send = CC_INC(tcp_ccgen); if (tao.tao_ccsent != 0 && CC_GEQ(tp->cc_send, tao.tao_ccsent)) { tao.tao_ccsent = tp->cc_send; } else { tao.tao_ccsent = 0; tp->t_flags |= TF_SENDCCNEW; } if (tcp_do_rfc1644) tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT, tao.tao_ccsent, 0); return 0; } #ifdef INET6 static int tcp6_connect(tp, nam, td) register struct tcpcb *tp; struct sockaddr *nam; struct thread *td; { struct inpcb *inp = tp->t_inpcb, *oinp; struct socket *so = inp->inp_socket; struct tcptw *otw; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; struct in6_addr *addr6; struct rmxp_tao tao; int error; bzero(&tao, sizeof(tao)); if (inp->inp_lport == 0) { - error = in6_pcbbind(inp, (struct sockaddr *)0, td); + error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); if (error) return error; } /* * Cannot simply call in_pcbconnect, because there might be an * earlier incarnation of this same connection still in * TIME_WAIT state, creating an ADDRINUSE error. */ error = in6_pcbladdr(inp, nam, &addr6); if (error) return error; oinp = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL); if (oinp) { if (oinp != inp && (oinp->inp_vflag & INP_TIMEWAIT) && (ticks - (otw = intotw(oinp))->t_starttime) < tcp_msl && otw->cc_recv != 0) { inp->inp_faddr = oinp->inp_faddr; inp->inp_fport = oinp->inp_fport; (void) tcp_twclose(otw, 0); } else return EADDRINUSE; } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) inp->in6p_laddr = *addr6; inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0) inp->in6p_flowinfo = sin6->sin6_flowinfo; in_pcbrehash(inp); /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) tp->request_r_scale++; soisconnecting(so); tcpstat.tcps_connattempt++; tp->t_state = TCPS_SYN_SENT; callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); tp->iss = tcp_new_isn(tp); tp->t_bw_rtseq = tp->iss; tcp_sendseqinit(tp); /* * Generate a CC value for this connection and * check whether CC or CCnew should be used. */ if (tcp_do_rfc1644) tcp_hc_gettao(&inp->inp_inc, &tao); tp->cc_send = CC_INC(tcp_ccgen); if (tao.tao_ccsent != 0 && CC_GEQ(tp->cc_send, tao.tao_ccsent)) { tao.tao_ccsent = tp->cc_send; } else { tao.tao_ccsent = 0; tp->t_flags |= TF_SENDCCNEW; } if (tcp_do_rfc1644) tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT, tao.tao_ccsent, 0); return 0; } #endif /* INET6 */ /* * The new sockopt interface makes it possible for us to block in the * copyin/out step (if we take a page fault). Taking a page fault at * splnet() is probably a Bad Thing. (Since sockets and pcbs both now * use TSM, there probably isn't any need for this function to run at * splnet() any more. This needs more examination.) */ int tcp_ctloutput(so, sopt) struct socket *so; struct sockopt *sopt; { int error, opt, optval, s; struct inpcb *inp; struct tcpcb *tp; error = 0; s = splnet(); /* XXX */ INP_INFO_RLOCK(&tcbinfo); inp = sotoinpcb(so); if (inp == NULL) { INP_INFO_RUNLOCK(&tcbinfo); splx(s); return (ECONNRESET); } INP_LOCK(inp); INP_INFO_RUNLOCK(&tcbinfo); if (sopt->sopt_level != IPPROTO_TCP) { #ifdef INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) error = ip6_ctloutput(so, sopt); else #endif /* INET6 */ error = ip_ctloutput(so, sopt); INP_UNLOCK(inp); splx(s); return (error); } tp = intotcpcb(inp); switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { #ifdef TCP_SIGNATURE case TCP_MD5SIG: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval > 0) tp->t_flags |= TF_SIGNATURE; else tp->t_flags &= ~TF_SIGNATURE; break; #endif /* TCP_SIGNATURE */ case TCP_NODELAY: case TCP_NOOPT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (sopt->sopt_name) { case TCP_NODELAY: opt = TF_NODELAY; break; case TCP_NOOPT: opt = TF_NOOPT; break; default: opt = 0; /* dead code to fool gcc */ break; } if (optval) tp->t_flags |= opt; else tp->t_flags &= ~opt; break; case TCP_NOPUSH: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval) tp->t_flags |= TF_NOPUSH; else { tp->t_flags &= ~TF_NOPUSH; error = tcp_output(tp); } break; case TCP_MAXSEG: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval > 0 && optval <= tp->t_maxseg && optval + 40 >= tcp_minmss) tp->t_maxseg = optval; else error = EINVAL; break; default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (sopt->sopt_name) { #ifdef TCP_SIGNATURE case TCP_MD5SIG: optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; break; #endif case TCP_NODELAY: optval = tp->t_flags & TF_NODELAY; break; case TCP_MAXSEG: optval = tp->t_maxseg; break; case TCP_NOOPT: optval = tp->t_flags & TF_NOOPT; break; case TCP_NOPUSH: optval = tp->t_flags & TF_NOPUSH; break; default: error = ENOPROTOOPT; break; } if (error == 0) error = sooptcopyout(sopt, &optval, sizeof optval); break; } INP_UNLOCK(inp); splx(s); return (error); } /* * tcp_sendspace and tcp_recvspace are the default send and receive window * sizes, respectively. These are obsolescent (this information should * be set by the route). */ u_long tcp_sendspace = 1024*32; SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); u_long tcp_recvspace = 1024*64; SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); /* * Attach TCP protocol to socket, allocating * internet protocol control block, tcp control block, * bufer space, and entering LISTEN state if to accept connections. */ static int tcp_attach(so, td) struct socket *so; struct thread *td; { register struct tcpcb *tp; struct inpcb *inp; int error; #ifdef INET6 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; #endif if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, tcp_sendspace, tcp_recvspace); if (error) return (error); } error = in_pcballoc(so, &tcbinfo, "tcpinp"); if (error) return (error); inp = sotoinpcb(so); #ifdef INET6 if (isipv6) { inp->inp_vflag |= INP_IPV6; inp->in6p_hops = -1; /* use kernel default */ } else #endif inp->inp_vflag |= INP_IPV4; tp = tcp_newtcpcb(inp); if (tp == 0) { int nofd = so->so_state & SS_NOFDREF; /* XXX */ so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ #ifdef INET6 if (isipv6) in6_pcbdetach(inp); else #endif in_pcbdetach(inp); so->so_state |= nofd; return (ENOBUFS); } tp->t_state = TCPS_CLOSED; return (0); } /* * Initiate (or continue) disconnect. * If embryonic state, just send reset (once). * If in ``let data drain'' option and linger null, just drop. * Otherwise (hard), mark socket disconnecting and drop * current input data; switch states based on user close, and * send segment to peer (with FIN). */ static struct tcpcb * tcp_disconnect(tp) register struct tcpcb *tp; { struct socket *so = tp->t_inpcb->inp_socket; if (tp->t_state < TCPS_ESTABLISHED) tp = tcp_close(tp); else if ((so->so_options & SO_LINGER) && so->so_linger == 0) tp = tcp_drop(tp, 0); else { soisdisconnecting(so); sbflush(&so->so_rcv); tp = tcp_usrclosed(tp); if (tp) (void) tcp_output(tp); } return (tp); } /* * User issued close, and wish to trail through shutdown states: * if never received SYN, just forget it. If got a SYN from peer, * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. * If already got a FIN from peer, then almost done; go to LAST_ACK * state. In all other cases, have already sent FIN to peer (e.g. * after PRU_SHUTDOWN), and just have to play tedious game waiting * for peer to send FIN or not respond to keep-alives, etc. * We can let the user exit from the close as soon as the FIN is acked. */ static struct tcpcb * tcp_usrclosed(tp) register struct tcpcb *tp; { switch (tp->t_state) { case TCPS_CLOSED: case TCPS_LISTEN: tp->t_state = TCPS_CLOSED; tp = tcp_close(tp); break; case TCPS_SYN_SENT: case TCPS_SYN_RECEIVED: tp->t_flags |= TF_NEEDFIN; break; case TCPS_ESTABLISHED: tp->t_state = TCPS_FIN_WAIT_1; break; case TCPS_CLOSE_WAIT: tp->t_state = TCPS_LAST_ACK; break; } if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { soisdisconnected(tp->t_inpcb->inp_socket); /* To prevent the connection hanging in FIN_WAIT_2 forever. */ if (tp->t_state == TCPS_FIN_WAIT_2) callout_reset(tp->tt_2msl, tcp_maxidle, tcp_timer_2msl, tp); } return (tp); } Index: head/sys/netinet/udp_usrreq.c =================================================================== --- head/sys/netinet/udp_usrreq.c (revision 127504) +++ head/sys/netinet/udp_usrreq.c (revision 127505) @@ -1,1127 +1,1127 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 * $FreeBSD$ */ #include "opt_ipsec.h" #include "opt_inet6.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #ifdef INET6 #include #endif #include #include #ifdef FAST_IPSEC #include #endif /*FAST_IPSEC*/ #ifdef IPSEC #include #endif /*IPSEC*/ #include /* * UDP protocol implementation. * Per RFC 768, August, 1980. */ #ifndef COMPAT_42 static int udpcksum = 1; #else static int udpcksum = 0; /* XXX */ #endif SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udpcksum, 0, ""); int log_in_vain = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, &log_in_vain, 0, "Log all incoming UDP packets"); static int blackhole = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, &blackhole, 0, "Do not send port unreachables for refused connects"); static int strict_mcast_mship = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW, &strict_mcast_mship, 0, "Only send multicast to member sockets"); struct inpcbhead udb; /* from udp_var.h */ #define udb6 udb /* for KAME src sync over BSD*'s */ struct inpcbinfo udbinfo; #ifndef UDBHASHSIZE #define UDBHASHSIZE 16 #endif struct udpstat udpstat; /* from udp_var.h */ SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW, &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)"); static struct sockaddr_in udp_in = { sizeof(udp_in), AF_INET }; #ifdef INET6 struct udp_in6 { struct sockaddr_in6 uin6_sin; u_char uin6_init_done : 1; } udp_in6 = { { sizeof(udp_in6.uin6_sin), AF_INET6 }, 0 }; struct udp_ip6 { struct ip6_hdr uip6_ip6; u_char uip6_init_done : 1; } udp_ip6; #endif /* INET6 */ static void udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off); #ifdef INET6 static void ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip); #endif static int udp_detach(struct socket *so); static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, struct mbuf *, struct thread *); void udp_init() { INP_INFO_LOCK_INIT(&udbinfo, "udp"); LIST_INIT(&udb); udbinfo.listhead = &udb; udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask); udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.porthashmask); udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(udbinfo.ipi_zone, maxsockets); } void udp_input(m, off) register struct mbuf *m; int off; { int iphlen = off; register struct ip *ip; register struct udphdr *uh; register struct inpcb *inp; struct mbuf *opts = 0; int len; struct ip save_ip; udpstat.udps_ipackets++; /* * Strip IP options, if any; should skip this, * make available to user, and use on returned packets, * but we don't yet have a way to check the checksum * with options still present. */ if (iphlen > sizeof (struct ip)) { ip_stripoptions(m, (struct mbuf *)0); iphlen = sizeof(struct ip); } /* * Get IP and UDP header together in first mbuf. */ ip = mtod(m, struct ip *); if (m->m_len < iphlen + sizeof(struct udphdr)) { if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) { udpstat.udps_hdrops++; return; } ip = mtod(m, struct ip *); } uh = (struct udphdr *)((caddr_t)ip + iphlen); /* destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto badunlocked; /* * Construct sockaddr format source address. * Stuff source address and datagram in user buffer. */ udp_in.sin_port = uh->uh_sport; udp_in.sin_addr = ip->ip_src; #ifdef INET6 udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0; #endif /* * Make mbuf data length reflect UDP length. * If not enough data to reflect UDP length, drop. */ len = ntohs((u_short)uh->uh_ulen); if (ip->ip_len != len) { if (len > ip->ip_len || len < sizeof(struct udphdr)) { udpstat.udps_badlen++; goto badunlocked; } m_adj(m, len - ip->ip_len); /* ip->ip_len = len; */ } /* * Save a copy of the IP header in case we want restore it * for sending an ICMP error message in response. */ if (!blackhole) save_ip = *ip; /* * Checksum extended UDP header and data. */ if (uh->uh_sum) { if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh->uh_sum = m->m_pkthdr.csum_data; else uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + IPPROTO_UDP)); uh->uh_sum ^= 0xffff; } else { char b[9]; bcopy(((struct ipovly *)ip)->ih_x1, b, 9); bzero(((struct ipovly *)ip)->ih_x1, 9); ((struct ipovly *)ip)->ih_len = uh->uh_ulen; uh->uh_sum = in_cksum(m, len + sizeof (struct ip)); bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); } if (uh->uh_sum) { udpstat.udps_badsum++; m_freem(m); return; } } else udpstat.udps_nosum++; INP_INFO_RLOCK(&udbinfo); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { struct inpcb *last; /* * Deliver a multicast or broadcast datagram to *all* sockets * for which the local and remote addresses and ports match * those of the incoming datagram. This allows more than * one process to receive multi/broadcasts on the same port. * (This really ought to be done for unicast datagrams as * well, but that would cause problems with existing * applications that open both address-specific sockets and * a wildcard socket listening to the same port -- they would * end up receiving duplicates of every unicast datagram. * Those applications open the multiple sockets to overcome an * inadequacy of the UDP socket interface, but for backwards * compatibility we avoid the problem here rather than * fixing the interface. Maybe 4.5BSD will remedy this?) */ /* * Locate pcb(s) for datagram. * (Algorithm copied from raw_intr().) */ last = NULL; LIST_FOREACH(inp, &udb, inp_list) { INP_LOCK(inp); if (inp->inp_lport != uh->uh_dport) { docontinue: INP_UNLOCK(inp); continue; } #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) goto docontinue; #endif if (inp->inp_laddr.s_addr != INADDR_ANY) { if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) goto docontinue; } if (inp->inp_faddr.s_addr != INADDR_ANY) { if (inp->inp_faddr.s_addr != ip->ip_src.s_addr || inp->inp_fport != uh->uh_sport) goto docontinue; } /* * Check multicast packets to make sure they are only * sent to sockets with multicast memberships for the * packet's destination address and arrival interface */ #define MSHIP(_inp, n) ((_inp)->inp_moptions->imo_membership[(n)]) #define NMSHIPS(_inp) ((_inp)->inp_moptions->imo_num_memberships) if (strict_mcast_mship && inp->inp_moptions != NULL) { int mship, foundmship = 0; for (mship = 0; mship < NMSHIPS(inp); mship++) { if (MSHIP(inp, mship)->inm_addr.s_addr == ip->ip_dst.s_addr && MSHIP(inp, mship)->inm_ifp == m->m_pkthdr.rcvif) { foundmship = 1; break; } } if (foundmship == 0) goto docontinue; } #undef NMSHIPS #undef MSHIP if (last != NULL) { struct mbuf *n; n = m_copy(m, 0, M_COPYALL); if (n != NULL) udp_append(last, ip, n, iphlen + sizeof(struct udphdr)); INP_UNLOCK(last); } last = inp; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids searching * through all pcbs in the common case of a non-shared * port. It * assumes that an application will never * clear these options after setting them. */ if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. * (No need to send an ICMP Port Unreachable * for a broadcast or multicast datgram.) */ udpstat.udps_noportbcast++; goto badheadlocked; } INP_INFO_RUNLOCK(&udbinfo); udp_append(last, ip, m, iphlen + sizeof(struct udphdr)); INP_UNLOCK(last); return; } /* * Locate pcb for datagram. */ inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); if (inp == NULL) { if (log_in_vain) { char buf[4*sizeof "123"]; strcpy(buf, inet_ntoa(ip->ip_dst)); log(LOG_INFO, "Connection attempt to UDP %s:%d from %s:%d\n", buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), ntohs(uh->uh_sport)); } udpstat.udps_noport++; if (m->m_flags & (M_BCAST | M_MCAST)) { udpstat.udps_noportbcast++; goto badheadlocked; } if (blackhole) goto badheadlocked; if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) goto badheadlocked; *ip = save_ip; ip->ip_len += iphlen; icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); INP_INFO_RUNLOCK(&udbinfo); return; } INP_LOCK(inp); INP_INFO_RUNLOCK(&udbinfo); udp_append(inp, ip, m, iphlen + sizeof(struct udphdr)); INP_UNLOCK(inp); return; badheadlocked: INP_INFO_RUNLOCK(&udbinfo); if (inp) INP_UNLOCK(inp); badunlocked: m_freem(m); if (opts) m_freem(opts); return; } #ifdef INET6 static void ip_2_ip6_hdr(ip6, ip) struct ip6_hdr *ip6; struct ip *ip; { bzero(ip6, sizeof(*ip6)); ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_plen = ip->ip_len; ip6->ip6_nxt = ip->ip_p; ip6->ip6_hlim = ip->ip_ttl; ip6->ip6_src.s6_addr32[2] = ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_SMP; ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr; ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr; } #endif /* * subroutine of udp_input(), mainly for source code readability. * caller must properly init udp_ip6 and udp_in6 beforehand. */ static void udp_append(last, ip, n, off) struct inpcb *last; struct ip *ip; struct mbuf *n; int off; { struct sockaddr *append_sa; struct mbuf *opts = 0; #if defined(IPSEC) || defined(FAST_IPSEC) /* check AH/ESP integrity. */ if (ipsec4_in_reject(n, last)) { #ifdef IPSEC ipsecstat.in_polvio++; #endif /*IPSEC*/ m_freem(n); return; } #endif /*IPSEC || FAST_IPSEC*/ #ifdef MAC if (mac_check_inpcb_deliver(last, n) != 0) { m_freem(n); return; } #endif if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) { #ifdef INET6 if (last->inp_vflag & INP_IPV6) { int savedflags; if (udp_ip6.uip6_init_done == 0) { ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip); udp_ip6.uip6_init_done = 1; } savedflags = last->inp_flags; last->inp_flags &= ~INP_UNMAPPABLEOPTS; ip6_savecontrol(last, n, &opts); last->inp_flags = savedflags; } else #endif ip_savecontrol(last, &opts, ip, n); } #ifdef INET6 if (last->inp_vflag & INP_IPV6) { if (udp_in6.uin6_init_done == 0) { in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin); udp_in6.uin6_init_done = 1; } append_sa = (struct sockaddr *)&udp_in6.uin6_sin; } else #endif append_sa = (struct sockaddr *)&udp_in; m_adj(n, off); if (sbappendaddr(&last->inp_socket->so_rcv, append_sa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); udpstat.udps_fullsock++; } else sorwakeup(last->inp_socket); } /* * Notify a udp user of an asynchronous error; * just wake up so that he can collect error status. */ struct inpcb * udp_notify(inp, errno) register struct inpcb *inp; int errno; { inp->inp_socket->so_error = errno; sorwakeup(inp->inp_socket); sowwakeup(inp->inp_socket); return inp; } void udp_ctlinput(cmd, sa, vip) int cmd; struct sockaddr *sa; void *vip; { struct ip *ip = vip; struct udphdr *uh; struct inpcb *(*notify)(struct inpcb *, int) = udp_notify; struct in_addr faddr; struct inpcb *inp; int s; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; /* * Redirects don't need to be handled up here. */ if (PRC_IS_REDIRECT(cmd)) return; /* * Hostdead is ugly because it goes linearly through all PCBs. * XXX: We never get this from ICMP, otherwise it makes an * excellent DoS attack on machines with many connections. */ if (cmd == PRC_HOSTDEAD) ip = 0; else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip) { s = splnet(); uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); INP_INFO_RLOCK(&udbinfo); inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport, ip->ip_src, uh->uh_sport, 0, NULL); if (inp != NULL) { INP_LOCK(inp); if (inp->inp_socket != NULL) { (*notify)(inp, inetctlerrmap[cmd]); } INP_UNLOCK(inp); } INP_INFO_RUNLOCK(&udbinfo); splx(s); } else in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify); } static int udp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n, s; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = udbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); return 0; } if (req->newptr != 0) return EPERM; /* * OK, now we're committed to doing something. */ s = splnet(); INP_INFO_RLOCK(&udbinfo); gencnt = udbinfo.ipi_gencnt; n = udbinfo.ipi_count; INP_INFO_RUNLOCK(&udbinfo); splx(s); error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + n * sizeof(struct xinpcb)); if (error != 0) return (error); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) return ENOMEM; s = splnet(); INP_INFO_RLOCK(&udbinfo); for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_LOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) inp_list[i++] = inp; INP_UNLOCK(inp); } INP_INFO_RUNLOCK(&udbinfo); splx(s); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ bcopy(inp, &xi.xi_inp, sizeof *inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); xi.xi_inp.inp_gencnt = inp->inp_gencnt; error = SYSCTL_OUT(req, &xi, sizeof xi); } } if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ s = splnet(); INP_INFO_RLOCK(&udbinfo); xig.xig_gen = udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = udbinfo.ipi_count; INP_INFO_RUNLOCK(&udbinfo); splx(s); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return error; } SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); static int udp_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in addrs[2]; struct inpcb *inp; int error, s; error = suser_cred(req->td->td_ucred, PRISON_ROOT); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); s = splnet(); INP_INFO_RLOCK(&udbinfo); inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, 1, NULL); if (inp == NULL || inp->inp_socket == NULL) { error = ENOENT; goto out; } error = cr_canseesocket(req->td->td_ucred, inp->inp_socket); if (error) goto out; cru2x(inp->inp_socket->so_cred, &xuc); out: INP_INFO_RUNLOCK(&udbinfo); splx(s); if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, udp_getcred, "S,xucred", "Get the xucred of a UDP connection"); static int udp_output(inp, m, addr, control, td) register struct inpcb *inp; struct mbuf *m; struct sockaddr *addr; struct mbuf *control; struct thread *td; { register struct udpiphdr *ui; register int len = m->m_pkthdr.len; struct in_addr faddr, laddr; struct cmsghdr *cm; struct sockaddr_in *sin, src; int error = 0; int ipflags; u_short fport, lport; INP_LOCK_ASSERT(inp); #ifdef MAC mac_create_mbuf_from_socket(inp->inp_socket, m); #endif if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { error = EMSGSIZE; if (control) m_freem(control); goto release; } src.sin_addr.s_addr = INADDR_ANY; if (control != NULL) { /* * XXX: Currently, we assume all the optional information * is stored in a single mbuf. */ if (control->m_next) { error = EINVAL; m_freem(control); goto release; } for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { cm = mtod(control, struct cmsghdr *); if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) { error = EINVAL; break; } if (cm->cmsg_level != IPPROTO_IP) continue; switch (cm->cmsg_type) { case IP_SENDSRCADDR: if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_addr))) { error = EINVAL; break; } bzero(&src, sizeof(src)); src.sin_family = AF_INET; src.sin_len = sizeof(src); src.sin_port = inp->inp_lport; src.sin_addr = *(struct in_addr *)CMSG_DATA(cm); break; default: error = ENOPROTOOPT; break; } if (error) break; } m_freem(control); } if (error) goto release; laddr = inp->inp_laddr; lport = inp->inp_lport; if (src.sin_addr.s_addr != INADDR_ANY) { if (lport == 0) { error = EINVAL; goto release; } error = in_pcbbind_setup(inp, (struct sockaddr *)&src, - &laddr.s_addr, &lport, td); + &laddr.s_addr, &lport, td->td_ucred); if (error) goto release; } if (addr) { sin = (struct sockaddr_in *)addr; if (td && jailed(td->td_ucred)) prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr); if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto release; } error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport, - &faddr.s_addr, &fport, NULL, td); + &faddr.s_addr, &fport, NULL, td->td_ucred); if (error) goto release; /* Commit the local port if newly assigned. */ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->inp_lport = 0; error = EAGAIN; goto release; } inp->inp_flags |= INP_ANONPORT; } } else { faddr = inp->inp_faddr; fport = inp->inp_fport; if (faddr.s_addr == INADDR_ANY) { error = ENOTCONN; goto release; } } /* * Calculate data length and get a mbuf * for UDP and IP headers. */ M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT); if (m == 0) { error = ENOBUFS; goto release; } /* * Fill in mbuf with extended UDP header * and addresses and length put into network format. */ ui = mtod(m, struct udpiphdr *); bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */ ui->ui_pr = IPPROTO_UDP; ui->ui_src = laddr; ui->ui_dst = faddr; ui->ui_sport = lport; ui->ui_dport = fport; ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); ipflags = inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST); if (inp->inp_flags & INP_ONESBCAST) ipflags |= IP_SENDONES; /* * Set up checksum and output datagram. */ if (udpcksum) { if (inp->inp_flags & INP_ONESBCAST) faddr.s_addr = INADDR_BROADCAST; ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr, htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } else { ui->ui_sum = 0; } ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len; ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ udpstat.udps_opackets++; error = ip_output(m, inp->inp_options, NULL, ipflags, inp->inp_moptions, inp); return (error); release: m_freem(m); return (error); } u_long udp_sendspace = 9216; /* really max datagram size */ /* 40 1K datagrams */ SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); u_long udp_recvspace = 40 * (1024 + #ifdef INET6 sizeof(struct sockaddr_in6) #else sizeof(struct sockaddr_in) #endif ); SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, &udp_recvspace, 0, "Maximum space for incoming UDP datagrams"); static int udp_abort(struct socket *so) { struct inpcb *inp; int s; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; /* ??? possible? panic instead? */ } INP_LOCK(inp); soisdisconnected(so); s = splnet(); in_pcbdetach(inp); INP_INFO_WUNLOCK(&udbinfo); splx(s); return 0; } static int udp_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int s, error; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp != 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; } error = soreserve(so, udp_sendspace, udp_recvspace); if (error) { INP_INFO_WUNLOCK(&udbinfo); return error; } s = splnet(); error = in_pcballoc(so, &udbinfo, "udpinp"); splx(s); if (error) { INP_INFO_WUNLOCK(&udbinfo); return error; } inp = (struct inpcb *)so->so_pcb; INP_LOCK(inp); INP_INFO_WUNLOCK(&udbinfo); inp->inp_vflag |= INP_IPV4; inp->inp_ip_ttl = ip_defttl; INP_UNLOCK(inp); return 0; } static int udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int s, error; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; } INP_LOCK(inp); s = splnet(); - error = in_pcbbind(inp, nam, td); + error = in_pcbbind(inp, nam, td->td_ucred); splx(s); INP_UNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); return error; } static int udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int s, error; struct sockaddr_in *sin; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; } INP_LOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_UNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); return EISCONN; } s = splnet(); sin = (struct sockaddr_in *)nam; if (td && jailed(td->td_ucred)) prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr); - error = in_pcbconnect(inp, nam, td); + error = in_pcbconnect(inp, nam, td->td_ucred); splx(s); if (error == 0) soisconnected(so); INP_UNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); return error; } static int udp_detach(struct socket *so) { struct inpcb *inp; int s; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; } INP_LOCK(inp); s = splnet(); in_pcbdetach(inp); INP_INFO_WUNLOCK(&udbinfo); splx(s); return 0; } static int udp_disconnect(struct socket *so) { struct inpcb *inp; int s; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); return EINVAL; } INP_LOCK(inp); if (inp->inp_faddr.s_addr == INADDR_ANY) { INP_INFO_WUNLOCK(&udbinfo); INP_UNLOCK(inp); return ENOTCONN; } s = splnet(); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; INP_UNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); splx(s); so->so_state &= ~SS_ISCONNECTED; /* XXX */ return 0; } static int udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; int ret; INP_INFO_WLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_WUNLOCK(&udbinfo); m_freem(m); return EINVAL; } INP_LOCK(inp); ret = udp_output(inp, m, addr, control, td); INP_UNLOCK(inp); INP_INFO_WUNLOCK(&udbinfo); return ret; } int udp_shutdown(struct socket *so) { struct inpcb *inp; INP_INFO_RLOCK(&udbinfo); inp = sotoinpcb(so); if (inp == 0) { INP_INFO_RUNLOCK(&udbinfo); return EINVAL; } INP_LOCK(inp); INP_INFO_RUNLOCK(&udbinfo); socantsendmore(so); INP_UNLOCK(inp); return 0; } /* * This is the wrapper function for in_setsockaddr. We just pass down * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking * here because in_setsockaddr will call malloc and might block. */ static int udp_sockaddr(struct socket *so, struct sockaddr **nam) { return (in_setsockaddr(so, nam, &udbinfo)); } /* * This is the wrapper function for in_setpeeraddr. We just pass down * the pcbinfo for in_setpeeraddr to lock. */ static int udp_peeraddr(struct socket *so, struct sockaddr **nam) { return (in_setpeeraddr(so, nam, &udbinfo)); } struct pr_usrreqs udp_usrreqs = { udp_abort, pru_accept_notsupp, udp_attach, udp_bind, udp_connect, pru_connect2_notsupp, in_control, udp_detach, udp_disconnect, pru_listen_notsupp, udp_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, udp_send, pru_sense_null, udp_shutdown, udp_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel }; Index: head/sys/netinet6/in6_pcb.c =================================================================== --- head/sys/netinet6/in6_pcb.c (revision 127504) +++ head/sys/netinet6/in6_pcb.c (revision 127505) @@ -1,955 +1,955 @@ /* $FreeBSD$ */ /* $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_random_ip_id.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #endif /* IPSEC */ #ifdef FAST_IPSEC #include #include #include #endif /* FAST_IPSEC */ struct in6_addr zeroin6_addr; int -in6_pcbbind(inp, nam, td) +in6_pcbbind(inp, nam, cred) register struct inpcb *inp; struct sockaddr *nam; - struct thread *td; + struct ucred *cred; { struct socket *so = inp->inp_socket; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); if (!in6_ifaddr) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; if (nam) { sin6 = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof(*sin6)) return (EINVAL); /* * family check. */ if (nam->sa_family != AF_INET6) return (EAFNOSUPPORT); /* KAME hack: embed scopeid */ if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0) return EINVAL; /* this must be cleared for ifa_ifwithaddr() */ sin6->sin6_scope_id = 0; lport = sin6->sin6_port; if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow compepte duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct ifaddr *ia = NULL; sin6->sin6_port = 0; /* yech... */ if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0) return (EADDRNOTAVAIL); /* * XXX: bind to an anycast address might accidentally * cause sending a packet with anycast source address. * We should allow to bind to a deprecated address, since * the application dares to use it. */ if (ia && ((struct in6_ifaddr *)ia)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { return (EADDRNOTAVAIL); } } if (lport) { struct inpcb *t; /* GROSS */ - if (ntohs(lport) < IPV6PORT_RESERVED && td && - suser_cred(td->td_ucred, PRISON_ROOT)) + if (ntohs(lport) < IPV6PORT_RESERVED && + suser_cred(cred, PRISON_ROOT)) return (EACCES); if (so->so_cred->cr_uid != 0 && !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, INPLOOKUP_WILDCARD); if (t && (t->inp_vflag & INP_TIMEWAIT)) { if ((!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || !(intotw(t)->tw_so_options & SO_REUSEPORT)) && so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid) return (EADDRINUSE); } else if (t && (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || (t->inp_socket->so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid)) return (EADDRINUSE); if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, INPLOOKUP_WILDCARD); if (t && (t->inp_vflag & INP_TIMEWAIT)) { if (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || ((inp->inp_vflag & INP_IPV6PROTO) == (t->inp_vflag & INP_IPV6PROTO)))) return (EADDRINUSE); } else if (t && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid) && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket))) return (EADDRINUSE); } } t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, wild); if (t && (reuseport & ((t->inp_vflag & INP_TIMEWAIT) ? intotw(t)->tw_so_options : t->inp_socket->so_options)) == 0) return (EADDRINUSE); if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, wild); if (t && t->inp_vflag & INP_TIMEWAIT) { if ((reuseport & intotw(t)->tw_so_options) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || ((inp->inp_vflag & INP_IPV6PROTO) == (t->inp_vflag & INP_IPV6PROTO)))) return (EADDRINUSE); } else if (t && (reuseport & t->inp_socket->so_options) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket))) return (EADDRINUSE); } } inp->in6p_laddr = sin6->sin6_addr; } if (lport == 0) { int e; - if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, td)) != 0) + if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) return (e); } else { inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } } return (0); } /* * Transform old in6_pcbconnect() into an inner subroutine for new * in6_pcbconnect(): Do some validity-checking on the remote * address (in mbuf 'nam') and then determine local host address * (i.e., which interface) to use to access that remote host. * * This preserves definition of in6_pcbconnect(), while supporting a * slightly different version for T/TCP. (This is more than * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ int in6_pcbladdr(inp, nam, plocal_addr6) register struct inpcb *inp; struct sockaddr *nam; struct in6_addr **plocal_addr6; { register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; struct ifnet *ifp = NULL; int error = 0; if (nam->sa_len != sizeof (*sin6)) return (EINVAL); if (sin6->sin6_family != AF_INET6) return (EAFNOSUPPORT); if (sin6->sin6_port == 0) return (EADDRNOTAVAIL); /* KAME hack: embed scopeid */ if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0) return EINVAL; if (in6_ifaddr) { /* * If the destination address is UNSPECIFIED addr, * use the loopback addr, e.g ::1. */ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) sin6->sin6_addr = in6addr_loopback; } { /* * XXX: in6_selectsrc might replace the bound local address * with the address specified by setsockopt(IPV6_PKTINFO). * Is it the intended behavior? */ *plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, inp->in6p_moptions, NULL, &inp->in6p_laddr, &error); if (*plocal_addr6 == 0) { if (error == 0) error = EADDRNOTAVAIL; return (error); } /* * Don't do pcblookup call here; return interface in * plocal_addr6 * and exit to caller, that will do the lookup. */ } return (0); } /* * Outer subroutine: * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int -in6_pcbconnect(inp, nam, td) +in6_pcbconnect(inp, nam, cred) register struct inpcb *inp; struct sockaddr *nam; - struct thread *td; + struct ucred *cred; { struct in6_addr *addr6; register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; int error; /* * Call inner routine, to assign local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. */ if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) return (error); if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL) != NULL) { return (EADDRINUSE); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (inp->inp_lport == 0) { - error = in6_pcbbind(inp, (struct sockaddr *)0, td); + error = in6_pcbbind(inp, (struct sockaddr *)0, cred); if (error) return (error); } inp->in6p_laddr = *addr6; } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) inp->in6p_flowinfo |= #ifdef RANDOM_IP_ID (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); #else (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK); #endif in_pcbrehash(inp); #ifdef IPSEC if (inp->inp_socket->so_type == SOCK_STREAM) ipsec_pcbconn(inp->inp_sp); #endif return (0); } void in6_pcbdisconnect(inp) struct inpcb *inp; { bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); inp->inp_fport = 0; /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; in_pcbrehash(inp); #ifdef IPSEC ipsec_pcbdisconn(inp->inp_sp); #endif if (inp->inp_socket->so_state & SS_NOFDREF) in6_pcbdetach(inp); } void in6_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; struct inpcbinfo *ipi = inp->inp_pcbinfo; #if defined(IPSEC) || defined(FAST_IPSEC) if (inp->in6p_sp != NULL) ipsec6_delete_pcbpolicy(inp); #endif /* IPSEC */ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); if (so) { so->so_pcb = NULL; sotryfree(so); } ip6_freepcbopts(inp->in6p_outputopts); ip6_freemoptions(inp->in6p_moptions); /* Check and free IPv4 related resources in case of mapped addr */ if (inp->inp_options) (void)m_free(inp->inp_options); ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; INP_LOCK_DESTROY(inp); uma_zfree(ipi->ipi_zone, inp); } struct sockaddr * in6_sockaddr(port, addr_p) in_port_t port; struct in6_addr *addr_p; { struct sockaddr_in6 *sin6; MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK); bzero(sin6, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); sin6->sin6_port = port; sin6->sin6_addr = *addr_p; if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); else sin6->sin6_scope_id = 0; /*XXX*/ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; return (struct sockaddr *)sin6; } struct sockaddr * in6_v4mapsin6_sockaddr(port, addr_p) in_port_t port; struct in_addr *addr_p; { struct sockaddr_in sin; struct sockaddr_in6 *sin6_p; bzero(&sin, sizeof sin); sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_port = port; sin.sin_addr = *addr_p; MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME, M_WAITOK); in6_sin_2_v4mapsin6(&sin, sin6_p); return (struct sockaddr *)sin6_p; } /* * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was * modified to match the pru_sockaddr() and pru_peeraddr() entry points * in struct pr_usrreqs, so that protocols can just reference then directly * without the need for a wrapper function. The socket must have a valid * (i.e., non-nil) PCB, but it should be impossible to get an invalid one * except through a kernel programming error, so it is acceptable to panic * (or in this case trap) if the PCB is invalid. (Actually, we don't trap * because there actually /is/ a programming error somewhere... XXX) */ int in6_setsockaddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; register struct inpcb *inp; struct in6_addr addr; in_port_t port; s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); return EINVAL; } port = inp->inp_lport; addr = inp->in6p_laddr; splx(s); *nam = in6_sockaddr(port, &addr); return 0; } int in6_setpeeraddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; struct inpcb *inp; struct in6_addr addr; in_port_t port; s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); return EINVAL; } port = inp->inp_fport; addr = inp->in6p_faddr; splx(s); *nam = in6_sockaddr(port, &addr); return 0; } int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp = sotoinpcb(so); int error; if (inp == NULL) return EINVAL; if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { error = in_setsockaddr(so, nam, &tcbinfo); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else { /* scope issues will be handled in in6_setsockaddr(). */ error = in6_setsockaddr(so, nam); } return error; } int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp = sotoinpcb(so); int error; if (inp == NULL) return EINVAL; if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { error = in_setpeeraddr(so, nam, &tcbinfo); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else /* scope issues will be handled in in6_setpeeraddr(). */ error = in6_setpeeraddr(so, nam); return error; } /* * Pass some notification to all connections of a protocol * associated with address dst. The local address and/or port numbers * may be specified to limit the search. The "usual action" will be * taken, depending on the ctlinput cmd. The caller must filter any * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. * * Must be called at splnet. */ void in6_pcbnotify(head, dst, fport_arg, src, lport_arg, cmd, cmdarg, notify) struct inpcbhead *head; struct sockaddr *dst; const struct sockaddr *src; u_int fport_arg, lport_arg; int cmd; void *cmdarg; struct inpcb *(*notify) __P((struct inpcb *, int)); { struct inpcb *inp, *ninp; struct sockaddr_in6 sa6_src, *sa6_dst; u_short fport = fport_arg, lport = lport_arg; u_int32_t flowinfo; int errno, s; if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) return; sa6_dst = (struct sockaddr_in6 *)dst; if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) return; /* * note that src can be NULL when we get notify by local fragmentation. */ sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; flowinfo = sa6_src.sin6_flowinfo; /* * Redirects go to all references to the destination, * and use in6_rtchange to invalidate the route cache. * Dead host indications: also use in6_rtchange to invalidate * the cache, and deliver the error to all the sockets. * Otherwise, if we have knowledge of the local port and address, * deliver only to that socket. */ if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); if (cmd != PRC_HOSTDEAD) notify = in6_rtchange; } errno = inet6ctlerrmap[cmd]; s = splnet(); for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { ninp = LIST_NEXT(inp, inp_list); if ((inp->inp_vflag & INP_IPV6) == 0) continue; /* * If the error designates a new path MTU for a destination * and the application (associated with this socket) wanted to * know the value, notify. Note that we notify for all * disconnected sockets if the corresponding application * wanted. This is because some UDP applications keep sending * sockets disconnected. * XXX: should we avoid to notify the value to TCP sockets? */ if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 && (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) { ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst, (u_int32_t *)cmdarg); } /* * Detect if we should notify the error. If no source and * destination ports are specifed, but non-zero flowinfo and * local address match, notify the error. This is the case * when the error is delivered with an encrypted buffer * by ESP. Otherwise, just compare addresses and ports * as usual. */ if (lport == 0 && fport == 0 && flowinfo && inp->inp_socket != NULL && flowinfo == (inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) goto do_notify; else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr) || inp->inp_socket == 0 || (lport && inp->inp_lport != lport) || (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) || (fport && inp->inp_fport != fport)) continue; do_notify: if (notify) (*notify)(inp, errno); } splx(s); } /* * Lookup a PCB based on the local address and port. */ struct inpcb * in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) struct inpcbinfo *pcbinfo; struct in6_addr *laddr; u_int lport_arg; int wild_okay; { register struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; if (!wild_okay) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_lport == lport) { /* * Found. */ return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) wildcard++; if (!IN6_IS_ADDR_UNSPECIFIED( &inp->in6p_laddr)) { if (IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; else if (!IN6_ARE_ADDR_EQUAL( &inp->in6p_laddr, laddr)) continue; } else { if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) { break; } } } } return (match); } } void in6_pcbpurgeif0(head, ifp) struct in6pcb *head; struct ifnet *ifp; { struct in6pcb *in6p; struct ip6_moptions *im6o; struct in6_multi_mship *imm, *nimm; for (in6p = head; in6p != NULL; in6p = LIST_NEXT(in6p, inp_list)) { im6o = in6p->in6p_moptions; if ((in6p->inp_vflag & INP_IPV6) && im6o) { /* * Unselect the outgoing interface if it is being * detached. */ if (im6o->im6o_multicast_ifp == ifp) im6o->im6o_multicast_ifp = NULL; /* * Drop multicast group membership if we joined * through the interface being detached. * XXX controversial - is it really legal for kernel * to force this? */ for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = nimm) { nimm = imm->i6mm_chain.le_next; if (imm->i6mm_maddr->in6m_ifp == ifp) { LIST_REMOVE(imm, i6mm_chain); in6_delmulti(imm->i6mm_maddr); free(imm, M_IPMADDR); } } } } } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in6_losing(in6p) struct inpcb *in6p; { /* * We don't store route pointers in the routing table anymore */ return; } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. */ struct inpcb * in6_rtchange(inp, errno) struct inpcb *inp; int errno; { /* * We don't store route pointers in the routing table anymore */ return inp; } /* * Lookup PCB in hash list. */ struct inpcb * in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) struct inpcbinfo *pcbinfo; struct in6_addr *faddr, *laddr; u_int fport_arg, lport_arg; int wildcard; struct ifnet *ifp; { struct inpcbhead *head; register struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; int faith; if (faithprefix_p != NULL) faith = (*faithprefix_p)(laddr); else faith = 0; /* * First look for an exact match. */ head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) { /* * Found. */ return (inp); } } if (wildcard) { struct inpcb *local_wild = NULL; head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && inp->inp_lport == lport) { if (faith && (inp->inp_flags & INP_FAITH) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) return (inp); else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) local_wild = inp; } } return (local_wild); } /* * Not found. */ return (NULL); } void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) { struct ip6_hdr *ip; ip = mtod(m, struct ip6_hdr *); bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_addr = ip->ip6_src; if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = (m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) ? m->m_pkthdr.rcvif->if_index : 0; return; } Index: head/sys/netinet6/in6_pcb.h =================================================================== --- head/sys/netinet6/in6_pcb.h (revision 127504) +++ head/sys/netinet6/in6_pcb.h (revision 127505) @@ -1,110 +1,110 @@ /* $FreeBSD$ */ /* $KAME: in6_pcb.h,v 1.13 2001/02/06 09:16:53 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93 */ #ifndef _NETINET6_IN6_PCB_H_ #define _NETINET6_IN6_PCB_H_ #ifdef _KERNEL #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) void in6_pcbpurgeif0 __P((struct in6pcb *, struct ifnet *)); void in6_losing __P((struct inpcb *)); -int in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct thread *)); -int in6_pcbconnect __P((struct inpcb *, struct sockaddr *, struct thread *)); +int in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct ucred *)); +int in6_pcbconnect __P((struct inpcb *, struct sockaddr *, struct ucred *)); void in6_pcbdetach __P((struct inpcb *)); void in6_pcbdisconnect __P((struct inpcb *)); int in6_pcbladdr __P((struct inpcb *, struct sockaddr *, struct in6_addr **)); struct inpcb * in6_pcblookup_local __P((struct inpcbinfo *, struct in6_addr *, u_int, int)); struct inpcb * in6_pcblookup_hash __P((struct inpcbinfo *, struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *)); void in6_pcbnotify __P((struct inpcbhead *, struct sockaddr *, u_int, const struct sockaddr *, u_int, int, void *, struct inpcb *(*)(struct inpcb *, int))); struct inpcb * in6_rtchange __P((struct inpcb *, int)); struct sockaddr * in6_sockaddr __P((in_port_t port, struct in6_addr *addr_p)); struct sockaddr * in6_v4mapsin6_sockaddr __P((in_port_t port, struct in_addr *addr_p)); int in6_setpeeraddr __P((struct socket *so, struct sockaddr **nam)); int in6_setsockaddr __P((struct socket *so, struct sockaddr **nam)); int in6_mapped_sockaddr __P((struct socket *so, struct sockaddr **nam)); int in6_mapped_peeraddr __P((struct socket *so, struct sockaddr **nam)); int in6_selecthlim __P((struct in6pcb *, struct ifnet *)); -int in6_pcbsetport __P((struct in6_addr *, struct inpcb *, struct thread *)); +int in6_pcbsetport __P((struct in6_addr *, struct inpcb *, struct ucred *)); void init_sin6 __P((struct sockaddr_in6 *sin6, struct mbuf *m)); #endif /* _KERNEL */ #endif /* !_NETINET6_IN6_PCB_H_ */ Index: head/sys/netinet6/in6_src.c =================================================================== --- head/sys/netinet6/in6_src.c (revision 127504) +++ head/sys/netinet6/in6_src.c (revision 127505) @@ -1,1211 +1,1211 @@ /* $FreeBSD$ */ /* $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ENABLE_DEFAULT_SCOPE #include #endif #include static struct mtx addrsel_lock; #define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) #define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) #define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) #define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) #define ADDR_LABEL_NOTAPP (-1) struct in6_addrpolicy defaultaddrpolicy; int ip6_prefer_tempaddr = 0; static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *ro, struct ifnet **)); static struct in6_addrpolicy *lookup_addrsel_policy __P((struct sockaddr_in6 *)); static void init_policy_queue __P((void)); static int add_addrsel_policyent __P((struct in6_addrpolicy *)); static int delete_addrsel_policyent __P((struct in6_addrpolicy *)); static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), void *)); static int dump_addrsel_policyent __P((struct in6_addrpolicy *, void *)); static struct in6_addrpolicy *match_addrsel_policy __P((struct sockaddr_in6 *)); /* * Return an IPv6 address, which is the most appropriate for a given * destination and user specified options. * If necessary, this function lookups the routing table and returns * an entry to the caller for later use. */ #define REPLACE(r) do {\ if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ ip6stat.ip6s_sources_rule[(r)]++; \ /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ goto replace; \ } while(0) #define NEXT(r) do {\ if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ ip6stat.ip6s_sources_rule[(r)]++; \ /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ goto next; /* XXX: we can't use 'continue' here */ \ } while(0) #define BREAK(r) do { \ if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ ip6stat.ip6s_sources_rule[(r)]++; \ goto out; /* XXX: we can't use 'break' here */ \ } while(0) struct in6_addr * in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) struct sockaddr_in6 *dstsock; struct ip6_pktopts *opts; struct ip6_moptions *mopts; struct route_in6 *ro; struct in6_addr *laddr; int *errorp; { struct in6_addr *dst; struct ifnet *ifp = NULL; struct in6_ifaddr *ia = NULL, *ia_best = NULL; struct in6_pktinfo *pi = NULL; int dst_scope = -1, best_scope = -1, best_matchlen = -1; struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; u_int32_t odstzone; int prefer_tempaddr; struct sockaddr_in6 dstsock0; dstsock0 = *dstsock; if (IN6_IS_SCOPE_LINKLOCAL(&dstsock0.sin6_addr) || IN6_IS_ADDR_MC_INTFACELOCAL(&dstsock0.sin6_addr)) { /* KAME assumption: link id == interface id */ if (opts && opts->ip6po_pktinfo && opts->ip6po_pktinfo->ipi6_ifindex) { ifp = ifnet_byindex(opts->ip6po_pktinfo->ipi6_ifindex); dstsock0.sin6_addr.s6_addr16[1] = htons(opts->ip6po_pktinfo->ipi6_ifindex); } else if (mopts && IN6_IS_ADDR_MULTICAST(&dstsock0.sin6_addr) && mopts->im6o_multicast_ifp) { ifp = mopts->im6o_multicast_ifp; dstsock0.sin6_addr.s6_addr16[1] = htons(ifp->if_index); } else if ((*errorp = in6_embedscope(&dstsock0.sin6_addr, &dstsock0, NULL, NULL)) != 0) return (NULL); } dstsock = &dstsock0; dst = &dstsock->sin6_addr; *errorp = 0; /* * If the source address is explicitly specified by the caller, * check if the requested source address is indeed a unicast address * assigned to the node, and can be used as the packet's source * address. If everything is okay, use the address as source. */ if (opts && (pi = opts->ip6po_pktinfo) && !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { struct sockaddr_in6 srcsock; struct in6_ifaddr *ia6; /* get the outgoing interface */ if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) { return (NULL); } /* * determine the appropriate zone id of the source based on * the zone of the destination and the outgoing interface. */ bzero(&srcsock, sizeof(srcsock)); srcsock.sin6_family = AF_INET6; srcsock.sin6_len = sizeof(srcsock); srcsock.sin6_addr = pi->ipi6_addr; if (ifp) { if (in6_addr2zoneid(ifp, &pi->ipi6_addr, &srcsock.sin6_scope_id)) { *errorp = EINVAL; /* XXX */ return (NULL); } } if ((*errorp = in6_embedscope(&srcsock.sin6_addr, &srcsock, NULL, NULL)) != 0) { return (NULL); } srcsock.sin6_scope_id = 0; /* XXX: ifa_ifwithaddr expects 0 */ ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock)); if (ia6 == NULL || (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { *errorp = EADDRNOTAVAIL; return (NULL); } pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ return (&ia6->ia_addr.sin6_addr); } /* * Otherwise, if the socket has already bound the source, just use it. */ if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) return (laddr); /* * If the address is not specified, choose the best one based on * the outgoing interface and the destination address. */ /* get the outgoing interface */ if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) return (NULL); #ifdef DIAGNOSTIC if (ifp == NULL) /* this should not happen */ panic("in6_selectsrc: NULL ifp"); #endif if (in6_addr2zoneid(ifp, dst, &odstzone)) { /* impossible */ *errorp = EIO; /* XXX */ return (NULL); } for (ia = in6_ifaddr; ia; ia = ia->ia_next) { int new_scope = -1, new_matchlen = -1; struct in6_addrpolicy *new_policy = NULL; u_int32_t srczone, osrczone, dstzone; struct ifnet *ifp1 = ia->ia_ifp; /* * We'll never take an address that breaks the scope zone * of the destination. We also skip an address if its zone * does not contain the outgoing interface. * XXX: we should probably use sin6_scope_id here. */ if (in6_addr2zoneid(ifp1, dst, &dstzone) || odstzone != dstzone) { continue; } if (in6_addr2zoneid(ifp, &ia->ia_addr.sin6_addr, &osrczone) || in6_addr2zoneid(ifp1, &ia->ia_addr.sin6_addr, &srczone) || osrczone != srczone) { continue; } /* avoid unusable addresses */ if ((ia->ia6_flags & (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { continue; } if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) continue; /* Rule 1: Prefer same address */ if (IN6_ARE_ADDR_EQUAL(dst, &ia->ia_addr.sin6_addr)) { ia_best = ia; BREAK(1); /* there should be no better candidate */ } if (ia_best == NULL) REPLACE(0); /* Rule 2: Prefer appropriate scope */ if (dst_scope < 0) dst_scope = in6_addrscope(dst); new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) REPLACE(2); NEXT(2); } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) NEXT(2); REPLACE(2); } /* * Rule 3: Avoid deprecated addresses. Note that the case of * !ip6_use_deprecated is already rejected above. */ if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) NEXT(3); if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) REPLACE(3); /* Rule 4: Prefer home addresses */ /* * XXX: This is a TODO. We should probably merge the MIP6 * case above. */ /* Rule 5: Prefer outgoing interface */ if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) NEXT(5); if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) REPLACE(5); /* * Rule 6: Prefer matching label * Note that best_policy should be non-NULL here. */ if (dst_policy == NULL) dst_policy = lookup_addrsel_policy(dstsock); if (dst_policy->label != ADDR_LABEL_NOTAPP) { new_policy = lookup_addrsel_policy(&ia->ia_addr); if (dst_policy->label == best_policy->label && dst_policy->label != new_policy->label) NEXT(6); if (dst_policy->label != best_policy->label && dst_policy->label == new_policy->label) REPLACE(6); } /* * Rule 7: Prefer public addresses. * We allow users to reverse the logic by configuring * a sysctl variable, so that privacy conscious users can * always prefer temporary addresses. */ if (opts == NULL || opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { prefer_tempaddr = ip6_prefer_tempaddr; } else if (opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_NOTPREFER) { prefer_tempaddr = 0; } else prefer_tempaddr = 1; if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && (ia->ia6_flags & IN6_IFF_TEMPORARY)) { if (prefer_tempaddr) REPLACE(7); else NEXT(7); } if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { if (prefer_tempaddr) NEXT(7); else REPLACE(7); } /* * Rule 8: prefer addresses on alive interfaces. * This is a KAME specific rule. */ if ((ia_best->ia_ifp->if_flags & IFF_UP) && !(ia->ia_ifp->if_flags & IFF_UP)) NEXT(8); if (!(ia_best->ia_ifp->if_flags & IFF_UP) && (ia->ia_ifp->if_flags & IFF_UP)) REPLACE(8); /* * Rule 14: Use longest matching prefix. * Note: in the address selection draft, this rule is * documented as "Rule 8". However, since it is also * documented that this rule can be overridden, we assign * a large number so that it is easy to assign smaller numbers * to more preferred rules. */ new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, dst); if (best_matchlen < new_matchlen) REPLACE(14); if (new_matchlen < best_matchlen) NEXT(14); /* Rule 15 is reserved. */ /* * Last resort: just keep the current candidate. * Or, do we need more rules? */ continue; replace: ia_best = ia; best_scope = (new_scope >= 0 ? new_scope : in6_addrscope(&ia_best->ia_addr.sin6_addr)); best_policy = (new_policy ? new_policy : lookup_addrsel_policy(&ia_best->ia_addr)); best_matchlen = (new_matchlen >= 0 ? new_matchlen : in6_matchlen(&ia_best->ia_addr.sin6_addr, dst)); next: continue; out: break; } if ((ia = ia_best) == NULL) { *errorp = EADDRNOTAVAIL; return (NULL); } return (&ia->ia_addr.sin6_addr); } static int in6_selectif(dstsock, opts, mopts, ro, retifp) struct sockaddr_in6 *dstsock; struct ip6_pktopts *opts; struct ip6_moptions *mopts; struct route_in6 *ro; struct ifnet **retifp; { int error; struct route_in6 sro; struct rtentry *rt = NULL; if (ro == NULL) { bzero(&sro, sizeof(sro)); ro = &sro; } if ((error = in6_selectroute(dstsock, opts, mopts, ro, retifp, &rt, 0)) != 0) { if (rt && rt == sro.ro_rt) RTFREE(rt); return (error); } /* * do not use a rejected or black hole route. * XXX: this check should be done in the L2 output routine. * However, if we skipped this check here, we'd see the following * scenario: * - install a rejected route for a scoped address prefix * (like fe80::/10) * - send a packet to a destination that matches the scoped prefix, * with ambiguity about the scope zone. * - pick the outgoing interface from the route, and disambiguate the * scope zone with the interface. * - ip6_output() would try to get another route with the "new" * destination, which may be valid. * - we'd see no error on output. * Although this may not be very harmful, it should still be confusing. * We thus reject the case here. */ if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); if (rt && rt == sro.ro_rt) RTFREE(rt); return (flags); } /* * Adjust the "outgoing" interface. If we're going to loop the packet * back to ourselves, the ifp would be the loopback interface. * However, we'd rather know the interface associated to the * destination address (which should probably be one of our own * addresses.) */ if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) *retifp = rt->rt_ifa->ifa_ifp; if (rt && rt == sro.ro_rt) RTFREE(rt); return (0); } int in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone) struct sockaddr_in6 *dstsock; struct ip6_pktopts *opts; struct ip6_moptions *mopts; struct route_in6 *ro; struct ifnet **retifp; struct rtentry **retrt; int clone; /* meaningful only for bsdi and freebsd. */ { int error = 0; struct ifnet *ifp = NULL; struct rtentry *rt = NULL; struct sockaddr_in6 *sin6_next; struct in6_pktinfo *pi = NULL; struct in6_addr *dst = &dstsock->sin6_addr; #if 0 if (dstsock->sin6_addr.s6_addr32[0] == 0 && dstsock->sin6_addr.s6_addr32[1] == 0 && !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { printf("in6_selectroute: strange destination %s\n", ip6_sprintf(&dstsock->sin6_addr)); } else { printf("in6_selectroute: destination = %s%%%d\n", ip6_sprintf(&dstsock->sin6_addr), dstsock->sin6_scope_id); /* for debug */ } #endif /* If the caller specify the outgoing interface explicitly, use it. */ if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { /* XXX boundary check is assumed to be already done. */ ifp = ifnet_byindex(pi->ipi6_ifindex); if (ifp != NULL && (retrt == NULL || IN6_IS_ADDR_MULTICAST(dst))) { /* * we do not have to check nor get the route for * multicast. */ goto done; } else goto getroute; } /* * If the destination address is a multicast address and the outgoing * interface for the address is specified by the caller, use it. */ if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { goto done; /* we do not need a route for multicast. */ } getroute: /* * If the next hop address for the packet is specified by the caller, * use it as the gateway. */ if (opts && opts->ip6po_nexthop) { struct route_in6 *ron; sin6_next = satosin6(opts->ip6po_nexthop); /* at this moment, we only support AF_INET6 next hops */ if (sin6_next->sin6_family != AF_INET6) { error = EAFNOSUPPORT; /* or should we proceed? */ goto done; } /* * If the next hop is an IPv6 address, then the node identified * by that address must be a neighbor of the sending host. */ ron = &opts->ip6po_nextroute; if ((ron->ro_rt && (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != (RTF_UP | RTF_LLINFO)) || !SA6_ARE_ADDR_EQUAL(satosin6(&ron->ro_dst), sin6_next)) { if (ron->ro_rt) { RTFREE(ron->ro_rt); ron->ro_rt = NULL; } *satosin6(&ron->ro_dst) = *sin6_next; } if (ron->ro_rt == NULL) { rtalloc((struct route *)ron); /* multi path case? */ if (ron->ro_rt == NULL || !(ron->ro_rt->rt_flags & RTF_LLINFO)) { if (ron->ro_rt) { RTFREE(ron->ro_rt); ron->ro_rt = NULL; } error = EHOSTUNREACH; goto done; } } rt = ron->ro_rt; ifp = rt->rt_ifp; /* * When cloning is required, try to allocate a route to the * destination so that the caller can store path MTU * information. */ if (!clone) goto done; } /* * Use a cached route if it exists and is valid, else try to allocate * a new one. Note that we should check the address family of the * cached destination, in case of sharing the cache with IPv4. */ if (ro) { if (ro->ro_rt && (!(ro->ro_rt->rt_flags & RTF_UP) || ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst))) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)NULL; } if (ro->ro_rt == (struct rtentry *)NULL) { struct sockaddr_in6 *sa6; /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); sa6 = (struct sockaddr_in6 *)&ro->ro_dst; *sa6 = *dstsock; sa6->sin6_scope_id = 0; if (clone) { rtalloc((struct route *)ro); } else { ro->ro_rt = rtalloc1(&((struct route *)ro) ->ro_dst, 0, 0UL); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } } /* * do not care about the result if we have the nexthop * explicitly specified. */ if (opts && opts->ip6po_nexthop) goto done; if (ro->ro_rt) { ifp = ro->ro_rt->rt_ifp; if (ifp == NULL) { /* can this really happen? */ RTFREE(ro->ro_rt); ro->ro_rt = NULL; } } if (ro->ro_rt == NULL) error = EHOSTUNREACH; rt = ro->ro_rt; /* * Check if the outgoing interface conflicts with * the interface specified by ipi6_ifindex (if specified). * Note that loopback interface is always okay. * (this may happen when we are sending a packet to one of * our own addresses.) */ if (opts && opts->ip6po_pktinfo && opts->ip6po_pktinfo->ipi6_ifindex) { if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index != opts->ip6po_pktinfo->ipi6_ifindex) { error = EHOSTUNREACH; goto done; } } } done: if (ifp == NULL && rt == NULL) { /* * This can happen if the caller did not pass a cached route * nor any other hints. We treat this case an error. */ error = EHOSTUNREACH; } if (error == EHOSTUNREACH) ip6stat.ip6s_noroute++; if (retifp != NULL) *retifp = ifp; if (retrt != NULL) *retrt = rt; /* rt may be NULL */ return (error); } /* * Default hop limit selection. The precedence is as follows: * 1. Hoplimit value specified via ioctl. * 2. (If the outgoing interface is detected) the current * hop limit of the interface specified by router advertisement. * 3. The system default hoplimit. */ int in6_selecthlim(in6p, ifp) struct in6pcb *in6p; struct ifnet *ifp; { if (in6p && in6p->in6p_hops >= 0) return (in6p->in6p_hops); else if (ifp) return (ND_IFINFO(ifp)->chlim); else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { struct route_in6 ro6; struct ifnet *lifp; bzero(&ro6, sizeof(ro6)); ro6.ro_dst.sin6_family = AF_INET6; ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); ro6.ro_dst.sin6_addr = in6p->in6p_faddr; rtalloc((struct route *)&ro6); if (ro6.ro_rt) { lifp = ro6.ro_rt->rt_ifp; RTFREE(ro6.ro_rt); if (lifp) return (ND_IFINFO(lifp)->chlim); } else return (ip6_defhlim); } return (ip6_defhlim); } /* * XXX: this is borrowed from in6_pcbbind(). If possible, we should * share this function by all *bsd*... */ int -in6_pcbsetport(laddr, inp, td) +in6_pcbsetport(laddr, inp, cred) struct in6_addr *laddr; struct inpcb *inp; - struct thread *td; + struct ucred *cred; { struct socket *so = inp->inp_socket; u_int16_t lport = 0, first, last, *lastport; int count, error = 0, wild = 0; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; /* XXX: this is redundant when called from in6_pcbbind */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = INPLOOKUP_WILDCARD; inp->inp_flags |= INP_ANONPORT; if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { - if (td && (error = suser(td))) + if ((error = suser_cred(cred, 0))) return error; first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; lastport = &pcbinfo->lastport; } /* * Simple check to ensure all ports are not used up causing * a deadlock here. * * We split the two cases (up and down) so that the direction * is not being tested on each round of the loop. */ if (first > last) { /* * counting down */ count = first - last; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. */ inp->in6p_laddr = in6addr_any; return (EAGAIN); } --*lastport; if (*lastport > first || *lastport < last) *lastport = first; lport = htons(*lastport); } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport, wild)); } else { /* * counting up */ count = last - first; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. */ inp->in6p_laddr = in6addr_any; return (EAGAIN); } ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport, wild)); } inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } return (0); } /* * Generate kernel-internal form (scopeid embedded into s6_addr16[1]). * If the address scope of is link-local, embed the interface index in the * address. The routine determines our precedence * between advanced API scope/interface specification and basic API * specification. * * This function should be nuked in the future, when we get rid of embedded * scopeid thing. * * XXX actually, it is over-specification to return ifp against sin6_scope_id. * there can be multiple interfaces that belong to a particular scope zone * (in specification, we have 1:N mapping between a scope zone and interfaces). * we may want to change the function to return something other than ifp. */ int in6_embedscope(in6, sin6, in6p, ifpp) struct in6_addr *in6; const struct sockaddr_in6 *sin6; struct in6pcb *in6p; struct ifnet **ifpp; { struct ifnet *ifp = NULL; u_int32_t zoneid = sin6->sin6_scope_id; *in6 = sin6->sin6_addr; if (ifpp) *ifpp = NULL; /* * don't try to read sin6->sin6_addr beyond here, since the caller may * ask us to overwrite existing sockaddr_in6 */ #ifdef ENABLE_DEFAULT_SCOPE if (zoneid == 0) zoneid = scope6_addr2default(in6); #endif if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { struct in6_pktinfo *pi; /* KAME assumption: link id == interface id */ if (in6p && in6p->in6p_outputopts && (pi = in6p->in6p_outputopts->ip6po_pktinfo) && pi->ipi6_ifindex) { ifp = ifnet_byindex(pi->ipi6_ifindex); in6->s6_addr16[1] = htons(pi->ipi6_ifindex); } else if (in6p && IN6_IS_ADDR_MULTICAST(in6) && in6p->in6p_moptions && in6p->in6p_moptions->im6o_multicast_ifp) { ifp = in6p->in6p_moptions->im6o_multicast_ifp; in6->s6_addr16[1] = htons(ifp->if_index); } else if (zoneid) { if (if_index < zoneid) return (ENXIO); /* XXX EINVAL? */ ifp = ifnet_byindex(zoneid); /* XXX assignment to 16bit from 32bit variable */ in6->s6_addr16[1] = htons(zoneid & 0xffff); } if (ifpp) *ifpp = ifp; } return 0; } /* * generate standard sockaddr_in6 from embedded form. * touches sin6_addr and sin6_scope_id only. * * this function should be nuked in the future, when we get rid of * embedded scopeid thing. */ int in6_recoverscope(sin6, in6, ifp) struct sockaddr_in6 *sin6; const struct in6_addr *in6; struct ifnet *ifp; { u_int32_t zoneid; sin6->sin6_addr = *in6; /* * don't try to read *in6 beyond here, since the caller may * ask us to overwrite existing sockaddr_in6 */ sin6->sin6_scope_id = 0; if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { /* * KAME assumption: link id == interface id */ zoneid = ntohs(sin6->sin6_addr.s6_addr16[1]); if (zoneid) { /* sanity check */ if (zoneid < 0 || if_index < zoneid) return ENXIO; if (ifp && ifp->if_index != zoneid) return ENXIO; sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = zoneid; } } return 0; } /* * just clear the embedded scope identifier. */ void in6_clearscope(addr) struct in6_addr *addr; { if (IN6_IS_SCOPE_LINKLOCAL(addr) || IN6_IS_ADDR_MC_INTFACELOCAL(addr)) addr->s6_addr16[1] = 0; } void addrsel_policy_init() { ADDRSEL_LOCK_INIT(); init_policy_queue(); /* initialize the "last resort" policy */ bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; } static struct in6_addrpolicy * lookup_addrsel_policy(key) struct sockaddr_in6 *key; { struct in6_addrpolicy *match = NULL; ADDRSEL_LOCK(); match = match_addrsel_policy(key); if (match == NULL) match = &defaultaddrpolicy; else match->use++; ADDRSEL_UNLOCK(); return (match); } /* * Subroutines to manage the address selection policy table via sysctl. */ struct walkarg { struct sysctl_req *w_req; }; static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_DECL(_net_inet6_ip6); SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, CTLFLAG_RD, in6_src_sysctl, ""); static int in6_src_sysctl(SYSCTL_HANDLER_ARGS) { struct walkarg w; if (req->newptr) return EPERM; bzero(&w, sizeof(w)); w.w_req = req; return (walk_addrsel_policy(dump_addrsel_policyent, &w)); } int in6_src_ioctl(cmd, data) u_long cmd; caddr_t data; { int i; struct in6_addrpolicy ent0; if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) return (EOPNOTSUPP); /* check for safety */ ent0 = *(struct in6_addrpolicy *)data; if (ent0.label == ADDR_LABEL_NOTAPP) return (EINVAL); /* check if the prefix mask is consecutive. */ if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) return (EINVAL); /* clear trailing garbages (if any) of the prefix address. */ for (i = 0; i < 4; i++) { ent0.addr.sin6_addr.s6_addr32[i] &= ent0.addrmask.sin6_addr.s6_addr32[i]; } ent0.use = 0; switch (cmd) { case SIOCAADDRCTL_POLICY: return (add_addrsel_policyent(&ent0)); case SIOCDADDRCTL_POLICY: return (delete_addrsel_policyent(&ent0)); } return (0); /* XXX: compromise compilers */ } /* * The followings are implementation of the policy table using a * simple tail queue. * XXX such details should be hidden. * XXX implementation using binary tree should be more efficient. */ struct addrsel_policyent { TAILQ_ENTRY(addrsel_policyent) ape_entry; struct in6_addrpolicy ape_policy; }; TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); struct addrsel_policyhead addrsel_policytab; static void init_policy_queue() { TAILQ_INIT(&addrsel_policytab); } static int add_addrsel_policyent(newpolicy) struct in6_addrpolicy *newpolicy; { struct addrsel_policyent *new, *pol; MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, M_WAITOK); ADDRSEL_LOCK(); /* duplication check */ for (pol = TAILQ_FIRST(&addrsel_policytab); pol; pol = TAILQ_NEXT(pol, ape_entry)) { if (SA6_ARE_ADDR_EQUAL(&newpolicy->addr, &pol->ape_policy.addr) && SA6_ARE_ADDR_EQUAL(&newpolicy->addrmask, &pol->ape_policy.addrmask)) { ADDRSEL_UNLOCK(); FREE(new, M_IFADDR); return (EEXIST); /* or override it? */ } } bzero(new, sizeof(*new)); /* XXX: should validate entry */ new->ape_policy = *newpolicy; TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry); ADDRSEL_UNLOCK(); return (0); } static int delete_addrsel_policyent(key) struct in6_addrpolicy *key; { struct addrsel_policyent *pol; ADDRSEL_LOCK(); /* search for the entry in the table */ for (pol = TAILQ_FIRST(&addrsel_policytab); pol; pol = TAILQ_NEXT(pol, ape_entry)) { if (SA6_ARE_ADDR_EQUAL(&key->addr, &pol->ape_policy.addr) && SA6_ARE_ADDR_EQUAL(&key->addrmask, &pol->ape_policy.addrmask)) { break; } } if (pol == NULL) { ADDRSEL_UNLOCK(); return (ESRCH); } TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry); ADDRSEL_UNLOCK(); return (0); } static int walk_addrsel_policy(callback, w) int (*callback) __P((struct in6_addrpolicy *, void *)); void *w; { struct addrsel_policyent *pol; int error = 0; ADDRSEL_LOCK(); for (pol = TAILQ_FIRST(&addrsel_policytab); pol; pol = TAILQ_NEXT(pol, ape_entry)) { if ((error = (*callback)(&pol->ape_policy, w)) != 0) { ADDRSEL_UNLOCK(); return (error); } } ADDRSEL_UNLOCK(); return (error); } static int dump_addrsel_policyent(pol, arg) struct in6_addrpolicy *pol; void *arg; { int error = 0; struct walkarg *w = arg; error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); return (error); } static struct in6_addrpolicy * match_addrsel_policy(key) struct sockaddr_in6 *key; { struct addrsel_policyent *pent; struct in6_addrpolicy *bestpol = NULL, *pol; int matchlen, bestmatchlen = -1; u_char *mp, *ep, *k, *p, m; for (pent = TAILQ_FIRST(&addrsel_policytab); pent; pent = TAILQ_NEXT(pent, ape_entry)) { matchlen = 0; pol = &pent->ape_policy; mp = (u_char *)&pol->addrmask.sin6_addr; ep = mp + 16; /* XXX: scope field? */ k = (u_char *)&key->sin6_addr; p = (u_char *)&pol->addr.sin6_addr; for (; mp < ep && *mp; mp++, k++, p++) { m = *mp; if ((*k & m) != *p) goto next; /* not match */ if (m == 0xff) /* short cut for a typical case */ matchlen += 8; else { while (m >= 0x80) { matchlen++; m <<= 1; } } } /* matched. check if this is better than the current best. */ if (bestpol == NULL || matchlen > bestmatchlen) { bestpol = pol; bestmatchlen = matchlen; } next: continue; } return (bestpol); } Index: head/sys/netinet6/udp6_output.c =================================================================== --- head/sys/netinet6/udp6_output.c (revision 127504) +++ head/sys/netinet6/udp6_output.c (revision 127505) @@ -1,310 +1,311 @@ /* $FreeBSD$ */ /* $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_var.h 8.1 (Berkeley) 6/10/93 */ #include "opt_ipsec.h" #include "opt_inet.h" #include "opt_inet6.h" #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #ifdef INET6 #include #endif #endif /* IPSEC */ #include /* * UDP protocol inplementation. * Per RFC 768, August, 1980. */ #define in6pcb inpcb #define udp6stat udpstat #define udp6s_opackets udps_opackets int udp6_output(in6p, m, addr6, control, td) struct in6pcb *in6p; struct mbuf *m; struct mbuf *control; struct sockaddr *addr6; struct thread *td; { u_int32_t ulen = m->m_pkthdr.len; u_int32_t plen = sizeof(struct udphdr) + ulen; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr *laddr, *faddr; u_short fport; int error = 0; struct ip6_pktopts opt, *stickyopt = in6p->in6p_outputopts; int priv; int af = AF_INET6, hlen = sizeof(struct ip6_hdr); int flags; struct sockaddr_in6 tmp; priv = 0; if (td && !suser(td)) priv = 1; if (control) { if ((error = ip6_setpktoptions(control, &opt, stickyopt, priv, 0, IPPROTO_UDP)) != 0) goto release; in6p->in6p_outputopts = &opt; } if (addr6) { /* * IPv4 version of udp_output calls in_pcbconnect in this case, * which needs splnet and affects performance. * Since we saw no essential reason for calling in_pcbconnect, * we get rid of such kind of logic, and call in6_selectsrc * and in6_pcbsetport in order to fill in the local address * and the local port. */ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr6; if (sin6->sin6_port == 0) { error = EADDRNOTAVAIL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { /* how about ::ffff:0.0.0.0 case? */ error = EISCONN; goto release; } /* protect *sin6 from overwrites */ tmp = *sin6; sin6 = &tmp; faddr = &sin6->sin6_addr; fport = sin6->sin6_port; /* allow 0 port */ if (IN6_IS_ADDR_V4MAPPED(faddr)) { if ((in6p->in6p_flags & IN6P_IPV6_V6ONLY)) { /* * I believe we should explicitly discard the * packet when mapped addresses are disabled, * rather than send the packet as an IPv6 one. * If we chose the latter approach, the packet * might be sent out on the wire based on the * default route, the situation which we'd * probably want to avoid. * (20010421 jinmei@kame.net) */ error = EINVAL; goto release; } else af = AF_INET; } /* KAME hack: embed scopeid */ if (in6_embedscope(&sin6->sin6_addr, sin6, in6p, NULL) != 0) { error = EINVAL; goto release; } if (!IN6_IS_ADDR_V4MAPPED(faddr)) { laddr = in6_selectsrc(sin6, in6p->in6p_outputopts, in6p->in6p_moptions, NULL, &in6p->in6p_laddr, &error); } else laddr = &in6p->in6p_laddr; /* XXX */ if (laddr == NULL) { if (error == 0) error = EADDRNOTAVAIL; goto release; } if (in6p->in6p_lport == 0 && - (error = in6_pcbsetport(laddr, in6p, td)) != 0) + (error = in6_pcbsetport(laddr, in6p, td->td_ucred)) != 0) goto release; } else { if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { error = ENOTCONN; goto release; } if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) { if ((in6p->in6p_flags & IN6P_IPV6_V6ONLY)) { /* * XXX: this case would happen when the * application sets the V6ONLY flag after * connecting the foreign address. * Such applications should be fixed, * so we bark here. */ log(LOG_INFO, "udp6_output: IPV6_V6ONLY " "option was set for a connected socket\n"); error = EINVAL; goto release; } else af = AF_INET; } laddr = &in6p->in6p_laddr; faddr = &in6p->in6p_faddr; fport = in6p->in6p_fport; } if (af == AF_INET) hlen = sizeof(struct ip); /* * Calculate data length and get a mbuf * for UDP and IP6 headers. */ M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT); if (m == 0) { error = ENOBUFS; goto release; } /* * Stuff checksum and output datagram. */ udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen); udp6->uh_sport = in6p->in6p_lport; /* lport is always set in the PCB */ udp6->uh_dport = fport; if (plen <= 0xffff) udp6->uh_ulen = htons((u_short)plen); else udp6->uh_ulen = 0; udp6->uh_sum = 0; switch (af) { case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; #if 0 /* ip6_plen will be filled in ip6_output. */ ip6->ip6_plen = htons((u_short)plen); #endif ip6->ip6_nxt = IPPROTO_UDP; ip6->ip6_hlim = in6_selecthlim(in6p, NULL); ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; if ((udp6->uh_sum = in6_cksum(m, IPPROTO_UDP, sizeof(struct ip6_hdr), plen)) == 0) { udp6->uh_sum = 0xffff; } flags = 0; udp6stat.udp6s_opackets++; error = ip6_output(m, in6p->in6p_outputopts, NULL, flags, in6p->in6p_moptions, NULL, in6p); break; case AF_INET: error = EAFNOSUPPORT; goto release; } goto releaseopt; release: m_freem(m); releaseopt: if (control) { ip6_clearpktopts(in6p->in6p_outputopts, -1); in6p->in6p_outputopts = stickyopt; m_freem(control); } return (error); } Index: head/sys/netinet6/udp6_usrreq.c =================================================================== --- head/sys/netinet6/udp6_usrreq.c (revision 127504) +++ head/sys/netinet6/udp6_usrreq.c (revision 127505) @@ -1,760 +1,762 @@ /* $FreeBSD$ */ /* $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_var.h 8.1 (Berkeley) 6/10/93 */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif /* IPSEC */ #ifdef FAST_IPSEC #include #include #endif /* FAST_IPSEC */ /* * UDP protocol inplementation. * Per RFC 768, August, 1980. */ extern struct protosw inetsw[]; static int udp6_detach __P((struct socket *so)); int udp6_input(mp, offp, proto) struct mbuf **mp; int *offp, proto; { struct mbuf *m = *mp, *opts; register struct ip6_hdr *ip6; register struct udphdr *uh; register struct inpcb *in6p; int off = *offp; int plen, ulen; struct sockaddr_in6 fromsa; opts = NULL; ip6 = mtod(m, struct ip6_hdr *); if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { /* XXX send icmp6 host/port unreach? */ m_freem(m); return IPPROTO_DONE; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); uh = (struct udphdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh)); if (!uh) return IPPROTO_DONE; #endif udpstat.udps_ipackets++; plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); ulen = ntohs((u_short)uh->uh_ulen); if (plen != ulen) { udpstat.udps_badlen++; goto bad; } /* * Checksum extended UDP header and data. */ if (uh->uh_sum == 0) udpstat.udps_nosum++; else if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) { udpstat.udps_badsum++; goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct inpcb *last; /* * Deliver a multicast datagram to all sockets * for which the local and remote addresses and ports match * those of the incoming datagram. This allows more than * one process to receive multicasts on the same port. * (This really ought to be done for unicast datagrams as * well, but that would cause problems with existing * applications that open both address-specific sockets and * a wildcard socket listening to the same port -- they would * end up receiving duplicates of every unicast datagram. * Those applications open the multiple sockets to overcome an * inadequacy of the UDP socket interface, but for backwards * compatibility we avoid the problem here rather than * fixing the interface. Maybe 4.5BSD will remedy this?) */ /* * In a case that laddr should be set to the link-local * address (this happens in RIPng), the multicast address * specified in the received packet does not match with * laddr. To cure this situation, the matching is relaxed * if the receiving interface is the same as one specified * in the socket and if the destination multicast address * matches one of the multicast groups specified in the socket. */ /* * Construct sockaddr format source address. */ init_sin6(&fromsa, m); fromsa.sin6_port = uh->uh_sport; /* * KAME note: traditionally we dropped udpiphdr from mbuf here. * We need udphdr for IPsec processing so we do that later. */ /* * Locate pcb(s) for datagram. * (Algorithm copied from raw_intr().) */ last = NULL; LIST_FOREACH(in6p, &udb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->in6p_lport != uh->uh_dport) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; } if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src) || in6p->in6p_fport != uh->uh_sport) continue; } if (last != NULL) { struct mbuf *n; #if defined(IPSEC) || defined(FAST_IPSEC) /* * Check AH/ESP integrity. */ if (ipsec6_in_reject(m, last)) { #ifdef IPSEC ipsec6stat.in_polvio++; #endif /* IPSEC */ /* do not inject data into pcb */ } else #endif /*IPSEC || FAST_IPSEC*/ if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { /* * KAME NOTE: do not * m_copy(m, offset, ...) above. * sbappendaddr() expects M_PKTHDR, * and m_copy() will copy M_PKTHDR * only if offset is 0. */ if (last->in6p_flags & IN6P_CONTROLOPTS || last->in6p_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, n, &opts); m_adj(n, off + sizeof(struct udphdr)); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); udpstat.udps_fullsock++; } else sorwakeup(last->in6p_socket); opts = NULL; } } last = in6p; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids searching * through all pcbs in the common case of a non-shared * port. It assumes that an application will never * clear these options after setting them. */ if ((last->in6p_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. * (No need to send an ICMP Port Unreachable * for a broadcast or multicast datgram.) */ udpstat.udps_noport++; udpstat.udps_noportmcast++; goto bad; } #if defined(IPSEC) || defined(FAST_IPSEC) /* * Check AH/ESP integrity. */ if (ipsec6_in_reject(m, last)) { #ifdef IPSEC ipsec6stat.in_polvio++; #endif /* IPSEC */ goto bad; } #endif /*IPSEC || FAST_IPSEC*/ if (last->in6p_flags & IN6P_CONTROLOPTS || last->in6p_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, m, &opts); m_adj(m, off + sizeof(struct udphdr)); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { udpstat.udps_fullsock++; goto bad; } sorwakeup(last->in6p_socket); return IPPROTO_DONE; } /* * Locate pcb for datagram. */ in6p = in6_pcblookup_hash(&udbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); if (in6p == 0) { if (log_in_vain) { char buf[INET6_ADDRSTRLEN]; strcpy(buf, ip6_sprintf(&ip6->ip6_dst)); log(LOG_INFO, "Connection attempt to UDP [%s]:%d from [%s]:%d\n", buf, ntohs(uh->uh_dport), ip6_sprintf(&ip6->ip6_src), ntohs(uh->uh_sport)); } udpstat.udps_noport++; if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); udpstat.udps_noportmcast++; goto bad; } icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); return IPPROTO_DONE; } #if defined(IPSEC) || defined(FAST_IPSEC) /* * Check AH/ESP integrity. */ if (ipsec6_in_reject(m, in6p)) { #ifdef IPSEC ipsec6stat.in_polvio++; #endif /* IPSEC */ goto bad; } #endif /*IPSEC || FAST_IPSEC*/ /* * Construct sockaddr format source address. * Stuff source address and datagram in user buffer. */ init_sin6(&fromsa, m); fromsa.sin6_port = uh->uh_sport; if (in6p->in6p_flags & IN6P_CONTROLOPTS || in6p->in6p_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(in6p, m, &opts); m_adj(m, off + sizeof(struct udphdr)); if (sbappendaddr(&in6p->in6p_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { udpstat.udps_fullsock++; goto bad; } sorwakeup(in6p->in6p_socket); return IPPROTO_DONE; bad: if (m) m_freem(m); if (opts) m_freem(opts); return IPPROTO_DONE; } void udp6_ctlinput(cmd, sa, d) int cmd; struct sockaddr *sa; void *d; { struct udphdr uh; struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify) __P((struct inpcb *, int)) = udp_notify; struct udp_portonly { u_int16_t uh_sport; u_int16_t uh_dport; } *uhp; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } if (ip6) { /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. */ /* check if we can safely examine src and dst ports */ if (m->m_pkthdr.len < off + sizeof(*uhp)) return; bzero(&uh, sizeof(uh)); m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); (void) in6_pcbnotify(&udb, sa, uh.uh_dport, (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd, cmdarg, notify); } else (void) in6_pcbnotify(&udb, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } static int udp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error, s; error = suser(req->td); if (error) return (error); if (req->newlen != sizeof(addrs)) return (EINVAL); if (req->oldlen != sizeof(struct xucred)) return (EINVAL); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); s = splnet(); inp = in6_pcblookup_hash(&udbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 1, NULL); if (!inp || !inp->inp_socket) { error = ENOENT; goto out; } cru2x(inp->inp_socket->so_cred, &xuc); error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); out: splx(s); return (error); } SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection"); static int udp6_abort(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; /* ??? possible? panic instead? */ soisdisconnected(so); s = splnet(); in6_pcbdetach(inp); splx(s); return 0; } static int udp6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int s, error; inp = sotoinpcb(so); if (inp != 0) return EINVAL; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return error; } s = splnet(); error = in_pcballoc(so, &udbinfo, "udp6inp"); splx(s); if (error) return error; inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; if (!ip6_v6only) inp->inp_vflag |= INP_IPV4; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; /* just to be sure */ /* * XXX: ugly!! * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = ip_defttl; return 0; } static int udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int s, error; inp = sotoinpcb(so); if (inp == 0) return EINVAL; inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) inp->inp_vflag |= INP_IPV4; else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; s = splnet(); - error = in_pcbbind(inp, (struct sockaddr *)&sin, td); + error = in_pcbbind(inp, (struct sockaddr *)&sin, + td->td_ucred); splx(s); return error; } } s = splnet(); - error = in6_pcbbind(inp, nam, td); + error = in6_pcbbind(inp, nam, td->td_ucred); splx(s); return error; } static int udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int s, error; inp = sotoinpcb(so); if (inp == 0) return EINVAL; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; if (inp->inp_faddr.s_addr != INADDR_ANY) return EISCONN; in6_sin6_2_sin(&sin, sin6_p); s = splnet(); - error = in_pcbconnect(inp, (struct sockaddr *)&sin, td); + error = in_pcbconnect(inp, (struct sockaddr *)&sin, + td->td_ucred); splx(s); if (error == 0) { inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; soisconnected(so); } return error; } } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) return EISCONN; s = splnet(); - error = in6_pcbconnect(inp, nam, td); + error = in6_pcbconnect(inp, nam, td->td_ucred); splx(s); if (error == 0) { if (!ip6_v6only) { /* should be non mapped addr */ inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; } soisconnected(so); } return error; } static int udp6_detach(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; s = splnet(); in6_pcbdetach(inp); splx(s); return 0; } static int udp6_disconnect(struct socket *so) { struct inpcb *inp; int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; return ((*pru->pru_disconnect)(so)); } #endif if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) return ENOTCONN; s = splnet(); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; splx(s); so->so_state &= ~SS_ISCONNECTED; /* XXX */ return 0; } static int udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; int error = 0; inp = sotoinpcb(so); if (inp == 0) { error = EINVAL; goto bad; } if (addr) { if (addr->sa_len != sizeof(struct sockaddr_in6)) { error = EINVAL; goto bad; } if (addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto bad; } } #ifdef INET if (!ip6_v6only) { int hasv4addr; struct sockaddr_in6 *sin6 = 0; if (addr == 0) hasv4addr = (inp->inp_vflag & INP_IPV4); else { sin6 = (struct sockaddr_in6 *)addr; hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? 1 : 0; } if (hasv4addr) { struct pr_usrreqs *pru; if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * since a user of this socket set the * IPV6_V6ONLY flag, we discard this * datagram destined to a v4 addr. */ return EINVAL; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { /* * when remote addr is IPv4-mapped * address, local addr should not be * an IPv6 address; since you cannot * determine how to map IPv6 source * address to IPv4. */ return EINVAL; } if (sin6) in6_sin6_2_sin_in_sock(addr); pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; error = ((*pru->pru_send)(so, flags, m, addr, control, td)); /* addr will just be freed in sendit(). */ return error; } } #endif return udp6_output(inp, m, addr, control, td); bad: m_freem(m); return (error); } struct pr_usrreqs udp6_usrreqs = { udp6_abort, pru_accept_notsupp, udp6_attach, udp6_bind, udp6_connect, pru_connect2_notsupp, in6_control, udp6_detach, udp6_disconnect, pru_listen_notsupp, in6_mapped_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, udp6_send, pru_sense_null, udp_shutdown, in6_mapped_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel };