Index: head/sys/netinet/raw_ip.c =================================================================== --- head/sys/netinet/raw_ip.c (revision 354473) +++ head/sys/netinet/raw_ip.c (revision 354474) @@ -1,1185 +1,1184 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include VNET_DEFINE(int, ip_defttl) = IPDEFTTL; SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_defttl), 0, "Maximum TTL on IP packets"); VNET_DEFINE(struct inpcbhead, ripcb); VNET_DEFINE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) /* * Control and data hooks for ipfw, dummynet, divert and so on. * The data hooks are not used here but it is convenient * to keep them all in one place. */ VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL; VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL; int (*ip_dn_ctl_ptr)(struct sockopt *); int (*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *); void (*ip_divert_ptr)(struct mbuf *, bool); int (*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool); #ifdef INET /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip_mrouter); /* * The various mrouter and rsvp functions. */ int (*ip_mrouter_set)(struct socket *, struct sockopt *); int (*ip_mrouter_get)(struct socket *, struct sockopt *); int (*ip_mrouter_done)(void); int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); int (*mrt_ioctl)(u_long, caddr_t, int); int (*legal_vif_num)(int); u_long (*ip_mcast_src)(int); int (*rsvp_input_p)(struct mbuf **, int *, int); int (*ip_rsvp_vif)(struct socket *, struct sockopt *); void (*ip_rsvp_force_done)(struct socket *); #endif /* INET */ extern struct protosw inetsw[]; u_long rip_sendspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); u_long rip_recvspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); /* * Hash functions */ #define INP_PCBHASH_RAW_SIZE 256 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \ (((proto) + (laddr) + (faddr)) % (mask) + 1) #ifdef INET static void rip_inshash(struct inpcb *inp) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *pcbhash; int hash; INP_INFO_WLOCK_ASSERT(pcbinfo); INP_WLOCK_ASSERT(inp); if (inp->inp_ip_p != 0 && inp->inp_laddr.s_addr != INADDR_ANY && inp->inp_faddr.s_addr != INADDR_ANY) { hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr, inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask); } else hash = 0; pcbhash = &pcbinfo->ipi_hashbase[hash]; CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash); } static void rip_delhash(struct inpcb *inp) { INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); CK_LIST_REMOVE(inp, inp_hash); } #endif /* INET */ /* * Raw interface to IP protocol. */ /* * Initialize raw connection block q. */ static void rip_zone_change(void *tag) { uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets); } static int rip_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "rawinp"); return (0); } void rip_init(void) { in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE, 1, "ripcb", rip_inpcb_init, IPI_HASHFIELDS_NONE); EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, EVENTHANDLER_PRI_ANY); } #ifdef VIMAGE static void rip_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_ripcbinfo); } VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL); #endif #ifdef INET static int rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n, struct sockaddr_in *ripsrc) { int policyfail = 0; INP_LOCK_ASSERT(last); #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* check AH/ESP integrity. */ if (IPSEC_ENABLED(ipv4)) { if (IPSEC_CHECK_POLICY(ipv4, n, last) != 0) policyfail = 1; } #endif /* IPSEC */ #ifdef MAC if (!policyfail && mac_inpcb_check_deliver(last, n) != 0) policyfail = 1; #endif /* Check the minimum TTL for socket. */ if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl) policyfail = 1; if (!policyfail) { struct mbuf *opts = NULL; struct socket *so; so = last->inp_socket; if ((last->inp_flags & INP_CONTROLOPTS) || (so->so_options & (SO_TIMESTAMP | SO_BINTIME))) ip_savecontrol(last, &opts, ip, n); SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)ripsrc, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&so->so_rcv); } else sorwakeup_locked(so); } else m_freem(n); return (policyfail); } /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. */ int rip_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct inpcb *inp, *last; struct sockaddr_in ripsrc; - struct epoch_tracker et; int hash; + NET_EPOCH_ASSERT(); + *mp = NULL; bzero(&ripsrc, sizeof(ripsrc)); ripsrc.sin_len = sizeof(ripsrc); ripsrc.sin_family = AF_INET; ripsrc.sin_addr = ip->ip_src; last = NULL; ifp = m->m_pkthdr.rcvif; hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr, ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask); - INP_INFO_RLOCK_ET(&V_ripcbinfo, et); CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { if (inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; if (inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (last != NULL) { struct mbuf *n; n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); last = NULL; } INP_RLOCK(inp); if (__predict_false(inp->inp_flags2 & INP_FREED)) goto skip_1; if (jailed_without_vnet(inp->inp_cred)) { /* * XXX: If faddr was bound to multicast group, * jailed raw socket will drop datagram. */ if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) goto skip_1; } last = inp; continue; skip_1: INP_RUNLOCK(inp); } CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) { if (inp->inp_ip_p && inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (!in_nullhost(inp->inp_laddr) && !in_hosteq(inp->inp_laddr, ip->ip_dst)) continue; if (!in_nullhost(inp->inp_faddr) && !in_hosteq(inp->inp_faddr, ip->ip_src)) continue; if (last != NULL) { struct mbuf *n; n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); last = NULL; } INP_RLOCK(inp); if (__predict_false(inp->inp_flags2 & INP_FREED)) goto skip_2; if (jailed_without_vnet(inp->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) goto skip_2; } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (inp->inp_moptions != NULL && IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { /* * If the incoming datagram is for IGMP, allow it * through unconditionally to the raw socket. * * In the case of IGMPv2, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. imo_multi_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * filter the control traffic appropriately. */ int blocked; blocked = MCAST_PASS; if (proto != IPPROTO_IGMP) { struct sockaddr_in group; bzero(&group, sizeof(struct sockaddr_in)); group.sin_len = sizeof(struct sockaddr_in); group.sin_family = AF_INET; group.sin_addr = ip->ip_dst; blocked = imo_multi_filter(inp->inp_moptions, ifp, (struct sockaddr *)&group, (struct sockaddr *)&ripsrc); } if (blocked != MCAST_PASS) { IPSTAT_INC(ips_notmember); goto skip_2; } } last = inp; continue; skip_2: INP_RUNLOCK(inp); } - INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); if (last != NULL) { if (rip_append(last, ip, m, &ripsrc) != 0) IPSTAT_INC(ips_delivered); INP_RUNLOCK(last); } else { if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { IPSTAT_INC(ips_noproto); IPSTAT_DEC(ips_delivered); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); } else { m_freem(m); } } return (IPPROTO_DONE); } /* * Generate IP header and pass packet to ip_output. Tack on options user may * have setup with control call. */ int rip_output(struct mbuf *m, struct socket *so, ...) { struct ip *ip; int error; struct inpcb *inp = sotoinpcb(so); va_list ap; u_long dst; int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST; int cnt, hlen; u_char opttype, optlen, *cp; va_start(ap, so); dst = va_arg(ap, u_long); va_end(ap); /* * If the user handed us a complete IP packet, use it. Otherwise, * allocate an mbuf for a header and fill it in. */ if ((inp->inp_flags & INP_HDRINCL) == 0) { if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } M_PREPEND(m, sizeof(struct ip), M_NOWAIT); if (m == NULL) return(ENOBUFS); INP_RLOCK(inp); ip = mtod(m, struct ip *); ip->ip_tos = inp->inp_ip_tos; if (inp->inp_flags & INP_DONTFRAG) ip->ip_off = htons(IP_DF); else ip->ip_off = htons(0); ip->ip_p = inp->inp_ip_p; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; if (jailed(inp->inp_cred)) { /* * prison_local_ip4() would be good enough but would * let a source of INADDR_ANY pass, which we do not * want to see from jails. */ if (ip->ip_src.s_addr == INADDR_ANY) { error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src, inp->inp_cred); } else { error = prison_local_ip4(inp->inp_cred, &ip->ip_src); } if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } } ip->ip_ttl = inp->inp_ip_ttl; } else { if (m->m_pkthdr.len > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; if (m->m_len < hlen) { m = m_pullup(m, hlen); if (m == NULL) return (EINVAL); ip = mtod(m, struct ip *); } INP_RLOCK(inp); /* * Don't allow both user specified and setsockopt options, * and don't allow packet length sizes that will crash. */ if ((hlen < sizeof (*ip)) || ((hlen > sizeof (*ip)) && inp->inp_options) || (ntohs(ip->ip_len) != m->m_pkthdr.len)) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } error = prison_check_ip4(inp->inp_cred, &ip->ip_src); if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } /* * Don't allow IP options which do not have the required * structure as specified in section 3.1 of RFC 791 on * pages 15-23. */ cp = (u_char *)(ip + 1); cnt = hlen - sizeof (struct ip); for (; cnt > 0; cnt -= optlen, cp += optlen) { opttype = cp[IPOPT_OPTVAL]; if (opttype == IPOPT_EOL) break; if (opttype == IPOPT_NOP) { optlen = 1; continue; } if (cnt < IPOPT_OLEN + sizeof(u_char)) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } optlen = cp[IPOPT_OLEN]; if (optlen < IPOPT_OLEN + sizeof(u_char) || optlen > cnt) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } } /* * This doesn't allow application to specify ID of zero, * but we got this limitation from the beginning of history. */ if (ip->ip_id == 0) ip_fillid(ip); /* * XXX prevent ip_output from overwriting header fields. */ flags |= IP_RAWOUTPUT; IPSTAT_INC(ips_rawout); } if (inp->inp_flags & INP_ONESBCAST) flags |= IP_SENDONES; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif error = ip_output(m, inp->inp_options, NULL, flags, inp->inp_moptions, inp); INP_RUNLOCK(inp); return (error); } /* * Raw IP socket option processing. * * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could * only be created by a privileged process, and as such, socket option * operations to manage system properties on any raw socket were allowed to * take place without explicit additional access control checks. However, * raw sockets can now also be created in jail(), and therefore explicit * checks are now required. Likewise, raw sockets can be used by a process * after it gives up privilege, so some caution is required. For options * passed down to the IP layer via ip_ctloutput(), checks are assumed to be * performed in ip_ctloutput() and therefore no check occurs here. * Unilaterally checking priv_check() here breaks normal IP socket option * operations on raw sockets. * * When adding new socket options here, make sure to add access control * checks here as necessary. * * XXX-BZ inp locking? */ int rip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error, optval; if (sopt->sopt_level != IPPROTO_IP) { if ((sopt->sopt_level == SOL_SOCKET) && (sopt->sopt_name == SO_SETFIB)) { inp->inp_inc.inc_fibnum = so->so_fibnum; return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case IP_HDRINCL: optval = inp->inp_flags & INP_HDRINCL; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: /* ADD actually returns the body... */ case IP_FW_GET: case IP_FW_TABLE_GETSIZE: case IP_FW_TABLE_LIST: case IP_FW_NAT_GET_CONFIG: case IP_FW_NAT_GET_LOG: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_GET: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT; break ; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_get ? ip_mrouter_get(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case IP_HDRINCL: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: case IP_FW_DEL: case IP_FW_FLUSH: case IP_FW_ZERO: case IP_FW_RESETLOG: case IP_FW_TABLE_ADD: case IP_FW_TABLE_DEL: case IP_FW_TABLE_FLUSH: case IP_FW_NAT_CFG: case IP_FW_NAT_DEL: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_CONFIGURE: case IP_DUMMYNET_DEL: case IP_DUMMYNET_FLUSH: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT ; break ; case IP_RSVP_ON: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_init(so); break; case IP_RSVP_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_done(); break; case IP_RSVP_VIF_ON: case IP_RSVP_VIF_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_vif ? ip_rsvp_vif(so, sopt) : EINVAL; break; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; } return (error); } /* * This function exists solely to receive the PRC_IFDOWN messages which are * sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, and calls * in_ifadown() to remove all routes corresponding to that address. It also * receives the PRC_IFUP messages from if_up() and reinstalls the interface * routes. */ void rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; struct ifnet *ifp; int err; int flags; switch (cmd) { case PRC_IFDOWN: IN_IFADDR_RLOCK(&in_ifa_tracker); CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa && (ia->ia_flags & IFA_ROUTE)) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * in_scrubprefix() kills the interface route. */ in_scrubprefix(ia, 0); /* * in_ifadown gets rid of all the rest of the * routes. This is not quite the right thing * to do, but at least if we are running a * routing process they will come back. */ in_ifadown(&ia->ia_ifa, 0); ifa_free(&ia->ia_ifa); break; } } if (ia == NULL) /* If ia matched, already unlocked. */ IN_IFADDR_RUNLOCK(&in_ifa_tracker); break; case PRC_IFUP: IN_IFADDR_RLOCK(&in_ifa_tracker); CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa) break; } if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return; } ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); flags = RTF_UP; ifp = ia->ia_ifa.ifa_ifp; if ((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_flags & IFF_POINTOPOINT)) flags |= RTF_HOST; err = ifa_del_loopback_route((struct ifaddr *)ia, sa); err = rtinit(&ia->ia_ifa, RTM_ADD, flags); if (err == 0) ia->ia_flags |= IFA_ROUTE; err = ifa_add_loopback_route((struct ifaddr *)ia, sa); ifa_free(&ia->ia_ifa); break; } } static int rip_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); if (proto >= IPPROTO_MAX || proto < 0) return EPROTONOSUPPORT; error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV4; inp->inp_ip_p = proto; inp->inp_ip_ttl = V_ip_defttl; rip_inshash(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); INP_WUNLOCK(inp); return (0); } static void rip_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_detach: inp == NULL")); KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, ("rip_detach: not closed")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); if (so == V_ip_mrouter && ip_mrouter_done) ip_mrouter_done(); if (ip_rsvp_force_done) ip_rsvp_force_done(so); if (so == V_ip_rsvpd) ip_rsvp_done(); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } static void rip_dodisconnect(struct socket *so, struct inpcb *inp) { struct inpcbinfo *pcbinfo; pcbinfo = inp->inp_pcbinfo; INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr.s_addr = INADDR_ANY; rip_inshash(inp); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; SOCK_UNLOCK(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); } static void rip_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_abort: inp == NULL")); rip_dodisconnect(so, inp); } static void rip_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_close: inp == NULL")); rip_dodisconnect(so, inp); } static int rip_disconnect(struct socket *so) { struct inpcb *inp; if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_disconnect: inp == NULL")); rip_dodisconnect(so, inp); return (0); } static int rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; int error; if (nam->sa_len != sizeof(*addr)) return (EINVAL); error = prison_check_ip4(td->td_ucred, &addr->sin_addr); if (error != 0) return (error); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_bind: inp == NULL")); if (CK_STAILQ_EMPTY(&V_ifnet) || (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || (addr->sin_addr.s_addr && (inp->inp_flags & INP_BINDANY) == 0 && ifa_ifwithaddr_check((struct sockaddr *)addr) == 0)) return (EADDRNOTAVAIL); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_laddr = addr->sin_addr; rip_inshash(inp); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (CK_STAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) return (EAFNOSUPPORT); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_connect: inp == NULL")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr = addr->sin_addr; rip_inshash(inp); soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; u_long dst; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_send: inp == NULL")); /* * Note: 'dst' reads below are unlocked. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return (EISCONN); } dst = inp->inp_faddr.s_addr; /* Unlocked read. */ } else { if (nam == NULL) { m_freem(m); return (ENOTCONN); } dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; } return (rip_output(m, so, dst)); } #endif /* INET */ static int rip_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; struct epoch_tracker et; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = V_ripcbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); } if (req->newptr != 0) return (EPERM); /* * OK, now we're committed to doing something. */ INP_INFO_WLOCK(&V_ripcbinfo); gencnt = V_ripcbinfo.ipi_gencnt; n = V_ripcbinfo.ipi_count; INP_INFO_WUNLOCK(&V_ripcbinfo); bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); INP_INFO_RLOCK_ET(&V_ripcbinfo, et); for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; inp = CK_LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { in_pcbref(inp); inp_list[i++] = inp; } INP_WUNLOCK(inp); } INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else INP_RUNLOCK(inp); } INP_INFO_WLOCK(&V_ripcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_WUNLOCK(&V_ripcbinfo); if (!error) { struct epoch_tracker et; /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. */ INP_INFO_RLOCK_ET(&V_ripcbinfo, et); xig.xig_gen = V_ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_ripcbinfo.ipi_count; INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); #ifdef INET struct pr_usrreqs rip_usrreqs = { .pru_abort = rip_abort, .pru_attach = rip_attach, .pru_bind = rip_bind, .pru_connect = rip_connect, .pru_control = in_control, .pru_detach = rip_detach, .pru_disconnect = rip_disconnect, .pru_peeraddr = in_getpeeraddr, .pru_send = rip_send, .pru_shutdown = rip_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = rip_close, }; #endif /* INET */ Index: head/sys/netinet6/raw_ip6.c =================================================================== --- head/sys/netinet6/raw_ip6.c (revision 354473) +++ head/sys/netinet6/raw_ip6.c (revision 354474) @@ -1,912 +1,911 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ipsec.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) /* * Raw interface to IP6 protocol. */ VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) extern u_long rip_sendspace; extern u_long rip_recvspace; VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat); VNET_PCPUSTAT_SYSINIT(rip6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(rip6stat); #endif /* VIMAGE */ /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip6_mrouter); /* * The various mrouter functions. */ int (*ip6_mrouter_set)(struct socket *, struct sockopt *); int (*ip6_mrouter_get)(struct socket *, struct sockopt *); int (*ip6_mrouter_done)(void); int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); int (*mrt6_ioctl)(u_long, caddr_t); /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. */ int rip6_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct inpcb *inp; struct inpcb *last = NULL; struct mbuf *opts = NULL; struct sockaddr_in6 fromsa; - struct epoch_tracker et; + NET_EPOCH_ASSERT(); + RIP6STAT_INC(rip6s_ipackets); init_sin6(&fromsa, m, 0); /* general init */ ifp = m->m_pkthdr.rcvif; - INP_INFO_RLOCK_ET(&V_ripcbinfo, et); CK_LIST_FOREACH(inp, &V_ripcb, inp_list) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (inp->inp_ip_p && inp->inp_ip_p != proto) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src)) continue; if (last != NULL) { struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT); #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. */ if (IPSEC_ENABLED(ipv6)) { if (n != NULL && IPSEC_CHECK_POLICY(ipv6, n, last) != 0) { m_freem(n); /* Do not inject data into pcb. */ n = NULL; } } #endif /* IPSEC */ if (n) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); last = NULL; } INP_RLOCK(inp); if (__predict_false(inp->inp_flags2 & INP_FREED)) goto skip_2; if (jailed_without_vnet(inp->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && prison_check_ip6(inp->inp_cred, &ip6->ip6_dst) != 0) goto skip_2; } if (inp->in6p_cksum != -1) { RIP6STAT_INC(rip6s_isum); if (m->m_pkthdr.len - (*offp + inp->in6p_cksum) < 2 || in6_cksum(m, proto, *offp, m->m_pkthdr.len - *offp)) { RIP6STAT_INC(rip6s_badsum); /* * Drop the received message, don't send an * ICMP6 message. Set proto to IPPROTO_NONE * to achieve that. */ proto = IPPROTO_NONE; goto skip_2; } } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (inp->in6p_moptions && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If the incoming datagram is for MLD, allow it * through unconditionally to the raw socket. * * Use the M_RTALERT_MLD flag to check for MLD * traffic without having to inspect the mbuf chain * more deeply, as all MLDv1/v2 host messages MUST * contain the Router Alert option. * * In the case of MLDv1, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. im6o_mc_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * filter the control traffic appropriately. */ int blocked; blocked = MCAST_PASS; if ((m->m_flags & M_RTALERT_MLD) == 0) { struct sockaddr_in6 mcaddr; bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; blocked = im6o_mc_filter(inp->in6p_moptions, ifp, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa); } if (blocked != MCAST_PASS) { IP6STAT_INC(ip6s_notmember); goto skip_2; } } last = inp; continue; skip_2: INP_RUNLOCK(inp); } - INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. */ if (IPSEC_ENABLED(ipv6) && last != NULL && IPSEC_CHECK_POLICY(ipv6, m, last) != 0) { m_freem(m); IP6STAT_DEC(ip6s_delivered); /* Do not inject data into pcb. */ INP_RUNLOCK(last); } else #endif /* IPSEC */ if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, m, &opts); /* Strip intermediate headers. */ m_adj(m, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); INP_RUNLOCK(last); } else { RIP6STAT_INC(rip6s_nosock); if (m->m_flags & M_MCAST) RIP6STAT_INC(rip6s_nosockmcast); if (proto == IPPROTO_NONE) m_freem(m); else icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, ip6_get_prevhdr(m, *offp)); IP6STAT_DEC(ip6s_delivered); } return (IPPROTO_DONE); } void rip6_ctlinput(int cmd, struct sockaddr *sa, void *d) { struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = in6_rtchange; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* * If the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { cmdarg = NULL; sa6_src = &sa6_any; } (void) in6_pcbnotify(&V_ripcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } /* * Generate IPv6 header and pass packet to ip6_output. Tack on options user * may have setup with control call. */ int rip6_output(struct mbuf *m, struct socket *so, ...) { struct mbuf *control; struct m_tag *mtag; struct sockaddr_in6 *dstsock; struct ip6_hdr *ip6; struct inpcb *inp; u_int plen = m->m_pkthdr.len; int error = 0; struct ip6_pktopts opt, *optp; struct ifnet *oifp = NULL; int type = 0, code = 0; /* for ICMPv6 output statistics only */ int scope_ambiguous = 0; int use_defzone = 0; int hlim = 0; struct in6_addr in6a; va_list ap; va_start(ap, so); dstsock = va_arg(ap, struct sockaddr_in6 *); control = va_arg(ap, struct mbuf *); va_end(ap); inp = sotoinpcb(so); INP_WLOCK(inp); if (control != NULL) { if ((error = ip6_setpktopts(control, &opt, inp->in6p_outputopts, so->so_cred, so->so_proto->pr_protocol)) != 0) { goto bad; } optp = &opt; } else optp = inp->in6p_outputopts; /* * Check and convert scope zone ID into internal form. * * XXX: we may still need to determine the zone later. */ if (!(so->so_state & SS_ISCONNECTED)) { if (!optp || !optp->ip6po_pktinfo || !optp->ip6po_pktinfo->ipi6_ifindex) use_defzone = V_ip6_use_defzone; if (dstsock->sin6_scope_id == 0 && !use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(dstsock, use_defzone)) != 0) goto bad; } /* * For an ICMPv6 packet, we should know its type and code to update * statistics. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { struct icmp6_hdr *icmp6; if (m->m_len < sizeof(struct icmp6_hdr) && (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) { error = ENOBUFS; goto bad; } icmp6 = mtod(m, struct icmp6_hdr *); type = icmp6->icmp6_type; code = icmp6->icmp6_code; } M_PREPEND(m, sizeof(*ip6), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto bad; } ip6 = mtod(m, struct ip6_hdr *); /* * Source address selection. */ error = in6_selectsrc_socket(dstsock, optp, inp, so->so_cred, scope_ambiguous, &in6a, &hlim); if (error) goto bad; error = prison_check_ip6(inp->inp_cred, &in6a); if (error != 0) goto bad; ip6->ip6_src = in6a; ip6->ip6_dst = dstsock->sin6_addr; /* * Fill in the rest of the IPv6 header fields. */ ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (inp->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); /* * ip6_plen will be filled in ip6_output, so not fill it here. */ ip6->ip6_nxt = inp->inp_ip_p; ip6->ip6_hlim = hlim; if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || inp->in6p_cksum != -1) { struct mbuf *n; int off; u_int16_t *p; /* Compute checksum. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) off = offsetof(struct icmp6_hdr, icmp6_cksum); else off = inp->in6p_cksum; if (plen < off + 2) { error = EINVAL; goto bad; } off += sizeof(struct ip6_hdr); n = m; while (n && n->m_len <= off) { off -= n->m_len; n = n->m_next; } if (!n) goto bad; p = (u_int16_t *)(mtod(n, caddr_t) + off); *p = 0; *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen); } /* * Send RA/RS messages to user land for protection, before sending * them to rtadvd/rtsol. */ if ((send_sendso_input_hook != NULL) && so->so_proto->pr_protocol == IPPROTO_ICMPV6) { switch (type) { case ND_ROUTER_ADVERT: case ND_ROUTER_SOLICIT: mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; m_tag_prepend(m, mtag); } } error = ip6_output(m, optp, NULL, 0, inp->in6p_moptions, &oifp, inp); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (oifp) icmp6_ifoutstat_inc(oifp, type, code); ICMP6STAT_INC(icp6s_outhist[type]); } else RIP6STAT_INC(rip6s_opackets); goto freectl; bad: if (m) m_freem(m); freectl: if (control != NULL) { ip6_clearpktopts(&opt, -1); m_freem(control); } INP_WUNLOCK(inp); return (error); } /* * Raw IPv6 socket option processing. */ int rip6_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp; int error; if (sopt->sopt_level == IPPROTO_ICMPV6) /* * XXX: is it better to call icmp6_ctloutput() directly * from protosw? */ return (icmp6_ctloutput(so, sopt)); else if (sopt->sopt_level != IPPROTO_IPV6) { if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_SETFIB) { inp = sotoinpcb(so); INP_WLOCK(inp); inp->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(inp); return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_get ? ip6_mrouter_get(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_set ? ip6_mrouter_set(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; } return (error); } static int rip6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct icmp6_filter *filter; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip6_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT); if (filter == NULL) return (ENOMEM); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); free(filter, M_PCB); return (error); } inp = (struct inpcb *)so->so_pcb; INP_INFO_WUNLOCK(&V_ripcbinfo); inp->inp_vflag |= INP_IPV6; inp->inp_ip_p = (long)proto; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; inp->in6p_icmp6filt = filter; ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt); INP_WUNLOCK(inp); return (0); } static void rip6_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_detach: inp == NULL")); if (so == V_ip6_mrouter && ip6_mrouter_done) ip6_mrouter_done(); /* xxx: RSVP */ INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); free(inp->in6p_icmp6filt, M_PCB); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } /* XXXRW: This can't ever be called. */ static void rip6_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_abort: inp == NULL")); soisdisconnected(so); } static void rip6_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_close: inp == NULL")); soisdisconnected(so); } static int rip6_disconnect(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_disconnect: inp == NULL")); if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp->in6p_faddr = in6addr_any; rip6_abort(so); return (0); } static int rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct epoch_tracker et; struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct ifaddr *ifa = NULL; int error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_bind: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if ((error = prison_check_ip6(td->td_ucred, &addr->sin6_addr)) != 0) return (error); if (CK_STAILQ_EMPTY(&V_ifnet) || addr->sin6_family != AF_INET6) return (EADDRNOTAVAIL); if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); NET_EPOCH_ENTER(et); if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) && (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL) { NET_EPOCH_EXIT(et); return (EADDRNOTAVAIL); } if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { NET_EPOCH_EXIT(et); return (EADDRNOTAVAIL); } NET_EPOCH_EXIT(et); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); inp->in6p_laddr = addr->sin6_addr; INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct in6_addr in6a; int error = 0, scope_ambiguous = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_connect: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (CK_STAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin6_family != AF_INET6) return (EAFNOSUPPORT); /* * Application should provide a proper zone ID or the use of default * zone IDs should be enabled. Unfortunately, some applications do * not behave as it should, so we need a workaround. Even if an * appropriate ID is not determined, we'll see if we can determine * the outgoing interface. If we can, determine the zone ID based on * the interface below. */ if (addr->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); /* Source address selection. XXX: need pcblookup? */ error = in6_selectsrc_socket(addr, inp->in6p_outputopts, inp, so->so_cred, scope_ambiguous, &in6a, NULL); if (error) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } inp->in6p_faddr = addr->sin6_addr; inp->in6p_laddr = in6a; soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 tmp; struct sockaddr_in6 *dst; int ret; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_send: inp == NULL")); /* Always copy sockaddr to avoid overwrites. */ /* Unlocked read. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return (EISCONN); } /* XXX */ bzero(&tmp, sizeof(tmp)); tmp.sin6_family = AF_INET6; tmp.sin6_len = sizeof(struct sockaddr_in6); INP_RLOCK(inp); bcopy(&inp->in6p_faddr, &tmp.sin6_addr, sizeof(struct in6_addr)); INP_RUNLOCK(inp); dst = &tmp; } else { if (nam == NULL) { m_freem(m); return (ENOTCONN); } if (nam->sa_len != sizeof(struct sockaddr_in6)) { m_freem(m); return (EINVAL); } tmp = *(struct sockaddr_in6 *)nam; dst = &tmp; if (dst->sin6_family == AF_UNSPEC) { /* * XXX: we allow this case for backward * compatibility to buggy applications that * rely on old (and wrong) kernel behavior. */ log(LOG_INFO, "rip6 SEND: address family is " "unspec. Assume AF_INET6\n"); dst->sin6_family = AF_INET6; } else if (dst->sin6_family != AF_INET6) { m_freem(m); return(EAFNOSUPPORT); } } ret = rip6_output(m, so, dst, control); return (ret); } struct pr_usrreqs rip6_usrreqs = { .pru_abort = rip6_abort, .pru_attach = rip6_attach, .pru_bind = rip6_bind, .pru_connect = rip6_connect, .pru_control = in6_control, .pru_detach = rip6_detach, .pru_disconnect = rip6_disconnect, .pru_peeraddr = in6_getpeeraddr, .pru_send = rip6_send, .pru_shutdown = rip6_shutdown, .pru_sockaddr = in6_getsockaddr, .pru_close = rip6_close, };