Index: sys/net/rt_nhops.h =================================================================== --- /dev/null +++ sys/net/rt_nhops.h @@ -0,0 +1,55 @@ +/*- + * Copyright (c) 2015-2016 + * Alexander V. Chernikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _NET_RT_NHOPS_H_ +#define _NET_RT_NHOPS_H_ + +/* Maximum header length that can be prepended: 56 bytes less two pointers */ +#define MAX_PREPEND_LEN (56 - sizeof(void *) * 2) + +/* Non-recursive nexthop */ +struct nhop_prepend { + uint16_t nh_flags; /* NH flags */ + uint8_t nh_count; /* Number of nexthops or data length */ + uint8_t spare[3]; + uint16_t nh_mtu; /* given nhop MTU */ + struct ifnet *nh_lifp; /* Logical transmit interface */ + struct ifnet *nh_aifp; /* Interface address */ + union { + char nh_data[MAX_PREPEND_LEN]; /* data to prepend */ + struct in_addr nh4_addr;/* IPv4 gw address */ + struct in6_addr nh6_addr;/* IPv6 gw address */ + }; +}; + +#endif + + Index: sys/netinet/icmp6.h =================================================================== --- sys/netinet/icmp6.h +++ sys/netinet/icmp6.h @@ -695,7 +695,7 @@ void icmp6_reflect(struct mbuf *, size_t); void icmp6_prepare(struct mbuf *); void icmp6_redirect_input(struct mbuf *, int); -void icmp6_redirect_output(struct mbuf *, struct rtentry *); +void icmp6_redirect_output(struct mbuf *, const struct in6_addr *); struct ip6ctlparam; void icmp6_mtudisc_update(struct ip6ctlparam *, int); Index: sys/netinet6/icmp6.c =================================================================== --- sys/netinet6/icmp6.c +++ sys/netinet6/icmp6.c @@ -2477,7 +2477,7 @@ } void -icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) +icmp6_redirect_output(struct mbuf *m0, const struct in6_addr *dst_in6) { struct ifnet *ifp; /* my outgoing interface */ struct in6_addr *ifp_ll6; @@ -2491,7 +2491,9 @@ size_t maxlen; u_char *p; struct ifnet *outif = NULL; - struct sockaddr_in6 src_sa; + struct sockaddr_in6 rt_gateway, src_sa; + struct rt_addrinfo info; + icmp6_errcount(ND_REDIRECT, 0); @@ -2499,8 +2501,26 @@ if (!V_ip6_forwarding) goto fail; - /* sanity check */ - if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) + /* Do route lookup once again to get RTF_DYNAMIC/RTF_MODIFIED flags */ + + /* 
Re-use src_sa as the lookup key */ + bzero(&src_sa, sizeof(src_sa)); + src_sa.sin6_len = sizeof(struct sockaddr_in6); + src_sa.sin6_family = AF_INET6; + src_sa.sin6_addr = *dst_in6; + + bzero(&rt_gateway, sizeof(rt_gateway)); + rt_gateway.sin6_len = sizeof(rt_gateway); + bzero(&info, sizeof(info)); + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway; + + if (rib_lookup_info(M_GETFIB(m0), (struct sockaddr *)&src_sa, + 0, 0, &info) != 0) + goto fail; + + ifp = info.rti_ifp; + if (ifp != m0->m_pkthdr.rcvif || + (info.rti_flags & (RTF_DYNAMIC|RTF_MODIFIED)) != 0) goto fail; /* @@ -2534,7 +2554,7 @@ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) goto fail; - M_SETFIB(m, rt->rt_fibnum); + M_SETFIB(m, M_GETFIB(m0)); maxlen = M_TRAILINGSPACE(m); maxlen = min(IPV6_MMTU, maxlen); /* just for safety */ @@ -2556,9 +2576,9 @@ } /* get ip6 linklocal address for the router. */ - if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) { + if (info.rti_flags & RTF_GATEWAY) { struct sockaddr_in6 *sin6; - sin6 = (struct sockaddr_in6 *)rt->rt_gateway; + sin6 = (struct sockaddr_in6 *)&rt_gateway; router_ll6 = &sin6->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(router_ll6)) router_ll6 = (struct in6_addr *)NULL; @@ -2582,7 +2602,7 @@ nd_rd->nd_rd_type = ND_REDIRECT; nd_rd->nd_rd_code = 0; nd_rd->nd_rd_reserved = 0; - if (rt->rt_flags & RTF_GATEWAY) { + if (info.rti_flags & RTF_GATEWAY) { /* * nd_rd->nd_rd_target must be a link-local address in * better router cases. 
Index: sys/netinet6/in6_fib.h =================================================================== --- sys/netinet6/in6_fib.h +++ sys/netinet6/in6_fib.h @@ -51,11 +51,16 @@ uint64_t spare2[2]; }; +struct nhop_prepend; + int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid, uint32_t flags, uint32_t flowid,struct nhop6_basic *pnh6); int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6); void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6); +int fib6_lookup_prepend(uint32_t fibnum, const struct in6_addr *dst, + uint32_t scopeid, uint32_t flags, uint32_t flowid, struct route *ro); +void fib6_free_prepend(struct nhop_prepend *pnh); #endif Index: sys/netinet6/in6_fib.c =================================================================== --- sys/netinet6/in6_fib.c +++ sys/netinet6/in6_fib.c @@ -63,6 +63,7 @@ #include #include #include +#include #include @@ -114,7 +115,7 @@ if (rte->rt_flags & RTF_GATEWAY) { gw = (struct sockaddr_in6 *)rte->rt_gateway; pnh6->nh_addr = gw->sin6_addr; - in6_clearscope(&pnh6->nh_addr); + //in6_clearscope(&pnh6->nh_addr); } else pnh6->nh_addr = *dst; /* Set flags */ @@ -142,7 +143,7 @@ if (rte->rt_flags & RTF_GATEWAY) { gw = (struct sockaddr_in6 *)rte->rt_gateway; pnh6->nh_addr = gw->sin6_addr; - in6_clearscope(&pnh6->nh_addr); + //in6_clearscope(&pnh6->nh_addr); } else pnh6->nh_addr = *dst; /* Set flags */ @@ -152,6 +153,38 @@ pnh6->nh_flags |= NHF_DEFAULT; } +static void +fib6_rte_to_nhop(struct rtentry *rte, struct in6_addr *dst, + struct route *ro) +{ + struct sockaddr_in6 *gw; + struct nhop_prepend *pnh; + + /* XXX: Temporary for migration: ro_rt carries a nhop_prepend pointer */ + pnh = (struct nhop_prepend *)ro->ro_rt; + + /* XXX: the "address" interface is resolved by fib6_get_ifaifp() + * rather than read directly from rte->rt_ifa->ifa_ifp. */ + pnh->nh_aifp = fib6_get_ifaifp(rte); + pnh->nh_lifp = rte->rt_ifp; + + pnh->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp)); + if (rte->rt_flags & RTF_GATEWAY) { 
+ gw = (struct sockaddr_in6 *)rte->rt_gateway; + pnh->nh6_addr = gw->sin6_addr; + } else + pnh->nh6_addr = *dst; + /* Set flags */ + pnh->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); + gw = (struct sockaddr_in6 *)rt_key(rte); + if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr)) + pnh->nh_flags |= NHF_DEFAULT; + /* XXX: Set RTF_BROADCAST if GW address is broadcast */ + + /* No nexthops yet */ + pnh->nh_count = 0; +} + /* * Performs IPv6 route table lookup on @dst. Returns 0 on success. * Stores basic nexthop info into provided @pnh6 structure. @@ -271,5 +304,68 @@ } +/* + * Performs IPv6 route table lookup on @dst. Returns 0 on success and + * ENOENT otherwise. Nexthop info is stored into the struct nhop_prepend + * that @ro->ro_rt is expected to point to (temporary migration hack). + * Note that + * - nh_lifp is the logical transmit interface (rt_ifp) + * - nh_aifp is obtained via fib6_get_ifaifp() + * - nh_mtu is min(rt_mtu, IN6_LINKMTU(rt_ifp)) + * - no references are taken, so fib6_free_prepend() has nothing to undo 
+ * - for link-local @dst, @scopeid is embedded into the lookup key + */ +int +fib6_lookup_prepend(uint32_t fibnum, const struct in6_addr *dst, + uint32_t scopeid, uint32_t flags, uint32_t flowid, struct route *ro) +{ + struct radix_node_head *rh; + struct radix_node *rn; + struct sockaddr_in6 sin6; + struct rtentry *rte; + + KASSERT((fibnum < rt_numfibs), ("fib6_lookup_prepend: bad fibnum")); + rh = rt_tables_get_rnh(fibnum, AF_INET6); + if (rh == NULL) + return (ENOENT); + + /* Prepare lookup key */ + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_addr = *dst; + /* Assume scopeid is valid and embed it directly */ + if (IN6_IS_SCOPE_LINKLOCAL(dst)) + sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); + + RADIX_NODE_HEAD_RLOCK(rh); + rn = rh->rnh_matchaddr((void *)&sin6, rh); + if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { + rte = RNTORT(rn); +#ifdef RADIX_MPATH + rte = rt_mpath_select(rte, flowid); + if (rte == NULL) { + RADIX_NODE_HEAD_RUNLOCK(rh); + return (ENOENT); + } +#endif + /* Ensure route & ifp is UP */ + if (RT_LINK_IS_UP(rte->rt_ifp)) { + fib6_rte_to_nhop(rte, &sin6.sin6_addr, ro); + RADIX_NODE_HEAD_RUNLOCK(rh); + + return (0); + } + } + RADIX_NODE_HEAD_RUNLOCK(rh); + + return (ENOENT); +} + +void +fib6_free_prepend(struct nhop_prepend *pnh) +{ + /* Nothing to release: fib6_lookup_prepend() takes no references */ +} + #endif Index: sys/netinet6/ip6_forward.c =================================================================== --- sys/netinet6/ip6_forward.c +++ sys/netinet6/ip6_forward.c @@ -62,11 +62,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include @@ -94,8 +96,9 @@ { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct sockaddr_in6 *dst = NULL; - struct rtentry *rt = NULL; struct route_in6 rin6; + struct nhop_prepend nh, *pnh = NULL; + struct sockaddr_in6 gw6; int error, type = 0, code = 0; struct mbuf *mcopy = NULL; struct ifnet *origifp; /* maybe unnecessary */ @@ -339,12 +342,12 @@ dst->sin6_len = sizeof(struct sockaddr_in6); 
dst->sin6_family = AF_INET6; dst->sin6_addr = ip6->ip6_dst; + rin6.ro_rt = (struct rtentry *)&nh; again2: - rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); - rt = rin6.ro_rt; - if (rin6.ro_rt != NULL) - RT_UNLOCK(rin6.ro_rt); - else { + + if (fib6_lookup_prepend(M_GETFIB(m), &dst->sin6_addr, 0, 0, + m->m_pkthdr.flowid, (struct route *)&rin6) != 0) { + pnh = NULL; IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute); if (mcopy) { @@ -353,6 +356,7 @@ } goto bad; } + pnh = (struct nhop_prepend *)rin6.ro_rt; /* * Source scope check: if a packet can't be delivered to its @@ -364,7 +368,7 @@ * [draft-ietf-ipngwg-icmp-v3-04.txt, Section 3.1] */ src_in6 = ip6->ip6_src; - if (in6_setscope(&src_in6, rt->rt_ifp, &outzone)) { + if (in6_setscope(&src_in6, nh.nh_lifp, &outzone)) { /* XXX: this should not happen */ IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); @@ -378,7 +382,7 @@ if (inzone != outzone) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); - in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard); + in6_ifstat_inc(nh.nh_lifp, ifs6_in_discard); if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { V_ip6_log_time = time_uptime; @@ -388,7 +392,7 @@ ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, - if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); + if_name(m->m_pkthdr.rcvif), if_name(nh.nh_lifp)); } if (mcopy) icmp6_error(mcopy, ICMP6_DST_UNREACH, @@ -405,15 +409,20 @@ */ dst_in6 = ip6->ip6_dst; if (in6_setscope(&dst_in6, m->m_pkthdr.rcvif, &inzone) != 0 || - in6_setscope(&dst_in6, rt->rt_ifp, &outzone) != 0 || + in6_setscope(&dst_in6, nh.nh_lifp, &outzone) != 0 || inzone != outzone) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } - if (rt->rt_flags & RTF_GATEWAY) - dst = (struct sockaddr_in6 *)rt->rt_gateway; + if (nh.nh_flags & NHF_GATEWAY) { + bzero(&gw6, sizeof(gw6)); + gw6.sin6_len = sizeof(gw6); + gw6.sin6_family = AF_INET6; + gw6.sin6_addr 
= nh.nh6_addr; + dst = &gw6; + } /* * If we are to forward the packet using the same interface @@ -424,9 +433,8 @@ * Also, don't send redirect if forwarding using a route * modified by a redirect. */ - if (V_ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt && - (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) { - if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) { + if (V_ip6_sendredirects && nh.nh_lifp == m->m_pkthdr.rcvif && !srcrt) { + if ((nh.nh_lifp->if_flags & IFF_POINTOPOINT) != 0) { /* * If the incoming interface is equal to the outgoing * one, and the link attached to the interface is @@ -452,7 +460,7 @@ * link identifiers, we can do this stuff after making a copy for * returning an error. */ - if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { + if ((nh.nh_lifp->if_flags & IFF_LOOPBACK) != 0) { /* * See corresponding comments in ip6_output. * XXX: but is it possible that ip6_forward() sends a packet @@ -473,14 +481,14 @@ ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), - if_name(rt->rt_ifp)); + if_name(nh.nh_lifp)); } /* we can just use rcvif in forwarding. */ origifp = m->m_pkthdr.rcvif; } else - origifp = rt->rt_ifp; + origifp = nh.nh_lifp; /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. @@ -494,7 +502,7 @@ odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ - error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, NULL); + error = pfil_run_hooks(&V_inet6_pfil_hook, &m, nh.nh_lifp, PFIL_OUT, NULL); if (error != 0 || m == NULL) goto freecopy; /* consumed by filter */ ip6 = mtod(m, struct ip6_hdr *); @@ -506,7 +514,7 @@ if (in6_localip(&ip6->ip6_dst)) m->m_flags |= M_FASTFWD_OURS; else { - RTFREE(rt); + fib6_free_prepend(pnh); goto again; /* Redo the routing table lookup. 
*/ } } @@ -535,21 +543,21 @@ m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); - RTFREE(rt); + fib6_free_prepend(pnh); goto again2; } pass: /* See if the size was changed by the packet filter. */ - if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) { - in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); + if (m->m_pkthdr.len > nh.nh_mtu) { + in6_ifstat_inc(nh.nh_lifp, ifs6_in_toobig); if (mcopy) { u_long mtu; #ifdef IPSEC size_t ipsechdrsiz; #endif /* IPSEC */ - mtu = IN6_LINKMTU(rt->rt_ifp); + mtu = nh.nh_mtu; #ifdef IPSEC /* * When we do IPsec tunnel ingress, we need to play @@ -574,16 +582,15 @@ goto bad; } - error = nd6_output_ifp(rt->rt_ifp, origifp, m, dst, NULL); + error = nd6_output_ifp(nh.nh_lifp, origifp, m, dst, + (struct route *)&rin6); if (error) { - in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard); + in6_ifstat_inc(nh.nh_lifp, ifs6_out_discard); IP6STAT_INC(ip6s_cantforward); } else { IP6STAT_INC(ip6s_forward); - in6_ifstat_inc(rt->rt_ifp, ifs6_out_forward); - if (type) - IP6STAT_INC(ip6s_redirectsent); - else { + in6_ifstat_inc(nh.nh_lifp, ifs6_out_forward); + if (type == 0) { if (mcopy) goto freecopy; } @@ -594,7 +601,7 @@ switch (error) { case 0: if (type == ND_REDIRECT) { - icmp6_redirect_output(mcopy, rt); + icmp6_redirect_output(mcopy, &dst_in6); goto out; } goto freecopy; @@ -625,6 +632,6 @@ bad: m_freem(m); out: - if (rt != NULL) - RTFREE(rt); + if (pnh != NULL) + fib6_free_prepend(pnh); }