Index: sys/net/rt_nhops.h =================================================================== --- /dev/null +++ sys/net/rt_nhops.h @@ -0,0 +1,55 @@ +/*- + * Copyright (c) 2015-2016 + * Alexander V. Chernikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _NET_RT_NHOPS_H_ +#define _NET_RT_NHOPS_H_ + +/* Maximum header length that can be prepended */ +#define MAX_PREPEND_LEN (56 - sizeof(void *) * 2) + +/* Non-recursive nexthop */ +struct nhop_prepend { + uint16_t nh_flags; /* NH flags */ + uint8_t nh_count; /* Number of nexthops or data length */ + uint8_t spare[3]; + uint16_t nh_mtu; /* given nhop MTU */ + struct ifnet *nh_lifp; /* Logical transmit interface */ + struct ifnet *nh_aifp; /* Interface owning the route ifaddr */ + union { + char nh_data[MAX_PREPEND_LEN]; /* data to prepend */ + struct in_addr nh4_addr;/* IPv4 gw address */ + struct in6_addr nh6_addr;/* IPv6 gw address */ + }; +}; + +#endif + + Index: sys/netinet/in_fib.h =================================================================== --- sys/netinet/in_fib.h +++ sys/netinet/in_fib.h @@ -51,11 +51,21 @@ uint64_t spare2[2]; }; +/* Used to retrieve additional info from nhop_prepend */ +struct nhop4_helper { + struct in_addr nh_src; /* default source IPv4 address */ +}; + +struct nhop_prepend; + int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_basic *pnh4); int fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_extended *pnh4); void fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4); +int fib4_lookup_prepend(uint32_t fibnum, struct in_addr dst, uint32_t flags, + uint32_t flowid, struct route *ro, struct nhop4_helper *hh); +void fib4_free_prepend(struct nhop_prepend *pnh); #endif Index: sys/netinet/in_fib.c =================================================================== --- sys/netinet/in_fib.c +++ sys/netinet/in_fib.c @@ -57,6 +57,7 @@ #include #include #include +#include #ifdef INET static void fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst, @@ -118,6 +119,42 @@ pnh4->nh_src = IA_SIN(ia)->sin_addr; } +static void +fib4_rte_to_nhop(struct rtentry *rte, struct in_addr dst, + struct route *ro, 
struct nhop4_helper *hh) +{ + struct sockaddr_in *gw; + struct in_ifaddr *ia; + struct nhop_prepend *pnh; + + /* XXX: Temporary for migration */ + pnh = (struct nhop_prepend *)ro->ro_rt; + + pnh->nh_aifp = rte->rt_ifa->ifa_ifp; + pnh->nh_lifp = rte->rt_ifp; + + pnh->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu); /* NOTE(review): this is 0 when rt_mtu is 0; the ip_fastfwd.c code replaced by this change fell back to if_mtu then - confirm */ + if (rte->rt_flags & RTF_GATEWAY) { + gw = (struct sockaddr_in *)rte->rt_gateway; + pnh->nh4_addr = gw->sin_addr; + } else + pnh->nh4_addr = dst; + /* Set flags */ + pnh->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); + gw = (struct sockaddr_in *)rt_key(rte); + if (gw->sin_addr.s_addr == 0) + pnh->nh_flags |= NHF_DEFAULT; + /* XXX: Set RTF_BROADCAST if GW address is broadcast */ + + /* No nexthops yet */ + pnh->nh_count = 0; + + if (hh != NULL) { + ia = ifatoia(rte->rt_ifa); + hh->nh_src = IA_SIN(ia)->sin_addr; + } +} + /* * Performs IPv4 route table lookup on @dst. Returns 0 on success. * Stores nexthop info provided @pnh4 structure. @@ -229,4 +266,63 @@ } +/* + * Performs lookup in IPv4 table fib @fibnum. + * Assumes @ro->ro_rt points to 'struct nhop_prepend' storage. + * In case of successful lookup ro->ro_rt is filled with + * appropriate interface info and full L2 header to prepend or + * nhop address. If route does not contain gateway, or gateway is unreachable, + * NHF_L2_INCOMPLETE flag and gateway address is stored into nh4_addr. + * If @hh is not NULL, additional nexthop data is stored there. + * + * Returns 0 on success. 
+ */ +int +fib4_lookup_prepend(uint32_t fibnum, struct in_addr dst, uint32_t flags, + uint32_t flowid, struct route *ro, struct nhop4_helper *hh) +{ + struct radix_node_head *rh; + struct radix_node *rn; + struct sockaddr_in sin; + struct rtentry *rte; + + KASSERT((fibnum < rt_numfibs), ("fib4_lookup_prepend: bad fibnum")); + rh = rt_tables_get_rnh(fibnum, AF_INET); + if (rh == NULL) + return (ENOENT); + + /* Prepare lookup key */ + memset(&sin, 0, sizeof(sin)); + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_addr = dst; + + RADIX_NODE_HEAD_RLOCK(rh); + rn = rh->rnh_matchaddr((void *)&sin, rh); + if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { + rte = RNTORT(rn); +#ifdef RADIX_MPATH + rte = rt_mpath_select(rte, flowid); + if (rte == NULL) { + RADIX_NODE_HEAD_RUNLOCK(rh); + return (ENOENT); + } +#endif + /* Ensure route & ifp is UP */ + if (RT_LINK_IS_UP(rte->rt_ifp)) { + fib4_rte_to_nhop(rte, dst, ro, hh); + RADIX_NODE_HEAD_RUNLOCK(rh); + return (0); + } + } + RADIX_NODE_HEAD_RUNLOCK(rh); + + return (ENOENT); +} + +void +fib4_free_prepend(struct nhop_prepend *pnh) +{ + /* Empty body: no resources are released here */ +} + #endif Index: sys/netinet/ip_fastfwd.c =================================================================== --- sys/netinet/ip_fastfwd.c +++ sys/netinet/ip_fastfwd.c @@ -100,48 +100,47 @@ #include #include #include +#include #include #include #include #include #include +#include #include static struct sockaddr_in * -ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m) +ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m, + struct nhop_prepend *pnh) { struct sockaddr_in *dst; - struct rtentry *rt; + uint32_t hash; + int error; /* * Find route to destination. 
*/ bzero(ro, sizeof(*ro)); - dst = (struct sockaddr_in *)&ro->ro_dst; - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr.s_addr = dest.s_addr; - in_rtalloc_ign(ro, 0, M_GETFIB(m)); + /* XXX: Temporary cast until ro_rt rename */ + ro->ro_rt = (struct rtentry *)pnh; + struct ip *ip; + ip = mtod(m, struct ip *); /* NOTE(review): ip is not read again in this function */ + hash = m->m_pkthdr.flowid; + error = fib4_lookup_prepend(M_GETFIB(m), dest, 0, hash, ro, NULL); - /* - * Route there and interface still up? - */ - rt = ro->ro_rt; - if (rt && (rt->rt_flags & RTF_UP) && - (rt->rt_ifp->if_flags & IFF_UP) && - (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { - if (rt->rt_flags & RTF_GATEWAY) - dst = (struct sockaddr_in *)rt->rt_gateway; - } else { - IPSTAT_INC(ips_noroute); - IPSTAT_INC(ips_cantforward); - if (rt) - RTFREE(rt); - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); - return NULL; + if (error == 0) { + dst = (struct sockaddr_in *)&ro->ro_dst; + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = pnh->nh4_addr; + return (dst); } - return dst; + + IPSTAT_INC(ips_noroute); + IPSTAT_INC(ips_cantforward); + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); + return NULL; } /* @@ -163,6 +162,7 @@ uint16_t ip_len, ip_off; int error = 0; int mtu; + struct nhop_prepend nh; struct m_tag *fwd_tag = NULL; /* @@ -306,16 +306,16 @@ /* * Find route to destination. */ - if ((dst = ip_findroute(&ro, dest, m)) == NULL) + if ((dst = ip_findroute(&ro, dest, m, &nh)) == NULL) return NULL; /* icmp unreach already sent */ - ifp = ro.ro_rt->rt_ifp; + ifp = nh.nh_lifp; /* * Immediately drop blackholed traffic, and directed broadcasts * for either the all-ones or all-zero subnet addresses on * locally attached networks. */ - if ((ro.ro_rt->rt_flags & (RTF_BLACKHOLE|RTF_BROADCAST)) != 0) + if ((nh.nh_flags & (NHF_BLACKHOLE|NHF_BROADCAST)) != 0) goto drop; /* @@ -353,8 +353,10 @@ * Return packet for processing by ip_input(). 
*/ m->m_flags |= M_FASTFWD_OURS; - if (ro.ro_rt) - RTFREE(ro.ro_rt); + if (ro.ro_rt) { + fib4_free_prepend(&nh); + ro.ro_rt = NULL; + } return m; } /* @@ -366,10 +368,11 @@ m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP_NEXTHOP; } - RTFREE(ro.ro_rt); - if ((dst = ip_findroute(&ro, dest, m)) == NULL) + fib4_free_prepend(&nh); + ro.ro_rt = NULL; + if ((dst = ip_findroute(&ro, dest, m, &nh)) == NULL) return NULL; /* icmp unreach already sent */ - ifp = ro.ro_rt->rt_ifp; + ifp = nh.nh_lifp; } passout: @@ -382,8 +385,7 @@ /* * Check if route is dampned (when ARP is unable to resolve) */ - if ((ro.ro_rt->rt_flags & RTF_REJECT) && - (ro.ro_rt->rt_expire == 0 || time_uptime < ro.ro_rt->rt_expire)) { + if (nh.nh_flags & NHF_REJECT) { /* NOTE(review): the replaced check also consulted rt_expire - confirm NHF_REJECT accounts for expiry */ icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); goto consumed; } @@ -399,11 +401,7 @@ /* * Check if packet fits MTU or if hardware will fragment for us */ - if (ro.ro_rt->rt_mtu) - mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu); - else - mtu = ifp->if_mtu; - + mtu = nh.nh_mtu; if (ip_len <= mtu) { /* * Avoid confusing lower layers. @@ -464,17 +462,16 @@ if (error != 0) IPSTAT_INC(ips_odropped); else { - counter_u64_add(ro.ro_rt->rt_pksent, 1); IPSTAT_INC(ips_forward); IPSTAT_INC(ips_fastforward); } consumed: - RTFREE(ro.ro_rt); + fib4_free_prepend(&nh); return NULL; drop: if (m) m_freem(m); if (ro.ro_rt) - RTFREE(ro.ro_rt); + fib4_free_prepend(&nh); return NULL; }