Index: head/sys/net/route.h =================================================================== --- head/sys/net/route.h +++ head/sys/net/route.h @@ -210,6 +210,7 @@ #define NHF_DEFAULT 0x0080 /* Default route */ #define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */ #define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */ +#define NHF_HOST 0x0400 /* RTF_HOST */ /* Nexthop request flags */ #define NHR_IFAIF 0x01 /* Return ifa_ifp interface */ Index: head/sys/net/route_var.h =================================================================== --- head/sys/net/route_var.h +++ head/sys/net/route_var.h @@ -67,6 +67,7 @@ uint16_t res; res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0; + res |= (rt_flags & RTF_HOST) ? NHF_HOST : 0; res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0; res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0; res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0; Index: head/sys/netinet/in_fib.h =================================================================== --- head/sys/netinet/in_fib.h +++ head/sys/netinet/in_fib.h @@ -43,12 +43,13 @@ /* Extended nexthop info used for control protocols */ struct nhop4_extended { struct ifnet *nh_ifp; /* Logical egress interface */ + struct in_ifaddr *nh_ia; /* Associated address */ uint16_t nh_mtu; /* nexthop mtu */ uint16_t nh_flags; /* nhop flags */ uint8_t spare[4]; struct in_addr nh_addr; /* GW/DST IPv4 address */ struct in_addr nh_src; /* default source IPv4 address */ - uint64_t spare2[2]; + uint64_t spare2; }; int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags, Index: head/sys/netinet/in_fib.c =================================================================== --- head/sys/netinet/in_fib.c +++ head/sys/netinet/in_fib.c @@ -96,7 +96,6 @@ uint32_t flags, struct nhop4_extended *pnh4) { struct sockaddr_in *gw; - struct in_ifaddr *ia; if ((flags & NHR_IFAIF) != 0) pnh4->nh_ifp = rte->rt_ifa->ifa_ifp; @@ -113,10 +112,8 @@ gw = (struct sockaddr_in *)rt_key(rte); if (gw->sin_addr.s_addr 
== 0) pnh4->nh_flags |= NHF_DEFAULT; - /* XXX: Set RTF_BROADCAST if GW address is broadcast */ - - ia = ifatoia(rte->rt_ifa); - pnh4->nh_src = IA_SIN(ia)->sin_addr; + pnh4->nh_ia = ifatoia(rte->rt_ifa); + pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr; } /* Index: head/sys/netinet/ip_output.c =================================================================== --- head/sys/netinet/ip_output.c +++ head/sys/netinet/ip_output.c @@ -72,6 +72,7 @@ #include #include +#include #include #include #include @@ -227,13 +228,12 @@ int hlen = sizeof (struct ip); int mtu; int error = 0; - struct sockaddr_in *dst; + struct sockaddr_in *dst, sin; const struct sockaddr_in *gw; struct in_ifaddr *ia; + struct in_addr src; int isbroadcast; uint16_t ip_len, ip_off; - struct route iproute; - struct rtentry *rte; /* cache for ro->ro_rt */ uint32_t fibnum; #if defined(IPSEC) || defined(IPSEC_SUPPORT) int no_route_but_check_spd = 0; @@ -252,11 +252,6 @@ #endif } - if (ro == NULL) { - ro = &iproute; - bzero(ro, sizeof (*ro)); - } - if (opt) { int len = 0; m = ip_insertoptions(m, opt, &len); @@ -281,26 +276,28 @@ /* * dst/gw handling: * - * dst can be rewritten but always points to &ro->ro_dst. * gw is readonly but can point either to dst OR rt_gateway, * therefore we need restore gw if we're redoing lookup. */ - gw = dst = (struct sockaddr_in *)&ro->ro_dst; fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); - rte = ro->ro_rt; - if (rte == NULL) { + if (ro != NULL) + dst = (struct sockaddr_in *)&ro->ro_dst; + else + dst = &sin; + if (ro == NULL || ro->ro_rt == NULL) { bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; } + gw = dst; NET_EPOCH_ENTER(et); again: /* * Validate route against routing table additions; * a better/more specific route might have been added. 
*/ - if (inp) + if (inp != NULL && ro != NULL && ro->ro_rt != NULL) RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); /* * If there is a cached route, @@ -310,15 +307,12 @@ * cache with IPv6. * Also check whether routing cache needs invalidation. */ - rte = ro->ro_rt; - if (rte && ((rte->rt_flags & RTF_UP) == 0 || - rte->rt_ifp == NULL || - !RT_LINK_IS_UP(rte->rt_ifp) || - dst->sin_family != AF_INET || - dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { + if (ro != NULL && ro->ro_rt != NULL && + ((ro->ro_rt->rt_flags & RTF_UP) == 0 || + ro->ro_rt->rt_ifp == NULL || !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) || + dst->sin_family != AF_INET || + dst->sin_addr.s_addr != ip->ip_dst.s_addr)) RO_INVALIDATE_CACHE(ro); - rte = NULL; - } ia = NULL; /* * If routing to interface only, short circuit routing lookup. @@ -338,8 +332,10 @@ ip->ip_dst.s_addr = INADDR_BROADCAST; dst->sin_addr = ip->ip_dst; ifp = ia->ia_ifp; + mtu = ifp->if_mtu; ip->ip_ttl = 1; isbroadcast = 1; + src = IA_SIN(ia)->sin_addr; } else if (flags & IP_ROUTETOIF) { if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), M_GETFIB(m)))) == NULL && @@ -350,9 +346,11 @@ goto bad; } ifp = ia->ia_ifp; + mtu = ifp->if_mtu; ip->ip_ttl = 1; isbroadcast = ifp->if_flags & IFF_BROADCAST ? in_ifaddr_broadcast(dst->sin_addr, ia) : 0; + src = IA_SIN(ia)->sin_addr; } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && imo != NULL && imo->imo_multicast_ifp != NULL) { /* @@ -360,15 +358,21 @@ * packets if the interface is specified. */ ifp = imo->imo_multicast_ifp; + mtu = ifp->if_mtu; IFP_TO_IA(ifp, ia, &in_ifa_tracker); isbroadcast = 0; /* fool gcc */ - } else { - /* - * We want to do any cloning requested by the link layer, - * as this is probably required in all cases for correct - * operation (as it is for ARP). 
- */ - if (rte == NULL) { + /* Interface may have no addresses. */ + if (ia != NULL) + src = IA_SIN(ia)->sin_addr; + else + src.s_addr = INADDR_ANY; + } else if (ro != NULL) { + if (ro->ro_rt == NULL) { + /* + * We want to do any cloning requested by the link + * layer, as this is probably required in all cases + * for correct operation (as it is for ARP). + */ #ifdef RADIX_MPATH rtalloc_mpath_fib(ro, ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), @@ -376,12 +380,47 @@ #else in_rtalloc_ign(ro, 0, fibnum); #endif - rte = ro->ro_rt; + if (ro->ro_rt == NULL || + (ro->ro_rt->rt_flags & RTF_UP) == 0 || + ro->ro_rt->rt_ifp == NULL || + !RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) { +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + /* + * There is no route for this packet, but it is + * possible that a matching SPD entry exists. + */ + no_route_but_check_spd = 1; + mtu = 0; /* Silence GCC warning. */ + goto sendit; +#endif + IPSTAT_INC(ips_noroute); + error = EHOSTUNREACH; + goto bad; + } } - if (rte == NULL || - (rte->rt_flags & RTF_UP) == 0 || - rte->rt_ifp == NULL || - !RT_LINK_IS_UP(rte->rt_ifp)) { + ia = ifatoia(ro->ro_rt->rt_ifa); + ifp = ro->ro_rt->rt_ifp; + counter_u64_add(ro->ro_rt->rt_pksent, 1); + rt_update_ro_flags(ro); + if (ro->ro_rt->rt_flags & RTF_GATEWAY) + gw = (struct sockaddr_in *)ro->ro_rt->rt_gateway; + if (ro->ro_rt->rt_flags & RTF_HOST) + isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); + else if (ifp->if_flags & IFF_BROADCAST) + isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); + else + isbroadcast = 0; + if (ro->ro_rt->rt_flags & RTF_HOST) + mtu = ro->ro_rt->rt_mtu; + else + mtu = ifp->if_mtu; + src = IA_SIN(ia)->sin_addr; + } else { + struct nhop4_extended nh; + + bzero(&nh, sizeof(nh)); + if (fib4_lookup_nh_ext(M_GETFIB(m), ip->ip_dst, 0, 0, &nh) != + 0) { #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * There is no route for this packet, but it is @@ -395,31 +434,29 @@ error = EHOSTUNREACH; goto bad; } - ia = ifatoia(rte->rt_ifa); - ifp = rte->rt_ifp; - 
counter_u64_add(rte->rt_pksent, 1); - rt_update_ro_flags(ro); - if (rte->rt_flags & 
RTF_GATEWAY) - gw = (struct sockaddr_in *)rte->rt_gateway; - if (rte->rt_flags & RTF_HOST) - isbroadcast = (rte->rt_flags & RTF_BROADCAST); - else if (ifp->if_flags & IFF_BROADCAST) - isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); - else - isbroadcast = 0; + ifp = nh.nh_ifp; + mtu = nh.nh_mtu; + /* + * We are rewriting here dst to be gw actually, contradicting + * comment at the beginning of the function. However, in this + * case we are always dealing with on stack dst. + * In case if pfil(9) sends us back to beginning of the + * function, the dst would be rewritten by ip_output_pfil(). + */ + MPASS(dst == &sin); + dst->sin_addr = nh.nh_addr; + ia = nh.nh_ia; + src = nh.nh_src; + isbroadcast = (((nh.nh_flags & (NHF_HOST | NHF_BROADCAST)) == + (NHF_HOST | NHF_BROADCAST)) || + ((ifp->if_flags & IFF_BROADCAST) && + in_ifaddr_broadcast(dst->sin_addr, ia))); } - /* - * Calculate MTU. If we have a route that is up, use that, - * otherwise use the interface's MTU. - */ - if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) - mtu = rte->rt_mtu; - else - mtu = ifp->if_mtu; /* Catch a possible divide by zero later. */ - KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p", - __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp)); + KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (rt_flags=0x%08x) ifp=%p", + __func__, mtu, ro, + (ro != NULL && ro->ro_rt != NULL) ? ro->ro_rt->rt_flags : 0, ifp)); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { m->m_flags |= M_MCAST; @@ -455,11 +492,8 @@ * If source address not specified yet, use address * of outgoing interface. */ - if (ip->ip_src.s_addr == INADDR_ANY) { - /* Interface may have no addresses. */ - if (ia != NULL) - ip->ip_src = IA_SIN(ia)->sin_addr; - } + if (ip->ip_src.s_addr == INADDR_ANY) + ip->ip_src = src; if ((imo == NULL && in_mcast_loop) || (imo && imo->imo_multicast_loop)) { @@ -522,12 +556,8 @@ * If the source address is not specified yet, use the address * of the outoing interface. 
*/ - if (ip->ip_src.s_addr == INADDR_ANY) { - /* Interface may have no addresses. */ - if (ia != NULL) { - ip->ip_src = IA_SIN(ia)->sin_addr; - } - } + if (ip->ip_src.s_addr == INADDR_ANY) + ip->ip_src = src; /* * Look for broadcast address and @@ -587,9 +617,10 @@ case -1: /* Need to try again */ /* Reset everything for a new round */ - RO_RTFREE(ro); - ro->ro_prepend = NULL; - rte = NULL; + if (ro != NULL) { + RO_RTFREE(ro); + ro->ro_prepend = NULL; + } gw = dst; ip = mtod(m, struct ip *); goto again; @@ -733,15 +764,6 @@ IPSTAT_INC(ips_fragmented); done: - if (ro == &iproute) - RO_RTFREE(ro); - else if (rte == NULL) - /* - * If the caller supplied a route but somehow the reference - * to it has been released need to prevent the caller - * calling RTFREE on it again. - */ - ro->ro_rt = NULL; NET_EPOCH_EXIT(et); return (error); bad: