diff --git a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c --- a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c @@ -689,7 +689,9 @@ register struct mbuf *m = *mpp; int len, off, error = 0, hlen, code; struct ifnet *ifp, *sifp; - struct sockaddr_in dst; + struct route ro; + struct sockaddr_in *dst; + const struct sockaddr *gw; struct nhop_object *nh; u_long fibnum = 0; u_short ip_off; @@ -739,10 +741,12 @@ /* * Route packet. */ - bzero(&dst, sizeof (dst)); - dst.sin_family = AF_INET; - dst.sin_addr = ip->ip_dst; - dst.sin_len = sizeof(dst); + bzero(&ro, sizeof (ro)); + dst = (struct sockaddr_in *)&ro.ro_dst; + dst->sin_family = AF_INET; + dst->sin_addr = ip->ip_dst; + dst->sin_len = sizeof(dst); + gw = (const struct sockaddr *)dst; fr = fin->fin_fr; if ((fr != NULL) && !(fr->fr_flags & FR_KEEPSTATE) && (fdp != NULL) && @@ -762,11 +766,11 @@ } if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0)) - dst.sin_addr = fdp->fd_ip; + dst->sin_addr = fdp->fd_ip; fibnum = M_GETFIB(m0); NET_EPOCH_ASSERT(); - nh = fib4_lookup(fibnum, dst.sin_addr, 0, NHR_NONE, 0); + nh = fib4_lookup(fibnum, dst->sin_addr, 0, NHR_NONE, 0); if (nh == NULL) { if (in_localaddr(ip->ip_dst)) error = EHOSTUNREACH; @@ -777,8 +781,10 @@ if (ifp == NULL) ifp = nh->nh_ifp; - if (nh->nh_flags & NHF_GATEWAY) - dst.sin_addr = nh->gw4_sa.sin_addr; + if (nh->nh_flags & NHF_GATEWAY) { + gw = &nh->gw_sa; + ro.ro_flags |= RT_HAS_GW; + } /* * For input packets which are being "fastrouted", they won't @@ -822,9 +828,7 @@ if (ntohs(ip->ip_len) <= ifp->if_mtu) { if (!ip->ip_sum) ip->ip_sum = in_cksum(m, hlen); - error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst, - NULL - ); + error = (*ifp->if_output)(ifp, m, gw, &ro); goto done; } /* @@ -904,10 +908,7 @@ m0 = m->m_act; m->m_act = 0; if (error == 0) - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)&dst, - NULL - ); + error = (*ifp->if_output)(ifp, m, gw, &ro); else FREE_MB_T(m); } diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c --- a/sys/dev/cxgbe/tom/t4_listen.c +++ b/sys/dev/cxgbe/tom/t4_listen.c @@ -1113,7 +1113,14 @@ if (nh->nh_ifp != ifp) return (NULL); if (nh->nh_flags & NHF_GATEWAY) - ((struct sockaddr_in *)dst)->sin_addr = nh->gw4_sa.sin_addr; + if (nh->gw_sa.sa_family == AF_INET) + ((struct sockaddr_in *)dst)->sin_addr = nh->gw4_sa.sin_addr; + else { + bzero(dst, sizeof(struct sockaddr_in6)); + dst->sa_len = sizeof(struct sockaddr_in6); + dst->sa_family = AF_INET6; + ((struct sockaddr_in6 *)dst)->sin6_addr = nh->gw6_sa.sin6_addr; + } else ((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr; } diff --git a/sys/dev/iicbus/if_ic.c b/sys/dev/iicbus/if_ic.c --- a/sys/dev/iicbus/if_ic.c +++ b/sys/dev/iicbus/if_ic.c @@ -372,7 +372,7 @@ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &hdr, sizeof(hdr)); else - hdr = dst->sa_family; + hdr = RO_GET_FAMILY(ro, dst); mtx_lock(&sc->ic_lock); ifp->if_drv_flags |= IFF_DRV_RUNNING; diff --git a/sys/net/debugnet.c b/sys/net/debugnet.c --- a/sys/net/debugnet.c +++ b/sys/net/debugnet.c @@ -673,6 +673,7 @@ goto cleanup; } + /* TODO support AF_INET6 */ if (nh->gw_sa.sa_family == AF_INET) gw_sin = &nh->gw4_sa; else { diff --git a/sys/net/if_disc.c b/sys/net/if_disc.c --- a/sys/net/if_disc.c +++ b/sys/net/if_disc.c @@ -185,7 +185,7 @@ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else - af = dst->sa_family; + af = RO_GET_FAMILY(ro, dst); if (bpf_peers_present(ifp->if_bpf)) bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -236,8 +236,8 @@ #ifdef INET6 case AF_INET6: if ((m->m_flags & M_MCAST) == 0) - error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags, - plle); + error = nd6_resolve(ifp, ro_get_gw_type(ro), m, dst, + phdr, &lleflags, plle); else { const struct in6_addr *a6; a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr); @@ -289,6 +289,7 @@ uint32_t pflags; struct llentry *lle = NULL; int addref = 0; + int af = RO_GET_FAMILY(ro, dst); phdr = NULL; pflags = 0; @@ -352,7 +353,7 @@ if ((pflags & RT_L2_ME) != 0) { update_mbuf_csumflags(m, m); - return (if_simloop(ifp, m, dst->sa_family, 0)); + return (if_simloop(ifp, m, af, 0)); } loop_copy = (pflags & RT_MAY_LOOP) != 0; @@ -370,6 +371,19 @@ if ((pflags & RT_HAS_HEADER) == 0) { eh = mtod(m, struct ether_header *); memcpy(eh, phdr, hlen); +#if defined(INET) && defined(INET6) + /* XXX phdr might be from lle cache, let's fix the ether_type */ + /* FIXME maybe we can cache phdr in route ??? */ + if (dst->sa_family != af) { + uint16_t etype = 0; + if (af == AF_INET) + etype = htons(ETHERTYPE_IP); + else if (af == AF_INET6) + etype = htons(ETHERTYPE_IPV6); + if (etype != 0) + memcpy(&eh->ether_type, &etype, sizeof(etype)); + } +#endif } /* @@ -399,7 +413,7 @@ */ if ((n = m_dup(m, M_NOWAIT)) != NULL) { update_mbuf_csumflags(m, n); - (void)if_simloop(ifp, n, dst->sa_family, hlen); + (void)if_simloop(ifp, n, af, hlen); } else if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c --- a/sys/net/if_fwsubr.c +++ b/sys/net/if_fwsubr.c @@ -94,6 +94,7 @@ #if defined(INET) || defined(INET6) int is_gw = 0; #endif + int af = RO_GET_FAMILY(ro, dst); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); @@ -137,6 +138,26 @@ destfw = NULL; } + switch (af) { +#ifdef INET + case AF_INET: + type = ETHERTYPE_IP; + break; + case AF_ARP: + type = ETHERTYPE_ARP; + break; +#endif +#ifdef INET6 + case AF_INET6: + type = ETHERTYPE_IPV6; + break; +#endif + default: + if_printf(ifp, "can't handle af%d\n", af); + error = EAFNOSUPPORT; + goto bad; + } + switch (dst->sa_family) { #ifdef INET case AF_INET: @@ -151,7 +172,6 @@ if (error) return (error == EWOULDBLOCK ? 0 : error); } - type = ETHERTYPE_IP; break; case AF_ARP: @@ -159,7 +179,6 @@ struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_IEEE1394); - type = ETHERTYPE_ARP; if (unicast) *destfw = *(struct fw_hwaddr *) ar_tha(ah); @@ -176,12 +195,11 @@ #ifdef INET6 case AF_INET6: if (unicast) { - error = nd6_resolve(fc->fc_ifp, is_gw, m, dst, - (u_char *) destfw, NULL, NULL); + error = nd6_resolve(fc->fc_ifp, ro_get_gw_type(ro), m, + dst, (u_char *) destfw, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); } - type = ETHERTYPE_IPV6; break; #endif diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -409,7 +409,7 @@ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else - af = dst->sa_family; + af = RO_GET_FAMILY(ro, dst); /* * Now save the af in the inbound pkt csum data, this is a cheat since * we are using the inbound csum_data field to carry the af over to diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -613,7 +613,7 @@ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else - af = dst->sa_family; + af = RO_GET_FAMILY(ro, dst); /* * Now save the af in the inbound pkt csum data, this is a cheat since * we are using the inbound csum_data field to carry the af over to diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c --- a/sys/net/if_infiniband.c +++ b/sys/net/if_infiniband.c @@ -253,7 +253,8 @@ #ifdef INET6 case AF_INET6: if ((m->m_flags & M_MCAST) == 0) { - error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags, plle); + error = nd6_resolve(ifp, ro_get_gw_type(ro), m, dst, + phdr, &lleflags, plle); } else { infiniband_ipv6_multicast_map( &((const struct sockaddr_in6 *)dst)->sin6_addr, @@ -300,6 +301,7 @@ int hlen; /* link layer header length */ uint32_t pflags; bool addref; + int af = RO_GET_FAMILY(ro, dst); NET_EPOCH_ASSERT(); @@ -370,7 +372,7 @@ if ((pflags & RT_L2_ME) != 0) { update_mbuf_csumflags(m, m); - return (if_simloop(ifp, m, dst->sa_family, 0)); + return (if_simloop(ifp, m, af, 0)); } /* @@ -385,6 +387,15 @@ if ((pflags & RT_HAS_HEADER) == 0) { ih = mtod(m, struct infiniband_header *); memcpy(ih, phdr, hlen); +#if defined(INET) && defined(INET6) + /* XXX phdr might be from lle cache, let's fix the ether_type */ + if (dst->sa_family != af) { + if (af == AF_INET) + ih->ib_protocol = htons(ETHERTYPE_IP); + else if (af == AF_INET6) + ih->ib_protocol = htons(ETHERTYPE_IPV6); + } +#endif } /* diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c --- a/sys/net/if_loop.c +++ b/sys/net/if_loop.c @@ -235,7 +235,7 @@ if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) bcopy(dst->sa_data, &af, sizeof(af)); else - af = dst->sa_family; + af = RO_GET_FAMILY(ro, dst); #if 1 /* XXX */ switch (af) { diff --git a/sys/net/if_me.c b/sys/net/if_me.c --- a/sys/net/if_me.c +++ b/sys/net/if_me.c @@ -533,14 +533,14 @@ static int me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, - struct route *ro __unused) + struct route *ro) { uint32_t af; if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else - af = dst->sa_family; + af = RO_GET_FAMILY(ro, dst); m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); } diff --git a/sys/net/if_spppsubr.c b/sys/net/if_spppsubr.c --- a/sys/net/if_spppsubr.c +++ b/sys/net/if_spppsubr.c @@ -780,6 +780,7 @@ int ipproto = PPP_IP; #endif int debug = ifp->if_flags & IFF_DEBUG; + int af = RO_GET_FAMILY(ro, dst); SPPP_LOCK(sp); @@ -805,7 +806,7 @@ * dialout event in case IPv6 has been * administratively disabled on that interface. */ - if (dst->sa_family == AF_INET6 && + if (af == AF_INET6 && !(sp->confflags & CONF_ENABLE_IPV6)) goto drop; #endif @@ -818,7 +819,7 @@ } #ifdef INET - if (dst->sa_family == AF_INET) { + if (af == AF_INET) { /* XXX Check mbuf length here? */ struct ip *ip = mtod (m, struct ip*); struct tcphdr *tcp = (struct tcphdr*) ((long*)ip + ip->ip_hl); @@ -888,14 +889,14 @@ #endif #ifdef INET6 - if (dst->sa_family == AF_INET6) { + if (af == AF_INET6) { /* XXX do something tricky here? */ } #endif if (sp->pp_mode == PP_FR) { /* Add frame relay header. */ - m = sppp_fr_header (sp, m, dst->sa_family); + m = sppp_fr_header (sp, m, af); if (! m) goto nobufs; goto out; @@ -926,7 +927,7 @@ h->control = PPP_UI; /* Unnumbered Info */ } - switch (dst->sa_family) { + switch (af) { #ifdef INET case AF_INET: /* Internet Protocol */ if (sp->pp_mode == IFF_CISCO) diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -427,6 +427,7 @@ #endif sc = ifp->if_softc; + /* FIXME possible sockaddr_in gw ? */ dst6 = (const struct sockaddr_in6 *)dst; /* just in case */ diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -1401,8 +1401,9 @@ /* BPF writes need to be handled specially. */ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); + /* FIXME TUN is p-t-p device */ else - af = dst->sa_family; + af = RO_GET_FAMILY(ro, dst); if (bpf_peers_present(ifp->if_bpf)) bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0); diff --git a/sys/net/route.h b/sys/net/route.h --- a/sys/net/route.h +++ b/sys/net/route.h @@ -230,6 +230,38 @@ #define NHR_COPY 0x100 /* Copy rte data */ #define NHR_UNLOCKED 0x200 /* Do not lock table */ +/* nd6 / arpresolve gateway type */ +/* FIXME need proper place */ +typedef enum { + GW_NONE = 0, +#ifdef INET + GW_IPV4_SRC, +#endif +#ifdef INET6 + GW_IPV6_SRC +#endif +} gw_type_t; + +/* FIXME need proper place */ +static __inline gw_type_t +ro_get_gw_type(struct route *ro) +{ + if (ro == NULL || (ro->ro_flags & RT_HAS_GW) == 0) + return GW_NONE; + switch(ro->ro_dst.sa_family) { +#ifdef INET + case AF_INET: + return GW_IPV4_SRC; +#endif +#ifdef INET6 + case AF_INET6: + return GW_IPV6_SRC; +#endif + default: + return GW_NONE; + } +} + /* * Routing statistics. */ @@ -394,6 +426,10 @@ } \ } while (0) +#define RO_GET_FAMILY(ro, dst) ((ro) != NULL && \ + (ro)->ro_flags & RT_HAS_GW \ + ? (ro)->ro_dst.sa_family : (dst)->sa_family) + /* * Validate a cached route based on a supplied cookie. If there is an * out-of-date cache, simply free it. Update the generation number diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c --- a/sys/net/route/route_ctl.c +++ b/sys/net/route/route_ctl.c @@ -106,6 +106,20 @@ &VNET_NAME(rib_route_multipath), 0, "Enable route multipath"); #undef _MP_FLAGS +#if defined(INET) && defined(INET6) +/* +#define V_rib_route_ipv4_nexthop VNET(rib_route_ipv4_nexthop) +VNET_DEFINE(u_int, rib_route_ipv4_nexthop) = 1; +SYSCTL_UINT(_net_route, OID_AUTO, ipv4_nexthop, CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(rib_route_ipv4_nexthop), 0, "Enable IPv6 route via IPv4 Next Hop address"); +*/ + +#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop) +VNET_DEFINE(u_int, rib_route_ipv6_nexthop) = 1; +SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address"); +#endif + /* Routing table UMA zone */ VNET_DEFINE_STATIC(uma_zone_t, rtzone); #define V_rtzone VNET(rtzone) @@ -197,6 +211,34 @@ return (rnh); } +#if defined(INET) && defined(INET6) +/* +static bool +rib_can_ipv4_nexthop_address(struct rib_head *rh) +{ + int result; + + CURVNET_SET(rh->rib_vnet); + result = !!V_rib_route_ipv4_nexthop; + CURVNET_RESTORE(); + + return (result); +} +*/ + +static bool +rib_can_ipv6_nexthop_address(struct rib_head *rh) +{ + int result; + + CURVNET_SET(rh->rib_vnet); + result = !!V_rib_route_ipv6_nexthop; + CURVNET_RESTORE(); + + return (result); +} +#endif + #ifdef ROUTE_MPATH static bool rib_can_multipath(struct rib_head *rh) @@ -567,6 +609,35 @@ return (error); } +/* + * Checks if @dst and @gateway is valid combination. + * + * Returns true if is valid, false otherwise. + */ +static bool +gateway_is_valid(struct rib_head *rnh, struct sockaddr *dst, + struct sockaddr *gateway) +{ + if (dst->sa_family == gateway->sa_family) + return (true); + else if (gateway->sa_family == AF_UNSPEC) + return (true); + else if (gateway->sa_family == AF_LINK) + return (true); +#if defined(INET) && defined(INET6) + else if (dst->sa_family == AF_INET && gateway->sa_family == AF_INET6 && + rib_can_ipv6_nexthop_address(rnh)) + return (true); + /* + else if (dst->sa_family == AF_INET6 && gateway->sa_family == AF_INET && + rib_can_ipv4_nexthop_address(rnh)) + return (true); + */ +#endif + else + return (false); +} + /* * Creates rtentry and nexthop based on @info data. * Return 0 and fills in rtentry into @prt on success, @@ -589,8 +660,7 @@ if ((flags & RTF_GATEWAY) && !gateway) return (EINVAL); - if (dst && gateway && (dst->sa_family != gateway->sa_family) && - (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) + if (dst && gateway && !gateway_is_valid(rnh, dst, gateway)) return (EINVAL); if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c --- a/sys/netgraph/netflow/netflow.c +++ b/sys/netgraph/netflow/netflow.c @@ -364,6 +364,7 @@ fle->f.fle_o_ifx = nh->nh_ifp->if_index; if (nh->gw_sa.sa_family == AF_INET) fle->f.next_hop = nh->gw4_sa.sin_addr; + /* FIXME else nh->gw_sa.sa_family == AF_INET6 ? */ fle->f.dst_mask = plen; } } diff --git a/sys/netgraph/ng_iface.c b/sys/netgraph/ng_iface.c --- a/sys/netgraph/ng_iface.c +++ b/sys/netgraph/ng_iface.c @@ -371,7 +371,7 @@ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else - af = dst->sa_family; + af = RO_GET_FAMILY(ro, dst); /* Berkeley packet filter */ ng_iface_bpftap(ifp, m, af); diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -199,7 +199,9 @@ struct ip *ip; struct mbuf *m0 = NULL; struct nhop_object *nh = NULL; - struct sockaddr_in dst; + struct route ro; + struct sockaddr_in *dst; + const struct sockaddr *gw; struct in_addr dest, odest, rtdest; uint16_t ip_len, ip_off; int error = 0; @@ -421,19 +423,23 @@ ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); - bzero(&dst, sizeof(dst)); - dst.sin_family = AF_INET; - dst.sin_len = sizeof(dst); - if (nh->nh_flags & NHF_GATEWAY) - dst.sin_addr = nh->gw4_sa.sin_addr; - else - dst.sin_addr = dest; + bzero(&ro, sizeof(ro)); + dst = (struct sockaddr_in *)&ro.ro_dst; + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = dest; + if (nh->nh_flags & NHF_GATEWAY) { + gw = &nh->gw_sa; + ro.ro_flags |= RT_HAS_GW; + } else + gw = (const struct sockaddr *)dst; /* * Handle redirect case. */ redest.s_addr = 0; - if (V_ipsendredirects && (nh->nh_ifp == m->m_pkthdr.rcvif)) + if (V_ipsendredirects && (nh->nh_ifp == m->m_pkthdr.rcvif) && + gw->sa_family == AF_INET) mcopy = ip_redir_alloc(m, nh, ip, &redest.s_addr); /* @@ -448,8 +454,7 @@ * Send off the packet via outgoing interface */ IP_PROBE(send, NULL, NULL, ip, nh->nh_ifp, ip, NULL); - error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, - (struct sockaddr *)&dst, NULL); + error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, gw, &ro); } else { /* * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery @@ -484,7 +489,7 @@ mtod(m, struct ip *), nh->nh_ifp, mtod(m, struct ip *), NULL); error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, - (struct sockaddr *)&dst, NULL); + gw, &ro); if (error) break; } while ((m = m0) != NULL); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1065,13 +1065,16 @@ if (nh_ia != NULL && (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) { - if (nh->nh_flags & NHF_GATEWAY) - dest.s_addr = nh->gw4_sa.sin_addr.s_addr; - else - dest.s_addr = ip->ip_dst.s_addr; /* Router requirements says to only send host redirects */ type = ICMP_REDIRECT; code = ICMP_REDIRECT_HOST; + if (nh->nh_flags & NHF_GATEWAY) { + if (nh->gw_sa.sa_family == AF_INET) + dest.s_addr = nh->gw4_sa.sin_addr.s_addr; + else /* Do not redirect in case gw is AF_INET6 */ + type = 0; + } else + dest.s_addr = ip->ip_dst.s_addr; } } } diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -212,7 +212,7 @@ static int ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m, - const struct sockaddr_in *gw, struct route *ro, bool stamp_tag) + const struct sockaddr *gw, struct route *ro, bool stamp_tag) { #ifdef KERN_TLS struct ktls_session *tls = NULL; @@ -273,7 +273,7 @@ m->m_pkthdr.csum_flags |= CSUM_SND_TAG; } - error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); + error = (*ifp->if_output)(ifp, m, gw, ro); done: /* Check for route change invalidating send tags. */ @@ -293,9 +293,9 @@ /* rte<>ro_flags translation */ static inline void -rt_update_ro_flags(struct route *ro) +rt_update_ro_flags(struct route *ro, struct nhop_object *nh) { - int nh_flags = ro->ro_nh->nh_flags; + int nh_flags = nh->nh_flags; ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW); @@ -329,12 +329,13 @@ int mtu = 0; int error = 0; int vlan_pcp = -1; - struct sockaddr_in *dst, sin; - const struct sockaddr_in *gw; + struct sockaddr_in *dst; + const struct sockaddr *gw; struct in_ifaddr *ia = NULL; struct in_addr src; int isbroadcast; uint16_t ip_len, ip_off; + struct route iproute; uint32_t fibnum; #if defined(IPSEC) || defined(IPSEC_SUPPORT) int no_route_but_check_spd = 0; @@ -386,23 +387,23 @@ * therefore we need restore gw if we're redoing lookup. */ fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); - if (ro != NULL) - dst = (struct sockaddr_in *)&ro->ro_dst; - else - dst = &sin; - if (ro == NULL || ro->ro_nh == NULL) { - bzero(dst, sizeof(*dst)); + if (ro == NULL) { + ro = &iproute; + bzero(ro, sizeof (*ro)); + } + dst = (struct sockaddr_in *)&ro->ro_dst; + if (ro->ro_nh == NULL) { dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; } - gw = dst; + gw = (const struct sockaddr *)dst; again: /* * Validate route against routing table additions; * a better/more specific route might have been added. */ - if (inp != NULL && ro != NULL && ro->ro_nh != NULL) + if (inp != NULL && ro->ro_nh != NULL) NH_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); /* * If there is a cached route, @@ -412,7 +413,7 @@ * cache with IPv6. * Also check whether routing cache needs invalidation. */ - if (ro != NULL && ro->ro_nh != NULL && + if (ro->ro_nh != NULL && ((!NH_IS_VALID(ro->ro_nh)) || dst->sin_family != AF_INET || dst->sin_addr.s_addr != ip->ip_dst.s_addr)) RO_INVALIDATE_CACHE(ro); @@ -469,7 +470,7 @@ src = IA_SIN(ia)->sin_addr; else src.s_addr = INADDR_ANY; - } else if (ro != NULL) { + } else if (ro != &iproute) { if (ro->ro_nh == NULL) { /* * We want to do any cloning requested by the link @@ -498,13 +499,13 @@ ia = ifatoia(ro->ro_nh->nh_ifa); ifp = ro->ro_nh->nh_ifp; counter_u64_add(ro->ro_nh->nh_pksent, 1); - rt_update_ro_flags(ro); + rt_update_ro_flags(ro, ro->ro_nh); if (ro->ro_nh->nh_flags & NHF_GATEWAY) - gw = &ro->ro_nh->gw4_sa; + gw = &ro->ro_nh->gw_sa; if (ro->ro_nh->nh_flags & NHF_HOST) isbroadcast = (ro->ro_nh->nh_flags & NHF_BROADCAST); - else if (ifp->if_flags & IFF_BROADCAST) - isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); + else if ((ifp->if_flags & IFF_BROADCAST) && (gw->sa_family == AF_INET)) + isbroadcast = in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia); else isbroadcast = 0; if (ro->ro_nh->nh_flags & NHF_HOST) @@ -532,6 +533,7 @@ } ifp = nh->nh_ifp; mtu = nh->nh_mtu; + rt_update_ro_flags(ro, nh); /* * We are rewriting here dst to be gw actually, contradicting * comment at the beginning of the function. However, in this @@ -539,15 +541,15 @@ * In case if pfil(9) sends us back to beginning of the * function, the dst would be rewritten by ip_output_pfil(). */ - MPASS(dst == &sin); if (nh->nh_flags & NHF_GATEWAY) - dst->sin_addr = nh->gw4_sa.sin_addr; + gw = &nh->gw_sa; ia = ifatoia(nh->nh_ifa); src = IA_SIN(ia)->sin_addr; isbroadcast = (((nh->nh_flags & (NHF_HOST | NHF_BROADCAST)) == (NHF_HOST | NHF_BROADCAST)) || ((ifp->if_flags & IFF_BROADCAST) && - in_ifaddr_broadcast(dst->sin_addr, ia))); + (gw->sa_family == AF_INET) && + in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia))); } /* Catch a possible divide by zero later. */ @@ -562,7 +564,7 @@ * still points to the address in "ro". (It may have been * changed to point to a gateway address, above.) */ - gw = dst; + gw = (const struct sockaddr *)dst; /* * See if the caller provided any multicast options */ @@ -722,7 +724,7 @@ RO_NHFREE(ro); ro->ro_prepend = NULL; } - gw = dst; + gw = (const struct sockaddr *)dst; ip = mtod(m, struct ip *); goto again; } diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c --- a/sys/netinet/toecore.c +++ b/sys/netinet/toecore.c @@ -483,7 +483,7 @@ #endif #ifdef INET6 case AF_INET6: - rc = nd6_resolve(ifp, 0, NULL, sa, lladdr, NULL, NULL); + rc = nd6_resolve(ifp, GW_NONE, NULL, sa, lladdr, NULL, NULL); break; #endif default: diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -42,6 +42,7 @@ #include #include +#include struct llentry; @@ -371,7 +372,7 @@ void nd6_purge(struct ifnet *); int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, char *desten, uint32_t *pflags); -int nd6_resolve(struct ifnet *, int, struct mbuf *, +int nd6_resolve(struct ifnet *, gw_type_t, struct mbuf *, const struct sockaddr *, u_char *, uint32_t *, struct llentry **); int nd6_ioctl(u_long, caddr_t, struct ifnet *); void nd6_cache_lladdr(struct ifnet *, struct in6_addr *, diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -140,8 +140,11 @@ static void nd6_llinfo_settimer_locked(struct llentry *, long); static void clear_llinfo_pqueue(struct llentry *); static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *, - const struct sockaddr_in6 *, u_char *, uint32_t *, struct llentry **); + const struct sockaddr_in6 *, u_char *, uint32_t *, struct llentry **, + gw_type_t); static int nd6_need_cache(struct ifnet *); +static bool nd6_attach_encap_tag(struct mbuf *, gw_type_t); +static int output_ifp_one(struct ifnet *, struct mbuf *, struct sockaddr_in6 *); VNET_DEFINE_STATIC(struct callout, nd6_slowtimo_ch); #define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch) @@ -2213,7 +2216,7 @@ * - other errors (alloc failure, etc) */ int -nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, +nd6_resolve(struct ifnet *ifp, gw_type_t gw_type, struct mbuf *m, const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags, struct llentry **plle) { @@ -2252,6 +2255,15 @@ if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) { /* Entry found, let's copy lle info */ bcopy(ln->r_linkdata, desten, ln->r_hdrlen); +#ifdef INET + /* XXX fix ether type */ + if (gw_type == GW_IPV4_SRC) { + struct ether_header *eh; + uint16_t etype = htons(ETHERTYPE_IP); + eh = (struct ether_header *)desten; + bcopy(&etype, &eh->ether_type, sizeof(etype)); + } +#endif if (pflags != NULL) *pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR); /* Check if we have feedback request from nd6 timer */ @@ -2270,7 +2282,8 @@ } else if (plle && ln) LLE_WUNLOCK(ln); - return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags, plle)); + return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags, plle, + gw_type)); } /* @@ -2287,7 +2300,7 @@ static __noinline int nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m, const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags, - struct llentry **plle) + struct llentry **plle, gw_type_t gw_type) { struct llentry *lle = NULL, *lle_tmp; struct in6_addr *psrc, src; @@ -2366,6 +2379,15 @@ ll_len = lle->r_hdrlen; } bcopy(lladdr, desten, ll_len); +#ifdef INET + /* XXX fix ether type */ + if (gw_type == GW_IPV4_SRC) { + struct ether_header *eh; + uint16_t etype = htons(ETHERTYPE_IP); + eh = (struct ether_header *)desten; + bcopy(&etype, &eh->ether_type, sizeof(etype)); + } +#endif if (pflags != NULL) *pflags = lle->la_flags; if (plle) { @@ -2382,6 +2404,12 @@ * packet queue in the mbuf. When it exceeds nd6_maxqueuelen, * the oldest packet in the queue will be removed. */ + if (gw_type != GW_NONE && gw_type != GW_IPV6_SRC && + !nd6_attach_encap_tag(m, gw_type)) { + LLE_WUNLOCK(lle); + m_freem(m); + return (ENOMEM); + } if (lle->la_hold != NULL) { struct mbuf *m_hold; @@ -2446,7 +2474,7 @@ flags |= LLE_ADDRONLY; error = nd6_resolve_slow(ifp, flags, NULL, - (const struct sockaddr_in6 *)dst, desten, pflags, NULL); + (const struct sockaddr_in6 *)dst, desten, pflags, NULL, GW_NONE); return (error); } @@ -2463,7 +2491,7 @@ m = m_head; m_head = m_head->m_nextpkt; m->m_nextpkt = NULL; - error = nd6_output_ifp(ifp, ifp, m, dst, NULL); + error = output_ifp_one(ifp, m, dst); } /* @@ -2473,6 +2501,56 @@ return (error); } +#define MTAG_ND6_ENCAP_TYPE 4237446679 + +static bool +nd6_attach_encap_tag(struct mbuf *m, gw_type_t type) +{ + struct m_tag *mtag; + sa_family_t af = AF_UNSPEC; + +#ifdef INET + if (type == GW_IPV4_SRC) + af = AF_INET; +#endif + if (af == AF_UNSPEC) + return (true); + mtag = m_tag_alloc(MTAG_ND6_ENCAP_TYPE, 0, sizeof(sa_family_t), + M_NOWAIT); + if (mtag == NULL) + return (false); + *(sa_family_t *)(mtag + 1) = af; + m_tag_prepend(m, mtag); + + return (true); +} + +static int +output_ifp_one(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst) +{ + int error; + struct m_tag *mtag; + sa_family_t af; + struct route ro; + + mtag = m_tag_locate(m, MTAG_ND6_ENCAP_TYPE, 0, NULL); + if (mtag != NULL) { + af = *(sa_family_t *)(mtag + 1); +#ifdef INET + KASSERT(af == AF_INET, ("Unexpected af %d", af)); +#endif + bzero(&ro, sizeof(struct route)); + ro.ro_flags = RT_HAS_GW; + ro.ro_dst.sa_family = af; /* XXX only restore af */ + /* XXX no ro.ro_dst.sa_len */ + error = (*ifp->if_output)(ifp, m, + (const struct sockaddr *)dst, &ro); + } else + error = nd6_output_ifp(ifp, ifp, m, dst, NULL); + + return (error); +} + static int nd6_need_cache(struct ifnet *ifp) { diff --git a/sys/ofed/drivers/infiniband/core/ib_addr.c b/sys/ofed/drivers/infiniband/core/ib_addr.c --- a/sys/ofed/drivers/infiniband/core/ib_addr.c +++ b/sys/ofed/drivers/infiniband/core/ib_addr.c @@ -396,9 +396,16 @@ } else { bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0; memset(edst, 0, MAX_ADDR_LEN); - error = arpresolve(ifp, is_gw, NULL, is_gw ? - &nh->gw_sa : (const struct sockaddr *)&dst_tmp, - edst, NULL, NULL); +#ifdef INET6 + if (is_gw && nh->gw_sa.sa_family == AF_INET6) + error = nd6_resolve(ifp, GW_IPV4_SRC, NULL, &nh->gw_sa, + edst, NULL, NULL); + else +#endif + error = arpresolve(ifp, is_gw, NULL, is_gw ? + &nh->gw_sa : (const struct sockaddr *)&dst_tmp, + edst, NULL, NULL); + if (error != 0) goto error_put_ifp; else if (is_gw) @@ -584,7 +591,7 @@ } else { bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0; memset(edst, 0, MAX_ADDR_LEN); - error = nd6_resolve(ifp, is_gw, NULL, is_gw ? + error = nd6_resolve(ifp, GW_NONE, NULL, is_gw ? &nh->gw_sa : (const struct sockaddr *)&dst_tmp, edst, NULL, NULL); if (error != 0)