diff --git a/sys/net/if_disc.c b/sys/net/if_disc.c --- a/sys/net/if_disc.c +++ b/sys/net/if_disc.c @@ -184,8 +184,13 @@ /* BPF writes need to be handled specially. */ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); - else + else { af = dst->sa_family; +#if defined(INET) && defined(INET6) + if (af == AF_INET6 && (m->m_pkthdr.mhdr_flags & HDR_IPV4_IPV6_NHOP)) + af = AF_INET; +#endif + } if (bpf_peers_present(ifp->if_bpf)) bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -289,7 +289,14 @@ uint32_t pflags; struct llentry *lle = NULL; int addref = 0; + int af; + af = dst->sa_family; +#if defined(INET) && defined(INET6) + /* Restore address family */ + if (af == AF_INET6 && (m->m_pkthdr.mhdr_flags & HDR_IPV4_IPV6_NHOP)) + af = AF_INET; +#endif phdr = NULL; pflags = 0; if (ro != NULL) { @@ -352,7 +359,7 @@ if ((pflags & RT_L2_ME) != 0) { update_mbuf_csumflags(m, m); - return (if_simloop(ifp, m, dst->sa_family, 0)); + return (if_simloop(ifp, m, af, 0)); } loop_copy = (pflags & RT_MAY_LOOP) != 0; @@ -370,6 +377,13 @@ if ((pflags & RT_HAS_HEADER) == 0) { eh = mtod(m, struct ether_header *); memcpy(eh, phdr, hlen); +#if defined(INET) && defined(INET6) + /* Fix ether_type, for ipv4 packet with ipv6 ND resolve */ + if (af == AF_INET && eh->ether_type == htons(ETHERTYPE_IPV6)) { + uint16_t etype = htons(ETHERTYPE_IP); + memcpy(&eh->ether_type, &etype, sizeof(etype)); + } +#endif } /* @@ -399,7 +413,7 @@ */ if ((n = m_dup(m, M_NOWAIT)) != NULL) { update_mbuf_csumflags(m, n); - (void)if_simloop(ifp, n, dst->sa_family, hlen); + (void)if_simloop(ifp, n, af, hlen); } else if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c --- a/sys/net/if_fwsubr.c +++ b/sys/net/if_fwsubr.c @@ -181,7 +181,12 @@ if (error) return (error == EWOULDBLOCK ? 
0 : error); } - type = ETHERTYPE_IPV6; +#ifdef INET + if (m->m_pkthdr.mhdr_flags & HDR_IPV4_IPV6_NHOP) + type = ETHERTYPE_IP; + else +#endif + type = ETHERTYPE_IPV6; break; #endif diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -408,8 +408,13 @@ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); - else + else { af = dst->sa_family; +#if defined(INET) && defined(INET6) + if (af == AF_INET6 && (m->m_pkthdr.mhdr_flags & HDR_IPV4_IPV6_NHOP)) + af = AF_INET; +#endif + } /* * Now save the af in the inbound pkt csum data, this is a cheat since * we are using the inbound csum_data field to carry the af over to diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -612,8 +612,13 @@ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); - else + else { af = dst->sa_family; +#if defined(INET) && defined(INET6) + if (af == AF_INET6 && (m->m_pkthdr.mhdr_flags & HDR_IPV4_IPV6_NHOP)) + af = AF_INET; +#endif + } /* * Now save the af in the inbound pkt csum data, this is a cheat since * we are using the inbound csum_data field to carry the af over to diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c --- a/sys/net/if_infiniband.c +++ b/sys/net/if_infiniband.c @@ -300,9 +300,16 @@ int hlen; /* link layer header length */ uint32_t pflags; bool addref; + int af; NET_EPOCH_ASSERT(); + af = dst->sa_family; +#if defined(INET) && defined(INET6) + /* Restore address family */ + if (af == AF_INET6 && (m->m_pkthdr.mhdr_flags & HDR_IPV4_IPV6_NHOP)) + af = AF_INET; +#endif addref = false; phdr = NULL; pflags = 0; @@ -370,7 +377,7 @@ if ((pflags & RT_L2_ME) != 0) { update_mbuf_csumflags(m, m); - return (if_simloop(ifp, m, dst->sa_family, 0)); + return (if_simloop(ifp, m, af, 0)); } /* @@ -385,6 +392,11 @@ if ((pflags & RT_HAS_HEADER) == 0) { ih = mtod(m, struct infiniband_header *); memcpy(ih, phdr, hlen); +#if defined(INET) && defined(INET6) + /* Fix 
ib_protocol, for ipv4 packet with ipv6 ND resolve */ + if (af == AF_INET && ih->ib_protocol == htons(ETHERTYPE_IPV6)) + ih->ib_protocol = htons(ETHERTYPE_IP); +#endif } /* diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c --- a/sys/net/if_loop.c +++ b/sys/net/if_loop.c @@ -234,8 +234,13 @@ /* BPF writes need to be handled specially. */ if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT) bcopy(dst->sa_data, &af, sizeof(af)); - else + else { af = dst->sa_family; +#if defined(INET) && defined(INET6) + if (af == AF_INET6 && (m->m_pkthdr.mhdr_flags & HDR_IPV4_IPV6_NHOP)) + af = AF_INET; +#endif + } #if 1 /* XXX */ switch (af) { diff --git a/sys/net/if_me.c b/sys/net/if_me.c --- a/sys/net/if_me.c +++ b/sys/net/if_me.c @@ -27,6 +27,8 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_inet6.h" + #include #include #include @@ -539,8 +541,13 @@ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); - else + else { af = dst->sa_family; +#if defined(INET) && defined(INET6) + if (af == AF_INET6 && (m->m_pkthdr.mhdr_flags & HDR_IPV4_IPV6_NHOP)) + af = AF_INET; +#endif + } m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); } diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c --- a/sys/net/route/route_ctl.c +++ b/sys/net/route/route_ctl.c @@ -106,6 +106,11 @@ &VNET_NAME(rib_route_multipath), 0, "Enable route multipath"); #undef _MP_FLAGS +#define V_rib_route_rfc5549 VNET(rib_route_rfc5549) +VNET_DEFINE(u_int, rib_route_rfc5549) = 1; +SYSCTL_UINT(_net_route, OID_AUTO, rfc5549, CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(rib_route_rfc5549), 0, "Enable rfc5549 IPv6 Next Hop address"); + /* Routing table UMA zone */ VNET_DEFINE_STATIC(uma_zone_t, rtzone); #define V_rtzone VNET(rtzone) @@ -197,6 +202,18 @@ return (rnh); } +static bool +rib_can_ipv6_nexthop_address(struct rib_head *rh) +{ + int result; + + CURVNET_SET(rh->rib_vnet); + result = !!V_rib_route_rfc5549; + CURVNET_RESTORE(); + + return (result); +} + #ifdef 
ROUTE_MPATH static bool rib_can_multipath(struct rib_head *rh) @@ -590,8 +607,10 @@ if ((flags & RTF_GATEWAY) && !gateway) return (EINVAL); if (dst && gateway && (dst->sa_family != gateway->sa_family) && - (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) - return (EINVAL); + (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK) && + !(dst->sa_family == AF_INET && gateway->sa_family == AF_INET6 && + rib_can_ipv6_nexthop_address(rnh))) + return (EINVAL); /* only IPv4 via an IPv6 next hop (RFC 5549) may mix families */ if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) return (EINVAL); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1244,6 +1244,10 @@ if (cred == NULL || !prison_flag(cred, PR_IP4)) { ia = (struct in_ifaddr *)nh->nh_ifa; laddr->s_addr = ia->ia_addr.sin_addr.s_addr; + if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY) { + /* FIXME obtain address from other interface? */ + /* TODO default source address selection */ + } goto done; } diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -78,6 +78,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_inet6.h" #include "opt_ipstealth.h" #include @@ -143,7 +144,7 @@ if (nh_ia != NULL && (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) { - if (nh->nh_flags & NHF_GATEWAY) + if (nh->nh_flags & NHF_GATEWAY && nh->gw_sa.sa_family == AF_INET) // FIXME AF_INET6 ???
*addr = nh->gw4_sa.sin_addr.s_addr; else *addr = ip->ip_dst.s_addr; @@ -199,7 +200,13 @@ struct ip *ip; struct mbuf *m0 = NULL; struct nhop_object *nh = NULL; - struct sockaddr_in dst; + union { + struct sockaddr sa; + struct sockaddr_in sin; +#ifdef INET6 + struct sockaddr_in6 sin6; +#endif + } dst; struct in_addr dest, odest, rtdest; uint16_t ip_len, ip_off; int error = 0; @@ -422,19 +429,29 @@ ip_off = ntohs(ip->ip_off); bzero(&dst, sizeof(dst)); - dst.sin_family = AF_INET; - dst.sin_len = sizeof(dst); - if (nh->nh_flags & NHF_GATEWAY) - dst.sin_addr = nh->gw4_sa.sin_addr; - else - dst.sin_addr = dest; + dst.sin.sin_family = AF_INET; + dst.sin.sin_len = sizeof(struct sockaddr_in); + if (nh->nh_flags & NHF_GATEWAY) { + if (nh->gw_sa.sa_family == AF_INET) + dst.sin.sin_addr = nh->gw4_sa.sin_addr; +#ifdef INET6 + else { // AF_INET6 + bzero(&dst.sin6, sizeof(struct sockaddr_in6)); + dst.sin6.sin6_family = AF_INET6; + dst.sin6.sin6_len = sizeof(struct sockaddr_in6); + dst.sin6.sin6_addr = nh->gw6_sa.sin6_addr; + dst.sin6.sin6_scope_id = nh->gw6_sa.sin6_scope_id; + } +#endif + } else + dst.sin.sin_addr = dest; /* * Handle redirect case. 
*/ redest.s_addr = 0; if (V_ipsendredirects && (nh->nh_ifp == m->m_pkthdr.rcvif)) - mcopy = ip_redir_alloc(m, nh, ip, &redest.s_addr); + mcopy = ip_redir_alloc(m, nh, ip, &redest.s_addr); // FIXME redirect without interface ip address (unnumbered interface) /* * Check if packet fits MTU or if hardware will fragment for us @@ -448,8 +465,13 @@ * Send off the packet via outgoing interface */ IP_PROBE(send, NULL, NULL, ip, nh->nh_ifp, ip, NULL); - error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, - (struct sockaddr *)&dst, NULL); +#ifdef INET6 + /* RFC5549 */ + if (dst.sa.sa_family == AF_INET6) + m->m_pkthdr.mhdr_flags |= HDR_IPV4_IPV6_NHOP; +#endif + + error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, &dst.sa, NULL); } else { /* * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery @@ -483,8 +505,13 @@ IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), nh->nh_ifp, mtod(m, struct ip *), NULL); +#ifdef INET6 + /* RFC5549 */ + if (dst.sa.sa_family == AF_INET6) + m->m_pkthdr.mhdr_flags |= HDR_IPV4_IPV6_NHOP; +#endif error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, - (struct sockaddr *)&dst, NULL); + &dst.sa, NULL); if (error) break; } while ((m = m0) != NULL); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1062,7 +1062,7 @@ if (nh_ia != NULL && (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) { - if (nh->nh_flags & NHF_GATEWAY) + if (nh->nh_flags & NHF_GATEWAY && nh->gw_sa.sa_family == AF_INET) dest.s_addr = nh->gw4_sa.sin_addr.s_addr; else dest.s_addr = ip->ip_dst.s_addr; diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_kern_tls.h" #include "opt_mbuf_stress_test.h" @@ -212,7 +213,7 @@ static int ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m, - const struct sockaddr_in *gw, 
struct route *ro, bool stamp_tag) + const struct sockaddr *gw, struct route *ro, bool stamp_tag) { #ifdef KERN_TLS struct ktls_session *tls = NULL; @@ -273,7 +274,12 @@ m->m_pkthdr.csum_flags |= CSUM_SND_TAG; } - error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); +#ifdef INET6 + /* RFC5549 */ + if (gw->sa_family == AF_INET6) + m->m_pkthdr.mhdr_flags |= HDR_IPV4_IPV6_NHOP; +#endif + error = (*ifp->if_output)(ifp, m, gw, ro); done: /* Check for route change invalidating send tags. */ @@ -329,8 +335,14 @@ int mtu = 0; int error = 0; int vlan_pcp = -1; - struct sockaddr_in *dst, sin; - const struct sockaddr_in *gw; + struct sockaddr_in *dst; + union { + struct sockaddr_in sin; +#ifdef INET6 + struct sockaddr_in6 sin6; +#endif + } ss; + const struct sockaddr *gw; struct in_ifaddr *ia = NULL; struct in_addr src; int isbroadcast; @@ -389,14 +401,14 @@ if (ro != NULL) dst = (struct sockaddr_in *)&ro->ro_dst; else - dst = &sin; + dst = &ss.sin; if (ro == NULL || ro->ro_nh == NULL) { bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; } - gw = dst; + gw = (const struct sockaddr *)dst; again: /* * Validate route against routing table additions; @@ -500,11 +512,11 @@ counter_u64_add(ro->ro_nh->nh_pksent, 1); rt_update_ro_flags(ro); if (ro->ro_nh->nh_flags & NHF_GATEWAY) - gw = &ro->ro_nh->gw4_sa; + gw = &ro->ro_nh->gw_sa; if (ro->ro_nh->nh_flags & NHF_HOST) isbroadcast = (ro->ro_nh->nh_flags & NHF_BROADCAST); - else if (ifp->if_flags & IFF_BROADCAST) - isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); + else if (ifp->if_flags & IFF_BROADCAST && gw->sa_family == AF_INET) + isbroadcast = in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia); else isbroadcast = 0; if (ro->ro_nh->nh_flags & NHF_HOST) @@ -512,6 +524,14 @@ else mtu = ifp->if_mtu; src = IA_SIN(ia)->sin_addr; +#ifdef INET6 + if (ip->ip_src.s_addr == INADDR_ANY && src.s_addr == INADDR_ANY && gw->sa_family == AF_INET6) { + // 
FIXME unnumbered interface, use loopback alias address or other interface address instead ??? + IPSTAT_INC(ips_badaddr); /* ips_sources_none ??? */ + error = EADDRNOTAVAIL; /* better error ??? */ + goto bad; + } +#endif } else { struct nhop_object *nh; @@ -539,14 +559,38 @@ * In case if pfil(9) sends us back to beginning of the * function, the dst would be rewritten by ip_output_pfil(). */ - MPASS(dst == &sin); - if (nh->nh_flags & NHF_GATEWAY) - dst->sin_addr = nh->gw4_sa.sin_addr; + MPASS(dst == &ss.sin); + if (nh->nh_flags & NHF_GATEWAY) { + if (nh->gw_sa.sa_family == AF_INET) { + bzero(&ss.sin, sizeof(struct sockaddr_in)); + ss.sin.sin_family = AF_INET; + ss.sin.sin_len = sizeof(struct sockaddr_in); + ss.sin.sin_addr = nh->gw4_sa.sin_addr; + } +#ifdef INET6 + else { // AF_INET6 + bzero(&ss.sin6, sizeof(struct sockaddr_in6)); + ss.sin6.sin6_family = AF_INET6; + ss.sin6.sin6_len = sizeof(struct sockaddr_in6); + ss.sin6.sin6_addr = nh->gw6_sa.sin6_addr; + ss.sin6.sin6_scope_id = nh->gw6_sa.sin6_scope_id; + } +#endif + } + ia = ifatoia(nh->nh_ifa); src = IA_SIN(ia)->sin_addr; +#ifdef INET6 + if (ip->ip_src.s_addr == INADDR_ANY && src.s_addr == INADDR_ANY && gw->sa_family == AF_INET6) { + // FIXME unnumbered interface, use loopback alias address or other interface address instead ??? + IPSTAT_INC(ips_badaddr); /* ips_sources_none ??? */ + error = EADDRNOTAVAIL; /* better error ??? */ + goto bad; + } +#endif isbroadcast = (((nh->nh_flags & (NHF_HOST | NHF_BROADCAST)) == (NHF_HOST | NHF_BROADCAST)) || - ((ifp->if_flags & IFF_BROADCAST) && + ((ifp->if_flags & IFF_BROADCAST) && gw->sa_family == AF_INET && /* test the sockaddr read below; gw presumably still aliases dst here - confirm */ in_ifaddr_broadcast(dst->sin_addr, ia))); } @@ -562,7 +606,7 @@ * still points to the address in "ro". (It may have been * changed to point to a gateway address, above.)
*/ - gw = dst; + gw = (const struct sockaddr *)dst; /* * See if the caller provided any multicast options */ @@ -722,7 +766,7 @@ RO_NHFREE(ro); ro->ro_prepend = NULL; } - gw = dst; + gw = (const struct sockaddr *)dst; ip = mtod(m, struct ip *); goto again; } diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -210,6 +210,9 @@ #define lro_etype PH_loc.sixteen[3] /* inbound during LRO (no reassembly) */ /* Note PH_loc is used during IP reassembly (all 8 bytes as a ptr) */ +/* FIXME better naming ???; NOTE(review): same PH_loc.sixteen[3] slot as lro_etype above - confirm stale inbound data cannot be read as flags */ +#define mhdr_flags PH_loc.sixteen[3] + /* * TLS records for TLS 1.0-1.2 can have the following header lengths: * - 5 (AES-CBC with implicit IV) @@ -720,6 +723,13 @@ #define CSUM_TCP_IPV6 CSUM_IP6_TCP #define CSUM_SCTP_IPV6 CSUM_IP6_SCTP + +/* + * header flags + */ +#define HDR_IPV4_IPV6_NHOP 0x0001 /* RFC5549 IPv6 next hop */ + + /* * mbuf types describing the content of the mbuf (including external storage). */ diff --git a/tests/sys/netinet/forward.sh b/tests/sys/netinet/forward.sh --- a/tests/sys/netinet/forward.sh +++ b/tests/sys/netinet/forward.sh @@ -269,5 +269,8 @@ atf_add_test_case "fwd_ip_icmp_gw_slow_success" } +# TODO add tests for IPv4 network with IPv6 nexthop +# TODO IPv4 source address selection with unnumbered interface (has IPv6 addresses but no IPv4 addresses) + # end