Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -4161,7 +4161,7 @@ net/rtsock.c standard net/slcompress.c optional netgraph_vjc | sppp | \ netgraph_sppp -net/toeplitz.c optional inet rss | inet6 rss +net/toeplitz.c optional inet rss | inet6 rss | route_mpath net/vnet.c optional vimage net80211/ieee80211.c optional wlan net80211/ieee80211_acl.c optional wlan wlan_acl Index: sys/net/radix.c =================================================================== --- sys/net/radix.c +++ sys/net/radix.c @@ -624,21 +624,6 @@ saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); if (keyduplicated) { for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { -#ifdef RADIX_MPATH - /* permit multipath, if enabled for the family */ - if (rn_mpath_capable(head) && netmask == tt->rn_mask) { - /* - * go down to the end of multipaths, so that - * new entry goes into the end of rn_dupedkey - * chain. - */ - do { - t = tt; - tt = tt->rn_dupedkey; - } while (tt && t->rn_mask == tt->rn_mask); - break; - } -#endif if (tt->rn_mask == netmask) return (0); if (netmask == 0 || @@ -744,10 +729,8 @@ if (m->rm_flags & RNF_NORMAL) { mmask = m->rm_leaf->rn_mask; if (tt->rn_flags & RNF_NORMAL) { -#if !defined(RADIX_MPATH) log(LOG_ERR, "Non-unique normal route, mask not entered\n"); -#endif return (tt); } } else Index: sys/net/route.h =================================================================== --- sys/net/route.h +++ sys/net/route.h @@ -125,6 +125,10 @@ #define rt_numfibs V_rt_numfibs VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */ #define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs) + +/* Calculate flowid for locally-originated packets */ +#define V_fib_hash_outbound VNET(fib_hash_outbound) +VNET_DECLARE(u_int, fib_hash_outbound); #endif /* Index: sys/net/route/mpath_ctl.c =================================================================== --- sys/net/route/mpath_ctl.c +++ 
sys/net/route/mpath_ctl.c @@ -64,7 +64,21 @@ */ SYSCTL_DECL(_net_route); +VNET_DEFINE(u_int, fib_hash_outbound) = 0; +SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET, + &VNET_NAME(fib_hash_outbound), 0, + "Compute flowid for locally-originated packets"); +/* Default entropy to add to the hash calculation for outbound connections. */ +uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = { + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa, +}; + + /* * Tries to add @rnd_add nhop to the existing set of nhops (@nh_orig) for the * prefix specified by @rt. @@ -113,6 +127,17 @@ if (error != EAGAIN) break; RTSTAT_INC(rts_add_retry); + } + + if (V_fib_hash_outbound == 0 && error == 0 && + NH_IS_NHGRP(rc->rc_nh_new)) { + /* + * First multipath route got installed. Enable local + * outbound connections hashing. 
+ */ + if (bootverbose) + printf("FIB: enabled flowid calculation for locally-originated packets\n"); + V_fib_hash_outbound = 1; } return (error); Index: sys/net/route/route_var.h =================================================================== --- sys/net/route/route_var.h +++ sys/net/route/route_var.h @@ -307,4 +307,8 @@ void nhgrp_free(struct nhgrp_object *nhg); +/* Entropy data used for outbound hashing */ +#define MPATH_ENTROPY_KEY_LEN 40 +extern uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN]; + #endif Index: sys/netinet/in_fib.h =================================================================== --- sys/netinet/in_fib.h +++ sys/netinet/in_fib.h @@ -51,4 +51,7 @@ uint32_t flags, const struct ifnet *src_if); struct nhop_object *fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags); +uint32_t fib4_calc_software_hash(struct in_addr src, struct in_addr dst, + unsigned short src_port, unsigned short dst_port, char proto, + uint32_t *phashtype); #endif Index: sys/netinet/in_fib.c =================================================================== --- sys/netinet/in_fib.c +++ sys/netinet/in_fib.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -61,6 +62,40 @@ /* Verify struct route compatiblity */ /* Assert 'struct route_in' is compatible with 'struct route' */ CHK_STRUCT_ROUTE_COMPAT(struct route_in, ro_dst4); + +#ifdef ROUTE_MPATH +struct _hash_5tuple_ipv4 { + struct in_addr src; + struct in_addr dst; + unsigned short src_port; + unsigned short dst_port; + char proto; + char spare[3]; +}; +_Static_assert(sizeof(struct _hash_5tuple_ipv4) == 16, + "_hash_5tuple_ipv4 size is wrong"); + + +uint32_t +fib4_calc_software_hash(struct in_addr src, struct in_addr dst, + unsigned short src_port, unsigned short dst_port, char proto, + uint32_t *phashtype) +{ + struct _hash_5tuple_ipv4 data; + + data.src = src; + data.dst = dst; + data.src_port = src_port; + data.dst_port = dst_port; + data.proto = proto; + 
data.spare[0] = data.spare[1] = data.spare[2] = 0; + + *phashtype = M_HASHTYPE_OPAQUE; + + return (toeplitz_hash(MPATH_ENTROPY_KEY_LEN, mpath_entropy_key, + sizeof(data), (uint8_t *)&data)); +} +#endif /* * Looks up path in fib @fibnum specified by @dst. Index: sys/netinet/in_pcb.c =================================================================== --- sys/netinet/in_pcb.c +++ sys/netinet/in_pcb.c @@ -46,6 +46,7 @@ #include "opt_inet6.h" #include "opt_ratelimit.h" #include "opt_pcbgroup.h" +#include "opt_route.h" #include "opt_rss.h" #include @@ -1327,7 +1328,17 @@ lport = *lportp; faddr = sin->sin_addr; fport = sin->sin_port; +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_val, hash_type; + hash_val = fib4_calc_software_hash(laddr, faddr, 0, fport, + inp->inp_socket->so_proto->pr_protocol, &hash_type); + + inp->inp_flowid = hash_val; + inp->inp_flowtype = hash_type; + } +#endif if (!CK_STAILQ_EMPTY(&V_in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, Index: sys/netinet/raw_ip.c =================================================================== --- sys/netinet/raw_ip.c +++ sys/netinet/raw_ip.c @@ -38,6 +38,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_route.h" #include #include @@ -67,6 +68,7 @@ #include #include +#include #include #include #include @@ -484,6 +486,17 @@ ip->ip_len = htons(m->m_pkthdr.len); ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib4_calc_software_hash(ip->ip_src, + ip->ip_dst, 0, 0, ip->ip_p, &hash_type); + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); + flags |= IP_NODEFAULTFLOWID; + } +#endif if (jailed(inp->inp_cred)) { /* * prison_local_ip4() would be good enough but would @@ -519,7 +532,17 @@ return (EINVAL); ip = mtod(m, struct ip *); } +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_type, hash_val; + hash_val = 
fib4_calc_software_hash(ip->ip_src, + ip->ip_dst, 0, 0, ip->ip_p, &hash_type); + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); + flags |= IP_NODEFAULTFLOWID; + } +#endif INP_RLOCK(inp); /* * Don't allow both user specified and setsockopt options, Index: sys/netinet/udp_usrreq.c =================================================================== --- sys/netinet/udp_usrreq.c +++ sys/netinet/udp_usrreq.c @@ -1473,6 +1473,16 @@ ((struct ip *)ui)->ip_tos = tos; /* XXX */ UDPSTAT_INC(udps_opackets); +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib4_calc_software_hash(laddr, faddr, lport, fport, + inp->inp_socket->so_proto->pr_protocol, &hash_type); + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); + } +#endif /* * Setup flowid / RSS information for outbound socket. * Index: sys/netinet6/in6_fib.h =================================================================== --- sys/netinet6/in6_fib.h +++ sys/netinet6/in6_fib.h @@ -39,4 +39,7 @@ uint32_t scopeid, uint32_t flags, const struct ifnet *src_if); struct nhop_object *fib6_lookup_debugnet(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags); +uint32_t fib6_calc_software_hash(const struct in6_addr *src, + const struct in6_addr *dst, unsigned short src_port, unsigned short dst_port, + char proto, uint32_t *phashtype); #endif Index: sys/netinet6/in6_fib.c =================================================================== --- sys/netinet6/in6_fib.c +++ sys/netinet6/in6_fib.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -67,6 +68,39 @@ #ifdef INET6 CHK_STRUCT_ROUTE_COMPAT(struct route_in6, ro_dst); + +#ifdef ROUTE_MPATH +struct _hash_5tuple_ipv6 { + struct in6_addr src; + struct in6_addr dst; + unsigned short src_port; + unsigned short dst_port; + char proto; + char spare[3]; +}; +_Static_assert(sizeof(struct _hash_5tuple_ipv6) == 40, + "_hash_5tuple_ipv6 size is wrong"); + +uint32_t +fib6_calc_software_hash(const struct in6_addr *src, 
const struct in6_addr *dst, + unsigned short src_port, unsigned short dst_port, char proto, + uint32_t *phashtype) +{ + struct _hash_5tuple_ipv6 data; + + data.src = *src; + data.dst = *dst; + data.src_port = src_port; + data.dst_port = dst_port; + data.proto = proto; + data.spare[0] = data.spare[1] = data.spare[2] = 0; + + *phashtype = M_HASHTYPE_OPAQUE_HASH; + + return (toeplitz_hash(MPATH_ENTROPY_KEY_LEN, mpath_entropy_key, + sizeof(data), (uint8_t *)&data)); +} +#endif /* * Looks up path in fib @fibnum specified by @dst. Index: sys/netinet6/in6_pcb.c =================================================================== --- sys/netinet6/in6_pcb.c +++ sys/netinet6/in6_pcb.c @@ -73,6 +73,7 @@ #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_pcbgroup.h" +#include "opt_route.h" #include "opt_rss.h" #include @@ -423,6 +424,17 @@ INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib6_calc_software_hash(&inp->in6p_laddr, + &sin6->sin6_addr, 0, sin6->sin6_port, + inp->inp_socket->so_proto->pr_protocol, &hash_type); + inp->inp_flowid = hash_val; + inp->inp_flowtype = hash_type; + } +#endif /* * Call inner routine, to assign local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. Index: sys/netinet6/ip6_output.c =================================================================== --- sys/netinet6/ip6_output.c +++ sys/netinet6/ip6_output.c @@ -417,9 +417,6 @@ * * ifpp - XXX: just for statistics */ -/* - * XXX TODO: no flowid is assigned for outbound flows? 
- */ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, @@ -769,7 +766,8 @@ } } - nh = fib6_lookup(fibnum, &kdst, scopeid, NHR_NONE, 0); + nh = fib6_lookup(fibnum, &kdst, scopeid, NHR_NONE, + m->m_pkthdr.flowid); if (nh == NULL) { IP6STAT_INC(ip6s_noroute); /* No ifp in6_ifstat_inc(ifp, ifs6_out_discard); */ Index: sys/netinet6/raw_ip6.c =================================================================== --- sys/netinet6/raw_ip6.c +++ sys/netinet6/raw_ip6.c @@ -66,6 +66,7 @@ #include "opt_ipsec.h" #include "opt_inet6.h" +#include "opt_route.h" #include #include @@ -103,6 +104,7 @@ #include #include #include +#include #include #include @@ -462,6 +464,17 @@ } ip6 = mtod(m, struct ip6_hdr *); +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib6_calc_software_hash(&inp->in6p_laddr, + &dstsock->sin6_addr, 0, 0, so->so_proto->pr_protocol, + &hash_type); + inp->inp_flowid = hash_val; + inp->inp_flowtype = hash_type; + } +#endif /* * Source address selection. 
+ */ Index: sys/netinet6/udp6_usrreq.c =================================================================== --- sys/netinet6/udp6_usrreq.c +++ sys/netinet6/udp6_usrreq.c @@ -954,6 +954,17 @@ } flags = 0; +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib6_calc_software_hash(laddr, faddr, + inp->inp_lport, fport, + inp->inp_socket->so_proto->pr_protocol, &hash_type); + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); + } +#endif #ifdef RSS { uint32_t hash_val, hash_type; Index: tests/sys/netinet/output.sh =================================================================== --- tests/sys/netinet/output.sh +++ tests/sys/netinet/output.sh @@ -223,11 +223,19 @@ mpath_check() { - if [ "`sysctl -i -n net.route.multipath`" != 1 ]; then + if [ `sysctl -iW net.route.multipath | wc -l` != "1" ]; then atf_skip "This test requires ROUTE_MPATH enabled" fi } +mpath_enable() +{ + jexec $1 sysctl net.route.multipath=1 + if [ $? != 0 ]; then + atf_fail "Setting multipath in jail $1 failed". 
+ fi +} + atf_test_case "output_tcp_flowid_mpath_success" "cleanup" output_tcp_flowid_mpath_success_head() { @@ -258,6 +266,7 @@ lo_dst=$(vnet_mkloopback) vnet_mkjail ${jname}a ${epair0}a ${epair1}a ${lo_src} + mpath_enable ${jname}a # Setup transit IPv4 networks jexec ${jname}a ifconfig ${epair0}a up jexec ${jname}a ifconfig ${epair0}a inet 203.0.113.1/30 @@ -386,6 +395,7 @@ lo_dst=$(vnet_mkloopback) vnet_mkjail ${jname}a ${epair0}a ${epair1}a ${lo_src} + mpath_enable ${jname}a # Setup transit IPv4 networks jexec ${jname}a ifconfig ${epair0}a up jexec ${jname}a ifconfig ${epair0}a inet 203.0.113.1/30 @@ -509,6 +519,7 @@ lo_dst=$(vnet_mkloopback) vnet_mkjail ${jname}a ${epair0}a ${epair1}a ${lo_src} + mpath_enable ${jname}a # Setup transit IPv4 networks jexec ${jname}a ifconfig ${epair0}a up jexec ${jname}a ifconfig ${epair0}a inet 203.0.113.1/30 Index: tests/sys/netinet6/output6.sh =================================================================== --- tests/sys/netinet6/output6.sh +++ tests/sys/netinet6/output6.sh @@ -247,11 +247,20 @@ mpath_check() { - if [ "`sysctl -i -n net.route.multipath`" != 1 ]; then + if [ `sysctl -iW net.route.multipath | wc -l` != "1" ]; then atf_skip "This test requires ROUTE_MPATH enabled" fi } +mpath_enable() +{ + jexec $1 sysctl net.route.multipath=1 + if [ $? != 0 ]; then + atf_fail "Setting multipath in jail $1 failed". 
+ fi +} + + atf_test_case "output6_tcp_flowid_mpath_success" "cleanup" output6_tcp_flowid_mpath_success_head() { @@ -282,6 +291,7 @@ lo_dst=$(vnet_mkloopback) vnet_mkjail ${jname}a ${epair0}a ${epair1}a ${lo_src} + mpath_enable ${jname}a jls -N # enable link-local IPv6 jexec ${jname}a ndp -i ${epair0}a -- -disabled @@ -422,6 +432,7 @@ lo_dst=$(vnet_mkloopback) vnet_mkjail ${jname}a ${epair0}a ${epair1}a ${lo_src} + mpath_enable ${jname}a jls -N # enable link-local IPv6 jexec ${jname}a ndp -i ${epair0}a -- -disabled @@ -559,6 +570,7 @@ lo_dst=$(vnet_mkloopback) vnet_mkjail ${jname}a ${epair0}a ${epair1}a ${lo_src} + mpath_enable ${jname}a jls -N # enable link-local IPv6 jexec ${jname}a ndp -i ${epair0}a -- -disabled @@ -603,6 +615,9 @@ # A -> towards B via epair1a LL ll=`jexec ${jname}b ifconfig ${epair1}b inet6 | awk '$2~/^fe80:/{print$2}' | awk -F% '{print$1}'` jexec ${jname}a route add -6 -net ${net_dst}::/${plen} ${ll}%${epair1}a + + jexec ${jname}a netstat -6rnW + jexec ${jname}a netstat -6OnW # B towards A via epair0b LL ll=`jexec ${jname}a ifconfig ${epair1}a inet6 | awk '$2~/^fe80:/{print$2}' | awk -F% '{print$1}'`