Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -4158,7 +4158,7 @@ net/rtsock.c standard net/slcompress.c optional netgraph_vjc | sppp | \ netgraph_sppp -net/toeplitz.c optional inet rss | inet6 rss +net/toeplitz.c optional inet rss | inet6 rss | route_mpath net/vnet.c optional vimage net80211/ieee80211.c optional wlan net80211/ieee80211_acl.c optional wlan wlan_acl Index: sys/net/radix.c =================================================================== --- sys/net/radix.c +++ sys/net/radix.c @@ -624,21 +624,6 @@ saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); if (keyduplicated) { for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { -#ifdef RADIX_MPATH - /* permit multipath, if enabled for the family */ - if (rn_mpath_capable(head) && netmask == tt->rn_mask) { - /* - * go down to the end of multipaths, so that - * new entry goes into the end of rn_dupedkey - * chain. - */ - do { - t = tt; - tt = tt->rn_dupedkey; - } while (tt && t->rn_mask == tt->rn_mask); - break; - } -#endif if (tt->rn_mask == netmask) return (0); if (netmask == 0 || @@ -744,10 +729,8 @@ if (m->rm_flags & RNF_NORMAL) { mmask = m->rm_leaf->rn_mask; if (tt->rn_flags & RNF_NORMAL) { -#if !defined(RADIX_MPATH) log(LOG_ERR, "Non-unique normal route, mask not entered\n"); -#endif return (tt); } } else Index: sys/net/route.h =================================================================== --- sys/net/route.h +++ sys/net/route.h @@ -125,6 +125,10 @@ #define rt_numfibs V_rt_numfibs VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */ #define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs) + +/* Calculate flowid for locally-originated packets */ +#define V_fib_hash_outbound VNET(fib_hash_outbound) +VNET_DECLARE(u_int, fib_hash_outbound); #endif /* Index: sys/net/route/mpath_ctl.c =================================================================== --- sys/net/route/mpath_ctl.c +++ 
sys/net/route/mpath_ctl.c @@ -64,6 +64,20 @@ */ SYSCTL_DECL(_net_route); +VNET_DEFINE(u_int, fib_hash_outbound) = 0; +SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET, + &VNET_NAME(fib_hash_outbound), 0, + "Compute flowid for locally-originated packets"); + +/* Default entropy added to the hash calculation for outbound connections */ +uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = { + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa, +}; + /* * Tries to add @rnd_add nhop to the existing set of nhops (@nh_orig) for the @@ -115,6 +129,17 @@ RTSTAT_INC(rts_add_retry); } + if (V_fib_hash_outbound == 0 && error == 0 && + NH_IS_NHGRP(rc->rc_nh_new)) { + /* + * First multipath route got installed. Enable local + * outbound connections hashing. + */ + if (bootverbose) + printf("FIB: enabled flowid calculation for locally-originated packets\n"); + V_fib_hash_outbound = 1; + } + return (error); } Index: sys/net/route/route_var.h =================================================================== --- sys/net/route/route_var.h +++ sys/net/route/route_var.h @@ -307,4 +307,8 @@ void nhgrp_free(struct nhgrp_object *nhg); +/* Entropy data used for outbound hashing */ +#define MPATH_ENTROPY_KEY_LEN 40 +extern uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN]; + #endif Index: sys/netinet/in_fib.h =================================================================== --- sys/netinet/in_fib.h +++ sys/netinet/in_fib.h @@ -51,4 +51,7 @@ uint32_t flags, const struct ifnet *src_if); struct nhop_object *fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags); +uint32_t fib4_calc_software_hash(struct in_addr src, struct in_addr dst, + unsigned short src_port, unsigned short dst_port, char proto, + uint32_t *phashtype); #endif 
Index: sys/netinet/in_fib.c =================================================================== --- sys/netinet/in_fib.c +++ sys/netinet/in_fib.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,40 @@ /* Assert 'struct route_in' is compatible with 'struct route' */ CHK_STRUCT_ROUTE_COMPAT(struct route_in, ro_dst4); +#ifdef ROUTE_MPATH +struct _hash_5tuple_ipv4 { + struct in_addr src; + struct in_addr dst; + unsigned short src_port; + unsigned short dst_port; + char proto; + char spare[3]; +}; +_Static_assert(sizeof(struct _hash_5tuple_ipv4) == 16, + "_hash_5tuple_ipv4 size is wrong"); + + +uint32_t +fib4_calc_software_hash(struct in_addr src, struct in_addr dst, + unsigned short src_port, unsigned short dst_port, char proto, + uint32_t *phashtype) +{ + struct _hash_5tuple_ipv4 data; + + data.src = src; + data.dst = dst; + data.src_port = src_port; + data.dst_port = dst_port; + data.proto = proto; + data.spare[0] = data.spare[1] = data.spare[2] = 0; + + *phashtype = M_HASHTYPE_OPAQUE; + + return (toeplitz_hash(MPATH_ENTROPY_KEY_LEN, mpath_entropy_key, + sizeof(data), (uint8_t *)&data)); +} +#endif + /* * Looks up path in fib @fibnum specified by @dst. * Returns path nexthop on success. 
Nexthop is safe to use Index: sys/netinet/in_pcb.c =================================================================== --- sys/netinet/in_pcb.c +++ sys/netinet/in_pcb.c @@ -46,6 +46,7 @@ #include "opt_inet6.h" #include "opt_ratelimit.h" #include "opt_pcbgroup.h" +#include "opt_route.h" #include "opt_rss.h" #include @@ -1327,7 +1328,17 @@ lport = *lportp; faddr = sin->sin_addr; fport = sin->sin_port; +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_val, hash_type; + hash_val = fib4_calc_software_hash(laddr, faddr, 0, fport, + inp->inp_socket->so_proto->pr_protocol, &hash_type); + + inp->inp_flowid = hash_val; + inp->inp_flowtype = hash_type; + } +#endif if (!CK_STAILQ_EMPTY(&V_in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, Index: sys/netinet/raw_ip.c =================================================================== --- sys/netinet/raw_ip.c +++ sys/netinet/raw_ip.c @@ -38,6 +38,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_route.h" #include #include @@ -67,6 +68,7 @@ #include #include +#include #include #include #include @@ -484,6 +486,17 @@ ip->ip_len = htons(m->m_pkthdr.len); ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib4_calc_software_hash(ip->ip_src, + ip->ip_dst, 0, 0, ip->ip_p, &hash_type); + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); + flags |= IP_NODEFAULTFLOWID; + } +#endif if (jailed(inp->inp_cred)) { /* * prison_local_ip4() would be good enough but would @@ -519,7 +532,17 @@ return (EINVAL); ip = mtod(m, struct ip *); } - +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib4_calc_software_hash(ip->ip_dst, + ip->ip_src, 0, 0, ip->ip_p, &hash_type); + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); + flags |= IP_NODEFAULTFLOWID; + } +#endif INP_RLOCK(inp); /* * Don't allow both user specified 
and setsockopt options, Index: sys/netinet6/in6_fib.h =================================================================== --- sys/netinet6/in6_fib.h +++ sys/netinet6/in6_fib.h @@ -39,4 +39,7 @@ uint32_t scopeid, uint32_t flags, const struct ifnet *src_if); struct nhop_object *fib6_lookup_debugnet(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags); +uint32_t fib6_calc_software_hash(const struct in6_addr *src, + const struct in6_addr *dst, unsigned short src_port, unsigned short dst_port, + char proto, uint32_t *phashtype); #endif Index: sys/netinet6/in6_fib.c =================================================================== --- sys/netinet6/in6_fib.c +++ sys/netinet6/in6_fib.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -68,6 +69,39 @@ CHK_STRUCT_ROUTE_COMPAT(struct route_in6, ro_dst); +#ifdef ROUTE_MPATH +struct _hash_5tuple_ipv6 { + struct in6_addr src; + struct in6_addr dst; + unsigned short src_port; + unsigned short dst_port; + char proto; + char spare[3]; +}; +_Static_assert(sizeof(struct _hash_5tuple_ipv6) == 40, + "_hash_5tuple_ipv6 size is wrong"); + +uint32_t +fib6_calc_software_hash(const struct in6_addr *src, const struct in6_addr *dst, + unsigned short src_port, unsigned short dst_port, char proto, + uint32_t *phashtype) +{ + struct _hash_5tuple_ipv6 data; + + data.src = *src; + data.dst = *dst; + data.src_port = src_port; + data.dst_port = dst_port; + data.proto = proto; + data.spare[0] = data.spare[1] = data.spare[2] = 0; + + *phashtype = M_HASHTYPE_OPAQUE_HASH; + + return (toeplitz_hash(MPATH_ENTROPY_KEY_LEN, mpath_entropy_key, + sizeof(data), (uint8_t *)&data)); +} +#endif + /* * Looks up path in fib @fibnum specified by @dst. * Assumes scope is deembedded and provided in @scopeid. 
Index: sys/netinet6/in6_pcb.c =================================================================== --- sys/netinet6/in6_pcb.c +++ sys/netinet6/in6_pcb.c @@ -73,6 +73,7 @@ #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_pcbgroup.h" +#include "opt_route.h" #include "opt_rss.h" #include @@ -423,6 +424,17 @@ INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib6_calc_software_hash(&inp->in6p_laddr, + &sin6->sin6_addr, 0, sin6->sin6_port, + inp->inp_socket->so_proto->pr_protocol, &hash_type); + inp->inp_flowid = hash_val; + inp->inp_flowtype = hash_type; + } +#endif /* * Call inner routine, to assign local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. Index: sys/netinet6/raw_ip6.c =================================================================== --- sys/netinet6/raw_ip6.c +++ sys/netinet6/raw_ip6.c @@ -66,6 +66,7 @@ #include "opt_ipsec.h" #include "opt_inet6.h" +#include "opt_route.h" #include #include @@ -103,6 +104,7 @@ #include #include #include +#include #include #include @@ -462,6 +464,17 @@ } ip6 = mtod(m, struct ip6_hdr *); +#ifdef ROUTE_MPATH + if (V_fib_hash_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib6_calc_software_hash(&inp->in6p_laddr, + &dstsock->sin6_addr, 0, 0, so->so_proto->pr_protocol, + &hash_type); + inp->inp_flowid = hash_val; + inp->inp_flowtype = hash_type; + } +#endif /* * Source address selection. */