Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -4180,7 +4180,7 @@ net/rtsock.c standard net/slcompress.c optional netgraph_vjc | sppp | \ netgraph_sppp -net/toeplitz.c optional inet rss | inet6 rss +net/toeplitz.c optional inet rss | inet6 rss | route_mpath net/vnet.c optional vimage net80211/ieee80211.c optional wlan net80211/ieee80211_acl.c optional wlan wlan_acl Index: sys/net/radix.c =================================================================== --- sys/net/radix.c +++ sys/net/radix.c @@ -624,21 +624,6 @@ saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); if (keyduplicated) { for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { -#ifdef RADIX_MPATH - /* permit multipath, if enabled for the family */ - if (rn_mpath_capable(head) && netmask == tt->rn_mask) { - /* - * go down to the end of multipaths, so that - * new entry goes into the end of rn_dupedkey - * chain. - */ - do { - t = tt; - tt = tt->rn_dupedkey; - } while (tt && t->rn_mask == tt->rn_mask); - break; - } -#endif if (tt->rn_mask == netmask) return (0); if (netmask == 0 || @@ -744,10 +729,8 @@ if (m->rm_flags & RNF_NORMAL) { mmask = m->rm_leaf->rn_mask; if (tt->rn_flags & RNF_NORMAL) { -#if !defined(RADIX_MPATH) log(LOG_ERR, "Non-unique normal route, mask not entered\n"); -#endif return (tt); } } else Index: sys/net/route.h =================================================================== --- sys/net/route.h +++ sys/net/route.h @@ -125,7 +125,41 @@ #define rt_numfibs V_rt_numfibs VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */ #define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs) + +/* Calculate flowid for locally-originated packets */ +#define V_fib_hash_outbound VNET(fib_hash_outbound) +VNET_DECLARE(u_int, fib_hash_outbound); + +/* Outbound flowid generation rules */ +#ifdef RSS + +#define fib4_calc_packet_hash xps_proto_software_hash_v4 +#define fib6_calc_packet_hash 
xps_proto_software_hash_v6 +#define calc_flowid_outbound_sendto true + +#ifdef ROUTE_MPATH +#define calc_flowid_outbound V_fib_hash_outbound +#else +#define calc_flowid_outbound false #endif + +#else /* !RSS */ + +#define fib4_calc_packet_hash fib4_calc_software_hash +#define fib6_calc_packet_hash fib6_calc_software_hash + +#ifdef ROUTE_MPATH +#define calc_flowid_outbound_sendto V_fib_hash_outbound +#define calc_flowid_outbound V_fib_hash_outbound +#else +#define calc_flowid_outbound_sendto false +#define calc_flowid_outbound false +#endif + +#endif /* RSS */ + + +#endif /* _KERNEL */ /* * We distinguish between routes to hosts and routes to networks, Index: sys/net/route/mpath_ctl.c =================================================================== --- sys/net/route/mpath_ctl.c +++ sys/net/route/mpath_ctl.c @@ -64,7 +64,21 @@ */ SYSCTL_DECL(_net_route); +VNET_DEFINE(u_int, fib_hash_outbound) = 0; +SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET, + &VNET_NAME(fib_hash_outbound), 0, + "Compute flowid for locally-originated packets"); +/* Default entropy to add to the hash calculation for the outbound connections*/ +uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = { + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa, +}; + + /* * Tries to add @rnd_add nhop to the existing set of nhops (@nh_orig) for the * prefix specified by @rt. @@ -113,6 +127,17 @@ if (error != EAGAIN) break; RTSTAT_INC(rts_add_retry); + } + + if (V_fib_hash_outbound == 0 && error == 0 && + NH_IS_NHGRP(rc->rc_nh_new)) { + /* + * First multipath route got installed. Enable local + * outbound connections hashing. 
+ */ + if (bootverbose) + printf("FIB: enabled flowid calculation for locally-originated packets\n"); + V_fib_hash_outbound = 1; } return (error); Index: sys/net/route/route_var.h =================================================================== --- sys/net/route/route_var.h +++ sys/net/route/route_var.h @@ -307,4 +307,8 @@ void nhgrp_free(struct nhgrp_object *nhg); +/* Entropy data used for outbound hashing */ +#define MPATH_ENTROPY_KEY_LEN 40 +extern uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN]; + #endif Index: sys/netinet/in_fib.h =================================================================== --- sys/netinet/in_fib.h +++ sys/netinet/in_fib.h @@ -51,4 +51,7 @@ uint32_t flags, const struct ifnet *src_if); struct nhop_object *fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags); +uint32_t fib4_calc_software_hash(struct in_addr src, struct in_addr dst, + unsigned short src_port, unsigned short dst_port, char proto, + uint32_t *phashtype); #endif Index: sys/netinet/in_fib.c =================================================================== --- sys/netinet/in_fib.c +++ sys/netinet/in_fib.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -61,6 +62,40 @@ /* Verify struct route compatiblity */ /* Assert 'struct route_in' is compatible with 'struct route' */ CHK_STRUCT_ROUTE_COMPAT(struct route_in, ro_dst4); + +#ifdef ROUTE_MPATH +struct _hash_5tuple_ipv4 { + struct in_addr src; + struct in_addr dst; + unsigned short src_port; + unsigned short dst_port; + char proto; + char spare[3]; +}; +_Static_assert(sizeof(struct _hash_5tuple_ipv4) == 16, + "_hash_5tuple_ipv4 size is wrong"); + + +uint32_t +fib4_calc_software_hash(struct in_addr src, struct in_addr dst, + unsigned short src_port, unsigned short dst_port, char proto, + uint32_t *phashtype) +{ + struct _hash_5tuple_ipv4 data; + + data.src = src; + data.dst = dst; + data.src_port = src_port; + data.dst_port = dst_port; + data.proto = proto; + 
data.spare[0] = data.spare[1] = data.spare[2] = 0; + + *phashtype = M_HASHTYPE_OPAQUE; + + return (toeplitz_hash(MPATH_ENTROPY_KEY_LEN, mpath_entropy_key, + sizeof(data), (uint8_t *)&data)); +} +#endif /* * Looks up path in fib @fibnum specified by @dst. Index: sys/netinet/in_pcb.c =================================================================== --- sys/netinet/in_pcb.c +++ sys/netinet/in_pcb.c @@ -46,6 +46,7 @@ #include "opt_inet6.h" #include "opt_ratelimit.h" #include "opt_pcbgroup.h" +#include "opt_route.h" #include "opt_rss.h" #include @@ -1327,7 +1328,17 @@ lport = *lportp; faddr = sin->sin_addr; fport = sin->sin_port; +#ifdef ROUTE_MPATH + if (calc_flowid_outbound) { + uint32_t hash_val, hash_type; + hash_val = fib4_calc_software_hash(laddr, faddr, 0, fport, + inp->inp_socket->so_proto->pr_protocol, &hash_type); + + inp->inp_flowid = hash_val; + inp->inp_flowtype = hash_type; + } +#endif if (!CK_STAILQ_EMPTY(&V_in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, Index: sys/netinet/in_rss.h =================================================================== --- sys/netinet/in_rss.h +++ sys/netinet/in_rss.h @@ -53,5 +53,7 @@ uint32_t *hashtype); struct mbuf * rss_soft_m2cpuid_v4(struct mbuf *m, uintptr_t source, u_int *cpuid); +uint32_t xps_proto_software_hash_v4(struct in_addr s, struct in_addr d, + u_short sp, u_short dp, int proto, uint32_t *hashtype); #endif /* !_NETINET_IN_RSS_H_ */ Index: sys/netinet/in_rss.c =================================================================== --- sys/netinet/in_rss.c +++ sys/netinet/in_rss.c @@ -152,6 +152,48 @@ } /* + * Calculate an appropriate IPv4 2-tuple or 4-tuple hash for the given + * IPv4 source/destination address, UDP or TCP source/destination ports + * and the protocol type. + * + * The protocol code may wish to do a software hash of the given + * tuple. This depends upon the currently configured RSS hash types. 
+ * + * It assumes the packet source/destination address + * are in "outgoing" packet order (ie, destination is "far" address.) + */ +uint32_t +xps_proto_software_hash_v4(struct in_addr s, struct in_addr d, + u_short sp, u_short dp, int proto, uint32_t *hashtype) +{ + uint32_t hash; + + /* + * Next, choose the hash type depending upon the protocol + * identifier. + */ + if ((proto == IPPROTO_TCP) && + (rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) { + hash = rss_hash_ip4_4tuple(d, dp, s, sp); + *hashtype = M_HASHTYPE_RSS_TCP_IPV4; + return (hash); + } else if ((proto == IPPROTO_UDP) && + (rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) { + hash = rss_hash_ip4_4tuple(d, dp, s, sp); + *hashtype = M_HASHTYPE_RSS_UDP_IPV4; + return (hash); + } else if (rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) { + /* RSS doesn't hash on other protocols like SCTP; so 2-tuple */ + hash = rss_hash_ip4_2tuple(d, s); + *hashtype = M_HASHTYPE_RSS_IPV4; + return (hash); + } + + *hashtype = M_HASHTYPE_NONE; + return (0); +} + +/* * Do a software calculation of the RSS for the given mbuf. 
* * This is typically used by the input path to recalculate the RSS after Index: sys/netinet/raw_ip.c =================================================================== --- sys/netinet/raw_ip.c +++ sys/netinet/raw_ip.c @@ -38,6 +38,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_route.h" #include #include @@ -67,6 +68,7 @@ #include #include +#include #include #include #include @@ -484,6 +486,17 @@ ip->ip_len = htons(m->m_pkthdr.len); ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; +#ifdef ROUTE_MPATH + if (calc_flowid_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib4_calc_software_hash(ip->ip_src, + ip->ip_dst, 0, 0, ip->ip_p, &hash_type); + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); + flags |= IP_NODEFAULTFLOWID; + } +#endif if (jailed(inp->inp_cred)) { /* * prison_local_ip4() would be good enough but would @@ -519,7 +532,17 @@ return (EINVAL); ip = mtod(m, struct ip *); } +#ifdef ROUTE_MPATH + if (calc_flowid_outbound) { + uint32_t hash_type, hash_val; + hash_val = fib4_calc_software_hash(ip->ip_dst, + ip->ip_src, 0, 0, ip->ip_p, &hash_type); + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); + flags |= IP_NODEFAULTFLOWID; + } +#endif INP_RLOCK(inp); /* * Don't allow both user specified and setsockopt options, Index: sys/netinet/udp_usrreq.c =================================================================== --- sys/netinet/udp_usrreq.c +++ sys/netinet/udp_usrreq.c @@ -44,6 +44,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_route.h" #include "opt_rss.h" #include @@ -76,6 +77,7 @@ #include #include +#include #include #include #include @@ -1483,30 +1485,14 @@ m->m_pkthdr.flowid = flowid; M_HASHTYPE_SET(m, flowtype); } -#ifdef RSS - else { +#if defined(ROUTE_MPATH) || defined(RSS) + else if (calc_flowid_outbound_sendto) { uint32_t hash_val, hash_type; - /* - * Calculate an appropriate RSS hash for UDP and - * UDP Lite. 
- * - * The called function will take care of figuring out - * whether a 2-tuple or 4-tuple hash is required based - * on the currently configured scheme. - * - * Later later on connected socket values should be - * cached in the inpcb and reused, rather than constantly - * re-calculating it. - * - * UDP Lite is a different protocol number and will - * likely end up being hashed as a 2-tuple until - * RSS / NICs grow UDP Lite protocol awareness. - */ - if (rss_proto_software_hash_v4(faddr, laddr, fport, lport, - pr, &hash_val, &hash_type) == 0) { - m->m_pkthdr.flowid = hash_val; - M_HASHTYPE_SET(m, hash_type); - } + + hash_val = fib4_calc_packet_hash(laddr, faddr, + lport, fport, pr, &hash_type); + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); } /* Index: sys/netinet6/in6_fib.h =================================================================== --- sys/netinet6/in6_fib.h +++ sys/netinet6/in6_fib.h @@ -39,4 +39,7 @@ uint32_t scopeid, uint32_t flags, const struct ifnet *src_if); struct nhop_object *fib6_lookup_debugnet(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags); +uint32_t fib6_calc_software_hash(const struct in6_addr *src, + const struct in6_addr *dst, unsigned short src_port, unsigned short dst_port, + char proto, uint32_t *phashtype); #endif Index: sys/netinet6/in6_fib.c =================================================================== --- sys/netinet6/in6_fib.c +++ sys/netinet6/in6_fib.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -67,6 +68,39 @@ #ifdef INET6 CHK_STRUCT_ROUTE_COMPAT(struct route_in6, ro_dst); + +#ifdef ROUTE_MPATH +struct _hash_5tuple_ipv6 { + struct in6_addr src; + struct in6_addr dst; + unsigned short src_port; + unsigned short dst_port; + char proto; + char spare[3]; +}; +_Static_assert(sizeof(struct _hash_5tuple_ipv6) == 40, + "_hash_5tuple_ipv6 size is wrong"); + +uint32_t +fib6_calc_software_hash(const struct in6_addr *src, const struct in6_addr *dst, + 
unsigned short src_port, unsigned short dst_port, char proto, + uint32_t *phashtype) +{ + struct _hash_5tuple_ipv6 data; + + data.src = *src; + data.dst = *dst; + data.src_port = src_port; + data.dst_port = dst_port; + data.proto = proto; + data.spare[0] = data.spare[1] = data.spare[2] = 0; + + *phashtype = M_HASHTYPE_OPAQUE_HASH; + + return (toeplitz_hash(MPATH_ENTROPY_KEY_LEN, mpath_entropy_key, + sizeof(data), (uint8_t *)&data)); +} +#endif /* * Looks up path in fib @fibnum specified by @dst. Index: sys/netinet6/in6_pcb.c =================================================================== --- sys/netinet6/in6_pcb.c +++ sys/netinet6/in6_pcb.c @@ -73,6 +73,7 @@ #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_pcbgroup.h" +#include "opt_route.h" #include "opt_rss.h" #include @@ -423,6 +424,17 @@ INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); +#ifdef ROUTE_MPATH + if (calc_flowid_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib6_calc_software_hash(&inp->in6p_laddr, + &sin6->sin6_addr, 0, sin6->sin6_port, + inp->inp_socket->so_proto->pr_protocol, &hash_type); + inp->inp_flowid = hash_val; + inp->inp_flowtype = hash_type; + } +#endif /* * Call inner routine, to assign local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. 
Index: sys/netinet6/in6_rss.h =================================================================== --- sys/netinet6/in6_rss.h +++ sys/netinet6/in6_rss.h @@ -54,5 +54,8 @@ uint32_t *hashtype); struct mbuf * rss_soft_m2cpuid_v6(struct mbuf *m, uintptr_t source, u_int *cpuid); +uint32_t xps_proto_software_hash_v6(const struct in6_addr *s, + const struct in6_addr *d, u_short sp, u_short dp, + int proto, uint32_t *hashtype); #endif /* !_NETINET6_IN6_RSS_H_ */ Index: sys/netinet6/in6_rss.c =================================================================== --- sys/netinet6/in6_rss.c +++ sys/netinet6/in6_rss.c @@ -153,6 +153,50 @@ } /* + * Calculate an appropriate IPv6 2-tuple or 4-tuple hash for the given + * IPv6 source/destination address, UDP or TCP source/destination ports + * and the protocol type. + * + * The protocol code may wish to do a software hash of the given + * tuple. This depends upon the currently configured RSS hash types. + * + * It assumes the packet source/destination addresses + * are in "outgoing" packet order (ie, destination is "far" address.) + */ +uint32_t +xps_proto_software_hash_v6(const struct in6_addr *s, const struct in6_addr *d, + u_short sp, u_short dp, int proto, uint32_t *hashtype) +{ + + uint32_t hash; + + /* + * Next, choose the hash type depending upon the protocol + * identifier. 
+ */ + if ((proto == IPPROTO_TCP) && + (rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) { + hash = rss_hash_ip6_4tuple(d, dp, s, sp); + *hashtype = M_HASHTYPE_RSS_TCP_IPV6; + return (hash); + } else if ((proto == IPPROTO_UDP) && + (rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) { + hash = rss_hash_ip6_4tuple(d, dp, s, sp); + *hashtype = M_HASHTYPE_RSS_UDP_IPV6; + return (hash); + } else if (rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV6) { + /* RSS doesn't hash on other protocols like SCTP; so 2-tuple */ + hash = rss_hash_ip6_2tuple(d, s); + *hashtype = M_HASHTYPE_RSS_IPV6; + return (hash); + } + + *hashtype = M_HASHTYPE_NONE; + return (0); +} + + +/* * Do a software calculation of the RSS for the given mbuf. * * This is typically used by the input path to recalculate the RSS after Index: sys/netinet6/ip6_output.c =================================================================== --- sys/netinet6/ip6_output.c +++ sys/netinet6/ip6_output.c @@ -419,9 +419,6 @@ * * ifpp - XXX: just for statistics */ -/* - * XXX TODO: no flowid is assigned for outbound flows? 
- */ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, @@ -775,7 +772,8 @@ } } - nh = fib6_lookup(fibnum, &kdst, scopeid, NHR_NONE, 0); + nh = fib6_lookup(fibnum, &kdst, scopeid, NHR_NONE, + m->m_pkthdr.flowid); if (nh == NULL) { IP6STAT_INC(ip6s_noroute); /* No ifp in6_ifstat_inc(ifp, ifs6_out_discard); */ Index: sys/netinet6/raw_ip6.c =================================================================== --- sys/netinet6/raw_ip6.c +++ sys/netinet6/raw_ip6.c @@ -66,6 +66,7 @@ #include "opt_ipsec.h" #include "opt_inet6.h" +#include "opt_route.h" #include #include @@ -103,6 +104,7 @@ #include #include #include +#include #include #include @@ -462,6 +464,17 @@ } ip6 = mtod(m, struct ip6_hdr *); +#ifdef ROUTE_MPATH + if (calc_flowid_outbound) { + uint32_t hash_type, hash_val; + + hash_val = fib6_calc_software_hash(&inp->in6p_laddr, + &dstsock->sin6_addr, 0, 0, so->so_proto->pr_protocol, + &hash_type); + inp->inp_flowid = hash_val; + inp->inp_flowtype = hash_type; + } +#endif /* * Source address selection. */ Index: sys/netinet6/udp6_usrreq.c =================================================================== --- sys/netinet6/udp6_usrreq.c +++ sys/netinet6/udp6_usrreq.c @@ -75,6 +75,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_route.h" #include "opt_rss.h" #include @@ -115,6 +116,7 @@ #include #include +#include #include #include #include @@ -954,42 +956,20 @@ } flags = 0; -#ifdef RSS - { - uint32_t hash_val, hash_type; +#if defined(ROUTE_MPATH) || defined(RSS) + if (calc_flowid_outbound_sendto) { + uint32_t hash_type, hash_val; uint8_t pr; pr = inp->inp_socket->so_proto->pr_protocol; - /* - * Calculate an appropriate RSS hash for UDP and - * UDP Lite. - * - * The called function will take care of figuring out - * whether a 2-tuple or 4-tuple hash is required based - * on the currently configured scheme. 
- * - * Later later on connected socket values should be - * cached in the inpcb and reused, rather than constantly - * re-calculating it. - * - * UDP Lite is a different protocol number and will - * likely end up being hashed as a 2-tuple until - * RSS / NICs grow UDP Lite protocol awareness. - */ - if (rss_proto_software_hash_v6(faddr, laddr, fport, - inp->inp_lport, pr, &hash_val, &hash_type) == 0) { - m->m_pkthdr.flowid = hash_val; - M_HASHTYPE_SET(m, hash_type); - } - /* - * Don't override with the inp cached flowid. - * - * Until the whole UDP path is vetted, it may actually - * be incorrect. - */ - flags |= IP_NODEFAULTFLOWID; + hash_val = fib6_calc_packet_hash(laddr, faddr, + inp->inp_lport, fport, pr, &hash_type); + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); } + /* do not use inp flowid */ + flags |= IP_NODEFAULTFLOWID; #endif UDPSTAT_INC(udps_opackets);