Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/in_rss.c
Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | |||||
#include <net/netisr.h> | #include <net/netisr.h> | ||||
#include <netinet/in.h> | #include <netinet/in.h> | ||||
#include <netinet/in_pcb.h> | #include <netinet/in_pcb.h> | ||||
#include <netinet/in_rss.h> | #include <netinet/in_rss.h> | ||||
#include <netinet/in_var.h> | #include <netinet/in_var.h> | ||||
#include <netinet/toeplitz.h> | #include <netinet/toeplitz.h> | ||||
/* for software rss hash support */ | |||||
#include <netinet/ip.h> | |||||
#include <netinet/tcp.h> | |||||
#include <netinet/udp.h> | |||||
/*- | /*- | ||||
* Operating system parts of receiver-side scaling (RSS), which allows | * Operating system parts of receiver-side scaling (RSS), which allows | ||||
* network cards to direct flows to particular receive queues based on hashes | * network cards to direct flows to particular receive queues based on hashes | ||||
* of header tuples. This implementation aligns RSS buckets with connection | * of header tuples. This implementation aligns RSS buckets with connection | ||||
* groups at the TCP/IP layer, so each bucket is associated with exactly one | * groups at the TCP/IP layer, so each bucket is associated with exactly one | ||||
* group. As a result, the group lookup structures (and lock) should have an | * group. As a result, the group lookup structures (and lock) should have an | ||||
* effective affinity with exactly one CPU. | * effective affinity with exactly one CPU. | ||||
* | * | ||||
▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines | |||||
* Drivers may supplement this table with a separate CPU<->queue table when | * Drivers may supplement this table with a separate CPU<->queue table when | ||||
* programming devices. | * programming devices. | ||||
*/ | */ | ||||
/* One entry of the RSS table; it records the CPU a bucket is tied to. */
struct rss_table_entry {
	uint8_t		rte_cpu;	/* CPU affinity of bucket. */
};
/* The table itself, statically sized to RSS_TABLE_MAXLEN entries. */
static struct rss_table_entry	rss_table[RSS_TABLE_MAXLEN];

/*
 * Forward declaration: the software hash routines below call this helper
 * before its definition appears later in the file.
 */
static inline u_int	rss_gethashconfig_local(void);
static void | static void | ||||
rss_init(__unused void *arg) | rss_init(__unused void *arg) | ||||
{ | { | ||||
u_int i; | u_int i; | ||||
u_int cpuid; | u_int cpuid; | ||||
/* | /* | ||||
* Validate tunables, coerce to sensible values. | * Validate tunables, coerce to sensible values. | ||||
▲ Show 20 Lines • Show All 305 Lines • ▼ Show 20 Lines | rss_m2bucket(struct mbuf *m, uint32_t *bucket_id) | ||||
M_ASSERTPKTHDR(m); | M_ASSERTPKTHDR(m); | ||||
return(rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), | return(rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), | ||||
bucket_id)); | bucket_id)); | ||||
} | } | ||||
/* | /* | ||||
* Calculate an appropriate ipv4 2-tuple or 4-tuple given the given | |||||
* IPv4 source/destination address, UDP or TCP source/destination ports | |||||
* and the protocol type. | |||||
* | |||||
* The protocol code may wish to do a software hash of the given | |||||
* tuple. This depends upon the currently configured RSS hash types. | |||||
* | |||||
* dir is 0 for in, 1 for out. | |||||
grehan: I'd recommend a #define at a minimum for INGRESS and EGRESS. in/out is easy to get wrong… | |||||
* proto is the IPv4 protocol type. | |||||
*/ | |||||
int | |||||
rss_software_hash_proto_v4(struct in_addr src, struct in_addr dst, | |||||
u_short src_port, u_short dst_port, int proto, int dir, | |||||
uint32_t *hashval, uint32_t *hashtype) | |||||
{ | |||||
struct in_addr s, d; | |||||
u_short sp, dp; | |||||
uint32_t hash; | |||||
/* first, assign data appropriately */ | |||||
if (dir == 0) { | |||||
s = src; | |||||
d = dst; | |||||
sp = src_port; | |||||
dp = dst_port; | |||||
} else { | |||||
s = dst; | |||||
d = src; | |||||
sp = dst_port; | |||||
dp = src_port; | |||||
} | |||||
/* | |||||
* Next, choose the hash type depending upon the protocol | |||||
* identifier. | |||||
*/ | |||||
if ((proto == IPPROTO_TCP) && | |||||
(rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4)) { | |||||
hash = rss_hash_ip4_4tuple(s, sp, d, dp); | |||||
*hashval = hash; | |||||
*hashtype = M_HASHTYPE_RSS_TCP_IPV4; | |||||
return (0); | |||||
} else if ((proto == IPPROTO_UDP) && | |||||
(rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4)) { | |||||
hash = rss_hash_ip4_4tuple(s, sp, d, dp); | |||||
*hashval = hash; | |||||
*hashtype = M_HASHTYPE_RSS_UDP_IPV4; | |||||
return (0); | |||||
} else if (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) { | |||||
/* RSS doesn't hash on other protocols like SCTP; so 2-tuple */ | |||||
hash = rss_hash_ip4_2tuple(s, d); | |||||
*hashval = hash; | |||||
*hashtype = M_HASHTYPE_RSS_IPV4; | |||||
return (0); | |||||
} | |||||
/* No configured available hashtypes! */ | |||||
return (-1); | |||||
} | |||||
/* | |||||
* Do a software calculation of the RSS for the given mbuf. | |||||
* | |||||
* This is typically used by the input path to recalculate the RSS after | |||||
* some form of packet processing (eg de-capsulation, IP fragment reassembly.) | |||||
* | |||||
* dir is 0 for in, 1 for out. | |||||
* | |||||
* Returns 0 if a hash was done, -1 if no hash was done, +1 if | |||||
* the mbuf already had a valid RSS flowid. | |||||
* | |||||
* This function doesn't modify the mbuf. It's up to the caller to | |||||
* assign flowid/flowtype as appropriate. | |||||
* | |||||
* TODO: Make this more efficient! | |||||
*/ | |||||
int | |||||
rss_mbuf_software_hash_v4(const struct mbuf *m, int dir, uint32_t *hashval, | |||||
uint32_t *hashtype) | |||||
{ | |||||
const struct ip *ip; | |||||
const struct tcphdr *th; | |||||
const struct udphdr *uh; | |||||
uint8_t proto; | |||||
int iphlen; | |||||
/* | |||||
* First, validate that the mbuf we have is long enough | |||||
* to have an IPv4 header in it. | |||||
*/ | |||||
if (m->m_pkthdr.len < (sizeof(struct ip))) | |||||
return (-1); | |||||
if (m->m_len < (sizeof(struct ip))) | |||||
return (-1); | |||||
/* Ok, let's dereference that */ | |||||
ip = mtod(m, struct ip *); | |||||
proto = ip->ip_p; | |||||
/* XXX unaligned access! */ | |||||
iphlen = ip->ip_hl << 2; | |||||
/* | |||||
* If the mbuf flowid/flowtype matches the packet type, | |||||
* then signal to the owner that it can trust the flowid/flowtype | |||||
* details. | |||||
*/ | |||||
if (m->m_flags & M_FLOWID) { | |||||
uint32_t flowid, flowtype; | |||||
flowid = m->m_pkthdr.flowid; | |||||
flowtype = M_HASHTYPE_GET(m); | |||||
switch (proto) { | |||||
case IPPROTO_UDP: | |||||
if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4) && | |||||
flowtype == M_HASHTYPE_RSS_UDP_IPV4) { | |||||
return (1); | |||||
} | |||||
if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) && | |||||
flowtype == M_HASHTYPE_RSS_IPV4) { | |||||
return (1); | |||||
} | |||||
break; | |||||
case IPPROTO_TCP: | |||||
if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4) && | |||||
flowtype == M_HASHTYPE_RSS_TCP_IPV4) { | |||||
return (1); | |||||
} | |||||
if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) && | |||||
flowtype == M_HASHTYPE_RSS_IPV4) { | |||||
return (1); | |||||
} | |||||
break; | |||||
default: | |||||
if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) && | |||||
flowtype == M_HASHTYPE_RSS_IPV4) { | |||||
return (1); | |||||
} | |||||
break; | |||||
} | |||||
} | |||||
/* | |||||
* Decode enough information to make a hash decision. | |||||
*/ | |||||
if (proto == IPPROTO_TCP) { | |||||
Not Done Inline ActionsInsert options into the PCS scripts I wrote for RSS verification and see what happens ! grehan: Insert options into the PCS scripts I wrote for RSS verification and see what happens ! | |||||
if (m->m_len < iphlen + sizeof(struct tcphdr)) | |||||
return (-1); | |||||
th = (struct tcphdr *)((caddr_t)ip + iphlen); | |||||
return rss_software_hash_proto_v4(ip->ip_src, ip->ip_dst, | |||||
th->th_sport, | |||||
th->th_dport, | |||||
proto, | |||||
dir, | |||||
hashval, | |||||
hashtype); | |||||
} else if (proto == IPPROTO_UDP) { | |||||
uh = (struct udphdr *)((caddr_t)ip + iphlen); | |||||
if (m->m_len < iphlen + sizeof(struct udphdr)) | |||||
return (-1); | |||||
return rss_software_hash_proto_v4(ip->ip_src, ip->ip_dst, | |||||
uh->uh_sport, | |||||
uh->uh_dport, | |||||
proto, | |||||
dir, | |||||
hashval, | |||||
hashtype); | |||||
} else { | |||||
/* Default to 2-tuple hash */ | |||||
return rss_software_hash_proto_v4(ip->ip_src, ip->ip_dst, | |||||
0, /* source port */ | |||||
Not Done Inline ActionsSeems like an easy test to write. grehan: Seems like an easy test to write. | |||||
0, /* destination port */ | |||||
0, /* IPPROTO_IP */ | |||||
dir, | |||||
hashval, | |||||
hashtype); | |||||
} | |||||
/* Default (shouldn't get here) - no hashing done */ | |||||
printf("%s: .. eep!\n", __func__); | |||||
return (-1); | |||||
} | |||||
/* | |||||
* Query the RSS hash algorithm. | * Query the RSS hash algorithm. | ||||
*/ | */ | ||||
u_int | u_int | ||||
rss_gethashalgo(void) | rss_gethashalgo(void) | ||||
{ | { | ||||
return (rss_hashalgo); | return (rss_hashalgo); | ||||
} | } | ||||
Show All 31 Lines | |||||
*/ | */ | ||||
u_int | u_int | ||||
rss_getnumcpus(void) | rss_getnumcpus(void) | ||||
{ | { | ||||
return (rss_ncpus); | return (rss_ncpus); | ||||
} | } | ||||
/* | static inline u_int | ||||
* Return the supported RSS hash configuration. | rss_gethashconfig_local(void) | ||||
* | |||||
* NICs should query this to determine what to configure in their redirection | |||||
* matching table. | |||||
*/ | |||||
u_int | |||||
rss_gethashconfig(void) | |||||
{ | { | ||||
/* Return 4-tuple for TCP; 2-tuple for others */ | /* Return 4-tuple for TCP; 2-tuple for others */ | ||||
/* | /* | ||||
* UDP may fragment more often than TCP and thus we'll end up with | * UDP may fragment more often than TCP and thus we'll end up with | ||||
* NICs returning 2-tuple fragments. | * NICs returning 2-tuple fragments. | ||||
* udp_init() and udplite_init() both currently initialise things | * udp_init() and udplite_init() both currently initialise things | ||||
* as 2-tuple. | * as 2-tuple. | ||||
* So for now disable UDP 4-tuple hashing until all of the other | * So for now disable UDP 4-tuple hashing until all of the other | ||||
* pieces are in place. | * pieces are in place. | ||||
*/ | */ | ||||
return ( | return ( | ||||
RSS_HASHTYPE_RSS_IPV4 | RSS_HASHTYPE_RSS_IPV4 | ||||
| RSS_HASHTYPE_RSS_TCP_IPV4 | | RSS_HASHTYPE_RSS_TCP_IPV4 | ||||
| RSS_HASHTYPE_RSS_IPV6 | | RSS_HASHTYPE_RSS_IPV6 | ||||
| RSS_HASHTYPE_RSS_TCP_IPV6 | | RSS_HASHTYPE_RSS_TCP_IPV6 | ||||
| RSS_HASHTYPE_RSS_IPV6_EX | | RSS_HASHTYPE_RSS_IPV6_EX | ||||
| RSS_HASHTYPE_RSS_TCP_IPV6_EX | | RSS_HASHTYPE_RSS_TCP_IPV6_EX | ||||
#if 0 | #if 0 | ||||
| RSS_HASHTYPE_RSS_UDP_IPV4 | | RSS_HASHTYPE_RSS_UDP_IPV4 | ||||
| RSS_HASHTYPE_RSS_UDP_IPV4_EX | | RSS_HASHTYPE_RSS_UDP_IPV4_EX | ||||
| RSS_HASHTYPE_RSS_UDP_IPV6 | | RSS_HASHTYPE_RSS_UDP_IPV6 | ||||
| RSS_HASHTYPE_RSS_UDP_IPV6_EX | | RSS_HASHTYPE_RSS_UDP_IPV6_EX | ||||
#endif | #endif | ||||
); | ); | ||||
} | |||||
/* | |||||
* Return the supported RSS hash configuration. | |||||
* | |||||
* NICs should query this to determine what to configure in their redirection | |||||
* matching table. | |||||
*/ | |||||
u_int | |||||
rss_gethashconfig(void) | |||||
{ | |||||
return (rss_gethashconfig_local()); | |||||
} | } | ||||
/* | /* | ||||
* XXXRW: Confirm that sysctl -a won't dump this keying material, don't want | * XXXRW: Confirm that sysctl -a won't dump this keying material, don't want | ||||
* it appearing in debugging output unnecessarily. | * it appearing in debugging output unnecessarily. | ||||
*/ | */ | ||||
static int | static int | ||||
sysctl_rss_key(SYSCTL_HANDLER_ARGS) | sysctl_rss_key(SYSCTL_HANDLER_ARGS) | ||||
▲ Show 20 Lines • Show All 50 Lines • Show Last 20 Lines |
I'd recommend a #define at a minimum for INGRESS and EGRESS. in/out is easy to get wrong depending on what your viewpoint is (in to the NIC ? in to the stack ?)