Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F158660084
D527.id1109.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
16 KB
Referenced Files
None
Subscribers
None
D527.id1109.diff
View Options
Index: sys/netinet/in_rss.h
===================================================================
--- sys/netinet/in_rss.h
+++ sys/netinet/in_rss.h
@@ -83,6 +83,16 @@
#define RSS_KEYSIZE 40
/*
+ * For RSS hash methods that do a software hash on an mbuf, the packet
+ * direction (ingress / egress) is required.
+ *
+ * The default direction (INGRESS) is the "receive into the NIC" - i.e.,
+ * what the hardware is hashing on.
+ */
+#define RSS_HASH_PKT_INGRESS 0
+#define RSS_HASH_PKT_EGRESS 1
+
+/*
* Device driver interfaces to query RSS properties that must be programmed
* into hardware.
*/
@@ -116,4 +126,15 @@
uint32_t *bucket_id);
int rss_m2bucket(struct mbuf *m, uint32_t *bucket_id);
+/*
+ * Functions to calculate a software RSS hash for a given mbuf or
+ * packet detail.
+ */
+int rss_mbuf_software_hash_v4(const struct mbuf *m, int dir,
+ uint32_t *hashval, uint32_t *hashtype);
+int rss_proto_software_hash_v4(struct in_addr src,
+ struct in_addr dst, u_short src_port, u_short dst_port,
+ int proto, int dir, uint32_t *hashval,
+ uint32_t *hashtype);
+
#endif /* !_NETINET_IN_RSS_H_ */
Index: sys/netinet/in_rss.c
===================================================================
--- sys/netinet/in_rss.c
+++ sys/netinet/in_rss.c
@@ -57,6 +57,11 @@
#include <netinet/in_var.h>
#include <netinet/toeplitz.h>
+/* for software rss hash support */
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
/*-
* Operating system parts of receiver-side scaling (RSS), which allows
* network cards to direct flows to particular receive queues based on hashes
@@ -170,6 +175,8 @@
};
static struct rss_table_entry rss_table[RSS_TABLE_MAXLEN];
+static inline u_int rss_gethashconfig_local(void);
+
static void
rss_init(__unused void *arg)
{
@@ -491,6 +498,188 @@
}
/*
+ * Calculate an appropriate IPv4 2-tuple or 4-tuple hash for the given
+ * IPv4 source/destination address, UDP or TCP source/destination ports
+ * and the protocol type.
+ *
+ * The protocol code may wish to do a software hash of the given
+ * tuple. This depends upon the currently configured RSS hash types.
+ *
+ * dir is RSS_HASH_PKT_INGRESS for in, RSS_HASH_PKT_EGRESS for out.
+ * proto is the IPv4 protocol type.
+ */
+int
+rss_proto_software_hash_v4(struct in_addr src, struct in_addr dst,
+ u_short src_port, u_short dst_port, int proto, int dir,
+ uint32_t *hashval, uint32_t *hashtype)
+{
+ struct in_addr s, d;
+ u_short sp, dp;
+ uint32_t hash;
+
+ /* first, assign data appropriately */
+ if (dir == RSS_HASH_PKT_INGRESS) {
+ s = src;
+ d = dst;
+ sp = src_port;
+ dp = dst_port;
+ } else {
+ s = dst;
+ d = src;
+ sp = dst_port;
+ dp = src_port;
+ }
+
+ /*
+ * Next, choose the hash type depending upon the protocol
+ * identifier.
+ */
+ if ((proto == IPPROTO_TCP) &&
+ (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
+ hash = rss_hash_ip4_4tuple(s, sp, d, dp);
+ *hashval = hash;
+ *hashtype = M_HASHTYPE_RSS_TCP_IPV4;
+ return (0);
+ } else if ((proto == IPPROTO_UDP) &&
+ (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
+ hash = rss_hash_ip4_4tuple(s, sp, d, dp);
+ *hashval = hash;
+ *hashtype = M_HASHTYPE_RSS_UDP_IPV4;
+ return (0);
+ } else if (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) {
+ /* RSS doesn't hash on other protocols like SCTP; so 2-tuple */
+ hash = rss_hash_ip4_2tuple(s, d);
+ *hashval = hash;
+ *hashtype = M_HASHTYPE_RSS_IPV4;
+ return (0);
+ }
+
+ /* No configured available hashtypes! */
+ return (-1);
+}
+
+/*
+ * Do a software calculation of the RSS for the given mbuf.
+ *
+ * This is typically used by the input path to recalculate the RSS after
+ * some form of packet processing (e.g. de-capsulation, IP fragment reassembly).
+ *
+ * dir is the packet direction - RSS_HASH_PKT_INGRESS for incoming and
+ * RSS_HASH_PKT_EGRESS for outgoing.
+ *
+ * Returns 0 if a hash was done, -1 if no hash was done, +1 if
+ * the mbuf already had a valid RSS flowid.
+ *
+ * This function doesn't modify the mbuf. It's up to the caller to
+ * assign flowid/flowtype as appropriate.
+ */
+int
+rss_mbuf_software_hash_v4(const struct mbuf *m, int dir, uint32_t *hashval,
+ uint32_t *hashtype)
+{
+ const struct ip *ip;
+ const struct tcphdr *th;
+ const struct udphdr *uh;
+ uint8_t proto;
+ int iphlen;
+
+ /*
+ * First, validate that the mbuf we have is long enough
+ * to have an IPv4 header in it.
+ */
+
+ if (m->m_pkthdr.len < (sizeof(struct ip)))
+ return (-1);
+ if (m->m_len < (sizeof(struct ip)))
+ return (-1);
+
+ /* Ok, let's dereference that */
+ ip = mtod(m, struct ip *);
+ proto = ip->ip_p;
+ iphlen = ip->ip_hl << 2;
+
+ /*
+ * If the mbuf flowid/flowtype matches the packet type,
+ * then signal to the owner that it can trust the flowid/flowtype
+ * details.
+ */
+ if (m->m_flags & M_FLOWID) {
+ uint32_t flowid, flowtype;
+
+ flowid = m->m_pkthdr.flowid;
+ flowtype = M_HASHTYPE_GET(m);
+
+ switch (proto) {
+ case IPPROTO_UDP:
+ if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4) &&
+ flowtype == M_HASHTYPE_RSS_UDP_IPV4) {
+ return (1);
+ }
+ if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) &&
+ flowtype == M_HASHTYPE_RSS_IPV4) {
+ return (1);
+ }
+ break;
+ case IPPROTO_TCP:
+ if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4) &&
+ flowtype == M_HASHTYPE_RSS_TCP_IPV4) {
+ return (1);
+ }
+ if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) &&
+ flowtype == M_HASHTYPE_RSS_IPV4) {
+ return (1);
+ }
+ break;
+ default:
+ if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) &&
+ flowtype == M_HASHTYPE_RSS_IPV4) {
+ return (1);
+ }
+ break;
+ }
+ }
+
+ /*
+ * Decode enough information to make a hash decision.
+ *
+ * XXX TODO: does the hardware hash on 4-tuple if IP
+ * options are present?
+ */
+ if (proto == IPPROTO_TCP) {
+ if (m->m_len < iphlen + sizeof(struct tcphdr))
+ return (-1);
+ th = (struct tcphdr *)((caddr_t)ip + iphlen);
+ return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst,
+ th->th_sport,
+ th->th_dport,
+ proto,
+ dir,
+ hashval,
+ hashtype);
+ } else if (proto == IPPROTO_UDP) {
+ uh = (struct udphdr *)((caddr_t)ip + iphlen);
+ if (m->m_len < iphlen + sizeof(struct udphdr))
+ return (-1);
+ return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst,
+ uh->uh_sport,
+ uh->uh_dport,
+ proto,
+ dir,
+ hashval,
+ hashtype);
+ } else {
+ /* Default to 2-tuple hash */
+ return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst,
+ 0, /* source port */
+ 0, /* destination port */
+ 0, /* IPPROTO_IP */
+ dir,
+ hashval,
+ hashtype);
+ }
+}
+
+/*
* Query the RSS hash algorithm.
*/
u_int
@@ -538,15 +727,10 @@
return (rss_ncpus);
}
-/*
- * Return the supported RSS hash configuration.
- *
- * NICs should query this to determine what to configure in their redirection
- * matching table.
- */
-u_int
-rss_gethashconfig(void)
+static inline u_int
+rss_gethashconfig_local(void)
{
+
/* Return 4-tuple for TCP; 2-tuple for others */
/*
* UDP may fragment more often than TCP and thus we'll end up with
@@ -573,6 +757,19 @@
}
/*
+ * Return the supported RSS hash configuration.
+ *
+ * NICs should query this to determine what to configure in their redirection
+ * matching table.
+ */
+u_int
+rss_gethashconfig(void)
+{
+
+ return (rss_gethashconfig_local());
+}
+
+/*
* XXXRW: Confirm that sysctl -a won't dump this keying material, don't want
* it appearing in debugging output unnecessarily.
*/
Index: sys/netinet/ip_input.c
===================================================================
--- sys/netinet/ip_input.c
+++ sys/netinet/ip_input.c
@@ -37,6 +37,7 @@
#include "opt_ipstealth.h"
#include "opt_ipsec.h"
#include "opt_route.h"
+#include "opt_rss.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -77,6 +78,7 @@
#ifdef IPSEC
#include <netinet/ip_ipsec.h>
#endif /* IPSEC */
+#include <netinet/in_rss.h>
#include <sys/socketvar.h>
@@ -140,11 +142,21 @@
VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */
+/*
+ * We may need to re-inject packets into the IP stack for further work.
+ * In this instance, use the CPU policy and query the RSS layer for the
+ * relevant CPU ID to use.
+ */
static struct netisr_handler ip_nh = {
.nh_name = "ip",
.nh_handler = ip_input,
.nh_proto = NETISR_IP,
+#ifdef RSS
+ .nh_m2cpuid = rss_m2cpuid,
+ .nh_policy = NETISR_POLICY_CPU,
+#else
.nh_policy = NETISR_POLICY_FLOW,
+#endif
};
extern struct domain inetdomain;
@@ -368,13 +380,18 @@
M_ASSERTPKTHDR(m);
- if (m->m_flags & M_FASTFWD_OURS) {
- m->m_flags &= ~M_FASTFWD_OURS;
+ if (m->m_flags & (M_REINJECT_OURS | M_FASTFWD_OURS)) {
/* Set up some basics that will be used later. */
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
ip_len = ntohs(ip->ip_len);
- goto ours;
+ if (m->m_flags & M_REINJECT_OURS) {
+ m->m_flags &= ~(M_REINJECT_OURS|M_FASTFWD_OURS);
+ goto reinject_ours;
+ } else {
+ m->m_flags &= ~(M_REINJECT_OURS|M_FASTFWD_OURS);
+ goto ours;
+ }
}
IPSTAT_INC(ips_total);
@@ -463,6 +480,7 @@
} else
m_adj(m, ip_len - m->m_pkthdr.len);
}
+
#ifdef IPSEC
/*
* Bypass packet filtering for packets previously handled by IPsec.
@@ -721,6 +739,8 @@
goto bad;
#endif /* IPSEC */
+reinject_ours:
+
/*
* Switch out to protocol's input routine.
*/
@@ -817,6 +837,9 @@
int i, hlen, next;
u_int8_t ecn, ecn0;
u_short hash;
+#ifdef RSS
+ uint32_t rss_hash, rss_type;
+#endif
/* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
if (V_maxnipq == 0 || V_maxfragsperpacket == 0) {
@@ -1106,6 +1129,41 @@
m_fixhdr(m);
IPSTAT_INC(ips_reassembled);
IPQ_UNLOCK();
+
+#ifdef RSS
+ /*
+ * Query the RSS layer for the flowid / flowtype for the
+ * mbuf payload.
+ *
+ * For now, just assume we have to calculate a new one.
+ * Later on we should check to see if the assigned flowid matches
+ * what RSS wants for the given IP protocol and if so, just keep it.
+ *
+ * We then queue into the relevant netisr so it can be dispatched
+ * to the correct CPU.
+ *
+ * Note - this may return 1, which means the flowid in the mbuf
+ * is correct for the configured RSS hash types and can be used.
+ */
+ if (rss_mbuf_software_hash_v4(m, 0, &rss_hash, &rss_type) == 0) {
+ m->m_pkthdr.flowid = rss_hash;
+ M_HASHTYPE_SET(m, rss_type);
+ m->m_flags |= M_FLOWID;
+ }
+
+ /*
+ * Queue/dispatch for reprocessing.
+ *
+ * Note: this is much slower than just handling the frame in the
+ * current receive context. It's likely worth investigating
+ * why this is.
+ */
+ m->m_flags |= M_REINJECT_OURS;
+ netisr_dispatch(NETISR_IP, m);
+ return (NULL);
+#endif
+
+ /* Handle in-line */
return (m);
dropfrag:
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -145,7 +145,9 @@
if (inp != NULL) {
INP_LOCK_ASSERT(inp);
M_SETFIB(m, inp->inp_inc.inc_fibnum);
- if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) {
+
+ if (((flags & IP_NODEFAULTFLOWID) == 0) &&
+ inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) {
m->m_pkthdr.flowid = inp->inp_flowid;
M_HASHTYPE_SET(m, inp->inp_flowtype);
m->m_flags |= M_FLOWID;
Index: sys/netinet/ip_var.h
===================================================================
--- sys/netinet/ip_var.h
+++ sys/netinet/ip_var.h
@@ -161,6 +161,7 @@
#define IP_SENDTOIF 0x8 /* send on specific ifnet */
#define IP_ROUTETOIF SO_DONTROUTE /* 0x10 bypass routing tables */
#define IP_ALLOWBROADCAST SO_BROADCAST /* 0x20 can send broadcast packets */
+#define IP_NODEFAULTFLOWID 0x40 /* Don't set the flowid from inp */
#ifdef __NO_STRICT_ALIGNMENT
#define IP_HDR_ALIGNED_P(ip) 1
Index: sys/netinet/udp_usrreq.c
===================================================================
--- sys/netinet/udp_usrreq.c
+++ sys/netinet/udp_usrreq.c
@@ -43,6 +43,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_rss.h"
#include <sys/param.h>
#include <sys/domain.h>
@@ -89,6 +90,7 @@
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/udplite.h>
+#include <netinet/in_rss.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -206,6 +208,13 @@
udp_init(void)
{
+ /*
+ * For now default to 2-tuple UDP hashing - until the fragment
+ * reassembly code can also update the flowid.
+ *
+ * Once we can calculate the flowid that way and re-establish
+ * a 4-tuple, flip this to 4-tuple.
+ */
in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
"udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE,
IPI_HASHFIELDS_2TUPLE);
@@ -1395,6 +1404,60 @@
((struct ip *)ui)->ip_tos = tos; /* XXX */
UDPSTAT_INC(udps_opackets);
+ /*
+ * Setup flowid / RSS information for outbound socket.
+ *
+ * Once the UDP code decides to set a flowid some other way,
+ * this allows the flowid to be overridden by userland.
+ *
+ * Remember ip_output() overrides with the inp flowid details
+ * if they exist.
+ *
+ * .. and ip_output() -> flowtable_lookup() also assigns
+ * a flowid. Ugh.
+ */
+#ifdef RSS
+ {
+ uint32_t hash_val, hash_type;
+ /*
+ * Calculate an appropriate RSS hash for UDP and
+ * UDP Lite.
+ *
+ * The called function will take care of figuring out
+ * whether a 2-tuple or 4-tuple hash is required based
+ * on the currently configured scheme.
+ *
+ * Later on, connected socket values should be
+ * cached in the inpcb and reused, rather than constantly
+ * re-calculating them.
+ *
+ * UDP Lite is a different protocol number and will
+ * likely end up being hashed as a 2-tuple until
+ * RSS / NICs grow UDP Lite protocol awareness.
+ */
+ if (rss_proto_software_hash_v4(laddr, faddr, lport, fport,
+ pr, RSS_HASH_PKT_EGRESS,
+ &hash_val, &hash_type) == 0) {
+ m->m_pkthdr.flowid = hash_val;
+ m->m_flags |= M_FLOWID;
+ M_HASHTYPE_SET(m, hash_type);
+ }
+ }
+#endif
+
+ /*
+ * Don't override with the inp cached flowid value.
+ *
+ * Depending upon the kind of send being done, the inp
+ * flowid/flowtype values may actually not be appropriate
+ * for this particular socket send.
+ *
+ * We should either leave the flowid at zero (which is what is
+ * currently done) or set it to some software generated
+ * hash value based on the packet contents.
+ */
+ ipflags |= IP_NODEFAULTFLOWID;
+
if (unlock_udbinfo == UH_WLOCKED)
INP_HASH_WUNLOCK(pcbinfo);
else if (unlock_udbinfo == UH_RLOCKED)
Index: sys/netinet6/in6.h
===================================================================
--- sys/netinet6/in6.h
+++ sys/netinet6/in6.h
@@ -640,6 +640,8 @@
#define M_LOOP M_PROTO6
#define M_AUTHIPDGM M_PROTO7
#define M_RTALERT_MLD M_PROTO8
+#define M_REINJECT_OURS M_PROTO9 /* Re-injected from some
+ * de-encaps / defrag process */
#ifdef _KERNEL
struct cmsghdr;
Index: sys/netinet6/ip6_output.c
===================================================================
--- sys/netinet6/ip6_output.c
+++ sys/netinet6/ip6_output.c
@@ -260,8 +260,14 @@
goto bad;
}
- if (inp != NULL)
+ if (inp != NULL) {
M_SETFIB(m, inp->inp_inc.inc_fibnum);
+ if (((flags & IP_NODEFAULTFLOWID) == 0) &&
+ (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID))) {
+ m->m_pkthdr.flowid = inp->inp_flowid;
+ m->m_flags |= M_FLOWID;
+ }
+ }
finaldst = ip6->ip6_dst;
bzero(&exthdrs, sizeof(exthdrs));
Index: sys/netinet6/udp6_usrreq.c
===================================================================
--- sys/netinet6/udp6_usrreq.c
+++ sys/netinet6/udp6_usrreq.c
@@ -74,6 +74,7 @@
#include "opt_inet6.h"
#include "opt_ipfw.h"
#include "opt_ipsec.h"
+#include "opt_rss.h"
#include <sys/param.h>
#include <sys/jail.h>
@@ -111,6 +112,7 @@
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/udplite.h>
+#include <netinet/in_rss.h>
#include <netinet6/ip6protosw.h>
#include <netinet6/ip6_var.h>
@@ -850,8 +852,28 @@
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
}
+ /*
+ * XXX for now assume UDP is 2-tuple.
+ * Later on this may become configurable as 4-tuple;
+ * we should support that.
+ *
+ * XXX .. and we should likely cache this in the inpcb.
+ */
+#ifdef RSS
+ m->m_pkthdr.flowid = rss_hash_ip6_2tuple(*faddr, *laddr);
+ m->m_flags |= M_FLOWID;
+ M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
+#endif
flags = 0;
+ /*
+ * Don't override with the inp cached flowid.
+ *
+ * Until the whole UDP path is vetted, it may actually
+ * be incorrect.
+ */
+ flags |= IP_NODEFAULTFLOWID;
+
UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
UDPSTAT_INC(udps_opackets);
error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Jun 5, 10:42 AM (3 h, 40 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33750745
Default Alt Text
D527.id1109.diff (16 KB)
Attached To
Mode
D527: Begin adding IPv4 fragment handling and IPv4/IPv6 UDP awareness to the RSS code.
Attached
Detach File
Event Timeline
Log In to Comment