First I implemented IPV6_RECVFLOWID and IPV6_RECVRSSBUCKETID [1].
Then I patched librss to use it [2].
And I slightly modified rss-udp-srv [3] to count the flowid of each packet:
```
diff --git a/rss-udp-srv/udp_srv.c b/rss-udp-srv/udp_srv.c
index 8f49f2a..ad75361 100644
--- a/rss-udp-srv/udp_srv.c
+++ b/rss-udp-srv/udp_srv.c
@@ -1,5 +1,6 @@
#include <stdio.h>
#include <stdlib.h>
+#include <assert.h>
#include <unistd.h>
#include <string.h>
#include <err.h>
@@ -43,6 +44,14 @@ struct udp_srv_thread {
struct event *ev_read6, *ev_write6;
};
+struct flowid_node {
+ uint32_t flowid;
+ int64_t count;
+};
+
+#define NR_FLOWIDS 10
+struct flowid_node flowids[NR_FLOWIDS];
+
static int
thr_sock_set_reuseaddr(int fd, int reuse_addr)
{
@@ -96,7 +105,7 @@ thr_rss_udp_listen_sock_setup(int fd, int af_family, int rss_bucket)
return (-1);
}
-#if 0
+#if 1
if (rss_sock_set_recvrss(fd, af_family, rss_bucket) < 0) {
return (-1);
}
@@ -208,12 +217,13 @@ error:
}
static void
-thr_parse_msghdr(struct msghdr *m)
+thr_parse_msghdr(struct msghdr *m, int ver)
{
const struct cmsghdr *c;
uint32_t flowid;
uint32_t flowtype;
uint32_t flow_rssbucket;
+ int i;
for (c = CMSG_FIRSTHDR(m); c != NULL; c = CMSG_NXTHDR(m, c)) {
#if 0
@@ -221,28 +231,72 @@ thr_parse_msghdr(struct msghdr *m)
printf(" msghdr type: %d\n", c->cmsg_type);
printf(" msghdr len: %d\n", c->cmsg_len);
#endif
- if (c->cmsg_level != IPPROTO_IP)
- continue;
- switch (c->cmsg_type) {
- case IP_FLOWID:
- flowid = *(uint32_t *) CMSG_DATA(c);
- break;
- case IP_FLOWTYPE:
- flowtype = *(uint32_t *) CMSG_DATA(c);
- break;
- case IP_RSSBUCKETID:
- flow_rssbucket = *(uint32_t *) CMSG_DATA(c);
- break;
+ if (ver == 4) {
+ if (c->cmsg_level != IPPROTO_IP)
+ continue;
+ switch (c->cmsg_type) {
+ case IP_FLOWID:
+ flowid = *(uint32_t *) CMSG_DATA(c);
+ break;
+ case IP_FLOWTYPE:
+ flowtype = *(uint32_t *) CMSG_DATA(c);
+ break;
+ case IP_RSSBUCKETID:
+ flow_rssbucket = *(uint32_t *) CMSG_DATA(c);
+ break;
+ }
+ } else {
+ if (c->cmsg_level != IPPROTO_IPV6)
+ continue;
+ switch (c->cmsg_type) {
+ case IPV6_FLOWID:
+ flowid = *(uint32_t *) CMSG_DATA(c);
+ break;
+ case IPV6_FLOWTYPE:
+ flowtype = *(uint32_t *) CMSG_DATA(c);
+ break;
+ case IPV6_RSSBUCKETID:
+ flow_rssbucket = *(uint32_t *) CMSG_DATA(c);
+ break;
+ }
+ }
+ }
+
+ if (ver == 4) {
+ if (flowtype != 7) {
+ printf("flowtype=%d\n", flowtype);
+ exit(127);
+ }
+ } else {
+ if (flowtype != 9) {
+ printf("flowtype=%d\n", flowtype);
+ exit(127);
}
}
#if 0
- printf(" flowid=0x%08x; flowtype=%d; bucket=%d\n", flowid, flowtype, flow_rssbucket);
+ printf(" flowid=0x%08x; flowtype=%d; bucket=%d; version=%d\n", flowid, flowtype, flow_rssbucket, ver);
#endif
+
+ for (i = 0; i < NR_FLOWIDS && flowids[i].count != -1; i++) {
+ if (flowid == flowids[i].flowid) {
+ flowids[i].count++;
+ break;
+ }
+ }
+ if (i == NR_FLOWIDS) {
+ fprintf(stderr, "Too many flowids appeared\n");
+ exit(127);
+ }
+ if (flowids[i].count == -1) {
+ flowids[i].flowid = flowid;
+ flowids[i].count = 1;
+ }
}
static void
thr_ev_timer(int fd, short what, void *arg)
{
+#if 0
struct udp_srv_thread *th = arg;
struct timeval tv;
@@ -260,11 +314,12 @@ thr_ev_timer(int fd, short what, void *arg)
tv.tv_sec = 1;
tv.tv_usec = 0;
evtimer_add(th->ev_timer, &tv);
+#endif
}
static void
-thr_udp_ev_read(int fd, short what, void *arg)
+thr_udp_ev_read(int fd, short what, void *arg, int ver)
{
struct udp_srv_thread *th = arg;
/* XXX should be thread-local, and a larger buffer, and likely a queue .. */
@@ -274,7 +329,7 @@ thr_udp_ev_read(int fd, short what, void *arg)
struct sockaddr_storage sin;
socklen_t sin_len;
-#if 0
+#if 1
/* for the msghdr contents */
struct msghdr m;
char msgbuf[2048];
@@ -286,7 +341,7 @@ thr_udp_ev_read(int fd, short what, void *arg)
/* Loop read UDP frames until EWOULDBLOCK or 1024 frames */
while (i < 10240) {
-#if 0
+#if 1
iov[0].iov_base = buf;
iov[0].iov_len = 2048;
@@ -299,22 +354,24 @@ thr_udp_ev_read(int fd, short what, void *arg)
m.msg_flags = 0;
ret = recvmsg(fd, &m, 0);
-#endif
+#else
sin_len = sizeof(sin);
ret = recvfrom(fd, buf, 2048, MSG_DONTWAIT,
(struct sockaddr *) &sin,
&sin_len);
+#endif
if (ret <= 0) {
if (errno != EWOULDBLOCK)
warn("%s: recv", __func__);
break;
}
-#if 0
- printf(" recv: len=%d, controllen=%d\n",
- (int) ret,
- (int) m.msg_controllen);
- thr_parse_msghdr(&m);
+#if 1
+ //printf(" recv: len=%d, controllen=%d\n",
+ // (int) ret,
+ // (int) m.msg_controllen);
+ thr_parse_msghdr(&m, ver);
+ //printf("%s\n", buf);
#endif
i++;
th->recv_pkts++;
@@ -336,9 +393,15 @@ thr_udp_ev_read(int fd, short what, void *arg)
}
static void
+thr_udp_ev_read4(int fd, short what, void *arg)
+{
+ thr_udp_ev_read(fd, what, arg, 4);
+}
+
+static void
thr_udp_ev_read6(int fd, short what, void *arg)
{
- thr_udp_ev_read(fd, what, arg);
+ thr_udp_ev_read(fd, what, arg, 6);
}
static void *
@@ -388,7 +451,7 @@ thr_udp_srv_init(void *arg)
/* Create read and write readiness events */
th->ev_read = event_new(th->b, th->s4, EV_READ | EV_PERSIST,
- thr_udp_ev_read, th);
+ thr_udp_ev_read4, th);
event_add(th->ev_read, NULL);
th->ev_read6 = event_new(th->b, th->s6, EV_READ | EV_PERSIST,
@@ -428,7 +491,7 @@ main(int argc, char *argv[])
struct in6_addr lcl6_addr;
int do_response;
- if (argc < 3) {
+ if (argc < 2) {
printf("Usage: %s <response> <ipv4 lcl address>\n", argv[0]);
printf(" response: 1 if each RX packet generates a TX response, else 0\n");
printf(" ipv4 lcl address: IPv4 local address to bind to\n");
@@ -438,7 +501,7 @@ main(int argc, char *argv[])
lcl_addr.s_addr = INADDR_ANY;
lcl6_addr = in6addr_any;
do_response = atoi(argv[1]);
- (void) inet_aton(argv[2], &lcl_addr);
+ //(void) inet_aton(argv[2], &lcl_addr);
ncpu = rss_getsysctlint("net.inet.rss.ncpus");
if (ncpu < 0) {
@@ -488,6 +551,11 @@ main(int argc, char *argv[])
if (sigemptyset(&sa.sa_mask) == -1 || sigaction(SIGPIPE, &sa, 0) == -1)
perror("failed to ignore SIGPIPE; sigaction");
+ for (i = 0; i < NR_FLOWIDS; i++) {
+ flowids[i].flowid = -1;
+ flowids[i].count = -1;
+ }
+
for (i = 0; i < nbuckets; i++) {
th[i].tid = i;
th[i].rss_bucket = i;
@@ -504,6 +572,23 @@ main(int argc, char *argv[])
(void) pthread_create(&th[i].thr, NULL, thr_udp_srv_init, &th[i]);
}
+#if 0
+ while (1) {
+ for (i = 0; i < nbuckets; i++) {
+ printf("bucket%d=%lu ", i, th[i].recv_pkts);
+ }
+ printf("\n");
+ sleep(1);
+ }
+#endif
+ while (1) {
+ for (i = 0; i < NR_FLOWIDS && flowids[i].count != -1; i++)
+ printf("count(flowid=%08x)=%ld ", flowids[i].flowid,
+ flowids[i].count);
+ printf("\n");
+ sleep(1);
+ }
+
/* Wait */
for (i = 0; i < nbuckets; i++) {
(void) pthread_join(th[i].thr, NULL);
```
I also enabled UDP 4-tuple hashing to make sure the RSS hash is
recalculated with the type RSS_HASHTYPE_RSS_UDP_IPV4 or
RSS_HASHTYPE_RSS_UDP_IPV6 by np_m2cpuid (for testing purposes only).
```
diff --git a/sys/net/rss_config.c b/sys/net/rss_config.c
index e7e8eb4..320ed9b 100644
--- a/sys/net/rss_config.c
+++ b/sys/net/rss_config.c
@@ -483,14 +483,14 @@ rss_gethashconfig(void)
return (
RSS_HASHTYPE_RSS_IPV4
| RSS_HASHTYPE_RSS_TCP_IPV4
+ | RSS_HASHTYPE_RSS_UDP_IPV4
| RSS_HASHTYPE_RSS_IPV6
| RSS_HASHTYPE_RSS_TCP_IPV6
+ | RSS_HASHTYPE_RSS_UDP_IPV6
| RSS_HASHTYPE_RSS_IPV6_EX
| RSS_HASHTYPE_RSS_TCP_IPV6_EX
#if 0
- | RSS_HASHTYPE_RSS_UDP_IPV4
| RSS_HASHTYPE_RSS_UDP_IPV4_EX
- | RSS_HASHTYPE_RSS_UDP_IPV6
| RSS_HASHTYPE_RSS_UDP_IPV6_EX
#endif
);
```
Then I use pktgen [4] to generate UDP packets whose UDP payload
length varies in the range of 100 to 10000 bytes (packets will be
fragmented when the payload exceeds 800 bytes), while keeping the 4-tuple
(saddr, sport, daddr, dport) constant, and inject them into tap(4).
So the flowid of each packet received by rss-udp-srv should be the
same even if the packet is fragmented.
The script that is used to execute pktgen automatically:
```
% cat auto
#!/bin/sh
sudo ./pktgen -i tap0 -6 -n 10000 -l 100 # No fragment, 10000 packets
sudo ./pktgen -i tap0 -6 -n 10000 -l 100 -f # atomic fragments
sudo ./pktgen -i tap0 -6 -n 10000 -l 1000 # 2 fragments
sudo ./pktgen -i tap0 -6 -n 10000 -l 10000 # 13 fragments
```
The outputs of netstat(1):
```
% netstat -s -p ip6
ip6:
170000 total packets received
0 with size smaller than minimum
0 with data size < data length
0 with bad options
0 with incorrect version number
160000 fragments received
0 fragments dropped (dup or out of space)
0 fragments dropped after timeout
0 fragments that exceeded limit
30000 packets reassembled ok
170000 packets for this host
0 packets forwarded
0 packets not forwardable
0 redirects sent
4111 packets sent from this host
0 packets sent with fabricated ip header
0 output packets dropped due to no bufs, etc.
30 output packets discarded due to no route
0 output datagrams fragmented
0 fragments created
0 datagrams that can't be fragmented
0 packets that violated scope rules
0 multicast packets which we don't join
Input histogram:
UDP: 10000
fragment: 160000
Mbuf statistics:
10000 one mbuf
160000 one ext mbuf
0 two or more ext mbuf
0 packets whose headers are not contiguous
0 tunneling packets that can't find gif
0 packets discarded because of too many headers
01 failures of source address selection
source addresses on a non-outgoing I/F
1 addresses scope=%x
Source addresses selection rule applied:
1 same address
```
160000 fragments were received, and 30000 packets were reassembled ok.
It is the expected result.
The outputs of rss-udp-srv (some lines are omitted):
```
% ./rss-udp-srv 0
starting: tid=0, rss_bucket=0, cpuid=0
starting: tid=1, rss_bucket=1, cpuid=1
starting: tid=2, rss_bucket=2, cpuid=2
[1] th=0x801615480
[2] th=0x801615500
starting: tid=3, rss_bucket=3, cpuid=3
starting: tid=4, rss_bucket=4, cpuid=0
starting: tid=5, rss_bucket=5, cpuid=1
starting: tid=6, rss_bucket=6, cpuid=2
starting: tid=7, rss_bucket=7, cpuid=3
[6] th=0x801615700
[3] th=0x801615580
[7] th=0x801615780
[0] th=0x801615400
[4] th=0x801615600
[5] th=0x801615680
count(flowid=6e0f86ef)=10000
count(flowid=6e0f86ef)=20000
......
count(flowid=6e0f86ef)=26160
count(flowid=6e0f86ef)=30000
count(flowid=6e0f86ef)=38865
count(flowid=6e0f86ef)=40000
......
count(flowid=6e0f86ef)=40000
^C
```
40000 UDP packets were received, and their flowids were all the same.
It is the expected result.
So ip6_direct worked correctly!
[1] https://reviews.freebsd.org/D3562
[2] https://github.com/erikarn/freebsd-rss/pull/3
[3] https://github.com/btw616/ip6_direct_test/tree/master/rss-udp-srv
[4] https://github.com/btw616/ip6_direct_test/tree/master/pktgen