Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F153535095
D17698.id49605.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
67 KB
Referenced Files
None
Subscribers
None
D17698.id49605.diff
View Options
Index: ctrs.h
===================================================================
--- ctrs.h
+++ ctrs.h
@@ -7,9 +7,11 @@
/* counters to accumulate statistics */
struct my_ctrs {
- uint64_t pkts, bytes, events, drop;
+ uint64_t pkts, bytes, events;
+ uint64_t drop, drop_bytes;
uint64_t min_space;
struct timeval t;
+ uint32_t oq_n; /* number of elements in overflow queue (used in lb) */
};
/* very crude code to print a number in normalized form.
@@ -16,21 +18,26 @@
* Caller has to make sure that the buffer is large enough.
*/
static const char *
-norm2(char *buf, double val, char *fmt)
+norm2(char *buf, double val, char *fmt, int normalize)
{
char *units[] = { "", "K", "M", "G", "T" };
u_int i;
-
- for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++)
- val /= 1000;
+ if (normalize)
+ for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++)
+ val /= 1000;
+ else
+ i=0;
sprintf(buf, fmt, val, units[i]);
return buf;
}
static __inline const char *
-norm(char *buf, double val)
+norm(char *buf, double val, int normalize)
{
- return norm2(buf, val, "%.3f %s");
+ if (normalize)
+ return norm2(buf, val, "%.3f %s", normalize);
+ else
+ return norm2(buf, val, "%.0f %s", normalize);
}
static __inline int
@@ -89,7 +96,7 @@
return ret;
}
-static uint64_t
+static __inline uint64_t
wait_for_next_report(struct timeval *prev, struct timeval *cur,
int report_interval)
{
@@ -106,3 +113,4 @@
return delta.tv_sec* 1000000 + delta.tv_usec;
}
#endif /* CTRS_H_ */
+
Index: pkt-gen.8
===================================================================
--- pkt-gen.8
+++ pkt-gen.8
@@ -169,7 +169,7 @@
.Pp
.Dl
.Nm
--i netmap:ncxl0 -f tx -s 172.16.0.1:53 -d 172.16.1.3:53 -D 00:07:43:29:2a:e0
+-i netmap:ncxl0 -f tx -s 172.16.0.1:53 -d 172.16.1.3:53 -D 00:07:43:29:2a:e0
.Sh SEE ALSO
.Xr netmap 4 ,
.Xr bridge 8
Index: pkt-gen.c
===================================================================
--- pkt-gen.c
+++ pkt-gen.c
@@ -55,6 +55,11 @@
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
+#include <netinet/ip6.h>
+#ifdef linux
+#define IPV6_VERSION 0x60
+#define IPV6_DEFHLIM 64
+#endif
#include <assert.h>
#include <math.h>
@@ -66,16 +71,18 @@
#include "ctrs.h"
+static void usage(int);
+
#ifdef _WIN32
#define cpuset_t DWORD_PTR //uint64_t
static inline void CPU_ZERO(cpuset_t *p)
{
- *p = 0;
+ *p = 0;
}
static inline void CPU_SET(uint32_t i, cpuset_t *p)
{
- *p |= 1<< (i & 0x3f);
+ *p |= 1<< (i & 0x3f);
}
#define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c) //((void)a, 0)
@@ -155,12 +162,12 @@
#define cpuset_t uint64_t // XXX
static inline void CPU_ZERO(cpuset_t *p)
{
- *p = 0;
+ *p = 0;
}
static inline void CPU_SET(uint32_t i, cpuset_t *p)
{
- *p |= 1<< (i & 0x3f);
+ *p |= 1<< (i & 0x3f);
}
#define pthread_setaffinity_np(a, b, c) ((void)a, 0)
@@ -169,7 +176,7 @@
#define IFF_PPROMISC IFF_PROMISC
#include <net/if_dl.h> /* LLADDR */
#define clock_gettime(a,b) \
- do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
+ do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
#endif /* __APPLE__ */
const char *default_payload="netmap pkt-gen DIRECT payload\n"
@@ -179,10 +186,8 @@
"http://info.iet.unipi.it/~luigi/netmap/ ";
int verbose = 0;
+int normalize = 1;
-#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */
-
-
#define VIRT_HDR_1 10 /* length of a base vnet-hdr */
#define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */
#define VIRT_HDR_MAX VIRT_HDR_2
@@ -195,14 +200,34 @@
struct pkt {
struct virt_header vh;
struct ether_header eh;
- struct ip ip;
- struct udphdr udp;
- uint8_t body[MAX_BODYSIZE]; // XXX hardwired
+ union {
+ struct {
+ struct ip ip;
+ struct udphdr udp;
+ uint8_t body[MAX_BODYSIZE]; /* hardwired */
+ } ipv4;
+ struct {
+ struct ip6_hdr ip;
+ struct udphdr udp;
+ uint8_t body[MAX_BODYSIZE]; /* hardwired */
+ } ipv6;
+ };
} __attribute__((__packed__));
+#define PKT(p, f, af) \
+ ((af) == AF_INET ? (p)->ipv4.f: (p)->ipv6.f)
+
struct ip_range {
char *name;
- uint32_t start, end; /* same as struct in_addr */
+ union {
+ struct {
+ uint32_t start, end; /* same as struct in_addr */
+ } ipv4;
+ struct {
+ struct in6_addr start, end;
+ uint8_t sgroup, egroup;
+ } ipv6;
+ };
uint16_t port0, port1;
};
@@ -227,15 +252,18 @@
*/
struct glob_arg {
+ int af; /* address family AF_INET/AF_INET6 */
struct ip_range src_ip;
struct ip_range dst_ip;
struct mac_range dst_mac;
struct mac_range src_mac;
int pkt_size;
+ int pkt_min_size;
int burst;
int forever;
uint64_t npackets; /* total packets to send */
- int frags; /* fragments per packet */
+ int frags; /* fragments per packet */
+ u_int mtu; /* size of each fragment */
int nthreads;
int cpus; /* cpus used for running */
int system_cpus; /* cpus on the system */
@@ -277,6 +305,8 @@
#define STATS_WIN 15
int win_idx;
int64_t win[STATS_WIN];
+ int wait_link;
+ int framing; /* #bits of framing (for bw output) */
};
enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };
@@ -304,79 +334,166 @@
struct pkt pkt;
void *frame;
+ uint16_t seed[3];
+ u_int frags;
+ u_int frag_size;
};
+static __inline uint16_t
+cksum_add(uint16_t sum, uint16_t a)
+{
+ uint16_t res;
+ res = sum + a;
+ return (res + (res < a));
+}
+
+static void
+extract_ipv4_addr(char *name, uint32_t *addr, uint16_t *port)
+{
+ struct in_addr a;
+ char *pp;
+
+ pp = strchr(name, ':');
+ if (pp != NULL) { /* do we have ports ? */
+ *pp++ = '\0';
+ *port = (uint16_t)strtol(pp, NULL, 0);
+ }
+
+ inet_pton(AF_INET, name, &a);
+ *addr = ntohl(a.s_addr);
+}
+
+static void
+extract_ipv6_addr(char *name, struct in6_addr *addr, uint16_t *port,
+ uint8_t *group)
+{
+ char *pp;
+
+ /*
+ * We accept IPv6 addresses in the following forms:
+ * group@[2001:DB8::1001]:port (w/ brackets and port)
+ * group@[2001:DB8::1] (w/ brackets and w/o port)
+ * group@2001:DB8::1234 (w/o brackets and w/o port)
+ */
+ pp = strchr(name, '@');
+ if (pp != NULL) {
+ *pp++ = '\0';
+ *group = (uint8_t)strtol(name, NULL, 0);
+ if (*group > 7)
+ *group = 7;
+ name = pp;
+ }
+ if (name[0] == '[')
+ name++;
+ pp = strchr(name, ']');
+ if (pp != NULL)
+ *pp++ = '\0';
+ if (pp != NULL && *pp != ':')
+ pp = NULL;
+ if (pp != NULL) { /* do we have ports ? */
+ *pp++ = '\0';
+ *port = (uint16_t)strtol(pp, NULL, 0);
+ }
+ inet_pton(AF_INET6, name, addr);
+}
/*
* extract the extremes from a range of ipv4 addresses.
* addr_lo[-addr_hi][:port_lo[-port_hi]]
*/
-static void
-extract_ip_range(struct ip_range *r)
+static int
+extract_ip_range(struct ip_range *r, int af)
{
- char *ap, *pp;
+ char *name, *ap, start[INET6_ADDRSTRLEN];
+ char end[INET6_ADDRSTRLEN];
struct in_addr a;
+ uint32_t tmp;
if (verbose)
D("extract IP range from %s", r->name);
- r->port0 = r->port1 = 0;
- r->start = r->end = 0;
+ name = strdup(r->name);
+ if (name == NULL) {
+ D("strdup failed");
+ usage(-1);
+ }
/* the first - splits start/end of range */
- ap = index(r->name, '-'); /* do we have ports ? */
- if (ap) {
+ ap = strchr(name, '-');
+ if (ap != NULL)
*ap++ = '\0';
- }
- /* grab the initial values (mandatory) */
- pp = index(r->name, ':');
- if (pp) {
- *pp++ = '\0';
- r->port0 = r->port1 = strtol(pp, NULL, 0);
- };
- inet_aton(r->name, &a);
- r->start = r->end = ntohl(a.s_addr);
- if (ap) {
- pp = index(ap, ':');
- if (pp) {
- *pp++ = '\0';
- if (*pp)
- r->port1 = strtol(pp, NULL, 0);
+ r->port0 = 1234; /* default port */
+ if (af == AF_INET6) {
+ r->ipv6.sgroup = 7; /* default group */
+ extract_ipv6_addr(name, &r->ipv6.start, &r->port0,
+ &r->ipv6.sgroup);
+ } else
+ extract_ipv4_addr(name, &r->ipv4.start, &r->port0);
+
+ r->port1 = r->port0;
+ if (af == AF_INET6) {
+ if (ap != NULL) {
+ r->ipv6.egroup = r->ipv6.sgroup;
+ extract_ipv6_addr(ap, &r->ipv6.end, &r->port1,
+ &r->ipv6.egroup);
+ } else {
+ r->ipv6.end = r->ipv6.start;
+ r->ipv6.egroup = r->ipv6.sgroup;
}
- if (*ap) {
- inet_aton(ap, &a);
- r->end = ntohl(a.s_addr);
- }
+ } else {
+ if (ap != NULL) {
+ extract_ipv4_addr(ap, &r->ipv4.end, &r->port1);
+ if (r->ipv4.start > r->ipv4.end) {
+ tmp = r->ipv4.end;
+ r->ipv4.end = r->ipv4.start;
+ r->ipv4.start = tmp;
+ }
+ } else
+ r->ipv4.end = r->ipv4.start;
}
+
if (r->port0 > r->port1) {
- uint16_t tmp = r->port0;
+ tmp = r->port0;
r->port0 = r->port1;
r->port1 = tmp;
}
- if (r->start > r->end) {
- uint32_t tmp = r->start;
- r->start = r->end;
- r->end = tmp;
+ if (af == AF_INET) {
+ a.s_addr = htonl(r->ipv4.start);
+ inet_ntop(af, &a, start, sizeof(start));
+ a.s_addr = htonl(r->ipv4.end);
+ inet_ntop(af, &a, end, sizeof(end));
+ } else {
+ inet_ntop(af, &r->ipv6.start, start, sizeof(start));
+ inet_ntop(af, &r->ipv6.end, end, sizeof(end));
}
- {
- struct in_addr a;
- char buf1[16]; // one ip address
+ if (af == AF_INET)
+ D("range is %s:%d to %s:%d", start, r->port0, end, r->port1);
+ else
+ D("range is %d@[%s]:%d to %d@[%s]:%d", r->ipv6.sgroup,
+ start, r->port0, r->ipv6.egroup, end, r->port1);
- a.s_addr = htonl(r->end);
- strncpy(buf1, inet_ntoa(a), sizeof(buf1));
- a.s_addr = htonl(r->start);
- if (1)
- D("range is %s:%d to %s:%d",
- inet_ntoa(a), r->port0, buf1, r->port1);
- }
+ free(name);
+ if (r->port0 != r->port1 ||
+ (af == AF_INET && r->ipv4.start != r->ipv4.end) ||
+ (af == AF_INET6 &&
+ !IN6_ARE_ADDR_EQUAL(&r->ipv6.start, &r->ipv6.end)))
+ return (OPT_COPY);
+ return (0);
}
-static void
+static int
extract_mac_range(struct mac_range *r)
{
+ struct ether_addr *e;
if (verbose)
D("extract MAC range from %s", r->name);
- bcopy(ether_aton(r->name), &r->start, 6);
- bcopy(ether_aton(r->name), &r->end, 6);
+
+ e = ether_aton(r->name);
+ if (e == NULL) {
+ D("invalid MAC address '%s'", r->name);
+ return 1;
+ }
+ bcopy(e, &r->start, 6);
+ bcopy(e, &r->end, 6);
#if 0
bcopy(targ->src_mac, eh->ether_shost, 6);
p = index(targ->g->src_mac, '-');
@@ -391,6 +508,7 @@
#endif
if (verbose)
D("%s starts at %s", r->name, ether_ntoa(&r->start));
+ return 0;
}
static struct targ *targs;
@@ -500,7 +618,7 @@
nmr->nr_rx_rings, nmr->nr_rx_slots);
free(w);
return (nmr->nr_tx_rings || nmr->nr_tx_slots ||
- nmr->nr_rx_rings || nmr->nr_rx_slots) ?
+ nmr->nr_rx_rings || nmr->nr_rx_slots) ?
NM_OPEN_RING_CFG : 0;
}
@@ -513,7 +631,6 @@
source_hwaddr(const char *ifname, char *buf)
{
struct ifaddrs *ifaphead, *ifap;
- int l = sizeof(ifap->ifa_name);
if (getifaddrs(&ifaphead) != 0) {
D("getifaddrs %s failed", ifname);
@@ -527,7 +644,7 @@
if (!sdl || sdl->sdl_family != AF_LINK)
continue;
- if (strncmp(ifap->ifa_name, ifname, l) != 0)
+ if (strncmp(ifap->ifa_name, ifname, IFNAMSIZ) != 0)
continue;
mac = (uint8_t *)LLADDR(sdl);
sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
@@ -562,19 +679,20 @@
return 0;
}
+
/* Compute the checksum of the given ip header. */
-static uint16_t
+static uint32_t
checksum(const void *data, uint16_t len, uint32_t sum)
{
- const uint8_t *addr = data;
+ const uint8_t *addr = data;
uint32_t i;
- /* Checksum all the pairs of bytes first... */
- for (i = 0; i < (len & ~1U); i += 2) {
- sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i)));
- if (sum > 0xFFFF)
- sum -= 0xFFFF;
- }
+ /* Checksum all the pairs of bytes first... */
+ for (i = 0; i < (len & ~1U); i += 2) {
+ sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i)));
+ if (sum > 0xFFFF)
+ sum -= 0xFFFF;
+ }
/*
* If there's a single byte left over, checksum it, too.
* Network byte order is big-endian, so the remaining byte is
@@ -588,8 +706,8 @@
return sum;
}
-static u_int16_t
-wrapsum(u_int32_t sum)
+static uint16_t
+wrapsum(uint32_t sum)
{
sum = ~sum & 0xFFFF;
return (htons(sum));
@@ -637,64 +755,198 @@
#define uh_sum check
#endif /* linux */
-/*
- * increment the addressed in the packet,
- * starting from the least significant field.
- * DST_IP DST_PORT SRC_IP SRC_PORT
- */
static void
-update_addresses(struct pkt *pkt, struct glob_arg *g)
+update_ip(struct pkt *pkt, struct targ *t)
{
- uint32_t a;
- uint16_t p;
- struct ip *ip = &pkt->ip;
- struct udphdr *udp = &pkt->udp;
+ struct glob_arg *g = t->g;
+ struct ip ip;
+ struct udphdr udp;
+ uint32_t oaddr, naddr;
+ uint16_t oport, nport;
+ uint16_t ip_sum, udp_sum;
- do {
- /* XXX for now it doesn't handle non-random src, random dst */
- if (g->options & OPT_RANDOM_SRC) {
- udp->uh_sport = random();
- ip->ip_src.s_addr = random();
- } else {
- p = ntohs(udp->uh_sport);
- if (p < g->src_ip.port1) { /* just inc, no wrap */
- udp->uh_sport = htons(p + 1);
+ memcpy(&ip, &pkt->ipv4.ip, sizeof(ip));
+ memcpy(&udp, &pkt->ipv4.udp, sizeof(udp));
+ do {
+ ip_sum = udp_sum = 0;
+ naddr = oaddr = ntohl(ip.ip_src.s_addr);
+ nport = oport = ntohs(udp.uh_sport);
+ if (g->options & OPT_RANDOM_SRC) {
+ ip.ip_src.s_addr = nrand48(t->seed);
+ udp.uh_sport = nrand48(t->seed);
+ naddr = ntohl(ip.ip_src.s_addr);
+ nport = ntohs(udp.uh_sport);
break;
}
- udp->uh_sport = htons(g->src_ip.port0);
-
- a = ntohl(ip->ip_src.s_addr);
- if (a < g->src_ip.end) { /* just inc, no wrap */
- ip->ip_src.s_addr = htonl(a + 1);
+ if (oport < g->src_ip.port1) {
+ nport = oport + 1;
+ udp.uh_sport = htons(nport);
break;
}
- ip->ip_src.s_addr = htonl(g->src_ip.start);
-
- udp->uh_sport = htons(g->src_ip.port0);
+ nport = g->src_ip.port0;
+ udp.uh_sport = htons(nport);
+ if (oaddr < g->src_ip.ipv4.end) {
+ naddr = oaddr + 1;
+ ip.ip_src.s_addr = htonl(naddr);
+ break;
+ }
+ naddr = g->src_ip.ipv4.start;
+ ip.ip_src.s_addr = htonl(naddr);
+ } while (0);
+ /* update checksums if needed */
+ if (oaddr != naddr) {
+ ip_sum = cksum_add(ip_sum, ~oaddr >> 16);
+ ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff);
+ ip_sum = cksum_add(ip_sum, naddr >> 16);
+ ip_sum = cksum_add(ip_sum, naddr & 0xffff);
}
-
- if (g->options & OPT_RANDOM_DST) {
- udp->uh_dport = random();
- ip->ip_dst.s_addr = random();
- } else {
- p = ntohs(udp->uh_dport);
- if (p < g->dst_ip.port1) { /* just inc, no wrap */
- udp->uh_dport = htons(p + 1);
+ if (oport != nport) {
+ udp_sum = cksum_add(udp_sum, ~oport);
+ udp_sum = cksum_add(udp_sum, nport);
+ }
+ do {
+ naddr = oaddr = ntohl(ip.ip_dst.s_addr);
+ nport = oport = ntohs(udp.uh_dport);
+ if (g->options & OPT_RANDOM_DST) {
+ ip.ip_dst.s_addr = nrand48(t->seed);
+ udp.uh_dport = nrand48(t->seed);
+ naddr = ntohl(ip.ip_dst.s_addr);
+ nport = ntohs(udp.uh_dport);
break;
}
- udp->uh_dport = htons(g->dst_ip.port0);
-
- a = ntohl(ip->ip_dst.s_addr);
- if (a < g->dst_ip.end) { /* just inc, no wrap */
- ip->ip_dst.s_addr = htonl(a + 1);
+ if (oport < g->dst_ip.port1) {
+ nport = oport + 1;
+ udp.uh_dport = htons(nport);
break;
}
+ nport = g->dst_ip.port0;
+ udp.uh_dport = htons(nport);
+ if (oaddr < g->dst_ip.ipv4.end) {
+ naddr = oaddr + 1;
+ ip.ip_dst.s_addr = htonl(naddr);
+ break;
+ }
+ naddr = g->dst_ip.ipv4.start;
+ ip.ip_dst.s_addr = htonl(naddr);
+ } while (0);
+ /* update checksums */
+ if (oaddr != naddr) {
+ ip_sum = cksum_add(ip_sum, ~oaddr >> 16);
+ ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff);
+ ip_sum = cksum_add(ip_sum, naddr >> 16);
+ ip_sum = cksum_add(ip_sum, naddr & 0xffff);
}
- ip->ip_dst.s_addr = htonl(g->dst_ip.start);
- } while (0);
- // update checksum
+ if (oport != nport) {
+ udp_sum = cksum_add(udp_sum, ~oport);
+ udp_sum = cksum_add(udp_sum, nport);
+ }
+ if (udp_sum != 0)
+ udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(udp_sum));
+ if (ip_sum != 0) {
+ ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum));
+ udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(ip_sum));
+ }
+ memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
+ memcpy(&pkt->ipv4.udp, &udp, sizeof(udp));
}
+#ifndef s6_addr16
+#define s6_addr16 __u6_addr.__u6_addr16
+#endif
+static void
+update_ip6(struct pkt *pkt, struct targ *t)
+{
+ struct glob_arg *g = t->g;
+ struct ip6_hdr ip6;
+ struct udphdr udp;
+ uint16_t udp_sum;
+ uint16_t oaddr, naddr;
+ uint16_t oport, nport;
+ uint8_t group;
+
+ memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6));
+ memcpy(&udp, &pkt->ipv6.udp, sizeof(udp));
+ do {
+ udp_sum = 0;
+ group = g->src_ip.ipv6.sgroup;
+ naddr = oaddr = ntohs(ip6.ip6_src.s6_addr16[group]);
+ nport = oport = ntohs(udp.uh_sport);
+ if (g->options & OPT_RANDOM_SRC) {
+ ip6.ip6_src.s6_addr16[group] = nrand48(t->seed);
+ udp.uh_sport = nrand48(t->seed);
+ naddr = ntohs(ip6.ip6_src.s6_addr16[group]);
+ nport = ntohs(udp.uh_sport);
+ break;
+ }
+ if (oport < g->src_ip.port1) {
+ nport = oport + 1;
+ udp.uh_sport = htons(nport);
+ break;
+ }
+ nport = g->src_ip.port0;
+ udp.uh_sport = htons(nport);
+ if (oaddr < ntohs(g->src_ip.ipv6.end.s6_addr16[group])) {
+ naddr = oaddr + 1;
+ ip6.ip6_src.s6_addr16[group] = htons(naddr);
+ break;
+ }
+ naddr = ntohs(g->src_ip.ipv6.start.s6_addr16[group]);
+ ip6.ip6_src.s6_addr16[group] = htons(naddr);
+ } while (0);
+ /* update checksums if needed */
+ if (oaddr != naddr)
+ udp_sum = cksum_add(~oaddr, naddr);
+ if (oport != nport)
+ udp_sum = cksum_add(udp_sum,
+ cksum_add(~oport, nport));
+ do {
+ group = g->dst_ip.ipv6.egroup;
+ naddr = oaddr = ntohs(ip6.ip6_dst.s6_addr16[group]);
+ nport = oport = ntohs(udp.uh_dport);
+ if (g->options & OPT_RANDOM_DST) {
+ ip6.ip6_dst.s6_addr16[group] = nrand48(t->seed);
+ udp.uh_dport = nrand48(t->seed);
+ naddr = ntohs(ip6.ip6_dst.s6_addr16[group]);
+ nport = ntohs(udp.uh_dport);
+ break;
+ }
+ if (oport < g->dst_ip.port1) {
+ nport = oport + 1;
+ udp.uh_dport = htons(nport);
+ break;
+ }
+ nport = g->dst_ip.port0;
+ udp.uh_dport = htons(nport);
+ if (oaddr < ntohs(g->dst_ip.ipv6.end.s6_addr16[group])) {
+ naddr = oaddr + 1;
+ ip6.ip6_dst.s6_addr16[group] = htons(naddr);
+ break;
+ }
+ naddr = ntohs(g->dst_ip.ipv6.start.s6_addr16[group]);
+ ip6.ip6_dst.s6_addr16[group] = htons(naddr);
+ } while (0);
+ /* update checksums */
+ if (oaddr != naddr)
+ udp_sum = cksum_add(udp_sum,
+ cksum_add(~oaddr, naddr));
+ if (oport != nport)
+ udp_sum = cksum_add(udp_sum,
+ cksum_add(~oport, nport));
+ if (udp_sum != 0)
+ udp.uh_sum = ~cksum_add(~udp.uh_sum, udp_sum);
+ memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6));
+ memcpy(&pkt->ipv6.udp, &udp, sizeof(udp));
+}
+
+static void
+update_addresses(struct pkt *pkt, struct targ *t)
+{
+
+ if (t->g->af == AF_INET)
+ update_ip(pkt, t);
+ else
+ update_ip6(pkt, t);
+}
/*
* initialize one packet and prepare for the next one.
* The copy could be done better instead of repeating it each time.
@@ -704,9 +956,12 @@
{
struct pkt *pkt = &targ->pkt;
struct ether_header *eh;
- struct ip *ip;
- struct udphdr *udp;
- uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip);
+ struct ip6_hdr ip6;
+ struct ip ip;
+ struct udphdr udp;
+ void *udp_ptr;
+ uint16_t paylen;
+ uint32_t csum = 0;
const char *payload = targ->g->options & OPT_INDIRECT ?
indirect_payload : default_payload;
int i, l0 = strlen(payload);
@@ -716,7 +971,7 @@
pcap_t *file;
struct pcap_pkthdr *header;
const unsigned char *packet;
-
+
/* Read a packet from a PCAP file if asked. */
if (targ->g->packet_file != NULL) {
if ((file = pcap_open_offline(targ->g->packet_file,
@@ -735,49 +990,80 @@
}
#endif
+ paylen = targ->g->pkt_size - sizeof(*eh) -
+ (targ->g->af == AF_INET ? sizeof(ip): sizeof(ip6));
+
/* create a nice NUL-terminated string */
for (i = 0; i < paylen; i += l0) {
if (l0 > paylen - i)
l0 = paylen - i; // last round
- bcopy(payload, pkt->body + i, l0);
+ bcopy(payload, PKT(pkt, body, targ->g->af) + i, l0);
}
- pkt->body[i-1] = '\0';
- ip = &pkt->ip;
+ PKT(pkt, body, targ->g->af)[i - 1] = '\0';
/* prepare the headers */
- ip->ip_v = IPVERSION;
- ip->ip_hl = 5;
- ip->ip_id = 0;
- ip->ip_tos = IPTOS_LOWDELAY;
- ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh));
- ip->ip_id = 0;
- ip->ip_off = htons(IP_DF); /* Don't fragment */
- ip->ip_ttl = IPDEFTTL;
- ip->ip_p = IPPROTO_UDP;
- ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start);
- ip->ip_src.s_addr = htonl(targ->g->src_ip.start);
- ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0));
-
-
- udp = &pkt->udp;
- udp->uh_sport = htons(targ->g->src_ip.port0);
- udp->uh_dport = htons(targ->g->dst_ip.port0);
- udp->uh_ulen = htons(paylen);
- /* Magic: taken from sbin/dhclient/packet.c */
- udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp),
- checksum(pkt->body,
- paylen - sizeof(*udp),
- checksum(&ip->ip_src, 2 * sizeof(ip->ip_src),
- IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen)
- )
- )
- ));
-
eh = &pkt->eh;
bcopy(&targ->g->src_mac.start, eh->ether_shost, 6);
bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6);
- eh->ether_type = htons(ETHERTYPE_IP);
+ if (targ->g->af == AF_INET) {
+ eh->ether_type = htons(ETHERTYPE_IP);
+ memcpy(&ip, &pkt->ipv4.ip, sizeof(ip));
+ udp_ptr = &pkt->ipv4.udp;
+ ip.ip_v = IPVERSION;
+ ip.ip_hl = sizeof(ip) >> 2;
+ ip.ip_id = 0;
+ ip.ip_tos = IPTOS_LOWDELAY;
+ ip.ip_len = htons(targ->g->pkt_size - sizeof(*eh));
+ ip.ip_id = 0;
+ ip.ip_off = htons(IP_DF); /* Don't fragment */
+ ip.ip_ttl = IPDEFTTL;
+ ip.ip_p = IPPROTO_UDP;
+ ip.ip_dst.s_addr = htonl(targ->g->dst_ip.ipv4.start);
+ ip.ip_src.s_addr = htonl(targ->g->src_ip.ipv4.start);
+ ip.ip_sum = wrapsum(checksum(&ip, sizeof(ip), 0));
+ memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
+ } else {
+ eh->ether_type = htons(ETHERTYPE_IPV6);
+ memcpy(&ip6, &pkt->ipv4.ip, sizeof(ip6));
+ udp_ptr = &pkt->ipv6.udp;
+ ip6.ip6_flow = 0;
+ ip6.ip6_plen = htons(paylen);
+ ip6.ip6_vfc = IPV6_VERSION;
+ ip6.ip6_nxt = IPPROTO_UDP;
+ ip6.ip6_hlim = IPV6_DEFHLIM;
+ ip6.ip6_src = targ->g->src_ip.ipv6.start;
+ ip6.ip6_dst = targ->g->dst_ip.ipv6.start;
+ }
+ memcpy(&udp, udp_ptr, sizeof(udp));
+
+ udp.uh_sport = htons(targ->g->src_ip.port0);
+ udp.uh_dport = htons(targ->g->dst_ip.port0);
+ udp.uh_ulen = htons(paylen);
+ if (targ->g->af == AF_INET) {
+ /* Magic: taken from sbin/dhclient/packet.c */
+ udp.uh_sum = wrapsum(
+ checksum(&udp, sizeof(udp), /* udp header */
+ checksum(pkt->ipv4.body, /* udp payload */
+ paylen - sizeof(udp),
+ checksum(&pkt->ipv4.ip.ip_src, /* pseudo header */
+ 2 * sizeof(pkt->ipv4.ip.ip_src),
+ IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen)))));
+ memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
+ } else {
+ /* Save part of pseudo header checksum into csum */
+ csum = IPPROTO_UDP << 24;
+ csum = checksum(&csum, sizeof(csum), paylen);
+ udp.uh_sum = wrapsum(
+ checksum(udp_ptr, sizeof(udp), /* udp header */
+ checksum(pkt->ipv6.body, /* udp payload */
+ paylen - sizeof(udp),
+ checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */
+ 2 * sizeof(pkt->ipv6.ip.ip6_src), csum))));
+ memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6));
+ }
+ memcpy(udp_ptr, &udp, sizeof(udp));
+
bzero(&pkt->vh, sizeof(pkt->vh));
// dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0);
}
@@ -825,7 +1111,6 @@
}
}
-
/*
* create and enqueue a batch of packets on a ring.
* On the last one set NS_REPORT to tell the driver to generate
@@ -833,19 +1118,14 @@
*/
static int
send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
- int size, struct glob_arg *g, u_int count, int options,
- u_int nfrags)
+ int size, struct targ *t, u_int count, int options)
{
u_int n, sent, cur = ring->cur;
- u_int fcnt;
+ u_int frags = t->frags;
+ u_int frag_size = t->frag_size;
+ struct netmap_slot *slot = &ring->slot[cur];
n = nm_ring_space(ring);
- if (n < count)
- count = n;
- if (count < nfrags) {
- D("truncating packet, no room for frags %d %d",
- count, nfrags);
- }
#if 0
if (options & (OPT_COPY | OPT_PREFETCH) ) {
for (sent = 0; sent < count; sent++) {
@@ -858,11 +1138,15 @@
cur = ring->cur;
}
#endif
- for (fcnt = nfrags, sent = 0; sent < count; sent++) {
- struct netmap_slot *slot = &ring->slot[cur];
- char *p = NETMAP_BUF(ring, slot->buf_idx);
- int buf_changed = slot->flags & NS_BUF_CHANGED;
+ for (sent = 0; sent < count && n >= frags; sent++, n--) {
+ char *p;
+ int buf_changed;
+ u_int tosend = size;
+ slot = &ring->slot[cur];
+ p = NETMAP_BUF(ring, slot->buf_idx);
+ buf_changed = slot->flags & NS_BUF_CHANGED;
+
slot->flags = 0;
if (options & OPT_RUBBISH) {
/* do nothing */
@@ -869,31 +1153,49 @@
} else if (options & OPT_INDIRECT) {
slot->flags |= NS_INDIRECT;
slot->ptr = (uint64_t)((uintptr_t)frame);
- } else if ((options & OPT_COPY) || buf_changed) {
- nm_pkt_copy(frame, p, size);
- if (fcnt == nfrags)
- update_addresses(pkt, g);
- } else if (options & OPT_MEMCPY) {
- memcpy(p, frame, size);
- if (fcnt == nfrags)
- update_addresses(pkt, g);
+ } else if (frags > 1) {
+ u_int i;
+ const char *f = frame;
+ char *fp = p;
+ for (i = 0; i < frags - 1; i++) {
+ memcpy(fp, f, frag_size);
+ slot->len = frag_size;
+ slot->flags = NS_MOREFRAG;
+ if (options & OPT_DUMP)
+ dump_payload(fp, frag_size, ring, cur);
+ tosend -= frag_size;
+ f += frag_size;
+ cur = nm_ring_next(ring, cur);
+ slot = &ring->slot[cur];
+ fp = NETMAP_BUF(ring, slot->buf_idx);
+ }
+ n -= (frags - 1);
+ p = fp;
+ slot->flags = 0;
+ memcpy(p, f, tosend);
+ update_addresses(pkt, t);
+ } else if ((options & (OPT_COPY | OPT_MEMCPY)) || buf_changed) {
+ if (options & OPT_COPY)
+ nm_pkt_copy(frame, p, size);
+ else
+ memcpy(p, frame, size);
+ update_addresses(pkt, t);
} else if (options & OPT_PREFETCH) {
__builtin_prefetch(p);
}
+ slot->len = tosend;
if (options & OPT_DUMP)
- dump_payload(p, size, ring, cur);
- slot->len = size;
- if (--fcnt > 0)
- slot->flags |= NS_MOREFRAG;
- else
- fcnt = nfrags;
- if (sent == count - 1) {
- slot->flags &= ~NS_MOREFRAG;
- slot->flags |= NS_REPORT;
- }
+ dump_payload(p, tosend, ring, cur);
cur = nm_ring_next(ring, cur);
}
- ring->head = ring->cur = cur;
+ if (sent) {
+ slot->flags |= NS_REPORT;
+ ring->head = ring->cur = cur;
+ }
+ if (sent < count) {
+ /* tell netmap that we need more slots */
+ ring->cur = ring->tail;
+ }
return (sent);
}
@@ -914,28 +1216,47 @@
}
/*
+ * wait until ts, sleeping while more than 1ms remains and busy-waiting otherwise.
+ * Return wakeup time.
+ */
+static struct timespec
+wait_time(struct timespec ts)
+{
+ for (;;) {
+ struct timespec w, cur;
+ clock_gettime(CLOCK_REALTIME_PRECISE, &cur);
+ w = timespec_sub(ts, cur);
+ if (w.tv_sec < 0)
+ return cur;
+ else if (w.tv_sec > 0 || w.tv_nsec > 1000000)
+ poll(NULL, 0, 1);
+ }
+}
+
+/*
* Send a packet, and wait for a response.
* The payload (after UDP header, ofs 42) has a 4-byte sequence
* followed by a struct timeval (or bintime?)
*/
-#define PAY_OFS 42 /* where in the pkt... */
static void *
-pinger_body(void *data)
+ping_body(void *data)
{
struct targ *targ = (struct targ *) data;
struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
struct netmap_if *nifp = targ->nmd->nifp;
- int i, rx = 0;
+ int i, m, rx = 0;
void *frame;
int size;
struct timespec ts, now, last_print;
+ struct timespec nexttime = {0, 0}; /* silence compiler */
uint64_t sent = 0, n = targ->g->npackets;
uint64_t count = 0, t_cur, t_min = ~0, av = 0;
+ uint64_t g_min = ~0, g_av = 0;
uint64_t buckets[64]; /* bins for delays, ns */
+ int rate_limit = targ->g->tx_rate, tosend = 0;
- frame = &targ->pkt;
- frame += sizeof(targ->pkt.vh) - targ->g->virt_header;
+ frame = (char*)&targ->pkt + sizeof(targ->pkt.vh) - targ->g->virt_header;
size = targ->g->pkt_size + targ->g->virt_header;
@@ -947,38 +1268,75 @@
bzero(&buckets, sizeof(buckets));
clock_gettime(CLOCK_REALTIME_PRECISE, &last_print);
now = last_print;
+ if (rate_limit) {
+ targ->tic = timespec_add(now, (struct timespec){2,0});
+ targ->tic.tv_nsec = 0;
+ wait_time(targ->tic);
+ nexttime = targ->tic;
+ }
while (!targ->cancel && (n == 0 || sent < n)) {
- struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
+ struct netmap_ring *ring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
struct netmap_slot *slot;
char *p;
- for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */
- slot = &ring->slot[ring->cur];
- slot->len = size;
- p = NETMAP_BUF(ring, slot->buf_idx);
+ int rv;
+ uint64_t limit, event = 0;
- if (nm_ring_empty(ring)) {
- D("-- ouch, cannot send");
- } else {
- struct tstamp *tp;
- nm_pkt_copy(frame, p, size);
- clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
- bcopy(&sent, p+42, sizeof(sent));
- tp = (struct tstamp *)(p+46);
- tp->sec = (uint32_t)ts.tv_sec;
- tp->nsec = (uint32_t)ts.tv_nsec;
- sent++;
- ring->head = ring->cur = nm_ring_next(ring, ring->cur);
+ if (rate_limit && tosend <= 0) {
+ tosend = targ->g->burst;
+ nexttime = timespec_add(nexttime, targ->g->tx_period);
+ wait_time(nexttime);
}
- }
+
+ limit = rate_limit ? tosend : targ->g->burst;
+ if (n > 0 && n - sent < limit)
+ limit = n - sent;
+ for (m = 0; (unsigned)m < limit; m++) {
+ slot = &ring->slot[ring->cur];
+ slot->len = size;
+ p = NETMAP_BUF(ring, slot->buf_idx);
+
+ if (nm_ring_empty(ring)) {
+ D("-- ouch, cannot send");
+ break;
+ } else {
+ struct tstamp *tp;
+ nm_pkt_copy(frame, p, size);
+ clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
+ bcopy(&sent, p+42, sizeof(sent));
+ tp = (struct tstamp *)(p+46);
+ tp->sec = (uint32_t)ts.tv_sec;
+ tp->nsec = (uint32_t)ts.tv_nsec;
+ sent++;
+ ring->head = ring->cur = nm_ring_next(ring, ring->cur);
+ }
+ }
+ if (m > 0)
+ event++;
+ targ->ctr.pkts = sent;
+ targ->ctr.bytes = sent*size;
+ targ->ctr.events = event;
+ if (rate_limit)
+ tosend -= m;
+#ifdef BUSYWAIT
+ rv = ioctl(pfd.fd, NIOCTXSYNC, NULL);
+ if (rv < 0) {
+ D("TXSYNC error on queue %d: %s", targ->me,
+ strerror(errno));
+ }
+ again:
+ ioctl(pfd.fd, NIOCRXSYNC, NULL);
+#else
/* should use a parameter to decide how often to send */
- if (poll(&pfd, 1, 3000) <= 0) {
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ if ( (rv = poll(&pfd, 1, 3000)) <= 0) {
+ D("poll error on queue %d: %s", targ->me,
+ (rv ? strerror(errno) : "timeout"));
continue;
}
+#endif /* BUSYWAIT */
/* see what we got back */
- for (i = targ->nmd->first_tx_ring;
- i <= targ->nmd->last_tx_ring; i++) {
+ rx = 0;
+ for (i = targ->nmd->first_rx_ring;
+ i <= targ->nmd->last_rx_ring; i++) {
ring = NETMAP_RXRING(nifp, i);
while (!nm_ring_empty(ring)) {
uint32_t seq;
@@ -999,7 +1357,8 @@
ts.tv_nsec += 1000000000;
ts.tv_sec--;
}
- if (0) D("seq %d/%lu delta %d.%09d", seq, sent,
+ if (0) D("seq %d/%llu delta %d.%09d", seq,
+ (unsigned long long)sent,
(int)ts.tv_sec, (int)ts.tv_nsec);
t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec;
if (t_cur < t_min)
@@ -1024,7 +1383,7 @@
if (ts.tv_sec >= 1) {
D("count %d RTT: min %d av %d ns",
(int)count, (int)t_min, (int)(av/count));
- int k, j, kmin;
+ int k, j, kmin, off;
char buf[512];
for (kmin = 0; kmin < 64; kmin ++)
@@ -1034,17 +1393,33 @@
if (buckets[k])
break;
buf[0] = '\0';
- for (j = kmin; j <= k; j++)
- sprintf(buf, "%s %5d", buf, (int)buckets[j]);
+ off = 0;
+ for (j = kmin; j <= k; j++) {
+ off += sprintf(buf + off, " %5d", (int)buckets[j]);
+ }
D("k: %d .. %d\n\t%s", 1<<kmin, 1<<k, buf);
bzero(&buckets, sizeof(buckets));
count = 0;
+ g_av += av;
av = 0;
+ if (t_min < g_min)
+ g_min = t_min;
t_min = ~0;
last_print = now;
}
+#ifdef BUSYWAIT
+ if (rx < m && ts.tv_sec <= 3 && !targ->cancel)
+ goto again;
+#endif /* BUSYWAIT */
}
+ if (sent > 0) {
+ D("RTT over %llu packets: min %d av %d ns",
+ (long long unsigned)sent, (int)g_min,
+ (int)((double)g_av/sent));
+ }
+ targ->completed = 1;
+
/* reset the ``used`` flag. */
targ->used = 0;
@@ -1056,7 +1431,7 @@
* reply to ping requests
*/
static void *
-ponger_body(void *data)
+pong_body(void *data)
{
struct targ *targ = (struct targ *) data;
struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
@@ -1069,7 +1444,9 @@
D("can only reply ping with 1 thread");
return NULL;
}
- D("understood ponger %lu but don't know how to do it", n);
+ if (n > 0)
+ D("understood ponger %llu but don't know how to do it",
+ (unsigned long long)n);
while (!targ->cancel && (n == 0 || sent < n)) {
uint32_t txcur, txavail;
//#define BUSYWAIT
@@ -1076,13 +1453,14 @@
#ifdef BUSYWAIT
ioctl(pfd.fd, NIOCRXSYNC, NULL);
#else
- if (poll(&pfd, 1, 1000) <= 0) {
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ int rv;
+ if ( (rv = poll(&pfd, 1, 1000)) <= 0) {
+ D("poll error on queue %d: %s", targ->me,
+ rv ? strerror(errno) : "timeout");
continue;
}
#endif
- txring = NETMAP_TXRING(nifp, 0);
+ txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
txcur = txring->cur;
txavail = nm_ring_space(txring);
/* see what we got back */
@@ -1105,6 +1483,7 @@
dpkt = (uint16_t *)dst;
spkt = (uint16_t *)src;
nm_pkt_copy(src, dst, slot->len);
+ /* swap source and destination MAC */
dpkt[0] = spkt[3];
dpkt[1] = spkt[4];
dpkt[2] = spkt[5];
@@ -1112,7 +1491,6 @@
dpkt[4] = spkt[1];
dpkt[5] = spkt[2];
txring->slot[txcur].len = slot->len;
- /* XXX swap src dst mac */
txcur = nm_ring_next(txring, txcur);
txavail--;
sent++;
@@ -1126,6 +1504,8 @@
//D("tx %d rx %d", sent, rx);
}
+ targ->completed = 1;
+
/* reset the ``used`` flag. */
targ->used = 0;
@@ -1133,24 +1513,6 @@
}
-/*
- * wait until ts, either busy or sleeping if more than 1ms.
- * Return wakeup time.
- */
-static struct timespec
-wait_time(struct timespec ts)
-{
- for (;;) {
- struct timespec w, cur;
- clock_gettime(CLOCK_REALTIME_PRECISE, &cur);
- w = timespec_sub(ts, cur);
- if (w.tv_sec < 0)
- return cur;
- else if (w.tv_sec > 0 || w.tv_nsec > 1000000)
- poll(NULL, 0, 1);
- }
-}
-
static void *
sender_body(void *data)
{
@@ -1170,14 +1532,13 @@
int size;
if (targ->frame == NULL) {
- frame = pkt;
- frame += sizeof(pkt->vh) - targ->g->virt_header;
+ frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header;
size = targ->g->pkt_size + targ->g->virt_header;
} else {
frame = targ->frame;
size = targ->g->pkt_size;
}
-
+
D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
if (setaffinity(targ->thread, targ->affinity))
goto quit;
@@ -1190,13 +1551,13 @@
wait_time(targ->tic);
nexttime = targ->tic;
}
- if (targ->g->dev_type == DEV_TAP) {
+ if (targ->g->dev_type == DEV_TAP) {
D("writing to file desc %d", targ->g->main_fd);
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
if (write(targ->g->main_fd, frame, size) != -1)
sent++;
- update_addresses(pkt, targ->g);
+ update_addresses(pkt, targ);
if (i > 10000) {
targ->ctr.pkts = sent;
targ->ctr.bytes = sent*size;
@@ -1211,7 +1572,7 @@
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
if (pcap_inject(p, frame, size) != -1)
sent++;
- update_addresses(pkt, targ->g);
+ update_addresses(pkt, targ);
if (i > 10000) {
targ->ctr.pkts = sent;
targ->ctr.bytes = sent*size;
@@ -1222,10 +1583,23 @@
#endif /* NO_PCAP */
} else {
int tosend = 0;
- int frags = targ->g->frags;
+ u_int bufsz, mtu = targ->g->mtu;
nifp = targ->nmd->nifp;
+ txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
+ bufsz = txring->nr_buf_size;
+ if (bufsz < mtu)
+ mtu = bufsz;
+ targ->frag_size = targ->g->pkt_size / targ->frags;
+ if (targ->frag_size > mtu) {
+ targ->frags = targ->g->pkt_size / mtu;
+ targ->frag_size = mtu;
+ if (targ->g->pkt_size % mtu != 0)
+ targ->frags++;
+ }
+ D("frags %u frag_size %u", targ->frags, targ->frag_size);
while (!targ->cancel && (n == 0 || sent < n)) {
+ int rv;
if (rate_limit && tosend <= 0) {
tosend = targ->g->burst;
@@ -1237,6 +1611,7 @@
* wait for available room in the send queue(s)
*/
#ifdef BUSYWAIT
+ (void)rv;
if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
D("ioctl error on queue %d: %s", targ->me,
strerror(errno));
@@ -1243,11 +1618,11 @@
goto quit;
}
#else /* !BUSYWAIT */
- if (poll(&pfd, 1, 2000) <= 0) {
+ if ( (rv = poll(&pfd, 1, 2000)) <= 0) {
if (targ->cancel)
break;
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ D("poll error on queue %d: %s", targ->me,
+ rv ? strerror(errno) : "timeout");
// goto quit;
}
if (pfd.revents & POLLERR) {
@@ -1266,23 +1641,30 @@
for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
int m;
uint64_t limit = rate_limit ? tosend : targ->g->burst;
+
+ if (n > 0 && n == sent)
+ break;
+
if (n > 0 && n - sent < limit)
limit = n - sent;
txring = NETMAP_TXRING(nifp, i);
if (nm_ring_empty(txring))
continue;
- if (frags > 1)
- limit = ((limit + frags - 1) / frags) * frags;
- m = send_packets(txring, pkt, frame, size, targ->g,
- limit, options, frags);
- ND("limit %d tail %d frags %d m %d",
- limit, txring->tail, frags, m);
+ if (targ->g->pkt_min_size > 0 && /* equal sizes would make the modulus below zero */
+ targ->g->pkt_min_size < targ->g->pkt_size) {
+ size = nrand48(targ->seed) % (targ->g->pkt_size - targ->g->pkt_min_size) +
+ targ->g->pkt_min_size;
+ }
+ m = send_packets(txring, pkt, frame, size, targ,
+ limit, options);
+ ND("limit %lu tail %d m %d",
+ limit, txring->tail, m);
sent += m;
if (m > 0) //XXX-ste: can m be 0?
event++;
targ->ctr.pkts = sent;
- targ->ctr.bytes = sent*size;
+ targ->ctr.bytes += m*size;
targ->ctr.events = event;
if (rate_limit) {
tosend -= m;
@@ -1292,10 +1674,12 @@
}
}
/* flush any remaining packets */
- D("flush tail %d head %d on thread %p",
- txring->tail, txring->head,
- (void *)pthread_self());
- ioctl(pfd.fd, NIOCTXSYNC, NULL);
+ if (txring != NULL) {
+ D("flush tail %d head %d on thread %p",
+ txring->tail, txring->head,
+ (void *)pthread_self());
+ ioctl(pfd.fd, NIOCTXSYNC, NULL);
+ }
/* final part: wait all the TX queues to be empty. */
for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
@@ -1340,6 +1724,7 @@
{
u_int cur, rx, n;
uint64_t b = 0;
+ u_int complete = 0;
if (bytes == NULL)
bytes = &b;
@@ -1355,12 +1740,14 @@
*bytes += slot->len;
if (dump)
dump_payload(p, slot->len, ring, cur);
+ if (!(slot->flags & NS_MOREFRAG))
+ complete++;
cur = nm_ring_next(ring, cur);
}
ring->head = ring->cur = cur;
- return (rx);
+ return (complete);
}
static void *
@@ -1373,8 +1760,7 @@
int i;
struct my_ctrs cur;
- cur.pkts = cur.bytes = cur.events = cur.min_space = 0;
- cur.t.tv_usec = cur.t.tv_sec = 0; // unused, just silence the compiler
+ memset(&cur, 0, sizeof(cur));
if (setaffinity(targ->thread, targ->affinity))
goto quit;
@@ -1386,6 +1772,14 @@
i = poll(&pfd, 1, 1000);
if (i > 0 && !(pfd.revents & POLLERR))
break;
+ if (i < 0) {
+ D("poll() error: %s", strerror(errno));
+ goto quit;
+ }
+ if (pfd.revents & POLLERR) {
+ D("fd error");
+ goto quit;
+ }
RD(1, "waiting for initial packets, poll returns %d %d",
i, pfd.revents);
}
@@ -1408,7 +1802,7 @@
/* XXX should we poll ? */
pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap,
(u_char *)&targ->ctr);
- targ->ctr.events++;
+ targ->ctr.events++;
}
#endif /* !NO_PCAP */
} else {
@@ -1451,7 +1845,7 @@
m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes);
cur.pkts += m;
- if (m > 0) //XXX-ste: can m be 0?
+ if (m > 0)
cur.events++;
}
cur.min_space = targ->ctr.min_space;
@@ -1503,8 +1897,7 @@
D("Ignoring -n argument");
}
- frame = pkt;
- frame += sizeof(pkt->vh) - targ->g->virt_header;
+ frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header;
size = targ->g->pkt_size + targ->g->virt_header;
D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
@@ -1527,6 +1920,8 @@
unsigned int space;
unsigned int head;
int fcnt;
+ uint16_t sum = 0;
+ int rv;
if (!rate_limit) {
budget = targ->g->burst;
@@ -1538,11 +1933,20 @@
}
/* wait for available room in the send queue */
- if (poll(&pfd, 1, 2000) <= 0) {
+#ifdef BUSYWAIT
+ (void)rv;
+ if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
+ D("ioctl error on queue %d: %s", targ->me,
+ strerror(errno));
+ goto quit;
+ }
+#else /* !BUSYWAIT */
+ if ( (rv = poll(&pfd, 1, 2000)) <= 0) {
if (targ->cancel)
break;
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ D("poll error on queue %d: %s", targ->me,
+ rv ? strerror(errno) : "timeout");
+ // goto quit;
}
if (pfd.revents & POLLERR) {
D("poll error on %d ring %d-%d", pfd.fd,
@@ -1549,6 +1953,7 @@
targ->nmd->first_tx_ring, targ->nmd->last_tx_ring);
goto quit;
}
+#endif /* !BUSYWAIT */
/* If no room poll() again. */
space = nm_ring_space(ring);
@@ -1573,15 +1978,23 @@
sent < limit; sent++, sequence++) {
struct netmap_slot *slot = &ring->slot[head];
char *p = NETMAP_BUF(ring, slot->buf_idx);
+ uint16_t *w = (uint16_t *)PKT(pkt, body, targ->g->af), t;
+ memcpy(&sum, targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, sizeof(sum));
+
slot->flags = 0;
- pkt->body[0] = sequence >> 24;
- pkt->body[1] = (sequence >> 16) & 0xff;
- pkt->body[2] = (sequence >> 8) & 0xff;
- pkt->body[3] = sequence & 0xff;
+ t = *w;
+ PKT(pkt, body, targ->g->af)[0] = sequence >> 24;
+ PKT(pkt, body, targ->g->af)[1] = (sequence >> 16) & 0xff;
+ sum = ~cksum_add(~sum, cksum_add(~t, *w));
+ t = *++w;
+ PKT(pkt, body, targ->g->af)[2] = (sequence >> 8) & 0xff;
+ PKT(pkt, body, targ->g->af)[3] = sequence & 0xff;
+ sum = ~cksum_add(~sum, cksum_add(~t, *w));
+ memcpy(targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, &sum, sizeof(sum));
nm_pkt_copy(frame, p, size);
if (fcnt == frags) {
- update_addresses(pkt, targ->g);
+ update_addresses(pkt, targ);
}
if (options & OPT_DUMP) {
@@ -1675,19 +2088,25 @@
int dump = targ->g->options & OPT_DUMP;
struct netmap_ring *ring;
unsigned int frags_exp = 1;
- uint32_t seq_exp = 0;
struct my_ctrs cur;
unsigned int frags = 0;
int first_packet = 1;
int first_slot = 1;
- int i;
+ int i, j, af, nrings;
+ uint32_t seq, *seq_exp = NULL;
- cur.pkts = cur.bytes = cur.events = cur.min_space = 0;
- cur.t.tv_usec = cur.t.tv_sec = 0; // unused, just silence the compiler
+ memset(&cur, 0, sizeof(cur));
if (setaffinity(targ->thread, targ->affinity))
goto quit;
+ nrings = targ->nmd->last_rx_ring - targ->nmd->first_rx_ring + 1;
+ seq_exp = calloc(nrings, sizeof(uint32_t));
+ if (seq_exp == NULL) {
+ D("failed to allocate seq array");
+ goto quit;
+ }
+
D("reading from %s fd %d main_fd %d",
targ->g->ifname, targ->fd, targ->g->main_fd);
/* unbounded wait for the first packet. */
@@ -1701,15 +2120,18 @@
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
- ring = NETMAP_RXRING(targ->nmd->nifp, targ->nmd->first_rx_ring);
while (!targ->cancel) {
unsigned int head;
- uint32_t seq;
int limit;
- /* Once we started to receive packets, wait at most 1 seconds
- before quitting. */
+#ifdef BUSYWAIT
+ if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) {
+ D("ioctl error on queue %d: %s", targ->me,
+ strerror(errno));
+ goto quit;
+ }
+#else /* !BUSYWAIT */
if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
targ->toc.tv_sec -= 1; /* Subtract timeout time. */
@@ -1720,108 +2142,123 @@
D("poll err");
goto quit;
}
+#endif /* !BUSYWAIT */
- if (nm_ring_empty(ring))
- continue;
+ for (j = 0; j < nrings; j++) { /* j is zero-based: it indexes seq_exp[], which has only nrings entries */
+ ring = NETMAP_RXRING(targ->nmd->nifp, targ->nmd->first_rx_ring + j);
+ if (nm_ring_empty(ring))
+ continue;
- limit = nm_ring_space(ring);
- if (limit > targ->g->burst)
- limit = targ->g->burst;
+ limit = nm_ring_space(ring);
+ if (limit > targ->g->burst)
+ limit = targ->g->burst;
#if 0
- /* Enable this if
- * 1) we remove the early-return optimization from
- * the netmap poll implementation, or
- * 2) pipes get NS_MOREFRAG support.
- * With the current netmap implementation, an experiment like
- * pkt-gen -i vale:1{1 -f txseq -F 9
- * pkt-gen -i vale:1}1 -f rxseq
- * would get stuck as soon as we find nm_ring_space(ring) < 9,
- * since here limit is rounded to 0 and
- * pipe rxsync is not called anymore by the poll() of this loop.
- */
- if (frags_exp > 1) {
- int o = limit;
- /* Cut off to the closest smaller multiple. */
- limit = (limit / frags_exp) * frags_exp;
- RD(2, "LIMIT %d --> %d", o, limit);
- }
+ /* Enable this if
+ * 1) we remove the early-return optimization from
+ * the netmap poll implementation, or
+ * 2) pipes get NS_MOREFRAG support.
+ * With the current netmap implementation, an experiment like
+ * pkt-gen -i vale:1{1 -f txseq -F 9
+ * pkt-gen -i vale:1}1 -f rxseq
+ * would get stuck as soon as we find nm_ring_space(ring) < 9,
+ * since here limit is rounded to 0 and
+ * pipe rxsync is not called anymore by the poll() of this loop.
+ */
+ if (frags_exp > 1) {
+ int o = limit;
+ /* Cut off to the closest smaller multiple. */
+ limit = (limit / frags_exp) * frags_exp;
+ RD(2, "LIMIT %d --> %d", o, limit);
+ }
#endif
- for (head = ring->head, i = 0; i < limit; i++) {
- struct netmap_slot *slot = &ring->slot[head];
- char *p = NETMAP_BUF(ring, slot->buf_idx);
- int len = slot->len;
- struct pkt *pkt;
+ for (head = ring->head, i = 0; i < limit; i++) {
+ struct netmap_slot *slot = &ring->slot[head];
+ char *p = NETMAP_BUF(ring, slot->buf_idx);
+ int len = slot->len;
+ struct pkt *pkt;
- if (dump) {
- dump_payload(p, slot->len, ring, head);
- }
+ if (dump) {
+ dump_payload(p, slot->len, ring, head);
+ }
- frags++;
- if (!(slot->flags & NS_MOREFRAG)) {
- if (first_packet) {
+ frags++;
+ if (!(slot->flags & NS_MOREFRAG)) {
+ if (first_packet) {
+ first_packet = 0;
+ } else if (frags != frags_exp) {
+ char prbuf[512];
+ RD(1, "Received packets with %u frags, "
+ "expected %u, '%s'", frags, frags_exp,
+ multi_slot_to_string(ring, head-frags+1,
+ frags,
+ prbuf, sizeof(prbuf)));
+ }
first_packet = 0;
- } else if (frags != frags_exp) {
- char prbuf[512];
- RD(1, "Received packets with %u frags, "
- "expected %u, '%s'", frags, frags_exp,
- multi_slot_to_string(ring, head-frags+1, frags,
- prbuf, sizeof(prbuf)));
+ frags_exp = frags;
+ frags = 0;
}
- first_packet = 0;
- frags_exp = frags;
- frags = 0;
- }
- p -= sizeof(pkt->vh) - targ->g->virt_header;
- len += sizeof(pkt->vh) - targ->g->virt_header;
- pkt = (struct pkt *)p;
+ p -= sizeof(pkt->vh) - targ->g->virt_header;
+ len += sizeof(pkt->vh) - targ->g->virt_header;
+ pkt = (struct pkt *)p;
+ if (ntohs(pkt->eh.ether_type) == ETHERTYPE_IP)
+ af = AF_INET;
+ else
+ af = AF_INET6;
- if ((char *)pkt + len < ((char *)pkt->body) + sizeof(seq)) {
- RD(1, "%s: packet too small (len=%u)", __func__,
- slot->len);
- } else {
- seq = (pkt->body[0] << 24) | (pkt->body[1] << 16)
- | (pkt->body[2] << 8) | pkt->body[3];
- if (first_slot) {
- /* Grab the first one, whatever it
- is. */
- seq_exp = seq;
- first_slot = 0;
- } else if (seq != seq_exp) {
- uint32_t delta = seq - seq_exp;
+ if ((char *)pkt + len < ((char *)PKT(pkt, body, af)) +
+ sizeof(seq)) {
+ RD(1, "%s: packet too small (len=%u)", __func__,
+ slot->len);
+ } else {
+ seq = (PKT(pkt, body, af)[0] << 24) |
+ (PKT(pkt, body, af)[1] << 16) |
+ (PKT(pkt, body, af)[2] << 8) |
+ PKT(pkt, body, af)[3];
+ if (first_slot) {
+ /* Grab the first one, whatever it
+ is. */
+ seq_exp[j] = seq;
+ first_slot = 0;
+ } else if (seq != seq_exp[j]) {
+ uint32_t delta = seq - seq_exp[j];
- if (delta < (0xFFFFFFFF >> 1)) {
- RD(2, "Sequence GAP: exp %u found %u",
- seq_exp, seq);
- } else {
- RD(2, "Sequence OUT OF ORDER: "
- "exp %u found %u", seq_exp, seq);
+ if (delta < (0xFFFFFFFF >> 1)) {
+ RD(2, "Sequence GAP: exp %u found %u",
+ seq_exp[j], seq);
+ } else {
+ RD(2, "Sequence OUT OF ORDER: "
+ "exp %u found %u", seq_exp[j], seq);
+ }
+ seq_exp[j] = seq;
}
- seq_exp = seq;
+ seq_exp[j]++;
}
- seq_exp++;
+
+ cur.bytes += slot->len;
+ head = nm_ring_next(ring, head);
+ cur.pkts++;
}
- cur.bytes += slot->len;
- head = nm_ring_next(ring, head);
- cur.pkts++;
+ ring->cur = ring->head = head;
+
+ cur.events++;
+ targ->ctr = cur;
}
-
- ring->cur = ring->head = head;
-
- cur.events++;
- targ->ctr = cur;
}
-
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
+#ifndef BUSYWAIT
out:
+#endif /* !BUSYWAIT */
targ->completed = 1;
targ->ctr = cur;
quit:
+ if (seq_exp != NULL)
+ free(seq_exp);
/* reset the ``used`` flag. */
targ->used = 0;
@@ -1859,46 +2296,85 @@
abs = cur->pkts / (double)(cur->events);
printf("Speed: %spps Bandwidth: %sbps (raw %sbps). Average batch: %.2f pkts\n",
- norm(b1, pps), norm(b2, bw), norm(b3, raw_bw), abs);
+ norm(b1, pps, normalize), norm(b2, bw, normalize), norm(b3, raw_bw, normalize), abs);
}
static void
-usage(void)
+usage(int errcode)
{
const char *cmd = "pkt-gen";
fprintf(stderr,
"Usage:\n"
"%s arguments\n"
- "\t-i interface interface name\n"
- "\t-f function tx rx ping pong txseq rxseq\n"
- "\t-n count number of iterations (can be 0)\n"
- "\t-t pkts_to_send also forces tx mode\n"
- "\t-r pkts_to_receive also forces rx mode\n"
- "\t-l pkt_size in bytes excluding CRC\n"
- "\t-d dst_ip[:port[-dst_ip:port]] single or range\n"
- "\t-s src_ip[:port[-src_ip:port]] single or range\n"
- "\t-D dst-mac\n"
- "\t-S src-mac\n"
- "\t-a cpu_id use setaffinity\n"
- "\t-b burst size testing, mostly\n"
- "\t-c cores cores to use\n"
- "\t-p threads processes/threads to use\n"
- "\t-T report_ms milliseconds between reports\n"
- "\t-w wait_for_link_time in seconds\n"
- "\t-R rate in packets per second\n"
- "\t-X dump payload\n"
- "\t-H len add empty virtio-net-header with size 'len'\n"
- "\t-E pipes allocate extra space for a number of pipes\n"
- "\t-r do not touch the buffers (send rubbish)\n"
- "\t-P file load packet from pcap file\n"
- "\t-z use random IPv4 src address/port\n"
- "\t-Z use random IPv4 dst address/port\n"
- "\t-F num_frags send multi-slot packets\n"
- "\t-A activate pps stats on receiver\n"
- "",
+ "\t-i interface interface name\n"
+ "\t-f function tx rx ping pong txseq rxseq\n"
+ "\t-n count number of iterations (can be 0)\n"
+#ifdef notyet
+ "\t-t pkts_to_send also forces tx mode\n"
+ "\t-r pkts_to_receive also forces rx mode\n"
+#endif
+ "\t-l pkt_size in bytes excluding CRC\n"
+ "\t (if passed a second time, use random sizes\n"
+ "\t bigger than the second one and lower than\n"
+ "\t the first one)\n"
+ "\t-d dst_ip[:port[-dst_ip:port]] single or range\n"
+ "\t-s src_ip[:port[-src_ip:port]] single or range\n"
+ "\t-D dst-mac\n"
+ "\t-S src-mac\n"
+ "\t-a cpu_id use setaffinity\n"
+ "\t-b burst size testing, mostly\n"
+ "\t-c cores cores to use\n"
+ "\t-p threads processes/threads to use\n"
+ "\t-T report_ms milliseconds between reports\n"
+ "\t-w wait_for_link_time in seconds\n"
+ "\t-R rate in packets per second\n"
+ "\t-X dump payload\n"
+ "\t-H len add empty virtio-net-header with size 'len'\n"
+ "\t-E pipes allocate extra space for a number of pipes\n"
+ "\t-r do not touch the buffers (send rubbish)\n"
+ "\t-P file load packet from pcap file\n"
+ "\t-z use random IPv4 src address/port\n"
+ "\t-Z use random IPv4 dst address/port\n"
+ "\t-F num_frags send multi-slot packets\n"
+ "\t-M set MTU\n"
+ "\t-A activate pps stats on receiver\n"
+ "\t-4 IPv4\n"
+ "\t-6 IPv6\n"
+ "\t-N don't normalize units (Kbps/Mbps/etc)\n"
+ "\t-I use indirect buffers, tx only\n"
+ "\t-o options data generation options (parsed using atoi)\n"
+ "\t OPT_PREFETCH 1\n"
+ "\t OPT_ACCESS 2\n"
+ "\t OPT_COPY 4\n"
+ "\t OPT_MEMCPY 8\n"
+ "\t OPT_TS 16 (add a timestamp)\n"
+ "\t OPT_INDIRECT 32 (use indirect buffers)\n"
+ "\t OPT_DUMP 64 (dump rx/tx traffic)\n"
+ "\t OPT_RUBBISH 256\n"
+ "\t (send wathever the buffers contain)\n"
+ "\t OPT_RANDOM_SRC 512\n"
+ "\t OPT_RANDOM_DST 1024\n"
+ "\t OPT_PPS_STATS 2048\n"
+ "\t-W exit RX with no traffic\n"
+ "\t-v verbose (more v = more verbose)\n"
+ "\t-C vale-config specify a vale config\n"
+#ifdef notyet
+ "\t The configuration may consist of 0 to 4\n"
+ "\t numbers separated by commas:\n"
+ "\t #tx-slots,#rx-slots,#tx-rings,#rx-rings.\n"
+ "\t Missing numbers or zeroes stand for default\n"
+ "\t values. As an additional convenience, if\n"
+ "\t exactly one number is specified, then this\n"
+ "\t is assigned to both #tx-slots and #rx-slots.\n"
+ "\t If there is no 4th number, then the 3rd is\n"
+ "\t assigned to both #tx-rings and #rx-rings.\n"
+#endif
+ "\t-e extra-bufs extra_bufs - goes in nr_arg3\n"
+ "\t-B account for ethernet framing when showing bps\n"
+ "\t-m ignored\n"
+ "",
cmd);
-
- exit(0);
+ exit(errcode);
}
enum {
@@ -1908,57 +2384,59 @@
};
static void
-start_threads(struct glob_arg *g)
-{
+start_threads(struct glob_arg *g) {
int i;
targs = calloc(g->nthreads, sizeof(*targs));
+ struct targ *t;
/*
* Now create the desired number of threads, each one
* using a single descriptor.
- */
+ */
for (i = 0; i < g->nthreads; i++) {
- struct targ *t = &targs[i];
+ uint64_t seed = (uint64_t)time(0) | ((uint64_t)time(0) << 32); /* widen before shifting: time_t may be 32 bits */
+ t = &targs[i];
bzero(t, sizeof(*t));
t->fd = -1; /* default, with pcap */
t->g = g;
+ memcpy(t->seed, &seed, sizeof(t->seed));
- if (g->dev_type == DEV_NETMAP) {
- struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
- uint64_t nmd_flags = 0;
- nmd.self = &nmd;
+ if (g->dev_type == DEV_NETMAP) {
+ struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
+ uint64_t nmd_flags = 0;
+ nmd.self = &nmd;
- if (i > 0) {
- /* the first thread uses the fd opened by the main
- * thread, the other threads re-open /dev/netmap
- */
- if (g->nthreads > 1) {
- nmd.req.nr_flags =
- g->nmd->req.nr_flags & ~NR_REG_MASK;
- nmd.req.nr_flags |= NR_REG_ONE_NIC;
- nmd.req.nr_ringid = i;
- }
- /* Only touch one of the rings (rx is already ok) */
- if (g->td_type == TD_TYPE_RECEIVER)
- nmd_flags |= NETMAP_NO_TX_POLL;
+ if (i > 0) {
+ /* the first thread uses the fd opened by the main
+ * thread, the other threads re-open /dev/netmap
+ */
+ if (g->nthreads > 1) {
+ nmd.req.nr_flags =
+ g->nmd->req.nr_flags & ~NR_REG_MASK;
+ nmd.req.nr_flags |= NR_REG_ONE_NIC;
+ nmd.req.nr_ringid = i;
+ }
+ /* Only touch one of the rings (rx is already ok) */
+ if (g->td_type == TD_TYPE_RECEIVER)
+ nmd_flags |= NETMAP_NO_TX_POLL;
- /* register interface. Override ifname and ringid etc. */
- t->nmd = nm_open(t->g->ifname, NULL, nmd_flags |
- NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
- if (t->nmd == NULL) {
- D("Unable to open %s: %s",
- t->g->ifname, strerror(errno));
- continue;
+ /* register interface. Override ifname and ringid etc. */
+ t->nmd = nm_open(t->g->ifname, NULL, nmd_flags |
+ NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
+ if (t->nmd == NULL) {
+ D("Unable to open %s: %s",
+ t->g->ifname, strerror(errno));
+ continue;
+ }
+ } else {
+ t->nmd = g->nmd;
}
+ t->fd = t->nmd->fd;
+ t->frags = g->frags;
} else {
- t->nmd = g->nmd;
+ targs[i].fd = g->main_fd;
}
- t->fd = t->nmd->fd;
-
- } else {
- targs[i].fd = g->main_fd;
- }
t->used = 1;
t->me = i;
if (g->affinity >= 0) {
@@ -1968,7 +2446,14 @@
}
/* default, init packets */
initialize_packet(t);
+ }
+ /* Wait for PHY reset. */
+ D("Wait %d secs for phy reset", g->wait_link);
+ sleep(g->wait_link);
+ D("Ready...");
+ for (i = 0; i < g->nthreads; i++) {
+ t = &targs[i];
if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) {
D("Unable to create thread %d: %s", i, strerror(errno));
t->used = 0;
@@ -1988,7 +2473,7 @@
prev.pkts = prev.bytes = prev.events = 0;
gettimeofday(&prev.t, NULL);
for (;;) {
- char b1[40], b2[40], b3[40], b4[70];
+ char b1[40], b2[40], b3[40], b4[100];
uint64_t pps, usec;
struct my_ctrs x;
double abs;
@@ -2045,13 +2530,13 @@
ppsdev = sqrt(ppsdev);
snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]",
- norm(b1, ppsavg), norm(b2, ppsdev));
+ norm(b1, ppsavg, normalize), norm(b2, ppsdev, normalize));
}
D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space",
- norm(b1, pps), b4,
- norm(b2, (double)x.pkts),
- norm(b3, (double)x.bytes*8),
+ norm(b1, pps, normalize), b4,
+ norm(b2, (double)x.pkts, normalize),
+ norm(b3, (double)x.bytes*8+(double)x.pkts*g->framing, normalize),
(unsigned long long)usec,
abs, (int)cur.min_space);
prev = cur;
@@ -2106,7 +2591,7 @@
delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
if (g->td_type == TD_TYPE_SENDER)
tx_output(&cur, delta_t, "Sent");
- else
+ else if (g->td_type == TD_TYPE_RECEIVER)
tx_output(&cur, delta_t, "Received");
}
@@ -2114,16 +2599,17 @@
int ty;
char *key;
void *f;
+ int default_burst;
};
static struct td_desc func[] = {
- { TD_TYPE_SENDER, "tx", sender_body },
- { TD_TYPE_RECEIVER, "rx", receiver_body },
- { TD_TYPE_OTHER, "ping", pinger_body },
- { TD_TYPE_OTHER, "pong", ponger_body },
- { TD_TYPE_SENDER, "txseq", txseq_body },
- { TD_TYPE_RECEIVER, "rxseq", rxseq_body },
- { 0, NULL, NULL }
+ { TD_TYPE_RECEIVER, "rx", receiver_body, 512}, /* default */
+ { TD_TYPE_SENDER, "tx", sender_body, 512 },
+ { TD_TYPE_OTHER, "ping", ping_body, 1 },
+ { TD_TYPE_OTHER, "pong", pong_body, 1 },
+ { TD_TYPE_SENDER, "txseq", txseq_body, 512 },
+ { TD_TYPE_RECEIVER, "rxseq", rxseq_body, 512 },
+ { 0, NULL, NULL, 0 }
};
static int
@@ -2165,7 +2651,12 @@
/* if a device name was specified, put it in the structure; otherwise,
* the kernel will try to allocate the "next" device of the
* specified type */
- strncpy(ifr.ifr_name, dev, IFNAMSIZ);
+ size_t len = strlen(dev);
+ if (len >= IFNAMSIZ) { /* ifr_name holds IFNAMSIZ bytes including the NUL */
+ D("%s too long", dev);
+ return -1;
+ }
+ memcpy(ifr.ifr_name, dev, len + 1); /* len < IFNAMSIZ, so the NUL fits */
}
/* try to create the device */
@@ -2183,9 +2674,9 @@
D("new name is %s", dev);
#endif /* linux */
- /* this is the special file descriptor that the caller will use to talk
- * with the virtual interface */
- return fd;
+ /* this is the special file descriptor that the caller will use to talk
+ * with the virtual interface */
+ return fd;
}
int
@@ -2198,41 +2689,62 @@
struct glob_arg g;
int ch;
- int wait_link = 2;
int devqueues = 1; /* how many device queues */
+ int wait_link_arg = 0;
+ int pkt_size_done = 0;
+
+ struct td_desc *fn = func;
+
bzero(&g, sizeof(g));
g.main_fd = -1;
- g.td_body = receiver_body;
- g.td_type = TD_TYPE_RECEIVER;
+ g.td_body = fn->f;
+ g.td_type = fn->ty;
g.report_interval = 1000; /* report interval */
g.affinity = -1;
/* ip addresses can also be a range x.x.x.x-x.x.x.y */
+ g.af = AF_INET; /* default */
g.src_ip.name = "10.0.0.1";
g.dst_ip.name = "10.1.0.1";
g.dst_mac.name = "ff:ff:ff:ff:ff:ff";
g.src_mac.name = NULL;
g.pkt_size = 60;
- g.burst = 512; // default
+ g.pkt_min_size = 0;
g.nthreads = 1;
- g.cpus = 1; // default
+ g.cpus = 1; /* default */
g.forever = 1;
g.tx_rate = 0;
- g.frags = 1;
+ g.frags =1;
+ g.mtu = 1500;
g.nmr_config = "";
g.virt_header = 0;
+ g.wait_link = 2; /* wait 2 seconds for physical ports */
- while ( (ch = getopt(arc, argv,
- "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:E:m:rP:zZA")) != -1) {
- struct td_desc *fn;
+ while ((ch = getopt(arc, argv, "46a:f:F:Nn:i:Il:d:s:D:S:b:c:o:p:"
+ "T:w:WvR:XC:H:e:E:m:rP:zZAhBM:")) != -1) {
switch(ch) {
default:
D("bad option %c %s", ch, optarg);
- usage();
+ usage(-1);
break;
+ case 'h':
+ usage(0);
+ break;
+ case '4':
+ g.af = AF_INET;
+ break;
+
+ case '6':
+ g.af = AF_INET6;
+ break;
+
+ case 'N':
+ normalize = 0;
+ break;
+
case 'n':
g.npackets = strtoull(optarg, NULL, 10);
break;
@@ -2246,6 +2758,10 @@
g.frags = i;
break;
+ case 'M':
+ g.mtu = atoi(optarg);
+ break;
+
case 'f':
for (fn = func; fn->key; fn++) {
if (!strcmp(fn->key, optarg))
@@ -2260,7 +2776,7 @@
break;
case 'o': /* data generation options */
- g.options = atoi(optarg);
+ g.options |= atoi(optarg);
break;
case 'a': /* force affinity */
@@ -2298,11 +2814,16 @@
break;
case 'I':
- g.options |= OPT_INDIRECT; /* XXX use indirect buffer */
+ g.options |= OPT_INDIRECT; /* use indirect buffers */
break;
case 'l': /* pkt_size */
- g.pkt_size = atoi(optarg);
+ if (pkt_size_done) {
+ g.pkt_min_size = atoi(optarg);
+ } else {
+ g.pkt_size = atoi(optarg);
+ pkt_size_done = 1;
+ }
break;
case 'd':
@@ -2318,11 +2839,12 @@
break;
case 'w':
- wait_link = atoi(optarg);
+ g.wait_link = atoi(optarg);
+ wait_link_arg = 1;
break;
- case 'W': /* XXX changed default */
- g.forever = 0; /* do not exit rx even with no traffic */
+ case 'W':
+ g.forever = 0; /* exit RX with no traffic */
break;
case 'b': /* burst */
@@ -2381,28 +2903,46 @@
case 'A':
g.options |= OPT_PPS_STATS;
break;
+ case 'B':
+ // XXX maybe add an option to pass the IFG
+ g.framing = 24 * 8;
+ break;
}
}
if (strlen(g.ifname) <=0 ) {
D("missing ifname");
- usage();
+ usage(-1);
}
+ if (g.burst == 0) {
+ g.burst = fn->default_burst;
+ D("using default burst size: %d", g.burst);
+ }
+
g.system_cpus = i = system_ncpus();
if (g.cpus < 0 || g.cpus > i) {
D("%d cpus is too high, have only %d cpus", g.cpus, i);
- usage();
+ usage(-1);
}
-D("running on %d cpus (have %d)", g.cpus, i);
+ D("running on %d cpus (have %d)", g.cpus, i);
if (g.cpus == 0)
g.cpus = i;
+ if (!wait_link_arg && !strncmp(g.ifname, "vale", 4)) {
+ g.wait_link = 0;
+ }
+
if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) {
D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE);
- usage();
+ usage(-1);
}
+ if (g.pkt_min_size > 0 && (g.pkt_min_size < 16 || g.pkt_min_size > g.pkt_size)) {
+ D("bad pktminsize %d [16..%d]\n", g.pkt_min_size, g.pkt_size);
+ usage(-1);
+ }
+
if (g.src_mac.name == NULL) {
static char mybuf[20] = "00:00:00:00:00:00";
/* retrieve source mac address. */
@@ -2413,21 +2953,15 @@
g.src_mac.name = mybuf;
}
/* extract address ranges */
- extract_ip_range(&g.src_ip);
- extract_ip_range(&g.dst_ip);
- extract_mac_range(&g.src_mac);
- extract_mac_range(&g.dst_mac);
+ if (extract_mac_range(&g.src_mac) || extract_mac_range(&g.dst_mac))
+ usage(-1);
+ g.options |= extract_ip_range(&g.src_ip, g.af);
+ g.options |= extract_ip_range(&g.dst_ip, g.af);
- if (g.src_ip.start != g.src_ip.end ||
- g.src_ip.port0 != g.src_ip.port1 ||
- g.dst_ip.start != g.dst_ip.end ||
- g.dst_ip.port0 != g.dst_ip.port1)
- g.options |= OPT_COPY;
-
if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1
&& g.virt_header != VIRT_HDR_2) {
D("bad virtio-net-header length");
- usage();
+ usage(-1);
}
if (g.dev_type == DEV_TAP) {
@@ -2435,7 +2969,7 @@
g.main_fd = tap_alloc(g.ifname);
if (g.main_fd < 0) {
D("cannot open tap %s", g.ifname);
- usage();
+ usage(-1);
}
#ifndef NO_PCAP
} else if (g.dev_type == DEV_PCAP) {
@@ -2445,7 +2979,7 @@
g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf);
if (g.p == NULL) {
D("cannot open pcap on %s", g.ifname);
- usage();
+ usage(-1);
}
g.main_fd = pcap_fileno(g.p);
D("using pcap on %s fileno %d", g.ifname, g.main_fd);
@@ -2453,20 +2987,27 @@
} else if (g.dummy_send) { /* but DEV_NETMAP */
D("using a dummy send routine");
} else {
- struct nmreq base_nmd;
+ struct nm_desc base_nmd;
+ char errmsg[MAXERRMSG];
+ u_int flags;
bzero(&base_nmd, sizeof(base_nmd));
- parse_nmr_config(g.nmr_config, &base_nmd);
+ parse_nmr_config(g.nmr_config, &base_nmd.req);
if (g.extra_bufs) {
- base_nmd.nr_arg3 = g.extra_bufs;
+ base_nmd.req.nr_arg3 = g.extra_bufs;
}
if (g.extra_pipes) {
- base_nmd.nr_arg1 = g.extra_pipes;
+ base_nmd.req.nr_arg1 = g.extra_pipes;
}
- base_nmd.nr_flags |= NR_ACCEPT_VNET_HDR;
+ base_nmd.req.nr_flags |= NR_ACCEPT_VNET_HDR;
+ if (nm_parse(g.ifname, &base_nmd, errmsg) < 0) {
+ D("Invalid name '%s': %s", g.ifname, errmsg);
+ goto out;
+ }
+
/*
* Open the netmap device using nm_open().
*
@@ -2474,28 +3015,21 @@
* which in turn may take some time for the PHY to
* reconfigure. We do the open here to have time to reset.
*/
- g.nmd = nm_open(g.ifname, &base_nmd, 0, NULL);
+ flags = NM_OPEN_IFNAME | NM_OPEN_ARG1 | NM_OPEN_ARG2 |
+ NM_OPEN_ARG3 | NM_OPEN_RING_CFG;
+ if (g.nthreads > 1) {
+ base_nmd.req.nr_flags &= ~NR_REG_MASK;
+ base_nmd.req.nr_flags |= NR_REG_ONE_NIC;
+ base_nmd.req.nr_ringid = 0;
+ }
+ g.nmd = nm_open(g.ifname, NULL, flags, &base_nmd);
if (g.nmd == NULL) {
D("Unable to open %s: %s", g.ifname, strerror(errno));
goto out;
}
-
- if (g.nthreads > 1) {
- struct nm_desc saved_desc = *g.nmd;
- saved_desc.self = &saved_desc;
- saved_desc.mem = NULL;
- nm_close(g.nmd);
- saved_desc.req.nr_flags &= ~NR_REG_MASK;
- saved_desc.req.nr_flags |= NR_REG_ONE_NIC;
- saved_desc.req.nr_ringid = 0;
- g.nmd = nm_open(g.ifname, &base_nmd, NM_OPEN_IFNAME, &saved_desc);
- if (g.nmd == NULL) {
- D("Unable to open %s: %s", g.ifname, strerror(errno));
- goto out;
- }
- }
g.main_fd = g.nmd->fd;
- D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem);
+ D("mapped %luKB at %p", (unsigned long)(g.nmd->req.nr_memsize>>10),
+ g.nmd->mem);
if (g.virt_header) {
/* Set the virtio-net header length, since the user asked
@@ -2558,7 +3092,7 @@
/* Exit if something went wrong. */
if (g.main_fd < 0) {
D("aborting");
- usage();
+ usage(-1);
}
}
@@ -2583,8 +3117,8 @@
int lim = (g.tx_rate)/300;
if (g.burst > lim)
g.burst = lim;
- if (g.burst < g.frags)
- g.burst = g.frags;
+ if (g.burst == 0)
+ g.burst = 1;
x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate;
g.tx_period.tv_nsec = x;
g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
@@ -2593,11 +3127,6 @@
if (g.td_type == TD_TYPE_SENDER)
D("Sending %d packets every %ld.%09ld s",
g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec);
- /* Wait for PHY reset. */
- D("Wait %d secs for phy reset", wait_link);
- sleep(wait_link);
- D("Ready...");
-
/* Install ^C handler. */
global_nthreads = g.nthreads;
sigemptyset(&ss);
@@ -2608,6 +3137,7 @@
}
start_threads(&g);
/* Install the handler and re-enable SIGINT for the main thread */
+ memset(&sa, 0, sizeof(sa));
sa.sa_handler = sigint_h;
if (sigaction(SIGINT, &sa, NULL) < 0) {
D("failed to install ^C handler: %s", strerror(errno));
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Apr 22, 5:41 PM (21 h, 24 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31991622
Default Alt Text
D17698.id49605.diff (67 KB)
Attached To
Mode
D17698: netmap: pkt-gen: several updates from upstream
Attached
Detach File
Event Timeline
Log In to Comment