Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F150075066
D17698.id49615.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
81 KB
Referenced Files
None
Subscribers
None
D17698.id49615.diff
View Options
Index: ctrs.h
===================================================================
--- ctrs.h
+++ ctrs.h
@@ -7,9 +7,11 @@
/* counters to accumulate statistics */
struct my_ctrs {
- uint64_t pkts, bytes, events, drop;
+ uint64_t pkts, bytes, events;
+ uint64_t drop, drop_bytes;
uint64_t min_space;
struct timeval t;
+ uint32_t oq_n; /* number of elements in overflow queue (used in lb) */
};
/* very crude code to print a number in normalized form.
@@ -16,21 +18,26 @@
* Caller has to make sure that the buffer is large enough.
*/
static const char *
-norm2(char *buf, double val, char *fmt)
+norm2(char *buf, double val, char *fmt, int normalize)
{
char *units[] = { "", "K", "M", "G", "T" };
u_int i;
-
- for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++)
- val /= 1000;
+ if (normalize)
+ for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++)
+ val /= 1000;
+ else
+ i=0;
sprintf(buf, fmt, val, units[i]);
return buf;
}
static __inline const char *
-norm(char *buf, double val)
+norm(char *buf, double val, int normalize)
{
- return norm2(buf, val, "%.3f %s");
+ if (normalize)
+ return norm2(buf, val, "%.3f %s", normalize);
+ else
+ return norm2(buf, val, "%.0f %s", normalize);
}
static __inline int
@@ -89,7 +96,7 @@
return ret;
}
-static uint64_t
+static __inline uint64_t
wait_for_next_report(struct timeval *prev, struct timeval *cur,
int report_interval)
{
@@ -106,3 +113,4 @@
return delta.tv_sec* 1000000 + delta.tv_usec;
}
#endif /* CTRS_H_ */
+
Index: pkt-gen.8
===================================================================
--- pkt-gen.8
+++ pkt-gen.8
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd October 23, 2018
+.Dd October 25, 2018
.Dt PKT-GEN 8
.Os
.Sh NAME
@@ -36,96 +36,214 @@
.Bl -item -compact
.It
.Nm
+.Op Fl h46XzZNIWvrAB
.Op Fl i Ar interface
.Op Fl f Ar function
.Op Fl n Ar count
-.Op Fl t Ar pkts_to_send
-.Op Fl r Ar pkts_to_receive
.Op Fl l Ar pkt_size
+.Op Fl b Ar burst_size
.Op Fl d Ar dst_ip[:port[-dst_ip:port]]
.Op Fl s Ar src_ip[:port[-src_ip:port]]
-.Op Fl D Ar dst-mac
-.Op Fl S Ar src-mac
+.Op Fl D Ar dst_mac
+.Op Fl S Ar src_mac
.Op Fl a Ar cpu_id
-.Op Fl b Ar burst size
-.Op Fl c Ar cores
+.Op Fl c Ar cpus
.Op Fl p Ar threads
.Op Fl T Ar report_ms
-.Op Fl P
+.Op Fl P Ar file
.Op Fl w Ar wait_for_link_time
.Op Fl R Ar rate
-.Op Fl X
.Op Fl H Ar len
-.Op Fl P Ar xfile
-.Op Fl z
-.Op Fl Z
+.Op Fl F Ar num_frags
+.Op Fl M Ar frag_size
+.Op Fl C Ar port_config
+.El
.Sh DESCRIPTION
.Nm
-generates and receives raw network packets using
-.Xr netmap 4 .
+leverages
+.Xr netmap 4
+to generate and receive raw network packets in batches.
The arguments are as follows:
-.Pp
.Bl -tag -width Ds
+.It Fl h
+Show program usage and exit.
.It Fl i Ar interface
-Network interface name.
-.It Fl f Ar function tx rx ping pong
-Set the function to transmit, receive of ping/pong.
-.It Fl n count
-Number of iterations (can be 0).
-.It Fl t pkts_to_send
-Number of packets to send. Also forces transmit mode.
-.It Fl r Ar pkts_to_receive
-Number of packets to receive. Also forces rx mode.
+Name of the network interface that
+.Nm
+operates on.
+It can be a system network interface (e.g., em0),
+the name of a
+.Xr vale 4
+port (e.g., valeSSS:PPP), the name of a netmap pipe or monitor,
+or any valid netmap port name accepted by the
+.Ar nm_open
+library function, as documented in
+.Xr netmap 4
+(NIOCREGIF section).
+.It Fl f Ar function
+The function to be executed by
+.Nm .
+Specify
+.Ar tx
+for transmission,
+.Ar rx
+for reception,
+.Ar ping
+for client-side ping-pong operation, and
+.Ar pong
+for server-side ping-pong operation.
+.It Fl n Ar count
+Number of iterations of the
+.Nm
+function (with 0 meaning infinite).
+In case of
+.Ar tx
+or
+.Ar rx ,
+.Ar count
+is the number of packets to receive or transmit.
+In case of
+.Ar ping
+or
+.Ar pong ,
+.Ar count
+is the number of ping-pong transactions.
.It Fl l Ar pkt_size
Packet size in bytes excluding CRC.
+If passed a second time, use random sizes greater than or equal to the
+second one and less than the first one.
+.It Fl b Ar burst_size
+Transmit or receive up to
+.Ar burst_size
+packets at a time.
+.It Fl 4
+Use IPv4 addresses.
+.It Fl 6
+Use IPv6 addresses.
.It Fl d Ar dst_ip[:port[-dst_ip:port]]
-Destination IPv4 address and port, single or range.
+Destination IPv4/IPv6 address and port, single or range.
.It Fl s Ar src_ip[:port[-src_ip:port]]
-Source IPv4 address and port, single or range.
-.It Fl D Ar dst-mac
-Destination MAC address in colon notation.
-.It Fl S Ar src-mac
+Source IPv4/IPv6 address and port, single or range.
+.It Fl D Ar dst_mac
+Destination MAC address in colon notation (e.g., aa:bb:cc:dd:ee:00).
+.It Fl S Ar src_mac
Source MAC address in colon notation.
.It Fl a Ar cpu_id
-Tie
+Pin the first thread of
.Nm
-to a particular CPU core using
-.Xr setaffinity 2.
-.It Fl b Ar burst size
-Set the size of a burst of packets.
-.It Fl c Ar cores
-Number of cores to use.
+to a particular CPU using
+.Xr pthread_setaffinity_np 3 .
+If more threads are used, they are pinned to the subsequent CPUs,
+one per thread.
+.It Fl c Ar cpus
+Maximum number of CPUs to use (0 means to use all the available ones).
.It Fl p Ar threads
Number of threads to use.
+By default, only a single thread is used
+to handle all the netmap rings.
+If
+.Ar threads
+is larger than one, each thread handles a single TX ring (in
+.Ar tx
+mode), a single RX ring (in
+.Ar rx
+mode), or a TX/RX ring couple.
+The number of
+.Ar threads
+must be less than or equal to the number of TX (or RX) rings available
+in the device specified by
+.Ar interface .
.It Fl T Ar report_ms
Number of milliseconds between reports.
-.It Fl P
-Use libpcap instead of netmap for reading or writing.
.It Fl w Ar wait_for_link_time
-Number of seconds to wait to make sure that the network link is up. A
-network device driver may take some time to create a new
-transmit/receive ring pair when
+Number of seconds to wait before starting the
+.Nm
+function, useful to make sure that the network link is up.
+A network device driver may take some time to enter netmap mode, or
+to create a new transmit/receive ring pair when
.Xr netmap 4
requests one.
.It Fl R Ar rate
-Packet transmission rate. Not setting the packet transmission rate tells
+Packet transmission rate.
+Not setting the packet transmission rate tells
.Nm
-to transmit packets as quickly as possible. On servers from 2010 on-wards
+to transmit packets as quickly as possible.
+On servers from 2010 onwards
.Xr netmap 4
is able to completely use all of the bandwidth of a 10 or 40Gbps link,
so this option should be used unless your intention is to saturate the link.
.It Fl X
-Dump payload transmitted or received.
+Dump payload of each packet transmitted or received.
.It Fl H Ar len
-Add empty virtio-net-header with size 'len'. This option is only use
-with Virtual Machine technologies that use virtio as a network interface.
+Add empty virtio-net-header with size 'len'.
+Valid sizes are 0, 10 and 12.
+This option is only used with Virtual Machine technologies that use virtio
+as a network interface.
.It Fl P Ar file
-Load the packet from a pcap file rather than constructing it inside of
-.Nm
+Load the packet to be transmitted from a pcap file rather than constructing
+it within
+.Nm .
.It Fl z
-Use random IPv4 src address/port
+Use random IPv4/IPv6 src address/port.
.It Fl Z
-Use random IPv4 dst address/port
+Use random IPv4/IPv6 dst address/port.
+.It Fl N
+Do not normalize units (i.e., use bps, pps instead of Mbps, Kpps, etc.).
+.It Fl F Ar num_frags
+Send multi-slot packets, each one with
+.Ar num_frags
+fragments.
+A multi-slot packet is represented by two or more consecutive netmap slots
+with the
+.Ar NS_MOREFRAG
+flag set (except for the last slot).
+This is useful to transmit or receive packets larger than the netmap
+buffer size.
+.It Fl M Ar frag_size
+In multi-slot mode,
+.Ar frag_size
+specifies the size of each fragment, if smaller than the packet length
+divided by
+.Ar num_frags .
+.It Fl I
+Use indirect buffers.
+It is only valid for transmitting on VALE ports,
+and it is implemented by setting the
+.Ar NS_INDIRECT
+flag in the netmap slots.
+.It Fl W
+Exit immediately if all the RX rings are empty the first time they are
+examined.
+.It Fl v
+Increase the verbosity level.
+.It Fl r
+In
+.Ar tx
+mode, do not initialize packets, but send whatever the content of
+the uninitialized netmap buffers is (rubbish mode).
+.It Fl A
+Compute mean and standard deviation (over a sliding window) for the
+transmit or receive rate.
+.It Fl B
+Take Ethernet framing and CRC into account when computing the average bps.
+This adds 4 bytes of CRC and 20 bytes of framing to each packet.
+.It Fl C Ar tx_slots[,rx_slots[,tx_rings[,rx_rings]]]
+Configuration in terms of number of rings and slots to be used when
+opening the netmap port.
+Such configuration has effect on software ports
+created on the fly, such as VALE ports and netmap pipes.
+The configuration may consist of 1 to 4 numbers separated by commas:
+.Ar tx_slots , rx_slots , tx_rings , rx_rings .
+Missing numbers or zeroes stand for default values.
+As an additional convenience, if exactly one number is specified,
+then this is assigned to both
+.Ar tx_slots
+and
+.Ar rx_slots .
+If there is no fourth number, then the third one is assigned to both
+.Ar tx_rings
+and
+.Ar rx_rings .
.El
.Pp
.Nm
@@ -133,7 +251,7 @@
.Xr netmap 4
or
.Xr bpf 4
-but which is most often uses with
+but which is most often used with
.Xr netmap 4 .
The
.Ar interface name
@@ -146,7 +264,8 @@
.Nm
can peel off one or more of the transmit or receive rings for its own
use without interfering with packets that might otherwise be destined
-for the host. For example on a system with a Chelsio Network
+for the host.
+For example on a system with a Chelsio Network
Interface Card (NIC) the interface specification of
.Ar -i netmap:ncxl0
gives
@@ -156,20 +275,20 @@
system's TCP/IP stack.
.Sh EXAMPLES
Capture and count all packets arriving on the operating system's cxl0
-interface. Using this will block packets from reaching the operating
+interface.
+Using this will block packets from reaching the operating
system's network stack.
-.Dl
.Pp
.Nm
-i cxl0 -f rx
.Pp
Send a stream of fake DNS packets between two hosts with a packet
-length of 128 bytes. You must set the destination MAC address for
+length of 128 bytes.
+You must set the destination MAC address for
packets to be received by the target host.
.Pp
-.Dl
.Nm
--i netmap:ncxl0 -f tx -s 172.16.0.1:53 -d 172.16.1.3:53 -D 00:07:43:29:2a:e0
+-i netmap:ncxl0 -f tx -s 172.16.0.1:53 -d 172.16.1.3:53 -D 00:07:43:29:2a:e0
.Sh SEE ALSO
.Xr netmap 4 ,
.Xr bridge 8
Index: pkt-gen.c
===================================================================
--- pkt-gen.c
+++ pkt-gen.c
@@ -55,6 +55,11 @@
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
+#include <netinet/ip6.h>
+#ifdef linux
+#define IPV6_VERSION 0x60
+#define IPV6_DEFHLIM 64
+#endif
#include <assert.h>
#include <math.h>
@@ -66,16 +71,18 @@
#include "ctrs.h"
+static void usage(int);
+
#ifdef _WIN32
#define cpuset_t DWORD_PTR //uint64_t
static inline void CPU_ZERO(cpuset_t *p)
{
- *p = 0;
+ *p = 0;
}
static inline void CPU_SET(uint32_t i, cpuset_t *p)
{
- *p |= 1<< (i & 0x3f);
+ *p |= 1<< (i & 0x3f);
}
#define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c) //((void)a, 0)
@@ -155,12 +162,12 @@
#define cpuset_t uint64_t // XXX
static inline void CPU_ZERO(cpuset_t *p)
{
- *p = 0;
+ *p = 0;
}
static inline void CPU_SET(uint32_t i, cpuset_t *p)
{
- *p |= 1<< (i & 0x3f);
+ *p |= 1<< (i & 0x3f);
}
#define pthread_setaffinity_np(a, b, c) ((void)a, 0)
@@ -169,7 +176,7 @@
#define IFF_PPROMISC IFF_PROMISC
#include <net/if_dl.h> /* LLADDR */
#define clock_gettime(a,b) \
- do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
+ do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
#endif /* __APPLE__ */
const char *default_payload="netmap pkt-gen DIRECT payload\n"
@@ -179,10 +186,8 @@
"http://info.iet.unipi.it/~luigi/netmap/ ";
int verbose = 0;
+int normalize = 1;
-#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */
-
-
#define VIRT_HDR_1 10 /* length of a base vnet-hdr */
#define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */
#define VIRT_HDR_MAX VIRT_HDR_2
@@ -195,14 +200,34 @@
struct pkt {
struct virt_header vh;
struct ether_header eh;
- struct ip ip;
- struct udphdr udp;
- uint8_t body[MAX_BODYSIZE]; // XXX hardwired
+ union {
+ struct {
+ struct ip ip;
+ struct udphdr udp;
+ uint8_t body[MAX_BODYSIZE]; /* hardwired */
+ } ipv4;
+ struct {
+ struct ip6_hdr ip;
+ struct udphdr udp;
+ uint8_t body[MAX_BODYSIZE]; /* hardwired */
+ } ipv6;
+ };
} __attribute__((__packed__));
+#define PKT(p, f, af) \
+ ((af) == AF_INET ? (p)->ipv4.f: (p)->ipv6.f)
+
struct ip_range {
char *name;
- uint32_t start, end; /* same as struct in_addr */
+ union {
+ struct {
+ uint32_t start, end; /* same as struct in_addr */
+ } ipv4;
+ struct {
+ struct in6_addr start, end;
+ uint8_t sgroup, egroup;
+ } ipv6;
+ };
uint16_t port0, port1;
};
@@ -227,15 +252,18 @@
*/
struct glob_arg {
+ int af; /* address family AF_INET/AF_INET6 */
struct ip_range src_ip;
struct ip_range dst_ip;
struct mac_range dst_mac;
struct mac_range src_mac;
int pkt_size;
+ int pkt_min_size;
int burst;
int forever;
uint64_t npackets; /* total packets to send */
- int frags; /* fragments per packet */
+ int frags; /* fragments per packet */
+ u_int mtu; /* size of each fragment */
int nthreads;
int cpus; /* cpus used for running */
int system_cpus; /* cpus on the system */
@@ -271,12 +299,12 @@
char *nmr_config;
int dummy_send;
int virt_header; /* send also the virt_header */
- int extra_bufs; /* goes in nr_arg3 */
- int extra_pipes; /* goes in nr_arg1 */
char *packet_file; /* -P option */
#define STATS_WIN 15
int win_idx;
int64_t win[STATS_WIN];
+ int wait_link;
+ int framing; /* #bits of framing (for bw output) */
};
enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };
@@ -304,79 +332,166 @@
struct pkt pkt;
void *frame;
+ uint16_t seed[3];
+ u_int frags;
+ u_int frag_size;
};
+static __inline uint16_t
+cksum_add(uint16_t sum, uint16_t a)
+{
+ uint16_t res;
+ res = sum + a;
+ return (res + (res < a));
+}
+
+static void
+extract_ipv4_addr(char *name, uint32_t *addr, uint16_t *port)
+{
+ struct in_addr a;
+ char *pp;
+
+ pp = strchr(name, ':');
+ if (pp != NULL) { /* do we have ports ? */
+ *pp++ = '\0';
+ *port = (uint16_t)strtol(pp, NULL, 0);
+ }
+
+ inet_pton(AF_INET, name, &a);
+ *addr = ntohl(a.s_addr);
+}
+
+static void
+extract_ipv6_addr(char *name, struct in6_addr *addr, uint16_t *port,
+ uint8_t *group)
+{
+ char *pp;
+
+ /*
+ * We accept IPv6 address in the following form:
+ * group@[2001:DB8::1001]:port (w/ brackets and port)
+ * group@[2001:DB8::1] (w/ brackets and w/o port)
+ * group@2001:DB8::1234 (w/o brackets and w/o port)
+ */
+ pp = strchr(name, '@');
+ if (pp != NULL) {
+ *pp++ = '\0';
+ *group = (uint8_t)strtol(name, NULL, 0);
+ if (*group > 7)
+ *group = 7;
+ name = pp;
+ }
+ if (name[0] == '[')
+ name++;
+ pp = strchr(name, ']');
+ if (pp != NULL)
+ *pp++ = '\0';
+ if (pp != NULL && *pp != ':')
+ pp = NULL;
+ if (pp != NULL) { /* do we have ports ? */
+ *pp++ = '\0';
+ *port = (uint16_t)strtol(pp, NULL, 0);
+ }
+ inet_pton(AF_INET6, name, addr);
+}
/*
* extract the extremes from a range of ipv4 addresses.
* addr_lo[-addr_hi][:port_lo[-port_hi]]
*/
-static void
-extract_ip_range(struct ip_range *r)
+static int
+extract_ip_range(struct ip_range *r, int af)
{
- char *ap, *pp;
+ char *name, *ap, start[INET6_ADDRSTRLEN];
+ char end[INET6_ADDRSTRLEN];
struct in_addr a;
+ uint32_t tmp;
if (verbose)
D("extract IP range from %s", r->name);
- r->port0 = r->port1 = 0;
- r->start = r->end = 0;
+ name = strdup(r->name);
+ if (name == NULL) {
+ D("strdup failed");
+ usage(-1);
+ }
/* the first - splits start/end of range */
- ap = index(r->name, '-'); /* do we have ports ? */
- if (ap) {
+ ap = strchr(name, '-');
+ if (ap != NULL)
*ap++ = '\0';
- }
- /* grab the initial values (mandatory) */
- pp = index(r->name, ':');
- if (pp) {
- *pp++ = '\0';
- r->port0 = r->port1 = strtol(pp, NULL, 0);
- };
- inet_aton(r->name, &a);
- r->start = r->end = ntohl(a.s_addr);
- if (ap) {
- pp = index(ap, ':');
- if (pp) {
- *pp++ = '\0';
- if (*pp)
- r->port1 = strtol(pp, NULL, 0);
+ r->port0 = 1234; /* default port */
+ if (af == AF_INET6) {
+ r->ipv6.sgroup = 7; /* default group */
+ extract_ipv6_addr(name, &r->ipv6.start, &r->port0,
+ &r->ipv6.sgroup);
+ } else
+ extract_ipv4_addr(name, &r->ipv4.start, &r->port0);
+
+ r->port1 = r->port0;
+ if (af == AF_INET6) {
+ if (ap != NULL) {
+ r->ipv6.egroup = r->ipv6.sgroup;
+ extract_ipv6_addr(ap, &r->ipv6.end, &r->port1,
+ &r->ipv6.egroup);
+ } else {
+ r->ipv6.end = r->ipv6.start;
+ r->ipv6.egroup = r->ipv6.sgroup;
}
- if (*ap) {
- inet_aton(ap, &a);
- r->end = ntohl(a.s_addr);
- }
+ } else {
+ if (ap != NULL) {
+ extract_ipv4_addr(ap, &r->ipv4.end, &r->port1);
+ if (r->ipv4.start > r->ipv4.end) {
+ tmp = r->ipv4.end;
+ r->ipv4.end = r->ipv4.start;
+ r->ipv4.start = tmp;
+ }
+ } else
+ r->ipv4.end = r->ipv4.start;
}
+
if (r->port0 > r->port1) {
- uint16_t tmp = r->port0;
+ tmp = r->port0;
r->port0 = r->port1;
r->port1 = tmp;
}
- if (r->start > r->end) {
- uint32_t tmp = r->start;
- r->start = r->end;
- r->end = tmp;
+ if (af == AF_INET) {
+ a.s_addr = htonl(r->ipv4.start);
+ inet_ntop(af, &a, start, sizeof(start));
+ a.s_addr = htonl(r->ipv4.end);
+ inet_ntop(af, &a, end, sizeof(end));
+ } else {
+ inet_ntop(af, &r->ipv6.start, start, sizeof(start));
+ inet_ntop(af, &r->ipv6.end, end, sizeof(end));
}
- {
- struct in_addr a;
- char buf1[16]; // one ip address
+ if (af == AF_INET)
+ D("range is %s:%d to %s:%d", start, r->port0, end, r->port1);
+ else
+ D("range is %d@[%s]:%d to %d@[%s]:%d", r->ipv6.sgroup,
+ start, r->port0, r->ipv6.egroup, end, r->port1);
- a.s_addr = htonl(r->end);
- strncpy(buf1, inet_ntoa(a), sizeof(buf1));
- a.s_addr = htonl(r->start);
- if (1)
- D("range is %s:%d to %s:%d",
- inet_ntoa(a), r->port0, buf1, r->port1);
- }
+ free(name);
+ if (r->port0 != r->port1 ||
+ (af == AF_INET && r->ipv4.start != r->ipv4.end) ||
+ (af == AF_INET6 &&
+ !IN6_ARE_ADDR_EQUAL(&r->ipv6.start, &r->ipv6.end)))
+ return (OPT_COPY);
+ return (0);
}
-static void
+static int
extract_mac_range(struct mac_range *r)
{
+ struct ether_addr *e;
if (verbose)
D("extract MAC range from %s", r->name);
- bcopy(ether_aton(r->name), &r->start, 6);
- bcopy(ether_aton(r->name), &r->end, 6);
+
+ e = ether_aton(r->name);
+ if (e == NULL) {
+ D("invalid MAC address '%s'", r->name);
+ return 1;
+ }
+ bcopy(e, &r->start, 6);
+ bcopy(e, &r->end, 6);
#if 0
bcopy(targ->src_mac, eh->ether_shost, 6);
p = index(targ->g->src_mac, '-');
@@ -391,6 +506,7 @@
#endif
if (verbose)
D("%s starts at %s", r->name, ether_ntoa(&r->start));
+ return 0;
}
static struct targ *targs;
@@ -456,7 +572,7 @@
/*
* parse the vale configuration in conf and put it in nmr.
* Return the flag set if necessary.
- * The configuration may consist of 0 to 4 numbers separated
+ * The configuration may consist of 1 to 4 numbers separated
* by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings.
* Missing numbers or zeroes stand for default values.
* As an additional convenience, if exactly one number
@@ -500,7 +616,7 @@
nmr->nr_rx_rings, nmr->nr_rx_slots);
free(w);
return (nmr->nr_tx_rings || nmr->nr_tx_slots ||
- nmr->nr_rx_rings || nmr->nr_rx_slots) ?
+ nmr->nr_rx_rings || nmr->nr_rx_slots) ?
NM_OPEN_RING_CFG : 0;
}
@@ -513,7 +629,6 @@
source_hwaddr(const char *ifname, char *buf)
{
struct ifaddrs *ifaphead, *ifap;
- int l = sizeof(ifap->ifa_name);
if (getifaddrs(&ifaphead) != 0) {
D("getifaddrs %s failed", ifname);
@@ -527,7 +642,7 @@
if (!sdl || sdl->sdl_family != AF_LINK)
continue;
- if (strncmp(ifap->ifa_name, ifname, l) != 0)
+ if (strncmp(ifap->ifa_name, ifname, IFNAMSIZ) != 0)
continue;
mac = (uint8_t *)LLADDR(sdl);
sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
@@ -562,19 +677,20 @@
return 0;
}
+
/* Compute the checksum of the given ip header. */
-static uint16_t
+static uint32_t
checksum(const void *data, uint16_t len, uint32_t sum)
{
- const uint8_t *addr = data;
+ const uint8_t *addr = data;
uint32_t i;
- /* Checksum all the pairs of bytes first... */
- for (i = 0; i < (len & ~1U); i += 2) {
- sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i)));
- if (sum > 0xFFFF)
- sum -= 0xFFFF;
- }
+ /* Checksum all the pairs of bytes first... */
+ for (i = 0; i < (len & ~1U); i += 2) {
+ sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i)));
+ if (sum > 0xFFFF)
+ sum -= 0xFFFF;
+ }
/*
* If there's a single byte left over, checksum it, too.
* Network byte order is big-endian, so the remaining byte is
@@ -588,8 +704,8 @@
return sum;
}
-static u_int16_t
-wrapsum(u_int32_t sum)
+static uint16_t
+wrapsum(uint32_t sum)
{
sum = ~sum & 0xFFFF;
return (htons(sum));
@@ -637,64 +753,198 @@
#define uh_sum check
#endif /* linux */
-/*
- * increment the addressed in the packet,
- * starting from the least significant field.
- * DST_IP DST_PORT SRC_IP SRC_PORT
- */
static void
-update_addresses(struct pkt *pkt, struct glob_arg *g)
+update_ip(struct pkt *pkt, struct targ *t)
{
- uint32_t a;
- uint16_t p;
- struct ip *ip = &pkt->ip;
- struct udphdr *udp = &pkt->udp;
+ struct glob_arg *g = t->g;
+ struct ip ip;
+ struct udphdr udp;
+ uint32_t oaddr, naddr;
+ uint16_t oport, nport;
+ uint16_t ip_sum, udp_sum;
- do {
- /* XXX for now it doesn't handle non-random src, random dst */
- if (g->options & OPT_RANDOM_SRC) {
- udp->uh_sport = random();
- ip->ip_src.s_addr = random();
- } else {
- p = ntohs(udp->uh_sport);
- if (p < g->src_ip.port1) { /* just inc, no wrap */
- udp->uh_sport = htons(p + 1);
+ memcpy(&ip, &pkt->ipv4.ip, sizeof(ip));
+ memcpy(&udp, &pkt->ipv4.udp, sizeof(udp));
+ do {
+ ip_sum = udp_sum = 0;
+ naddr = oaddr = ntohl(ip.ip_src.s_addr);
+ nport = oport = ntohs(udp.uh_sport);
+ if (g->options & OPT_RANDOM_SRC) {
+ ip.ip_src.s_addr = nrand48(t->seed);
+ udp.uh_sport = nrand48(t->seed);
+ naddr = ntohl(ip.ip_src.s_addr);
+ nport = ntohs(udp.uh_sport);
break;
}
- udp->uh_sport = htons(g->src_ip.port0);
-
- a = ntohl(ip->ip_src.s_addr);
- if (a < g->src_ip.end) { /* just inc, no wrap */
- ip->ip_src.s_addr = htonl(a + 1);
+ if (oport < g->src_ip.port1) {
+ nport = oport + 1;
+ udp.uh_sport = htons(nport);
break;
}
- ip->ip_src.s_addr = htonl(g->src_ip.start);
-
- udp->uh_sport = htons(g->src_ip.port0);
+ nport = g->src_ip.port0;
+ udp.uh_sport = htons(nport);
+ if (oaddr < g->src_ip.ipv4.end) {
+ naddr = oaddr + 1;
+ ip.ip_src.s_addr = htonl(naddr);
+ break;
+ }
+ naddr = g->src_ip.ipv4.start;
+ ip.ip_src.s_addr = htonl(naddr);
+ } while (0);
+ /* update checksums if needed */
+ if (oaddr != naddr) {
+ ip_sum = cksum_add(ip_sum, ~oaddr >> 16);
+ ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff);
+ ip_sum = cksum_add(ip_sum, naddr >> 16);
+ ip_sum = cksum_add(ip_sum, naddr & 0xffff);
}
-
- if (g->options & OPT_RANDOM_DST) {
- udp->uh_dport = random();
- ip->ip_dst.s_addr = random();
- } else {
- p = ntohs(udp->uh_dport);
- if (p < g->dst_ip.port1) { /* just inc, no wrap */
- udp->uh_dport = htons(p + 1);
+ if (oport != nport) {
+ udp_sum = cksum_add(udp_sum, ~oport);
+ udp_sum = cksum_add(udp_sum, nport);
+ }
+ do {
+ naddr = oaddr = ntohl(ip.ip_dst.s_addr);
+ nport = oport = ntohs(udp.uh_dport);
+ if (g->options & OPT_RANDOM_DST) {
+ ip.ip_dst.s_addr = nrand48(t->seed);
+ udp.uh_dport = nrand48(t->seed);
+ naddr = ntohl(ip.ip_dst.s_addr);
+ nport = ntohs(udp.uh_dport);
break;
}
- udp->uh_dport = htons(g->dst_ip.port0);
-
- a = ntohl(ip->ip_dst.s_addr);
- if (a < g->dst_ip.end) { /* just inc, no wrap */
- ip->ip_dst.s_addr = htonl(a + 1);
+ if (oport < g->dst_ip.port1) {
+ nport = oport + 1;
+ udp.uh_dport = htons(nport);
break;
}
+ nport = g->dst_ip.port0;
+ udp.uh_dport = htons(nport);
+ if (oaddr < g->dst_ip.ipv4.end) {
+ naddr = oaddr + 1;
+ ip.ip_dst.s_addr = htonl(naddr);
+ break;
+ }
+ naddr = g->dst_ip.ipv4.start;
+ ip.ip_dst.s_addr = htonl(naddr);
+ } while (0);
+ /* update checksums */
+ if (oaddr != naddr) {
+ ip_sum = cksum_add(ip_sum, ~oaddr >> 16);
+ ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff);
+ ip_sum = cksum_add(ip_sum, naddr >> 16);
+ ip_sum = cksum_add(ip_sum, naddr & 0xffff);
}
- ip->ip_dst.s_addr = htonl(g->dst_ip.start);
- } while (0);
- // update checksum
+ if (oport != nport) {
+ udp_sum = cksum_add(udp_sum, ~oport);
+ udp_sum = cksum_add(udp_sum, nport);
+ }
+ if (udp_sum != 0)
+ udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(udp_sum));
+ if (ip_sum != 0) {
+ ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum));
+ udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(ip_sum));
+ }
+ memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
+ memcpy(&pkt->ipv4.udp, &udp, sizeof(udp));
}
+#ifndef s6_addr16
+#define s6_addr16 __u6_addr.__u6_addr16
+#endif
+static void
+update_ip6(struct pkt *pkt, struct targ *t)
+{
+ struct glob_arg *g = t->g;
+ struct ip6_hdr ip6;
+ struct udphdr udp;
+ uint16_t udp_sum;
+ uint16_t oaddr, naddr;
+ uint16_t oport, nport;
+ uint8_t group;
+
+ memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6));
+ memcpy(&udp, &pkt->ipv6.udp, sizeof(udp));
+ do {
+ udp_sum = 0;
+ group = g->src_ip.ipv6.sgroup;
+ naddr = oaddr = ntohs(ip6.ip6_src.s6_addr16[group]);
+ nport = oport = ntohs(udp.uh_sport);
+ if (g->options & OPT_RANDOM_SRC) {
+ ip6.ip6_src.s6_addr16[group] = nrand48(t->seed);
+ udp.uh_sport = nrand48(t->seed);
+ naddr = ntohs(ip6.ip6_src.s6_addr16[group]);
+ nport = ntohs(udp.uh_sport);
+ break;
+ }
+ if (oport < g->src_ip.port1) {
+ nport = oport + 1;
+ udp.uh_sport = htons(nport);
+ break;
+ }
+ nport = g->src_ip.port0;
+ udp.uh_sport = htons(nport);
+ if (oaddr < ntohs(g->src_ip.ipv6.end.s6_addr16[group])) {
+ naddr = oaddr + 1;
+ ip6.ip6_src.s6_addr16[group] = htons(naddr);
+ break;
+ }
+ naddr = ntohs(g->src_ip.ipv6.start.s6_addr16[group]);
+ ip6.ip6_src.s6_addr16[group] = htons(naddr);
+ } while (0);
+ /* update checksums if needed */
+ if (oaddr != naddr)
+ udp_sum = cksum_add(~oaddr, naddr);
+ if (oport != nport)
+ udp_sum = cksum_add(udp_sum,
+ cksum_add(~oport, nport));
+ do {
+ group = g->dst_ip.ipv6.egroup;
+ naddr = oaddr = ntohs(ip6.ip6_dst.s6_addr16[group]);
+ nport = oport = ntohs(udp.uh_dport);
+ if (g->options & OPT_RANDOM_DST) {
+ ip6.ip6_dst.s6_addr16[group] = nrand48(t->seed);
+ udp.uh_dport = nrand48(t->seed);
+ naddr = ntohs(ip6.ip6_dst.s6_addr16[group]);
+ nport = ntohs(udp.uh_dport);
+ break;
+ }
+ if (oport < g->dst_ip.port1) {
+ nport = oport + 1;
+ udp.uh_dport = htons(nport);
+ break;
+ }
+ nport = g->dst_ip.port0;
+ udp.uh_dport = htons(nport);
+ if (oaddr < ntohs(g->dst_ip.ipv6.end.s6_addr16[group])) {
+ naddr = oaddr + 1;
+ ip6.ip6_dst.s6_addr16[group] = htons(naddr);
+ break;
+ }
+ naddr = ntohs(g->dst_ip.ipv6.start.s6_addr16[group]);
+ ip6.ip6_dst.s6_addr16[group] = htons(naddr);
+ } while (0);
+ /* update checksums */
+ if (oaddr != naddr)
+ udp_sum = cksum_add(udp_sum,
+ cksum_add(~oaddr, naddr));
+ if (oport != nport)
+ udp_sum = cksum_add(udp_sum,
+ cksum_add(~oport, nport));
+ if (udp_sum != 0)
+ udp.uh_sum = ~cksum_add(~udp.uh_sum, udp_sum);
+ memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6));
+ memcpy(&pkt->ipv6.udp, &udp, sizeof(udp));
+}
+
+static void
+update_addresses(struct pkt *pkt, struct targ *t)
+{
+
+ if (t->g->af == AF_INET)
+ update_ip(pkt, t);
+ else
+ update_ip6(pkt, t);
+}
/*
* initialize one packet and prepare for the next one.
* The copy could be done better instead of repeating it each time.
@@ -704,9 +954,12 @@
{
struct pkt *pkt = &targ->pkt;
struct ether_header *eh;
- struct ip *ip;
- struct udphdr *udp;
- uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip);
+ struct ip6_hdr ip6;
+ struct ip ip;
+ struct udphdr udp;
+ void *udp_ptr;
+ uint16_t paylen;
+ uint32_t csum = 0;
const char *payload = targ->g->options & OPT_INDIRECT ?
indirect_payload : default_payload;
int i, l0 = strlen(payload);
@@ -716,7 +969,7 @@
pcap_t *file;
struct pcap_pkthdr *header;
const unsigned char *packet;
-
+
/* Read a packet from a PCAP file if asked. */
if (targ->g->packet_file != NULL) {
if ((file = pcap_open_offline(targ->g->packet_file,
@@ -735,49 +988,80 @@
}
#endif
+ paylen = targ->g->pkt_size - sizeof(*eh) -
+ (targ->g->af == AF_INET ? sizeof(ip): sizeof(ip6));
+
/* create a nice NUL-terminated string */
for (i = 0; i < paylen; i += l0) {
if (l0 > paylen - i)
l0 = paylen - i; // last round
- bcopy(payload, pkt->body + i, l0);
+ bcopy(payload, PKT(pkt, body, targ->g->af) + i, l0);
}
- pkt->body[i-1] = '\0';
- ip = &pkt->ip;
+ PKT(pkt, body, targ->g->af)[i - 1] = '\0';
/* prepare the headers */
- ip->ip_v = IPVERSION;
- ip->ip_hl = 5;
- ip->ip_id = 0;
- ip->ip_tos = IPTOS_LOWDELAY;
- ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh));
- ip->ip_id = 0;
- ip->ip_off = htons(IP_DF); /* Don't fragment */
- ip->ip_ttl = IPDEFTTL;
- ip->ip_p = IPPROTO_UDP;
- ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start);
- ip->ip_src.s_addr = htonl(targ->g->src_ip.start);
- ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0));
-
-
- udp = &pkt->udp;
- udp->uh_sport = htons(targ->g->src_ip.port0);
- udp->uh_dport = htons(targ->g->dst_ip.port0);
- udp->uh_ulen = htons(paylen);
- /* Magic: taken from sbin/dhclient/packet.c */
- udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp),
- checksum(pkt->body,
- paylen - sizeof(*udp),
- checksum(&ip->ip_src, 2 * sizeof(ip->ip_src),
- IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen)
- )
- )
- ));
-
eh = &pkt->eh;
bcopy(&targ->g->src_mac.start, eh->ether_shost, 6);
bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6);
- eh->ether_type = htons(ETHERTYPE_IP);
+ if (targ->g->af == AF_INET) {
+ eh->ether_type = htons(ETHERTYPE_IP);
+ memcpy(&ip, &pkt->ipv4.ip, sizeof(ip));
+ udp_ptr = &pkt->ipv4.udp;
+ ip.ip_v = IPVERSION;
+ ip.ip_hl = sizeof(ip) >> 2;
+ ip.ip_id = 0;
+ ip.ip_tos = IPTOS_LOWDELAY;
+ ip.ip_len = htons(targ->g->pkt_size - sizeof(*eh));
+ ip.ip_id = 0;
+ ip.ip_off = htons(IP_DF); /* Don't fragment */
+ ip.ip_ttl = IPDEFTTL;
+ ip.ip_p = IPPROTO_UDP;
+ ip.ip_dst.s_addr = htonl(targ->g->dst_ip.ipv4.start);
+ ip.ip_src.s_addr = htonl(targ->g->src_ip.ipv4.start);
+ ip.ip_sum = wrapsum(checksum(&ip, sizeof(ip), 0));
+ memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
+ } else {
+ eh->ether_type = htons(ETHERTYPE_IPV6);
+ memcpy(&ip6, &pkt->ipv4.ip, sizeof(ip6));
+ udp_ptr = &pkt->ipv6.udp;
+ ip6.ip6_flow = 0;
+ ip6.ip6_plen = htons(paylen);
+ ip6.ip6_vfc = IPV6_VERSION;
+ ip6.ip6_nxt = IPPROTO_UDP;
+ ip6.ip6_hlim = IPV6_DEFHLIM;
+ ip6.ip6_src = targ->g->src_ip.ipv6.start;
+ ip6.ip6_dst = targ->g->dst_ip.ipv6.start;
+ }
+ memcpy(&udp, udp_ptr, sizeof(udp));
+
+ udp.uh_sport = htons(targ->g->src_ip.port0);
+ udp.uh_dport = htons(targ->g->dst_ip.port0);
+ udp.uh_ulen = htons(paylen);
+ if (targ->g->af == AF_INET) {
+ /* Magic: taken from sbin/dhclient/packet.c */
+ udp.uh_sum = wrapsum(
+ checksum(&udp, sizeof(udp), /* udp header */
+ checksum(pkt->ipv4.body, /* udp payload */
+ paylen - sizeof(udp),
+ checksum(&pkt->ipv4.ip.ip_src, /* pseudo header */
+ 2 * sizeof(pkt->ipv4.ip.ip_src),
+ IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen)))));
+ memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
+ } else {
+ /* Save part of pseudo header checksum into csum */
+ csum = IPPROTO_UDP << 24;
+ csum = checksum(&csum, sizeof(csum), paylen);
+ udp.uh_sum = wrapsum(
+ checksum(udp_ptr, sizeof(udp), /* udp header */
+ checksum(pkt->ipv6.body, /* udp payload */
+ paylen - sizeof(udp),
+ checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */
+ 2 * sizeof(pkt->ipv6.ip.ip6_src), csum))));
+ memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6));
+ }
+ memcpy(udp_ptr, &udp, sizeof(udp));
+
bzero(&pkt->vh, sizeof(pkt->vh));
// dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0);
}
@@ -825,7 +1109,6 @@
}
}
-
/*
* create and enqueue a batch of packets on a ring.
* On the last one set NS_REPORT to tell the driver to generate
@@ -833,19 +1116,14 @@
*/
static int
send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
- int size, struct glob_arg *g, u_int count, int options,
- u_int nfrags)
+ int size, struct targ *t, u_int count, int options)
{
u_int n, sent, cur = ring->cur;
- u_int fcnt;
+ u_int frags = t->frags;
+ u_int frag_size = t->frag_size;
+ struct netmap_slot *slot = &ring->slot[cur];
n = nm_ring_space(ring);
- if (n < count)
- count = n;
- if (count < nfrags) {
- D("truncating packet, no room for frags %d %d",
- count, nfrags);
- }
#if 0
if (options & (OPT_COPY | OPT_PREFETCH) ) {
for (sent = 0; sent < count; sent++) {
@@ -858,11 +1136,15 @@
cur = ring->cur;
}
#endif
- for (fcnt = nfrags, sent = 0; sent < count; sent++) {
- struct netmap_slot *slot = &ring->slot[cur];
- char *p = NETMAP_BUF(ring, slot->buf_idx);
- int buf_changed = slot->flags & NS_BUF_CHANGED;
+ for (sent = 0; sent < count && n >= frags; sent++, n--) {
+ char *p;
+ int buf_changed;
+ u_int tosend = size;
+ slot = &ring->slot[cur];
+ p = NETMAP_BUF(ring, slot->buf_idx);
+ buf_changed = slot->flags & NS_BUF_CHANGED;
+
slot->flags = 0;
if (options & OPT_RUBBISH) {
/* do nothing */
@@ -869,31 +1151,49 @@
} else if (options & OPT_INDIRECT) {
slot->flags |= NS_INDIRECT;
slot->ptr = (uint64_t)((uintptr_t)frame);
- } else if ((options & OPT_COPY) || buf_changed) {
- nm_pkt_copy(frame, p, size);
- if (fcnt == nfrags)
- update_addresses(pkt, g);
- } else if (options & OPT_MEMCPY) {
- memcpy(p, frame, size);
- if (fcnt == nfrags)
- update_addresses(pkt, g);
+ } else if (frags > 1) {
+ u_int i;
+ const char *f = frame;
+ char *fp = p;
+ for (i = 0; i < frags - 1; i++) {
+ memcpy(fp, f, frag_size);
+ slot->len = frag_size;
+ slot->flags = NS_MOREFRAG;
+ if (options & OPT_DUMP)
+ dump_payload(fp, frag_size, ring, cur);
+ tosend -= frag_size;
+ f += frag_size;
+ cur = nm_ring_next(ring, cur);
+ slot = &ring->slot[cur];
+ fp = NETMAP_BUF(ring, slot->buf_idx);
+ }
+ n -= (frags - 1);
+ p = fp;
+ slot->flags = 0;
+ memcpy(p, f, tosend);
+ update_addresses(pkt, t);
+ } else if ((options & (OPT_COPY | OPT_MEMCPY)) || buf_changed) {
+ if (options & OPT_COPY)
+ nm_pkt_copy(frame, p, size);
+ else
+ memcpy(p, frame, size);
+ update_addresses(pkt, t);
} else if (options & OPT_PREFETCH) {
__builtin_prefetch(p);
}
+ slot->len = tosend;
if (options & OPT_DUMP)
- dump_payload(p, size, ring, cur);
- slot->len = size;
- if (--fcnt > 0)
- slot->flags |= NS_MOREFRAG;
- else
- fcnt = nfrags;
- if (sent == count - 1) {
- slot->flags &= ~NS_MOREFRAG;
- slot->flags |= NS_REPORT;
- }
+ dump_payload(p, tosend, ring, cur);
cur = nm_ring_next(ring, cur);
}
- ring->head = ring->cur = cur;
+ if (sent) {
+ slot->flags |= NS_REPORT;
+ ring->head = ring->cur = cur;
+ }
+ if (sent < count) {
+ /* tell netmap that we need more slots */
+ ring->cur = ring->tail;
+ }
return (sent);
}
@@ -914,28 +1214,47 @@
}
/*
+ * wait until ts, either busy or sleeping if more than 1ms.
+ * Return wakeup time.
+ */
+static struct timespec
+wait_time(struct timespec ts)
+{
+ for (;;) {
+ struct timespec w, cur;
+ clock_gettime(CLOCK_REALTIME_PRECISE, &cur);
+ w = timespec_sub(ts, cur);
+ if (w.tv_sec < 0)
+ return cur;
+ else if (w.tv_sec > 0 || w.tv_nsec > 1000000)
+ poll(NULL, 0, 1);
+ }
+}
+
+/*
* Send a packet, and wait for a response.
* The payload (after UDP header, ofs 42) has a 4-byte sequence
* followed by a struct timeval (or bintime?)
*/
-#define PAY_OFS 42 /* where in the pkt... */
static void *
-pinger_body(void *data)
+ping_body(void *data)
{
struct targ *targ = (struct targ *) data;
struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
struct netmap_if *nifp = targ->nmd->nifp;
- int i, rx = 0;
+ int i, m, rx = 0;
void *frame;
int size;
struct timespec ts, now, last_print;
+ struct timespec nexttime = {0, 0}; /* silence compiler */
uint64_t sent = 0, n = targ->g->npackets;
uint64_t count = 0, t_cur, t_min = ~0, av = 0;
+ uint64_t g_min = ~0, g_av = 0;
uint64_t buckets[64]; /* bins for delays, ns */
+ int rate_limit = targ->g->tx_rate, tosend = 0;
- frame = &targ->pkt;
- frame += sizeof(targ->pkt.vh) - targ->g->virt_header;
+ frame = (char*)&targ->pkt + sizeof(targ->pkt.vh) - targ->g->virt_header;
size = targ->g->pkt_size + targ->g->virt_header;
@@ -947,38 +1266,75 @@
bzero(&buckets, sizeof(buckets));
clock_gettime(CLOCK_REALTIME_PRECISE, &last_print);
now = last_print;
+ if (rate_limit) {
+ targ->tic = timespec_add(now, (struct timespec){2,0});
+ targ->tic.tv_nsec = 0;
+ wait_time(targ->tic);
+ nexttime = targ->tic;
+ }
while (!targ->cancel && (n == 0 || sent < n)) {
- struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
+ struct netmap_ring *ring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
struct netmap_slot *slot;
char *p;
- for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */
- slot = &ring->slot[ring->cur];
- slot->len = size;
- p = NETMAP_BUF(ring, slot->buf_idx);
+ int rv;
+ uint64_t limit, event = 0;
- if (nm_ring_empty(ring)) {
- D("-- ouch, cannot send");
- } else {
- struct tstamp *tp;
- nm_pkt_copy(frame, p, size);
- clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
- bcopy(&sent, p+42, sizeof(sent));
- tp = (struct tstamp *)(p+46);
- tp->sec = (uint32_t)ts.tv_sec;
- tp->nsec = (uint32_t)ts.tv_nsec;
- sent++;
- ring->head = ring->cur = nm_ring_next(ring, ring->cur);
+ if (rate_limit && tosend <= 0) {
+ tosend = targ->g->burst;
+ nexttime = timespec_add(nexttime, targ->g->tx_period);
+ wait_time(nexttime);
}
- }
+
+ limit = rate_limit ? tosend : targ->g->burst;
+ if (n > 0 && n - sent < limit)
+ limit = n - sent;
+ for (m = 0; (unsigned)m < limit; m++) {
+ slot = &ring->slot[ring->cur];
+ slot->len = size;
+ p = NETMAP_BUF(ring, slot->buf_idx);
+
+ if (nm_ring_empty(ring)) {
+ D("-- ouch, cannot send");
+ break;
+ } else {
+ struct tstamp *tp;
+ nm_pkt_copy(frame, p, size);
+ clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
+ bcopy(&sent, p+42, sizeof(sent));
+ tp = (struct tstamp *)(p+46);
+ tp->sec = (uint32_t)ts.tv_sec;
+ tp->nsec = (uint32_t)ts.tv_nsec;
+ sent++;
+ ring->head = ring->cur = nm_ring_next(ring, ring->cur);
+ }
+ }
+ if (m > 0)
+ event++;
+ targ->ctr.pkts = sent;
+ targ->ctr.bytes = sent*size;
+ targ->ctr.events = event;
+ if (rate_limit)
+ tosend -= m;
+#ifdef BUSYWAIT
+ rv = ioctl(pfd.fd, NIOCTXSYNC, NULL);
+ if (rv < 0) {
+ D("TXSYNC error on queue %d: %s", targ->me,
+ strerror(errno));
+ }
+ again:
+ ioctl(pfd.fd, NIOCRXSYNC, NULL);
+#else
/* should use a parameter to decide how often to send */
- if (poll(&pfd, 1, 3000) <= 0) {
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ if ( (rv = poll(&pfd, 1, 3000)) <= 0) {
+ D("poll error on queue %d: %s", targ->me,
+ (rv ? strerror(errno) : "timeout"));
continue;
}
+#endif /* BUSYWAIT */
/* see what we got back */
- for (i = targ->nmd->first_tx_ring;
- i <= targ->nmd->last_tx_ring; i++) {
+ rx = 0;
+ for (i = targ->nmd->first_rx_ring;
+ i <= targ->nmd->last_rx_ring; i++) {
ring = NETMAP_RXRING(nifp, i);
while (!nm_ring_empty(ring)) {
uint32_t seq;
@@ -999,7 +1355,8 @@
ts.tv_nsec += 1000000000;
ts.tv_sec--;
}
- if (0) D("seq %d/%lu delta %d.%09d", seq, sent,
+ if (0) D("seq %d/%llu delta %d.%09d", seq,
+ (unsigned long long)sent,
(int)ts.tv_sec, (int)ts.tv_nsec);
t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec;
if (t_cur < t_min)
@@ -1024,7 +1381,7 @@
if (ts.tv_sec >= 1) {
D("count %d RTT: min %d av %d ns",
(int)count, (int)t_min, (int)(av/count));
- int k, j, kmin;
+ int k, j, kmin, off;
char buf[512];
for (kmin = 0; kmin < 64; kmin ++)
@@ -1034,17 +1391,33 @@
if (buckets[k])
break;
buf[0] = '\0';
- for (j = kmin; j <= k; j++)
- sprintf(buf, "%s %5d", buf, (int)buckets[j]);
+ off = 0;
+ for (j = kmin; j <= k; j++) {
+ off += sprintf(buf + off, " %5d", (int)buckets[j]);
+ }
D("k: %d .. %d\n\t%s", 1<<kmin, 1<<k, buf);
bzero(&buckets, sizeof(buckets));
count = 0;
+ g_av += av;
av = 0;
+ if (t_min < g_min)
+ g_min = t_min;
t_min = ~0;
last_print = now;
}
+#ifdef BUSYWAIT
+ if (rx < m && ts.tv_sec <= 3 && !targ->cancel)
+ goto again;
+#endif /* BUSYWAIT */
}
+ if (sent > 0) {
+ D("RTT over %llu packets: min %d av %d ns",
+ (long long unsigned)sent, (int)g_min,
+ (int)((double)g_av/sent));
+ }
+ targ->completed = 1;
+
/* reset the ``used`` flag. */
targ->used = 0;
@@ -1056,7 +1429,7 @@
* reply to ping requests
*/
static void *
-ponger_body(void *data)
+pong_body(void *data)
{
struct targ *targ = (struct targ *) data;
struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
@@ -1069,7 +1442,9 @@
D("can only reply ping with 1 thread");
return NULL;
}
- D("understood ponger %lu but don't know how to do it", n);
+ if (n > 0)
+ D("understood ponger %llu but don't know how to do it",
+ (unsigned long long)n);
while (!targ->cancel && (n == 0 || sent < n)) {
uint32_t txcur, txavail;
//#define BUSYWAIT
@@ -1076,13 +1451,14 @@
#ifdef BUSYWAIT
ioctl(pfd.fd, NIOCRXSYNC, NULL);
#else
- if (poll(&pfd, 1, 1000) <= 0) {
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ int rv;
+ if ( (rv = poll(&pfd, 1, 1000)) <= 0) {
+ D("poll error on queue %d: %s", targ->me,
+ rv ? strerror(errno) : "timeout");
continue;
}
#endif
- txring = NETMAP_TXRING(nifp, 0);
+ txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
txcur = txring->cur;
txavail = nm_ring_space(txring);
/* see what we got back */
@@ -1105,6 +1481,7 @@
dpkt = (uint16_t *)dst;
spkt = (uint16_t *)src;
nm_pkt_copy(src, dst, slot->len);
+ /* swap source and destination MAC */
dpkt[0] = spkt[3];
dpkt[1] = spkt[4];
dpkt[2] = spkt[5];
@@ -1112,7 +1489,6 @@
dpkt[4] = spkt[1];
dpkt[5] = spkt[2];
txring->slot[txcur].len = slot->len;
- /* XXX swap src dst mac */
txcur = nm_ring_next(txring, txcur);
txavail--;
sent++;
@@ -1126,6 +1502,8 @@
//D("tx %d rx %d", sent, rx);
}
+ targ->completed = 1;
+
/* reset the ``used`` flag. */
targ->used = 0;
@@ -1133,24 +1511,6 @@
}
-/*
- * wait until ts, either busy or sleeping if more than 1ms.
- * Return wakeup time.
- */
-static struct timespec
-wait_time(struct timespec ts)
-{
- for (;;) {
- struct timespec w, cur;
- clock_gettime(CLOCK_REALTIME_PRECISE, &cur);
- w = timespec_sub(ts, cur);
- if (w.tv_sec < 0)
- return cur;
- else if (w.tv_sec > 0 || w.tv_nsec > 1000000)
- poll(NULL, 0, 1);
- }
-}
-
static void *
sender_body(void *data)
{
@@ -1170,14 +1530,13 @@
int size;
if (targ->frame == NULL) {
- frame = pkt;
- frame += sizeof(pkt->vh) - targ->g->virt_header;
+ frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header;
size = targ->g->pkt_size + targ->g->virt_header;
} else {
frame = targ->frame;
size = targ->g->pkt_size;
}
-
+
D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
if (setaffinity(targ->thread, targ->affinity))
goto quit;
@@ -1190,13 +1549,13 @@
wait_time(targ->tic);
nexttime = targ->tic;
}
- if (targ->g->dev_type == DEV_TAP) {
+ if (targ->g->dev_type == DEV_TAP) {
D("writing to file desc %d", targ->g->main_fd);
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
if (write(targ->g->main_fd, frame, size) != -1)
sent++;
- update_addresses(pkt, targ->g);
+ update_addresses(pkt, targ);
if (i > 10000) {
targ->ctr.pkts = sent;
targ->ctr.bytes = sent*size;
@@ -1211,7 +1570,7 @@
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
if (pcap_inject(p, frame, size) != -1)
sent++;
- update_addresses(pkt, targ->g);
+ update_addresses(pkt, targ);
if (i > 10000) {
targ->ctr.pkts = sent;
targ->ctr.bytes = sent*size;
@@ -1222,10 +1581,23 @@
#endif /* NO_PCAP */
} else {
int tosend = 0;
- int frags = targ->g->frags;
+ u_int bufsz, mtu = targ->g->mtu;
nifp = targ->nmd->nifp;
+ txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
+ bufsz = txring->nr_buf_size;
+ if (bufsz < mtu)
+ mtu = bufsz;
+ targ->frag_size = targ->g->pkt_size / targ->frags;
+ if (targ->frag_size > mtu) {
+ targ->frags = targ->g->pkt_size / mtu;
+ targ->frag_size = mtu;
+ if (targ->g->pkt_size % mtu != 0)
+ targ->frags++;
+ }
+ D("frags %u frag_size %u", targ->frags, targ->frag_size);
while (!targ->cancel && (n == 0 || sent < n)) {
+ int rv;
if (rate_limit && tosend <= 0) {
tosend = targ->g->burst;
@@ -1237,6 +1609,7 @@
* wait for available room in the send queue(s)
*/
#ifdef BUSYWAIT
+ (void)rv;
if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
D("ioctl error on queue %d: %s", targ->me,
strerror(errno));
@@ -1243,11 +1616,11 @@
goto quit;
}
#else /* !BUSYWAIT */
- if (poll(&pfd, 1, 2000) <= 0) {
+ if ( (rv = poll(&pfd, 1, 2000)) <= 0) {
if (targ->cancel)
break;
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ D("poll error on queue %d: %s", targ->me,
+ rv ? strerror(errno) : "timeout");
// goto quit;
}
if (pfd.revents & POLLERR) {
@@ -1266,23 +1639,30 @@
for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
int m;
uint64_t limit = rate_limit ? tosend : targ->g->burst;
+
+ if (n > 0 && n == sent)
+ break;
+
if (n > 0 && n - sent < limit)
limit = n - sent;
txring = NETMAP_TXRING(nifp, i);
if (nm_ring_empty(txring))
continue;
- if (frags > 1)
- limit = ((limit + frags - 1) / frags) * frags;
- m = send_packets(txring, pkt, frame, size, targ->g,
- limit, options, frags);
- ND("limit %d tail %d frags %d m %d",
- limit, txring->tail, frags, m);
+ if (targ->g->pkt_min_size > 0) {
+ size = nrand48(targ->seed) %
+ (targ->g->pkt_size - targ->g->pkt_min_size) +
+ targ->g->pkt_min_size;
+ }
+ m = send_packets(txring, pkt, frame, size, targ,
+ limit, options);
+ ND("limit %lu tail %d m %d",
+ limit, txring->tail, m);
sent += m;
if (m > 0) //XXX-ste: can m be 0?
event++;
targ->ctr.pkts = sent;
- targ->ctr.bytes = sent*size;
+ targ->ctr.bytes += m*size;
targ->ctr.events = event;
if (rate_limit) {
tosend -= m;
@@ -1292,10 +1672,12 @@
}
}
/* flush any remaining packets */
- D("flush tail %d head %d on thread %p",
- txring->tail, txring->head,
- (void *)pthread_self());
- ioctl(pfd.fd, NIOCTXSYNC, NULL);
+ if (txring != NULL) {
+ D("flush tail %d head %d on thread %p",
+ txring->tail, txring->head,
+ (void *)pthread_self());
+ ioctl(pfd.fd, NIOCTXSYNC, NULL);
+ }
/* final part: wait all the TX queues to be empty. */
for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
@@ -1340,6 +1722,7 @@
{
u_int cur, rx, n;
uint64_t b = 0;
+ u_int complete = 0;
if (bytes == NULL)
bytes = &b;
@@ -1355,12 +1738,14 @@
*bytes += slot->len;
if (dump)
dump_payload(p, slot->len, ring, cur);
+ if (!(slot->flags & NS_MOREFRAG))
+ complete++;
cur = nm_ring_next(ring, cur);
}
ring->head = ring->cur = cur;
- return (rx);
+ return (complete);
}
static void *
@@ -1373,8 +1758,7 @@
int i;
struct my_ctrs cur;
- cur.pkts = cur.bytes = cur.events = cur.min_space = 0;
- cur.t.tv_usec = cur.t.tv_sec = 0; // unused, just silence the compiler
+ memset(&cur, 0, sizeof(cur));
if (setaffinity(targ->thread, targ->affinity))
goto quit;
@@ -1386,6 +1770,14 @@
i = poll(&pfd, 1, 1000);
if (i > 0 && !(pfd.revents & POLLERR))
break;
+ if (i < 0) {
+ D("poll() error: %s", strerror(errno));
+ goto quit;
+ }
+ if (pfd.revents & POLLERR) {
+ D("fd error");
+ goto quit;
+ }
RD(1, "waiting for initial packets, poll returns %d %d",
i, pfd.revents);
}
@@ -1408,7 +1800,7 @@
/* XXX should we poll ? */
pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap,
(u_char *)&targ->ctr);
- targ->ctr.events++;
+ targ->ctr.events++;
}
#endif /* !NO_PCAP */
} else {
@@ -1451,7 +1843,7 @@
m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes);
cur.pkts += m;
- if (m > 0) //XXX-ste: can m be 0?
+ if (m > 0)
cur.events++;
}
cur.min_space = targ->ctr.min_space;
@@ -1503,8 +1895,7 @@
D("Ignoring -n argument");
}
- frame = pkt;
- frame += sizeof(pkt->vh) - targ->g->virt_header;
+ frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header;
size = targ->g->pkt_size + targ->g->virt_header;
D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
@@ -1527,6 +1918,8 @@
unsigned int space;
unsigned int head;
int fcnt;
+ uint16_t sum = 0;
+ int rv;
if (!rate_limit) {
budget = targ->g->burst;
@@ -1538,11 +1931,20 @@
}
/* wait for available room in the send queue */
- if (poll(&pfd, 1, 2000) <= 0) {
+#ifdef BUSYWAIT
+ (void)rv;
+ if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
+ D("ioctl error on queue %d: %s", targ->me,
+ strerror(errno));
+ goto quit;
+ }
+#else /* !BUSYWAIT */
+ if ( (rv = poll(&pfd, 1, 2000)) <= 0) {
if (targ->cancel)
break;
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ D("poll error on queue %d: %s", targ->me,
+ rv ? strerror(errno) : "timeout");
+ // goto quit;
}
if (pfd.revents & POLLERR) {
D("poll error on %d ring %d-%d", pfd.fd,
@@ -1549,6 +1951,7 @@
targ->nmd->first_tx_ring, targ->nmd->last_tx_ring);
goto quit;
}
+#endif /* !BUSYWAIT */
/* If no room poll() again. */
space = nm_ring_space(ring);
@@ -1573,15 +1976,23 @@
sent < limit; sent++, sequence++) {
struct netmap_slot *slot = &ring->slot[head];
char *p = NETMAP_BUF(ring, slot->buf_idx);
+ uint16_t *w = (uint16_t *)PKT(pkt, body, targ->g->af), t;
+ memcpy(&sum, targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, sizeof(sum));
+
slot->flags = 0;
- pkt->body[0] = sequence >> 24;
- pkt->body[1] = (sequence >> 16) & 0xff;
- pkt->body[2] = (sequence >> 8) & 0xff;
- pkt->body[3] = sequence & 0xff;
+ t = *w;
+ PKT(pkt, body, targ->g->af)[0] = sequence >> 24;
+ PKT(pkt, body, targ->g->af)[1] = (sequence >> 16) & 0xff;
+ sum = ~cksum_add(~sum, cksum_add(~t, *w));
+ t = *++w;
+ PKT(pkt, body, targ->g->af)[2] = (sequence >> 8) & 0xff;
+ PKT(pkt, body, targ->g->af)[3] = sequence & 0xff;
+ sum = ~cksum_add(~sum, cksum_add(~t, *w));
+ memcpy(targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, &sum, sizeof(sum));
nm_pkt_copy(frame, p, size);
if (fcnt == frags) {
- update_addresses(pkt, targ->g);
+ update_addresses(pkt, targ);
}
if (options & OPT_DUMP) {
@@ -1675,19 +2086,25 @@
int dump = targ->g->options & OPT_DUMP;
struct netmap_ring *ring;
unsigned int frags_exp = 1;
- uint32_t seq_exp = 0;
struct my_ctrs cur;
unsigned int frags = 0;
int first_packet = 1;
int first_slot = 1;
- int i;
+ int i, j, af, nrings;
+ uint32_t seq, *seq_exp = NULL;
- cur.pkts = cur.bytes = cur.events = cur.min_space = 0;
- cur.t.tv_usec = cur.t.tv_sec = 0; // unused, just silence the compiler
+ memset(&cur, 0, sizeof(cur));
if (setaffinity(targ->thread, targ->affinity))
goto quit;
+ nrings = targ->nmd->last_rx_ring - targ->nmd->first_rx_ring + 1;
+ seq_exp = calloc(nrings, sizeof(uint32_t));
+ if (seq_exp == NULL) {
+ D("failed to allocate seq array");
+ goto quit;
+ }
+
D("reading from %s fd %d main_fd %d",
targ->g->ifname, targ->fd, targ->g->main_fd);
/* unbounded wait for the first packet. */
@@ -1701,15 +2118,18 @@
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
- ring = NETMAP_RXRING(targ->nmd->nifp, targ->nmd->first_rx_ring);
while (!targ->cancel) {
unsigned int head;
- uint32_t seq;
int limit;
- /* Once we started to receive packets, wait at most 1 seconds
- before quitting. */
+#ifdef BUSYWAIT
+ if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) {
+ D("ioctl error on queue %d: %s", targ->me,
+ strerror(errno));
+ goto quit;
+ }
+#else /* !BUSYWAIT */
if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
targ->toc.tv_sec -= 1; /* Subtract timeout time. */
@@ -1720,108 +2140,123 @@
D("poll err");
goto quit;
}
+#endif /* !BUSYWAIT */
- if (nm_ring_empty(ring))
- continue;
+ for (j = targ->nmd->first_rx_ring; j <= targ->nmd->last_rx_ring; j++) {
+ ring = NETMAP_RXRING(targ->nmd->nifp, j);
+ if (nm_ring_empty(ring))
+ continue;
- limit = nm_ring_space(ring);
- if (limit > targ->g->burst)
- limit = targ->g->burst;
+ limit = nm_ring_space(ring);
+ if (limit > targ->g->burst)
+ limit = targ->g->burst;
#if 0
- /* Enable this if
- * 1) we remove the early-return optimization from
- * the netmap poll implementation, or
- * 2) pipes get NS_MOREFRAG support.
- * With the current netmap implementation, an experiment like
- * pkt-gen -i vale:1{1 -f txseq -F 9
- * pkt-gen -i vale:1}1 -f rxseq
- * would get stuck as soon as we find nm_ring_space(ring) < 9,
- * since here limit is rounded to 0 and
- * pipe rxsync is not called anymore by the poll() of this loop.
- */
- if (frags_exp > 1) {
- int o = limit;
- /* Cut off to the closest smaller multiple. */
- limit = (limit / frags_exp) * frags_exp;
- RD(2, "LIMIT %d --> %d", o, limit);
- }
+ /* Enable this if
+ * 1) we remove the early-return optimization from
+ * the netmap poll implementation, or
+ * 2) pipes get NS_MOREFRAG support.
+ * With the current netmap implementation, an experiment like
+ * pkt-gen -i vale:1{1 -f txseq -F 9
+ * pkt-gen -i vale:1}1 -f rxseq
+ * would get stuck as soon as we find nm_ring_space(ring) < 9,
+ * since here limit is rounded to 0 and
+ * pipe rxsync is not called anymore by the poll() of this loop.
+ */
+ if (frags_exp > 1) {
+ int o = limit;
+ /* Cut off to the closest smaller multiple. */
+ limit = (limit / frags_exp) * frags_exp;
+ RD(2, "LIMIT %d --> %d", o, limit);
+ }
#endif
- for (head = ring->head, i = 0; i < limit; i++) {
- struct netmap_slot *slot = &ring->slot[head];
- char *p = NETMAP_BUF(ring, slot->buf_idx);
- int len = slot->len;
- struct pkt *pkt;
+ for (head = ring->head, i = 0; i < limit; i++) {
+ struct netmap_slot *slot = &ring->slot[head];
+ char *p = NETMAP_BUF(ring, slot->buf_idx);
+ int len = slot->len;
+ struct pkt *pkt;
- if (dump) {
- dump_payload(p, slot->len, ring, head);
- }
+ if (dump) {
+ dump_payload(p, slot->len, ring, head);
+ }
- frags++;
- if (!(slot->flags & NS_MOREFRAG)) {
- if (first_packet) {
+ frags++;
+ if (!(slot->flags & NS_MOREFRAG)) {
+ if (first_packet) {
+ first_packet = 0;
+ } else if (frags != frags_exp) {
+ char prbuf[512];
+ RD(1, "Received packets with %u frags, "
+ "expected %u, '%s'", frags, frags_exp,
+ multi_slot_to_string(ring, head-frags+1,
+ frags,
+ prbuf, sizeof(prbuf)));
+ }
first_packet = 0;
- } else if (frags != frags_exp) {
- char prbuf[512];
- RD(1, "Received packets with %u frags, "
- "expected %u, '%s'", frags, frags_exp,
- multi_slot_to_string(ring, head-frags+1, frags,
- prbuf, sizeof(prbuf)));
+ frags_exp = frags;
+ frags = 0;
}
- first_packet = 0;
- frags_exp = frags;
- frags = 0;
- }
- p -= sizeof(pkt->vh) - targ->g->virt_header;
- len += sizeof(pkt->vh) - targ->g->virt_header;
- pkt = (struct pkt *)p;
+ p -= sizeof(pkt->vh) - targ->g->virt_header;
+ len += sizeof(pkt->vh) - targ->g->virt_header;
+ pkt = (struct pkt *)p;
+ if (ntohs(pkt->eh.ether_type) == ETHERTYPE_IP)
+ af = AF_INET;
+ else
+ af = AF_INET6;
- if ((char *)pkt + len < ((char *)pkt->body) + sizeof(seq)) {
- RD(1, "%s: packet too small (len=%u)", __func__,
- slot->len);
- } else {
- seq = (pkt->body[0] << 24) | (pkt->body[1] << 16)
- | (pkt->body[2] << 8) | pkt->body[3];
- if (first_slot) {
- /* Grab the first one, whatever it
- is. */
- seq_exp = seq;
- first_slot = 0;
- } else if (seq != seq_exp) {
- uint32_t delta = seq - seq_exp;
+ if ((char *)pkt + len < ((char *)PKT(pkt, body, af)) +
+ sizeof(seq)) {
+ RD(1, "%s: packet too small (len=%u)", __func__,
+ slot->len);
+ } else {
+ seq = (PKT(pkt, body, af)[0] << 24) |
+ (PKT(pkt, body, af)[1] << 16) |
+ (PKT(pkt, body, af)[2] << 8) |
+ PKT(pkt, body, af)[3];
+ if (first_slot) {
+ /* Grab the first one, whatever it
+ is. */
+ seq_exp[j] = seq;
+ first_slot = 0;
+ } else if (seq != seq_exp[j]) {
+ uint32_t delta = seq - seq_exp[j];
- if (delta < (0xFFFFFFFF >> 1)) {
- RD(2, "Sequence GAP: exp %u found %u",
- seq_exp, seq);
- } else {
- RD(2, "Sequence OUT OF ORDER: "
- "exp %u found %u", seq_exp, seq);
+ if (delta < (0xFFFFFFFF >> 1)) {
+ RD(2, "Sequence GAP: exp %u found %u",
+ seq_exp[j], seq);
+ } else {
+ RD(2, "Sequence OUT OF ORDER: "
+ "exp %u found %u", seq_exp[j], seq);
+ }
+ seq_exp[j] = seq;
}
- seq_exp = seq;
+ seq_exp[j]++;
}
- seq_exp++;
+
+ cur.bytes += slot->len;
+ head = nm_ring_next(ring, head);
+ cur.pkts++;
}
- cur.bytes += slot->len;
- head = nm_ring_next(ring, head);
- cur.pkts++;
+ ring->cur = ring->head = head;
+
+ cur.events++;
+ targ->ctr = cur;
}
-
- ring->cur = ring->head = head;
-
- cur.events++;
- targ->ctr = cur;
}
-
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
+#ifndef BUSYWAIT
out:
+#endif /* !BUSYWAIT */
targ->completed = 1;
targ->ctr = cur;
quit:
+ if (seq_exp != NULL)
+ free(seq_exp);
/* reset the ``used`` flag. */
targ->used = 0;
@@ -1830,7 +2265,7 @@
static void
-tx_output(struct my_ctrs *cur, double delta, const char *msg)
+tx_output(struct glob_arg *g, struct my_ctrs *cur, double delta, const char *msg)
{
double bw, raw_bw, pps, abs;
char b1[40], b2[80], b3[80];
@@ -1854,51 +2289,156 @@
size = 60;
pps = cur->pkts / delta;
bw = (8.0 * cur->bytes) / delta;
- /* raw packets have4 bytes crc + 20 bytes framing */
- raw_bw = (8.0 * (cur->pkts * 24 + cur->bytes)) / delta;
+ raw_bw = (8.0 * cur->bytes + cur->pkts * g->framing) / delta;
abs = cur->pkts / (double)(cur->events);
printf("Speed: %spps Bandwidth: %sbps (raw %sbps). Average batch: %.2f pkts\n",
- norm(b1, pps), norm(b2, bw), norm(b3, raw_bw), abs);
+ norm(b1, pps, normalize), norm(b2, bw, normalize), norm(b3, raw_bw, normalize), abs);
}
static void
-usage(void)
+usage(int errcode)
{
+/* This usage is generated from the pkt-gen man page:
+ * $ man pkt-gen > x
+ * and pasted here adding the string terminators and endlines with simple
+ * regular expressions. */
const char *cmd = "pkt-gen";
fprintf(stderr,
"Usage:\n"
"%s arguments\n"
- "\t-i interface interface name\n"
- "\t-f function tx rx ping pong txseq rxseq\n"
- "\t-n count number of iterations (can be 0)\n"
- "\t-t pkts_to_send also forces tx mode\n"
- "\t-r pkts_to_receive also forces rx mode\n"
- "\t-l pkt_size in bytes excluding CRC\n"
- "\t-d dst_ip[:port[-dst_ip:port]] single or range\n"
- "\t-s src_ip[:port[-src_ip:port]] single or range\n"
- "\t-D dst-mac\n"
- "\t-S src-mac\n"
- "\t-a cpu_id use setaffinity\n"
- "\t-b burst size testing, mostly\n"
- "\t-c cores cores to use\n"
- "\t-p threads processes/threads to use\n"
- "\t-T report_ms milliseconds between reports\n"
- "\t-w wait_for_link_time in seconds\n"
- "\t-R rate in packets per second\n"
- "\t-X dump payload\n"
- "\t-H len add empty virtio-net-header with size 'len'\n"
- "\t-E pipes allocate extra space for a number of pipes\n"
- "\t-r do not touch the buffers (send rubbish)\n"
- "\t-P file load packet from pcap file\n"
- "\t-z use random IPv4 src address/port\n"
- "\t-Z use random IPv4 dst address/port\n"
- "\t-F num_frags send multi-slot packets\n"
- "\t-A activate pps stats on receiver\n"
- "",
+" -h Show program usage and exit.\n"
+"\n"
+" -i interface\n"
+" Name of the network interface that pkt-gen operates on. It can be a system network interface\n"
+" (e.g., em0), the name of a vale(4) port (e.g., valeSSS:PPP), the name of a netmap pipe or\n"
+" monitor, or any valid netmap port name accepted by the nm_open library function, as docu-\n"
+" mented in netmap(4) (NIOCREGIF section).\n"
+"\n"
+" -f function\n"
+" The function to be executed by pkt-gen. Specify tx for transmission, rx for reception, ping\n"
+" for client-side ping-pong operation, and pong for server-side ping-pong operation.\n"
+"\n"
+" -n count\n"
+" Number of iterations of the pkt-gen function (with 0 meaning infinite). In case of tx or rx,\n"
+" count is the number of packets to receive or transmit. In case of ping or pong, count is the\n"
+" number of ping-pong transactions.\n"
+"\n"
+" -l pkt_size\n"
+" Packet size in bytes excluding CRC. If passed a second time, use random sizes greater than or\n"
+" equal to the second one and lower than the first one.\n"
+"\n"
+" -b burst_size\n"
+" Transmit or receive up to burst_size packets at a time.\n"
+"\n"
+" -4 Use IPv4 addresses.\n"
+"\n"
+" -6 Use IPv6 addresses.\n"
+"\n"
+" -d dst_ip[:port[-dst_ip:port]]\n"
+" Destination IPv4/IPv6 address and port, single or range.\n"
+"\n"
+" -s src_ip[:port[-src_ip:port]]\n"
+" Source IPv4/IPv6 address and port, single or range.\n"
+"\n"
+" -D dst_mac\n"
+" Destination MAC address in colon notation (e.g., aa:bb:cc:dd:ee:00).\n"
+"\n"
+" -S src_mac\n"
+" Source MAC address in colon notation.\n"
+"\n"
+" -a cpu_id\n"
+" Pin the first thread of pkt-gen to a particular CPU using pthread_setaffinity_np(3). If more\n"
+" threads are used, they are pinned to the subsequent CPUs, one per thread.\n"
+"\n"
+" -c cpus\n"
+" Maximum number of CPUs to use (0 means to use all the available ones).\n"
+"\n"
+" -p threads\n"
+" Number of threads to use. By default, only a single thread is used to handle all the netmap\n"
+" rings. If threads is larger than one, each thread handles a single TX ring (in tx mode), a\n"
+" single RX ring (in rx mode), or a TX/RX ring couple. The number of threads must be less than or\n"
+" equal to the number of TX (or RX) rings available in the device specified by interface.\n"
+"\n"
+" -T report_ms\n"
+" Number of milliseconds between reports.\n"
+"\n"
+" -w wait_for_link_time\n"
+" Number of seconds to wait before starting the pkt-gen function, useful to make sure that the\n"
+" network link is up. A network device driver may take some time to enter netmap mode, or to\n"
+" create a new transmit/receive ring pair when netmap(4) requests one.\n"
+"\n"
+" -R rate\n"
+" Packet transmission rate. Not setting the packet transmission rate tells pkt-gen to transmit\n"
+" packets as quickly as possible. On servers from 2010 on-wards netmap(4) is able to com-\n"
+" pletely use all of the bandwidth of a 10 or 40Gbps link, so this option should be used unless\n"
+" your intention is to saturate the link.\n"
+"\n"
+" -X Dump payload of each packet transmitted or received.\n"
+"\n"
+" -H len Add empty virtio-net-header with size 'len'. Valid sizes are 0, 10 and 12. This option is\n"
+" only used with Virtual Machine technologies that use virtio as a network interface.\n"
+"\n"
+" -P file\n"
+" Load the packet to be transmitted from a pcap file rather than constructing it within\n"
+" pkt-gen.\n"
+"\n"
+" -z Use random IPv4/IPv6 src address/port.\n"
+"\n"
+" -Z Use random IPv4/IPv6 dst address/port.\n"
+"\n"
+" -N Do not normalize units (i.e., use bps, pps instead of Mbps, Kpps, etc.).\n"
+"\n"
+" -F num_frags\n"
+" Send multi-slot packets, each one with num_frags fragments. A multi-slot packet is repre-\n"
+" sented by two or more consecutive netmap slots with the NS_MOREFRAG flag set (except for the\n"
+" last slot). This is useful to transmit or receive packets larger than the netmap buffer\n"
+" size.\n"
+"\n"
+" -M frag_size\n"
+" In multi-slot mode, frag_size specifies the size of each fragment, if smaller than the packet\n"
+" length divided by num_frags.\n"
+"\n"
+" -I Use indirect buffers. It is only valid for transmitting on VALE ports, and it is implemented\n"
+" by setting the NS_INDIRECT flag in the netmap slots.\n"
+"\n"
+" -W Exit immediately if all the RX rings are empty the first time they are examined.\n"
+"\n"
+" -v Increase the verbosity level.\n"
+"\n"
+" -r In tx mode, do not initialize packets, but send whatever the content of the uninitialized\n"
+" netmap buffers is (rubbish mode).\n"
+"\n"
+" -A Compute mean and standard deviation (over a sliding window) for the transmit or receive rate.\n"
+"\n"
+" -B Take Ethernet framing and CRC into account when computing the average bps. This adds 4 bytes\n"
+" of CRC and 20 bytes of framing to each packet.\n"
+"\n"
+" -C tx_slots[,rx_slots[,tx_rings[,rx_rings]]]\n"
+" Configuration in terms of number of rings and slots to be used when opening the netmap port.\n"
+" Such configuration has effect on software ports created on the fly, such as VALE ports and\n"
+" netmap pipes. The configuration may consist of 1 to 4 numbers separated by commas: tx_slots,\n"
+" rx_slots, tx_rings, rx_rings. Missing numbers or zeroes stand for default values. As an\n"
+" additional convenience, if exactly one number is specified, then this is assigned to both\n"
+" tx_slots and rx_slots. If there is no fourth number, then the third one is assigned to both\n"
+" tx_rings and rx_rings.\n"
+"\n"
+" -o options data generation options (parsed using atoi)\n"
+" OPT_PREFETCH 1\n"
+" OPT_ACCESS 2\n"
+" OPT_COPY 4\n"
+" OPT_MEMCPY 8\n"
+" OPT_TS 16 (add a timestamp)\n"
+" OPT_INDIRECT 32 (use indirect buffers)\n"
+" OPT_DUMP 64 (dump rx/tx traffic)\n"
+" OPT_RUBBISH 256\n"
+" (send whatever the buffers contain)\n"
+" OPT_RANDOM_SRC 512\n"
+" OPT_RANDOM_DST 1024\n"
+" OPT_PPS_STATS 2048\n"
+ "",
cmd);
-
- exit(0);
+ exit(errcode);
}
enum {
@@ -1908,67 +2448,76 @@
};
static void
-start_threads(struct glob_arg *g)
-{
+start_threads(struct glob_arg *g) {
int i;
targs = calloc(g->nthreads, sizeof(*targs));
+ struct targ *t;
/*
* Now create the desired number of threads, each one
* using a single descriptor.
- */
+ */
for (i = 0; i < g->nthreads; i++) {
- struct targ *t = &targs[i];
+ uint64_t seed = time(0) | (time(0) << 32);
+ t = &targs[i];
bzero(t, sizeof(*t));
t->fd = -1; /* default, with pcap */
t->g = g;
+ memcpy(t->seed, &seed, sizeof(t->seed));
- if (g->dev_type == DEV_NETMAP) {
- struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
- uint64_t nmd_flags = 0;
- nmd.self = &nmd;
+ if (g->dev_type == DEV_NETMAP) {
+ struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
+ uint64_t nmd_flags = 0;
+ nmd.self = &nmd;
- if (i > 0) {
- /* the first thread uses the fd opened by the main
- * thread, the other threads re-open /dev/netmap
- */
- if (g->nthreads > 1) {
- nmd.req.nr_flags =
- g->nmd->req.nr_flags & ~NR_REG_MASK;
- nmd.req.nr_flags |= NR_REG_ONE_NIC;
- nmd.req.nr_ringid = i;
- }
- /* Only touch one of the rings (rx is already ok) */
- if (g->td_type == TD_TYPE_RECEIVER)
- nmd_flags |= NETMAP_NO_TX_POLL;
+ if (i > 0) {
+ /* the first thread uses the fd opened by the main
+ * thread, the other threads re-open /dev/netmap
+ */
+ if (g->nthreads > 1) {
+ nmd.req.nr_flags =
+ g->nmd->req.nr_flags & ~NR_REG_MASK;
+ nmd.req.nr_flags |= NR_REG_ONE_NIC;
+ nmd.req.nr_ringid = i;
+ }
+ /* Only touch one of the rings (rx is already ok) */
+ if (g->td_type == TD_TYPE_RECEIVER)
+ nmd_flags |= NETMAP_NO_TX_POLL;
- /* register interface. Override ifname and ringid etc. */
- t->nmd = nm_open(t->g->ifname, NULL, nmd_flags |
- NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
- if (t->nmd == NULL) {
- D("Unable to open %s: %s",
- t->g->ifname, strerror(errno));
- continue;
+ /* register interface. Override ifname and ringid etc. */
+ t->nmd = nm_open(t->g->ifname, NULL, nmd_flags |
+ NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
+ if (t->nmd == NULL) {
+ D("Unable to open %s: %s",
+ t->g->ifname, strerror(errno));
+ continue;
+ }
+ } else {
+ t->nmd = g->nmd;
}
+ t->fd = t->nmd->fd;
+ t->frags = g->frags;
} else {
- t->nmd = g->nmd;
+ targs[i].fd = g->main_fd;
}
- t->fd = t->nmd->fd;
-
- } else {
- targs[i].fd = g->main_fd;
- }
t->used = 1;
t->me = i;
if (g->affinity >= 0) {
- t->affinity = (g->affinity + i) % g->system_cpus;
+ t->affinity = (g->affinity + i) % g->cpus;
} else {
t->affinity = -1;
}
/* default, init packets */
initialize_packet(t);
+ }
+ /* Wait for PHY reset. */
+ D("Wait %d secs for phy reset", g->wait_link);
+ sleep(g->wait_link);
+ D("Ready...");
+ for (i = 0; i < g->nthreads; i++) {
+ t = &targs[i];
if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) {
D("Unable to create thread %d: %s", i, strerror(errno));
t->used = 0;
@@ -1988,7 +2537,7 @@
prev.pkts = prev.bytes = prev.events = 0;
gettimeofday(&prev.t, NULL);
for (;;) {
- char b1[40], b2[40], b3[40], b4[70];
+ char b1[40], b2[40], b3[40], b4[100];
uint64_t pps, usec;
struct my_ctrs x;
double abs;
@@ -2045,13 +2594,13 @@
ppsdev = sqrt(ppsdev);
snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]",
- norm(b1, ppsavg), norm(b2, ppsdev));
+ norm(b1, ppsavg, normalize), norm(b2, ppsdev, normalize));
}
D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space",
- norm(b1, pps), b4,
- norm(b2, (double)x.pkts),
- norm(b3, (double)x.bytes*8),
+ norm(b1, pps, normalize), b4,
+ norm(b2, (double)x.pkts, normalize),
+ norm(b3, (double)x.bytes*8+(double)x.pkts*g->framing, normalize),
(unsigned long long)usec,
abs, (int)cur.min_space);
prev = cur;
@@ -2105,9 +2654,9 @@
timersub(&toc, &tic, &toc);
delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
if (g->td_type == TD_TYPE_SENDER)
- tx_output(&cur, delta_t, "Sent");
- else
- tx_output(&cur, delta_t, "Received");
+ tx_output(g, &cur, delta_t, "Sent");
+ else if (g->td_type == TD_TYPE_RECEIVER)
+ tx_output(g, &cur, delta_t, "Received");
}
struct td_desc {
@@ -2114,16 +2663,17 @@
int ty;
char *key;
void *f;
+ int default_burst;
};
static struct td_desc func[] = {
- { TD_TYPE_SENDER, "tx", sender_body },
- { TD_TYPE_RECEIVER, "rx", receiver_body },
- { TD_TYPE_OTHER, "ping", pinger_body },
- { TD_TYPE_OTHER, "pong", ponger_body },
- { TD_TYPE_SENDER, "txseq", txseq_body },
- { TD_TYPE_RECEIVER, "rxseq", rxseq_body },
- { 0, NULL, NULL }
+ { TD_TYPE_RECEIVER, "rx", receiver_body, 512}, /* default */
+ { TD_TYPE_SENDER, "tx", sender_body, 512 },
+ { TD_TYPE_OTHER, "ping", ping_body, 1 },
+ { TD_TYPE_OTHER, "pong", pong_body, 1 },
+ { TD_TYPE_SENDER, "txseq", txseq_body, 512 },
+ { TD_TYPE_RECEIVER, "rxseq", rxseq_body, 512 },
+ { 0, NULL, NULL, 0 }
};
static int
@@ -2165,7 +2715,12 @@
/* if a device name was specified, put it in the structure; otherwise,
* the kernel will try to allocate the "next" device of the
* specified type */
- strncpy(ifr.ifr_name, dev, IFNAMSIZ);
+ size_t len = strlen(dev);
+ if (len > IFNAMSIZ) {
+ D("%s too long", dev);
+ return -1;
+ }
+ memcpy(ifr.ifr_name, dev, len);
}
/* try to create the device */
@@ -2183,9 +2738,9 @@
D("new name is %s", dev);
#endif /* linux */
- /* this is the special file descriptor that the caller will use to talk
- * with the virtual interface */
- return fd;
+ /* this is the special file descriptor that the caller will use to talk
+ * with the virtual interface */
+ return fd;
}
int
@@ -2198,41 +2753,63 @@
struct glob_arg g;
int ch;
- int wait_link = 2;
int devqueues = 1; /* how many device queues */
+ int wait_link_arg = 0;
+ int pkt_size_done = 0;
+
+ struct td_desc *fn = func;
+
bzero(&g, sizeof(g));
g.main_fd = -1;
- g.td_body = receiver_body;
- g.td_type = TD_TYPE_RECEIVER;
+ g.td_body = fn->f;
+ g.td_type = fn->ty;
g.report_interval = 1000; /* report interval */
g.affinity = -1;
/* ip addresses can also be a range x.x.x.x-x.x.x.y */
+ g.af = AF_INET; /* default */
g.src_ip.name = "10.0.0.1";
g.dst_ip.name = "10.1.0.1";
g.dst_mac.name = "ff:ff:ff:ff:ff:ff";
g.src_mac.name = NULL;
g.pkt_size = 60;
- g.burst = 512; // default
+ g.pkt_min_size = 0;
g.nthreads = 1;
- g.cpus = 1; // default
+ g.cpus = 1; /* default */
g.forever = 1;
g.tx_rate = 0;
- g.frags = 1;
+ g.frags =1;
+ g.mtu = 1500;
g.nmr_config = "";
g.virt_header = 0;
+ g.wait_link = 2; /* wait 2 seconds for physical ports */
- while ( (ch = getopt(arc, argv,
- "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:E:m:rP:zZA")) != -1) {
- struct td_desc *fn;
+ while ((ch = getopt(arc, argv, "46a:f:F:Nn:i:Il:d:s:D:S:b:c:o:p:"
+ "T:w:WvR:XC:H:rP:zZAhBM:")) != -1) {
switch(ch) {
default:
D("bad option %c %s", ch, optarg);
- usage();
+ usage(-1);
break;
+ case 'h':
+ usage(0);
+ break;
+
+ case '4':
+ g.af = AF_INET;
+ break;
+
+ case '6':
+ g.af = AF_INET6;
+ break;
+
+ case 'N':
+ normalize = 0;
+ break;
+
case 'n':
g.npackets = strtoull(optarg, NULL, 10);
break;
@@ -2246,6 +2823,10 @@
g.frags = i;
break;
+ case 'M':
+ g.mtu = atoi(optarg);
+ break;
+
case 'f':
for (fn = func; fn->key; fn++) {
if (!strcmp(fn->key, optarg))
@@ -2260,7 +2841,7 @@
break;
case 'o': /* data generation options */
- g.options = atoi(optarg);
+ g.options |= atoi(optarg);
break;
case 'a': /* force affinity */
@@ -2298,11 +2879,16 @@
break;
case 'I':
- g.options |= OPT_INDIRECT; /* XXX use indirect buffer */
+ g.options |= OPT_INDIRECT; /* use indirect buffers */
break;
case 'l': /* pkt_size */
- g.pkt_size = atoi(optarg);
+ if (pkt_size_done) {
+ g.pkt_min_size = atoi(optarg);
+ } else {
+ g.pkt_size = atoi(optarg);
+ pkt_size_done = 1;
+ }
break;
case 'd':
@@ -2318,11 +2904,12 @@
break;
case 'w':
- wait_link = atoi(optarg);
+ g.wait_link = atoi(optarg);
+ wait_link_arg = 1;
break;
- case 'W': /* XXX changed default */
- g.forever = 0; /* do not exit rx even with no traffic */
+ case 'W':
+ g.forever = 0; /* exit RX with no traffic */
break;
case 'b': /* burst */
@@ -2357,18 +2944,9 @@
case 'H':
g.virt_header = atoi(optarg);
break;
- case 'e': /* extra bufs */
- g.extra_bufs = atoi(optarg);
- break;
- case 'E':
- g.extra_pipes = atoi(optarg);
- break;
case 'P':
g.packet_file = strdup(optarg);
break;
- case 'm':
- /* ignored */
- break;
case 'r':
g.options |= OPT_RUBBISH;
break;
@@ -2381,28 +2959,47 @@
case 'A':
g.options |= OPT_PPS_STATS;
break;
+ case 'B':
+ /* raw packets have 4 bytes crc + 20 bytes framing */
+ // XXX maybe add an option to pass the IFG
+ g.framing = 24 * 8;
+ break;
}
}
if (strlen(g.ifname) <=0 ) {
D("missing ifname");
- usage();
+ usage(-1);
}
+ if (g.burst == 0) {
+ g.burst = fn->default_burst;
+ D("using default burst size: %d", g.burst);
+ }
+
g.system_cpus = i = system_ncpus();
if (g.cpus < 0 || g.cpus > i) {
D("%d cpus is too high, have only %d cpus", g.cpus, i);
- usage();
+ usage(-1);
}
-D("running on %d cpus (have %d)", g.cpus, i);
+ D("running on %d cpus (have %d)", g.cpus, i);
if (g.cpus == 0)
g.cpus = i;
+ if (!wait_link_arg && !strncmp(g.ifname, "vale", 4)) {
+ g.wait_link = 0;
+ }
+
if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) {
D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE);
- usage();
+ usage(-1);
}
+ if (g.pkt_min_size > 0 && (g.pkt_min_size < 16 || g.pkt_min_size > g.pkt_size)) {
+ D("bad pktminsize %d [16..%d]\n", g.pkt_min_size, g.pkt_size);
+ usage(-1);
+ }
+
if (g.src_mac.name == NULL) {
static char mybuf[20] = "00:00:00:00:00:00";
/* retrieve source mac address. */
@@ -2413,21 +3010,15 @@
g.src_mac.name = mybuf;
}
/* extract address ranges */
- extract_ip_range(&g.src_ip);
- extract_ip_range(&g.dst_ip);
- extract_mac_range(&g.src_mac);
- extract_mac_range(&g.dst_mac);
+ if (extract_mac_range(&g.src_mac) || extract_mac_range(&g.dst_mac))
+ usage(-1);
+ g.options |= extract_ip_range(&g.src_ip, g.af);
+ g.options |= extract_ip_range(&g.dst_ip, g.af);
- if (g.src_ip.start != g.src_ip.end ||
- g.src_ip.port0 != g.src_ip.port1 ||
- g.dst_ip.start != g.dst_ip.end ||
- g.dst_ip.port0 != g.dst_ip.port1)
- g.options |= OPT_COPY;
-
if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1
&& g.virt_header != VIRT_HDR_2) {
D("bad virtio-net-header length");
- usage();
+ usage(-1);
}
if (g.dev_type == DEV_TAP) {
@@ -2435,7 +3026,7 @@
g.main_fd = tap_alloc(g.ifname);
if (g.main_fd < 0) {
D("cannot open tap %s", g.ifname);
- usage();
+ usage(-1);
}
#ifndef NO_PCAP
} else if (g.dev_type == DEV_PCAP) {
@@ -2445,7 +3036,7 @@
g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf);
if (g.p == NULL) {
D("cannot open pcap on %s", g.ifname);
- usage();
+ usage(-1);
}
g.main_fd = pcap_fileno(g.p);
D("using pcap on %s fileno %d", g.ifname, g.main_fd);
@@ -2453,20 +3044,21 @@
} else if (g.dummy_send) { /* but DEV_NETMAP */
D("using a dummy send routine");
} else {
- struct nmreq base_nmd;
+ struct nm_desc base_nmd;
+ char errmsg[MAXERRMSG];
+ u_int flags;
bzero(&base_nmd, sizeof(base_nmd));
- parse_nmr_config(g.nmr_config, &base_nmd);
- if (g.extra_bufs) {
- base_nmd.nr_arg3 = g.extra_bufs;
+ parse_nmr_config(g.nmr_config, &base_nmd.req);
+
+ base_nmd.req.nr_flags |= NR_ACCEPT_VNET_HDR;
+
+ if (nm_parse(g.ifname, &base_nmd, errmsg) < 0) {
+ D("Invalid name '%s': %s", g.ifname, errmsg);
+ goto out;
}
- if (g.extra_pipes) {
- base_nmd.nr_arg1 = g.extra_pipes;
- }
- base_nmd.nr_flags |= NR_ACCEPT_VNET_HDR;
-
/*
* Open the netmap device using nm_open().
*
@@ -2474,28 +3066,21 @@
* which in turn may take some time for the PHY to
* reconfigure. We do the open here to have time to reset.
*/
- g.nmd = nm_open(g.ifname, &base_nmd, 0, NULL);
+ flags = NM_OPEN_IFNAME | NM_OPEN_ARG1 | NM_OPEN_ARG2 |
+ NM_OPEN_ARG3 | NM_OPEN_RING_CFG;
+ if (g.nthreads > 1) {
+ base_nmd.req.nr_flags &= ~NR_REG_MASK;
+ base_nmd.req.nr_flags |= NR_REG_ONE_NIC;
+ base_nmd.req.nr_ringid = 0;
+ }
+ g.nmd = nm_open(g.ifname, NULL, flags, &base_nmd);
if (g.nmd == NULL) {
D("Unable to open %s: %s", g.ifname, strerror(errno));
goto out;
}
-
- if (g.nthreads > 1) {
- struct nm_desc saved_desc = *g.nmd;
- saved_desc.self = &saved_desc;
- saved_desc.mem = NULL;
- nm_close(g.nmd);
- saved_desc.req.nr_flags &= ~NR_REG_MASK;
- saved_desc.req.nr_flags |= NR_REG_ONE_NIC;
- saved_desc.req.nr_ringid = 0;
- g.nmd = nm_open(g.ifname, &base_nmd, NM_OPEN_IFNAME, &saved_desc);
- if (g.nmd == NULL) {
- D("Unable to open %s: %s", g.ifname, strerror(errno));
- goto out;
- }
- }
g.main_fd = g.nmd->fd;
- D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem);
+ D("mapped %luKB at %p", (unsigned long)(g.nmd->req.nr_memsize>>10),
+ g.nmd->mem);
if (g.virt_header) {
/* Set the virtio-net header length, since the user asked
@@ -2558,7 +3143,7 @@
/* Exit if something went wrong. */
if (g.main_fd < 0) {
D("aborting");
- usage();
+ usage(-1);
}
}
@@ -2583,8 +3168,8 @@
int lim = (g.tx_rate)/300;
if (g.burst > lim)
g.burst = lim;
- if (g.burst < g.frags)
- g.burst = g.frags;
+ if (g.burst == 0)
+ g.burst = 1;
x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate;
g.tx_period.tv_nsec = x;
g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
@@ -2593,11 +3178,6 @@
if (g.td_type == TD_TYPE_SENDER)
D("Sending %d packets every %ld.%09ld s",
g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec);
- /* Wait for PHY reset. */
- D("Wait %d secs for phy reset", wait_link);
- sleep(wait_link);
- D("Ready...");
-
/* Install ^C handler. */
global_nthreads = g.nthreads;
sigemptyset(&ss);
@@ -2608,6 +3188,7 @@
}
start_threads(&g);
/* Install the handler and re-enable SIGINT for the main thread */
+ memset(&sa, 0, sizeof(sa));
sa.sa_handler = sigint_h;
if (sigaction(SIGINT, &sa, NULL) < 0) {
D("failed to install ^C handler: %s", strerror(errno));
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Mar 30, 4:39 AM (22 h, 4 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
30573840
Default Alt Text
D17698.id49615.diff (81 KB)
Attached To
Mode
D17698: netmap: pkt-gen: several updates from upstream
Attached
Detach File
Event Timeline
Log In to Comment