Page MenuHomeFreeBSD

D17698.id49615.diff
No OneTemporary

D17698.id49615.diff

Index: ctrs.h
===================================================================
--- ctrs.h
+++ ctrs.h
@@ -7,9 +7,11 @@
/* counters to accumulate statistics */
struct my_ctrs {
- uint64_t pkts, bytes, events, drop;
+ uint64_t pkts, bytes, events;
+ uint64_t drop, drop_bytes;
uint64_t min_space;
struct timeval t;
+ uint32_t oq_n; /* number of elements in overflow queue (used in lb) */
};
/* very crude code to print a number in normalized form.
@@ -16,21 +18,26 @@
* Caller has to make sure that the buffer is large enough.
*/
static const char *
-norm2(char *buf, double val, char *fmt)
+norm2(char *buf, double val, char *fmt, int normalize)
{
char *units[] = { "", "K", "M", "G", "T" };
u_int i;
-
- for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++)
- val /= 1000;
+ if (normalize)
+ for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++)
+ val /= 1000;
+ else
+ i=0;
sprintf(buf, fmt, val, units[i]);
return buf;
}
static __inline const char *
-norm(char *buf, double val)
+norm(char *buf, double val, int normalize)
{
- return norm2(buf, val, "%.3f %s");
+ if (normalize)
+ return norm2(buf, val, "%.3f %s", normalize);
+ else
+ return norm2(buf, val, "%.0f %s", normalize);
}
static __inline int
@@ -89,7 +96,7 @@
return ret;
}
-static uint64_t
+static __inline uint64_t
wait_for_next_report(struct timeval *prev, struct timeval *cur,
int report_interval)
{
@@ -106,3 +113,4 @@
return delta.tv_sec* 1000000 + delta.tv_usec;
}
#endif /* CTRS_H_ */
+
Index: pkt-gen.8
===================================================================
--- pkt-gen.8
+++ pkt-gen.8
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd October 23, 2018
+.Dd October 25, 2018
.Dt PKT-GEN 8
.Os
.Sh NAME
@@ -36,96 +36,214 @@
.Bl -item -compact
.It
.Nm
+.Op Fl h46XzZNIWvrAB
.Op Fl i Ar interface
.Op Fl f Ar function
.Op Fl n Ar count
-.Op Fl t Ar pkts_to_send
-.Op Fl r Ar pkts_to_receive
.Op Fl l Ar pkt_size
+.Op Fl b Ar burst_size
.Op Fl d Ar dst_ip[:port[-dst_ip:port]]
.Op Fl s Ar src_ip[:port[-src_ip:port]]
-.Op Fl D Ar dst-mac
-.Op Fl S Ar src-mac
+.Op Fl D Ar dst_mac
+.Op Fl S Ar src_mac
.Op Fl a Ar cpu_id
-.Op Fl b Ar burst size
-.Op Fl c Ar cores
+.Op Fl c Ar cpus
.Op Fl p Ar threads
.Op Fl T Ar report_ms
-.Op Fl P
+.Op Fl P Ar file
.Op Fl w Ar wait_for_link_time
.Op Fl R Ar rate
-.Op Fl X
.Op Fl H Ar len
-.Op Fl P Ar xfile
-.Op Fl z
-.Op Fl Z
+.Op Fl F Ar num_frags
+.Op Fl M Ar frag_size
+.Op Fl C Ar port_config
+.El
.Sh DESCRIPTION
.Nm
-generates and receives raw network packets using
-.Xr netmap 4 .
+leverages
+.Xr netmap 4
+to generate and receive raw network packets in batches.
The arguments are as follows:
-.Pp
.Bl -tag -width Ds
+.It Fl h
+Show program usage and exit.
.It Fl i Ar interface
-Network interface name.
-.It Fl f Ar function tx rx ping pong
-Set the function to transmit, receive of ping/pong.
-.It Fl n count
-Number of iterations (can be 0).
-.It Fl t pkts_to_send
-Number of packets to send. Also forces transmit mode.
-.It Fl r Ar pkts_to_receive
-Number of packets to receive. Also forces rx mode.
+Name of the network interface that
+.Nm
+operates on.
+It can be a system network interface (e.g., em0),
+the name of a
+.Xr vale 4
+port (e.g., valeSSS:PPP), the name of a netmap pipe or monitor,
+or any valid netmap port name accepted by the
+.Ar nm_open
+library function, as documented in
+.Xr netmap 4
+(NIOCREGIF section).
+.It Fl f Ar function
+The function to be executed by
+.Nm .
+Specify
+.Ar tx
+for transmission,
+.Ar rx
+for reception,
+.Ar ping
+for client-side ping-pong operation, and
+.Ar pong
+for server-side ping-pong operation.
+.It Fl n Ar count
+Number of iterations of the
+.Nm
+function (with 0 meaning infinite).
+In case of
+.Ar tx
+or
+.Ar rx ,
+.Ar count
+is the number of packets to receive or transmit.
+In case of
+.Ar ping
+or
+.Ar pong ,
+.Ar count
+is the number of ping-pong transactions.
.It Fl l Ar pkt_size
Packet size in bytes excluding CRC.
+If passed a second time, use random sizes greater than or equal to the
+second one and lower than the first one.
+.It Fl b Ar burst_size
+Transmit or receive up to
+.Ar burst_size
+packets at a time.
+.It Fl 4
+Use IPv4 addresses.
+.It Fl 6
+Use IPv6 addresses.
.It Fl d Ar dst_ip[:port[-dst_ip:port]]
-Destination IPv4 address and port, single or range.
+Destination IPv4/IPv6 address and port, single or range.
.It Fl s Ar src_ip[:port[-src_ip:port]]
-Source IPv4 address and port, single or range.
-.It Fl D Ar dst-mac
-Destination MAC address in colon notation.
-.It Fl S Ar src-mac
+Source IPv4/IPv6 address and port, single or range.
+.It Fl D Ar dst_mac
+Destination MAC address in colon notation (e.g., aa:bb:cc:dd:ee:00).
+.It Fl S Ar src_mac
Source MAC address in colon notation.
.It Fl a Ar cpu_id
-Tie
+Pin the first thread of
.Nm
-to a particular CPU core using
-.Xr setaffinity 2.
-.It Fl b Ar burst size
-Set the size of a burst of packets.
-.It Fl c Ar cores
-Number of cores to use.
+to a particular CPU using
+.Xr pthread_setaffinity_np 3 .
+If more threads are used, they are pinned to the subsequent CPUs,
+one per thread.
+.It Fl c Ar cpus
+Maximum number of CPUs to use (0 means to use all the available ones).
.It Fl p Ar threads
Number of threads to use.
+By default, only a single thread is used
+to handle all the netmap rings.
+If
+.Ar threads
+is larger than one, each thread handles a single TX ring (in
+.Ar tx
+mode), a single RX ring (in
+.Ar rx
+mode), or a TX/RX ring couple.
+The number of
+.Ar threads
+must be less than or equal to the number of TX (or RX) rings available
+in the device specified by
+.Ar interface .
.It Fl T Ar report_ms
Number of milliseconds between reports.
-.It Fl P
-Use libpcap instead of netmap for reading or writing.
.It Fl w Ar wait_for_link_time
-Number of seconds to wait to make sure that the network link is up. A
-network device driver may take some time to create a new
-transmit/receive ring pair when
+Number of seconds to wait before starting the
+.Nm
+function, useful to make sure that the network link is up.
+A network device driver may take some time to enter netmap mode, or
+to create a new transmit/receive ring pair when
.Xr netmap 4
requests one.
.It Fl R Ar rate
-Packet transmission rate. Not setting the packet transmission rate tells
+Packet transmission rate.
+Not setting the packet transmission rate tells
.Nm
-to transmit packets as quickly as possible. On servers from 2010 on-wards
+to transmit packets as quickly as possible.
+On servers from 2010 onwards
.Xr netmap 4
is able to completely use all of the bandwidth of a 10 or 40Gbps link,
so this option should be used unless your intention is to saturate the link.
.It Fl X
-Dump payload transmitted or received.
+Dump payload of each packet transmitted or received.
.It Fl H Ar len
-Add empty virtio-net-header with size 'len'. This option is only use
-with Virtual Machine technologies that use virtio as a network interface.
+Add empty virtio-net-header with size 'len'.
+Valid sizes are 0, 10 and 12.
+This option is only used with Virtual Machine technologies that use virtio
+as a network interface.
.It Fl P Ar file
-Load the packet from a pcap file rather than constructing it inside of
-.Nm
+Load the packet to be transmitted from a pcap file rather than constructing
+it within
+.Nm .
.It Fl z
-Use random IPv4 src address/port
+Use random IPv4/IPv6 src address/port.
.It Fl Z
-Use random IPv4 dst address/port
+Use random IPv4/IPv6 dst address/port.
+.It Fl N
+Do not normalize units (i.e., use bps, pps instead of Mbps, Kpps, etc.).
+.It Fl F Ar num_frags
+Send multi-slot packets, each one with
+.Ar num_frags
+fragments.
+A multi-slot packet is represented by two or more consecutive netmap slots
+with the
+.Ar NS_MOREFRAG
+flag set (except for the last slot).
+This is useful to transmit or receive packets larger than the netmap
+buffer size.
+.It Fl M Ar frag_size
+In multi-slot mode,
+.Ar frag_size
+specifies the size of each fragment, if smaller than the packet length
+divided by
+.Ar num_frags .
+.It Fl I
+Use indirect buffers.
+It is only valid for transmitting on VALE ports,
+and it is implemented by setting the
+.Ar NS_INDIRECT
+flag in the netmap slots.
+.It Fl W
+Exit immediately if all the RX rings are empty the first time they are
+examined.
+.It Fl v
+Increase the verbosity level.
+.It Fl r
+In
+.Ar tx
+mode, do not initialize packets, but send whatever the content of
+the uninitialized netmap buffers is (rubbish mode).
+.It Fl A
+Compute mean and standard deviation (over a sliding window) for the
+transmit or receive rate.
+.It Fl B
+Take Ethernet framing and CRC into account when computing the average bps.
+This adds 4 bytes of CRC and 20 bytes of framing to each packet.
+.It Fl C Ar tx_slots[,rx_slots[,tx_rings[,rx_rings]]]
+Configuration in terms of number of rings and slots to be used when
+opening the netmap port.
+Such configuration has effect on software ports
+created on the fly, such as VALE ports and netmap pipes.
+The configuration may consist of 1 to 4 numbers separated by commas:
+.Ar tx_slots , rx_slots , tx_rings , rx_rings .
+Missing numbers or zeroes stand for default values.
+As an additional convenience, if exactly one number is specified,
+then this is assigned to both
+.Ar tx_slots
+and
+.Ar rx_slots .
+If there is no fourth number, then the third one is assigned to both
+.Ar tx_rings
+and
+.Ar rx_rings .
.El
.Pp
.Nm
@@ -133,7 +251,7 @@
.Xr netmap 4
or
.Xr bpf 4
-but which is most often uses with
+but which is most often used with
.Xr netmap 4 .
The
.Ar interface name
@@ -146,7 +264,8 @@
.Nm
can peel off one or more of the transmit or receive rings for its own
use without interfering with packets that might otherwise be destined
-for the host. For example on a system with a Chelsio Network
+for the host.
+For example on a system with a Chelsio Network
Interface Card (NIC) the interface specification of
.Ar -i netmap:ncxl0
gives
@@ -156,20 +275,20 @@
system's TCP/IP stack.
.Sh EXAMPLES
Capture and count all packets arriving on the operating system's cxl0
-interface. Using this will block packets from reaching the operating
+interface.
+Using this will block packets from reaching the operating
system's network stack.
-.Dl
.Pp
.Nm
-i cxl0 -f rx
.Pp
Send a stream of fake DNS packets between two hosts with a packet
-length of 128 bytes. You must set the destination MAC address for
+length of 128 bytes.
+You must set the destination MAC address for
packets to be received by the target host.
.Pp
-.Dl
.Nm
--i netmap:ncxl0 -f tx -s 172.16.0.1:53 -d 172.16.1.3:53 -D 00:07:43:29:2a:e0
+-i netmap:ncxl0 -f tx -s 172.16.0.1:53 -d 172.16.1.3:53 -D 00:07:43:29:2a:e0
.Sh SEE ALSO
.Xr netmap 4 ,
.Xr bridge 8
Index: pkt-gen.c
===================================================================
--- pkt-gen.c
+++ pkt-gen.c
@@ -55,6 +55,11 @@
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
+#include <netinet/ip6.h>
+#ifdef linux
+#define IPV6_VERSION 0x60
+#define IPV6_DEFHLIM 64
+#endif
#include <assert.h>
#include <math.h>
@@ -66,16 +71,18 @@
#include "ctrs.h"
+static void usage(int);
+
#ifdef _WIN32
#define cpuset_t DWORD_PTR //uint64_t
static inline void CPU_ZERO(cpuset_t *p)
{
- *p = 0;
+ *p = 0;
}
static inline void CPU_SET(uint32_t i, cpuset_t *p)
{
- *p |= 1<< (i & 0x3f);
+ *p |= 1<< (i & 0x3f);
}
#define pthread_setaffinity_np(a, b, c) !SetThreadAffinityMask(a, *c) //((void)a, 0)
@@ -155,12 +162,12 @@
#define cpuset_t uint64_t // XXX
static inline void CPU_ZERO(cpuset_t *p)
{
- *p = 0;
+ *p = 0;
}
static inline void CPU_SET(uint32_t i, cpuset_t *p)
{
- *p |= 1<< (i & 0x3f);
+ *p |= 1<< (i & 0x3f);
}
#define pthread_setaffinity_np(a, b, c) ((void)a, 0)
@@ -169,7 +176,7 @@
#define IFF_PPROMISC IFF_PROMISC
#include <net/if_dl.h> /* LLADDR */
#define clock_gettime(a,b) \
- do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
+ do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
#endif /* __APPLE__ */
const char *default_payload="netmap pkt-gen DIRECT payload\n"
@@ -179,10 +186,8 @@
"http://info.iet.unipi.it/~luigi/netmap/ ";
int verbose = 0;
+int normalize = 1;
-#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */
-
-
#define VIRT_HDR_1 10 /* length of a base vnet-hdr */
#define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */
#define VIRT_HDR_MAX VIRT_HDR_2
@@ -195,14 +200,34 @@
struct pkt {
struct virt_header vh;
struct ether_header eh;
- struct ip ip;
- struct udphdr udp;
- uint8_t body[MAX_BODYSIZE]; // XXX hardwired
+ union {
+ struct {
+ struct ip ip;
+ struct udphdr udp;
+ uint8_t body[MAX_BODYSIZE]; /* hardwired */
+ } ipv4;
+ struct {
+ struct ip6_hdr ip;
+ struct udphdr udp;
+ uint8_t body[MAX_BODYSIZE]; /* hardwired */
+ } ipv6;
+ };
} __attribute__((__packed__));
+#define PKT(p, f, af) \
+ ((af) == AF_INET ? (p)->ipv4.f: (p)->ipv6.f)
+
struct ip_range {
char *name;
- uint32_t start, end; /* same as struct in_addr */
+ union {
+ struct {
+ uint32_t start, end; /* same as struct in_addr */
+ } ipv4;
+ struct {
+ struct in6_addr start, end;
+ uint8_t sgroup, egroup;
+ } ipv6;
+ };
uint16_t port0, port1;
};
@@ -227,15 +252,18 @@
*/
struct glob_arg {
+ int af; /* address family AF_INET/AF_INET6 */
struct ip_range src_ip;
struct ip_range dst_ip;
struct mac_range dst_mac;
struct mac_range src_mac;
int pkt_size;
+ int pkt_min_size;
int burst;
int forever;
uint64_t npackets; /* total packets to send */
- int frags; /* fragments per packet */
+ int frags; /* fragments per packet */
+ u_int mtu; /* size of each fragment */
int nthreads;
int cpus; /* cpus used for running */
int system_cpus; /* cpus on the system */
@@ -271,12 +299,12 @@
char *nmr_config;
int dummy_send;
int virt_header; /* send also the virt_header */
- int extra_bufs; /* goes in nr_arg3 */
- int extra_pipes; /* goes in nr_arg1 */
char *packet_file; /* -P option */
#define STATS_WIN 15
int win_idx;
int64_t win[STATS_WIN];
+ int wait_link;
+ int framing; /* #bits of framing (for bw output) */
};
enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };
@@ -304,79 +332,166 @@
struct pkt pkt;
void *frame;
+ uint16_t seed[3];
+ u_int frags;
+ u_int frag_size;
};
+static __inline uint16_t
+cksum_add(uint16_t sum, uint16_t a)
+{
+ uint16_t res;
+ res = sum + a;
+ return (res + (res < a));
+}
+
+static void
+extract_ipv4_addr(char *name, uint32_t *addr, uint16_t *port)
+{
+ struct in_addr a;
+ char *pp;
+
+ pp = strchr(name, ':');
+ if (pp != NULL) { /* do we have ports ? */
+ *pp++ = '\0';
+ *port = (uint16_t)strtol(pp, NULL, 0);
+ }
+
+ inet_pton(AF_INET, name, &a);
+ *addr = ntohl(a.s_addr);
+}
+
+static void
+extract_ipv6_addr(char *name, struct in6_addr *addr, uint16_t *port,
+ uint8_t *group)
+{
+ char *pp;
+
+ /*
+ * We accept IPv6 address in the following form:
+ * group@[2001:DB8::1001]:port (w/ brackets and port)
+ * group@[2001:DB8::1] (w/ brackets and w/o port)
+ * group@2001:DB8::1234 (w/o brackets and w/o port)
+ */
+ pp = strchr(name, '@');
+ if (pp != NULL) {
+ *pp++ = '\0';
+ *group = (uint8_t)strtol(name, NULL, 0);
+ if (*group > 7)
+ *group = 7;
+ name = pp;
+ }
+ if (name[0] == '[')
+ name++;
+ pp = strchr(name, ']');
+ if (pp != NULL)
+ *pp++ = '\0';
+ if (pp != NULL && *pp != ':')
+ pp = NULL;
+ if (pp != NULL) { /* do we have ports ? */
+ *pp++ = '\0';
+ *port = (uint16_t)strtol(pp, NULL, 0);
+ }
+ inet_pton(AF_INET6, name, addr);
+}
/*
* extract the extremes from a range of ipv4 addresses.
* addr_lo[-addr_hi][:port_lo[-port_hi]]
*/
-static void
-extract_ip_range(struct ip_range *r)
+static int
+extract_ip_range(struct ip_range *r, int af)
{
- char *ap, *pp;
+ char *name, *ap, start[INET6_ADDRSTRLEN];
+ char end[INET6_ADDRSTRLEN];
struct in_addr a;
+ uint32_t tmp;
if (verbose)
D("extract IP range from %s", r->name);
- r->port0 = r->port1 = 0;
- r->start = r->end = 0;
+ name = strdup(r->name);
+ if (name == NULL) {
+ D("strdup failed");
+ usage(-1);
+ }
/* the first - splits start/end of range */
- ap = index(r->name, '-'); /* do we have ports ? */
- if (ap) {
+ ap = strchr(name, '-');
+ if (ap != NULL)
*ap++ = '\0';
- }
- /* grab the initial values (mandatory) */
- pp = index(r->name, ':');
- if (pp) {
- *pp++ = '\0';
- r->port0 = r->port1 = strtol(pp, NULL, 0);
- };
- inet_aton(r->name, &a);
- r->start = r->end = ntohl(a.s_addr);
- if (ap) {
- pp = index(ap, ':');
- if (pp) {
- *pp++ = '\0';
- if (*pp)
- r->port1 = strtol(pp, NULL, 0);
+ r->port0 = 1234; /* default port */
+ if (af == AF_INET6) {
+ r->ipv6.sgroup = 7; /* default group */
+ extract_ipv6_addr(name, &r->ipv6.start, &r->port0,
+ &r->ipv6.sgroup);
+ } else
+ extract_ipv4_addr(name, &r->ipv4.start, &r->port0);
+
+ r->port1 = r->port0;
+ if (af == AF_INET6) {
+ if (ap != NULL) {
+ r->ipv6.egroup = r->ipv6.sgroup;
+ extract_ipv6_addr(ap, &r->ipv6.end, &r->port1,
+ &r->ipv6.egroup);
+ } else {
+ r->ipv6.end = r->ipv6.start;
+ r->ipv6.egroup = r->ipv6.sgroup;
}
- if (*ap) {
- inet_aton(ap, &a);
- r->end = ntohl(a.s_addr);
- }
+ } else {
+ if (ap != NULL) {
+ extract_ipv4_addr(ap, &r->ipv4.end, &r->port1);
+ if (r->ipv4.start > r->ipv4.end) {
+ tmp = r->ipv4.end;
+ r->ipv4.end = r->ipv4.start;
+ r->ipv4.start = tmp;
+ }
+ } else
+ r->ipv4.end = r->ipv4.start;
}
+
if (r->port0 > r->port1) {
- uint16_t tmp = r->port0;
+ tmp = r->port0;
r->port0 = r->port1;
r->port1 = tmp;
}
- if (r->start > r->end) {
- uint32_t tmp = r->start;
- r->start = r->end;
- r->end = tmp;
+ if (af == AF_INET) {
+ a.s_addr = htonl(r->ipv4.start);
+ inet_ntop(af, &a, start, sizeof(start));
+ a.s_addr = htonl(r->ipv4.end);
+ inet_ntop(af, &a, end, sizeof(end));
+ } else {
+ inet_ntop(af, &r->ipv6.start, start, sizeof(start));
+ inet_ntop(af, &r->ipv6.end, end, sizeof(end));
}
- {
- struct in_addr a;
- char buf1[16]; // one ip address
+ if (af == AF_INET)
+ D("range is %s:%d to %s:%d", start, r->port0, end, r->port1);
+ else
+ D("range is %d@[%s]:%d to %d@[%s]:%d", r->ipv6.sgroup,
+ start, r->port0, r->ipv6.egroup, end, r->port1);
- a.s_addr = htonl(r->end);
- strncpy(buf1, inet_ntoa(a), sizeof(buf1));
- a.s_addr = htonl(r->start);
- if (1)
- D("range is %s:%d to %s:%d",
- inet_ntoa(a), r->port0, buf1, r->port1);
- }
+ free(name);
+ if (r->port0 != r->port1 ||
+ (af == AF_INET && r->ipv4.start != r->ipv4.end) ||
+ (af == AF_INET6 &&
+ !IN6_ARE_ADDR_EQUAL(&r->ipv6.start, &r->ipv6.end)))
+ return (OPT_COPY);
+ return (0);
}
-static void
+static int
extract_mac_range(struct mac_range *r)
{
+ struct ether_addr *e;
if (verbose)
D("extract MAC range from %s", r->name);
- bcopy(ether_aton(r->name), &r->start, 6);
- bcopy(ether_aton(r->name), &r->end, 6);
+
+ e = ether_aton(r->name);
+ if (e == NULL) {
+ D("invalid MAC address '%s'", r->name);
+ return 1;
+ }
+ bcopy(e, &r->start, 6);
+ bcopy(e, &r->end, 6);
#if 0
bcopy(targ->src_mac, eh->ether_shost, 6);
p = index(targ->g->src_mac, '-');
@@ -391,6 +506,7 @@
#endif
if (verbose)
D("%s starts at %s", r->name, ether_ntoa(&r->start));
+ return 0;
}
static struct targ *targs;
@@ -456,7 +572,7 @@
/*
* parse the vale configuration in conf and put it in nmr.
* Return the flag set if necessary.
- * The configuration may consist of 0 to 4 numbers separated
+ * The configuration may consist of 1 to 4 numbers separated
* by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings.
* Missing numbers or zeroes stand for default values.
* As an additional convenience, if exactly one number
@@ -500,7 +616,7 @@
nmr->nr_rx_rings, nmr->nr_rx_slots);
free(w);
return (nmr->nr_tx_rings || nmr->nr_tx_slots ||
- nmr->nr_rx_rings || nmr->nr_rx_slots) ?
+ nmr->nr_rx_rings || nmr->nr_rx_slots) ?
NM_OPEN_RING_CFG : 0;
}
@@ -513,7 +629,6 @@
source_hwaddr(const char *ifname, char *buf)
{
struct ifaddrs *ifaphead, *ifap;
- int l = sizeof(ifap->ifa_name);
if (getifaddrs(&ifaphead) != 0) {
D("getifaddrs %s failed", ifname);
@@ -527,7 +642,7 @@
if (!sdl || sdl->sdl_family != AF_LINK)
continue;
- if (strncmp(ifap->ifa_name, ifname, l) != 0)
+ if (strncmp(ifap->ifa_name, ifname, IFNAMSIZ) != 0)
continue;
mac = (uint8_t *)LLADDR(sdl);
sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
@@ -562,19 +677,20 @@
return 0;
}
+
/* Compute the checksum of the given ip header. */
-static uint16_t
+static uint32_t
checksum(const void *data, uint16_t len, uint32_t sum)
{
- const uint8_t *addr = data;
+ const uint8_t *addr = data;
uint32_t i;
- /* Checksum all the pairs of bytes first... */
- for (i = 0; i < (len & ~1U); i += 2) {
- sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i)));
- if (sum > 0xFFFF)
- sum -= 0xFFFF;
- }
+ /* Checksum all the pairs of bytes first... */
+ for (i = 0; i < (len & ~1U); i += 2) {
+ sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i)));
+ if (sum > 0xFFFF)
+ sum -= 0xFFFF;
+ }
/*
* If there's a single byte left over, checksum it, too.
* Network byte order is big-endian, so the remaining byte is
@@ -588,8 +704,8 @@
return sum;
}
-static u_int16_t
-wrapsum(u_int32_t sum)
+static uint16_t
+wrapsum(uint32_t sum)
{
sum = ~sum & 0xFFFF;
return (htons(sum));
@@ -637,64 +753,198 @@
#define uh_sum check
#endif /* linux */
-/*
- * increment the addressed in the packet,
- * starting from the least significant field.
- * DST_IP DST_PORT SRC_IP SRC_PORT
- */
static void
-update_addresses(struct pkt *pkt, struct glob_arg *g)
+update_ip(struct pkt *pkt, struct targ *t)
{
- uint32_t a;
- uint16_t p;
- struct ip *ip = &pkt->ip;
- struct udphdr *udp = &pkt->udp;
+ struct glob_arg *g = t->g;
+ struct ip ip;
+ struct udphdr udp;
+ uint32_t oaddr, naddr;
+ uint16_t oport, nport;
+ uint16_t ip_sum, udp_sum;
- do {
- /* XXX for now it doesn't handle non-random src, random dst */
- if (g->options & OPT_RANDOM_SRC) {
- udp->uh_sport = random();
- ip->ip_src.s_addr = random();
- } else {
- p = ntohs(udp->uh_sport);
- if (p < g->src_ip.port1) { /* just inc, no wrap */
- udp->uh_sport = htons(p + 1);
+ memcpy(&ip, &pkt->ipv4.ip, sizeof(ip));
+ memcpy(&udp, &pkt->ipv4.udp, sizeof(udp));
+ do {
+ ip_sum = udp_sum = 0;
+ naddr = oaddr = ntohl(ip.ip_src.s_addr);
+ nport = oport = ntohs(udp.uh_sport);
+ if (g->options & OPT_RANDOM_SRC) {
+ ip.ip_src.s_addr = nrand48(t->seed);
+ udp.uh_sport = nrand48(t->seed);
+ naddr = ntohl(ip.ip_src.s_addr);
+ nport = ntohs(udp.uh_sport);
break;
}
- udp->uh_sport = htons(g->src_ip.port0);
-
- a = ntohl(ip->ip_src.s_addr);
- if (a < g->src_ip.end) { /* just inc, no wrap */
- ip->ip_src.s_addr = htonl(a + 1);
+ if (oport < g->src_ip.port1) {
+ nport = oport + 1;
+ udp.uh_sport = htons(nport);
break;
}
- ip->ip_src.s_addr = htonl(g->src_ip.start);
-
- udp->uh_sport = htons(g->src_ip.port0);
+ nport = g->src_ip.port0;
+ udp.uh_sport = htons(nport);
+ if (oaddr < g->src_ip.ipv4.end) {
+ naddr = oaddr + 1;
+ ip.ip_src.s_addr = htonl(naddr);
+ break;
+ }
+ naddr = g->src_ip.ipv4.start;
+ ip.ip_src.s_addr = htonl(naddr);
+ } while (0);
+ /* update checksums if needed */
+ if (oaddr != naddr) {
+ ip_sum = cksum_add(ip_sum, ~oaddr >> 16);
+ ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff);
+ ip_sum = cksum_add(ip_sum, naddr >> 16);
+ ip_sum = cksum_add(ip_sum, naddr & 0xffff);
}
-
- if (g->options & OPT_RANDOM_DST) {
- udp->uh_dport = random();
- ip->ip_dst.s_addr = random();
- } else {
- p = ntohs(udp->uh_dport);
- if (p < g->dst_ip.port1) { /* just inc, no wrap */
- udp->uh_dport = htons(p + 1);
+ if (oport != nport) {
+ udp_sum = cksum_add(udp_sum, ~oport);
+ udp_sum = cksum_add(udp_sum, nport);
+ }
+ do {
+ naddr = oaddr = ntohl(ip.ip_dst.s_addr);
+ nport = oport = ntohs(udp.uh_dport);
+ if (g->options & OPT_RANDOM_DST) {
+ ip.ip_dst.s_addr = nrand48(t->seed);
+ udp.uh_dport = nrand48(t->seed);
+ naddr = ntohl(ip.ip_dst.s_addr);
+ nport = ntohs(udp.uh_dport);
break;
}
- udp->uh_dport = htons(g->dst_ip.port0);
-
- a = ntohl(ip->ip_dst.s_addr);
- if (a < g->dst_ip.end) { /* just inc, no wrap */
- ip->ip_dst.s_addr = htonl(a + 1);
+ if (oport < g->dst_ip.port1) {
+ nport = oport + 1;
+ udp.uh_dport = htons(nport);
break;
}
+ nport = g->dst_ip.port0;
+ udp.uh_dport = htons(nport);
+ if (oaddr < g->dst_ip.ipv4.end) {
+ naddr = oaddr + 1;
+ ip.ip_dst.s_addr = htonl(naddr);
+ break;
+ }
+ naddr = g->dst_ip.ipv4.start;
+ ip.ip_dst.s_addr = htonl(naddr);
+ } while (0);
+ /* update checksums */
+ if (oaddr != naddr) {
+ ip_sum = cksum_add(ip_sum, ~oaddr >> 16);
+ ip_sum = cksum_add(ip_sum, ~oaddr & 0xffff);
+ ip_sum = cksum_add(ip_sum, naddr >> 16);
+ ip_sum = cksum_add(ip_sum, naddr & 0xffff);
}
- ip->ip_dst.s_addr = htonl(g->dst_ip.start);
- } while (0);
- // update checksum
+ if (oport != nport) {
+ udp_sum = cksum_add(udp_sum, ~oport);
+ udp_sum = cksum_add(udp_sum, nport);
+ }
+ if (udp_sum != 0)
+ udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(udp_sum));
+ if (ip_sum != 0) {
+ ip.ip_sum = ~cksum_add(~ip.ip_sum, htons(ip_sum));
+ udp.uh_sum = ~cksum_add(~udp.uh_sum, htons(ip_sum));
+ }
+ memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
+ memcpy(&pkt->ipv4.udp, &udp, sizeof(udp));
}
+#ifndef s6_addr16
+#define s6_addr16 __u6_addr.__u6_addr16
+#endif
+static void
+update_ip6(struct pkt *pkt, struct targ *t)
+{
+ struct glob_arg *g = t->g;
+ struct ip6_hdr ip6;
+ struct udphdr udp;
+ uint16_t udp_sum;
+ uint16_t oaddr, naddr;
+ uint16_t oport, nport;
+ uint8_t group;
+
+ memcpy(&ip6, &pkt->ipv6.ip, sizeof(ip6));
+ memcpy(&udp, &pkt->ipv6.udp, sizeof(udp));
+ do {
+ udp_sum = 0;
+ group = g->src_ip.ipv6.sgroup;
+ naddr = oaddr = ntohs(ip6.ip6_src.s6_addr16[group]);
+ nport = oport = ntohs(udp.uh_sport);
+ if (g->options & OPT_RANDOM_SRC) {
+ ip6.ip6_src.s6_addr16[group] = nrand48(t->seed);
+ udp.uh_sport = nrand48(t->seed);
+ naddr = ntohs(ip6.ip6_src.s6_addr16[group]);
+ nport = ntohs(udp.uh_sport);
+ break;
+ }
+ if (oport < g->src_ip.port1) {
+ nport = oport + 1;
+ udp.uh_sport = htons(nport);
+ break;
+ }
+ nport = g->src_ip.port0;
+ udp.uh_sport = htons(nport);
+ if (oaddr < ntohs(g->src_ip.ipv6.end.s6_addr16[group])) {
+ naddr = oaddr + 1;
+ ip6.ip6_src.s6_addr16[group] = htons(naddr);
+ break;
+ }
+ naddr = ntohs(g->src_ip.ipv6.start.s6_addr16[group]);
+ ip6.ip6_src.s6_addr16[group] = htons(naddr);
+ } while (0);
+ /* update checksums if needed */
+ if (oaddr != naddr)
+ udp_sum = cksum_add(~oaddr, naddr);
+ if (oport != nport)
+ udp_sum = cksum_add(udp_sum,
+ cksum_add(~oport, nport));
+ do {
+ group = g->dst_ip.ipv6.egroup;
+ naddr = oaddr = ntohs(ip6.ip6_dst.s6_addr16[group]);
+ nport = oport = ntohs(udp.uh_dport);
+ if (g->options & OPT_RANDOM_DST) {
+ ip6.ip6_dst.s6_addr16[group] = nrand48(t->seed);
+ udp.uh_dport = nrand48(t->seed);
+ naddr = ntohs(ip6.ip6_dst.s6_addr16[group]);
+ nport = ntohs(udp.uh_dport);
+ break;
+ }
+ if (oport < g->dst_ip.port1) {
+ nport = oport + 1;
+ udp.uh_dport = htons(nport);
+ break;
+ }
+ nport = g->dst_ip.port0;
+ udp.uh_dport = htons(nport);
+ if (oaddr < ntohs(g->dst_ip.ipv6.end.s6_addr16[group])) {
+ naddr = oaddr + 1;
+ ip6.ip6_dst.s6_addr16[group] = htons(naddr);
+ break;
+ }
+ naddr = ntohs(g->dst_ip.ipv6.start.s6_addr16[group]);
+ ip6.ip6_dst.s6_addr16[group] = htons(naddr);
+ } while (0);
+ /* update checksums */
+ if (oaddr != naddr)
+ udp_sum = cksum_add(udp_sum,
+ cksum_add(~oaddr, naddr));
+ if (oport != nport)
+ udp_sum = cksum_add(udp_sum,
+ cksum_add(~oport, nport));
+ if (udp_sum != 0)
+ udp.uh_sum = ~cksum_add(~udp.uh_sum, udp_sum);
+ memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6));
+ memcpy(&pkt->ipv6.udp, &udp, sizeof(udp));
+}
+
+static void
+update_addresses(struct pkt *pkt, struct targ *t)
+{
+
+ if (t->g->af == AF_INET)
+ update_ip(pkt, t);
+ else
+ update_ip6(pkt, t);
+}
/*
* initialize one packet and prepare for the next one.
* The copy could be done better instead of repeating it each time.
@@ -704,9 +954,12 @@
{
struct pkt *pkt = &targ->pkt;
struct ether_header *eh;
- struct ip *ip;
- struct udphdr *udp;
- uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip);
+ struct ip6_hdr ip6;
+ struct ip ip;
+ struct udphdr udp;
+ void *udp_ptr;
+ uint16_t paylen;
+ uint32_t csum = 0;
const char *payload = targ->g->options & OPT_INDIRECT ?
indirect_payload : default_payload;
int i, l0 = strlen(payload);
@@ -716,7 +969,7 @@
pcap_t *file;
struct pcap_pkthdr *header;
const unsigned char *packet;
-
+
/* Read a packet from a PCAP file if asked. */
if (targ->g->packet_file != NULL) {
if ((file = pcap_open_offline(targ->g->packet_file,
@@ -735,49 +988,80 @@
}
#endif
+ paylen = targ->g->pkt_size - sizeof(*eh) -
+ (targ->g->af == AF_INET ? sizeof(ip): sizeof(ip6));
+
/* create a nice NUL-terminated string */
for (i = 0; i < paylen; i += l0) {
if (l0 > paylen - i)
l0 = paylen - i; // last round
- bcopy(payload, pkt->body + i, l0);
+ bcopy(payload, PKT(pkt, body, targ->g->af) + i, l0);
}
- pkt->body[i-1] = '\0';
- ip = &pkt->ip;
+ PKT(pkt, body, targ->g->af)[i - 1] = '\0';
/* prepare the headers */
- ip->ip_v = IPVERSION;
- ip->ip_hl = 5;
- ip->ip_id = 0;
- ip->ip_tos = IPTOS_LOWDELAY;
- ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh));
- ip->ip_id = 0;
- ip->ip_off = htons(IP_DF); /* Don't fragment */
- ip->ip_ttl = IPDEFTTL;
- ip->ip_p = IPPROTO_UDP;
- ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start);
- ip->ip_src.s_addr = htonl(targ->g->src_ip.start);
- ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0));
-
-
- udp = &pkt->udp;
- udp->uh_sport = htons(targ->g->src_ip.port0);
- udp->uh_dport = htons(targ->g->dst_ip.port0);
- udp->uh_ulen = htons(paylen);
- /* Magic: taken from sbin/dhclient/packet.c */
- udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp),
- checksum(pkt->body,
- paylen - sizeof(*udp),
- checksum(&ip->ip_src, 2 * sizeof(ip->ip_src),
- IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen)
- )
- )
- ));
-
eh = &pkt->eh;
bcopy(&targ->g->src_mac.start, eh->ether_shost, 6);
bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6);
- eh->ether_type = htons(ETHERTYPE_IP);
+ if (targ->g->af == AF_INET) {
+ eh->ether_type = htons(ETHERTYPE_IP);
+ memcpy(&ip, &pkt->ipv4.ip, sizeof(ip));
+ udp_ptr = &pkt->ipv4.udp;
+ ip.ip_v = IPVERSION;
+ ip.ip_hl = sizeof(ip) >> 2;
+ ip.ip_id = 0;
+ ip.ip_tos = IPTOS_LOWDELAY;
+ ip.ip_len = htons(targ->g->pkt_size - sizeof(*eh));
+ ip.ip_id = 0;
+ ip.ip_off = htons(IP_DF); /* Don't fragment */
+ ip.ip_ttl = IPDEFTTL;
+ ip.ip_p = IPPROTO_UDP;
+ ip.ip_dst.s_addr = htonl(targ->g->dst_ip.ipv4.start);
+ ip.ip_src.s_addr = htonl(targ->g->src_ip.ipv4.start);
+ ip.ip_sum = wrapsum(checksum(&ip, sizeof(ip), 0));
+ memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
+ } else {
+ eh->ether_type = htons(ETHERTYPE_IPV6);
+ memcpy(&ip6, &pkt->ipv4.ip, sizeof(ip6));
+ udp_ptr = &pkt->ipv6.udp;
+ ip6.ip6_flow = 0;
+ ip6.ip6_plen = htons(paylen);
+ ip6.ip6_vfc = IPV6_VERSION;
+ ip6.ip6_nxt = IPPROTO_UDP;
+ ip6.ip6_hlim = IPV6_DEFHLIM;
+ ip6.ip6_src = targ->g->src_ip.ipv6.start;
+ ip6.ip6_dst = targ->g->dst_ip.ipv6.start;
+ }
+ memcpy(&udp, udp_ptr, sizeof(udp));
+
+ udp.uh_sport = htons(targ->g->src_ip.port0);
+ udp.uh_dport = htons(targ->g->dst_ip.port0);
+ udp.uh_ulen = htons(paylen);
+ if (targ->g->af == AF_INET) {
+ /* Magic: taken from sbin/dhclient/packet.c */
+ udp.uh_sum = wrapsum(
+ checksum(&udp, sizeof(udp), /* udp header */
+ checksum(pkt->ipv4.body, /* udp payload */
+ paylen - sizeof(udp),
+ checksum(&pkt->ipv4.ip.ip_src, /* pseudo header */
+ 2 * sizeof(pkt->ipv4.ip.ip_src),
+ IPPROTO_UDP + (u_int32_t)ntohs(udp.uh_ulen)))));
+ memcpy(&pkt->ipv4.ip, &ip, sizeof(ip));
+ } else {
+ /* Save part of pseudo header checksum into csum */
+ csum = IPPROTO_UDP << 24;
+ csum = checksum(&csum, sizeof(csum), paylen);
+ udp.uh_sum = wrapsum(
+ checksum(udp_ptr, sizeof(udp), /* udp header */
+ checksum(pkt->ipv6.body, /* udp payload */
+ paylen - sizeof(udp),
+ checksum(&pkt->ipv6.ip.ip6_src, /* pseudo header */
+ 2 * sizeof(pkt->ipv6.ip.ip6_src), csum))));
+ memcpy(&pkt->ipv6.ip, &ip6, sizeof(ip6));
+ }
+ memcpy(udp_ptr, &udp, sizeof(udp));
+
bzero(&pkt->vh, sizeof(pkt->vh));
// dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0);
}
@@ -825,7 +1109,6 @@
}
}
-
/*
* create and enqueue a batch of packets on a ring.
* On the last one set NS_REPORT to tell the driver to generate
@@ -833,19 +1116,14 @@
*/
static int
send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame,
- int size, struct glob_arg *g, u_int count, int options,
- u_int nfrags)
+ int size, struct targ *t, u_int count, int options)
{
u_int n, sent, cur = ring->cur;
- u_int fcnt;
+ u_int frags = t->frags;
+ u_int frag_size = t->frag_size;
+ struct netmap_slot *slot = &ring->slot[cur];
n = nm_ring_space(ring);
- if (n < count)
- count = n;
- if (count < nfrags) {
- D("truncating packet, no room for frags %d %d",
- count, nfrags);
- }
#if 0
if (options & (OPT_COPY | OPT_PREFETCH) ) {
for (sent = 0; sent < count; sent++) {
@@ -858,11 +1136,15 @@
cur = ring->cur;
}
#endif
- for (fcnt = nfrags, sent = 0; sent < count; sent++) {
- struct netmap_slot *slot = &ring->slot[cur];
- char *p = NETMAP_BUF(ring, slot->buf_idx);
- int buf_changed = slot->flags & NS_BUF_CHANGED;
+ for (sent = 0; sent < count && n >= frags; sent++, n--) {
+ char *p;
+ int buf_changed;
+ u_int tosend = size;
+ slot = &ring->slot[cur];
+ p = NETMAP_BUF(ring, slot->buf_idx);
+ buf_changed = slot->flags & NS_BUF_CHANGED;
+
slot->flags = 0;
if (options & OPT_RUBBISH) {
/* do nothing */
@@ -869,31 +1151,49 @@
} else if (options & OPT_INDIRECT) {
slot->flags |= NS_INDIRECT;
slot->ptr = (uint64_t)((uintptr_t)frame);
- } else if ((options & OPT_COPY) || buf_changed) {
- nm_pkt_copy(frame, p, size);
- if (fcnt == nfrags)
- update_addresses(pkt, g);
- } else if (options & OPT_MEMCPY) {
- memcpy(p, frame, size);
- if (fcnt == nfrags)
- update_addresses(pkt, g);
+ } else if (frags > 1) {
+ u_int i;
+ const char *f = frame;
+ char *fp = p;
+ for (i = 0; i < frags - 1; i++) {
+ memcpy(fp, f, frag_size);
+ slot->len = frag_size;
+ slot->flags = NS_MOREFRAG;
+ if (options & OPT_DUMP)
+ dump_payload(fp, frag_size, ring, cur);
+ tosend -= frag_size;
+ f += frag_size;
+ cur = nm_ring_next(ring, cur);
+ slot = &ring->slot[cur];
+ fp = NETMAP_BUF(ring, slot->buf_idx);
+ }
+ n -= (frags - 1);
+ p = fp;
+ slot->flags = 0;
+ memcpy(p, f, tosend);
+ update_addresses(pkt, t);
+ } else if ((options & (OPT_COPY | OPT_MEMCPY)) || buf_changed) {
+ if (options & OPT_COPY)
+ nm_pkt_copy(frame, p, size);
+ else
+ memcpy(p, frame, size);
+ update_addresses(pkt, t);
} else if (options & OPT_PREFETCH) {
__builtin_prefetch(p);
}
+ slot->len = tosend;
if (options & OPT_DUMP)
- dump_payload(p, size, ring, cur);
- slot->len = size;
- if (--fcnt > 0)
- slot->flags |= NS_MOREFRAG;
- else
- fcnt = nfrags;
- if (sent == count - 1) {
- slot->flags &= ~NS_MOREFRAG;
- slot->flags |= NS_REPORT;
- }
+ dump_payload(p, tosend, ring, cur);
cur = nm_ring_next(ring, cur);
}
- ring->head = ring->cur = cur;
+ if (sent) {
+ slot->flags |= NS_REPORT;
+ ring->head = ring->cur = cur;
+ }
+ if (sent < count) {
+ /* tell netmap that we need more slots */
+ ring->cur = ring->tail;
+ }
return (sent);
}
@@ -914,28 +1214,47 @@
}
/*
+ * Wait until ts, sleeping via poll() while more than 1ms remains and busy-waiting otherwise.
+ * Return the wakeup time.
+ */
+static struct timespec
+wait_time(struct timespec ts)
+{
+ for (;;) {
+ struct timespec w, cur;
+ clock_gettime(CLOCK_REALTIME_PRECISE, &cur);
+ w = timespec_sub(ts, cur);
+ if (w.tv_sec < 0)
+ return cur;
+ else if (w.tv_sec > 0 || w.tv_nsec > 1000000)
+ poll(NULL, 0, 1);
+ }
+}
+
+/*
* Send a packet, and wait for a response.
* The payload (after UDP header, ofs 42) has a 4-byte sequence
* followed by a struct timeval (or bintime?)
*/
-#define PAY_OFS 42 /* where in the pkt... */
static void *
-pinger_body(void *data)
+ping_body(void *data)
{
struct targ *targ = (struct targ *) data;
struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
struct netmap_if *nifp = targ->nmd->nifp;
- int i, rx = 0;
+ int i, m, rx = 0;
void *frame;
int size;
struct timespec ts, now, last_print;
+ struct timespec nexttime = {0, 0}; /* silence compiler */
uint64_t sent = 0, n = targ->g->npackets;
uint64_t count = 0, t_cur, t_min = ~0, av = 0;
+ uint64_t g_min = ~0, g_av = 0;
uint64_t buckets[64]; /* bins for delays, ns */
+ int rate_limit = targ->g->tx_rate, tosend = 0;
- frame = &targ->pkt;
- frame += sizeof(targ->pkt.vh) - targ->g->virt_header;
+ frame = (char*)&targ->pkt + sizeof(targ->pkt.vh) - targ->g->virt_header;
size = targ->g->pkt_size + targ->g->virt_header;
@@ -947,38 +1266,75 @@
bzero(&buckets, sizeof(buckets));
clock_gettime(CLOCK_REALTIME_PRECISE, &last_print);
now = last_print;
+ if (rate_limit) {
+ targ->tic = timespec_add(now, (struct timespec){2,0});
+ targ->tic.tv_nsec = 0;
+ wait_time(targ->tic);
+ nexttime = targ->tic;
+ }
while (!targ->cancel && (n == 0 || sent < n)) {
- struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
+ struct netmap_ring *ring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
struct netmap_slot *slot;
char *p;
- for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */
- slot = &ring->slot[ring->cur];
- slot->len = size;
- p = NETMAP_BUF(ring, slot->buf_idx);
+ int rv;
+ uint64_t limit, event = 0;
- if (nm_ring_empty(ring)) {
- D("-- ouch, cannot send");
- } else {
- struct tstamp *tp;
- nm_pkt_copy(frame, p, size);
- clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
- bcopy(&sent, p+42, sizeof(sent));
- tp = (struct tstamp *)(p+46);
- tp->sec = (uint32_t)ts.tv_sec;
- tp->nsec = (uint32_t)ts.tv_nsec;
- sent++;
- ring->head = ring->cur = nm_ring_next(ring, ring->cur);
+ if (rate_limit && tosend <= 0) {
+ tosend = targ->g->burst;
+ nexttime = timespec_add(nexttime, targ->g->tx_period);
+ wait_time(nexttime);
}
- }
+
+ limit = rate_limit ? tosend : targ->g->burst;
+ if (n > 0 && n - sent < limit)
+ limit = n - sent;
+ for (m = 0; (unsigned)m < limit; m++) {
+ slot = &ring->slot[ring->cur];
+ slot->len = size;
+ p = NETMAP_BUF(ring, slot->buf_idx);
+
+ if (nm_ring_empty(ring)) {
+ D("-- ouch, cannot send");
+ break;
+ } else {
+ struct tstamp *tp;
+ nm_pkt_copy(frame, p, size);
+ clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
+ bcopy(&sent, p+42, sizeof(sent));
+ tp = (struct tstamp *)(p+46);
+ tp->sec = (uint32_t)ts.tv_sec;
+ tp->nsec = (uint32_t)ts.tv_nsec;
+ sent++;
+ ring->head = ring->cur = nm_ring_next(ring, ring->cur);
+ }
+ }
+ if (m > 0)
+ event++;
+ targ->ctr.pkts = sent;
+ targ->ctr.bytes = sent*size;
+ targ->ctr.events = event;
+ if (rate_limit)
+ tosend -= m;
+#ifdef BUSYWAIT
+ rv = ioctl(pfd.fd, NIOCTXSYNC, NULL);
+ if (rv < 0) {
+ D("TXSYNC error on queue %d: %s", targ->me,
+ strerror(errno));
+ }
+ again:
+ ioctl(pfd.fd, NIOCRXSYNC, NULL);
+#else
/* should use a parameter to decide how often to send */
- if (poll(&pfd, 1, 3000) <= 0) {
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ if ( (rv = poll(&pfd, 1, 3000)) <= 0) {
+ D("poll error on queue %d: %s", targ->me,
+ (rv ? strerror(errno) : "timeout"));
continue;
}
+#endif /* BUSYWAIT */
/* see what we got back */
- for (i = targ->nmd->first_tx_ring;
- i <= targ->nmd->last_tx_ring; i++) {
+ rx = 0;
+ for (i = targ->nmd->first_rx_ring;
+ i <= targ->nmd->last_rx_ring; i++) {
ring = NETMAP_RXRING(nifp, i);
while (!nm_ring_empty(ring)) {
uint32_t seq;
@@ -999,7 +1355,8 @@
ts.tv_nsec += 1000000000;
ts.tv_sec--;
}
- if (0) D("seq %d/%lu delta %d.%09d", seq, sent,
+ if (0) D("seq %d/%llu delta %d.%09d", seq,
+ (unsigned long long)sent,
(int)ts.tv_sec, (int)ts.tv_nsec);
t_cur = ts.tv_sec * 1000000000UL + ts.tv_nsec;
if (t_cur < t_min)
@@ -1024,7 +1381,7 @@
if (ts.tv_sec >= 1) {
D("count %d RTT: min %d av %d ns",
(int)count, (int)t_min, (int)(av/count));
- int k, j, kmin;
+ int k, j, kmin, off;
char buf[512];
for (kmin = 0; kmin < 64; kmin ++)
@@ -1034,17 +1391,33 @@
if (buckets[k])
break;
buf[0] = '\0';
- for (j = kmin; j <= k; j++)
- sprintf(buf, "%s %5d", buf, (int)buckets[j]);
+ off = 0;
+ for (j = kmin; j <= k; j++) {
+ off += sprintf(buf + off, " %5d", (int)buckets[j]);
+ }
D("k: %d .. %d\n\t%s", 1<<kmin, 1<<k, buf);
bzero(&buckets, sizeof(buckets));
count = 0;
+ g_av += av;
av = 0;
+ if (t_min < g_min)
+ g_min = t_min;
t_min = ~0;
last_print = now;
}
+#ifdef BUSYWAIT
+ if (rx < m && ts.tv_sec <= 3 && !targ->cancel)
+ goto again;
+#endif /* BUSYWAIT */
}
+ if (sent > 0) {
+ D("RTT over %llu packets: min %d av %d ns",
+ (long long unsigned)sent, (int)g_min,
+ (int)((double)g_av/sent));
+ }
+ targ->completed = 1;
+
/* reset the ``used`` flag. */
targ->used = 0;
@@ -1056,7 +1429,7 @@
* reply to ping requests
*/
static void *
-ponger_body(void *data)
+pong_body(void *data)
{
struct targ *targ = (struct targ *) data;
struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
@@ -1069,7 +1442,9 @@
D("can only reply ping with 1 thread");
return NULL;
}
- D("understood ponger %lu but don't know how to do it", n);
+ if (n > 0)
+ D("understood ponger %llu but don't know how to do it",
+ (unsigned long long)n);
while (!targ->cancel && (n == 0 || sent < n)) {
uint32_t txcur, txavail;
//#define BUSYWAIT
@@ -1076,13 +1451,14 @@
#ifdef BUSYWAIT
ioctl(pfd.fd, NIOCRXSYNC, NULL);
#else
- if (poll(&pfd, 1, 1000) <= 0) {
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ int rv;
+ if ( (rv = poll(&pfd, 1, 1000)) <= 0) {
+ D("poll error on queue %d: %s", targ->me,
+ rv ? strerror(errno) : "timeout");
continue;
}
#endif
- txring = NETMAP_TXRING(nifp, 0);
+ txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
txcur = txring->cur;
txavail = nm_ring_space(txring);
/* see what we got back */
@@ -1105,6 +1481,7 @@
dpkt = (uint16_t *)dst;
spkt = (uint16_t *)src;
nm_pkt_copy(src, dst, slot->len);
+ /* swap source and destination MAC */
dpkt[0] = spkt[3];
dpkt[1] = spkt[4];
dpkt[2] = spkt[5];
@@ -1112,7 +1489,6 @@
dpkt[4] = spkt[1];
dpkt[5] = spkt[2];
txring->slot[txcur].len = slot->len;
- /* XXX swap src dst mac */
txcur = nm_ring_next(txring, txcur);
txavail--;
sent++;
@@ -1126,6 +1502,8 @@
//D("tx %d rx %d", sent, rx);
}
+ targ->completed = 1;
+
/* reset the ``used`` flag. */
targ->used = 0;
@@ -1133,24 +1511,6 @@
}
-/*
- * wait until ts, either busy or sleeping if more than 1ms.
- * Return wakeup time.
- */
-static struct timespec
-wait_time(struct timespec ts)
-{
- for (;;) {
- struct timespec w, cur;
- clock_gettime(CLOCK_REALTIME_PRECISE, &cur);
- w = timespec_sub(ts, cur);
- if (w.tv_sec < 0)
- return cur;
- else if (w.tv_sec > 0 || w.tv_nsec > 1000000)
- poll(NULL, 0, 1);
- }
-}
-
static void *
sender_body(void *data)
{
@@ -1170,14 +1530,13 @@
int size;
if (targ->frame == NULL) {
- frame = pkt;
- frame += sizeof(pkt->vh) - targ->g->virt_header;
+ frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header;
size = targ->g->pkt_size + targ->g->virt_header;
} else {
frame = targ->frame;
size = targ->g->pkt_size;
}
-
+
D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
if (setaffinity(targ->thread, targ->affinity))
goto quit;
@@ -1190,13 +1549,13 @@
wait_time(targ->tic);
nexttime = targ->tic;
}
- if (targ->g->dev_type == DEV_TAP) {
+ if (targ->g->dev_type == DEV_TAP) {
D("writing to file desc %d", targ->g->main_fd);
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
if (write(targ->g->main_fd, frame, size) != -1)
sent++;
- update_addresses(pkt, targ->g);
+ update_addresses(pkt, targ);
if (i > 10000) {
targ->ctr.pkts = sent;
targ->ctr.bytes = sent*size;
@@ -1211,7 +1570,7 @@
for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
if (pcap_inject(p, frame, size) != -1)
sent++;
- update_addresses(pkt, targ->g);
+ update_addresses(pkt, targ);
if (i > 10000) {
targ->ctr.pkts = sent;
targ->ctr.bytes = sent*size;
@@ -1222,10 +1581,23 @@
#endif /* NO_PCAP */
} else {
int tosend = 0;
- int frags = targ->g->frags;
+ u_int bufsz, mtu = targ->g->mtu;
nifp = targ->nmd->nifp;
+ txring = NETMAP_TXRING(nifp, targ->nmd->first_tx_ring);
+ bufsz = txring->nr_buf_size;
+ if (bufsz < mtu)
+ mtu = bufsz;
+ targ->frag_size = targ->g->pkt_size / targ->frags;
+ if (targ->frag_size > mtu) {
+ targ->frags = targ->g->pkt_size / mtu;
+ targ->frag_size = mtu;
+ if (targ->g->pkt_size % mtu != 0)
+ targ->frags++;
+ }
+ D("frags %u frag_size %u", targ->frags, targ->frag_size);
while (!targ->cancel && (n == 0 || sent < n)) {
+ int rv;
if (rate_limit && tosend <= 0) {
tosend = targ->g->burst;
@@ -1237,6 +1609,7 @@
* wait for available room in the send queue(s)
*/
#ifdef BUSYWAIT
+ (void)rv;
if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
D("ioctl error on queue %d: %s", targ->me,
strerror(errno));
@@ -1243,11 +1616,11 @@
goto quit;
}
#else /* !BUSYWAIT */
- if (poll(&pfd, 1, 2000) <= 0) {
+ if ( (rv = poll(&pfd, 1, 2000)) <= 0) {
if (targ->cancel)
break;
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ D("poll error on queue %d: %s", targ->me,
+ rv ? strerror(errno) : "timeout");
// goto quit;
}
if (pfd.revents & POLLERR) {
@@ -1266,23 +1639,30 @@
for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
int m;
uint64_t limit = rate_limit ? tosend : targ->g->burst;
+
+ if (n > 0 && n == sent)
+ break;
+
if (n > 0 && n - sent < limit)
limit = n - sent;
txring = NETMAP_TXRING(nifp, i);
if (nm_ring_empty(txring))
continue;
- if (frags > 1)
- limit = ((limit + frags - 1) / frags) * frags;
- m = send_packets(txring, pkt, frame, size, targ->g,
- limit, options, frags);
- ND("limit %d tail %d frags %d m %d",
- limit, txring->tail, frags, m);
+ if (targ->g->pkt_min_size > 0) {
+ size = nrand48(targ->seed) %
+ (targ->g->pkt_size - targ->g->pkt_min_size) +
+ targ->g->pkt_min_size;
+ }
+ m = send_packets(txring, pkt, frame, size, targ,
+ limit, options);
+ ND("limit %lu tail %d m %d",
+ limit, txring->tail, m);
sent += m;
if (m > 0) //XXX-ste: can m be 0?
event++;
targ->ctr.pkts = sent;
- targ->ctr.bytes = sent*size;
+ targ->ctr.bytes += m*size;
targ->ctr.events = event;
if (rate_limit) {
tosend -= m;
@@ -1292,10 +1672,12 @@
}
}
/* flush any remaining packets */
- D("flush tail %d head %d on thread %p",
- txring->tail, txring->head,
- (void *)pthread_self());
- ioctl(pfd.fd, NIOCTXSYNC, NULL);
+ if (txring != NULL) {
+ D("flush tail %d head %d on thread %p",
+ txring->tail, txring->head,
+ (void *)pthread_self());
+ ioctl(pfd.fd, NIOCTXSYNC, NULL);
+ }
/* final part: wait all the TX queues to be empty. */
for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
@@ -1340,6 +1722,7 @@
{
u_int cur, rx, n;
uint64_t b = 0;
+ u_int complete = 0;
if (bytes == NULL)
bytes = &b;
@@ -1355,12 +1738,14 @@
*bytes += slot->len;
if (dump)
dump_payload(p, slot->len, ring, cur);
+ if (!(slot->flags & NS_MOREFRAG))
+ complete++;
cur = nm_ring_next(ring, cur);
}
ring->head = ring->cur = cur;
- return (rx);
+ return (complete);
}
static void *
@@ -1373,8 +1758,7 @@
int i;
struct my_ctrs cur;
- cur.pkts = cur.bytes = cur.events = cur.min_space = 0;
- cur.t.tv_usec = cur.t.tv_sec = 0; // unused, just silence the compiler
+ memset(&cur, 0, sizeof(cur));
if (setaffinity(targ->thread, targ->affinity))
goto quit;
@@ -1386,6 +1770,14 @@
i = poll(&pfd, 1, 1000);
if (i > 0 && !(pfd.revents & POLLERR))
break;
+ if (i < 0) {
+ D("poll() error: %s", strerror(errno));
+ goto quit;
+ }
+ if (pfd.revents & POLLERR) {
+ D("fd error");
+ goto quit;
+ }
RD(1, "waiting for initial packets, poll returns %d %d",
i, pfd.revents);
}
@@ -1408,7 +1800,7 @@
/* XXX should we poll ? */
pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap,
(u_char *)&targ->ctr);
- targ->ctr.events++;
+ targ->ctr.events++;
}
#endif /* !NO_PCAP */
} else {
@@ -1451,7 +1843,7 @@
m = receive_packets(rxring, targ->g->burst, dump, &cur.bytes);
cur.pkts += m;
- if (m > 0) //XXX-ste: can m be 0?
+ if (m > 0)
cur.events++;
}
cur.min_space = targ->ctr.min_space;
@@ -1503,8 +1895,7 @@
D("Ignoring -n argument");
}
- frame = pkt;
- frame += sizeof(pkt->vh) - targ->g->virt_header;
+ frame = (char *)pkt + sizeof(pkt->vh) - targ->g->virt_header;
size = targ->g->pkt_size + targ->g->virt_header;
D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
@@ -1527,6 +1918,8 @@
unsigned int space;
unsigned int head;
int fcnt;
+ uint16_t sum = 0;
+ int rv;
if (!rate_limit) {
budget = targ->g->burst;
@@ -1538,11 +1931,20 @@
}
/* wait for available room in the send queue */
- if (poll(&pfd, 1, 2000) <= 0) {
+#ifdef BUSYWAIT
+ (void)rv;
+ if (ioctl(pfd.fd, NIOCTXSYNC, NULL) < 0) {
+ D("ioctl error on queue %d: %s", targ->me,
+ strerror(errno));
+ goto quit;
+ }
+#else /* !BUSYWAIT */
+ if ( (rv = poll(&pfd, 1, 2000)) <= 0) {
if (targ->cancel)
break;
- D("poll error/timeout on queue %d: %s", targ->me,
- strerror(errno));
+ D("poll error on queue %d: %s", targ->me,
+ rv ? strerror(errno) : "timeout");
+ // goto quit;
}
if (pfd.revents & POLLERR) {
D("poll error on %d ring %d-%d", pfd.fd,
@@ -1549,6 +1951,7 @@
targ->nmd->first_tx_ring, targ->nmd->last_tx_ring);
goto quit;
}
+#endif /* !BUSYWAIT */
/* If no room poll() again. */
space = nm_ring_space(ring);
@@ -1573,15 +1976,23 @@
sent < limit; sent++, sequence++) {
struct netmap_slot *slot = &ring->slot[head];
char *p = NETMAP_BUF(ring, slot->buf_idx);
+ uint16_t *w = (uint16_t *)PKT(pkt, body, targ->g->af), t;
+ memcpy(&sum, targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, sizeof(sum));
+
slot->flags = 0;
- pkt->body[0] = sequence >> 24;
- pkt->body[1] = (sequence >> 16) & 0xff;
- pkt->body[2] = (sequence >> 8) & 0xff;
- pkt->body[3] = sequence & 0xff;
+ t = *w;
+ PKT(pkt, body, targ->g->af)[0] = sequence >> 24;
+ PKT(pkt, body, targ->g->af)[1] = (sequence >> 16) & 0xff;
+ sum = ~cksum_add(~sum, cksum_add(~t, *w));
+ t = *++w;
+ PKT(pkt, body, targ->g->af)[2] = (sequence >> 8) & 0xff;
+ PKT(pkt, body, targ->g->af)[3] = sequence & 0xff;
+ sum = ~cksum_add(~sum, cksum_add(~t, *w));
+ memcpy(targ->g->af == AF_INET ? &pkt->ipv4.udp.uh_sum : &pkt->ipv6.udp.uh_sum, &sum, sizeof(sum));
nm_pkt_copy(frame, p, size);
if (fcnt == frags) {
- update_addresses(pkt, targ->g);
+ update_addresses(pkt, targ);
}
if (options & OPT_DUMP) {
@@ -1675,19 +2086,25 @@
int dump = targ->g->options & OPT_DUMP;
struct netmap_ring *ring;
unsigned int frags_exp = 1;
- uint32_t seq_exp = 0;
struct my_ctrs cur;
unsigned int frags = 0;
int first_packet = 1;
int first_slot = 1;
- int i;
+ int i, j, af, nrings;
+ uint32_t seq, *seq_exp = NULL;
- cur.pkts = cur.bytes = cur.events = cur.min_space = 0;
- cur.t.tv_usec = cur.t.tv_sec = 0; // unused, just silence the compiler
+ memset(&cur, 0, sizeof(cur));
if (setaffinity(targ->thread, targ->affinity))
goto quit;
+ nrings = targ->nmd->last_rx_ring - targ->nmd->first_rx_ring + 1;
+ seq_exp = calloc(nrings, sizeof(uint32_t));
+ if (seq_exp == NULL) {
+ D("failed to allocate seq array");
+ goto quit;
+ }
+
D("reading from %s fd %d main_fd %d",
targ->g->ifname, targ->fd, targ->g->main_fd);
/* unbounded wait for the first packet. */
@@ -1701,15 +2118,18 @@
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
- ring = NETMAP_RXRING(targ->nmd->nifp, targ->nmd->first_rx_ring);
while (!targ->cancel) {
unsigned int head;
- uint32_t seq;
int limit;
- /* Once we started to receive packets, wait at most 1 seconds
- before quitting. */
+#ifdef BUSYWAIT
+ if (ioctl(pfd.fd, NIOCRXSYNC, NULL) < 0) {
+ D("ioctl error on queue %d: %s", targ->me,
+ strerror(errno));
+ goto quit;
+ }
+#else /* !BUSYWAIT */
if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) {
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
targ->toc.tv_sec -= 1; /* Subtract timeout time. */
@@ -1720,108 +2140,123 @@
D("poll err");
goto quit;
}
+#endif /* !BUSYWAIT */
- if (nm_ring_empty(ring))
- continue;
+ for (j = targ->nmd->first_rx_ring; j <= targ->nmd->last_rx_ring; j++) {
+ ring = NETMAP_RXRING(targ->nmd->nifp, j);
+ if (nm_ring_empty(ring))
+ continue;
- limit = nm_ring_space(ring);
- if (limit > targ->g->burst)
- limit = targ->g->burst;
+ limit = nm_ring_space(ring);
+ if (limit > targ->g->burst)
+ limit = targ->g->burst;
#if 0
- /* Enable this if
- * 1) we remove the early-return optimization from
- * the netmap poll implementation, or
- * 2) pipes get NS_MOREFRAG support.
- * With the current netmap implementation, an experiment like
- * pkt-gen -i vale:1{1 -f txseq -F 9
- * pkt-gen -i vale:1}1 -f rxseq
- * would get stuck as soon as we find nm_ring_space(ring) < 9,
- * since here limit is rounded to 0 and
- * pipe rxsync is not called anymore by the poll() of this loop.
- */
- if (frags_exp > 1) {
- int o = limit;
- /* Cut off to the closest smaller multiple. */
- limit = (limit / frags_exp) * frags_exp;
- RD(2, "LIMIT %d --> %d", o, limit);
- }
+ /* Enable this if
+ * 1) we remove the early-return optimization from
+ * the netmap poll implementation, or
+ * 2) pipes get NS_MOREFRAG support.
+ * With the current netmap implementation, an experiment like
+ * pkt-gen -i vale:1{1 -f txseq -F 9
+ * pkt-gen -i vale:1}1 -f rxseq
+ * would get stuck as soon as we find nm_ring_space(ring) < 9,
+ * since here limit is rounded to 0 and
+ * pipe rxsync is not called anymore by the poll() of this loop.
+ */
+ if (frags_exp > 1) {
+ int o = limit;
+ /* Cut off to the closest smaller multiple. */
+ limit = (limit / frags_exp) * frags_exp;
+ RD(2, "LIMIT %d --> %d", o, limit);
+ }
#endif
- for (head = ring->head, i = 0; i < limit; i++) {
- struct netmap_slot *slot = &ring->slot[head];
- char *p = NETMAP_BUF(ring, slot->buf_idx);
- int len = slot->len;
- struct pkt *pkt;
+ for (head = ring->head, i = 0; i < limit; i++) {
+ struct netmap_slot *slot = &ring->slot[head];
+ char *p = NETMAP_BUF(ring, slot->buf_idx);
+ int len = slot->len;
+ struct pkt *pkt;
- if (dump) {
- dump_payload(p, slot->len, ring, head);
- }
+ if (dump) {
+ dump_payload(p, slot->len, ring, head);
+ }
- frags++;
- if (!(slot->flags & NS_MOREFRAG)) {
- if (first_packet) {
+ frags++;
+ if (!(slot->flags & NS_MOREFRAG)) {
+ if (first_packet) {
+ first_packet = 0;
+ } else if (frags != frags_exp) {
+ char prbuf[512];
+ RD(1, "Received packets with %u frags, "
+ "expected %u, '%s'", frags, frags_exp,
+ multi_slot_to_string(ring, head-frags+1,
+ frags,
+ prbuf, sizeof(prbuf)));
+ }
first_packet = 0;
- } else if (frags != frags_exp) {
- char prbuf[512];
- RD(1, "Received packets with %u frags, "
- "expected %u, '%s'", frags, frags_exp,
- multi_slot_to_string(ring, head-frags+1, frags,
- prbuf, sizeof(prbuf)));
+ frags_exp = frags;
+ frags = 0;
}
- first_packet = 0;
- frags_exp = frags;
- frags = 0;
- }
- p -= sizeof(pkt->vh) - targ->g->virt_header;
- len += sizeof(pkt->vh) - targ->g->virt_header;
- pkt = (struct pkt *)p;
+ p -= sizeof(pkt->vh) - targ->g->virt_header;
+ len += sizeof(pkt->vh) - targ->g->virt_header;
+ pkt = (struct pkt *)p;
+ if (ntohs(pkt->eh.ether_type) == ETHERTYPE_IP)
+ af = AF_INET;
+ else
+ af = AF_INET6;
- if ((char *)pkt + len < ((char *)pkt->body) + sizeof(seq)) {
- RD(1, "%s: packet too small (len=%u)", __func__,
- slot->len);
- } else {
- seq = (pkt->body[0] << 24) | (pkt->body[1] << 16)
- | (pkt->body[2] << 8) | pkt->body[3];
- if (first_slot) {
- /* Grab the first one, whatever it
- is. */
- seq_exp = seq;
- first_slot = 0;
- } else if (seq != seq_exp) {
- uint32_t delta = seq - seq_exp;
+ if ((char *)pkt + len < ((char *)PKT(pkt, body, af)) +
+ sizeof(seq)) {
+ RD(1, "%s: packet too small (len=%u)", __func__,
+ slot->len);
+ } else {
+ seq = (PKT(pkt, body, af)[0] << 24) |
+ (PKT(pkt, body, af)[1] << 16) |
+ (PKT(pkt, body, af)[2] << 8) |
+ PKT(pkt, body, af)[3];
+ if (first_slot) {
+ /* Grab the first one, whatever it
+ is. */
+ seq_exp[j] = seq;
+ first_slot = 0;
+ } else if (seq != seq_exp[j]) {
+ uint32_t delta = seq - seq_exp[j];
- if (delta < (0xFFFFFFFF >> 1)) {
- RD(2, "Sequence GAP: exp %u found %u",
- seq_exp, seq);
- } else {
- RD(2, "Sequence OUT OF ORDER: "
- "exp %u found %u", seq_exp, seq);
+ if (delta < (0xFFFFFFFF >> 1)) {
+ RD(2, "Sequence GAP: exp %u found %u",
+ seq_exp[j], seq);
+ } else {
+ RD(2, "Sequence OUT OF ORDER: "
+ "exp %u found %u", seq_exp[j], seq);
+ }
+ seq_exp[j] = seq;
}
- seq_exp = seq;
+ seq_exp[j]++;
}
- seq_exp++;
+
+ cur.bytes += slot->len;
+ head = nm_ring_next(ring, head);
+ cur.pkts++;
}
- cur.bytes += slot->len;
- head = nm_ring_next(ring, head);
- cur.pkts++;
+ ring->cur = ring->head = head;
+
+ cur.events++;
+ targ->ctr = cur;
}
-
- ring->cur = ring->head = head;
-
- cur.events++;
- targ->ctr = cur;
}
-
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
+#ifndef BUSYWAIT
out:
+#endif /* !BUSYWAIT */
targ->completed = 1;
targ->ctr = cur;
quit:
+ if (seq_exp != NULL)
+ free(seq_exp);
/* reset the ``used`` flag. */
targ->used = 0;
@@ -1830,7 +2265,7 @@
static void
-tx_output(struct my_ctrs *cur, double delta, const char *msg)
+tx_output(struct glob_arg *g, struct my_ctrs *cur, double delta, const char *msg)
{
double bw, raw_bw, pps, abs;
char b1[40], b2[80], b3[80];
@@ -1854,51 +2289,156 @@
size = 60;
pps = cur->pkts / delta;
bw = (8.0 * cur->bytes) / delta;
- /* raw packets have4 bytes crc + 20 bytes framing */
- raw_bw = (8.0 * (cur->pkts * 24 + cur->bytes)) / delta;
+ raw_bw = (8.0 * cur->bytes + cur->pkts * g->framing) / delta;
abs = cur->pkts / (double)(cur->events);
printf("Speed: %spps Bandwidth: %sbps (raw %sbps). Average batch: %.2f pkts\n",
- norm(b1, pps), norm(b2, bw), norm(b3, raw_bw), abs);
+ norm(b1, pps, normalize), norm(b2, bw, normalize), norm(b3, raw_bw, normalize), abs);
}
static void
-usage(void)
+usage(int errcode)
{
+/* This usage is generated from the pkt-gen man page:
+ * $ man pkt-gen > x
+ * and pasted here adding the string terminators and endlines with simple
+ * regular expressions. */
const char *cmd = "pkt-gen";
fprintf(stderr,
"Usage:\n"
"%s arguments\n"
- "\t-i interface interface name\n"
- "\t-f function tx rx ping pong txseq rxseq\n"
- "\t-n count number of iterations (can be 0)\n"
- "\t-t pkts_to_send also forces tx mode\n"
- "\t-r pkts_to_receive also forces rx mode\n"
- "\t-l pkt_size in bytes excluding CRC\n"
- "\t-d dst_ip[:port[-dst_ip:port]] single or range\n"
- "\t-s src_ip[:port[-src_ip:port]] single or range\n"
- "\t-D dst-mac\n"
- "\t-S src-mac\n"
- "\t-a cpu_id use setaffinity\n"
- "\t-b burst size testing, mostly\n"
- "\t-c cores cores to use\n"
- "\t-p threads processes/threads to use\n"
- "\t-T report_ms milliseconds between reports\n"
- "\t-w wait_for_link_time in seconds\n"
- "\t-R rate in packets per second\n"
- "\t-X dump payload\n"
- "\t-H len add empty virtio-net-header with size 'len'\n"
- "\t-E pipes allocate extra space for a number of pipes\n"
- "\t-r do not touch the buffers (send rubbish)\n"
- "\t-P file load packet from pcap file\n"
- "\t-z use random IPv4 src address/port\n"
- "\t-Z use random IPv4 dst address/port\n"
- "\t-F num_frags send multi-slot packets\n"
- "\t-A activate pps stats on receiver\n"
- "",
+" -h Show program usage and exit.\n"
+"\n"
+" -i interface\n"
+" Name of the network interface that pkt-gen operates on. It can be a system network interface\n"
+" (e.g., em0), the name of a vale(4) port (e.g., valeSSS:PPP), the name of a netmap pipe or\n"
+" monitor, or any valid netmap port name accepted by the nm_open library function, as docu-\n"
+" mented in netmap(4) (NIOCREGIF section).\n"
+"\n"
+" -f function\n"
+" The function to be executed by pkt-gen. Specify tx for transmission, rx for reception, ping\n"
+" for client-side ping-pong operation, and pong for server-side ping-pong operation.\n"
+"\n"
+" -n count\n"
+" Number of iterations of the pkt-gen function (with 0 meaning infinite). In case of tx or rx,\n"
+" count is the number of packets to receive or transmit. In case of ping or pong, count is the\n"
+" number of ping-pong transactions.\n"
+"\n"
+" -l pkt_size\n"
+" Packet size in bytes excluding CRC. If passed a second time, use random sizes greater than\n"
+" or equal to the second one and lower than the first one.\n"
+"\n"
+" -b burst_size\n"
+" Transmit or receive up to burst_size packets at a time.\n"
+"\n"
+" -4 Use IPv4 addresses.\n"
+"\n"
+" -6 Use IPv6 addresses.\n"
+"\n"
+" -d dst_ip[:port[-dst_ip:port]]\n"
+" Destination IPv4/IPv6 address and port, single or range.\n"
+"\n"
+" -s src_ip[:port[-src_ip:port]]\n"
+" Source IPv4/IPv6 address and port, single or range.\n"
+"\n"
+" -D dst_mac\n"
+" Destination MAC address in colon notation (e.g., aa:bb:cc:dd:ee:00).\n"
+"\n"
+" -S src_mac\n"
+" Source MAC address in colon notation.\n"
+"\n"
+" -a cpu_id\n"
+" Pin the first thread of pkt-gen to a particular CPU using pthread_setaffinity_np(3). If more\n"
+" threads are used, they are pinned to the subsequent CPUs, one per thread.\n"
+"\n"
+" -c cpus\n"
+" Maximum number of CPUs to use (0 means to use all the available ones).\n"
+"\n"
+" -p threads\n"
+" Number of threads to use. By default, only a single thread is used to handle all the netmap\n"
+" rings. If threads is larger than one, each thread handles a single TX ring (in tx mode), a\n"
+" single RX ring (in rx mode), or a TX/RX ring couple. The number of threads must be less than\n"
+" or equal to the number of TX (or RX) rings available in the device specified by interface.\n"
+"\n"
+" -T report_ms\n"
+" Number of milliseconds between reports.\n"
+"\n"
+" -w wait_for_link_time\n"
+" Number of seconds to wait before starting the pkt-gen function, useful to make sure that the\n"
+" network link is up. A network device driver may take some time to enter netmap mode, or to\n"
+" create a new transmit/receive ring pair when netmap(4) requests one.\n"
+"\n"
+" -R rate\n"
+" Packet transmission rate. Not setting the packet transmission rate tells pkt-gen to transmit\n"
+" packets as quickly as possible. On servers from 2010 onwards netmap(4) is able to com-\n"
+" pletely use all of the bandwidth of a 10 or 40Gbps link, so this option should be used unless\n"
+" your intention is to saturate the link.\n"
+"\n"
+" -X Dump payload of each packet transmitted or received.\n"
+"\n"
+" -H len Add empty virtio-net-header with size 'len'. Valid sizes are 0, 10 and 12. This option is\n"
+" only used with Virtual Machine technologies that use virtio as a network interface.\n"
+"\n"
+" -P file\n"
+" Load the packet to be transmitted from a pcap file rather than constructing it within\n"
+" pkt-gen.\n"
+"\n"
+" -z Use random IPv4/IPv6 src address/port.\n"
+"\n"
+" -Z Use random IPv4/IPv6 dst address/port.\n"
+"\n"
+" -N Do not normalize units (i.e., use bps, pps instead of Mbps, Kpps, etc.).\n"
+"\n"
+" -F num_frags\n"
+" Send multi-slot packets, each one with num_frags fragments. A multi-slot packet is repre-\n"
+" sented by two or more consecutive netmap slots with the NS_MOREFRAG flag set (except for the\n"
+" last slot). This is useful to transmit or receive packets larger than the netmap buffer\n"
+" size.\n"
+"\n"
+" -M frag_size\n"
+" In multi-slot mode, frag_size specifies the size of each fragment, if smaller than the packet\n"
+" length divided by num_frags.\n"
+"\n"
+" -I Use indirect buffers. It is only valid for transmitting on VALE ports, and it is implemented\n"
+" by setting the NS_INDIRECT flag in the netmap slots.\n"
+"\n"
+" -W Exit immediately if all the RX rings are empty the first time they are examined.\n"
+"\n"
+" -v Increase the verbosity level.\n"
+"\n"
+" -r In tx mode, do not initialize packets, but send whatever the content of the uninitialized\n"
+" netmap buffers is (rubbish mode).\n"
+"\n"
+" -A Compute mean and standard deviation (over a sliding window) for the transmit or receive rate.\n"
+"\n"
+" -B Take Ethernet framing and CRC into account when computing the average bps. This adds 4 bytes\n"
+" of CRC and 20 bytes of framing to each packet.\n"
+"\n"
+" -C tx_slots[,rx_slots[,tx_rings[,rx_rings]]]\n"
+" Configuration in terms of number of rings and slots to be used when opening the netmap port.\n"
+" Such configuration has effect on software ports created on the fly, such as VALE ports and\n"
+" netmap pipes. The configuration may consist of 1 to 4 numbers separated by commas: tx_slots,\n"
+" rx_slots, tx_rings, rx_rings. Missing numbers or zeroes stand for default values. As an\n"
+" additional convenience, if exactly one number is specified, then this is assigned to both\n"
+" tx_slots and rx_slots. If there is no fourth number, then the third one is assigned to both\n"
+" tx_rings and rx_rings.\n"
+"\n"
+" -o options data generation options (parsed using atoi)\n"
+" OPT_PREFETCH 1\n"
+" OPT_ACCESS 2\n"
+" OPT_COPY 4\n"
+" OPT_MEMCPY 8\n"
+" OPT_TS 16 (add a timestamp)\n"
+" OPT_INDIRECT 32 (use indirect buffers)\n"
+" OPT_DUMP 64 (dump rx/tx traffic)\n"
+" OPT_RUBBISH 256\n"
+" (send whatever the buffers contain)\n"
+" OPT_RANDOM_SRC 512\n"
+" OPT_RANDOM_DST 1024\n"
+" OPT_PPS_STATS 2048\n"
+ "",
cmd);
-
- exit(0);
+ exit(errcode);
}
enum {
@@ -1908,67 +2448,76 @@
};
static void
-start_threads(struct glob_arg *g)
-{
+start_threads(struct glob_arg *g) {
int i;
targs = calloc(g->nthreads, sizeof(*targs));
+ struct targ *t;
/*
* Now create the desired number of threads, each one
* using a single descriptor.
- */
+ */
for (i = 0; i < g->nthreads; i++) {
- struct targ *t = &targs[i];
+ uint64_t seed = time(0) | (time(0) << 32);
+ t = &targs[i];
bzero(t, sizeof(*t));
t->fd = -1; /* default, with pcap */
t->g = g;
+ memcpy(t->seed, &seed, sizeof(t->seed));
- if (g->dev_type == DEV_NETMAP) {
- struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
- uint64_t nmd_flags = 0;
- nmd.self = &nmd;
+ if (g->dev_type == DEV_NETMAP) {
+ struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
+ uint64_t nmd_flags = 0;
+ nmd.self = &nmd;
- if (i > 0) {
- /* the first thread uses the fd opened by the main
- * thread, the other threads re-open /dev/netmap
- */
- if (g->nthreads > 1) {
- nmd.req.nr_flags =
- g->nmd->req.nr_flags & ~NR_REG_MASK;
- nmd.req.nr_flags |= NR_REG_ONE_NIC;
- nmd.req.nr_ringid = i;
- }
- /* Only touch one of the rings (rx is already ok) */
- if (g->td_type == TD_TYPE_RECEIVER)
- nmd_flags |= NETMAP_NO_TX_POLL;
+ if (i > 0) {
+ /* the first thread uses the fd opened by the main
+ * thread, the other threads re-open /dev/netmap
+ */
+ if (g->nthreads > 1) {
+ nmd.req.nr_flags =
+ g->nmd->req.nr_flags & ~NR_REG_MASK;
+ nmd.req.nr_flags |= NR_REG_ONE_NIC;
+ nmd.req.nr_ringid = i;
+ }
+ /* Only touch one of the rings (rx is already ok) */
+ if (g->td_type == TD_TYPE_RECEIVER)
+ nmd_flags |= NETMAP_NO_TX_POLL;
- /* register interface. Override ifname and ringid etc. */
- t->nmd = nm_open(t->g->ifname, NULL, nmd_flags |
- NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
- if (t->nmd == NULL) {
- D("Unable to open %s: %s",
- t->g->ifname, strerror(errno));
- continue;
+ /* register interface. Override ifname and ringid etc. */
+ t->nmd = nm_open(t->g->ifname, NULL, nmd_flags |
+ NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
+ if (t->nmd == NULL) {
+ D("Unable to open %s: %s",
+ t->g->ifname, strerror(errno));
+ continue;
+ }
+ } else {
+ t->nmd = g->nmd;
}
+ t->fd = t->nmd->fd;
+ t->frags = g->frags;
} else {
- t->nmd = g->nmd;
+ targs[i].fd = g->main_fd;
}
- t->fd = t->nmd->fd;
-
- } else {
- targs[i].fd = g->main_fd;
- }
t->used = 1;
t->me = i;
if (g->affinity >= 0) {
- t->affinity = (g->affinity + i) % g->system_cpus;
+ t->affinity = (g->affinity + i) % g->cpus;
} else {
t->affinity = -1;
}
/* default, init packets */
initialize_packet(t);
+ }
+ /* Wait for PHY reset. */
+ D("Wait %d secs for phy reset", g->wait_link);
+ sleep(g->wait_link);
+ D("Ready...");
+ for (i = 0; i < g->nthreads; i++) {
+ t = &targs[i];
if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) {
D("Unable to create thread %d: %s", i, strerror(errno));
t->used = 0;
@@ -1988,7 +2537,7 @@
prev.pkts = prev.bytes = prev.events = 0;
gettimeofday(&prev.t, NULL);
for (;;) {
- char b1[40], b2[40], b3[40], b4[70];
+ char b1[40], b2[40], b3[40], b4[100];
uint64_t pps, usec;
struct my_ctrs x;
double abs;
@@ -2045,13 +2594,13 @@
ppsdev = sqrt(ppsdev);
snprintf(b4, sizeof(b4), "[avg/std %s/%s pps]",
- norm(b1, ppsavg), norm(b2, ppsdev));
+ norm(b1, ppsavg, normalize), norm(b2, ppsdev, normalize));
}
D("%spps %s(%spkts %sbps in %llu usec) %.2f avg_batch %d min_space",
- norm(b1, pps), b4,
- norm(b2, (double)x.pkts),
- norm(b3, (double)x.bytes*8),
+ norm(b1, pps, normalize), b4,
+ norm(b2, (double)x.pkts, normalize),
+ norm(b3, (double)x.bytes*8+(double)x.pkts*g->framing, normalize),
(unsigned long long)usec,
abs, (int)cur.min_space);
prev = cur;
@@ -2105,9 +2654,9 @@
timersub(&toc, &tic, &toc);
delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
if (g->td_type == TD_TYPE_SENDER)
- tx_output(&cur, delta_t, "Sent");
- else
- tx_output(&cur, delta_t, "Received");
+ tx_output(g, &cur, delta_t, "Sent");
+ else if (g->td_type == TD_TYPE_RECEIVER)
+ tx_output(g, &cur, delta_t, "Received");
}
struct td_desc {
@@ -2114,16 +2663,17 @@
int ty;
char *key;
void *f;
+ int default_burst;
};
static struct td_desc func[] = {
- { TD_TYPE_SENDER, "tx", sender_body },
- { TD_TYPE_RECEIVER, "rx", receiver_body },
- { TD_TYPE_OTHER, "ping", pinger_body },
- { TD_TYPE_OTHER, "pong", ponger_body },
- { TD_TYPE_SENDER, "txseq", txseq_body },
- { TD_TYPE_RECEIVER, "rxseq", rxseq_body },
- { 0, NULL, NULL }
+ { TD_TYPE_RECEIVER, "rx", receiver_body, 512}, /* default */
+ { TD_TYPE_SENDER, "tx", sender_body, 512 },
+ { TD_TYPE_OTHER, "ping", ping_body, 1 },
+ { TD_TYPE_OTHER, "pong", pong_body, 1 },
+ { TD_TYPE_SENDER, "txseq", txseq_body, 512 },
+ { TD_TYPE_RECEIVER, "rxseq", rxseq_body, 512 },
+ { 0, NULL, NULL, 0 }
};
static int
@@ -2165,7 +2715,12 @@
/* if a device name was specified, put it in the structure; otherwise,
* the kernel will try to allocate the "next" device of the
* specified type */
- strncpy(ifr.ifr_name, dev, IFNAMSIZ);
+ size_t len = strlen(dev);
+ if (len > IFNAMSIZ) {
+ D("%s too long", dev);
+ return -1;
+ }
+ memcpy(ifr.ifr_name, dev, len);
}
/* try to create the device */
@@ -2183,9 +2738,9 @@
D("new name is %s", dev);
#endif /* linux */
- /* this is the special file descriptor that the caller will use to talk
- * with the virtual interface */
- return fd;
+ /* this is the special file descriptor that the caller will use to talk
+ * with the virtual interface */
+ return fd;
}
int
@@ -2198,41 +2753,63 @@
struct glob_arg g;
int ch;
- int wait_link = 2;
int devqueues = 1; /* how many device queues */
+ int wait_link_arg = 0;
+ int pkt_size_done = 0;
+
+ struct td_desc *fn = func;
+
bzero(&g, sizeof(g));
g.main_fd = -1;
- g.td_body = receiver_body;
- g.td_type = TD_TYPE_RECEIVER;
+ g.td_body = fn->f;
+ g.td_type = fn->ty;
g.report_interval = 1000; /* report interval */
g.affinity = -1;
/* ip addresses can also be a range x.x.x.x-x.x.x.y */
+ g.af = AF_INET; /* default */
g.src_ip.name = "10.0.0.1";
g.dst_ip.name = "10.1.0.1";
g.dst_mac.name = "ff:ff:ff:ff:ff:ff";
g.src_mac.name = NULL;
g.pkt_size = 60;
- g.burst = 512; // default
+ g.pkt_min_size = 0;
g.nthreads = 1;
- g.cpus = 1; // default
+ g.cpus = 1; /* default */
g.forever = 1;
g.tx_rate = 0;
- g.frags = 1;
+ g.frags =1;
+ g.mtu = 1500;
g.nmr_config = "";
g.virt_header = 0;
+ g.wait_link = 2; /* wait 2 seconds for physical ports */
- while ( (ch = getopt(arc, argv,
- "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:E:m:rP:zZA")) != -1) {
- struct td_desc *fn;
+ while ((ch = getopt(arc, argv, "46a:f:F:Nn:i:Il:d:s:D:S:b:c:o:p:"
+ "T:w:WvR:XC:H:rP:zZAhBM:")) != -1) {
switch(ch) {
default:
D("bad option %c %s", ch, optarg);
- usage();
+ usage(-1);
break;
+ case 'h':
+ usage(0);
+ break;
+
+ case '4':
+ g.af = AF_INET;
+ break;
+
+ case '6':
+ g.af = AF_INET6;
+ break;
+
+ case 'N':
+ normalize = 0;
+ break;
+
case 'n':
g.npackets = strtoull(optarg, NULL, 10);
break;
@@ -2246,6 +2823,10 @@
g.frags = i;
break;
+ case 'M':
+ g.mtu = atoi(optarg);
+ break;
+
case 'f':
for (fn = func; fn->key; fn++) {
if (!strcmp(fn->key, optarg))
@@ -2260,7 +2841,7 @@
break;
case 'o': /* data generation options */
- g.options = atoi(optarg);
+ g.options |= atoi(optarg);
break;
case 'a': /* force affinity */
@@ -2298,11 +2879,16 @@
break;
case 'I':
- g.options |= OPT_INDIRECT; /* XXX use indirect buffer */
+ g.options |= OPT_INDIRECT; /* use indirect buffers */
break;
case 'l': /* pkt_size */
- g.pkt_size = atoi(optarg);
+ if (pkt_size_done) {
+ g.pkt_min_size = atoi(optarg);
+ } else {
+ g.pkt_size = atoi(optarg);
+ pkt_size_done = 1;
+ }
break;
case 'd':
@@ -2318,11 +2904,12 @@
break;
case 'w':
- wait_link = atoi(optarg);
+ g.wait_link = atoi(optarg);
+ wait_link_arg = 1;
break;
- case 'W': /* XXX changed default */
- g.forever = 0; /* do not exit rx even with no traffic */
+ case 'W':
+ g.forever = 0; /* exit RX with no traffic */
break;
case 'b': /* burst */
@@ -2357,18 +2944,9 @@
case 'H':
g.virt_header = atoi(optarg);
break;
- case 'e': /* extra bufs */
- g.extra_bufs = atoi(optarg);
- break;
- case 'E':
- g.extra_pipes = atoi(optarg);
- break;
case 'P':
g.packet_file = strdup(optarg);
break;
- case 'm':
- /* ignored */
- break;
case 'r':
g.options |= OPT_RUBBISH;
break;
@@ -2381,28 +2959,47 @@
case 'A':
g.options |= OPT_PPS_STATS;
break;
+ case 'B':
+ /* raw packets have 4 bytes crc + 20 bytes framing */
+ // XXX maybe add an option to pass the IFG
+ g.framing = 24 * 8;
+ break;
}
}
if (strlen(g.ifname) <=0 ) {
D("missing ifname");
- usage();
+ usage(-1);
}
+ if (g.burst == 0) {
+ g.burst = fn->default_burst;
+ D("using default burst size: %d", g.burst);
+ }
+
g.system_cpus = i = system_ncpus();
if (g.cpus < 0 || g.cpus > i) {
D("%d cpus is too high, have only %d cpus", g.cpus, i);
- usage();
+ usage(-1);
}
-D("running on %d cpus (have %d)", g.cpus, i);
+ D("running on %d cpus (have %d)", g.cpus, i);
if (g.cpus == 0)
g.cpus = i;
+ if (!wait_link_arg && !strncmp(g.ifname, "vale", 4)) {
+ g.wait_link = 0;
+ }
+
if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) {
D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE);
- usage();
+ usage(-1);
}
+ if (g.pkt_min_size > 0 && (g.pkt_min_size < 16 || g.pkt_min_size > g.pkt_size)) {
+ D("bad pktminsize %d [16..%d]\n", g.pkt_min_size, g.pkt_size);
+ usage(-1);
+ }
+
if (g.src_mac.name == NULL) {
static char mybuf[20] = "00:00:00:00:00:00";
/* retrieve source mac address. */
@@ -2413,21 +3010,15 @@
g.src_mac.name = mybuf;
}
/* extract address ranges */
- extract_ip_range(&g.src_ip);
- extract_ip_range(&g.dst_ip);
- extract_mac_range(&g.src_mac);
- extract_mac_range(&g.dst_mac);
+ if (extract_mac_range(&g.src_mac) || extract_mac_range(&g.dst_mac))
+ usage(-1);
+ g.options |= extract_ip_range(&g.src_ip, g.af);
+ g.options |= extract_ip_range(&g.dst_ip, g.af);
- if (g.src_ip.start != g.src_ip.end ||
- g.src_ip.port0 != g.src_ip.port1 ||
- g.dst_ip.start != g.dst_ip.end ||
- g.dst_ip.port0 != g.dst_ip.port1)
- g.options |= OPT_COPY;
-
if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1
&& g.virt_header != VIRT_HDR_2) {
D("bad virtio-net-header length");
- usage();
+ usage(-1);
}
if (g.dev_type == DEV_TAP) {
@@ -2435,7 +3026,7 @@
g.main_fd = tap_alloc(g.ifname);
if (g.main_fd < 0) {
D("cannot open tap %s", g.ifname);
- usage();
+ usage(-1);
}
#ifndef NO_PCAP
} else if (g.dev_type == DEV_PCAP) {
@@ -2445,7 +3036,7 @@
g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf);
if (g.p == NULL) {
D("cannot open pcap on %s", g.ifname);
- usage();
+ usage(-1);
}
g.main_fd = pcap_fileno(g.p);
D("using pcap on %s fileno %d", g.ifname, g.main_fd);
@@ -2453,20 +3044,21 @@
} else if (g.dummy_send) { /* but DEV_NETMAP */
D("using a dummy send routine");
} else {
- struct nmreq base_nmd;
+ struct nm_desc base_nmd;
+ char errmsg[MAXERRMSG];
+ u_int flags;
bzero(&base_nmd, sizeof(base_nmd));
- parse_nmr_config(g.nmr_config, &base_nmd);
- if (g.extra_bufs) {
- base_nmd.nr_arg3 = g.extra_bufs;
+ parse_nmr_config(g.nmr_config, &base_nmd.req);
+
+ base_nmd.req.nr_flags |= NR_ACCEPT_VNET_HDR;
+
+ if (nm_parse(g.ifname, &base_nmd, errmsg) < 0) {
+ D("Invalid name '%s': %s", g.ifname, errmsg);
+ goto out;
}
- if (g.extra_pipes) {
- base_nmd.nr_arg1 = g.extra_pipes;
- }
- base_nmd.nr_flags |= NR_ACCEPT_VNET_HDR;
-
/*
* Open the netmap device using nm_open().
*
@@ -2474,28 +3066,21 @@
* which in turn may take some time for the PHY to
* reconfigure. We do the open here to have time to reset.
*/
- g.nmd = nm_open(g.ifname, &base_nmd, 0, NULL);
+ flags = NM_OPEN_IFNAME | NM_OPEN_ARG1 | NM_OPEN_ARG2 |
+ NM_OPEN_ARG3 | NM_OPEN_RING_CFG;
+ if (g.nthreads > 1) {
+ base_nmd.req.nr_flags &= ~NR_REG_MASK;
+ base_nmd.req.nr_flags |= NR_REG_ONE_NIC;
+ base_nmd.req.nr_ringid = 0;
+ }
+ g.nmd = nm_open(g.ifname, NULL, flags, &base_nmd);
if (g.nmd == NULL) {
D("Unable to open %s: %s", g.ifname, strerror(errno));
goto out;
}
-
- if (g.nthreads > 1) {
- struct nm_desc saved_desc = *g.nmd;
- saved_desc.self = &saved_desc;
- saved_desc.mem = NULL;
- nm_close(g.nmd);
- saved_desc.req.nr_flags &= ~NR_REG_MASK;
- saved_desc.req.nr_flags |= NR_REG_ONE_NIC;
- saved_desc.req.nr_ringid = 0;
- g.nmd = nm_open(g.ifname, &base_nmd, NM_OPEN_IFNAME, &saved_desc);
- if (g.nmd == NULL) {
- D("Unable to open %s: %s", g.ifname, strerror(errno));
- goto out;
- }
- }
g.main_fd = g.nmd->fd;
- D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem);
+ D("mapped %luKB at %p", (unsigned long)(g.nmd->req.nr_memsize>>10),
+ g.nmd->mem);
if (g.virt_header) {
/* Set the virtio-net header length, since the user asked
@@ -2558,7 +3143,7 @@
/* Exit if something went wrong. */
if (g.main_fd < 0) {
D("aborting");
- usage();
+ usage(-1);
}
}
@@ -2583,8 +3168,8 @@
int lim = (g.tx_rate)/300;
if (g.burst > lim)
g.burst = lim;
- if (g.burst < g.frags)
- g.burst = g.frags;
+ if (g.burst == 0)
+ g.burst = 1;
x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate;
g.tx_period.tv_nsec = x;
g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
@@ -2593,11 +3178,6 @@
if (g.td_type == TD_TYPE_SENDER)
D("Sending %d packets every %ld.%09ld s",
g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec);
- /* Wait for PHY reset. */
- D("Wait %d secs for phy reset", wait_link);
- sleep(wait_link);
- D("Ready...");
-
/* Install ^C handler. */
global_nthreads = g.nthreads;
sigemptyset(&ss);
@@ -2608,6 +3188,7 @@
}
start_threads(&g);
/* Install the handler and re-enable SIGINT for the main thread */
+ memset(&sa, 0, sizeof(sa));
sa.sa_handler = sigint_h;
if (sigaction(SIGINT, &sa, NULL) < 0) {
D("failed to install ^C handler: %s", strerror(errno));

File Metadata

Mime Type
text/plain
Expires
Mon, Mar 30, 4:39 AM (22 h, 4 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
30573840
Default Alt Text
D17698.id49615.diff (81 KB)

Event Timeline