diff --git a/sbin/ipfw/Makefile b/sbin/ipfw/Makefile
index 3205c6626a44..c09ebca32e87 100644
--- a/sbin/ipfw/Makefile
+++ b/sbin/ipfw/Makefile
@@ -1,8 +1,11 @@
 # $FreeBSD$
 
 PROG=	ipfw
 SRCS=	ipfw2.c dummynet.c ipv6.c main.c nat.c altq.c
 WARNS?=	2
+# dummynet.c uses humanize_number(3) and expand_number(3) from libutil.
+DPADD=	${LIBUTIL}
+LDADD=	-lutil
 MAN=	ipfw.8
 
 .include <bsd.prog.mk>
diff --git a/sbin/ipfw/dummynet.c b/sbin/ipfw/dummynet.c
index 9e8356eea516..147328460eba 100644
--- a/sbin/ipfw/dummynet.c
+++ b/sbin/ipfw/dummynet.c
@@ -1,1060 +1,1085 @@
 /*
  * Copyright (c) 2002-2003 Luigi Rizzo
  * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
  * Copyright (c) 1994 Ugen J.S.Antsilevich
  *
  * Idea and grammar partially left from:
  * Copyright (c) 1993 Daniel Boulet
  *
  * Redistribution and use in source forms, with and without modification,
  * are permitted provided that this entire comment appears intact.
  *
  * Redistribution in binary form may occur without any restrictions.
  * Obviously, it would be nice if you gave credit where credit is due
  * but requiring it would be too onerous.
  *
  * This software is provided ``AS IS'' without any warranties of any kind.
  *
  * NEW command line interface for IP firewall facility
  *
  * $FreeBSD$
  *
  * dummynet support
  */
 
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/queue.h>
 /* XXX there are several sysctl leftover here */
 #include <sys/sysctl.h>
 
 #include "ipfw2.h"
 
 #include <ctype.h>
 #include <err.h>
+#include <errno.h>
+#include <libutil.h>
 #include <netdb.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sysexits.h>
 
 #include <net/if.h>
 #include <netinet/in.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 #include <arpa/inet.h>	/* inet_ntoa */
 
 static struct _s_x dummynet_params[] = {
 	{ "plr",		TOK_PLR },
 	{ "noerror",		TOK_NOERROR },
 	{ "buckets",		TOK_BUCKETS },
 	{ "dst-ip",		TOK_DSTIP },
 	{ "src-ip",		TOK_SRCIP },
 	{ "dst-port",		TOK_DSTPORT },
 	{ "src-port",		TOK_SRCPORT },
 	{ "proto",		TOK_PROTO },
 	{ "weight",		TOK_WEIGHT },
 	{ "all",		TOK_ALL },
 	{ "mask",		TOK_MASK },
 	{ "droptail",		TOK_DROPTAIL },
 	{ "red",		TOK_RED },
 	{ "gred",		TOK_GRED },
 	{ "bw",			TOK_BW },
 	{ "bandwidth",		TOK_BW },
 	{ "delay",		TOK_DELAY },
 	{ "pipe",		TOK_PIPE },
 	{ "queue",		TOK_QUEUE },
 	{ "flow-id",		TOK_FLOWID},
 	{ "dst-ipv6",		TOK_DSTIP6},
 	{ "dst-ip6",		TOK_DSTIP6},
 	{ "src-ipv6",		TOK_SRCIP6},
 	{ "src-ip6",		TOK_SRCIP6},
 	{ "profile",		TOK_PIPE_PROFILE},
+	{ "burst",		TOK_BURST},
 	{ "dummynet-params",	TOK_NULL },
 	{ NULL, 0 }	/* terminator */
 };
 
 static int
 sort_q(const void *pa, const void *pb)
 {
 	int rev = (co.do_sort < 0);
 	int field = rev ? -co.do_sort : co.do_sort;
 	long long res = 0;
 	const struct dn_flow_queue *a = pa;
 	const struct dn_flow_queue *b = pb;
 
 	switch (field) {
 	case 1: /* pkts */
 		res = a->len - b->len;
 		break;
 	case 2: /* bytes */
 		res = a->len_bytes - b->len_bytes;
 		break;
 
 	case 3: /* tot pkts */
 		res = a->tot_pkts - b->tot_pkts;
 		break;
 
 	case 4: /* tot bytes */
 		res = a->tot_bytes - b->tot_bytes;
 		break;
 	}
 	if (res < 0)
 		res = -1;
 	if (res > 0)
 		res = 1;
 	return (int)(rev ? res : -res);
 }
 
 static void
 list_queues(struct dn_flow_set *fs, struct dn_flow_queue *q)
 {
 	int l;
 	int index_printed, indexes = 0;
 	char buff[255];
 	struct protoent *pe;
 
 	if (fs->rq_elements == 0)
 		return;
 
 	if (co.do_sort != 0)
 		heapsort(q, fs->rq_elements, sizeof *q, sort_q);
 
 	/* Print IPv4 flows */
 	index_printed = 0;
 	for (l = 0; l < fs->rq_elements; l++) {
 		struct in_addr ina;
 
 		/* XXX: Should check for IPv4 flows */
 		if (IS_IP6_FLOW_ID(&(q[l].id)))
 			continue;
 
 		if (!index_printed) {
 			index_printed = 1;
 			if (indexes > 0)	/* currently a no-op */
 				printf("\n");
 			indexes++;
 			printf("    "
 			    "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
 			    fs->flow_mask.proto,
 			    fs->flow_mask.src_ip, fs->flow_mask.src_port,
 			    fs->flow_mask.dst_ip, fs->flow_mask.dst_port);
 
 			printf("BKT Prot ___Source IP/port____ "
 			    "____Dest. IP/port____ "
 			    "Tot_pkt/bytes Pkt/Byte Drp\n");
 		}
 
 		printf("%3d ", q[l].hash_slot);
 		pe = getprotobynumber(q[l].id.proto);
 		if (pe)
 			printf("%-4s ", pe->p_name);
 		else
 			printf("%4u ", q[l].id.proto);
 		ina.s_addr = htonl(q[l].id.src_ip);
 		printf("%15s/%-5d ",
 		    inet_ntoa(ina), q[l].id.src_port);
 		ina.s_addr = htonl(q[l].id.dst_ip);
 		printf("%15s/%-5d ",
 		    inet_ntoa(ina), q[l].id.dst_port);
 		printf("%4llu %8llu %2u %4u %3u\n",
 		    align_uint64(&q[l].tot_pkts),
 		    align_uint64(&q[l].tot_bytes),
 		    q[l].len, q[l].len_bytes, q[l].drops);
 		if (co.verbose)
 			printf("   S %20llu  F %20llu\n",
 			    align_uint64(&q[l].S), align_uint64(&q[l].F));
 	}
 
 	/* Print IPv6 flows */
 	index_printed = 0;
 	for (l = 0; l < fs->rq_elements; l++) {
 		if (!IS_IP6_FLOW_ID(&(q[l].id)))
 			continue;
 
 		if (!index_printed) {
 			index_printed = 1;
 			if (indexes > 0)
 				printf("\n");
 			indexes++;
 			printf("\n        mask: proto: 0x%02x, flow_id: 0x%08x,  ",
 			    fs->flow_mask.proto, fs->flow_mask.flow_id6);
 			inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6),
 			    buff, sizeof(buff));
 			printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port);
 			inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6),
 			    buff, sizeof(buff) );
 			printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port);
 
 			printf("BKT ___Prot___ _flow-id_ "
 			    "______________Source IPv6/port_______________ "
 			    "_______________Dest. IPv6/port_______________ "
 			    "Tot_pkt/bytes Pkt/Byte Drp\n");
 		}
 		printf("%3d ", q[l].hash_slot);
 		pe = getprotobynumber(q[l].id.proto);
 		if (pe != NULL)
 			printf("%9s ", pe->p_name);
 		else
 			printf("%9u ", q[l].id.proto);
 		printf("%7d  %39s/%-5d ", q[l].id.flow_id6,
 		    inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)),
 		    q[l].id.src_port);
 		printf(" %39s/%-5d ",
 		    inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)),
 		    q[l].id.dst_port);
 		printf(" %4llu %8llu %2u %4u %3u\n",
 		    align_uint64(&q[l].tot_pkts),
 		    align_uint64(&q[l].tot_bytes),
 		    q[l].len, q[l].len_bytes, q[l].drops);
 		if (co.verbose)
 			printf("   S %20llu  F %20llu\n",
 			    align_uint64(&q[l].S),
 			    align_uint64(&q[l].F));
 	}
 }
 
 static void
 print_flowset_parms(struct dn_flow_set *fs, char *prefix)
 {
 	int l;
 	char qs[30];
 	char plr[30];
 	char red[90];	/* Display RED parameters */
 
 	l = fs->qsize;
 	if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
 		if (l >= 8192)
 			sprintf(qs, "%d KB", l / 1024);
 		else
 			sprintf(qs, "%d B", l);
 	} else
 		sprintf(qs, "%3d sl.", l);
 	if (fs->plr)
 		sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff));
 	else
 		plr[0] = '\0';
 	if (fs->flags_fs & DN_IS_RED)	/* RED parameters */
 		sprintf(red,
-		    "\n\t  %cRED w_q %f min_th %d max_th %d max_p %f",
+		    "\n\t %cRED w_q %f min_th %d max_th %d max_p %f",
 		    (fs->flags_fs & DN_IS_GENTLE_RED) ? 'G' : ' ',
 		    1.0 * fs->w_q / (double)(1 << SCALE_RED),
 		    SCALE_VAL(fs->min_th),
 		    SCALE_VAL(fs->max_th),
 		    1.0 * fs->max_p / (double)(1 << SCALE_RED));
 	else
 		sprintf(red, "droptail");
 
 	printf("%s %s%s %d queues (%d buckets) %s\n",
 	    prefix, qs, plr, fs->rq_elements, fs->rq_size, red);
 }
 
 static void
-print_extra_delay_parms(struct dn_pipe *p, char *prefix)
+print_extra_delay_parms(struct dn_pipe *p)
 {
 	double loss;
 	if (p->samples_no <= 0)
 		return;
 
 	loss = p->loss_level;
 	loss /= p->samples_no;
-	printf("%s profile: name \"%s\" loss %f samples %d\n",
-		prefix, p->name, loss, p->samples_no);
+	printf("\t profile: name \"%s\" loss %f samples %d\n",
+		p->name, loss, p->samples_no);
 }
 
 void
 ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[])
 {
 	int rulenum;
 	void *next = data;
 	struct dn_pipe *p = (struct dn_pipe *) data;
 	struct dn_flow_set *fs;
 	struct dn_flow_queue *q;
 	int l;
 
 	if (ac > 0)
 		rulenum = strtoul(*av++, NULL, 10);
 	else
 		rulenum = 0;
 	for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) {
 		double b = p->bandwidth;
 		char buf[30];
 		char prefix[80];
+		char burst[5 + 7];
 
 		if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE)
 			break;	/* done with pipes, now queues */
 
 		/*
 		 * compute length, as pipe have variable size
 		 */
 		l = sizeof(*p) + p->fs.rq_elements * sizeof(*q);
 		next = (char *)p + l;
 		nbytes -= l;
 
 		if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2)
 			continue;
 
 		/*
 		 * Print rate (or clocking interface)
 		 */
 		if (p->if_name[0] != '\0')
 			sprintf(buf, "%s", p->if_name);
 		else if (b == 0)
 			sprintf(buf, "unlimited");
 		else if (b >= 1000000)
 			sprintf(buf, "%7.3f Mbit/s", b/1000000);
 		else if (b >= 1000)
 			sprintf(buf, "%7.3f Kbit/s", b/1000);
 		else
 			sprintf(buf, "%7.3f bit/s ", b);
 
 		sprintf(prefix, "%05d: %s %4d ms ",
 		    p->pipe_nr, buf, p->delay);
 
-		print_extra_delay_parms(p, prefix);
-
 		print_flowset_parms(&(p->fs), prefix);
 
+		if (humanize_number(burst, sizeof(burst), p->burst,
+		    "Byte", HN_AUTOSCALE, 0) < 0 || co.verbose)
+			printf("\t burst: %llu Byte\n", (unsigned long long)p->burst);
+		else	/* scaled string was produced and verbose is off */
+			printf("\t burst: %s\n", burst);
+
+		print_extra_delay_parms(p);
+
 		q = (struct dn_flow_queue *)(p+1);
 		list_queues(&(p->fs), q);
 	}
 	for (fs = next; nbytes >= sizeof *fs; fs = next) {
 		char prefix[80];
 
 		if (SLIST_NEXT(fs, next) != (struct dn_flow_set *)DN_IS_QUEUE)
 			break;
 		l = sizeof(*fs) + fs->rq_elements * sizeof(*q);
 		next = (char *)fs + l;
 		nbytes -= l;
 
 		if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) ||
 		    (rulenum != fs->parent_nr && co.do_pipe == 1))) {
 			continue;
 		}
 
 		q = (struct dn_flow_queue *)(fs+1);
 		sprintf(prefix, "q%05d: weight %d pipe %d ",
 		    fs->fs_nr, fs->weight, fs->parent_nr);
 		print_flowset_parms(fs, prefix);
 		list_queues(fs, q);
 	}
 }
 
 /*
  * Delete pipe or queue i
  */
 int
 ipfw_delete_pipe(int pipe_or_queue, int i)
 {
 	struct dn_pipe p;
 
 	memset(&p, 0, sizeof p);
 	if (pipe_or_queue == 1)
 		p.pipe_nr = i;		/* pipe */
 	else
 		p.fs.fs_nr = i;		/* queue */
 	i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p);
 	if (i) {
 		i = 1;
 		warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i);
 	}
 	return i;
 }
 
 /*
  * Code to parse delay profiles.
  *
  * Some link types introduce extra delays in the transmission
  * of a packet, e.g. because of MAC level framing, contention on
  * the use of the channel, MAC level retransmissions and so on.
  * From our point of view, the channel is effectively unavailable
  * for this extra time, which is constant or variable depending
  * on the link type. Additionally, packets may be dropped after this
  * time (e.g. on a wireless link after too many retransmissions).
  * We can model the additional delay with an empirical curve
  * that represents its distribution.
  *
  *	cumulative probability
  *	1.0 ^
  *	    |
  *	L   +-- loss-level          x
  *	    |                 ******
  *	    |                *
  *	    |           *****
  *	    |          *
  *	    |        **
  *	    |       *                         
  *	    +-------*------------------->
  *			delay
  *
  * The empirical curve may have both vertical and horizontal lines.
  * Vertical lines represent constant delay for a range of
  * probabilities; horizontal lines correspond to a discontinuty
  * in the delay distribution: the pipe will use the largest delay
  * for a given probability.
  * 
  * To pass the curve to dummynet, we must store the parameters
  * in a file as described below, and issue the command
  *
  *      ipfw pipe <n> config ... bw XXX profile <filename> ...
  *
  * The file format is the following, with whitespace acting as
  * a separator and '#' indicating the beginning a comment:
  *
  *	samples N
  *		the number of samples used in the internal
  *		representation (2..1024; default 100);
  *
  *	loss-level L 
  *		The probability above which packets are lost.
  *               (0.0 <= L <= 1.0, default 1.0 i.e. no loss);
  *
  *	name identifier
  *		Optional a name (listed by "ipfw pipe show")
  *		to identify the distribution;
  *
  *	"delay prob" | "prob delay"
  *		One of these two lines is mandatory and defines
  *		the format of the following lines with data points.
  *
  *	XXX YYY
  *		2 or more lines representing points in the curve,
  *		with either delay or probability first, according
  *		to the chosen format.
  *		The unit for delay is milliseconds.
  *
  * Data points does not need to be ordered or equal to the number
  * specified in the "samples" line. ipfw will sort and interpolate
  * the curve as needed.
  *
  * Example of a profile file:
  
         name    bla_bla_bla
         samples 100
         loss-level    0.86
         prob    delay
         0       200	# minimum overhead is 200ms
         0.5     200
         0.5     300
         0.8     1000
         0.9     1300
         1       1300
  
  * Internally, we will convert the curve to a fixed number of
  * samples, and when it is time to transmit a packet we will
  * model the extra delay as extra bits in the packet.
  *
  */
 
 #define ED_MAX_LINE_LEN	256+ED_MAX_NAME_LEN
 #define ED_TOK_SAMPLES	"samples"
 #define ED_TOK_LOSS	"loss-level"
 #define ED_TOK_NAME	"name"
 #define ED_TOK_DELAY	"delay"
 #define ED_TOK_PROB	"prob"
 #define ED_TOK_BW	"bw"
 #define ED_SEPARATORS	" \t\n"
 #define ED_MIN_SAMPLES_NO	2
 
 /*
  * returns 1 if s is a non-negative number, with at least one '.'
  */
 static int
 is_valid_number(const char *s)
 {
 	int i, dots_found = 0;
 	int len = strlen(s);
 
 	for (i = 0; i<len; ++i)
 		if (!isdigit(s[i]) && (s[i] !='.' || ++dots_found > 1))
 			return 0;
 	return 1;
 }
 
 /*
  * Take as input a string describing a bandwidth value
  * and return the numeric bandwidth value.
  * set clocking interface or bandwidth value
  */
 void
 read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen)
 {
 	if (*bandwidth != -1)
 		warn("duplicate token, override bandwidth value!");
 
 	if (arg[0] >= 'a' && arg[0] <= 'z') {
 		if (namelen >= IFNAMSIZ)
 			warn("interface name truncated");
 		namelen--;
 		/* interface name */
 		strncpy(if_name, arg, namelen);
 		if_name[namelen] = '\0';
 		*bandwidth = 0;
 	} else {	/* read bandwidth value */
 		int bw;
 		char *end = NULL;
 
 		bw = strtoul(arg, &end, 0);
 		if (*end == 'K' || *end == 'k') {
 			end++;
 			bw *= 1000;
 		} else if (*end == 'M') {
 			end++;
 			bw *= 1000000;
 		}
 		if ((*end == 'B' &&
 			_substrcmp2(end, "Bi", "Bit/s") != 0) ||
 		    _substrcmp2(end, "by", "bytes") == 0)
 			bw *= 8;
 
 		if (bw < 0)
 			errx(EX_DATAERR, "bandwidth too large");
 
 		*bandwidth = bw;
 		if_name[0] = '\0';
 	}
 }
 
 struct point {
 	double prob;
 	double delay;
 };
 
 int
 compare_points(const void *vp1, const void *vp2)
 {
 	const struct point *p1 = vp1;
 	const struct point *p2 = vp2;
 	double res = 0;
 
 	res = p1->prob - p2->prob;
 	if (res == 0)
 		res = p1->delay - p2->delay;
 	if (res < 0)
 		return -1;
 	else if (res > 0)
 		return 1;
 	else
 		return 0;
 }
 
 #define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno
 
 static void
 load_extra_delays(const char *filename, struct dn_pipe *p)
 {
 	char    line[ED_MAX_LINE_LEN];
 	FILE    *f;
 	int     lineno = 0;
 	int     i;
 
 	int     samples = -1;
 	double  loss = -1.0;
 	char    profile_name[ED_MAX_NAME_LEN];
 	int     delay_first = -1;
 	int     do_points = 0;
 	struct point    points[ED_MAX_SAMPLES_NO];
 	int     points_no = 0;
 
 	profile_name[0] = '\0';
 	f = fopen(filename, "r");
 	if (f == NULL)
 		err(EX_UNAVAILABLE, "fopen: %s", filename);
 
 	while (fgets(line, ED_MAX_LINE_LEN, f)) {         /* read commands */
 		char *s, *cur = line, *name = NULL, *arg = NULL;
 
 		++lineno;
 
 		/* parse the line */
 		while (cur) {
 			s = strsep(&cur, ED_SEPARATORS);
 			if (s == NULL || *s == '#')
 				break;
 			if (*s == '\0')
 				continue;
 			if (arg)
 				errx(ED_EFMT("too many arguments"));
 			if (name == NULL)
 				name = s;
 			else
 				arg = s;
 		}
 		if (name == NULL)	/* empty line */
 			continue;
 		if (arg == NULL)
 			errx(ED_EFMT("missing arg for %s"), name);
 
 		if (!strcasecmp(name, ED_TOK_SAMPLES)) {
 		    if (samples > 0)
 			errx(ED_EFMT("duplicate ``samples'' line"));
 		    if (atoi(arg) <=0)
 			errx(ED_EFMT("invalid number of samples"));
 		    samples = atoi(arg);
 		    if (samples>ED_MAX_SAMPLES_NO)
 			    errx(ED_EFMT("too many samples, maximum is %d"),
 				ED_MAX_SAMPLES_NO);
 		    do_points = 0;
 		} else if (!strcasecmp(name, ED_TOK_BW)) {
 		    read_bandwidth(arg, &p->bandwidth, p->if_name, sizeof(p->if_name));
 		} else if (!strcasecmp(name, ED_TOK_LOSS)) {
 		    if (loss != -1.0)
 			errx(ED_EFMT("duplicated token: %s"), name);
 		    if (!is_valid_number(arg))
 			errx(ED_EFMT("invalid %s"), arg);
 		    loss = atof(arg);
 		    if (loss > 1)
 			errx(ED_EFMT("%s greater than 1.0"), name);
 		    do_points = 0;
 		} else if (!strcasecmp(name, ED_TOK_NAME)) {
 		    if (profile_name[0] != '\0')
 			errx(ED_EFMT("duplicated token: %s"), name);
 		    strncpy(profile_name, arg, sizeof(profile_name) - 1);
 		    profile_name[sizeof(profile_name)-1] = '\0';
 		    do_points = 0;
 		} else if (!strcasecmp(name, ED_TOK_DELAY)) {
 		    if (do_points)
 			errx(ED_EFMT("duplicated token: %s"), name);
 		    delay_first = 1;
 		    do_points = 1;
 		} else if (!strcasecmp(name, ED_TOK_PROB)) {
 		    if (do_points)
 			errx(ED_EFMT("duplicated token: %s"), name);
 		    delay_first = 0;
 		    do_points = 1;
 		} else if (do_points) {
 		    if (!is_valid_number(name) || !is_valid_number(arg))
 			errx(ED_EFMT("invalid point found"));
 		    if (delay_first) {
 			points[points_no].delay = atof(name);
 			points[points_no].prob = atof(arg);
 		    } else {
 			points[points_no].delay = atof(arg);
 			points[points_no].prob = atof(name);
 		    }
 		    if (points[points_no].prob > 1.0)
 			errx(ED_EFMT("probability greater than 1.0"));
 		    ++points_no;
 		} else {
 		    errx(ED_EFMT("unrecognised command '%s'"), name);
 		}
 	}
 
 	if (samples == -1) {
 	    warnx("'%s' not found, assuming 100", ED_TOK_SAMPLES);
 	    samples = 100;
 	}
 
 	if (loss == -1.0) {
 	    warnx("'%s' not found, assuming no loss", ED_TOK_LOSS);
 	    loss = 1;
 	}
 
 	/* make sure that there are enough points. */
 	if (points_no < ED_MIN_SAMPLES_NO)
 	    errx(ED_EFMT("too few samples, need at least %d"),
 		ED_MIN_SAMPLES_NO);
 
 	qsort(points, points_no, sizeof(struct point), compare_points);
 
 	/* interpolation */
 	for (i = 0; i<points_no-1; ++i) {
 	    double y1 = points[i].prob * samples;
 	    double x1 = points[i].delay;
 	    double y2 = points[i+1].prob * samples;
 	    double x2 = points[i+1].delay;
 
 	    int index = y1;
 	    int stop = y2;
 
 	    if (x1 == x2) {
 		for (; index<stop; ++index)
 		    p->samples[index] = x1;
 	    } else {
 		double m = (y2-y1)/(x2-x1);
 		double c = y1 - m*x1;
 		for (; index<stop ; ++index)
 		    p->samples[index] = (index - c)/m;
 	    }
 	}
 	p->samples_no = samples;
 	p->loss_level = loss * samples;
 	strncpy(p->name, profile_name, sizeof(p->name));
 }
 
 void
 ipfw_config_pipe(int ac, char **av)
 {
 	int samples[ED_MAX_SAMPLES_NO];
 	struct dn_pipe p;
 	int i;
 	char *end;
 	void *par = NULL;
 
 	memset(&p, 0, sizeof p);
 	p.bandwidth = -1;
 
 	av++; ac--;
 	/* Pipe number */
 	if (ac && isdigit(**av)) {
 		i = atoi(*av); av++; ac--;
 		if (co.do_pipe == 1)
 			p.pipe_nr = i;
 		else
 			p.fs.fs_nr = i;
 	}
 	while (ac > 0) {
 		double d;
 		int tok = match_token(dummynet_params, *av);
 		ac--; av++;
 
 		switch(tok) {
 		case TOK_NOERROR:
 			p.fs.flags_fs |= DN_NOERROR;
 			break;
 
 		case TOK_PLR:
 			NEED1("plr needs argument 0..1\n");
 			d = strtod(av[0], NULL);
 			if (d > 1)
 				d = 1;
 			else if (d < 0)
 				d = 0;
 			p.fs.plr = (int)(d*0x7fffffff);
 			ac--; av++;
 			break;
 
 		case TOK_QUEUE:
 			NEED1("queue needs queue size\n");
 			end = NULL;
 			p.fs.qsize = strtoul(av[0], &end, 0);
 			if (*end == 'K' || *end == 'k') {
 				p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
 				p.fs.qsize *= 1024;
 			} else if (*end == 'B' ||
 			    _substrcmp2(end, "by", "bytes") == 0) {
 				p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
 			}
 			ac--; av++;
 			break;
 
 		case TOK_BUCKETS:
 			NEED1("buckets needs argument\n");
 			p.fs.rq_size = strtoul(av[0], NULL, 0);
 			ac--; av++;
 			break;
 
 		case TOK_MASK:
 			NEED1("mask needs mask specifier\n");
 			/*
 			 * per-flow queue, mask is dst_ip, dst_port,
 			 * src_ip, src_port, proto measured in bits
 			 */
 			par = NULL;
 
 			bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask));
 			end = NULL;
 
 			while (ac >= 1) {
 			    uint32_t *p32 = NULL;
 			    uint16_t *p16 = NULL;
 			    uint32_t *p20 = NULL;
 			    struct in6_addr *pa6 = NULL;
 			    uint32_t a;
 
 			    tok = match_token(dummynet_params, *av);
 			    ac--; av++;
 			    switch(tok) {
 			    case TOK_ALL:
 				    /*
 				     * special case, all bits significant
 				     */
 				    p.fs.flow_mask.dst_ip = ~0;
 				    p.fs.flow_mask.src_ip = ~0;
 				    p.fs.flow_mask.dst_port = ~0;
 				    p.fs.flow_mask.src_port = ~0;
 				    p.fs.flow_mask.proto = ~0;
 				    n2mask(&(p.fs.flow_mask.dst_ip6), 128);
 				    n2mask(&(p.fs.flow_mask.src_ip6), 128);
 				    p.fs.flow_mask.flow_id6 = ~0;
 				    p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
 				    goto end_mask;
 
 			    case TOK_DSTIP:
 				    p32 = &p.fs.flow_mask.dst_ip;
 				    break;
 
 			    case TOK_SRCIP:
 				    p32 = &p.fs.flow_mask.src_ip;
 				    break;
 
 			    case TOK_DSTIP6:
 				    pa6 = &(p.fs.flow_mask.dst_ip6);
 				    break;
 			    
 			    case TOK_SRCIP6:
 				    pa6 = &(p.fs.flow_mask.src_ip6);
 				    break;
 
 			    case TOK_FLOWID:
 				    p20 = &p.fs.flow_mask.flow_id6;
 				    break;
 
 			    case TOK_DSTPORT:
 				    p16 = &p.fs.flow_mask.dst_port;
 				    break;
 
 			    case TOK_SRCPORT:
 				    p16 = &p.fs.flow_mask.src_port;
 				    break;
 
 			    case TOK_PROTO:
 				    break;
 
 			    default:
 				    ac++; av--; /* backtrack */
 				    goto end_mask;
 			    }
 			    if (ac < 1)
 				    errx(EX_USAGE, "mask: value missing");
 			    if (*av[0] == '/') {
 				    a = strtoul(av[0]+1, &end, 0);
 				    if (pa6 == NULL)
 					    a = (a == 32) ? ~0 : (1 << a) - 1;
 			    } else
 				    a = strtoul(av[0], &end, 0);
 			    if (p32 != NULL)
 				    *p32 = a;
 			    else if (p16 != NULL) {
 				    if (a > 0xFFFF)
 					    errx(EX_DATAERR,
 						"port mask must be 16 bit");
 				    *p16 = (uint16_t)a;
 			    } else if (p20 != NULL) {
 				    if (a > 0xfffff)
 					errx(EX_DATAERR,
 					    "flow_id mask must be 20 bit");
 				    *p20 = (uint32_t)a;
 			    } else if (pa6 != NULL) {
 				    if (a > 128)
 					errx(EX_DATAERR,
 					    "in6addr invalid mask len");
 				    else
 					n2mask(pa6, a);
 			    } else {
 				    if (a > 0xFF)
 					    errx(EX_DATAERR,
 						"proto mask must be 8 bit");
 				    p.fs.flow_mask.proto = (uint8_t)a;
 			    }
 			    if (a != 0)
 				    p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
 			    ac--; av++;
 			} /* end while, config masks */
 end_mask:
 			break;
 
 		case TOK_RED:
 		case TOK_GRED:
 			NEED1("red/gred needs w_q/min_th/max_th/max_p\n");
 			p.fs.flags_fs |= DN_IS_RED;
 			if (tok == TOK_GRED)
 				p.fs.flags_fs |= DN_IS_GENTLE_RED;
 			/*
 			 * the format for parameters is w_q/min_th/max_th/max_p
 			 */
 			if ((end = strsep(&av[0], "/"))) {
 			    double w_q = strtod(end, NULL);
 			    if (w_q > 1 || w_q <= 0)
 				errx(EX_DATAERR, "0 < w_q <= 1");
 			    p.fs.w_q = (int) (w_q * (1 << SCALE_RED));
 			}
 			if ((end = strsep(&av[0], "/"))) {
 			    p.fs.min_th = strtoul(end, &end, 0);
 			    if (*end == 'K' || *end == 'k')
 				p.fs.min_th *= 1024;
 			}
 			if ((end = strsep(&av[0], "/"))) {
 			    p.fs.max_th = strtoul(end, &end, 0);
 			    if (*end == 'K' || *end == 'k')
 				p.fs.max_th *= 1024;
 			}
 			if ((end = strsep(&av[0], "/"))) {
 			    double max_p = strtod(end, NULL);
 			    if (max_p > 1 || max_p <= 0)
 				errx(EX_DATAERR, "0 < max_p <= 1");
 			    p.fs.max_p = (int)(max_p * (1 << SCALE_RED));
 			}
 			ac--; av++;
 			break;
 
 		case TOK_DROPTAIL:
 			p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
 			break;
 
 		case TOK_BW:
 			NEED1("bw needs bandwidth or interface\n");
 			if (co.do_pipe != 1)
 			    errx(EX_DATAERR, "bandwidth only valid for pipes");
 			read_bandwidth(av[0], &p.bandwidth, p.if_name, sizeof(p.if_name));
 			ac--; av++;
 			break;
 
 		case TOK_DELAY:
 			if (co.do_pipe != 1)
 				errx(EX_DATAERR, "delay only valid for pipes");
 			NEED1("delay needs argument 0..10000ms\n");
 			p.delay = strtoul(av[0], NULL, 0);
 			ac--; av++;
 			break;
 
 		case TOK_WEIGHT:
 			if (co.do_pipe == 1)
 				errx(EX_DATAERR,"weight only valid for queues");
 			NEED1("weight needs argument 0..100\n");
 			p.fs.weight = strtoul(av[0], &end, 0);
 			ac--; av++;
 			break;
 
 		case TOK_PIPE:
 			if (co.do_pipe == 1)
 				errx(EX_DATAERR,"pipe only valid for queues");
 			NEED1("pipe needs pipe_number\n");
 			p.fs.parent_nr = strtoul(av[0], &end, 0);
 			ac--; av++;
 			break;
 
 		case TOK_PIPE_PROFILE:
 			if (co.do_pipe != 1)
 			    errx(EX_DATAERR, "extra delay only valid for pipes");
 			NEED1("extra delay needs the file name\n");
 			p.samples = &samples[0];
 			load_extra_delays(av[0], &p);
 			--ac; ++av;
 			break;
 
+		case TOK_BURST:
+			if (co.do_pipe != 1)
+				errx(EX_DATAERR, "burst only valid for pipes");
+			NEED1("burst needs argument\n");
+			/* errno tells ERANGE (reported below) from a bad string. */
+			errno = 0;
+			if (expand_number(av[0], &p.burst) < 0 &&
+			    errno != ERANGE)
+				errx(EX_DATAERR, "burst: invalid argument");
+			if (errno != 0 || p.burst > (1ULL << 48) - 1)
+				errx(EX_DATAERR,
+				    "burst: out of range (0..2^48-1)");
+			ac--; av++;
+			break;
+
 		default:
 			errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]);
 		}
 	}
 	if (co.do_pipe == 1) {
 		if (p.pipe_nr == 0)
 			errx(EX_DATAERR, "pipe_nr must be > 0");
 		if (p.delay > 10000)
 			errx(EX_DATAERR, "delay must be < 10000");
 	} else { /* co.do_pipe == 2, queue */
 		if (p.fs.parent_nr == 0)
 			errx(EX_DATAERR, "pipe must be > 0");
 		if (p.fs.weight >100)
 			errx(EX_DATAERR, "weight must be <= 100");
 	}
 
 	/* check for bandwidth value */
 	if (p.bandwidth == -1) {
 		p.bandwidth = 0;
 		if (p.samples_no > 0)
 			errx(EX_DATAERR, "profile requires a bandwidth limit");
 	}
 
 	if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) {
 		size_t len;
 		long limit;
 
 		len = sizeof(limit);
 		if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit",
 			&limit, &len, NULL, 0) == -1)
 			limit = 1024*1024;
 		if (p.fs.qsize > limit)
 			errx(EX_DATAERR, "queue size must be < %ldB", limit);
 	} else {
 		size_t len;
 		long limit;
 
 		len = sizeof(limit);
 		if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit",
 			&limit, &len, NULL, 0) == -1)
 			limit = 100;
 		if (p.fs.qsize > limit)
 			errx(EX_DATAERR, "2 <= queue size <= %ld", limit);
 	}
 	if (p.fs.flags_fs & DN_IS_RED) {
 		size_t len;
 		int lookup_depth, avg_pkt_size;
 		double s, idle, weight, w_q;
 		struct clockinfo ck;
 		int t;
 
 		if (p.fs.min_th >= p.fs.max_th)
 		    errx(EX_DATAERR, "min_th %d must be < than max_th %d",
 			p.fs.min_th, p.fs.max_th);
 		if (p.fs.max_th == 0)
 		    errx(EX_DATAERR, "max_th must be > 0");
 
 		len = sizeof(int);
 		if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth",
 			&lookup_depth, &len, NULL, 0) == -1)
 		    errx(1, "sysctlbyname(\"%s\")",
 			"net.inet.ip.dummynet.red_lookup_depth");
 		if (lookup_depth == 0)
 		    errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth"
 			" must be greater than zero");
 
 		len = sizeof(int);
 		if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size",
 			&avg_pkt_size, &len, NULL, 0) == -1)
 
 		    errx(1, "sysctlbyname(\"%s\")",
 			"net.inet.ip.dummynet.red_avg_pkt_size");
 		if (avg_pkt_size == 0)
 			errx(EX_DATAERR,
 			    "net.inet.ip.dummynet.red_avg_pkt_size must"
 			    " be greater than zero");
 
 		len = sizeof(struct clockinfo);
 		if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1)
 			errx(1, "sysctlbyname(\"%s\")", "kern.clockrate");
 
 		/*
 		 * Ticks needed for sending a medium-sized packet.
 		 * Unfortunately, when we are configuring a WF2Q+ queue, we
 		 * do not have bandwidth information, because that is stored
 		 * in the parent pipe, and also we have multiple queues
 		 * competing for it. So we set s=0, which is not very
 		 * correct. But on the other hand, why do we want RED with
 		 * WF2Q+ ?
 		 */
 		if (p.bandwidth==0) /* this is a WF2Q+ queue */
 			s = 0;
 		else
 			s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth;
 
 		/*
 		 * max idle time (in ticks) before avg queue size becomes 0.
 		 * NOTA:  (3/w_q) is approx the value x so that
 		 * (1-w_q)^x < 10^-3.
 		 */
 		w_q = ((double)p.fs.w_q) / (1 << SCALE_RED);
 		idle = s * 3. / w_q;
 		p.fs.lookup_step = (int)idle / lookup_depth;
 		if (!p.fs.lookup_step)
 			p.fs.lookup_step = 1;
 		weight = 1 - w_q;
 		for (t = p.fs.lookup_step; t > 1; --t)
 			weight *= 1 - w_q;
 		p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED));
 	}
 	if (p.samples_no <= 0) {
 		i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p);
 	} else {
 		struct dn_pipe_max pm;
 		int len = sizeof(pm);
 
 		memcpy(&pm.pipe, &p, sizeof(pm.pipe));
 		memcpy(&pm.samples, samples, sizeof(pm.samples));
 
 		i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len);
 	}
 
 	if (i)
 		err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE");
 }
diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8
index 9777272b0253..de683e356390 100644
--- a/sbin/ipfw/ipfw.8
+++ b/sbin/ipfw/ipfw.8
@@ -1,3087 +1,3101 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 9, 2009
+.Dd June 24, 2009
 .Dt IPFW 8
 .Os
 .Sh NAME
 .Nm ipfw
 .Nd IP firewall and traffic shaper control program
 .Sh SYNOPSIS
 .Nm
 .Op Fl cq
 .Cm add
 .Ar rule
 .Nm
 .Op Fl acdefnNStT
 .Op Cm set Ar N
 .Brq Cm list | show
 .Op Ar rule | first-last ...
 .Nm
 .Op Fl f | q
 .Op Cm set Ar N
 .Cm flush
 .Nm
 .Op Fl q
 .Op Cm set Ar N
 .Brq Cm delete | zero | resetlog
 .Op Ar number ...
 .Nm
 .Cm enable
 .Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive
 .Nm
 .Cm disable
 .Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive
 .Pp
 .Nm
 .Cm set Oo Cm disable Ar number ... Oc Op Cm enable Ar number ...
 .Nm
 .Cm set move
 .Op Cm rule
 .Ar number Cm to Ar number
 .Nm
 .Cm set swap Ar number number
 .Nm
 .Cm set show
 .Pp
 .Nm
 .Cm table Ar number Cm add Ar addr Ns Oo / Ns Ar masklen Oc Op Ar value
 .Nm
 .Cm table Ar number Cm delete Ar addr Ns Op / Ns Ar masklen
 .Nm
 .Cm table
 .Brq Ar number | all
 .Cm flush
 .Nm
 .Cm table
 .Brq Ar number | all
 .Cm list
 .Pp
 .Nm
 .Brq Cm pipe | queue
 .Ar number
 .Cm config
 .Ar config-options
 .Nm
 .Op Fl s Op Ar field
 .Brq Cm pipe | queue
 .Brq Cm delete | list | show
 .Op Ar number ...
 .Pp
 .Nm
 .Op Fl q
 .Cm nat
 .Ar number
 .Cm config
 .Ar config-options
 .Pp
 .Nm
 .Op Fl cfnNqS
 .Oo
 .Fl p Ar preproc
 .Oo
 .Ar preproc-flags
 .Oc
 .Oc
 .Ar pathname
 .Sh DESCRIPTION
 The
 .Nm
 utility is the user interface for controlling the
 .Xr ipfw 4
 firewall and the
 .Xr dummynet 4
 traffic shaper in
 .Fx .
 .Pp
 An
 .Nm
 configuration, or
 .Em ruleset ,
 is made of a list of
 .Em rules
 numbered from 1 to 65535.
 Packets are passed to
 .Nm
 from a number of different places in the protocol stack
 (depending on the source and destination of the packet,
 it is possible that
 .Nm
 is invoked multiple times on the same packet).
 The packet passed to the firewall is compared
 against each of the rules in the firewall
 .Em ruleset .
 When a match is found, the action corresponding to the
 matching rule is performed.
 .Pp
 Depending on the action and certain system settings, packets
 can be reinjected into the firewall at some rule after the
 matching one for further processing.
 .Pp
 An
 .Nm
 ruleset always includes a
 .Em default
 rule (numbered 65535) which cannot be modified or deleted,
 and matches all packets.
 The action associated with the
 .Em default
 rule can be either
 .Cm deny
 or
 .Cm allow
 depending on how the kernel is configured.
 .Pp
 If the ruleset includes one or more rules with the
 .Cm keep-state
 or
 .Cm limit
 option,
 .Nm
 will have a
 .Em stateful
 behaviour, i.e., upon a match it will create dynamic rules matching
 the exact parameters (source and destination addresses and ports)
 of the matching packet.
 .Pp
 These dynamic rules, which have a limited lifetime, are checked
 at the first occurrence of a
 .Cm check-state ,
 .Cm keep-state
 or
 .Cm limit
 rule, and are typically used to open the firewall on-demand to
 legitimate traffic only.
 See the
 .Sx STATEFUL FIREWALL
 and
 .Sx EXAMPLES
 Sections below for more information on the stateful behaviour of
 .Nm .
 .Pp
 All rules (including dynamic ones) have a few associated counters:
 a packet count, a byte count, a log count and a timestamp
 indicating the time of the last match.
 Counters can be displayed or reset with
 .Nm
 commands.
 .Pp
 Each rule belongs to one of 32 different
 .Em sets ,
 and there are
 .Nm
 commands to atomically manipulate sets, such as enable,
 disable, swap sets, move all rules in a set to another
 one, delete all rules in a set.
 These can be useful to
 install temporary configurations, or to test them.
 See Section
 .Sx SETS OF RULES
 for more information on
 .Em sets .
 .Pp
 .Pp
 Rules can be added with the
 .Cm add
 command; deleted individually or in groups with the
 .Cm delete
 command, and globally (except those in set 31) with the
 .Cm flush
 command; displayed, optionally with the content of the
 counters, using the
 .Cm show
 and
 .Cm list
 commands.
 Finally, counters can be reset with the
 .Cm zero
 and
 .Cm resetlog
 commands.
 .Pp
 .Ss COMMAND OPTIONS
 The following general options are available when invoking
 .Nm :
 .Bl -tag -width indent
 .It Fl a
 Show counter values when listing rules.
 The
 .Cm show
 command implies this option.
 .It Fl b
 Only show the action and the comment, not the body of a rule.
 Implies
 .Fl c .
 .It Fl c
 When entering or showing rules, print them in compact form,
 i.e., omitting the "ip from any to any" string
 when this does not carry any additional information.
 .It Fl d
 When listing, show dynamic rules in addition to static ones.
 .It Fl e
 When listing and
 .Fl d
 is specified, also show expired dynamic rules.
 .It Fl f
 Do not ask for confirmation for commands that can cause problems
 if misused,
 .No i.e. Cm flush .
 If there is no tty associated with the process, this is implied.
 .It Fl i
 When listing a table (see the
 .Sx LOOKUP TABLES
 section below for more information on lookup tables), format values
 as IP addresses. By default, values are shown as integers.
 .It Fl n
 Only check syntax of the command strings, without actually passing
 them to the kernel.
 .It Fl N
 Try to resolve addresses and service names in output.
 .It Fl q
 Be quiet when executing the
 .Cm add ,
 .Cm nat ,
 .Cm zero ,
 .Cm resetlog
 or
 .Cm flush
 commands;
 (implies
 .Fl f ) .
 This is useful when updating rulesets by executing multiple
 .Nm
 commands in a script
 (e.g.,
 .Ql sh\ /etc/rc.firewall ) ,
 or by processing a file with many
 .Nm
 rules across a remote login session.
 It also stops a table add or delete
 from failing if the entry already exists or is not present.
 .Pp
 The reason why this option may be important is that
 for some of these actions,
 .Nm
 may print a message; if the action results in blocking the
 traffic to the remote client,
 the remote login session will be closed
 and the rest of the ruleset will not be processed.
 Access to the console would then be required to recover.
 .It Fl S
 When listing rules, show the
 .Em set
 each rule belongs to.
 If this flag is not specified, disabled rules will not be
 listed.
 .It Fl s Op Ar field
 When listing pipes, sort according to one of the four
 counters (total or current packets or bytes).
 .It Fl t
 When listing, show last match timestamp converted with ctime().
 .It Fl T
 When listing, show last match timestamp as seconds from the epoch.
 This form can be more convenient for postprocessing by scripts.
 .El
 .Pp
 To ease configuration, rules can be put into a file which is
 processed using
 .Nm
 as shown in the last synopsis line.
 An absolute
 .Ar pathname
 must be used.
 The file will be read line by line and applied as arguments to the
 .Nm
 utility.
 .Pp
 Optionally, a preprocessor can be specified using
 .Fl p Ar preproc
 where
 .Ar pathname
 is to be piped through.
 Useful preprocessors include
 .Xr cpp 1
 and
 .Xr m4 1 .
 If
 .Ar preproc
 does not start with a slash
 .Pq Ql /
 as its first character, the usual
 .Ev PATH
 name search is performed.
 Care should be taken with this in environments where not all
 file systems are mounted (yet) by the time
 .Nm
 is being run (e.g.\& when they are mounted over NFS).
 Once
 .Fl p
 has been specified, any additional arguments are passed on to the preprocessor
 for interpretation.
 This allows for flexible configuration files (like conditionalizing
 them on the local hostname) and the use of macros to centralize
 frequently required arguments like IP addresses.
 .Pp
 The
 .Nm
 .Cm pipe
 and
 .Cm queue
 commands are used to configure the traffic shaper, as shown in the
 .Sx TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
 Section below.
 .Pp
 If the world and the kernel get out of sync the
 .Nm
 ABI may break, preventing you from being able to add any rules.
 This can
 adversely affect the booting process.
 You can use
 .Nm
 .Cm disable
 .Cm firewall
 to temporarily disable the firewall to regain access to the network,
 allowing you to fix the problem.
 .Sh PACKET FLOW
 A packet is checked against the active ruleset in multiple places
 in the protocol stack, under control of several sysctl variables.
 These places and variables are shown below, and it is important to
 have this picture in mind in order to design a correct ruleset.
 .Bd -literal -offset indent
        ^    to upper layers    V
        |                       |
        +----------->-----------+
        ^                       V
  [ip(6)_input]           [ip(6)_output]     net.inet(6).ip(6).fw.enable=1
        |                       |
        ^                       V
  [ether_demux]        [ether_output_frame]  net.link.ether.ipfw=1
        |                       |
        +-->--[bdg_forward]-->--+            net.link.bridge.ipfw=1
        ^                       V
        |      to devices       |
 .Ed
 .Pp
 As can be noted from the above picture, the number of
 times the same packet goes through the firewall can
 vary between 0 and 4 depending on packet source and
 destination, and system configuration.
 .Pp
 Note that as packets flow through the stack, headers can be
 stripped or added to it, and so they may or may not be available
 for inspection.
 E.g., incoming packets will include the MAC header when
 .Nm
 is invoked from
 .Cm ether_demux() ,
 but the same packets will have the MAC header stripped off when
 .Nm
 is invoked from
 .Cm ip_input()
 or
 .Cm ip6_input() .
 .Pp
 Also note that each packet is always checked against the complete ruleset,
 irrespective of the place where the check occurs, or the source of the packet.
 If a rule contains some match patterns or actions which are not valid
 for the place of invocation (e.g.\& trying to match a MAC header within
 .Cm ip_input
 or
 .Cm ip6_input ),
 the match pattern will not match, but a
 .Cm not
 operator in front of such patterns
 .Em will
 cause the pattern to
 .Em always
 match on those packets.
 It is thus the responsibility of
 the programmer, if necessary, to write a suitable ruleset to
 differentiate among the possible places.
 .Cm skipto
 rules can be useful here, as an example:
 .Bd -literal -offset indent
 # packets from ether_demux or bdg_forward
 ipfw add 10 skipto 1000 all from any to any layer2 in
 # packets from ip_input
 ipfw add 10 skipto 2000 all from any to any not layer2 in
 # packets from ip_output
 ipfw add 10 skipto 3000 all from any to any not layer2 out
 # packets from ether_output_frame
 ipfw add 10 skipto 4000 all from any to any layer2 out
 .Ed
 .Pp
 (yes, at the moment there is no way to differentiate between
 ether_demux and bdg_forward).
 .Sh SYNTAX
 In general, each keyword or argument must be provided as
 a separate command line argument, with no leading or trailing
 spaces.
 Keywords are case-sensitive, whereas arguments may
 or may not be case-sensitive depending on their nature
 (e.g.\& uid's are, hostnames are not).
 .Pp
 In
 .Nm ipfw2
 you can introduce spaces after commas ',' to make
 the line more readable.
 You can also put the entire
 command (including flags) into a single argument.
 E.g., the following forms are equivalent:
 .Bd -literal -offset indent
 ipfw -q add deny src-ip 10.0.0.0/24,127.0.0.1/8
 ipfw -q add deny src-ip 10.0.0.0/24, 127.0.0.1/8
 ipfw "-q add deny src-ip 10.0.0.0/24, 127.0.0.1/8"
 .Ed
 .Sh RULE FORMAT
 The format of
 .Nm
 rules is the following:
 .Bd -ragged -offset indent
 .Bk -words
 .Op Ar rule_number
 .Op Cm set Ar set_number
 .Op Cm prob Ar match_probability
 .Ar action
 .Op Cm log Op Cm logamount Ar number
 .Op Cm altq Ar queue
 .Oo
 .Bro Cm tag | untag
 .Brc Ar number
 .Oc
 .Ar body
 .Ek
 .Ed
 .Pp
 where the body of the rule specifies which information is used
 for filtering packets, among the following:
 .Pp
 .Bl -tag -width "Source and dest. addresses and ports" -offset XXX -compact
 .It Layer-2 header fields
 When available
 .It IPv4 and IPv6 Protocol
 TCP, UDP, ICMP, etc.
 .It Source and dest. addresses and ports
 .It Direction
 See Section
 .Sx PACKET FLOW
 .It Transmit and receive interface
 By name or address
 .It Misc. IP header fields
 Version, type of service, datagram length, identification,
 fragment flag (non-zero IP offset),
 Time To Live
 .It IP options
 .It IPv6 Extension headers
 Fragmentation, Hop-by-Hop options,
 Routing Headers, Source routing rthdr0, Mobile IPv6 rthdr2, IPSec options.
 .It IPv6 Flow-ID
 .It Misc. TCP header fields
 TCP flags (SYN, FIN, ACK, RST, etc.),
 sequence number, acknowledgment number,
 window
 .It TCP options
 .It ICMP types
 for ICMP packets
 .It ICMP6 types
 for ICMP6 packets
 .It User/group ID
 When the packet can be associated with a local socket.
 .It Divert status
 Whether a packet came from a divert socket (e.g.,
 .Xr natd 8 ) .
 .It Fib annotation state
 Whether a packet has been tagged for using a specific FIB (routing table)
 in future forwarding decisions.
 .El
 .Pp
 Note that some of the above information, e.g.\& source MAC or IP addresses and
 TCP/UDP ports, could easily be spoofed, so filtering on those fields
 alone might not guarantee the desired results.
 .Bl -tag -width indent
 .It Ar rule_number
 Each rule is associated with a
 .Ar rule_number
 in the range 1..65535, with the latter reserved for the
 .Em default
 rule.
 Rules are checked sequentially by rule number.
 Multiple rules can have the same number, in which case they are
 checked (and listed) according to the order in which they have
 been added.
 If a rule is entered without specifying a number, the kernel will
 assign one in such a way that the rule becomes the last one
 before the
 .Em default
 rule.
 Automatic rule numbers are assigned by incrementing the last
 non-default rule number by the value of the sysctl variable
 .Ar net.inet.ip.fw.autoinc_step
 which defaults to 100.
 If this is not possible (e.g.\& because we would go beyond the
 maximum allowed rule number), the number of the last
 non-default value is used instead.
 .It Cm set Ar set_number
 Each rule is associated with a
 .Ar set_number
 in the range 0..31.
 Sets can be individually disabled and enabled, so this parameter
 is of fundamental importance for atomic ruleset manipulation.
 It can be also used to simplify deletion of groups of rules.
 If a rule is entered without specifying a set number,
 set 0 will be used.
 .br
 Set 31 is special in that it cannot be disabled,
 and rules in set 31 are not deleted by the
 .Nm ipfw flush
 command (but you can delete them with the
 .Nm ipfw delete set 31
 command).
 Set 31 is also used for the
 .Em default
 rule.
 .It Cm prob Ar match_probability
 A match is only declared with the specified probability
 (floating point number between 0 and 1).
 This can be useful for a number of applications such as
 random packet drop or
 (in conjunction with
 .Nm dummynet )
 to simulate the effect of multiple paths leading to out-of-order
 packet delivery.
 .Pp
 Note: this condition is checked before any other condition, including
 ones such as keep-state or check-state which might have side effects.
 .It Cm log Op Cm logamount Ar number
 When a packet matches a rule with the
 .Cm log
 keyword, a message will be
 logged to
 .Xr syslogd 8
 with a
 .Dv LOG_SECURITY
 facility.
 The logging only occurs if the sysctl variable
 .Va net.inet.ip.fw.verbose
 is set to 1
 (which is the default when the kernel is compiled with
 .Dv IPFIREWALL_VERBOSE )
 and the number of packets logged so far for that
 particular rule does not exceed the
 .Cm logamount
 parameter.
 If no
 .Cm logamount
 is specified, the limit is taken from the sysctl variable
 .Va net.inet.ip.fw.verbose_limit .
 In both cases, a value of 0 removes the logging limit.
 .Pp
 Once the limit is reached, logging can be re-enabled by
 clearing the logging counter or the packet counter for that entry, see the
 .Cm resetlog
 command.
 .Pp
 Note: logging is done after all other packet matching conditions
 have been successfully verified, and before performing the final
 action (accept, deny, etc.) on the packet.
 .It Cm tag Ar number
 When a packet matches a rule with the
 .Cm tag
 keyword, the numeric tag for the given
 .Ar number
 in the range 1..65534 will be attached to the packet.
 The tag acts as an internal marker (it is not sent out over
 the wire) that can be used to identify these packets later on.
 This can be used, for example, to provide trust between interfaces
 and to start doing policy-based filtering.
 A packet can have multiple tags at the same time.
 Tags are "sticky", meaning once a tag is applied to a packet by a
 matching rule it exists until explicit removal.
 Tags are kept with the packet everywhere within the kernel, but are
 lost when the packet leaves the kernel, for example, on transmitting
 the packet out to the network or sending it to a
 .Xr divert 4
 socket.
 .Pp
 To check for previously applied tags, use the
 .Cm tagged
 rule option.
 To delete a previously applied tag, use the
 .Cm untag
 keyword.
 .Pp
 Note: since tags are kept with the packet everywhere in kernelspace,
 they can be set and unset anywhere in the kernel network subsystem
 (using the
 .Xr mbuf_tags 9
 facility), not only by means of the
 .Xr ipfw 4
 .Cm tag
 and
 .Cm untag
 keywords.
 For example, there can be a specialized
 .Xr netgraph 4
 node doing traffic analysis and tagging for later inspection
 in the firewall.
 .It Cm untag Ar number
 When a packet matches a rule with the
 .Cm untag
 keyword, the tag with the number
 .Ar number
 is searched among the tags attached to this packet and,
 if found, removed from it.
 Other tags bound to the packet, if present, are left untouched.
 .It Cm altq Ar queue
 When a packet matches a rule with the
 .Cm altq
 keyword, the ALTQ identifier for the given
 .Ar queue
 (see
 .Xr altq 4 )
 will be attached.
 Note that this ALTQ tag is only meaningful for packets going "out" of IPFW,
 and not being rejected or going to divert sockets.
 Note that if there is insufficient memory at the time the packet is
 processed, it will not be tagged, so it is wise to make your ALTQ
 "default" queue policy account for this.
 If multiple
 .Cm altq
 rules match a single packet, only the first one adds the ALTQ classification
 tag.
 In doing so, traffic may be shaped by using
 .Cm count Cm altq Ar queue
 rules for classification early in the ruleset, then later applying
 the filtering decision.
 For example,
 .Cm check-state
 and
 .Cm keep-state
 rules may come later and provide the actual filtering decisions in
 addition to the fallback ALTQ tag.
 .Pp
 You must run
 .Xr pfctl 8
 to set up the queues before IPFW will be able to look them up by name,
 and if the ALTQ disciplines are rearranged, the rules containing the
 queue identifiers in the kernel will likely have gone stale and need
 to be reloaded.
 Stale queue identifiers will probably result in misclassification.
 .Pp
 All system ALTQ processing can be turned on or off via
 .Nm
 .Cm enable Ar altq
 and
 .Nm
 .Cm disable Ar altq .
 The usage of
 .Va net.inet.ip.fw.one_pass
 is irrelevant to ALTQ traffic shaping, as the actual rule action is followed
 always after adding an ALTQ tag.
 .El
 .Ss RULE ACTIONS
 A rule can be associated with one of the following actions, which
 will be executed when the packet matches the body of the rule.
 .Bl -tag -width indent
 .It Cm allow | accept | pass | permit
 Allow packets that match rule.
 The search terminates.
 .It Cm check-state
 Checks the packet against the dynamic ruleset.
 If a match is found, execute the action associated with
 the rule which generated this dynamic rule, otherwise
 move to the next rule.
 .br
 .Cm Check-state
 rules do not have a body.
 If no
 .Cm check-state
 rule is found, the dynamic ruleset is checked at the first
 .Cm keep-state
 or
 .Cm limit
 rule.
 .It Cm count
 Update counters for all packets that match rule.
 The search continues with the next rule.
 .It Cm deny | drop
 Discard packets that match this rule.
 The search terminates.
 .It Cm divert Ar port
 Divert packets that match this rule to the
 .Xr divert 4
 socket bound to port
 .Ar port .
 The search terminates.
 .It Cm fwd | forward Ar ipaddr | tablearg Ns Op , Ns Ar port
 Change the next-hop on matching packets to
 .Ar ipaddr ,
 which can be an IP address or a host name.
 The next hop can also be supplied by the last table
 looked up for the packet by using the
 .Cm tablearg
 keyword instead of an explicit address.
 The search terminates if this rule matches.
 .Pp
 If
 .Ar ipaddr
 is a local address, then matching packets will be forwarded to
 .Ar port
 (or the port number in the packet if one is not specified in the rule)
 on the local machine.
 .br
 If
 .Ar ipaddr
 is not a local address, then the port number
 (if specified) is ignored, and the packet will be
 forwarded to the remote address, using the route as found in
 the local routing table for that IP.
 .br
 A
 .Cm fwd
 rule will not match layer-2 packets (those received
 on ether_input, ether_output, or bridged).
 .br
 The
 .Cm fwd
 action does not change the contents of the packet at all.
 In particular, the destination address remains unmodified, so
 packets forwarded to another system will usually be rejected by that system
 unless there is a matching rule on that system to capture them.
 For packets forwarded locally,
 the local address of the socket will be
 set to the original destination address of the packet.
 This makes the
 .Xr netstat 1
 entry look rather weird but is intended for
 use with transparent proxy servers.
 .Pp
 To enable
 .Cm fwd
 a custom kernel needs to be compiled with the option
 .Cd "options IPFIREWALL_FORWARD" .
 .It Cm nat Ar nat_nr
 Pass packet to a
 nat instance
 (for network address translation, address redirect, etc.):
 see the
 .Sx NETWORK ADDRESS TRANSLATION (NAT)
 Section for further information.
 .It Cm pipe Ar pipe_nr
 Pass packet to a
 .Nm dummynet
 .Dq pipe
 (for bandwidth limitation, delay, etc.).
 See the
 .Sx TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
 Section for further information.
 The search terminates; however, on exit from the pipe and if
 the
 .Xr sysctl 8
 variable
 .Va net.inet.ip.fw.one_pass
 is not set, the packet is passed again to the firewall code
 starting from the next rule.
 .It Cm queue Ar queue_nr
 Pass packet to a
 .Nm dummynet
 .Dq queue
 (for bandwidth limitation using WF2Q+).
 .It Cm reject
 (Deprecated).
 Synonym for
 .Cm unreach host .
 .It Cm reset
 Discard packets that match this rule, and if the
 packet is a TCP packet, try to send a TCP reset (RST) notice.
 The search terminates.
 .It Cm reset6
 Discard packets that match this rule, and if the
 packet is a TCP packet, try to send a TCP reset (RST) notice.
 The search terminates.
 .It Cm skipto Ar number | tablearg
 Skip all subsequent rules numbered less than
 .Ar number .
 The search continues with the first rule numbered
 .Ar number
 or higher.
 It is possible to use the 
 .Cm tablearg
 keyword with a skipto for a 
 .Em computed
 skipto, but care should be used, as no destination caching
 is possible in this case so the rules are always walked to find it,
 starting from the 
 .Cm skipto .
 .It Cm tee Ar port
 Send a copy of packets matching this rule to the
 .Xr divert 4
 socket bound to port
 .Ar port .
 The search continues with the next rule.
 .It Cm unreach Ar code
 Discard packets that match this rule, and try to send an ICMP
 unreachable notice with code
 .Ar code ,
 where
 .Ar code
 is a number from 0 to 255, or one of these aliases:
 .Cm net , host , protocol , port ,
 .Cm needfrag , srcfail , net-unknown , host-unknown ,
 .Cm isolated , net-prohib , host-prohib , tosnet ,
 .Cm toshost , filter-prohib , host-precedence
 or
 .Cm precedence-cutoff .
 The search terminates.
 .It Cm unreach6 Ar code
 Discard packets that match this rule, and try to send an ICMPv6
 unreachable notice with code
 .Ar code ,
 where
 .Ar code
 is one of the numbers 0, 1, 3 or 4, or one of these aliases:
 .Cm no-route , admin-prohib , address
 or
 .Cm port .
 The search terminates.
 .It Cm netgraph Ar cookie
 Divert packet into netgraph with given
 .Ar cookie .
 The search terminates.
 If packet is later returned from netgraph it is either
 accepted or continues with the next rule, depending on
 .Va net.inet.ip.fw.one_pass
 sysctl variable.
 .It Cm ngtee Ar cookie
 A copy of packet is diverted into netgraph, original
 packet is either accepted or continues with the next rule, depending on
 .Va net.inet.ip.fw.one_pass
 sysctl variable.
 See
 .Xr ng_ipfw 4
 for more information on
 .Cm netgraph
 and
 .Cm ngtee
 actions.
 .It Cm setfib Ar fibnum
 The packet is tagged so as to use the FIB (routing table)
 .Ar fibnum
 in any subsequent forwarding decisions.
 Initially this is limited to the values 0 through 15, see
 .Xr setfib 8 .
 Processing continues at the next rule.
 .It Cm reass
 Queue and reassemble ip fragments.
 If the packet is not fragmented, counters are updated and processing continues with the next rule.
 If the packet is the last logical fragment, the packet is reassembled and, if
 .Va net.inet.ip.fw.one_pass
 is set to 0, processing continues with the next rule, else packet is allowed to pass and search terminates.
 If the packet is a fragment in the middle, it is consumed and processing stops immediately.
 .Pp
 Fragments handling can be tuned via
 .Va net.inet.ip.maxfragpackets
 and
 .Va net.inet.ip.maxfragsperpacket
 which limit, respectively, the maximum number of processable fragments (default: 800) and
 the maximum number of fragments per packet (default: 16).
 .Pp
 NOTA BENE: since fragments do not contain port numbers, port specifications should be avoided with the
 .Nm reass
 rule.
 Alternatively, direction-based (like
 .Nm in
 /
 .Nm out )
 and source-based (like
 .Nm via )
 match patterns can be used to select fragments.
 .Pp
 Usually a simple rule like:
 .Bd -literal -offset indent
 # reassemble incoming fragments
 ipfw add reass all from any to any in
 .Ed
 .Pp
 is all you need at the beginning of your ruleset.
 .El
 .Ss RULE BODY
 The body of a rule contains zero or more patterns (such as
 specific source and destination addresses or ports,
 protocol options, incoming or outgoing interfaces, etc.)
 that the packet must match in order to be recognised.
 In general, the patterns are connected by (implicit)
 .Cm and
 operators -- i.e., all must match in order for the
 rule to match.
 Individual patterns can be prefixed by the
 .Cm not
 operator to reverse the result of the match, as in
 .Pp
 .Dl "ipfw add 100 allow ip from not 1.2.3.4 to any"
 .Pp
 Additionally, sets of alternative match patterns
 .Pq Em or-blocks
 can be constructed by putting the patterns in
 lists enclosed between parentheses ( ) or braces { }, and
 using the
 .Cm or
 operator as follows:
 .Pp
 .Dl "ipfw add 100 allow ip from { x or not y or z } to any"
 .Pp
 Only one level of parentheses is allowed.
 Beware that most shells have special meanings for parentheses
 or braces, so it is advisable to put a backslash \\ in front of them
 to prevent such interpretations.
 .Pp
 The body of a rule must in general include a source and destination
 address specifier.
 The keyword
 .Ar any
 can be used in various places to specify that the content of
 a required field is irrelevant.
 .Pp
 The rule body has the following format:
 .Bd -ragged -offset indent
 .Op Ar proto Cm from Ar src Cm to Ar dst
 .Op Ar options
 .Ed
 .Pp
 The first part (proto from src to dst) is for backward
 compatibility with earlier versions of
 .Fx .
 In modern
 .Fx
 any match pattern (including MAC headers, IP protocols,
 addresses and ports) can be specified in the
 .Ar options
 section.
 .Pp
 Rule fields have the following meaning:
 .Bl -tag -width indent
 .It Ar proto : protocol | Cm { Ar protocol Cm or ... }
 .It Ar protocol : Oo Cm not Oc Ar protocol-name | protocol-number
 An IP protocol specified by number or name
 (for a complete list see
 .Pa /etc/protocols ) ,
 or one of the following keywords:
 .Bl -tag -width indent
 .It Cm ip4 | ipv4
 Matches IPv4 packets.
 .It Cm ip6 | ipv6
 Matches IPv6 packets.
 .It Cm ip | all
 Matches any packet.
 .El
 .Pp
 The
 .Cm ipv6
 in
 .Cm proto
 option will be treated as inner protocol.
 And, the
 .Cm ipv4
 is not available in
 .Cm proto
 option.
 .Pp
 The
 .Cm { Ar protocol Cm or ... }
 format (an
 .Em or-block )
 is provided for convenience only but its use is deprecated.
 .It Ar src No and Ar dst : Bro Cm addr | Cm { Ar addr Cm or ... } Brc Op Oo Cm not Oc Ar ports
 An address (or a list, see below)
 optionally followed by
 .Ar ports
 specifiers.
 .Pp
 The second format
 .Em ( or-block
 with multiple addresses) is provided for convenience only and
 its use is discouraged.
 .It Ar addr : Oo Cm not Oc Bro
 .Cm any | me | me6 |
 .Cm table Ns Pq Ar number Ns Op , Ns Ar value
 .Ar | addr-list | addr-set
 .Brc
 .It Cm any
 matches any IP address.
 .It Cm me
 matches any IP address configured on an interface in the system.
 .It Cm me6
 matches any IPv6 address configured on an interface in the system.
 The address list is evaluated at the time the packet is
 analysed.
 .It Cm table Ns Pq Ar number Ns Op , Ns Ar value
 Matches any IPv4 address for which an entry exists in the lookup table
 .Ar number .
 If an optional 32-bit unsigned
 .Ar value
 is also specified, an entry will match only if it has this value.
 See the
 .Sx LOOKUP TABLES
 section below for more information on lookup tables.
 .It Ar addr-list : ip-addr Ns Op Ns , Ns Ar addr-list
 .It Ar ip-addr :
 A host or subnet address specified in one of the following ways:
 .Bl -tag -width indent
 .It Ar numeric-ip | hostname
 Matches a single IPv4 address, specified as dotted-quad or a hostname.
 Hostnames are resolved at the time the rule is added to the firewall list.
 .It Ar addr Ns / Ns Ar masklen
 Matches all addresses with base
 .Ar addr
 (specified as an IP address, a network number, or a hostname)
 and mask width of
 .Cm masklen
 bits.
 As an example, 1.2.3.4/25 or 1.2.3.0/25 will match
 all IP numbers from 1.2.3.0 to 1.2.3.127 .
 .It Ar addr Ns : Ns Ar mask
 Matches all addresses with base
 .Ar addr
 (specified as an IP address, a network number, or a hostname)
 and the mask of
 .Ar mask ,
 specified as a dotted quad.
 As an example, 1.2.3.4:255.0.255.0 or 1.0.3.0:255.0.255.0 will match
 1.*.3.*.
 This form is advised only for non-contiguous
 masks.
 It is better to resort to the
 .Ar addr Ns / Ns Ar masklen
 format for contiguous masks, which is more compact and less
 error-prone.
 .El
 .It Ar addr-set : addr Ns Oo Ns / Ns Ar masklen Oc Ns Cm { Ns Ar list Ns Cm }
 .It Ar list : Bro Ar num | num-num Brc Ns Op Ns , Ns Ar list
 Matches all addresses with base address
 .Ar addr
 (specified as an IP address, a network number, or a hostname)
 and whose last byte is in the list between braces { } .
 Note that there must be no spaces between braces and
 numbers (spaces after commas are allowed).
 Elements of the list can be specified as single entries
 or ranges.
 The
 .Ar masklen
 field is used to limit the size of the set of addresses,
 and can have any value between 24 and 32.
 If not specified,
 it will be assumed as 24.
 .br
 This format is particularly useful to handle sparse address sets
 within a single rule.
 Because the matching occurs using a
 bitmask, it takes constant time and dramatically reduces
 the complexity of rulesets.
 .br
 As an example, an address specified as 1.2.3.4/24{128,35-55,89}
 or 1.2.3.0/24{128,35-55,89}
 will match the following IP addresses:
 .br
 1.2.3.128, 1.2.3.35 to 1.2.3.55, 1.2.3.89 .
 .It Ar addr6-list : ip6-addr Ns Op Ns , Ns Ar addr6-list
 .It Ar ip6-addr :
 A host or subnet specified in one of the following ways:
 .Pp
 .Bl -tag -width indent
 .It Ar numeric-ip | hostname
 Matches a single IPv6 address as allowed by
 .Xr inet_pton 3
 or a hostname.
 Hostnames are resolved at the time the rule is added to the firewall
 list.
 .It Ar addr Ns / Ns Ar masklen
 Matches all IPv6 addresses with base
 .Ar addr
 (specified as allowed by
 .Xr inet_pton 3
 or a hostname)
 and mask width of
 .Cm masklen
 bits.
 .El
 .Pp
 No support for sets of IPv6 addresses is provided because IPv6 addresses
 are typically random past the initial prefix.
 .It Ar ports : Bro Ar port | port Ns \&- Ns Ar port Ns Brc Ns Op , Ns Ar ports
 For protocols which support port numbers (such as TCP and UDP), optional
 .Cm ports
 may be specified as one or more ports or port ranges, separated
 by commas but no spaces, and an optional
 .Cm not
 operator.
 The
 .Ql \&-
 notation specifies a range of ports (including boundaries).
 .Pp
 Service names (from
 .Pa /etc/services )
 may be used instead of numeric port values.
 The length of the port list is limited to 30 ports or ranges,
 though one can specify larger ranges by using an
 .Em or-block
 in the
 .Cm options
 section of the rule.
 .Pp
 A backslash
 .Pq Ql \e
 can be used to escape the dash
 .Pq Ql -
 character in a service name (from a shell, the backslash must be
 typed twice to avoid the shell itself interpreting it as an escape
 character).
 .Pp
 .Dl "ipfw add count tcp from any ftp\e\e-data-ftp to any"
 .Pp
 Fragmented packets which have a non-zero offset (i.e., not the first
 fragment) will never match a rule which has one or more port
 specifications.
 See the
 .Cm frag
 option for details on matching fragmented packets.
 .El
 .Ss RULE OPTIONS (MATCH PATTERNS)
 Additional match patterns can be used within
 rules.
 Zero or more of these so-called
 .Em options
 can be present in a rule, optionally prefixed by the
 .Cm not
 operand, and possibly grouped into
 .Em or-blocks .
 .Pp
 The following match patterns can be used (listed in alphabetical order):
 .Bl -tag -width indent
 .It Cm // this is a comment.
 Inserts the specified text as a comment in the rule.
 Everything following // is considered as a comment and stored in the rule.
 You can have comment-only rules, which are listed as having a
 .Cm count
 action followed by the comment.
 .It Cm bridged
 Alias for
 .Cm layer2 .
 .It Cm diverted
 Matches only packets generated by a divert socket.
 .It Cm diverted-loopback
 Matches only packets coming from a divert socket back into the IP stack
 input for delivery.
 .It Cm diverted-output
 Matches only packets going from a divert socket back outward to the IP
 stack output for delivery.
 .It Cm dst-ip Ar ip-address
 Matches IPv4 packets whose destination IP is one of the address(es)
 specified as argument.
 .It Bro Cm dst-ip6 | dst-ipv6 Brc Ar ip6-address
 Matches IPv6 packets whose destination IP is one of the address(es)
 specified as argument.
 .It Cm dst-port Ar ports
 Matches IP packets whose destination port is one of the port(s)
 specified as argument.
 .It Cm established
 Matches TCP packets that have the RST or ACK bits set.
 .It Cm ext6hdr Ar header
 Matches IPv6 packets containing the extended header given by
 .Ar header .
 Supported headers are:
 .Pp
 Fragment,
 .Pq Cm frag ,
 Hop-by-hop options
 .Pq Cm hopopt ,
 any type of Routing Header
 .Pq Cm route ,
 Source routing Routing Header Type 0
 .Pq Cm rthdr0 ,
 Mobile IPv6 Routing Header Type 2
 .Pq Cm rthdr2 ,
 Destination options
 .Pq Cm dstopt ,
 IPSec authentication headers
 .Pq Cm ah ,
 and IPsec encapsulated security payload headers
 .Pq Cm esp .
 .It Cm fib Ar fibnum
 Matches a packet that has been tagged to use
 the given FIB (routing table) number.
 .It Cm flow-id Ar labels
 Matches IPv6 packets containing any of the flow labels given in
 .Ar labels .
 .Ar labels
 is a comma separated list of numeric flow labels.
 .It Cm frag
 Matches packets that are fragments and not the first
 fragment of an IP datagram.
 Note that these packets will not have
 the next protocol header (e.g.\& TCP, UDP) so options that look into
 these headers cannot match.
 .It Cm gid Ar group
 Matches all TCP or UDP packets sent by or received for a
 .Ar group .
 A
 .Ar group
 may be specified by name or number.
 .It Cm jail Ar prisonID
 Matches all TCP or UDP packets sent by or received for the
 jail whose prison ID is
 .Ar prisonID .
 .It Cm icmptypes Ar types
 Matches ICMP packets whose ICMP type is in the list
 .Ar types .
 The list may be specified as any combination of
 individual types (numeric) separated by commas.
 .Em Ranges are not allowed .
 The supported ICMP types are:
 .Pp
 echo reply
 .Pq Cm 0 ,
 destination unreachable
 .Pq Cm 3 ,
 source quench
 .Pq Cm 4 ,
 redirect
 .Pq Cm 5 ,
 echo request
 .Pq Cm 8 ,
 router advertisement
 .Pq Cm 9 ,
 router solicitation
 .Pq Cm 10 ,
 time-to-live exceeded
 .Pq Cm 11 ,
 IP header bad
 .Pq Cm 12 ,
 timestamp request
 .Pq Cm 13 ,
 timestamp reply
 .Pq Cm 14 ,
 information request
 .Pq Cm 15 ,
 information reply
 .Pq Cm 16 ,
 address mask request
 .Pq Cm 17
 and address mask reply
 .Pq Cm 18 .
 .It Cm icmp6types Ar types
 Matches ICMP6 packets whose ICMP6 type is in the list of
 .Ar types .
 The list may be specified as any combination of
 individual types (numeric) separated by commas.
 .Em Ranges are not allowed .
 .It Cm in | out
 Matches incoming or outgoing packets, respectively.
 .Cm in
 and
 .Cm out
 are mutually exclusive (in fact,
 .Cm out
 is implemented as
 .Cm not in Ns No ).
 .It Cm ipid Ar id-list
 Matches IPv4 packets whose
 .Cm ip_id
 field has value included in
 .Ar id-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 .It Cm iplen Ar len-list
 Matches IP packets whose total length, including header and data, is
 in the set
 .Ar len-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 .It Cm ipoptions Ar spec
 Matches packets whose IPv4 header contains the comma separated list of
 options specified in
 .Ar spec .
 The supported IP options are:
 .Pp
 .Cm ssrr
 (strict source route),
 .Cm lsrr
 (loose source route),
 .Cm rr
 (record packet route) and
 .Cm ts
 (timestamp).
 The absence of a particular option may be denoted
 with a
 .Ql \&! .
 .It Cm ipprecedence Ar precedence
 Matches IPv4 packets whose precedence field is equal to
 .Ar precedence .
 .It Cm ipsec
 Matches packets that have IPSEC history associated with them
 (i.e., the packet comes encapsulated in IPSEC, the kernel
 has IPSEC support and IPSEC_FILTERTUNNEL option, and can correctly
 decapsulate it).
 .Pp
 Note that specifying
 .Cm ipsec
 is different from specifying
 .Cm proto Ar ipsec
 as the latter will only look at the specific IP protocol field,
 irrespective of IPSEC kernel support and the validity of the IPSEC data.
 .Pp
 Further note that this flag is silently ignored in kernels without
 IPSEC support.
 It does not affect rule processing when given and the
 rules are handled as if with no
 .Cm ipsec
 flag.
 .It Cm iptos Ar spec
 Matches IPv4 packets whose
 .Cm tos
 field contains the comma separated list of
 service types specified in
 .Ar spec .
 The supported IP types of service are:
 .Pp
 .Cm lowdelay
 .Pq Dv IPTOS_LOWDELAY ,
 .Cm throughput
 .Pq Dv IPTOS_THROUGHPUT ,
 .Cm reliability
 .Pq Dv IPTOS_RELIABILITY ,
 .Cm mincost
 .Pq Dv IPTOS_MINCOST ,
 .Cm congestion
 .Pq Dv IPTOS_ECN_CE .
 The absence of a particular type may be denoted
 with a
 .Ql \&! .
 .It Cm ipttl Ar ttl-list
 Matches IPv4 packets whose time to live is included in
 .Ar ttl-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 .It Cm ipversion Ar ver
 Matches IP packets whose IP version field is
 .Ar ver .
 .It Cm keep-state
 Upon a match, the firewall will create a dynamic rule, whose
 default behaviour is to match bidirectional traffic between
 source and destination IP/port using the same protocol.
 The rule has a limited lifetime (controlled by a set of
 .Xr sysctl 8
 variables), and the lifetime is refreshed every time a matching
 packet is found.
 .It Cm layer2
 Matches only layer2 packets, i.e., those passed to
 .Nm
 from ether_demux() and ether_output_frame().
 .It Cm limit Bro Cm src-addr | src-port | dst-addr | dst-port Brc Ar N
 The firewall will only allow
 .Ar N
 connections with the same
 set of parameters as specified in the rule.
 One or more
 of source and destination addresses and ports can be
 specified.
 Currently,
 only IPv4 flows are supported.
 .It Cm { MAC | mac } Ar dst-mac src-mac
 Match packets with a given
 .Ar dst-mac
 and
 .Ar src-mac
 addresses, specified as the
 .Cm any
 keyword (matching any MAC address), or six groups of hex digits
 separated by colons,
 and optionally followed by a mask indicating the significant bits.
 The mask may be specified using either of the following methods:
 .Bl -enum -width indent
 .It
 A slash
 .Pq /
 followed by the number of significant bits.
 For example, an address with 33 significant bits could be specified as:
 .Pp
 .Dl "MAC 10:20:30:40:50:60/33 any"
 .Pp
 .It
 An ampersand
 .Pq &
 followed by a bitmask specified as six groups of hex digits separated
 by colons.
 For example, an address in which the last 16 bits are significant could
 be specified as:
 .Pp
 .Dl "MAC 10:20:30:40:50:60&00:00:00:00:ff:ff any"
 .Pp
 Note that the ampersand character has a special meaning in many shells
 and should generally be escaped.
 .Pp
 .El
 Note that the order of MAC addresses (destination first,
 source second) is
 the same as on the wire, but the opposite of the one used for
 IP addresses.
 .It Cm mac-type Ar mac-type
 Matches packets whose Ethernet Type field
 corresponds to one of those specified as argument.
 .Ar mac-type
 is specified in the same way as
 .Cm port numbers
 (i.e., one or more comma-separated single values or ranges).
 You can use symbolic names for known values such as
 .Em vlan , ipv4, ipv6 .
 Values can be entered as decimal or hexadecimal (if prefixed by 0x),
 and they are always printed as hexadecimal (unless the
 .Cm -N
 option is used, in which case symbolic resolution will be attempted).
 .It Cm proto Ar protocol
 Matches packets with the corresponding IP protocol.
 .It Cm recv | xmit | via Brq Ar ifX | Ar if Ns Cm * | Ar ipno | Ar any
 Matches packets received, transmitted or going through,
 respectively, the interface specified by exact name
 .Ns No ( Ar ifX Ns No ),
 by device name
 .Ns No ( Ar if Ns Ar * Ns No ),
 by IP address, or through some interface.
 .Pp
 The
 .Cm via
 keyword causes the interface to always be checked.
 If
 .Cm recv
 or
 .Cm xmit
 is used instead of
 .Cm via ,
 then only the receive or transmit interface (respectively)
 is checked.
 By specifying both, it is possible to match packets based on
 both receive and transmit interface, e.g.:
 .Pp
 .Dl "ipfw add deny ip from any to any out recv ed0 xmit ed1"
 .Pp
 The
 .Cm recv
 interface can be tested on either incoming or outgoing packets,
 while the
 .Cm xmit
 interface can only be tested on outgoing packets.
 So
 .Cm out
 is required (and
 .Cm in
 is invalid) whenever
 .Cm xmit
 is used.
 .Pp
 A packet may not have a receive or transmit interface: packets
 originating from the local host have no receive interface,
 while packets destined for the local host have no transmit
 interface.
 .It Cm setup
 Matches TCP packets that have the SYN bit set but no ACK bit.
 This is the short form of
 .Dq Li tcpflags\ syn,!ack .
 .It Cm src-ip Ar ip-address
 Matches IPv4 packets whose source IP is one of the address(es)
 specified as an argument.
 .It Cm src-ip6 Ar ip6-address
 Matches IPv6 packets whose source IP is one of the address(es)
 specified as an argument.
 .It Cm src-port Ar ports
 Matches IP packets whose source port is one of the port(s)
 specified as argument.
 .It Cm tagged Ar tag-list
 Matches packets whose tags are included in
 .Ar tag-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 Tags can be applied to the packet using
 .Cm tag
 rule action parameter (see its description for details on tags).
 .It Cm tcpack Ar ack
 TCP packets only.
 Match if the TCP header acknowledgment number field is set to
 .Ar ack .
 .It Cm tcpdatalen Ar tcpdatalen-list
 Matches TCP packets whose length of TCP data is
 .Ar tcpdatalen-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 .It Cm tcpflags Ar spec
 TCP packets only.
 Match if the TCP header contains the comma separated list of
 flags specified in
 .Ar spec .
 The supported TCP flags are:
 .Pp
 .Cm fin ,
 .Cm syn ,
 .Cm rst ,
 .Cm psh ,
 .Cm ack
 and
 .Cm urg .
 The absence of a particular flag may be denoted
 with a
 .Ql \&! .
 A rule which contains a
 .Cm tcpflags
 specification can never match a fragmented packet which has
 a non-zero offset.
 See the
 .Cm frag
 option for details on matching fragmented packets.
 .It Cm tcpseq Ar seq
 TCP packets only.
 Match if the TCP header sequence number field is set to
 .Ar seq .
 .It Cm tcpwin Ar win
 TCP packets only.
 Match if the TCP header window field is set to
 .Ar win .
 .It Cm tcpoptions Ar spec
 TCP packets only.
 Match if the TCP header contains the comma separated list of
 options specified in
 .Ar spec .
 The supported TCP options are:
 .Pp
 .Cm mss
 (maximum segment size),
 .Cm window
 (tcp window advertisement),
 .Cm sack
 (selective ack),
 .Cm ts
 (rfc1323 timestamp) and
 .Cm cc
 (rfc1644 t/tcp connection count).
 The absence of a particular option may be denoted
 with a
 .Ql \&! .
 .It Cm uid Ar user
 Match all TCP or UDP packets sent by or received for a
 .Ar user .
 A
 .Ar user
 may be matched by name or identification number.
 .It Cm verrevpath
 For incoming packets,
 a routing table lookup is done on the packet's source address.
 If the interface on which the packet entered the system matches the
 outgoing interface for the route,
 the packet matches.
 If the interfaces do not match up,
 the packet does not match.
 All outgoing packets or packets with no incoming interface match.
 .Pp
 The name and functionality of the option is intentionally similar to
 the Cisco IOS command:
 .Pp
 .Dl ip verify unicast reverse-path
 .Pp
 This option can be used to make anti-spoofing rules to reject all
 packets with source addresses not from this interface.
 See also the option
 .Cm antispoof .
 .It Cm versrcreach
 For incoming packets,
 a routing table lookup is done on the packet's source address.
 If a route to the source address exists, but not the default route
 or a blackhole/reject route, the packet matches.
 Otherwise, the packet does not match.
 All outgoing packets match.
 .Pp
 The name and functionality of the option is intentionally similar to
 the Cisco IOS command:
 .Pp
 .Dl ip verify unicast source reachable-via any
 .Pp
 This option can be used to make anti-spoofing rules to reject all
 packets whose source address is unreachable.
 .It Cm antispoof
 For incoming packets, the packet's source address is checked if it
 belongs to a directly connected network.
 If the network is directly connected, then the interface the packet
 came on in is compared to the interface the network is connected to.
 When incoming interface and directly connected interface are not the
 same, the packet does not match.
 Otherwise, the packet does match.
 All outgoing packets match.
 .Pp
 This option can be used to make anti-spoofing rules to reject all
 packets that pretend to be from a directly connected network but do
 not come in through that interface.
 This option is similar to but more restricted than
 .Cm verrevpath
 because it engages only on packets with source addresses of directly
 connected networks instead of all source addresses.
 .El
 .Sh LOOKUP TABLES
 Lookup tables are useful to handle large sparse address sets,
 typically from a hundred to several thousands of entries.
 There may be up to 128 different lookup tables, numbered 0 to 127.
 .Pp
 Each entry is represented by an
 .Ar addr Ns Op / Ns Ar masklen
 and will match all addresses with base
 .Ar addr
 (specified as an IP address or a hostname)
 and mask width of
 .Ar masklen
 bits.
 If
 .Ar masklen
 is not specified, it defaults to 32.
 When looking up an IP address in a table, the most specific
 entry will match.
 Associated with each entry is a 32-bit unsigned
 .Ar value ,
 which can optionally be checked by a rule matching code.
 When adding an entry, if
 .Ar value
 is not specified, it defaults to 0.
 .Pp
 An entry can be added to a table
 .Pq Cm add ,
 removed from a table
 .Pq Cm delete ,
 a table can be examined
 .Pq Cm list
 or flushed
 .Pq Cm flush .
 .Pp
 Internally, each table is stored in a Radix tree, the same way as
 the routing table (see
 .Xr route 4 ) .
 .Pp
 Lookup tables currently support IPv4 addresses only.
 .Pp
 The
 .Cm tablearg
 feature provides the ability to use a value, looked up in the table, as
 the argument for a rule action, action parameter or rule option.
 This can significantly reduce number of rules in some configurations.
 If two tables are used in a rule, the result of the second (destination)
 is used.
 The
 .Cm tablearg
 argument can be used with the following actions:
 .Cm nat, pipe , queue, divert, tee, netgraph, ngtee, fwd, skipto
 action parameters:
 .Cm tag, untag,
 rule options:
 .Cm limit, tagged.
 .Pp
 When used with
 .Cm fwd
 it is possible to supply table entries with values
 that are in the form of IP addresses or hostnames.
 See the
 .Sx EXAMPLES
 Section for example usage of tables and the tablearg keyword.
 .Pp
 When used with the
 .Cm skipto
 action, the user should be aware that the code will walk the ruleset
 up to a rule equal to, or past, the given number, and should therefore try to keep the
 ruleset compact between the skipto and the target rules.
 .Sh SETS OF RULES
 Each rule belongs to one of 32 different
 .Em sets ,
 numbered 0 to 31.
 Set 31 is reserved for the default rule.
 .Pp
 By default, rules are put in set 0, unless you use the
 .Cm set N
 attribute when entering a new rule.
 Sets can be individually and atomically enabled or disabled,
 so this mechanism permits an easy way to store multiple configurations
 of the firewall and quickly (and atomically) switch between them.
 The command to enable/disable sets is
 .Bd -ragged -offset indent
 .Nm
 .Cm set Oo Cm disable Ar number ... Oc Op Cm enable Ar number ...
 .Ed
 .Pp
 where multiple
 .Cm enable
 or
 .Cm disable
 sections can be specified.
 Command execution is atomic on all the sets specified in the command.
 By default, all sets are enabled.
 .Pp
 When you disable a set, its rules behave as if they do not exist
 in the firewall configuration, with only one exception:
 .Bd -ragged -offset indent
 dynamic rules created from a rule before it had been disabled
 will still be active until they expire.
 In order to delete
 dynamic rules you have to explicitly delete the parent rule
 which generated them.
 .Ed
 .Pp
 The set number of rules can be changed with the command
 .Bd -ragged -offset indent
 .Nm
 .Cm set move
 .Brq Cm rule Ar rule-number | old-set
 .Cm to Ar new-set
 .Ed
 .Pp
 Also, you can atomically swap two rulesets with the command
 .Bd -ragged -offset indent
 .Nm
 .Cm set swap Ar first-set second-set
 .Ed
 .Pp
 See the
 .Sx EXAMPLES
 Section on some possible uses of sets of rules.
 .Sh STATEFUL FIREWALL
 Stateful operation is a way for the firewall to dynamically
 create rules for specific flows when packets that
 match a given pattern are detected.
 Support for stateful
 operation comes through the
 .Cm check-state , keep-state
 and
 .Cm limit
 options of
 .Nm rules .
 .Pp
 Dynamic rules are created when a packet matches a
 .Cm keep-state
 or
 .Cm limit
 rule, causing the creation of a
 .Em dynamic
 rule which will match all and only packets with
 a given
 .Em protocol
 between a
 .Em src-ip/src-port dst-ip/dst-port
 pair of addresses
 .Em ( src
 and
 .Em dst
 are used here only to denote the initial match addresses, but they
 are completely equivalent afterwards).
 Dynamic rules will be checked at the first
 .Cm check-state, keep-state
 or
 .Cm limit
 occurrence, and the action performed upon a match will be the same
 as in the parent rule.
 .Pp
 Note that no additional attributes other than protocol and IP addresses
 and ports are checked on dynamic rules.
 .Pp
 The typical use of dynamic rules is to keep a closed firewall configuration,
 but let the first TCP SYN packet from the inside network install a
 dynamic rule for the flow so that packets belonging to that session
 will be allowed through the firewall:
 .Pp
 .Dl "ipfw add check-state"
 .Dl "ipfw add allow tcp from my-subnet to any setup keep-state"
 .Dl "ipfw add deny tcp from any to any"
 .Pp
 A similar approach can be used for UDP, where a UDP packet coming
 from the inside will install a dynamic rule to let the response through
 the firewall:
 .Pp
 .Dl "ipfw add check-state"
 .Dl "ipfw add allow udp from my-subnet to any keep-state"
 .Dl "ipfw add deny udp from any to any"
 .Pp
 Dynamic rules expire after some time, which depends on the status
 of the flow and the setting of some
 .Cm sysctl
 variables.
 See Section
 .Sx SYSCTL VARIABLES
 for more details.
 For TCP sessions, dynamic rules can be instructed to periodically
 send keepalive packets to refresh the state of the rule when it is
 about to expire.
 .Pp
 See Section
 .Sx EXAMPLES
 for more examples on how to use dynamic rules.
 .Sh TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
 .Nm
 is also the user interface for the
 .Nm dummynet
 traffic shaper and network emulator, a subsystem that
 can artificially queue, delay or drop packets
 to emulate the behaviour of certain network links
 or queueing systems.
 .Pp
 .Nm dummynet
 operates by first using the firewall to select packets
 using any match pattern that can be used in
 .Nm
 rules.
 Matching packets are then passed to either of two
 different objects, which implement the traffic regulation:
 .Bl -hang -offset XXXX
 .It Em pipe
 A pipe emulates a link with given bandwidth, propagation delay,
 queue size and packet loss rate.
 Packets are queued in front of the pipe as they come out from the classifier,
 and then transferred to the pipe according to the pipe's parameters.
 .It Em queue
 A queue
 is an abstraction used to implement the WF2Q+
 (Worst-case Fair Weighted Fair Queueing) policy, which is
 an efficient variant of the WFQ policy.
 .Pp
 The queue associates a
 .Em weight
 and a reference pipe to each flow (a flow is a set of packets
 with the same addresses and ports after masking).
 All backlogged flows (i.e., those
 with packets queued) linked to the same pipe share the pipe's
 bandwidth proportionally to their weights.
 Note that weights are not priorities; a flow with a lower weight
 is still guaranteed to get its fraction of the bandwidth even if a
 flow with a higher weight is permanently backlogged.
 .El
 .Pp
 In practice,
 .Em pipes
 can be used to set hard limits to the bandwidth that a flow can use, whereas
 .Em queues
 can be used to determine how different flows share the available bandwidth.
 .Pp
 There are two modes of
 .Nm dummynet
 operation:
 .Dq normal
 and
 .Dq fast .
 The
 .Dq normal
 mode tries to emulate a real link: the
 .Nm dummynet
 scheduler ensures that the packet will not leave the pipe faster than it
 would on the real link with a given bandwidth.
 The
 .Dq fast
 mode allows certain packets to bypass the
 .Nm dummynet
 scheduler (if packet flow does not exceed pipe's bandwidth).
 This is the reason why the
 .Dq fast
 mode requires fewer CPU cycles per packet (on average) and packet latency
 can be significantly lower in comparison to a real link with the same
 bandwidth.
 The default mode is
 .Dq normal .
 The
 .Dq fast
 mode can be enabled by setting the
 .Va net.inet.ip.dummynet.io_fast
 .Xr sysctl 8
 variable to a non-zero value.
 .Pp
 .Ss PIPE AND QUEUE CONFIGURATION
 The
 .Em pipe
 and
 .Em queue
 configuration commands are the following:
 .Bd -ragged -offset indent
 .Cm pipe Ar number Cm config Ar pipe-configuration
 .Pp
 .Cm queue Ar number Cm config Ar queue-configuration
 .Ed
 .Pp
 The following parameters can be configured for a pipe:
 .Pp
 .Bl -tag -width indent -compact
 .It Cm bw Ar bandwidth | device
 Bandwidth, measured in
 .Sm off
 .Op Cm K | M
 .Brq Cm bit/s | Byte/s .
 .Sm on
 .Pp
 A value of 0 (default) means unlimited bandwidth.
 The unit must immediately follow the number, as in
 .Pp
 .Dl "ipfw pipe 1 config bw 300Kbit/s"
 .Pp
 If a device name is specified instead of a numeric value, as in
 .Pp
 .Dl "ipfw pipe 1 config bw tun0"
 .Pp
 then the transmit clock is supplied by the specified device.
 At the moment only the
 .Xr tun 4
 device supports this
 functionality, for use in conjunction with
 .Xr ppp 8 .
 .Pp
 .It Cm delay Ar ms-delay
 Propagation delay, measured in milliseconds.
 The value is rounded to the next multiple of the clock tick
 (typically 10ms, but it is a good practice to run kernels
 with
 .Dq "options HZ=1000"
 to reduce
 the granularity to 1ms or less).
 Default value is 0, meaning no delay.
 .Pp
+.It Cm burst Ar size
+If the data rate exceeds the pipe bandwidth limit
+(and the pipe was idle long enough),
+.Ar size
+bytes of data are allowed to bypass the
+.Nm dummynet
+scheduler (i.e., sent without shaping); afterwards the transmission rate
+will not exceed the pipe bandwidth. The effective burst size is calculated as follows:
+MAX(
+.Ar size
+,
+.Nm bw
+* pipe_idle_time).
+.Pp
 .It Cm profile Ar filename
 A file specifying the additional overhead incurred in the transmission
 of a packet on the link.
 .Pp
 Some link types introduce extra delays in the transmission
 of a packet, e.g. because of MAC level framing, contention on
 the use of the channel, MAC level retransmissions and so on.
 From our point of view, the channel is effectively unavailable
 for this extra time, which is constant or variable depending
 on the link type. Additionally, packets may be dropped after this
 time (e.g. on a wireless link after too many retransmissions).
 We can model the additional delay with an empirical curve
 that represents its distribution.
 .Bd -literal -offset indent
       cumulative probability
       1.0 ^
           |
       L   +-- loss-level          x
           |                 ******
           |                *
           |           *****
           |          *
           |        **
           |       *
           +-------*------------------->
                       delay
 .Ed
 The empirical curve may have both vertical and horizontal lines.
 Vertical lines represent constant delay for a range of
 probabilities.
 Horizontal lines correspond to a discontinuity in the delay
 distribution: the pipe will use the largest delay for a
 given probability.
 .Pp
 The file format is the following, with whitespace acting as
 a separator and '#' indicating the beginning of a comment:
 .Bl -tag -width indent
 .It Cm name Ar identifier
 optional name (listed by "ipfw pipe show")
 to identify the delay distribution;
 .It Cm bw Ar value
 the bandwidth used for the pipe.
 If not specified here, it must be present
 explicitly as a configuration parameter for the pipe;
 .It Cm loss-level Ar L
 the probability above which packets are lost.
 (0.0 <= L <= 1.0, default 1.0 i.e. no loss);
 .It Cm samples Ar N
 the number of samples used in the internal
 representation of the curve (2..1024; default 100);
 .It Cm "delay prob" | "prob delay"
 One of these two lines is mandatory and defines
 the format of the following lines with data points.
 .It Ar XXX Ar YYY
 2 or more lines representing points in the curve,
 with either delay or probability first, according
 to the chosen format.
 The unit for delay is milliseconds.
 Data points do not need to be sorted.
 Also, the number of actual lines can be different
 from the value of the "samples" parameter:
 .Nm
 utility will sort and interpolate
 the curve as needed.
 .El
 .Pp
 Example of a profile file:
 .Bd -literal -offset indent
 name    bla_bla_bla
 samples 100
 loss-level    0.86
 prob    delay
 0       200	# minimum overhead is 200ms
 0.5     200
 0.5     300
 0.8     1000
 0.9     1300
 1       1300
 #configuration file end
 .Ed
 .El
 .Pp
 The following parameters can be configured for a queue:
 .Pp
 .Bl -tag -width indent -compact
 .It Cm pipe Ar pipe_nr
 Connects a queue to the specified pipe.
 Multiple queues (with the same or different weights) can be connected to
 the same pipe, which specifies the aggregate rate for the set of queues.
 .Pp
 .It Cm weight Ar weight
 Specifies the weight to be used for flows matching this queue.
 The weight must be in the range 1..100, and defaults to 1.
 .El
 .Pp
 Finally, the following parameters can be configured for both
 pipes and queues:
 .Pp
 .Bl -tag -width XXXX -compact
 .Pp
 .It Cm buckets Ar hash-table-size
 Specifies the size of the hash table used for storing the
 various queues.
 Default value is 64, controlled by the
 .Xr sysctl 8
 variable
 .Va net.inet.ip.dummynet.hash_size ,
 allowed range is 16 to 65536.
 .Pp
 .It Cm mask Ar mask-specifier
 Packets sent to a given pipe or queue by an
 .Nm
 rule can be further classified into multiple flows, each of which is then
 sent to a different
 .Em dynamic
 pipe or queue.
 A flow identifier is constructed by masking the IP addresses,
 ports and protocol types as specified with the
 .Cm mask
 options in the configuration of the pipe or queue.
 For each different flow identifier, a new pipe or queue is created
 with the same parameters as the original object, and matching packets
 are sent to it.
 .Pp
 Thus, when
 .Em dynamic pipes
 are used, each flow will get the same bandwidth as defined by the pipe,
 whereas when
 .Em dynamic queues
 are used, each flow will share the parent's pipe bandwidth evenly
 with other flows generated by the same queue (note that other queues
 with different weights might be connected to the same pipe).
 .br
 Available mask specifiers are a combination of one or more of the following:
 .Pp
 .Cm dst-ip Ar mask ,
 .Cm dst-ip6 Ar mask ,
 .Cm src-ip Ar mask ,
 .Cm src-ip6 Ar mask ,
 .Cm dst-port Ar mask ,
 .Cm src-port Ar mask ,
 .Cm flow-id Ar mask ,
 .Cm proto Ar mask
 or
 .Cm all ,
 .Pp
 where the latter means all bits in all fields are significant.
 .Pp
 .It Cm noerror
 When a packet is dropped by a
 .Nm dummynet
 queue or pipe, the error
 is normally reported to the caller routine in the kernel, in the
 same way as it happens when a device queue fills up.
 Setting this
 option reports the packet as successfully delivered, which can be
 needed for some experimental setups where you want to simulate
 loss or congestion at a remote router.
 .Pp
 .It Cm plr Ar packet-loss-rate
 Packet loss rate.
 Argument
 .Ar packet-loss-rate
 is a floating-point number between 0 and 1, with 0 meaning no
 loss, 1 meaning 100% loss.
 The loss rate is internally represented on 31 bits.
 .Pp
 .It Cm queue Brq Ar slots | size Ns Cm Kbytes
 Queue size, in
 .Ar slots
 or
 .Cm KBytes .
 Default value is 50 slots, which
 is the typical queue size for Ethernet devices.
 Note that for slow speed links you should keep the queue
 size short or your traffic might be affected by a significant
 queueing delay.
 E.g., 50 max-sized ethernet packets (1500 bytes) mean 600Kbit
 or 20s of queue on a 30Kbit/s pipe.
 Even worse effects can result if you get packets from an
 interface with a much larger MTU, e.g.\& the loopback interface
 with its 16KB packets.
 The
 .Xr sysctl 8
 variables
 .Em net.inet.ip.dummynet.pipe_byte_limit
 and
 .Em net.inet.ip.dummynet.pipe_slot_limit
 control the maximum lengths that can be specified.
 .Pp
 .It Cm red | gred Ar w_q Ns / Ns Ar min_th Ns / Ns Ar max_th Ns / Ns Ar max_p
 Make use of the RED (Random Early Detection) queue management algorithm.
 .Ar w_q
 and
 .Ar max_p
 are floating
 point numbers between 0 and 1 (0 not included), while
 .Ar min_th
 and
 .Ar max_th
 are integer numbers specifying thresholds for queue management
 (thresholds are computed in bytes if the queue has been defined
 in bytes, in slots otherwise).
 The
 .Nm dummynet
 also supports the gentle RED variant (gred).
 Three
 .Xr sysctl 8
 variables can be used to control the RED behaviour:
 .Bl -tag -width indent
 .It Va net.inet.ip.dummynet.red_lookup_depth
 specifies the accuracy in computing the average queue
 when the link is idle (defaults to 256, must be greater than zero)
 .It Va net.inet.ip.dummynet.red_avg_pkt_size
 specifies the expected average packet size (defaults to 512, must be
 greater than zero)
 .It Va net.inet.ip.dummynet.red_max_pkt_size
 specifies the expected maximum packet size, only used when queue
 thresholds are in bytes (defaults to 1500, must be greater than zero).
 .El
 .El
 .Pp
 When used with IPv6 data,
 .Nm dummynet
 currently has several limitations.
 Information necessary to route link-local packets to an
 interface is not available after processing by
 .Nm dummynet
 so those packets are dropped in the output path.
 Care should be taken to ensure that link-local packets are not passed to
 .Nm dummynet .
 .Sh CHECKLIST
 Here are some important points to consider when designing your
 rules:
 .Bl -bullet
 .It
 Remember that you filter both packets going
 .Cm in
 and
 .Cm out .
 Most connections need packets going in both directions.
 .It
 Remember to test very carefully.
 It is a good idea to be near the console when doing this.
 If you cannot be near the console,
 use an auto-recovery script such as the one in
 .Pa /usr/share/examples/ipfw/change_rules.sh .
 .It
 Do not forget the loopback interface.
 .El
 .Sh FINE POINTS
 .Bl -bullet
 .It
 There are circumstances where fragmented datagrams are unconditionally
 dropped.
 TCP packets are dropped if they do not contain at least 20 bytes of
 TCP header, UDP packets are dropped if they do not contain a full 8
 byte UDP header, and ICMP packets are dropped if they do not contain
 4 bytes of ICMP header, enough to specify the ICMP type, code, and
 checksum.
 These packets are simply logged as
 .Dq pullup failed
 since there may not be enough good data in the packet to produce a
 meaningful log entry.
 .It
 Another type of packet is unconditionally dropped, a TCP packet with a
 fragment offset of one.
 This is a valid packet, but it only has one use, to try
 to circumvent firewalls.
 When logging is enabled, these packets are
 reported as being dropped by rule -1.
 .It
 If you are logged in over a network, loading the
 .Xr kld 4
 version of
 .Nm
 is probably not as straightforward as you would think.
 The following command line is recommended:
 .Bd -literal -offset indent
 kldload ipfw && \e
 ipfw add 32000 allow ip from any to any
 .Ed
 .Pp
 Along the same lines, doing an
 .Bd -literal -offset indent
 ipfw flush
 .Ed
 .Pp
 in similar surroundings is also a bad idea.
 .It
 The
 .Nm
 filter list may not be modified if the system security level
 is set to 3 or higher
 (see
 .Xr init 8
 for information on system security levels).
 .El
 .Sh PACKET DIVERSION
 A
 .Xr divert 4
 socket bound to the specified port will receive all packets
 diverted to that port.
 If no socket is bound to the destination port, or if the divert module is
 not loaded, or if the kernel was not compiled with divert socket support,
 the packets are dropped.
 .Sh NETWORK ADDRESS TRANSLATION (NAT)
 .Pp
 .Nm
 supports in-kernel NAT using the kernel version of
 .Xr libalias 3 .
 .Pp
 The nat configuration command is the following:
 .Bd -ragged -offset indent
 .Bk -words
 .Cm nat 
 .Ar nat_number 
 .Cm config 
 .Ar nat-configuration
 .Ek
 .Ed
 .Pp
 The following parameters can be configured:
 .Bl -tag -width indent
 .It Cm ip Ar ip_address
 Define an ip address to use for aliasing.
 .It Cm if Ar nic
 Use ip address of NIC for aliasing, dynamically changing
 it if NIC's ip address changes.
 .It Cm log
 Enable logging on this nat instance.
 .It Cm deny_in
 Deny any incoming connection from outside world.
 .It Cm same_ports
 Try to leave the alias port numbers unchanged from
 the actual local port numbers.
 .It Cm unreg_only
 Traffic on the local network not originating from an
 unregistered address space will be ignored.
 .It Cm reset
 Reset table of the packet aliasing engine on address change.
 .It Cm reverse
 Reverse the way libalias handles aliasing.
 .It Cm proxy_only
 Obey transparent proxy rules only, packet aliasing is not performed.
 .El
 .Pp
 To let the packet continue after being (de)aliased, set the sysctl variable
 .Va net.inet.ip.fw.one_pass 
 to 0.
 For more information about aliasing modes, refer to
 .Xr libalias 3 .
 See Section
 .Sx EXAMPLES
 for some examples about nat usage.
 .Ss REDIRECT AND LSNAT SUPPORT IN IPFW
 Redirect and LSNAT support follow closely the syntax used in
 .Xr natd 8 . 
 See Section
 .Sx EXAMPLES
 for some examples on how to do redirect and lsnat.
 .Ss SCTP NAT SUPPORT
 SCTP nat can be configured in a similar manner to TCP through the
 .Nm
 command line tool.
 The main difference is that 
 .Nm sctp nat 
 does not do port translation.
 Since the local and global side ports will be the same,
 there is no need to specify both.
 Ports are redirected as follows:
 .Bd -ragged -offset indent
 .Bk -words
 .Cm nat 
 .Ar nat_number 
 .Cm config if
 .Ar nic
 .Cm redirect_port sctp
 .Ar ip_address [,addr_list] {[port | port-port] [,ports]}
 .Ek
 .Ed
 .Pp
 Most
 .Nm sctp nat
 configuration can be done in real-time through the
 .Xr sysctl 8
 interface.
 All may be changed dynamically, though the hash_table size will only
 change for new
 .Nm nat
 instances.
 See
 .Sx SYSCTL VARIABLES 
 for more info.
 .Sh SYSCTL VARIABLES
 A set of
 .Xr sysctl 8
 variables controls the behaviour of the firewall and
 associated modules
 .Pq Nm dummynet , bridge , sctp nat .
 These are shown below together with their default value
 (but always check with the
 .Xr sysctl 8
 command what value is actually in use) and meaning:
 .Bl -tag -width indent
 .It Va net.inet.ip.alias.sctp.accept_global_ootb_addip: No 0
 Defines how the 
 .Nm nat 
 responds to receipt of global OOTB ASCONF-AddIP:
 .Bl -tag -width indent
 .It Cm 0
 No response (unless a partially matching association exists -
 ports and vtags match but global address does not)
 .It Cm 1
 .Nm nat 
 will accept and process all OOTB global AddIP messages.
 .El
 .Pp
 Option 1 should never be selected as this forms a security risk.
 An attacker can
 establish multiple fake associations by sending AddIP messages.
 .It Va net.inet.ip.alias.sctp.chunk_proc_limit: No 5
 Defines the maximum number of chunks in an SCTP packet that will be parsed for a
 packet that matches an existing association.
 This value is enforced to be greater than or equal to
 .Cm net.inet.ip.alias.sctp.initialising_chunk_proc_limit . 
 A high value is
 a DoS risk yet setting too low a value may result in important control chunks in
 the packet not being located and parsed.
 .It Va net.inet.ip.alias.sctp.error_on_ootb: No 1
 Defines when the
 .Nm nat 
 responds to any Out-of-the-Blue (OOTB) packets with ErrorM packets.
 An OOTB packet is a packet that arrives with no existing association
 registered in the
 .Nm nat 
 and is not an INIT or ASCONF-AddIP packet:
 .Bl -tag -width indent
 .It Cm 0
 ErrorM is never sent in response to OOTB packets.
 .It Cm 1
 ErrorM is only sent to OOTB packets received on the local side.
 .It Cm 2
 ErrorM is sent to the local side and on the global side ONLY if there is a
 partial match (ports and vtags match but the source global IP does not).
 This value is only useful if the 
 .Nm nat 
 is tracking global IP addresses.
 .It Cm 3
 ErrorM is sent in response to all OOTB packets on both the local and global side
 (DoS risk).
 .El
 .Pp
 At the moment the default is 0, since the ErrorM packet is not yet
 supported by most SCTP stacks.
 When it is supported, and if not tracking
 global addresses, we recommend setting this value to 1 to allow
 multi-homed local hosts to function with the 
 .Nm nat .
 To track global addresses, we recommend setting this value to 2 to
 allow global hosts to be informed when they need to (re)send an
 ASCONF-AddIP.
 Value 3 should never be chosen (except for debugging) as the
 .Nm nat 
 will respond to all OOTB global packets (a DoS risk).
 .It Va net.inet.ip.alias.sctp.hashtable_size: No 2003
 Size of hash tables used for 
 .Nm nat 
 lookups (100 < prime_number < 1000001).
 This value sets the 
 .Nm hash table 
 size for any future created 
 .Nm nat
 instance and therefore must be set prior to creating a 
 .Nm nat 
 instance.
 The table sizes may be changed to suit specific needs.
 If there will be few
 concurrent associations, and memory is scarce, you may make these smaller.
 If there will be many thousands (or millions) of concurrent associations, you
 should make these larger.
 A prime number is best for the table size.
 The sysctl
 update function will adjust your input value to the next highest prime number.
 .It Va net.inet.ip.alias.sctp.holddown_time:  No 0
 Hold association in table for this many seconds after receiving a
 SHUTDOWN-COMPLETE.
 This allows endpoints to correct shutdown gracefully if a
 shutdown_complete is lost and retransmissions are required.
 .It Va net.inet.ip.alias.sctp.init_timer: No 15
 Timeout value while waiting for (INIT-ACK|AddIP-ACK).
 This value cannot be 0.
 .It Va net.inet.ip.alias.sctp.initialising_chunk_proc_limit: No 2
 Defines the maximum number of chunks in an SCTP packet that will be parsed when
 no existing association exists that matches that packet.
 Ideally this packet
 will only be an INIT or ASCONF-AddIP packet.
 A higher value may become a DoS
 risk as malformed packets can consume processing resources.
 .It Va net.inet.ip.alias.sctp.param_proc_limit: No 25
 Defines the maximum number of parameters within a chunk that will be parsed in a
 packet.
 As for other similar sysctl variables, larger values pose a DoS risk.
 .It Va net.inet.ip.alias.sctp.log_level: No 0 
 Level of detail in the system log messages (0 \- minimal, 1 \- event,
 2 \- info, 3 \- detail, 4 \- debug, 5 \- max debug). May be a good
 option in high loss environments.
 .It Va net.inet.ip.alias.sctp.shutdown_time: No 15
 Timeout value while waiting for SHUTDOWN-COMPLETE.
 This value cannot be 0.
 .It Va net.inet.ip.alias.sctp.track_global_addresses: No 0
 Enables/disables global IP address tracking within the 
 .Nm nat 
 and places an
 upper limit on the number of addresses tracked for each association:
 .Bl -tag -width indent
 .It Cm 0
 Global tracking is disabled
 .It Cm >1
 Enables tracking, the maximum number of addresses tracked for each
 association is limited to this value
 .El
 .Pp
 This variable is fully dynamic: the new value will be adopted for all newly
 arriving associations; existing associations are treated as they were previously.
 Global tracking will decrease the number of collisions within the 
 .Nm nat 
 at a cost
 of increased processing load, memory usage, complexity, and possible 
 .Nm nat 
 state
 problems in complex networks with multiple 
 .Nm nats . 
 We recommend not tracking
 global IP addresses, this will still result in a fully functional 
 .Nm nat .
 .It Va net.inet.ip.alias.sctp.up_timer: No 300
 Timeout value to keep an association up with no traffic.
 This value cannot be 0.
 .It Va net.inet.ip.dummynet.expire : No 1
 Lazily delete dynamic pipes/queues once they have no pending traffic.
 You can disable this by setting the variable to 0, in which case
 the pipes/queues will only be deleted when the threshold is reached.
 .It Va net.inet.ip.dummynet.hash_size : No 64
 Default size of the hash table used for dynamic pipes/queues.
 This value is used when no
 .Cm buckets
 option is specified when configuring a pipe/queue.
 .It Va net.inet.ip.dummynet.io_fast : No 0
 If set to a non-zero value,
 the
 .Dq fast
 mode of
 .Nm dummynet
 operation (see above) is enabled.
 .It Va net.inet.ip.dummynet.io_pkt
 Number of packets passed to
 .Nm dummynet .
 .It Va net.inet.ip.dummynet.io_pkt_drop
 Number of packets dropped by
 .Nm dummynet .
 .It Va net.inet.ip.dummynet.io_pkt_fast
 Number of packets bypassed by the
 .Nm dummynet
 scheduler.
 .It Va net.inet.ip.dummynet.max_chain_len : No 16
 Target value for the maximum number of pipes/queues in a hash bucket.
 The product
 .Cm max_chain_len*hash_size
 is used to determine the threshold over which empty pipes/queues
 will be expired even when
 .Cm net.inet.ip.dummynet.expire=0 .
 .It Va net.inet.ip.dummynet.red_lookup_depth : No 256
 .It Va net.inet.ip.dummynet.red_avg_pkt_size : No 512
 .It Va net.inet.ip.dummynet.red_max_pkt_size : No 1500
 Parameters used in the computations of the drop probability
 for the RED algorithm.
 .It Va net.inet.ip.dummynet.pipe_byte_limit : No 1048576
 .It Va net.inet.ip.dummynet.pipe_slot_limit : No 100
 The maximum queue size that can be specified in bytes or packets.
 These limits prevent accidental exhaustion of resources such as mbufs.
 If you raise these limits,
 you should make sure the system is configured so that sufficient resources
 are available.
 .It Va net.inet.ip.fw.autoinc_step : No 100
 Delta between rule numbers when auto-generating them.
 The value must be in the range 1..1000.
 .It Va net.inet.ip.fw.curr_dyn_buckets : Va net.inet.ip.fw.dyn_buckets
 The current number of buckets in the hash table for dynamic rules
 (read-only).
 .It Va net.inet.ip.fw.debug : No 1
 Controls debugging messages produced by
 .Nm .
 .It Va net.inet.ip.fw.default_rule : No 65535
 The default rule number (read-only).
 By the design of
 .Nm ,
 the default rule is the last one, so its number
 can also serve as the highest number allowed for a rule.
 .It Va net.inet.ip.fw.dyn_buckets : No 256
 The number of buckets in the hash table for dynamic rules.
 Must be a power of 2, up to 65536.
 It only takes effect when all dynamic rules have expired, so you
 are advised to use a
 .Cm flush
 command to make sure that the hash table is resized.
 .It Va net.inet.ip.fw.dyn_count : No 3
 Current number of dynamic rules
 (read-only).
 .It Va net.inet.ip.fw.dyn_keepalive : No 1
 Enables generation of keepalive packets for
 .Cm keep-state
 rules on TCP sessions.
 A keepalive is generated to both
 sides of the connection every 5 seconds for the last 20
 seconds of the lifetime of the rule.
 .It Va net.inet.ip.fw.dyn_max : No 8192
 Maximum number of dynamic rules.
 When you hit this limit, no more dynamic rules can be
 installed until old ones expire.
 .It Va net.inet.ip.fw.dyn_ack_lifetime : No 300
 .It Va net.inet.ip.fw.dyn_syn_lifetime : No 20
 .It Va net.inet.ip.fw.dyn_fin_lifetime : No 1
 .It Va net.inet.ip.fw.dyn_rst_lifetime : No 1
 .It Va net.inet.ip.fw.dyn_udp_lifetime : No 5
 .It Va net.inet.ip.fw.dyn_short_lifetime : No 30
 These variables control the lifetime, in seconds, of dynamic
 rules.
 Upon the initial SYN exchange the lifetime is kept short,
 then increased after both SYN have been seen, then decreased
 again during the final FIN exchange or when a RST is received.
 Both
 .Em dyn_fin_lifetime
 and
 .Em dyn_rst_lifetime
 must be strictly lower than 5 seconds, the period of
 repetition of keepalives.
 The firewall enforces that.
 .It Va net.inet.ip.fw.enable : No 1
 Enables the firewall.
 Setting this variable to 0 lets you run your machine without
 firewall even if compiled in.
 .It Va net.inet6.ip6.fw.enable : No 1
 provides the same functionality as above for the IPv6 case.
 .It Va net.inet.ip.fw.one_pass : No 1
 When set, the packet exiting from the
 .Nm dummynet
 pipe or from
 .Xr ng_ipfw 4
 node is not passed through the firewall again.
 Otherwise, after an action, the packet is
 reinjected into the firewall at the next rule.
 .It Va net.inet.ip.fw.tables_max : No 128
 Maximum number of tables (read-only).
 .It Va net.inet.ip.fw.verbose : No 1
 Enables verbose messages.
 .It Va net.inet.ip.fw.verbose_limit : No 0
 Limits the number of messages produced by a verbose firewall.
 .It Va net.inet6.ip6.fw.deny_unknown_exthdrs : No 1
 If enabled packets with unknown IPv6 Extension Headers will be denied.
 .It Va net.link.ether.ipfw : No 0
 Controls whether layer-2 packets are passed to
 .Nm .
 Default is no.
 .It Va net.link.bridge.ipfw : No 0
 Controls whether bridged packets are passed to
 .Nm .
 Default is no.
 .El
 .Pp
 .Sh EXAMPLES
 There are far too many possible uses of
 .Nm
 so this Section will only give a small set of examples.
 .Pp
 .Ss BASIC PACKET FILTERING
 This command adds an entry which denies all tcp packets from
 .Em cracker.evil.org
 to the telnet port of
 .Em wolf.tambov.su
 from being forwarded by the host:
 .Pp
 .Dl "ipfw add deny tcp from cracker.evil.org to wolf.tambov.su telnet"
 .Pp
 This one disallows any connection from the cracker's entire
 network to my host:
 .Pp
 .Dl "ipfw add deny ip from 123.45.67.0/24 to my.host.org"
 .Pp
 A first and efficient way to limit access (not using dynamic rules)
 is the use of the following rules:
 .Pp
 .Dl "ipfw add allow tcp from any to any established"
 .Dl "ipfw add allow tcp from net1 portlist1 to net2 portlist2 setup"
 .Dl "ipfw add allow tcp from net3 portlist3 to net3 portlist3 setup"
 .Dl "..."
 .Dl "ipfw add deny tcp from any to any"
 .Pp
 The first rule will be a quick match for normal TCP packets,
 but it will not match the initial SYN packet, which will be
 matched by the
 .Cm setup
 rules only for selected source/destination pairs.
 All other SYN packets will be rejected by the final
 .Cm deny
 rule.
 .Pp
 If you administer one or more subnets, you can take advantage
 of the address sets and or-blocks and write extremely
 compact rulesets which selectively enable services to blocks
 of clients, as below:
 .Pp
 .Dl "goodguys=\*q{ 10.1.2.0/24{20,35,66,18} or 10.2.3.0/28{6,3,11} }\*q"
 .Dl "badguys=\*q10.1.2.0/24{8,38,60}\*q"
 .Dl ""
 .Dl "ipfw add allow ip from ${goodguys} to any"
 .Dl "ipfw add deny ip from ${badguys} to any"
 .Dl "... normal policies ..."
 .Pp
 The
 .Cm verrevpath
 option could be used to do automated anti-spoofing by adding the
 following to the top of a ruleset:
 .Pp
 .Dl "ipfw add deny ip from any to any not verrevpath in"
 .Pp
 This rule drops all incoming packets that appear to be coming to the
 system on the wrong interface.
 For example, a packet with a source
 address belonging to a host on a protected internal network would be
 dropped if it tried to enter the system from an external interface.
 .Pp
 The
 .Cm antispoof
 option could be used to do similar but more restricted anti-spoofing
 by adding the following to the top of a ruleset:
 .Pp
 .Dl "ipfw add deny ip from any to any not antispoof in"
 .Pp
 This rule drops all incoming packets that appear to be coming from another
 directly connected system but on the wrong interface.
 For example, a packet with a source address of
 .Li 192.168.0.0/24 ,
 configured on
 .Li fxp0 ,
 but coming in on
 .Li fxp1
 would be dropped.
 .Ss DYNAMIC RULES
 In order to protect a site from flood attacks involving fake
 TCP packets, it is safer to use dynamic rules:
 .Pp
 .Dl "ipfw add check-state"
 .Dl "ipfw add deny tcp from any to any established"
 .Dl "ipfw add allow tcp from my-net to any setup keep-state"
 .Pp
 This will let the firewall install dynamic rules only for
 those connections which start with a regular SYN packet coming
 from the inside of our network.
 Dynamic rules are checked when encountering the first
 .Cm check-state
 or
 .Cm keep-state
 rule.
 A
 .Cm check-state
 rule should usually be placed near the beginning of the
 ruleset to minimize the amount of work scanning the ruleset.
 Your mileage may vary.
 .Pp
 To limit the number of connections a user can open
 you can use the following type of rules:
 .Pp
 .Dl "ipfw add allow tcp from my-net/24 to any setup limit src-addr 10"
 .Dl "ipfw add allow tcp from any to me setup limit src-addr 4"
 .Pp
 The former (assuming it runs on a gateway) will allow each host
 on a /24 network to open at most 10 TCP connections.
 The latter can be placed on a server to make sure that a single
 client does not use more than 4 simultaneous connections.
 .Pp
 .Em BEWARE :
 stateful rules can be subject to denial-of-service attacks
 by a SYN-flood which opens a huge number of dynamic rules.
 The effects of such attacks can be partially limited by
 acting on a set of
 .Xr sysctl 8
 variables which control the operation of the firewall.
 .Pp
 Here is a good usage of the
 .Cm list
 command to see accounting records and timestamp information:
 .Pp
 .Dl ipfw -at list
 .Pp
 or in short form without timestamps:
 .Pp
 .Dl ipfw -a list
 .Pp
 which is equivalent to:
 .Pp
 .Dl ipfw show
 .Pp
 Next rule diverts all incoming packets from 192.168.2.0/24
 to divert port 5000:
 .Pp
 .Dl ipfw divert 5000 ip from 192.168.2.0/24 to any in
 .Pp
 .Ss TRAFFIC SHAPING
 The following rules show some of the applications of
 .Nm
 and
 .Nm dummynet
 for simulations and the like.
 .Pp
 This rule drops random incoming packets with a probability
 of 5%:
 .Pp
 .Dl "ipfw add prob 0.05 deny ip from any to any in"
 .Pp
 A similar effect can be achieved making use of
 .Nm dummynet
 pipes:
 .Pp
 .Dl "ipfw add pipe 10 ip from any to any"
 .Dl "ipfw pipe 10 config plr 0.05"
 .Pp
 We can use pipes to artificially limit bandwidth, e.g.\& on a
 machine acting as a router, if we want to limit traffic from
 local clients on 192.168.2.0/24 we do:
 .Pp
 .Dl "ipfw add pipe 1 ip from 192.168.2.0/24 to any out"
 .Dl "ipfw pipe 1 config bw 300Kbit/s queue 50KBytes"
 .Pp
 note that we use the
 .Cm out
 modifier so that the rule is not used twice.
 Remember in fact that
 .Nm
 rules are checked both on incoming and outgoing packets.
 .Pp
 Should we want to simulate a bidirectional link with bandwidth
 limitations, the correct way is the following:
 .Pp
 .Dl "ipfw add pipe 1 ip from any to any out"
 .Dl "ipfw add pipe 2 ip from any to any in"
 .Dl "ipfw pipe 1 config bw 64Kbit/s queue 10Kbytes"
 .Dl "ipfw pipe 2 config bw 64Kbit/s queue 10Kbytes"
 .Pp
 The above can be very useful, e.g.\& if you want to see how
 your fancy Web page will look for a residential user who
 is connected only through a slow link.
 You should not use only one pipe for both directions, unless
 you want to simulate a half-duplex medium (e.g.\& AppleTalk,
 Ethernet, IRDA).
 It is not necessary that both pipes have the same configuration,
 so we can also simulate asymmetric links.
 .Pp
 Should we want to verify network performance with the RED queue
 management algorithm:
 .Pp
 .Dl "ipfw add pipe 1 ip from any to any"
 .Dl "ipfw pipe 1 config bw 500Kbit/s queue 100 red 0.002/30/80/0.1"
 .Pp
 Another typical application of the traffic shaper is to
 introduce some delay in the communication.
 This can significantly affect applications which do a lot of Remote
 Procedure Calls, and where the round-trip-time of the
 connection often becomes a limiting factor much more than
 bandwidth:
 .Pp
 .Dl "ipfw add pipe 1 ip from any to any out"
 .Dl "ipfw add pipe 2 ip from any to any in"
 .Dl "ipfw pipe 1 config delay 250ms bw 1Mbit/s"
 .Dl "ipfw pipe 2 config delay 250ms bw 1Mbit/s"
 .Pp
 Per-flow queueing can be useful for a variety of purposes.
 A very simple one is counting traffic:
 .Pp
 .Dl "ipfw add pipe 1 tcp from any to any"
 .Dl "ipfw add pipe 1 udp from any to any"
 .Dl "ipfw add pipe 1 ip from any to any"
 .Dl "ipfw pipe 1 config mask all"
 .Pp
 The above set of rules will create queues (and collect
 statistics) for all traffic.
 Because the pipes have no limitations, the only effect is
 collecting statistics.
 Note that we need 3 rules, not just the last one, because
 when
 .Nm
 tries to match IP packets it will not consider ports, so we
 would not see connections on separate ports as different
 ones.
 .Pp
 A more sophisticated example is limiting the outbound traffic
 on a net with per-host limits, rather than per-network limits:
 .Pp
 .Dl "ipfw add pipe 1 ip from 192.168.2.0/24 to any out"
 .Dl "ipfw add pipe 2 ip from any to 192.168.2.0/24 in"
 .Dl "ipfw pipe 1 config mask src-ip 0x000000ff bw 200Kbit/s queue 20Kbytes"
 .Dl "ipfw pipe 2 config mask dst-ip 0x000000ff bw 200Kbit/s queue 20Kbytes"
 .Ss LOOKUP TABLES
 In the following example, we need to create several traffic bandwidth
 classes and we need different hosts/networks to fall into different classes.
 We create one pipe for each class and configure them accordingly.
 Then we create a single table and fill it with IP subnets and addresses.
 For each subnet/host we set the argument equal to the number of the pipe
 that it should use.
 Then we classify traffic using a single rule:
 .Pp
 .Dl "ipfw pipe 1 config bw 1000Kbyte/s"
 .Dl "ipfw pipe 4 config bw 4000Kbyte/s"
 .Dl "..."
 .Dl "ipfw table 1 add 192.168.2.0/24 1"
 .Dl "ipfw table 1 add 192.168.0.0/27 4"
 .Dl "ipfw table 1 add 192.168.0.2 1"
 .Dl "..."
 .Dl "ipfw add pipe tablearg ip from table(1) to any"
 .Pp
 Using the
 .Cm fwd
 action, the table entries may include hostnames and IP addresses.
 .Pp
 .Dl "ipfw table 1 add 192.168.2.0/24 10.23.2.1"
 .Dl "ipfw table 1 add 192.168.0.0/27 router1.dmz"
 .Dl "..."
 .Dl "ipfw add 100 fwd tablearg ip from any to table(1)"
 .Ss SETS OF RULES
 To add a set of rules atomically, e.g.\& set 18:
 .Pp
 .Dl "ipfw set disable 18"
 .Dl "ipfw add NN set 18 ...         # repeat as needed"
 .Dl "ipfw set enable 18"
 .Pp
 To delete a set of rules atomically the command is simply:
 .Pp
 .Dl "ipfw delete set 18"
 .Pp
 To test a ruleset and disable it and regain control if something goes wrong:
 .Pp
 .Dl "ipfw set disable 18"
 .Dl "ipfw add NN set 18 ...         # repeat as needed"
 .Dl "ipfw set enable 18; echo done; sleep 30 && ipfw set disable 18"
 .Pp
 Here if everything goes well, you press control-C before the "sleep"
 terminates, and your ruleset will be left active.
 Otherwise, e.g.\& if
 you cannot access your box, the ruleset will be disabled after
 the sleep terminates thus restoring the previous situation.
 .Pp
 To show rules of the specific set:
 .Pp
 .Dl "ipfw set 18 show"
 .Pp
 To show rules of the disabled set:
 .Pp
 .Dl "ipfw -S set 18 show"
 .Pp
 To clear the counters of a specific rule of the specific set:
 .Pp
 .Dl "ipfw set 18 zero NN"
 .Pp
 To delete a specific rule of the specific set:
 .Pp
 .Dl "ipfw set 18 delete NN"
 .Ss NAT, REDIRECT AND LSNAT
 First redirect all the traffic to nat instance 123:
 .Pp
 .Dl "ipfw add nat 123 all from any to any"
 .Pp
 Then to configure nat instance 123 to alias all the outgoing traffic with ip
 192.168.0.123, blocking all incoming connections, trying to keep
 same ports on both sides, clearing aliasing table on address change 
 and keeping a log of traffic/link statistics:
 .Pp
 .Dl "ipfw nat 123 config ip 192.168.0.123 log deny_in reset same_ports"
 .Pp
 Or to change address of instance 123, aliasing table will be cleared (see
 reset option):
 .Pp
 .Dl "ipfw nat 123 config ip 10.0.0.1"
 .Pp
 To see configuration of nat instance 123:
 .Pp
 .Dl "ipfw nat 123 show config"
 .Pp
 To show logs of all the instances in range 111-999:
 .Pp
 .Dl "ipfw nat 111-999 show"
 .Pp
 To see configurations of all instances:
 .Pp
 .Dl "ipfw nat show config"
 .Pp
 Or a redirect rule with mixed modes could look like:
 .Pp
 .Dl "ipfw nat 123 config redirect_addr 10.0.0.1 10.0.0.66"
 .Dl "			 redirect_port tcp 192.168.0.1:80 500"
 .Dl "			 redirect_proto udp 192.168.1.43 192.168.1.1"
 .Dl "			 redirect_addr 192.168.0.10,192.168.0.11"
 .Dl "			 	    10.0.0.100	# LSNAT"
 .Dl "			 redirect_port tcp 192.168.0.1:80,192.168.0.10:22" 
 .Dl "			 	    500		# LSNAT"
 .Pp
 or it could be split in:
 .Pp
 .Dl "ipfw nat 1 config redirect_addr 10.0.0.1 10.0.0.66"
 .Dl "ipfw nat 2 config redirect_port tcp 192.168.0.1:80 500"
 .Dl "ipfw nat 3 config redirect_proto udp 192.168.1.43 192.168.1.1"
 .Dl "ipfw nat 4 config redirect_addr 192.168.0.10,192.168.0.11,192.168.0.12" 
 .Dl "				         10.0.0.100"
 .Dl "ipfw nat 5 config redirect_port tcp"
 .Dl "			192.168.0.1:80,192.168.0.10:22,192.168.0.20:25 500"
 .Pp
 .Sh SEE ALSO
 .Xr cpp 1 ,
 .Xr m4 1 ,
 .Xr altq 4 ,
 .Xr divert 4 ,
 .Xr dummynet 4 ,
 .Xr if_bridge 4 ,
 .Xr ip 4 ,
 .Xr ipfirewall 4 ,
 .Xr ng_ipfw 4 ,
 .Xr protocols 5 ,
 .Xr services 5 ,
 .Xr init 8 ,
 .Xr kldload 8 ,
 .Xr reboot 8 ,
 .Xr sysctl 8 ,
 .Xr syslogd 8
 .Sh HISTORY
 The
 .Nm
 utility first appeared in
 .Fx 2.0 .
 .Nm dummynet
 was introduced in
 .Fx 2.2.8 .
 Stateful extensions were introduced in
 .Fx 4.0 .
 .Nm ipfw2
 was introduced in Summer 2002.
 .Sh AUTHORS
 .An Ugen J. S. Antsilevich ,
 .An Poul-Henning Kamp ,
 .An Alex Nash ,
 .An Archie Cobbs ,
 .An Luigi Rizzo .
 .Pp
 .An -nosplit
 API based upon code written by
 .An Daniel Boulet
 for BSDI.
 .Pp
 Dummynet has been introduced by Luigi Rizzo in 1997-1998.
 .Pp
 Some early work (1999-2000) on the
 .Nm dummynet
 traffic shaper supported by Akamba Corp.
 .Pp
 The ipfw core (ipfw2) has been completely redesigned and
 reimplemented by Luigi Rizzo in summer 2002. Further
 actions and
 options have been added by various developers over the years.
 .Pp
 .An -nosplit
 In-kernel NAT support written by
 .An Paolo Pisati Aq piso@FreeBSD.org
 as part of a Summer of Code 2005 project.
 .Pp
 SCTP
 .Nm nat
 support has been developed by
 .An The Centre for Advanced Internet Architectures (CAIA) Aq http://www.caia.swin.edu.au .
 The primary developers and maintainers are David Hayes and Jason But.
 For further information visit:
 .Aq http://www.caia.swin.edu.au/urp/SONATA
 .Pp
 Delay profiles have been developed by Alessandro Cerri and
 Luigi Rizzo, supported by the
 European Commission within Projects Onelab and Onelab2.
 .Sh BUGS
 The syntax has grown over the years and sometimes it might be confusing.
 Unfortunately, backward compatibility prevents cleaning up mistakes
 made in the definition of the syntax.
 .Pp
 .Em !!! WARNING !!!
 .Pp
 Misconfiguring the firewall can put your computer in an unusable state,
 possibly shutting down network services and requiring console access to
 regain control of it.
 .Pp
 Incoming packet fragments diverted by
 .Cm divert
 are reassembled before delivery to the socket.
 The action used on those packets is the one from the
 rule which matches the first fragment of the packet.
 .Pp
 Packets diverted to userland, and then reinserted by a userland process
 may lose various packet attributes.
 The packet source interface name
 will be preserved if it is shorter than 8 bytes and the userland process
 saves and reuses the sockaddr_in
 (as does
 .Xr natd 8 ) ;
 otherwise, it may be lost.
 If a packet is reinserted in this manner, later rules may be incorrectly
 applied, making the order of
 .Cm divert
 rules in the rule sequence very important.
 .Pp
 Dummynet drops all packets with IPv6 link-local addresses.
 .Pp
 Rules using
 .Cm uid
 or
 .Cm gid
 may not behave as expected.
 In particular, incoming SYN packets may
 have no uid or gid associated with them since they do not yet belong
 to a TCP connection, and the uid/gid associated with a packet may not
 be as expected if the associated process calls
 .Xr setuid 2
 or similar system calls.
 .Pp
 Rule syntax is subject to the command line environment and some patterns
 may need to be escaped with the backslash character
 or quoted appropriately.
 .Pp
 Due to the architecture of 
 .Xr libalias 3 , 
 ipfw nat is not compatible with the TCP segmentation offloading (TSO).
 Thus, to reliably nat your network traffic, please disable TSO
 on your NICs using
 .Xr ifconfig 8 .
 .Pp
 ICMP error messages are not implicitly matched by dynamic rules
 for the respective conversations.
 To avoid failures of network error detection and path MTU discovery,
 ICMP error messages may need to be allowed explicitly through static
 rules.
diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h
index 5b49f5512d8d..d3ce7fb6e08a 100644
--- a/sbin/ipfw/ipfw2.h
+++ b/sbin/ipfw/ipfw2.h
@@ -1,272 +1,273 @@
 /*
  * Copyright (c) 2002-2003 Luigi Rizzo
  * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
  * Copyright (c) 1994 Ugen J.S.Antsilevich
  *
  * Idea and grammar partially left from:
  * Copyright (c) 1993 Daniel Boulet
  *
  * Redistribution and use in source forms, with and without modification,
  * are permitted provided that this entire comment appears intact.
  *
  * Redistribution in binary form may occur without any restrictions.
  * Obviously, it would be nice if you gave credit where credit is due
  * but requiring it would be too onerous.
  *
  * This software is provided ``AS IS'' without any warranties of any kind.
  *
  * NEW command line interface for IP firewall facility
  *
  * $FreeBSD$
  */
 
 /*
  * Options that can be set on the command line.
  * When reading commands from a file, a subset of the options can also
  * be applied globally by specifying them before the file name.
  * After that, each line can contain its own option that changes
  * the global value.
  * XXX The context is not restored after each line.
  */
 
 struct cmdline_opts {
 	/* boolean options: */
 	int	do_value_as_ip;	/* show table value as IP */
 	int	do_resolv;	/* try to resolve all ip to names */
 	int	do_time;	/* Show time stamps */
 	int	do_quiet;	/* Be quiet in add and flush */
 	int	do_pipe;	/* this cmd refers to a pipe */
 	int	do_nat; 	/* this cmd refers to a nat config */
 	int	do_dynamic;	/* display dynamic rules */
 	int	do_expired;	/* display expired dynamic rules */
 	int	do_compact;	/* show rules in compact mode */
 	int	do_force;	/* do not ask for confirmation */
 	int	show_sets;	/* display the set each rule belongs to */
 	int	test_only;	/* only check syntax */
 	int	comment_only;	/* only print action and comment */
 	int	verbose;	/* be verbose on some commands */
 
 	/* The options below can have multiple values. */
 
 	int	do_sort;	/* field to sort results (0 = no) */
 		/* valid fields are 1 and above */
 
 	int	use_set;	/* work with specified set number */
 		/* 0 means all sets, otherwise apply to set use_set - 1 */
 
 };
 
 extern struct cmdline_opts co;
 
 /*
  * _s_x is a structure that stores string <-> token pairs, used in
  * various places in the parser. Entries are stored in arrays,
  * with an entry with s=NULL as terminator.
  * The search routines are match_token() and match_value().
  * Often, an element with x=0 contains an error string.
  *
  */
 struct _s_x {
 	char const *s;
 	int x;
 };
 
 enum tokens {
 	TOK_NULL=0,
 
 	TOK_OR,
 	TOK_NOT,
 	TOK_STARTBRACE,
 	TOK_ENDBRACE,
 
 	TOK_ACCEPT,
 	TOK_COUNT,
 	TOK_PIPE,
 	TOK_QUEUE,
 	TOK_DIVERT,
 	TOK_TEE,
 	TOK_NETGRAPH,
 	TOK_NGTEE,
 	TOK_FORWARD,
 	TOK_SKIPTO,
 	TOK_DENY,
 	TOK_REJECT,
 	TOK_RESET,
 	TOK_UNREACH,
 	TOK_CHECKSTATE,
 	TOK_NAT,
 	TOK_REASS,
 
 	TOK_ALTQ,
 	TOK_LOG,
 	TOK_TAG,
 	TOK_UNTAG,
 
 	TOK_TAGGED,
 	TOK_UID,
 	TOK_GID,
 	TOK_JAIL,
 	TOK_IN,
 	TOK_LIMIT,
 	TOK_KEEPSTATE,
 	TOK_LAYER2,
 	TOK_OUT,
 	TOK_DIVERTED,
 	TOK_DIVERTEDLOOPBACK,
 	TOK_DIVERTEDOUTPUT,
 	TOK_XMIT,
 	TOK_RECV,
 	TOK_VIA,
 	TOK_FRAG,
 	TOK_IPOPTS,
 	TOK_IPLEN,
 	TOK_IPID,
 	TOK_IPPRECEDENCE,
 	TOK_IPTOS,
 	TOK_IPTTL,
 	TOK_IPVER,
 	TOK_ESTAB,
 	TOK_SETUP,
 	TOK_TCPDATALEN,
 	TOK_TCPFLAGS,
 	TOK_TCPOPTS,
 	TOK_TCPSEQ,
 	TOK_TCPACK,
 	TOK_TCPWIN,
 	TOK_ICMPTYPES,
 	TOK_MAC,
 	TOK_MACTYPE,
 	TOK_VERREVPATH,
 	TOK_VERSRCREACH,
 	TOK_ANTISPOOF,
 	TOK_IPSEC,
 	TOK_COMMENT,
 
 	TOK_PLR,
 	TOK_NOERROR,
 	TOK_BUCKETS,
 	TOK_DSTIP,
 	TOK_SRCIP,
 	TOK_DSTPORT,
 	TOK_SRCPORT,
 	TOK_ALL,
 	TOK_MASK,
 	TOK_BW,
 	TOK_DELAY,
 	TOK_PIPE_PROFILE,
+	TOK_BURST,
 	TOK_RED,
 	TOK_GRED,
 	TOK_DROPTAIL,
 	TOK_PROTO,
 	TOK_WEIGHT,
 	TOK_IP,
 	TOK_IF,
  	TOK_ALOG,
  	TOK_DENY_INC,
  	TOK_SAME_PORTS,
  	TOK_UNREG_ONLY,
  	TOK_RESET_ADDR,
  	TOK_ALIAS_REV,
  	TOK_PROXY_ONLY,
 	TOK_REDIR_ADDR,
 	TOK_REDIR_PORT,
 	TOK_REDIR_PROTO,	
 
 	TOK_IPV6,
 	TOK_FLOWID,
 	TOK_ICMP6TYPES,
 	TOK_EXT6HDR,
 	TOK_DSTIP6,
 	TOK_SRCIP6,
 
 	TOK_IPV4,
 	TOK_UNREACH6,
 	TOK_RESET6,
 
 	TOK_FIB,
 	TOK_SETFIB,
 };
 /*
  * The following macro exits with a usage error (printing msg) if we
  * run out of arguments; it reads the caller's local variable 'ac'.
  */
 #define NEED1(msg)      {if (!ac) errx(EX_USAGE, msg);}
 
 unsigned long long align_uint64(const uint64_t *pll);
 
 /* memory allocation support */
 void *safe_calloc(size_t number, size_t size);
 void *safe_realloc(void *ptr, size_t size);
 
 /* string comparison functions used for historical compatibility */
 int _substrcmp(const char *str1, const char* str2);
 int _substrcmp2(const char *str1, const char* str2, const char* str3);
 
 /* utility functions */
 int match_token(struct _s_x *table, char *string);
 char const *match_value(struct _s_x *p, int value);
 
 int do_cmd(int optname, void *optval, uintptr_t optlen);
 
 struct in6_addr;
 void n2mask(struct in6_addr *mask, int n);
 int contigmask(uint8_t *p, int len);
 
 /*
  * Forward declarations to avoid including way too many headers.
  * C does not allow duplicated typedefs, so we use the base struct
  * that the typedef points to.
  * Should the typedefs use a different type, the compiler will
  * still detect the change when compiling the body of the
  * functions involved, so we do not lose error checking.
  */
 struct _ipfw_insn;
 struct _ipfw_insn_altq;
 struct _ipfw_insn_u32;
 struct _ipfw_insn_ip6;
 struct _ipfw_insn_icmp6;
 
 /*
  * The reserved set number. This is a constant in ip_fw.h
  * but we store it in a variable so other files do not depend
  * on that header just for one constant.
  */
 extern int resvd_set_number;
 
 /* first-level command handlers */
 void ipfw_add(int ac, char *av[]);
 void ipfw_show_nat(int ac, char **av);
 void ipfw_config_pipe(int ac, char **av);
 void ipfw_config_nat(int ac, char **av);
 void ipfw_sets_handler(int ac, char *av[]);
 void ipfw_table_handler(int ac, char *av[]);
 void ipfw_sysctl_handler(int ac, char *av[], int which);
 void ipfw_delete(int ac, char *av[]);
 void ipfw_flush(int force);
 void ipfw_zero(int ac, char *av[], int optname);
 void ipfw_list(int ac, char *av[], int show_counters);
 
 /* altq.c */
 void altq_set_enabled(int enabled);
 u_int32_t altq_name_to_qid(const char *name);
 
 void print_altq_cmd(struct _ipfw_insn_altq *altqptr);
 
 /* dummynet.c */
 void ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]);
 int ipfw_delete_pipe(int pipe_or_queue, int n);
 
 /* ipv6.c */
 void print_unreach6_code(uint16_t code);
 void print_ip6(struct _ipfw_insn_ip6 *cmd, char const *s);
 void print_flow6id(struct _ipfw_insn_u32 *cmd);
 void print_icmp6types(struct _ipfw_insn_u32 *cmd);
 void print_ext6hdr(struct _ipfw_insn *cmd );
 
 struct _ipfw_insn *add_srcip6(struct _ipfw_insn *cmd, char *av);
 struct _ipfw_insn *add_dstip6(struct _ipfw_insn *cmd, char *av);
 
 void fill_flow6(struct _ipfw_insn_u32 *cmd, char *av );
 void fill_unreach6_code(u_short *codep, char *str);
 void fill_icmp6types(struct _ipfw_insn_icmp6 *cmd, char *av);
 int fill_ext6hdr(struct _ipfw_insn *cmd, char *av);
diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h
index 5b6019d74c02..b5ef19e49a61 100644
--- a/sys/netinet/ip_dummynet.h
+++ b/sys/netinet/ip_dummynet.h
@@ -1,394 +1,396 @@
 /*-
  * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
  * Portions Copyright (c) 2000 Akamba Corp.
  * All rights reserved
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _IP_DUMMYNET_H
 #define _IP_DUMMYNET_H
 
 /*
  * Definition of dummynet data structures. In the structures, I decided
  * not to use the macros in <sys/queue.h> in the hope of making the code
  * easier to port to other architectures. The type of lists and queue we
  * use here is pretty simple anyways.
  */
 
 /*
  * We start with a heap, which is used in the scheduler to decide when
  * to transmit packets etc.
  *
  * The key for the heap is used for two different values:
  *
  * 1. timer ticks- max 10K/second, so 32 bits are enough;
  *
  * 2. virtual times. These increase in steps of len/x, where len is the
  *    packet length, and x is either the weight of the flow, or the
  *    sum of all weights.
  *    If we limit to max 1000 flows and a max weight of 100, then
  *    x needs 17 bits. The packet size is 16 bits, so we can easily
  *    overflow if we do not allow errors.
  * So we use a key "dn_key" which is 64 bits. Some macros are used to
  * compare key values and handle wraparounds.
  * MAX64 returns the largest of two key values.
  * MY_M is used as a shift count when doing fixed point arithmetic
  * (a better name would be useful...).
  */
 typedef u_int64_t dn_key ;      /* sorting key */
 #define DN_KEY_LT(a,b)     ((int64_t)((a)-(b)) < 0)
 #define DN_KEY_LEQ(a,b)    ((int64_t)((a)-(b)) <= 0)
 #define DN_KEY_GT(a,b)     ((int64_t)((a)-(b)) > 0)
 #define DN_KEY_GEQ(a,b)    ((int64_t)((a)-(b)) >= 0)
 #define MAX64(x,y)  ((((int64_t)((y)-(x))) > 0) ? (y) : (x)) /* wrap-aware max; args evaluated twice */
 #define MY_M	16 /* number of left shift to obtain a larger precision */
 
 /*
  * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the
  * virtual time wraps every 15 days.
  */
 
 
 /*
  * The maximum hash table size for queues.  This value must be a power
  * of 2.
  */
 #define DN_MAX_HASH_SIZE 65536
 
 /*
  * A heap entry is made of a key and a pointer to the actual
  * object stored in the heap.
  * The heap is an array of dn_heap_entry entries, dynamically allocated.
  * Current size is "size", with "elements" actually in use.
  * The heap normally supports only ordered insert and extract from the top.
  * If we want to extract an object from the middle of the heap, we
  * have to know where the object itself is located in the heap (or we
  * need to scan the whole array). To this purpose, an object has a
  * field (int) which contains the index of the object itself into the
  * heap. When the object is moved, the field must also be updated.
  * The offset of the index in the object is stored in the 'offset'
  * field in the heap descriptor. The assumption is that this offset
  * is non-zero if we want to support extract from the middle.
  */
 struct dn_heap_entry {
     dn_key key ;	/* sorting key. Topmost element is smallest one */
     void *object ;	/* object pointer */
 } ;
 
 struct dn_heap {
     int size ;		/* number of entries allocated in p[] */
     int elements ;	/* number of entries actually in use */
     int offset ; /* XXX if > 0, offset in the object of its heap-index field */
     struct dn_heap_entry *p ;	/* really an array of "size" entries */
 } ;
 
 #ifdef _KERNEL
 /*
  * Packets processed by dummynet have an mbuf tag associated with
  * them that carries their dummynet state.  This is used within
  * the dummynet code as well as outside when checking for special
  * processing requirements.
  */
 struct dn_pkt_tag {
     struct ip_fw *rule;		/* matching rule */
     uint32_t rule_id;		/* matching rule id */
     uint32_t chain_id;		/* ruleset id */
     int dn_dir;			/* action when packet comes out. */
 #define DN_TO_IP_OUT	1
 #define DN_TO_IP_IN	2
 /* Obsolete: #define DN_TO_BDG_FWD	3 */
 #define DN_TO_ETH_DEMUX	4
 #define DN_TO_ETH_OUT	5
 #define DN_TO_IP6_IN	6
 #define DN_TO_IP6_OUT	7
 #define DN_TO_IFB_FWD	8
 
     dn_key output_time;		/* when the pkt is due for delivery	*/
     struct ifnet *ifp;		/* interface, for ip_output		*/
     struct _ip6dn_args ip6opt;	/* XXX ipv6 options			*/
 };
 #endif /* _KERNEL */
 
 /*
  * Overall structure of dummynet (with WF2Q+):
 
 In dummynet, packets are selected with the firewall rules, and passed
 to two different objects: PIPE or QUEUE.
 
 A QUEUE is just a queue with configurable size and queue management
 policy. It is also associated with a mask (to discriminate among
 different flows), a weight (used to give different shares of the
 bandwidth to different flows) and a "pipe", which essentially
 supplies the transmit clock for all queues associated with that
 pipe.
 
 A PIPE emulates a fixed-bandwidth link, whose bandwidth is
 configurable.  The "clock" for a pipe can come from either an
 internal timer, or from the transmit interrupt of an interface.
 A pipe is also associated with one (or more, if masks are used)
 queue, where all packets for that pipe are stored.
 
 The bandwidth available on the pipe is shared by the queues
 associated with that pipe (only one in case the packet is sent
 to a PIPE) according to the WF2Q+ scheduling algorithm and the
 configured weights.
 
 In general, incoming packets are stored in the appropriate queue,
 which is then placed into one of a few heaps managed by a scheduler
 to decide when the packet should be extracted.
 The scheduler (a function called dummynet()) is run at every timer
 tick, and grabs queues from the head of the heaps when they are
 ready for processing.
 
 There are three data structures defining a pipe and associated queues:
 
  + dn_pipe, which contains the main configuration parameters related
    to delay and bandwidth;
  + dn_flow_set, which contains WF2Q+ configuration, flow
    masks, plr and RED configuration;
  + dn_flow_queue, which is the per-flow queue (containing the packets)
 
 Multiple dn_flow_set can be linked to the same pipe, and multiple
 dn_flow_queue can be linked to the same dn_flow_set.
 All data structures are linked in a linear list which is used for
 housekeeping purposes.
 
 During configuration, we create and initialize the dn_flow_set
 and dn_pipe structures (a dn_pipe also contains a dn_flow_set).
 
 At runtime: packets are sent to the appropriate dn_flow_set (either
 WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows),
 which in turn dispatches them to the appropriate dn_flow_queue
 (created dynamically according to the masks).
 
 The transmit clock for fixed rate flows (ready_event()) selects the
 dn_flow_queue to be used to transmit the next packet. For WF2Q,
 ready_event_wfq() extracts a pipe which in turn selects the right
 flow using a number of heaps defined in the pipe itself.
 
  *
  */
 
 /*
  * per flow queue. This contains the flow identifier, the queue
  * of packets, counters, and parameters used to support both RED and
  * WF2Q+.
  *
  * A dn_flow_queue is created and initialized whenever a packet for
  * a new flow arrives.
  */
 struct dn_flow_queue {
     struct dn_flow_queue *next ;
     struct ipfw_flow_id id ;
 
     struct mbuf *head, *tail ;	/* queue of packets */
     u_int len ;
     u_int len_bytes ;
 
     /*
      * When we emulate MAC overheads, or channel unavailability due
      * to other traffic on a shared medium, we augment the packet at
      * the head of the queue with an 'extra_bits' field representing
      * the additional delay the packet will be subject to:
      *		extra_bits = bw*unavailable_time.
      * With large bandwidth and large delays, extra_bits (and also numbytes)
      * can become very large, so better play safe and use 64 bit
      */
     uint64_t numbytes ;		/* credit for transmission (dynamic queues) */
     int64_t extra_bits;		/* extra bits simulating unavailable channel */
 
     u_int64_t tot_pkts ;	/* statistics counters	*/
     u_int64_t tot_bytes ;
     u_int32_t drops ;
 
     int hash_slot ;		/* debugging/diagnostic */
 
     /* RED parameters */
     int avg ;                   /* average queue length est. (scaled) */
     int count ;                 /* arrivals since last RED drop */
     int random ;                /* random value (scaled) */
-    dn_key q_time;		/* start of queue idle time */
+    dn_key idle_time;		/* start of queue idle time */
 
     /* WF2Q+ support */
     struct dn_flow_set *fs ;	/* parent flow set */
     int heap_pos ;		/* position (index) of struct in heap */
     dn_key sched_time ;		/* current time when queue enters ready_heap */
 
     dn_key S,F ;		/* start time, finish time */
     /*
      * Setting F < S means the timestamp is invalid. We only need
      * to test this when the queue is empty.
      */
 } ;
 
 /*
  * flow_set descriptor. Contains the "template" parameters for the
  * queue configuration, and pointers to the hash table of dn_flow_queue's.
  *
  * The hash table is an array of lists -- we identify the slot by
  * hashing the flow-id, then scan the list looking for a match.
  * The size of the hash table (buckets) is configurable on a per-queue
  * basis.
  *
  * A dn_flow_set is created whenever a new queue or pipe is created (in the
  * latter case, the structure is located inside the struct dn_pipe).
  */
 struct dn_flow_set {
     SLIST_ENTRY(dn_flow_set)	next;	/* linked list in a hash slot */
 
     u_short fs_nr ;             /* flow_set number       */
     u_short flags_fs;
 #define DN_HAVE_FLOW_MASK	0x0001
 #define DN_IS_RED		0x0002
 #define DN_IS_GENTLE_RED	0x0004
 #define DN_QSIZE_IS_BYTES	0x0008	/* queue size is measured in bytes */
 #define DN_NOERROR		0x0010	/* do not report ENOBUFS on drops  */
 #define	DN_HAS_PROFILE		0x0020	/* the pipe has a delay profile. */
 #define DN_IS_PIPE		0x4000
 #define DN_IS_QUEUE		0x8000
 
     struct dn_pipe *pipe ;	/* pointer to parent pipe */
     u_short parent_nr ;		/* parent pipe#, 0 if local to a pipe */
 
     int weight ;		/* WFQ queue weight */
     int qsize ;			/* queue size in slots or bytes */
     int plr ;			/* pkt loss rate (2^31-1 means 100%) */
 
     struct ipfw_flow_id flow_mask ;
 
     /* hash table of queues onto this flow_set */
     int rq_size ;		/* number of slots */
     int rq_elements ;		/* active elements */
     struct dn_flow_queue **rq;	/* array of rq_size entries */
 
     u_int32_t last_expired ;	/* do not expire too frequently */
     int backlogged ;		/* #active queues for this flowset */
 
         /* RED parameters */
 #define SCALE_RED               16
 #define SCALE(x)                ( (x) << SCALE_RED )
 #define SCALE_VAL(x)            ( (x) >> SCALE_RED )
 #define SCALE_MUL(x,y)          ( ( (x) * (y) ) >> SCALE_RED )
     int w_q ;			/* queue weight (scaled) */
     int max_th ;		/* maximum threshold for queue (scaled) */
     int min_th ;		/* minimum threshold for queue (scaled) */
     int max_p ;			/* maximum value for p_b (scaled) */
     u_int c_1 ;			/* max_p/(max_th-min_th) (scaled) */
     u_int c_2 ;			/* max_p*min_th/(max_th-min_th) (scaled) */
     u_int c_3 ;			/* for GRED, (1-max_p)/max_th (scaled) */
     u_int c_4 ;			/* for GRED, 1 - 2*max_p (scaled) */
     u_int * w_q_lookup ;	/* lookup table for computing (1-w_q)^t */
     u_int lookup_depth ;	/* depth of lookup table */
     int lookup_step ;		/* granularity inside the lookup table */
     int lookup_weight ;		/* equal to (1-w_q)^t / (1-w_q)^(t+1) */
     int avg_pkt_size ;		/* medium packet size */
     int max_pkt_size ;		/* max packet size */
 };
 SLIST_HEAD(dn_flow_set_head, dn_flow_set);
 
 /*
  * Pipe descriptor. Contains global parameters, delay-line queue,
  * and the flow_set used for fixed-rate queues.
  *
  * For WF2Q+ support it also has 3 heaps holding dn_flow_queue:
  *   not_eligible_heap, for queues whose start time is higher
  *	than the virtual time. Sorted by start time.
  *   scheduler_heap, for queues eligible for scheduling. Sorted by
  *	finish time.
  *   idle_heap, all flows that are idle and can be removed. We
  *	do that on each tick so we do not slow down too much
  *	operations during forwarding.
  *
  */
 struct dn_pipe {		/* a pipe */
     SLIST_ENTRY(dn_pipe)	next;	/* linked list in a hash slot */
 
     int	pipe_nr ;		/* number	*/
     int bandwidth;		/* really, bytes/tick.	*/
     int	delay ;			/* really, ticks	*/
 
     struct	mbuf *head, *tail ;	/* packets in delay line */
 
     /* WF2Q+ */
     struct dn_heap scheduler_heap ; /* top extract - key Finish time*/
     struct dn_heap not_eligible_heap; /* top extract- key Start time */
     struct dn_heap idle_heap ; /* random extract - key Start=Finish time */
 
     dn_key V ;			/* virtual time */
     int sum;			/* sum of weights of all active sessions */
 
     /* Same as in dn_flow_queue, numbytes can become large */
     int64_t numbytes;		/* bits I can transmit (more or less). */
+    uint64_t burst;		/* burst size, scaled: bits * hz */
 
     dn_key sched_time ;		/* time pipe was scheduled in ready_heap */
+    dn_key idle_time;		/* start of pipe idle time */
 
     /*
      * When the tx clock comes from an interface (if_name[0] != '\0'), its name
      * is stored below, whereas the ifp is filled when the rule is configured.
      */
     char if_name[IFNAMSIZ];
     struct ifnet *ifp ;
     int ready ; /* set if ifp != NULL and we got a signal from it */
 
     struct dn_flow_set fs ; /* used with fixed-rate flows */
 
     /* fields to simulate a delay profile */
 
 #define ED_MAX_NAME_LEN		32
     char name[ED_MAX_NAME_LEN];
     int loss_level;
     int samples_no;
     int *samples;
 };
 
 /* dn_pipe_max is used to pass pipe configuration from userland onto
  * kernel space and back
  */
 #define ED_MAX_SAMPLES_NO	1024
 struct dn_pipe_max {
 	struct dn_pipe pipe;
 	int samples[ED_MAX_SAMPLES_NO];
 };
 
 SLIST_HEAD(dn_pipe_head, dn_pipe);
 
 #ifdef _KERNEL
 
 /*
  * Claim the dummynet tag attached to mbuf 'm'.
  * Returns a pointer to the tag data, or NULL if 'm' has no dummynet tag.
  * The tag id is reset to PACKET_TAG_NONE so lower layers do not reuse it.
  */
 static __inline struct dn_pkt_tag *
 ip_dn_claim_tag(struct mbuf *m)
 {
 	struct m_tag *tag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
 	if (tag == NULL)
 		return (NULL);
 	tag->m_tag_id = PACKET_TAG_NONE;
 	return ((struct dn_pkt_tag *)(tag + 1));	/* data follows the m_tag */
 }
 #endif
 #endif /* _IP_DUMMYNET_H */
diff --git a/sys/netinet/ipfw/ip_dummynet.c b/sys/netinet/ipfw/ip_dummynet.c
index bf54839ec7fa..d5620997b3e9 100644
--- a/sys/netinet/ipfw/ip_dummynet.c
+++ b/sys/netinet/ipfw/ip_dummynet.c
@@ -1,2304 +1,2334 @@
 /*-
  * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
  * Portions Copyright (c) 2000 Akamba Corp.
  * All rights reserved
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #define	DUMMYNET_DEBUG
 
 #include "opt_inet6.h"
 
 /*
  * This module implements IP dummynet, a bandwidth limiter/delay emulator
  * used in conjunction with the ipfw package.
  * Description of the data structures used is in ip_dummynet.h
  * Here you mainly find the following blocks of code:
  *  + variable declarations;
  *  + heap management functions;
  *  + scheduler and dummynet functions;
  *  + configuration and initialization.
  *
  * NOTA BENE: critical sections are protected by the "dummynet lock".
  *
  * Most important Changes:
  *
  * 011004: KLDable
  * 010124: Fixed WF2Q behaviour
  * 010122: Fixed spl protection.
  * 000601: WF2Q support
  * 000106: large rewrite, use heaps to handle very many pipes.
  * 980513:	initial release
  *
  * include files marked with XXX are probably not needed
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <net/if.h>	/* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
 #include <net/netisr.h>
 #include <netinet/in.h>
 #include <netinet/ip.h>		/* ip_len, ip_off */
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 #include <netinet/ip_var.h>	/* ip_output(), IP_FORWARDING */
 
 #include <netinet/if_ether.h> /* various ether_* routines */
 
 #include <netinet/ip6.h>       /* for ip6_input, ip6_output prototypes */
 #include <netinet6/ip6_var.h>
 
 /*
  * We keep a private variable for the simulation time, but we could
  * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
  */
 static dn_key curr_time = 0 ; /* current simulation time */
 
 static int dn_hash_size = 64 ;	/* default hash size */
 
 /* statistics on number of queue searches and search steps */
 static long searches, search_steps ;
 static int pipe_expire = 1 ;   /* expire queue if empty */
 static int dn_max_ratio = 16 ; /* max queues/buckets ratio */
 
 static long pipe_slot_limit = 100; /* Foot shooting limit for pipe queues. */
 static long pipe_byte_limit = 1024 * 1024;
 
 static int red_lookup_depth = 256;	/* RED - default lookup table depth */
 static int red_avg_pkt_size = 512;      /* RED - default medium packet size */
 static int red_max_pkt_size = 1500;     /* RED - default max packet size */
 
 static struct timeval prev_t, t;
 static long tick_last;			/* Last tick duration (usec). */
 static long tick_delta;			/* Last vs standard tick diff (usec). */
 static long tick_delta_sum;		/* Accumulated tick difference (usec).*/
 static long tick_adjustment;		/* Tick adjustments done. */
 static long tick_lost;			/* Lost(coalesced) ticks number. */
 /* Adjusted vs non-adjusted curr_time difference (ticks). */
 static long tick_diff;
 
 static int		io_fast;
 static unsigned long	io_pkt;
 static unsigned long	io_pkt_fast;
 static unsigned long	io_pkt_drop;
 
 /*
  * Three heaps contain queues and pipes that the scheduler handles:
  *
  * ready_heap contains all dn_flow_queue related to fixed-rate pipes.
  *
  * wfq_ready_heap contains the pipes associated with WF2Q flows
  *
  * extract_heap contains pipes associated with delay lines.
  *
  */
 
 MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
 
 static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ;
 
 static int	heap_init(struct dn_heap *h, int size);
 static int	heap_insert (struct dn_heap *h, dn_key key1, void *p);
 static void	heap_extract(struct dn_heap *h, void *obj);
 static void	transmit_event(struct dn_pipe *pipe, struct mbuf **head,
 		    struct mbuf **tail);
 static void	ready_event(struct dn_flow_queue *q, struct mbuf **head,
 		    struct mbuf **tail);
 static void	ready_event_wfq(struct dn_pipe *p, struct mbuf **head,
 		    struct mbuf **tail);
 
 #define	HASHSIZE	16
 #define	HASH(num)	((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f)
 static struct dn_pipe_head	pipehash[HASHSIZE];	/* all pipes */
 static struct dn_flow_set_head	flowsethash[HASHSIZE];	/* all flowsets */
 
 static struct callout dn_timeout;
 
 extern	void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
 #ifdef SYSCTL_NODE
 SYSCTL_DECL(_net_inet);
 SYSCTL_DECL(_net_inet_ip);
 
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
     CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
 #if 0	/* curr_time is 64 bit */
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time,
     CTLFLAG_RD, &curr_time, 0, "Current tick");
 #endif
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
     CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
     CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, searches,
     CTLFLAG_RD, &searches, 0, "Number of queue searches");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps,
     CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
     CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
     CTLFLAG_RW, &dn_max_ratio, 0,
     "Max ratio between dynamic queues and buckets");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
     CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
     CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
     CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
     CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec).");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
     CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec).");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
     CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done.");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
     CTLFLAG_RD, &tick_diff, 0,
     "Adjusted vs non-adjusted curr_time difference (ticks).");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
     CTLFLAG_RD, &tick_lost, 0,
     "Number of ticks coalesced by dummynet taskqueue.");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
     CTLFLAG_RW, &io_fast, 0, "Enable fast dummynet io.");
 SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
     CTLFLAG_RD, &io_pkt, 0,
     "Number of packets passed to dummynet.");
 SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
     CTLFLAG_RD, &io_pkt_fast, 0,
     "Number of packets bypassed dummynet scheduler.");
 SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
     CTLFLAG_RD, &io_pkt_drop, 0,
     "Number of packets dropped by dummynet.");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
     CTLFLAG_RW, &pipe_slot_limit, 0, "Upper limit in slots for pipe queue.");
 SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
     CTLFLAG_RW, &pipe_byte_limit, 0, "Upper limit in bytes for pipe queue.");
 #endif
 
 #ifdef DUMMYNET_DEBUG
 /* Runtime switch for the DPRINTF() output below; off by default. */
 int	dummynet_debug = 0;
 #ifdef SYSCTL_NODE
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dummynet_debug,
 	    0, "control debugging printfs");
 #endif
 /*
  * DPRINTF((fmt, ...)) takes a fully parenthesized printf() argument list
  * and prints it only when dummynet_debug is nonzero.
  * NOTE(review): the expansion is a bare `if', so an unbraced use right
  * before an `else' would capture that `else'.
  */
 #define	DPRINTF(X)	if (dummynet_debug) printf X
 #else
 /* Debugging compiled out: DPRINTF() expands to nothing. */
 #define	DPRINTF(X)
 #endif
 
 /* dummynet() enqueues dn_task on the dn_tq taskqueue. */
 static struct task	dn_task;
 static struct taskqueue	*dn_tq = NULL;
 static void dummynet_task(void *, int);
 
 /* Mutex behind the DUMMYNET_LOCK*() wrappers below. */
 static struct mtx dummynet_mtx;
 #define	DUMMYNET_LOCK_INIT() \
 	mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF)
 #define	DUMMYNET_LOCK_DESTROY()	mtx_destroy(&dummynet_mtx)
 #define	DUMMYNET_LOCK()		mtx_lock(&dummynet_mtx)
 #define	DUMMYNET_UNLOCK()	mtx_unlock(&dummynet_mtx)
 /* Asserts that the calling thread owns dummynet_mtx. */
 #define	DUMMYNET_LOCK_ASSERT()	mtx_assert(&dummynet_mtx, MA_OWNED)
 
 /* Forward declarations for functions defined later in this file. */
 static int	config_pipe(struct dn_pipe *p);
 static int	ip_dn_ctl(struct sockopt *sopt);
 
 static void	dummynet(void *);
 static void	dummynet_flush(void);
 static void	dummynet_send(struct mbuf *);
 void		dummynet_drain(void);
 static int	dummynet_io(struct mbuf **, int , struct ip_fw_args *);
 
 /*
  * Heap management functions.
  *
  * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
  * Some macros help finding parent/children so we can optimize them.
  *
  * heap_init() is called to expand the heap when needed.
  * Increment size in blocks of 16 entries.
  * XXX failure to allocate a new element is a pretty bad failure
  * as we basically stall a whole queue forever!!
  * Returns 1 on error, 0 on success
  */
 /* Index of the parent of node x. */
 #define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
 /* Index of the left child of node x. */
 #define HEAP_LEFT(x) ( 2*(x) + 1 )
 /* Nonzero when x is odd, i.e. when node x is a left child. */
 #define HEAP_IS_LEFT(x) ( (x) & 1 )
 /* Index of the right child of node x. */
 #define HEAP_RIGHT(x) ( 2*(x) + 2 )
 /* Exchange a and b, using buffer as scratch storage. */
 #define	HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
 /*
  * Used by heap_init() as a rounding mask: requested sizes are rounded up
  * to a multiple of HEAP_INCREMENT + 1 (16) entries.
  */
 #define HEAP_INCREMENT	15
 
 static int
 heap_init(struct dn_heap *h, int new_size)
 {
     struct dn_heap_entry *entries;
     int rounded;
 
     /* A request that does not grow the heap is reported and ignored. */
     if (new_size <= h->size) {
 	printf("dummynet: %s, Bogus call, have %d want %d\n", __func__,
 		h->size, new_size);
 	return 0;
     }
 
     /* Round the request up to a multiple of HEAP_INCREMENT + 1 entries. */
     rounded = (new_size + HEAP_INCREMENT) & ~HEAP_INCREMENT;
     entries = malloc(rounded * sizeof(*entries), M_DUMMYNET, M_NOWAIT);
     if (entries == NULL) {
 	printf("dummynet: %s, resize %d failed\n", __func__, rounded);
 	return 1;	/* allocation failed, heap left untouched */
     }
 
     /* Carry the existing entries over and release the old array. */
     if (h->size > 0) {
 	bcopy(h->p, entries, h->size * sizeof(*entries));
 	free(h->p, M_DUMMYNET);
     }
     h->size = rounded;
     h->p = entries;
     return 0;
 }
 
 /*
  * Insert element in heap. Normally, p != NULL, we insert p in
  * a new position and bubble up. If p == NULL, then the element is
  * already in place, and key is the position where to start the
  * bubble-up.
  * Returns 1 on failure (cannot allocate new heap entry)
  *
  * If offset > 0 the position (index, int) of the element in the heap is
  * also stored in the element itself at the given offset in bytes.
  */
 /*
  * SET_OFFSET stores the index `node' inside the object held at that
  * index, at heap->offset bytes from its start; it does nothing on heaps
  * with offset <= 0.
  * NOTE(review): the expansion is an unbraced `if' that already ends in
  * `;' -- take care when using it directly inside an if/else.
  */
 #define SET_OFFSET(heap, node) \
     if (heap->offset > 0) \
 	    *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ;
 /*
  * RESET_OFFSET is used for sanity checks. It overwrites the index stored
  * in the object with the invalid value -1.
  */
 #define RESET_OFFSET(heap, node) \
     if (heap->offset > 0) \
 	    *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ;
 static int
 heap_insert(struct dn_heap *h, dn_key key1, void *p)
 {
     int pos;
 
     if (p != NULL) {
 	/* New element: append at the end, growing the array when full. */
 	pos = h->elements;
 	if (pos == h->size && heap_init(h, h->elements + 1))
 	    return 1;		/* could not grow the heap */
 	h->p[pos].key = key1;
 	h->p[pos].object = p;
 	h->elements++;
     } else {
 	/* Element already stored: key1 is the index to bubble up from. */
 	pos = key1;
     }
 
     /* Bubble up while the parent does not sort before this entry. */
     while (pos > 0) {
 	struct dn_heap_entry scratch;
 	int parent = HEAP_FATHER(pos);
 
 	if (DN_KEY_LT(h->p[parent].key, h->p[pos].key))
 	    break;		/* found the right position */
 	HEAP_SWAP(h->p[pos], h->p[parent], scratch);
 	SET_OFFSET(h, pos);	/* refresh index of the entry moved down */
 	pos = parent;
     }
     SET_OFFSET(h, pos);
     return 0;
 }
 
 /*
  * Remove an element from the heap and restore the heap ordering.
  * With obj == NULL the top element (index 0) is removed; otherwise the
  * index of obj is read from inside obj at h->offset, which requires a
  * heap with offset > 0 (the function panics if that is not the case or
  * if the stored index is out of range).
  * Extracting from an empty heap only prints a warning.
  */
 static void
 heap_extract(struct dn_heap *h, void *obj)
 {
     int child, father, max = h->elements - 1 ;
 
     if (max < 0) {
 	printf("dummynet: warning, extract from empty heap 0x%p\n", h);
 	return ;
     }
     father = 0 ; /* default: move up smallest child */
     if (obj != NULL) { /* extract specific element, index is at offset */
 	if (h->offset <= 0)
 	    panic("dummynet: heap_extract from middle not supported on this heap!!!\n");
 	father = *((int *)((char *)obj + h->offset)) ;
 	if (father < 0 || father >= h->elements) {
 	    printf("dummynet: heap_extract, father %d out of bound 0..%d\n",
 		father, h->elements);
 	    panic("dummynet: heap_extract");
 	}
     }
     /* Invalidate the index stored in the element being removed. */
     RESET_OFFSET(h, father);
     /* Walk the hole down to a leaf, pulling up the smaller child each time. */
     child = HEAP_LEFT(father) ;		/* left child */
     while (child <= max) {		/* valid entry */
 	if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
 	    child = child+1 ;		/* take right child, otherwise left */
 	h->p[father] = h->p[child] ;
 	SET_OFFSET(h, father);
 	father = child ;
 	child = HEAP_LEFT(child) ;   /* left child for next loop */
     }
     h->elements-- ;
     if (father != max) {
 	/*
 	 * Fill hole with last entry and bubble up, reusing the insert code
 	 */
 	h->p[father] = h->p[max] ;
 	heap_insert(h, father, NULL); /* this one cannot fail */
     }
 }
 
 #if 0
 /*
  * change object position and update references
  * XXX this one is never used!
  *
  * Compiled out. Gives `object' the key new_key and moves its entry up or
  * down until the heap ordering holds again. The current index of the
  * object is read from inside it at h->offset, so this panics on heaps
  * with offset <= 0.
  */
 static void
 heap_move(struct dn_heap *h, dn_key new_key, void *object)
 {
     int temp;
     int i ;
     int max = h->elements-1 ;
     struct dn_heap_entry buf ;
 
     if (h->offset <= 0)
 	panic("cannot move items on this heap");
 
     /* Current index of the object inside the heap array. */
     i = *((int *)((char *)object + h->offset));
     if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */
 	h->p[i].key = new_key ;
 	for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ;
 		 i = temp ) { /* bubble up */
 	    HEAP_SWAP(h->p[i], h->p[temp], buf) ;
 	    SET_OFFSET(h, i);
 	}
     } else {		/* must move down */
 	h->p[i].key = new_key ;
 	while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */
 	    if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key))
 		temp++ ; /* select child with min key */
 	    if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */
 		HEAP_SWAP(h->p[i], h->p[temp], buf) ;
 		SET_OFFSET(h, i);
 	    } else
 		break ;
 	    i = temp ;
 	}
     }
     /* Record the final position inside the moved object. */
     SET_OFFSET(h, i);
 }
 #endif /* heap_move, unused */
 
 /*
  * heapify() rebuilds the heap ordering over the whole array by bubbling
  * up every stored entry in turn, from index 0 to the last one.
  * It is needed after a bunch of entries has been deleted.
  */
 static void
 heapify(struct dn_heap *h)
 {
     int pos = 0;
 
     while (pos < h->elements) {
 	/* p == NULL: the entry is already in place, just bubble it up. */
 	heap_insert(h, pos, NULL);
 	pos++;
     }
 }
 
 /*
  * Release the entry array of the heap (if one was ever allocated) and
  * reset the whole descriptor to zero.
  */
 static void
 heap_free(struct dn_heap *h)
 {
     if (h->size > 0) {
 	free(h->p, M_DUMMYNET);
     }
     bzero(h, sizeof(*h));
 }
 
 /*
  * --- end of heap management functions ---
  */
 
 /*
  * Return the dummynet state attached to an mbuf.  As an optimization the
  * dummynet tag is assumed to be the first tag on the mbuf's tag list
  * (checked with KASSERT); if that ever stops being true the list will
  * have to be searched instead.
  */
 static struct dn_pkt_tag *
 dn_tag_get(struct mbuf *m)
 {
     struct m_tag *first;
 
     first = m_tag_first(m);
     KASSERT(first != NULL &&
 	    first->m_tag_cookie == MTAG_ABI_COMPAT &&
 	    first->m_tag_id == PACKET_TAG_DUMMYNET,
 	    ("packet on dummynet queue w/o dummynet tag!"));
     /* The dummynet data starts right after the m_tag header. */
     return (struct dn_pkt_tag *)&first[1];
 }
 
 /*
  * Scheduler functions:
  *
  * transmit_event() is called when the delay-line needs to enter
  * the scheduler, either because of existing pkts getting ready,
  * or new packets entering the queue. The event handled is the delivery
  * time of the packet.
  *
  * ready_event() does something similar with fixed-rate queues, and the
  * event handled is the finish time of the head pkt.
  *
  * wfq_ready_event() does something similar with WF2Q queues, and the
  * event handled is the start time of the head pkt.
  *
  * In all cases, we make sure that the data structures are consistent
  * before passing pkts out, because this might trigger recursive
  * invocations of the procedures.
  */
 static void
 transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
 {
 	struct dn_pkt_tag *tag;
 	struct mbuf *m;
 
 	DUMMYNET_LOCK_ASSERT();
 
 	/* Move every packet whose output time has come onto *head / *tail. */
 	for (;;) {
 		m = pipe->head;
 		if (m == NULL)
 			break;
 		tag = dn_tag_get(m);
 		if (!DN_KEY_LEQ(tag->output_time, curr_time))
 			break;
 
 		pipe->head = m->m_nextpkt;
 		if (*tail == NULL)
 			*head = m;
 		else
 			(*tail)->m_nextpkt = m;
 		*tail = m;
 	}
 	/* Terminate the output list. */
 	if (*tail != NULL)
 		(*tail)->m_nextpkt = NULL;
 
 	/* Nothing left in the delay line: no further event is needed. */
 	m = pipe->head;
 	if (m == NULL)
 		return;
 
 	/*
 	 * Leftover packets: schedule the pipe again for the output time of
 	 * the first one.
 	 * XXX Should check errors on heap_insert, by draining the
 	 * whole pipe p and hoping in the future we are more successful.
 	 */
 	tag = dn_tag_get(m);
 	heap_insert(&extract_heap, tag->output_time, pipe);
 }
 
 /* Signed 64-bit division of a by b. */
 #define div64(a, b)	((int64_t)(a) / (int64_t)(b))
 /* dn_dir value for packets that dummynet_send() frees instead of sending. */
 #define DN_TO_DROP	0xffff
 /*
  * Compute how many ticks we have to wait before being able to send
  * a packet. This is the "wire time" of the packet (length plus extra
  * bits) minus the credit already available, scaled to ticks and rounded
  * up. The result is clamped at zero, because too much leftover credit
  * in q->numbytes could otherwise make it negative.
  */
 static inline dn_key
 set_ticks(struct mbuf *m, struct dn_flow_queue *q, struct dn_pipe *p)
 {
 	int64_t ticks;
 
 	ticks = div64((m->m_pkthdr.len * 8 + q->extra_bits) * hz
 	    - q->numbytes + p->bandwidth - 1, p->bandwidth);
 #if 0
 	printf("%s %d extra_bits %d numb %d ret %d\n",
 		__FUNCTION__, __LINE__,
 		(int)(q->extra_bits & 0xffffffff),
 		(int)(q->numbytes & 0xffffffff),
 		(int)(ticks & 0xffffffff));
 #endif
 	return (ticks < 0) ? 0 : ticks;
 }
 
 /*
  * Convert the additional MAC overheads/delays into an equivalent
  * number of bits for the given data rate. The samples are in milliseconds
  * so we need to divide by 1000.
  */
 static dn_key
 compute_extra_bits(struct mbuf *pkt, struct dn_pipe *p)
 {
 	int index;
 	dn_key extra_bits;
 
 	if (!p->samples || p->samples_no == 0)
 		return 0;
 	index  = random() % p->samples_no;
 	extra_bits = ((dn_key)p->samples[index] * p->bandwidth) / 1000;
 	if (index >= p->loss_level) {
 		struct dn_pkt_tag *dt = dn_tag_get(pkt);
 		if (dt)
 			dt->dn_dir = DN_TO_DROP;
 	}
 	return extra_bits;
 }
 
 static void
 free_pipe(struct dn_pipe *p)
 {
 	if (p->samples)
 		free(p->samples, M_DUMMYNET);
 	free(p, M_DUMMYNET);
 }
 
 /*
  * Take pkt (the head of flow queue q) out of the queue, stamp it with
  * its output time (now plus the pipe delay, which could be now) and
  * append it to the delay line of pipe p. len is the packet length used
  * to update the byte count of the queue.
  */
 static void
 move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, struct dn_pipe *p,
     int len)
 {
     struct dn_pkt_tag *tag;
 
     tag = dn_tag_get(pkt);
 
     /* Unlink from the flow queue and fix its counters. */
     q->head = pkt->m_nextpkt;
     q->len--;
     q->len_bytes -= len;
 
     tag->output_time = curr_time + p->delay;
 
     /* Append at the tail of the delay line. */
     if (p->head != NULL)
 	p->tail->m_nextpkt = pkt;
     else
 	p->head = pkt;
     p->tail = pkt;
     p->tail->m_nextpkt = NULL;
 }
 
 /*
  * ready_event() is invoked every time the queue must enter the
  * scheduler, either because the first packet arrives, or because
  * a previously scheduled event fired.
  * On invocation, drain as many pkts as possible (could be 0) and then
  * if there are leftover packets reinsert the queue in the scheduler.
  *
  * head/tail are passed on to transmit_event(), which is called only when
  * the delay line of the pipe was empty on entry.
  * Must be called with the dummynet lock held.
  */
 static void
 ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail)
 {
 	struct mbuf *pkt;
 	struct dn_pipe *p = q->fs->pipe;
 	int p_was_empty;
 
 	DUMMYNET_LOCK_ASSERT();
 
 	if (p == NULL) {
 		printf("dummynet: ready_event- pipe is gone\n");
 		return;
 	}
 	p_was_empty = (p->head == NULL);
 
 	/*
 	 * Schedule fixed-rate queues linked to this pipe:
 	 * account for the bw accumulated since last scheduling, then
 	 * drain as many pkts as allowed by q->numbytes and move to
 	 * the delay line (in p) computing output time.
 	 * bandwidth==0 (no limit) means we can drain the whole queue,
 	 * setting len_scaled = 0 does the job.
 	 */
 	q->numbytes += (curr_time - q->sched_time) * p->bandwidth;
 	while ((pkt = q->head) != NULL) {
 		int len = pkt->m_pkthdr.len;
 		dn_key len_scaled = p->bandwidth ? len*8*hz
 			+ q->extra_bits*hz
 			: 0;
 
 		if (DN_KEY_GT(len_scaled, q->numbytes))
 			break;
 		q->numbytes -= len_scaled;
 		move_pkt(pkt, q, p, len);
 		/* Pick the extra bits for the packet that is now at the head. */
 		if (q->head)
 			q->extra_bits = compute_extra_bits(q->head, p);
 	}
 	/*
 	 * If we have more packets queued, schedule next ready event
 	 * (can only occur when bandwidth != 0, otherwise we would have
 	 * flushed the whole queue in the previous loop).
 	 * To this purpose we record the current time and compute how many
 	 * ticks to go for the finish time of the packet.
 	 */
 	if ((pkt = q->head) != NULL) {	/* this implies bandwidth != 0 */
 		dn_key t = set_ticks(pkt, q, p); /* ticks i have to wait */
 
 		q->sched_time = curr_time;
 		heap_insert(&ready_heap, curr_time + t, (void *)q);
 		/*
 		 * XXX Should check errors on heap_insert, and drain the whole
 		 * queue on error hoping next time we are luckier.
 		 */
 	} else		/* RED needs to know when the queue becomes empty. */
-		q->q_time = curr_time;
+		q->idle_time = curr_time;
 
 	/*
 	 * If the delay line was empty call transmit_event() now.
 	 * Otherwise, the scheduler will take care of it.
 	 */
 	if (p_was_empty)
 		transmit_event(p, head, tail);
 }
 
 /*
  * Called when we can transmit packets on WF2Q queues. Take pkts out of
  * the queues at their start time, and enqueue into the delay line.
  * Packets are drained until p->numbytes < 0. As long as
  * len_scaled >= p->numbytes, the packet goes into the delay line
  * with a deadline p->delay. For the last packet, if p->numbytes < 0,
  * there is an additional delay.
  *
  * head/tail are passed on to transmit_event(), which is called only when
  * the delay line of the pipe was empty on entry.
  * Must be called with the dummynet lock held.
  */
 static void
 ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail)
 {
 	int p_was_empty = (p->head == NULL);
 	struct dn_heap *sch = &(p->scheduler_heap);
 	struct dn_heap *neh = &(p->not_eligible_heap);
 
 	DUMMYNET_LOCK_ASSERT();
 
 	if (p->if_name[0] == 0)		/* tx clock is simulated */
 		p->numbytes += (curr_time - p->sched_time) * p->bandwidth;
 	else {	/*
 		 * tx clock is for real,
 		 * the ifq must be empty or this is a NOP.
 		 */
 		if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
 			return;
 		else {
 			DPRINTF(("dummynet: pipe %d ready from %s --\n",
 			    p->pipe_nr, p->if_name));
 		}
 	}
 
 	/*
 	 * While we have backlogged traffic AND credit, we need to do
 	 * something on the queue.
 	 */
 	while (p->numbytes >= 0 && (sch->elements > 0 || neh->elements > 0)) {
 		if (sch->elements > 0) {
 			/* Have some eligible pkts to send out. */
 			struct dn_flow_queue *q = sch->p[0].object;
 			struct mbuf *pkt = q->head;
 			struct dn_flow_set *fs = q->fs;
 			uint64_t len = pkt->m_pkthdr.len;
 			int len_scaled = p->bandwidth ? len * 8 * hz : 0;
 
 			heap_extract(sch, NULL); /* Remove queue from heap. */
 			p->numbytes -= len_scaled;
 			move_pkt(pkt, q, p, len);
 
 			p->V += (len << MY_M) / p->sum;	/* Update V. */
 			q->S = q->F;			/* Update start time. */
 			if (q->len == 0) {
 				/* Flow not backlogged any more. */
 				fs->backlogged--;
 				heap_insert(&(p->idle_heap), q->F, q);
 			} else {
 				/* Still backlogged. */
 
 				/*
 				 * Update F and position in backlogged queue,
 				 * then put flow in not_eligible_heap
 				 * (we will fix this later).
 				 */
 				len = (q->head)->m_pkthdr.len;
 				q->F += (len << MY_M) / (uint64_t)fs->weight;
 				if (DN_KEY_LEQ(q->S, p->V))
 					heap_insert(neh, q->S, q);
 				else
 					heap_insert(sch, q->F, q);
 			}
 		}
 		/*
 		 * Now compute V = max(V, min(S_i)). Remember that all elements
 		 * in sch have by definition S_i <= V so if sch is not empty,
 		 * V is surely the max and we must not update it. Conversely,
 		 * if sch is empty we only need to look at neh.
 		 */
 		if (sch->elements == 0 && neh->elements > 0)
 			p->V = MAX64(p->V, neh->p[0].key);
 		/* Move from neh to sch any packets that have become eligible */
 		while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V)) {
 			struct dn_flow_queue *q = neh->p[0].object;
 			heap_extract(neh, NULL);
 			heap_insert(sch, q->F, q);
 		}
 
 		if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */
 			p->numbytes = -1;	/* Mark not ready for I/O. */
 			break;
 		}
 	}
-	if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0 &&
-	    p->idle_heap.elements > 0) {
+	if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0) {
+		p->idle_time = curr_time;
 		/*
 		 * No traffic and no events scheduled.
 		 * We can get rid of idle-heap.
 		 */
-		int i;
-
-		for (i = 0; i < p->idle_heap.elements; i++) {
-			struct dn_flow_queue *q = p->idle_heap.p[i].object;
-
-			q->F = 0;
-			q->S = q->F + 1;
+		if (p->idle_heap.elements > 0) {
+			int i;
+
+			for (i = 0; i < p->idle_heap.elements; i++) {
+				struct dn_flow_queue *q;
+				
+				q = p->idle_heap.p[i].object;
+				q->F = 0;
+				q->S = q->F + 1;
+			}
+			p->sum = 0;
+			p->V = 0;
+			p->idle_heap.elements = 0;
 		}
-		p->sum = 0;
-		p->V = 0;
-		p->idle_heap.elements = 0;
 	}
 	/*
 	 * If we are getting clocks from dummynet (not a real interface) and
 	 * If we are under credit, schedule the next ready event.
 	 * Also fix the delivery time of the last packet.
 	 */
 	if (p->if_name[0]==0 && p->numbytes < 0) { /* This implies bw > 0. */
 		dn_key t = 0;		/* Number of ticks i have to wait. */
 
 		/* Ticks needed to pay back the missing credit, rounded up. */
 		if (p->bandwidth > 0)
 			t = (p->bandwidth - 1 - p->numbytes) / p->bandwidth;
 		dn_tag_get(p->tail)->output_time += t;
 		p->sched_time = curr_time;
 		heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
 		/*
 		 * XXX Should check errors on heap_insert, and drain the whole
 		 * queue on error hoping next time we are luckier.
 		 */
 	}
 
 	/*
 	 * If the delay line was empty call transmit_event() now.
 	 * Otherwise, the scheduler will take care of it.
 	 */
 	if (p_was_empty)
 		transmit_event(p, head, tail);
 }
 
 /*
  * Callout handler. dummynet_task() re-arms the callout with a delay of
  * one tick at the end of every run, so this is called one tick after the
  * previous run. It only enqueues dn_task on the dn_tq taskqueue; the
  * argument is unused.
  */
 static void
 dummynet(void * __unused unused)
 {
 
 	taskqueue_enqueue(dn_tq, &dn_task);
 }
 
 /*
  * The main dummynet processing function.
  *
  * Under the dummynet lock it advances curr_time (correcting it with the
  * measured length of the last tick), fires every event that is due in
  * the ready, wfq-ready and delay-line heaps, and expires idle flow
  * queues whose key in a pipe's idle_heap is behind pipe->V. Packets
  * collected in head/tail are sent after the lock is dropped, and the
  * callout is re-armed for the next tick.
  *
  * context is unused; pending is only used to update tick_lost.
  */
 static void
 dummynet_task(void *context, int pending)
 {
 	struct mbuf *head = NULL, *tail = NULL;
 	struct dn_pipe *pipe;
 	struct dn_heap *heaps[3];
 	struct dn_heap *h;
 	void *p;	/* generic parameter to handler */
 	int i;
 
 	DUMMYNET_LOCK();
 
 	heaps[0] = &ready_heap;			/* fixed-rate queues */
 	heaps[1] = &wfq_ready_heap;		/* wfq queues */
 	heaps[2] = &extract_heap;		/* delay line */
 
  	/* Update number of lost(coalesced) ticks. */
  	tick_lost += pending - 1;
  
  	getmicrouptime(&t);
  	/* Last tick duration (usec). */
  	tick_last = (t.tv_sec - prev_t.tv_sec) * 1000000 +
  	    (t.tv_usec - prev_t.tv_usec);
  	/* Last tick vs standard tick difference (usec). */
  	tick_delta = (tick_last * hz - 1000000) / hz;
  	/* Accumulated tick difference (usec). */
  	tick_delta_sum += tick_delta;
  
  	prev_t = t;
  
  	/*
  	 * Adjust curr_time if accumulated tick difference greater than
  	 * 'standard' tick. Since curr_time should be monotonically increasing,
  	 * we do positive adjustment as required and throttle curr_time in
  	 * case of negative adjustment.
  	 */
   	curr_time++;
  	if (tick_delta_sum - tick >= 0) {
  		int diff = tick_delta_sum / tick;
  
  		curr_time += diff;
  		tick_diff += diff;
  		tick_delta_sum %= tick;
  		tick_adjustment++;
  	} else if (tick_delta_sum + tick <= 0) {
  		curr_time--;
  		tick_diff--;
  		tick_delta_sum += tick;
  		tick_adjustment++;
  	}
 
 	/* Fire all due events, one heap at a time. */
 	for (i = 0; i < 3; i++) {
 		h = heaps[i];
 		while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time)) {
 			if (h->p[0].key > curr_time)
 				printf("dummynet: warning, "
 				    "heap %d is %d ticks late\n",
 				    i, (int)(curr_time - h->p[0].key));
 			/* store a copy before heap_extract */
 			p = h->p[0].object;
 			/* need to extract before processing */
 			heap_extract(h, NULL);
 			if (i == 0)
 				ready_event(p, &head, &tail);
 			else if (i == 1) {
 				/* NOTE(review): shadows the outer `pipe'. */
 				struct dn_pipe *pipe = p;
 				if (pipe->if_name[0] != '\0')
 					printf("dummynet: bad ready_event_wfq "
 					    "for pipe %s\n", pipe->if_name);
 				else
 					ready_event_wfq(p, &head, &tail);
 			} else
 				transmit_event(p, &head, &tail);
 		}
 	}
 
 	/* Sweep pipes trying to expire idle flow_queues. */
 	for (i = 0; i < HASHSIZE; i++)
 		SLIST_FOREACH(pipe, &pipehash[i], next)
 			if (pipe->idle_heap.elements > 0 &&
 			    DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V)) {
 				struct dn_flow_queue *q =
 				    pipe->idle_heap.p[0].object;
 
 				heap_extract(&(pipe->idle_heap), NULL);
 				/* Mark timestamp as invalid. */
 				q->S = q->F + 1;
 				pipe->sum -= q->fs->weight;
 			}
 
 	DUMMYNET_UNLOCK();
 
 	/* Packets are handed to the stack only after the lock is released. */
 	if (head != NULL)
 		dummynet_send(head);
 
 	callout_reset(&dn_timeout, 1, dummynet, NULL);
 }
 
 /*
  * Hand every packet of the m_nextpkt-linked chain starting at m to the
  * destination recorded in the dn_dir field of its dummynet tag. Packets
  * marked DN_TO_DROP, or carrying an unknown direction, are freed.
  */
 static void
 dummynet_send(struct mbuf *m)
 {
 	struct mbuf *next;
 	struct dn_pkt_tag *tag;
 	struct ip *iph;
 
 	while (m != NULL) {
 		/* Detach the packet from the chain before passing it on. */
 		next = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 		tag = dn_tag_get(m);
 
 		switch (tag->dn_dir) {
 		case DN_TO_IP_OUT:
 			ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
 			break;
 
 		case DN_TO_IP_IN:
 			/* Byte-swap ip_len and ip_off before re-injection. */
 			iph = mtod(m, struct ip *);
 			iph->ip_len = htons(iph->ip_len);
 			iph->ip_off = htons(iph->ip_off);
 			netisr_dispatch(NETISR_IP, m);
 			break;
 
 #ifdef INET6
 		case DN_TO_IP6_IN:
 			netisr_dispatch(NETISR_IPV6, m);
 			break;
 
 		case DN_TO_IP6_OUT:
 			ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
 			break;
 #endif
 
 		case DN_TO_IFB_FWD:
 			if (bridge_dn_p == NULL)
 				printf("dummynet: if_bridge not loaded\n");
 			else
 				((*bridge_dn_p)(m, tag->ifp));
 			break;
 
 		case DN_TO_ETH_DEMUX:
 			/*
 			 * The Ethernet code assumes the Ethernet header is
 			 * contiguous in the first mbuf header.
 			 * Ensure this is true.
 			 */
 			if (m->m_len < ETHER_HDR_LEN) {
 				m = m_pullup(m, ETHER_HDR_LEN);
 				if (m == NULL) {
 					printf("dummynet/ether: pullup failed, "
 					    "dropping packet\n");
 					break;
 				}
 			}
 			ether_demux(m->m_pkthdr.rcvif, m);
 			break;
 
 		case DN_TO_ETH_OUT:
 			ether_output_frame(tag->ifp, m);
 			break;
 
 		case DN_TO_DROP:
 			/* drop the packet after some time */
 			m_freem(m);
 			break;
 
 		default:
 			printf("dummynet: bad switch %d!\n", tag->dn_dir);
 			m_freem(m);
 			break;
 		}
 
 		m = next;
 	}
 }
 
 /*
  * Unconditionally expire empty queues in case of shortage.
  * A queue is expired when it holds no packets and its timestamps are
  * marked invalid (S == F + 1). The sweep runs at most once per value of
  * time_uptime and covers every hash slot plus the overflow slot.
  * Returns the number of queues freed.
  */
 static int
 expire_queues(struct dn_flow_set *fs)
 {
     struct dn_flow_queue **link, *q;
     int slot, before;
 
     before = fs->rq_elements;
     if (fs->last_expired == time_uptime)
 	return 0;
     fs->last_expired = time_uptime;
 
     /* Slot rq_size (the last one) is the overflow queue. */
     for (slot = 0; slot <= fs->rq_size; slot++) {
 	/* link always points at the pointer that references q. */
 	link = &fs->rq[slot];
 	while ((q = *link) != NULL) {
 	    if (q->head == NULL && q->S == q->F + 1) {
 		/* Entry is idle: unlink it and free it. */
 		*link = q->next;
 		fs->rq_elements--;
 		free(q, M_DUMMYNET);
 	    } else {
 		link = &q->next;
 	    }
 	}
     }
     return before - fs->rq_elements;
 }
 
 /*
  * If room, create a new queue and put at head of slot i;
  * otherwise, create or use the default queue.
  *
  * "No room" means that the flow set already holds more than
  * rq_size * dn_max_ratio queues and expire_queues() could not free any;
  * in that case slot rq_size (the overflow slot) is used instead of i.
  * Returns the queue, or NULL if the allocation fails.
  */
 static struct dn_flow_queue *
 create_queue(struct dn_flow_set *fs, int i)
 {
 	struct dn_flow_queue *q;
 
 	if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
 	    expire_queues(fs) == 0) {
 		/* No way to get room, use or create overflow queue. */
 		i = fs->rq_size;
 		if (fs->rq[i] != NULL)
 		    return fs->rq[i];
 	}
 	/* M_ZERO: every field not set below starts out as zero. */
 	q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO);
 	if (q == NULL) {
 		printf("dummynet: sorry, cannot allocate queue for new flow\n");
 		return (NULL);
 	}
 	q->fs = fs;
 	q->hash_slot = i;
 	q->next = fs->rq[i];
 	q->S = q->F + 1;	/* hack - mark timestamp as invalid. */
-	q->numbytes = io_fast ? fs->pipe->bandwidth : 0;
+	q->numbytes = fs->pipe->burst + (io_fast ? fs->pipe->bandwidth : 0);
 	fs->rq[i] = q;
 	fs->rq_elements++;
 	return (q);
 }
 
 /*
  * Given a flow_set and the flow id of a packet, find the matching queue
  * after appropriate masking, creating one with create_queue() when none
  * exists. A queue found further down its hash chain is moved to the
  * front so that further searches take less time.
  *
  * Side effects: when the flow set has DN_HAVE_FLOW_MASK, *id is masked
  * in place; while scanning a chain, non-matching idle queues are freed
  * if pipe_expire is set.
  *
  * Returns the queue, or NULL if a new one could not be allocated.
  */
 static struct dn_flow_queue *
 find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id)
 {
     int i = 0 ; /* we need i and q for new allocations */
     struct dn_flow_queue *q, *prev;
     int is_v6 = IS_IP6_FLOW_ID(id);
 
     if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) )
 	q = fs->rq[0] ;		/* no mask: a single queue in slot 0 */
     else {
 	/* first, do the masking, then hash */
 	id->dst_port &= fs->flow_mask.dst_port ;
 	id->src_port &= fs->flow_mask.src_port ;
 	id->proto &= fs->flow_mask.proto ;
 	id->flags = 0 ; /* we don't care about this one */
 	if (is_v6) {
 	    APPLY_MASK(&id->dst_ip6, &fs->flow_mask.dst_ip6);
 	    APPLY_MASK(&id->src_ip6, &fs->flow_mask.src_ip6);
 	    id->flow_id6 &= fs->flow_mask.flow_id6;
 
 	    i = ((id->dst_ip6.__u6_addr.__u6_addr32[0]) & 0xffff)^
 		((id->dst_ip6.__u6_addr.__u6_addr32[1]) & 0xffff)^
 		((id->dst_ip6.__u6_addr.__u6_addr32[2]) & 0xffff)^
 		((id->dst_ip6.__u6_addr.__u6_addr32[3]) & 0xffff)^
 
 		((id->dst_ip6.__u6_addr.__u6_addr32[0] >> 15) & 0xffff)^
 		((id->dst_ip6.__u6_addr.__u6_addr32[1] >> 15) & 0xffff)^
 		((id->dst_ip6.__u6_addr.__u6_addr32[2] >> 15) & 0xffff)^
 		((id->dst_ip6.__u6_addr.__u6_addr32[3] >> 15) & 0xffff)^
 
 		((id->src_ip6.__u6_addr.__u6_addr32[0] << 1) & 0xfffff)^
 		((id->src_ip6.__u6_addr.__u6_addr32[1] << 1) & 0xfffff)^
 		((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^
 		((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^
 
 		/*
 		 * Fold in the upper half of each source word, as the IPv4
 		 * branch below does. A left shift by 16 followed by the
 		 * 0xffff mask would always yield zero.
 		 */
 		((id->src_ip6.__u6_addr.__u6_addr32[0] >> 16) & 0xffff)^
 		((id->src_ip6.__u6_addr.__u6_addr32[1] >> 16) & 0xffff)^
 		((id->src_ip6.__u6_addr.__u6_addr32[2] >> 16) & 0xffff)^
 		((id->src_ip6.__u6_addr.__u6_addr32[3] >> 16) & 0xffff)^
 
 		(id->dst_port << 1) ^ (id->src_port) ^
 		(id->proto ) ^
 		(id->flow_id6);
 	} else {
 	    id->dst_ip &= fs->flow_mask.dst_ip ;
 	    id->src_ip &= fs->flow_mask.src_ip ;
 
 	    i = ( (id->dst_ip) & 0xffff ) ^
 		( (id->dst_ip >> 15) & 0xffff ) ^
 		( (id->src_ip << 1) & 0xffff ) ^
 		( (id->src_ip >> 16 ) & 0xffff ) ^
 		(id->dst_port << 1) ^ (id->src_port) ^
 		(id->proto );
 	}
 	i = i % fs->rq_size ;
 	/* finally, scan the current list for a match */
 	searches++ ;
 	for (prev=NULL, q = fs->rq[i] ; q ; ) {
 	    search_steps++;
 	    if (is_v6 &&
 		    IN6_ARE_ADDR_EQUAL(&id->dst_ip6,&q->id.dst_ip6) &&
 		    IN6_ARE_ADDR_EQUAL(&id->src_ip6,&q->id.src_ip6) &&
 		    id->dst_port == q->id.dst_port &&
 		    id->src_port == q->id.src_port &&
 		    id->proto == q->id.proto &&
 		    id->flags == q->id.flags &&
 		    id->flow_id6 == q->id.flow_id6)
 		break ; /* found */
 
 	    if (!is_v6 && id->dst_ip == q->id.dst_ip &&
 		    id->src_ip == q->id.src_ip &&
 		    id->dst_port == q->id.dst_port &&
 		    id->src_port == q->id.src_port &&
 		    id->proto == q->id.proto &&
 		    id->flags == q->id.flags)
 		break ; /* found */
 
 	    /* No match. Check if we can expire the entry */
 	    if (pipe_expire && q->head == NULL && q->S == q->F+1 ) {
 		/* entry is idle and not in any heap, expire it */
 		struct dn_flow_queue *old_q = q ;
 
 		if (prev != NULL)
 		    prev->next = q = q->next ;
 		else
 		    fs->rq[i] = q = q->next ;
 		fs->rq_elements-- ;
 		free(old_q, M_DUMMYNET);
 		continue ;
 	    }
 	    prev = q ;
 	    q = q->next ;
 	}
 	if (q && prev != NULL) { /* found and not in front */
 	    prev->next = q->next ;
 	    q->next = fs->rq[i] ;
 	    fs->rq[i] = q ;
 	}
     }
     if (q == NULL) { /* no match, need to allocate a new entry */
 	q = create_queue(fs, i);
 	if (q != NULL)
 	    q->id = *id ;	/* remember the (masked) flow id */
     }
     return q ;
 }
 
 /*
  * Decide whether the RED (or Gentle-RED) algorithm wants to drop a
  * packet of `len' bytes arriving on queue q of flow set fs.
  * Updates the queue state used by RED (q->avg, q->count, q->random).
  * Returns 1 to drop the packet, 0 to accept it.
  */
 static int
 red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len)
 {
 	/*
 	 * RED algorithm
 	 *
 	 * RED calculates the average queue size (avg) using a low-pass filter
 	 * with an exponential weighted (w_q) moving average:
 	 * 	avg  <-  (1-w_q) * avg + w_q * q_size
 	 * where q_size is the queue length (measured in bytes or packets).
 	 *
 	 * If q_size == 0, we compute the idle time for the link, and set
 	 *	avg = (1 - w_q)^(idle/s)
 	 * where s is the time needed for transmitting a medium-sized packet.
 	 *
 	 * Now, if avg < min_th the packet is enqueued.
 	 * If avg > max_th the packet is dropped. Otherwise, the packet is
 	 * dropped with probability P function of avg.
 	 */
 
 	int64_t p_b = 0;
 
 	/* Queue in bytes or packets? */
 	u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ?
 	    q->len_bytes : q->len;
 
 	DPRINTF(("\ndummynet: %d q: %2u ", (int)curr_time, q_size));
 
 	/* Average queue size estimation. */
 	if (q_size != 0) {
 		/* Queue is not empty, avg <- avg + (q_size - avg) * w_q */
 		int diff = SCALE(q_size) - q->avg;
 		int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);
 
 		q->avg += (int)v;
 	} else {
 		/*
 		 * Queue is empty, find for how long the queue has been
 		 * empty and use a lookup table for computing
 		 * (1 - w_q)^(idle_time/s) where s is the time to send a
 		 * (small) packet.
 		 * XXX check wraps...
 		 */
 		if (q->avg) {
-			u_int t = (curr_time - q->q_time) / fs->lookup_step;
+			u_int t = (curr_time - q->idle_time) / fs->lookup_step;
 
 			q->avg = (t < fs->lookup_depth) ?
 			    SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
 		}
 	}
 	DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg)));
 
 	/* Should i drop? */
 	if (q->avg < fs->min_th) {
 		q->count = -1;
 		return (0);	/* accept packet */
 	}
 	if (q->avg >= fs->max_th) {	/* average queue >=  max threshold */
 		if (fs->flags_fs & DN_IS_GENTLE_RED) {
 			/*
 			 * According to Gentle-RED, if avg is greater than
 			 * max_th the packet is dropped with a probability
 			 *	 p_b = c_3 * avg - c_4
 			 * where c_3 = (1 - max_p) / max_th
 			 *       c_4 = 1 - 2 * max_p
 			 */
 			p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) -
 			    fs->c_4;
 		} else {
 			q->count = -1;
 			DPRINTF(("dummynet: - drop"));
 			return (1);
 		}
 	} else if (q->avg > fs->min_th) {
 		/*
 		 * We compute p_b using the linear dropping function
 		 *	 p_b = c_1 * avg - c_2
 		 * where c_1 = max_p / (max_th - min_th)
 		 * 	 c_2 = max_p * min_th / (max_th - min_th)
 		 */
 		p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
 	}
 
 	/* In byte mode, scale the probability by the packet length. */
 	if (fs->flags_fs & DN_QSIZE_IS_BYTES)
 		p_b = (p_b * len) / fs->max_pkt_size;
 	/* count was -1: first packet of a new run, just draw a threshold. */
 	if (++q->count == 0)
 		q->random = random() & 0xffff;
 	else {
 		/*
 		 * q->count counts packets arrived since last drop, so a greater
 		 * value of q->count means a greater packet drop probability.
 		 */
 		if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
 			q->count = 0;
 			DPRINTF(("dummynet: - red drop"));
 			/* After a drop we calculate a new random value. */
 			q->random = random() & 0xffff;
 			return (1);	/* drop */
 		}
 	}
 	/* End of RED algorithm. */
 
 	return (0);	/* accept */
 }
 
 static __inline struct dn_flow_set *
 locate_flowset(int fs_nr)
 {
 	struct dn_flow_set *fs;
 
 	SLIST_FOREACH(fs, &flowsethash[HASH(fs_nr)], next)
 		if (fs->fs_nr == fs_nr)
 			return (fs);
 
 	return (NULL);
 }
 
 static __inline struct dn_pipe *
 locate_pipe(int pipe_nr)
 {
 	struct dn_pipe *pipe;
 
 	SLIST_FOREACH(pipe, &pipehash[HASH(pipe_nr)], next)
 		if (pipe->pipe_nr == pipe_nr)
 			return (pipe);
 
 	return (NULL);
 }
 
 /*
  * dummynet hook for packets. Below 'pipe' is a pipe or a queue
  * depending on whether WF2Q or fixed bw is used.
  *
  * m0		in/out pointer to the mbuf with the packet. It is set to
  *		NULL when the packet is queued or dropped, and left alone
  *		on the "fast io" path at the end of the function.
  * dir		where shall we send the packet after dummynet.
  * fwa		firewall arguments. This function reads fwa->rule (the
  *		matching rule), fwa->cookie (the pipe or queue number),
  *		fwa->f_id (used to pick the flow queue), fwa->rule_id,
  *		fwa->chain_id and fwa->oif (stored in the packet tag).
  *
  * Returns 0 if the packet was accepted. On a drop the mbuf is freed and
  * ENOBUFS is returned, unless the flow set has DN_NOERROR set, in which
  * case 0 is returned as well.
  */
 static int
 dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
 {
 	struct mbuf *m = *m0, *head = NULL, *tail = NULL;
 	struct dn_pkt_tag *pkt;
 	struct m_tag *mtag;
 	struct dn_flow_set *fs = NULL;
 	struct dn_pipe *pipe;
 	uint64_t len = m->m_pkthdr.len;
 	struct dn_flow_queue *q = NULL;
 	int is_pipe;
 	ipfw_insn *cmd = ACTION_PTR(fwa->rule);
 
 	KASSERT(m->m_nextpkt == NULL,
 	    ("dummynet_io: mbuf queue passed to dummynet"));
 
 	/*
 	 * Step over an O_LOG, O_ALTQ and/or O_TAG instruction (in that
 	 * order) so that cmd ends up on the pipe/queue opcode itself.
 	 */
 	if (cmd->opcode == O_LOG)
 		cmd += F_LEN(cmd);
 	if (cmd->opcode == O_ALTQ)
 		cmd += F_LEN(cmd);
 	if (cmd->opcode == O_TAG)
 		cmd += F_LEN(cmd);
 	is_pipe = (cmd->opcode == O_PIPE);
 
 	DUMMYNET_LOCK();
 	io_pkt++;
 	/*
 	 * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule.
 	 *
 	 * XXXGL: probably the pipe->fs and fs->pipe logic here
 	 * below can be simplified.
 	 */
 	if (is_pipe) {
 		pipe = locate_pipe(fwa->cookie);
 		if (pipe != NULL)
 			fs = &(pipe->fs);
 	} else
 		fs = locate_flowset(fwa->cookie);
 
 	if (fs == NULL)
 		goto dropit;	/* This queue/pipe does not exist! */
 	pipe = fs->pipe;
 	if (pipe == NULL) {	/* Must be a queue, try find a matching pipe. */
 		pipe = locate_pipe(fs->parent_nr);
 		if (pipe != NULL)
 			fs->pipe = pipe;	/* Cache it for later packets. */
 		else {
 			printf("dummynet: no pipe %d for queue %d, drop pkt\n",
 			    fs->parent_nr, fs->fs_nr);
 			goto dropit;
 		}
 	}
 	q = find_queue(fs, &(fwa->f_id));
 	if (q == NULL)
 		goto dropit;		/* Cannot allocate queue. */
 
 	/* Update statistics, then check reasons to drop pkt. */
 	q->tot_bytes += len;
 	q->tot_pkts++;
 	if (fs->plr && random() < fs->plr)
 		goto dropit;		/* Random pkt drop. */
 	if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
 		if (q->len_bytes > fs->qsize)
 			goto dropit;	/* Queue size overflow. */
 	} else {
 		if (q->len >= fs->qsize)
 			goto dropit;	/* Queue count overflow. */
 	}
 	if (fs->flags_fs & DN_IS_RED && red_drops(fs, q, len))
 		goto dropit;
 
 	/* XXX expensive to zero, see if we can remove it. */
 	mtag = m_tag_get(PACKET_TAG_DUMMYNET,
 	    sizeof(struct dn_pkt_tag), M_NOWAIT | M_ZERO);
 	if (mtag == NULL)
 		goto dropit;		/* Cannot allocate packet header. */
 	m_tag_prepend(m, mtag);		/* Attach to mbuf chain. */
 
 	/* The dn_pkt_tag payload sits right after the m_tag header. */
 	pkt = (struct dn_pkt_tag *)(mtag + 1);
 	/*
 	 * Ok, i can handle the pkt now...
 	 * Build and enqueue packet + parameters.
 	 */
 	pkt->rule = fwa->rule;
 	pkt->rule_id = fwa->rule_id;
 	pkt->chain_id = fwa->chain_id;
 	pkt->dn_dir = dir;
 
 	pkt->ifp = fwa->oif;
 
 	/* Append the packet to the tail of the flow queue. */
 	if (q->head == NULL)
 		q->head = m;
 	else
 		q->tail->m_nextpkt = m;
 	q->tail = m;
 	q->len++;
 	q->len_bytes += len;
 
 	if (q->head != m)		/* Flow was not idle, we are done. */
 		goto done;
 
-	if (q->q_time < curr_time)
-		q->numbytes = io_fast ? fs->pipe->bandwidth : 0;
-	q->q_time = curr_time;
+	if (is_pipe) {			/* Fixed rate queues. */
+		if (q->idle_time < curr_time) {
+			/* Calculate available burst size. */
+			q->numbytes +=
+			    (curr_time - q->idle_time) * pipe->bandwidth;
+			if (q->numbytes > pipe->burst)
+				q->numbytes = pipe->burst;
+			if (io_fast)
+				q->numbytes += pipe->bandwidth;
+		}
+	} else {			/* WF2Q. */
+		if (pipe->idle_time < curr_time) {
+			/* Calculate available burst size. */
+			pipe->numbytes +=
+			    (curr_time - pipe->idle_time) * pipe->bandwidth;
+			if (pipe->numbytes > pipe->burst)
+				pipe->numbytes = pipe->burst;
+			if (io_fast)
+				pipe->numbytes += pipe->bandwidth;
+		}
+		pipe->idle_time = curr_time;
+	}
+	/* Necessary for both: fixed rate & WF2Q queues. */
+	q->idle_time = curr_time;
 
 	/*
 	 * If we reach this point the flow was previously idle, so we need
 	 * to schedule it. This involves different actions for fixed-rate or
 	 * WF2Q queues.
 	 */
 	if (is_pipe) {
 		/* Fixed-rate queue: just insert into the ready_heap. */
 		dn_key t = 0;
 
 		/* With no bandwidth configured t stays 0: process at once. */
 		if (pipe->bandwidth) {
 			q->extra_bits = compute_extra_bits(m, pipe);
 			t = set_ticks(m, q, pipe);
 		}
 		q->sched_time = curr_time;
 		if (t == 0)		/* Must process it now. */
 			ready_event(q, &head, &tail);
 		else
 			heap_insert(&ready_heap, curr_time + t , q);
 	} else {
 		/*
 		 * WF2Q. First, compute start time S: if the flow was
 		 * idle (S = F + 1) set S to the virtual time V for the
 		 * controlling pipe, and update the sum of weights for the pipe;
 		 * otherwise, remove flow from idle_heap and set S to max(F,V).
 		 * Second, compute finish time F = S + len / weight.
 		 * Third, if pipe was idle, update V = max(S, V).
 		 * Fourth, count one more backlogged flow.
 		 */
 		if (DN_KEY_GT(q->S, q->F)) { /* Means timestamps are invalid. */
 			q->S = pipe->V;
 			pipe->sum += fs->weight; /* Add weight of new queue. */
 		} else {
 			heap_extract(&(pipe->idle_heap), q);
 			q->S = MAX64(q->F, pipe->V);
 		}
 		q->F = q->S + (len << MY_M) / (uint64_t)fs->weight;
 
 		if (pipe->not_eligible_heap.elements == 0 &&
 		    pipe->scheduler_heap.elements == 0)
 			pipe->V = MAX64(q->S, pipe->V);
 		fs->backlogged++;
 		/*
 		 * Look at eligibility. A flow is not eligible if S>V (when
 		 * this happens, it means that there is some other flow already
 		 * scheduled for the same pipe, so the scheduler_heap cannot be
 		 * empty). If the flow is not eligible we just store it in the
 		 * not_eligible_heap. Otherwise, we store in the scheduler_heap
 		 * and possibly invoke ready_event_wfq() right now if there is
 		 * leftover credit.
 		 * Note that for all flows in scheduler_heap (SCH), S_i <= V,
 		 * and for all flows in not_eligible_heap (NEH), S_i > V.
 		 * So when we need to compute max(V, min(S_i)) forall i in
 		 * SCH+NEH, we only need to look into NEH.
 		 */
 		if (DN_KEY_GT(q->S, pipe->V)) {		/* Not eligible. */
 			if (pipe->scheduler_heap.elements == 0)
 				printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
 			heap_insert(&(pipe->not_eligible_heap), q->S, q);
 		} else {
 			heap_insert(&(pipe->scheduler_heap), q->F, q);
 			if (pipe->numbytes >= 0) {	 /* Pipe is idle. */
 				if (pipe->scheduler_heap.elements != 1)
 					printf("dummynet: OUCH! pipe should have been idle!\n");
 				DPRINTF(("dummynet: waking up pipe %d at %d\n",
 				    pipe->pipe_nr, (int)(q->F >> MY_M)));
 				pipe->sched_time = curr_time;
 				ready_event_wfq(pipe, &head, &tail);
 			}
 		}
 	}
 done:
 	/*
 	 * head is only filled in by the ready_event()/ready_event_wfq()
 	 * calls above. If it is this very packet, and dir is not one of
 	 * the three directions excluded below, the packet is left in *m0
 	 * for the caller instead of being passed to dummynet_send().
 	 */
 	if (head == m && dir != DN_TO_IFB_FWD && dir != DN_TO_ETH_DEMUX &&
 	    dir != DN_TO_ETH_OUT) {	/* Fast io. */
 		io_pkt_fast++;
 		if (m->m_nextpkt != NULL)
 			printf("dummynet: fast io: pkt chain detected!\n");
 		head = m->m_nextpkt = NULL;
 	} else
 		*m0 = NULL;		/* Normal io. */
 
 	DUMMYNET_UNLOCK();
 	if (head != NULL)
 		dummynet_send(head);
 	return (0);
 
 dropit:
 	/* q is still NULL when the drop happened before find_queue(). */
 	io_pkt_drop++;
 	if (q)
 		q->drops++;
 	DUMMYNET_UNLOCK();
 	m_freem(m);
 	*m0 = NULL;
 	return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
 }
 
 /*
  * Free a packet held by dummynet. As written the macro only calls
  * m_freem() on its argument.
  *
  * Original note, kept for history:
  * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT)
  * Doing this would probably save us the initial bzero of dn_pkt
  *
  * NOTE(review): the macro body contains no rt_unref any more, so the
  * note above looks stale - confirm before relying on it.
  */
 #define	DN_FREE_PKT(_m) do {				\
 	m_freem(_m);					\
 } while (0)
 
 /*
  * Throw away every queued packet and every flow_queue hanging off a
  * flow_set, leaving all hash slots empty and rq_elements at zero.
  * When 'all' is non-zero the RED lookup table and the slot array are
  * released too, and so is the descriptor itself unless it is the
  * flow_set embedded in its own pipe.
  * For the one in dn_pipe MUST also cleanup ready_heap...
  */
 static void
 purge_flow_set(struct dn_flow_set *fs, int all)
 {
 	struct dn_flow_queue *flow, *flow_next;
 	struct mbuf *pkt, *pkt_next;
 	int slot;
 
 	DUMMYNET_LOCK_ASSERT();
 
 	/* Slots are numbered 0 through rq_size, both ends included. */
 	for (slot = 0; slot <= fs->rq_size; slot++) {
 		flow = fs->rq[slot];
 		while (flow != NULL) {
 			/* Release the packets still waiting on this flow. */
 			for (pkt = flow->head; pkt != NULL; pkt = pkt_next) {
 				pkt_next = pkt->m_nextpkt;
 				DN_FREE_PKT(pkt);
 			}
 			flow_next = flow->next;
 			free(flow, M_DUMMYNET);
 			flow = flow_next;
 		}
 		fs->rq[slot] = NULL;
 	}
 	fs->rq_elements = 0;
 
 	if (!all)
 		return;
 
 	/* RED - free lookup table. */
 	if (fs->w_q_lookup != NULL)
 		free(fs->w_q_lookup, M_DUMMYNET);
 	if (fs->rq != NULL)
 		free(fs->rq, M_DUMMYNET);
 	/* A flow_set living inside its pipe is not ours to free. */
 	if (fs->pipe == NULL || fs != &(fs->pipe->fs))
 		free(fs, M_DUMMYNET);
 }
 
 /*
  * Release everything a pipe (not a flow_set) holds before the pipe is
  * deleted: its embedded flow_set, the packets chained on pipe->head,
  * and its three heaps. The dn_pipe structure itself is not freed here.
  */
 static void
 purge_pipe(struct dn_pipe *pipe)
 {
 	struct mbuf *pkt, *pkt_next;
 
 	purge_flow_set(&(pipe->fs), 1);
 
 	for (pkt = pipe->head; pkt != NULL; pkt = pkt_next) {
 		pkt_next = pkt->m_nextpkt;
 		DN_FREE_PKT(pkt);
 	}
 
 	heap_free(&(pipe->scheduler_heap));
 	heap_free(&(pipe->not_eligible_heap));
 	heap_free(&(pipe->idle_heap));
 }
 
 /*
  * Delete all pipes and heaps returning memory. Must also
  * remove references from all ipfw rules to all pipes.
  */
 static void
 dummynet_flush(void)
 {
 	struct dn_pipe *pipe, *pipe1;
 	struct dn_flow_set *fs, *fs1;
 	int i;
 
 	DUMMYNET_LOCK();
 	/* Free heaps so we don't have unwanted events. */
 	heap_free(&ready_heap);
 	heap_free(&wfq_ready_heap);
 	heap_free(&extract_heap);
 
 	/*
 	 * Now purge all queued pkts and delete all pipes.
 	 *
 	 * XXXGL: can we merge the for(;;) cycles into one or not?
 	 */
 	for (i = 0; i < HASHSIZE; i++)
 		SLIST_FOREACH_SAFE(fs, &flowsethash[i], next, fs1) {
 			SLIST_REMOVE(&flowsethash[i], fs, dn_flow_set, next);
 			purge_flow_set(fs, 1);
 		}
 	for (i = 0; i < HASHSIZE; i++)
 		SLIST_FOREACH_SAFE(pipe, &pipehash[i], next, pipe1) {
 			SLIST_REMOVE(&pipehash[i], pipe, dn_pipe, next);
 			purge_pipe(pipe);
 			free_pipe(pipe);
 		}
 	DUMMYNET_UNLOCK();
 }
 
 /*
  * setup RED parameters
  */
 static int
 config_red(struct dn_flow_set *p, struct dn_flow_set *x)
 {
 	int i;
 
 	x->w_q = p->w_q;
 	x->min_th = SCALE(p->min_th);
 	x->max_th = SCALE(p->max_th);
 	x->max_p = p->max_p;
 
 	x->c_1 = p->max_p / (p->max_th - p->min_th);
 	x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th));
 
 	if (x->flags_fs & DN_IS_GENTLE_RED) {
 		x->c_3 = (SCALE(1) - p->max_p) / p->max_th;
 		x->c_4 = SCALE(1) - 2 * p->max_p;
 	}
 
 	/* If the lookup table already exist, free and create it again. */
 	if (x->w_q_lookup) {
 		free(x->w_q_lookup, M_DUMMYNET);
 		x->w_q_lookup = NULL;
 	}
 	if (red_lookup_depth == 0) {
 		printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth"
 		    "must be > 0\n");
 		free(x, M_DUMMYNET);
 		return (EINVAL);
 	}
 	x->lookup_depth = red_lookup_depth;
 	x->w_q_lookup = (u_int *)malloc(x->lookup_depth * sizeof(int),
 	    M_DUMMYNET, M_NOWAIT);
 	if (x->w_q_lookup == NULL) {
 		printf("dummynet: sorry, cannot allocate red lookup table\n");
 		free(x, M_DUMMYNET);
 		return(ENOSPC);
 	}
 
 	/* Fill the lookup table with (1 - w_q)^x */
 	x->lookup_step = p->lookup_step;
 	x->lookup_weight = p->lookup_weight;
 	x->w_q_lookup[0] = SCALE(1) - x->w_q;
 
 	for (i = 1; i < x->lookup_depth; i++)
 		x->w_q_lookup[i] =
 		    SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);
 
 	if (red_avg_pkt_size < 1)
 		red_avg_pkt_size = 512;
 	x->avg_pkt_size = red_avg_pkt_size;
 	if (red_max_pkt_size < 1)
 		red_max_pkt_size = 1500;
 	x->max_pkt_size = red_max_pkt_size;
 	return (0);
 }
 
 /*
  * Allocate the zeroed array of flow-queue slots for flow set x.
  * With a flow mask the slot count comes from pfs->rq_size (or
  * dn_hash_size when that is 0), clamped to 4..DN_MAX_HASH_SIZE;
  * without one a single slot is used. One extra slot is always
  * allocated. Returns 0, or ENOMEM if the allocation fails.
  */
 static int
 alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs)
 {
 	int nslots;
 
 	if ((x->flags_fs & DN_HAVE_FLOW_MASK) == 0) {
 		/* one is enough for null mask */
 		nslots = 1;
 	} else {
 		/* allocate some slots */
 		nslots = pfs->rq_size;
 		if (nslots == 0)
 			nslots = dn_hash_size;
 		if (nslots < 4)
 			nslots = 4;
 		else if (nslots > DN_MAX_HASH_SIZE)
 			nslots = DN_MAX_HASH_SIZE;
 	}
 	x->rq_size = nslots;
 
 	x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *),
 	    M_DUMMYNET, M_NOWAIT | M_ZERO);
 	if (x->rq == NULL) {
 		printf("dummynet: sorry, cannot allocate queue\n");
 		return (ENOMEM);
 	}
 	x->rq_elements = 0;
 	return (0);
 }
 
 /*
  * Copy the tunable parameters (flags, queue size, loss rate, flow
  * mask) from src into x, replace out-of-range queue sizes with
  * defaults, and configure RED when the flags request it.
  */
 static void
 set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src)
 {
 	int size_in_bytes;
 
 	x->flags_fs = src->flags_fs;
 	x->qsize = src->qsize;
 	x->plr = src->plr;
 	x->flow_mask = src->flow_mask;
 
 	size_in_bytes = (x->flags_fs & DN_QSIZE_IS_BYTES) != 0;
 	if (size_in_bytes) {
 		/* Byte-sized queue above the limit: use 1 MB. */
 		if (x->qsize > pipe_byte_limit)
 			x->qsize = 1024 * 1024;
 	} else if (x->qsize == 0 || x->qsize > pipe_slot_limit) {
 		/* Slot-sized queue unset or above the limit: use 50. */
 		x->qsize = 50;
 	}
 
 	/* Configuring RED. */
 	if (x->flags_fs & DN_IS_RED)
 		config_red(src, x);	/* XXX should check errors */
 }
 
 /*
  * Setup pipe or queue parameters.
  *
  * p carries the request: exactly one of p->pipe_nr (configure a pipe)
  * and p->fs.fs_nr (configure a queue / flow set) must be non-zero,
  * otherwise EINVAL is returned. A missing pipe or flow set is created
  * and linked into its hash table; an existing one is updated in place.
  *
  * Returns 0 on success, EINVAL for a malformed request, ENOMEM when an
  * allocation fails, or the error reported by alloc_hash().
  * Takes and releases the dummynet lock itself.
  */
 static int
 config_pipe(struct dn_pipe *p)
 {
 	struct dn_flow_set *pfs = &(p->fs);
 	struct dn_flow_queue *q;
 	int i, error;
 
 	/*
 	 * The config program passes parameters as follows:
 	 * bw = bits/second (0 means no limits),
 	 * delay = ms, must be translated into ticks.
 	 * qsize = slots/bytes
 	 */
 	p->delay = (p->delay * hz) / 1000;
+	/* Scale burst size: bytes -> bits * hz */
+	p->burst *= 8 * hz;
 	/* We need either a pipe number or a flow_set number. */
 	if (p->pipe_nr == 0 && pfs->fs_nr == 0)
 		return (EINVAL);
 	if (p->pipe_nr != 0 && pfs->fs_nr != 0)
 		return (EINVAL);
 	if (p->pipe_nr != 0) {			/* this is a pipe */
 		struct dn_pipe *pipe;
 
 		DUMMYNET_LOCK();
 		pipe = locate_pipe(p->pipe_nr);	/* locate pipe */
 
 		if (pipe == NULL) {		/* new pipe */
 			pipe = malloc(sizeof(struct dn_pipe), M_DUMMYNET,
 			    M_NOWAIT | M_ZERO);
 			if (pipe == NULL) {
 				DUMMYNET_UNLOCK();
 				printf("dummynet: no memory for new pipe\n");
 				return (ENOMEM);
 			}
 			pipe->pipe_nr = p->pipe_nr;
 			pipe->fs.pipe = pipe;
 			/*
 			 * idle_heap is the only one from which
 			 * we extract from the middle.
 			 */
 			pipe->idle_heap.size = pipe->idle_heap.elements = 0;
 			pipe->idle_heap.offset =
 			    offsetof(struct dn_flow_queue, heap_pos);
 		} else
 			/* Flush accumulated credit for all queues. */
 			for (i = 0; i <= pipe->fs.rq_size; i++)
-				for (q = pipe->fs.rq[i]; q; q = q->next)
-					q->numbytes = io_fast ? p->bandwidth : 0;
+				for (q = pipe->fs.rq[i]; q; q = q->next) {
+					q->numbytes = p->burst +
+					    (io_fast ? p->bandwidth : 0);
+				}
 
 		pipe->bandwidth = p->bandwidth;
-		pipe->numbytes = 0;		/* just in case... */
+		pipe->burst = p->burst;
+		pipe->numbytes = pipe->burst + (io_fast ? pipe->bandwidth : 0);
 		bcopy(p->if_name, pipe->if_name, sizeof(p->if_name));
 		pipe->ifp = NULL;		/* reset interface ptr */
 		pipe->delay = p->delay;
 		set_fs_parms(&(pipe->fs), pfs);
 
 		/* Handle changes in the delay profile. */
 		if (p->samples_no > 0) {
 			/* Reallocate only when the sample count changed. */
 			if (pipe->samples_no != p->samples_no) {
 				if (pipe->samples != NULL)
 					free(pipe->samples, M_DUMMYNET);
 				pipe->samples =
 				    malloc(p->samples_no*sizeof(dn_key),
 					M_DUMMYNET, M_NOWAIT | M_ZERO);
 				/*
 				 * NOTE(review): on this failure path
 				 * pipe->samples is NULL while
 				 * pipe->samples_no still holds the old
 				 * count, and a pipe allocated above has not
 				 * been linked into pipehash or freed.
 				 */
 				if (pipe->samples == NULL) {
 					DUMMYNET_UNLOCK();
 					printf("dummynet: no memory "
 						"for new samples\n");
 					return (ENOMEM);
 				}
 				pipe->samples_no = p->samples_no;
 			}
 
 			/*
 			 * NOTE(review): strncpy() leaves pipe->name without
 			 * a terminating NUL when p->name fills the buffer.
 			 */
 			strncpy(pipe->name,p->name,sizeof(pipe->name));
 			pipe->loss_level = p->loss_level;
 			for (i = 0; i<pipe->samples_no; ++i)
 				pipe->samples[i] = p->samples[i];
 		} else if (pipe->samples != NULL) {
 			/* No profile in the request: drop the old one. */
 			free(pipe->samples, M_DUMMYNET);
 			pipe->samples = NULL;
 			pipe->samples_no = 0;
 		}
 
 		if (pipe->fs.rq == NULL) {	/* a new pipe */
 			error = alloc_hash(&(pipe->fs), pfs);
 			if (error) {
 				DUMMYNET_UNLOCK();
 				free_pipe(pipe);
 				return (error);
 			}
 			SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)],
 			    pipe, next);
 		}
 		DUMMYNET_UNLOCK();
 	} else {				/* config queue */
 		struct dn_flow_set *fs;
 
 		DUMMYNET_LOCK();
 		fs = locate_flowset(pfs->fs_nr); /* locate flow_set */
 
 		if (fs == NULL) {		/* new */
 			if (pfs->parent_nr == 0) { /* need link to a pipe */
 				DUMMYNET_UNLOCK();
 				return (EINVAL);
 			}
 			fs = malloc(sizeof(struct dn_flow_set), M_DUMMYNET,
 			    M_NOWAIT | M_ZERO);
 			if (fs == NULL) {
 				DUMMYNET_UNLOCK();
 				printf(
 				    "dummynet: no memory for new flow_set\n");
 				return (ENOMEM);
 			}
 			fs->fs_nr = pfs->fs_nr;
 			fs->parent_nr = pfs->parent_nr;
 			/* The weight is clamped to the range 1..100. */
 			fs->weight = pfs->weight;
 			if (fs->weight == 0)
 				fs->weight = 1;
 			else if (fs->weight > 100)
 				fs->weight = 100;
 		} else {
 			/*
 			 * Change parent pipe not allowed;
 			 * must delete and recreate.
 			 *
 			 * Note that the weight is only assigned in the
 			 * "new" branch above; it is left untouched here.
 			 */
 			if (pfs->parent_nr != 0 &&
 			    fs->parent_nr != pfs->parent_nr) {
 				DUMMYNET_UNLOCK();
 				return (EINVAL);
 			}
 		}
 
 		set_fs_parms(fs, pfs);
 
 		if (fs->rq == NULL) {		/* a new flow_set */
 			error = alloc_hash(fs, pfs);
 			if (error) {
 				DUMMYNET_UNLOCK();
 				free(fs, M_DUMMYNET);
 				return (error);
 			}
 			SLIST_INSERT_HEAD(&flowsethash[HASH(fs->fs_nr)],
 			    fs, next);
 		}
 		DUMMYNET_UNLOCK();
 	}
 	return (0);
 }
 
 /*
  * Helper function to remove from a heap queues which are linked to
  * a flow_set about to be deleted.
  */
 static void
 fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs)
 {
     int i = 0, found = 0 ;
     for (; i < h->elements ;)
 	if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) {
 	    h->elements-- ;
 	    h->p[i] = h->p[h->elements] ;
 	    found++ ;
 	} else
 	    i++ ;
     if (found)
 	heapify(h);
 }
 
 /*
  * Helper function to remove a pipe from a heap (can be there at most
  * once): the first entry whose object is p is replaced by the last
  * entry and the heap is rebuilt. Nothing happens if p is not present.
  */
 static void
 pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p)
 {
 	int idx;
 
 	for (idx = 0; idx < h->elements; idx++) {
 		if (h->p[idx].object != p)
 			continue;
 		/* found it */
 		h->elements--;
 		h->p[idx] = h->p[h->elements];
 		heapify(h);
 		return;
 	}
 }
 
 /*
  * drain all queues. Called in case of severe mbuf shortage.
  */
 void
 dummynet_drain(void)
 {
     struct dn_flow_set *fs;
     struct dn_pipe *pipe;
     struct mbuf *m, *mnext;
     int i;
 
     DUMMYNET_LOCK_ASSERT();
 
     heap_free(&ready_heap);
     heap_free(&wfq_ready_heap);
     heap_free(&extract_heap);
     /* remove all references to this pipe from flow_sets */
     for (i = 0; i < HASHSIZE; i++)
 	SLIST_FOREACH(fs, &flowsethash[i], next)
 		purge_flow_set(fs, 0);
 
     for (i = 0; i < HASHSIZE; i++) {
 	SLIST_FOREACH(pipe, &pipehash[i], next) {
 		purge_flow_set(&(pipe->fs), 0);
 
 		mnext = pipe->head;
 		while ((m = mnext) != NULL) {
 			mnext = m->m_nextpkt;
 			DN_FREE_PKT(m);
 		}
 		pipe->head = pipe->tail = NULL;
 	}
     }
 }
 
 /*
  * Fully delete a pipe or a queue, cleaning up associated info.
  *
  * p identifies the target: exactly one of p->pipe_nr and p->fs.fs_nr
  * must be non-zero, otherwise EINVAL is returned. Returns ENOENT when
  * no such pipe or flow set exists and 0 on success.
  * Takes and releases the dummynet lock itself.
  */
 static int
 delete_pipe(struct dn_pipe *p)
 {
 
     if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
 	return EINVAL ;
     if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
 	return EINVAL ;
     if (p->pipe_nr != 0) { /* this is an old-style pipe */
 	struct dn_pipe *pipe;
 	struct dn_flow_set *fs;
 	int i;
 
 	DUMMYNET_LOCK();
 	pipe = locate_pipe(p->pipe_nr);	/* locate pipe */
 
 	if (pipe == NULL) {
 	    DUMMYNET_UNLOCK();
 	    return (ENOENT);	/* not found */
 	}
 
 	/* Unlink from list of pipes. */
 	SLIST_REMOVE(&pipehash[HASH(pipe->pipe_nr)], pipe, dn_pipe, next);
 
 	/*
 	 * Remove all references to this pipe from flow_sets. The flow
 	 * sets themselves stay; only their queued packets are purged.
 	 */
 	for (i = 0; i < HASHSIZE; i++)
 	    SLIST_FOREACH(fs, &flowsethash[i], next)
 		if (fs->pipe == pipe) {
 			printf("dummynet: ++ ref to pipe %d from fs %d\n",
 			    p->pipe_nr, fs->fs_nr);
 			fs->pipe = NULL ;
 			purge_flow_set(fs, 0);
 		}
 	/* Drop ready_heap entries before the queues are freed below. */
 	fs_remove_from_heap(&ready_heap, &(pipe->fs));
 	purge_pipe(pipe); /* remove all data associated to this pipe */
 	/* remove reference to here from extract_heap and wfq_ready_heap */
 	pipe_remove_from_heap(&extract_heap, pipe);
 	pipe_remove_from_heap(&wfq_ready_heap, pipe);
 	DUMMYNET_UNLOCK();
 
 	/* The descriptor itself is released after dropping the lock. */
 	free_pipe(pipe);
     } else { /* this is a WF2Q queue (dn_flow_set) */
 	struct dn_flow_set *fs;
 
 	DUMMYNET_LOCK();
 	fs = locate_flowset(p->fs.fs_nr); /* locate set */
 
 	if (fs == NULL) {
 	    DUMMYNET_UNLOCK();
 	    return (ENOENT); /* not found */
 	}
 
 	/* Unlink from list of flowsets. */
 	SLIST_REMOVE( &flowsethash[HASH(fs->fs_nr)], fs, dn_flow_set, next);
 
 	if (fs->pipe != NULL) {
 	    /* Update total weight on parent pipe and cleanup parent heaps. */
 	    fs->pipe->sum -= fs->weight * fs->backlogged ;
 	    fs_remove_from_heap(&(fs->pipe->not_eligible_heap), fs);
 	    fs_remove_from_heap(&(fs->pipe->scheduler_heap), fs);
 #if 1	/* XXX should i remove from idle_heap as well ? */
 	    fs_remove_from_heap(&(fs->pipe->idle_heap), fs);
 #endif
 	}
 	/* all=1: frees the queues, the tables and fs itself. */
 	purge_flow_set(fs, 1);
 	DUMMYNET_UNLOCK();
     }
     return 0 ;
 }
 
 /*
  * Helper function used to copy data from kernel in DUMMYNET_GET.
  * Copies every flow queue of 'set' into the buffer at bp, clearing the
  * pointer fields of each copy, and logs any inconsistency it notices.
  * Returns the position just past the last queue written.
  */
 static char *
 dn_copy_set(struct dn_flow_set *set, char *bp)
 {
 	struct dn_flow_queue *src, *dst;
 	int slot, copied;
 
 	DUMMYNET_LOCK_ASSERT();
 
 	dst = (struct dn_flow_queue *)bp;
 	copied = 0;
 	for (slot = 0; slot <= set->rq_size; slot++) {
 		src = set->rq[slot];
 		while (src != NULL) {
 			if (src->hash_slot != slot)
 				printf("dummynet: ++ at %d: wrong slot (have %d, "
 				    "should be %d)\n", copied, src->hash_slot,
 				    slot);
 			if (src->fs != set)
 				printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n",
 				    slot, src->fs, set);
 			copied++;
 			bcopy(src, dst, sizeof(*src));
 			/* cleanup pointers */
 			dst->next = NULL;
 			dst->head = dst->tail = NULL;
 			dst->fs = NULL;
 
 			src = src->next;
 			dst++;
 		}
 	}
 	if (copied != set->rq_elements)
 		printf("dummynet: ++ wrong count, have %d should be %d\n",
 		    copied, set->rq_elements);
 	return ((char *)dst);
 }
 
 static size_t
 dn_calc_size(void)
 {
     struct dn_flow_set *fs;
     struct dn_pipe *pipe;
     size_t size = 0;
     int i;
 
     DUMMYNET_LOCK_ASSERT();
     /*
      * Compute size of data structures: list of pipes and flow_sets.
      */
     for (i = 0; i < HASHSIZE; i++) {
 	SLIST_FOREACH(pipe, &pipehash[i], next)
 		size += sizeof(*pipe) +
 		    pipe->fs.rq_elements * sizeof(struct dn_flow_queue);
 	SLIST_FOREACH(fs, &flowsethash[i], next)
 		size += sizeof (*fs) +
 		    fs->rq_elements * sizeof(struct dn_flow_queue);
     }
     return size;
 }
 
 /*
  * Export the whole dummynet configuration for IP_DUMMYNET_GET.
  *
  * The output buffer holds every pipe descriptor followed by its flow
  * queues, then every flow set descriptor followed by its flow queues.
  * It is handed to sooptcopyout(). Returns the sooptcopyout() result,
  * or ENOBUFS if no buffer of a stable size could be obtained.
  */
 static int
 dummynet_get(struct sockopt *sopt)
 {
     char *buf, *bp ; /* bp is the "copy-pointer" */
     size_t size ;
     struct dn_flow_set *fs;
     struct dn_pipe *pipe;
     int error=0, i ;
 
     /* XXX lock held too long */
     DUMMYNET_LOCK();
     /*
      * XXX: Ugly, but we need to allocate memory with M_WAITOK flag and we
      *      cannot use this flag while holding a mutex.
      *
      * So the lock is dropped around malloc() and the size is computed
      * again afterwards; if it changed meanwhile the buffer is thrown
      * away and the attempt repeated, up to 10 times.
      */
     for (i = 0; i < 10; i++) {
 	size = dn_calc_size();
 	DUMMYNET_UNLOCK();
 	buf = malloc(size, M_TEMP, M_WAITOK);
 	DUMMYNET_LOCK();
 	if (size == dn_calc_size())
 		break;
 	free(buf, M_TEMP);
 	buf = NULL;
     }
     /* buf is NULL only when all 10 attempts saw the size change. */
     if (buf == NULL) {
 	DUMMYNET_UNLOCK();
 	return ENOBUFS ;
     }
     bp = buf;
     for (i = 0; i < HASHSIZE; i++)
 	SLIST_FOREACH(pipe, &pipehash[i], next) {
 		struct dn_pipe *pipe_bp = (struct dn_pipe *)bp;
 
 		/*
 		 * Copy pipe descriptor into *bp, convert delay back to ms,
 		 * then copy the flow_set descriptor(s) one at a time.
 		 * After each flow_set, copy the queue descriptor it owns.
 		 */
 		bcopy(pipe, bp, sizeof(*pipe));
 		pipe_bp->delay = (pipe_bp->delay * 1000) / hz;
+		pipe_bp->burst /= 8 * hz;
 		/*
 		 * XXX the following is a hack based on ->next being the
 		 * first field in dn_pipe and dn_flow_set. The correct
 		 * solution would be to move the dn_flow_set to the beginning
 		 * of struct dn_pipe.
 		 */
 		pipe_bp->next.sle_next = (struct dn_pipe *)DN_IS_PIPE;
 		/* Clean pointers. */
 		pipe_bp->head = pipe_bp->tail = NULL;
 		pipe_bp->fs.next.sle_next = NULL;
 		pipe_bp->fs.pipe = NULL;
 		pipe_bp->fs.rq = NULL;
 		pipe_bp->samples = NULL;
 
 		bp += sizeof(*pipe) ;
 		bp = dn_copy_set(&(pipe->fs), bp);
 	}
 
     for (i = 0; i < HASHSIZE; i++)
 	SLIST_FOREACH(fs, &flowsethash[i], next) {
 		struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp;
 
 		bcopy(fs, bp, sizeof(*fs));
 		/* XXX same hack as above */
 		fs_bp->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE;
 		fs_bp->pipe = NULL;
 		fs_bp->rq = NULL;
 		bp += sizeof(*fs);
 		bp = dn_copy_set(fs, bp);
 	}
 
     DUMMYNET_UNLOCK();
 
     /* Copy out after dropping the lock. */
     error = sooptcopyout(sopt, buf, size);
     free(buf, M_TEMP);
     return error ;
 }
 
 /*
  * Handler for the various dummynet socket options (get, flush, config, del)
  */
 static int
 ip_dn_ctl(struct sockopt *sopt)
 {
     int error;
     struct dn_pipe *p = NULL;
 
     error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET);
     if (error)
 	return (error);
 
     /* Disallow sets in really-really secure mode. */
     if (sopt->sopt_dir == SOPT_SET) {
 #if __FreeBSD_version >= 500034
 	error =  securelevel_ge(sopt->sopt_td->td_ucred, 3);
 	if (error)
 	    return (error);
 #else
 	if (securelevel >= 3)
 	    return (EPERM);
 #endif
     }
 
     switch (sopt->sopt_name) {
     default :
 	printf("dummynet: -- unknown option %d", sopt->sopt_name);
 	error = EINVAL ;
 	break;
 
     case IP_DUMMYNET_GET :
 	error = dummynet_get(sopt);
 	break ;
 
     case IP_DUMMYNET_FLUSH :
 	dummynet_flush() ;
 	break ;
 
     case IP_DUMMYNET_CONFIGURE :
 	p = malloc(sizeof(struct dn_pipe_max), M_TEMP, M_WAITOK);
 	error = sooptcopyin(sopt, p, sizeof(struct dn_pipe_max), sizeof *p);
 	if (error)
 	    break ;
 	if (p->samples_no > 0)
 	    p->samples = &(((struct dn_pipe_max *)p)->samples[0]);
 
 	error = config_pipe(p);
 	break ;
 
     case IP_DUMMYNET_DEL :	/* remove a pipe or queue */
 	p = malloc(sizeof(struct dn_pipe), M_TEMP, M_WAITOK);
 	error = sooptcopyin(sopt, p, sizeof(struct dn_pipe), sizeof *p);
 	if (error)
 	    break ;
 
 	error = delete_pipe(p);
 	break ;
     }
     if (p != NULL)
 	free(p, M_TEMP);
     return error ;
 }
 
 /*
  * One-time initialisation of dummynet: set up the lock, the hash
  * tables and the three global heaps, publish the control and packet
  * hooks, create the "dummynet" taskqueue and arm the periodic callout.
  */
 static void
 ip_dn_init(void)
 {
 	int i;
 
 	if (bootverbose)
 		printf("DUMMYNET with IPv6 initialized (040826)\n");
 
 	DUMMYNET_LOCK_INIT();
 
 	for (i = 0; i < HASHSIZE; i++) {
 		SLIST_INIT(&pipehash[i]);
 		SLIST_INIT(&flowsethash[i]);
 	}
 	/* All three global heaps start empty, with offset 0. */
 	ready_heap.size = ready_heap.elements = 0;
 	ready_heap.offset = 0;
 
 	wfq_ready_heap.size = wfq_ready_heap.elements = 0;
 	wfq_ready_heap.offset = 0;
 
 	extract_heap.size = extract_heap.elements = 0;
 	extract_heap.offset = 0;
 
 	/* From here on the hooks point at this module. */
 	ip_dn_ctl_ptr = ip_dn_ctl;
 	ip_dn_io_ptr = dummynet_io;
 
 	/*
 	 * NOTE(review): taskqueue_create_fast() is called with M_NOWAIT
 	 * and its result is used without a NULL check - confirm whether
 	 * it can fail here.
 	 */
 	TASK_INIT(&dn_task, 0, dummynet_task, NULL);
 	dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT,
 	    taskqueue_thread_enqueue, &dn_tq);
 	taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet");
 
 	/* Schedule dummynet() through the dn_timeout callout. */
 	callout_init(&dn_timeout, CALLOUT_MPSAFE);
 	callout_reset(&dn_timeout, 1, dummynet, NULL);
 
 	/* Initialize curr_time adjustment mechanics. */
 	getmicrouptime(&prev_t);
 }
 
 #ifdef KLD_MODULE
 /*
  * Tear dummynet down on module unload (only built for KLD_MODULE).
  * The hooks are cleared first, then the callout is stopped and the
  * taskqueue drained and freed, and only then are the pipes and flow
  * sets flushed and the lock destroyed.
  */
 static void
 ip_dn_destroy(void)
 {
 	ip_dn_ctl_ptr = NULL;
 	ip_dn_io_ptr = NULL;
 
 	/* The callout is stopped with the dummynet lock held. */
 	DUMMYNET_LOCK();
 	callout_stop(&dn_timeout);
 	DUMMYNET_UNLOCK();
 	taskqueue_drain(dn_tq, &dn_task);
 	taskqueue_free(dn_tq);
 
 	/* dummynet_flush() takes the lock itself. */
 	dummynet_flush();
 
 	DUMMYNET_LOCK_DESTROY();
 }
 #endif /* KLD_MODULE */
 
 /*
  * Module event handler. MOD_LOAD initialises dummynet unless its
  * packet hook is already installed (EEXIST). MOD_UNLOAD tears it down
  * when built as a KLD and is refused with EINVAL when compiled in.
  * Any other event yields EOPNOTSUPP. 'mod' and 'data' are not used.
  */
 static int
 dummynet_modevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD) {
 		if (ip_dn_io_ptr) {
 			printf("DUMMYNET already loaded\n");
 			return (EEXIST);
 		}
 		ip_dn_init();
 		return (0);
 	}
 
 	if (type == MOD_UNLOAD) {
 #if !defined(KLD_MODULE)
 		printf("dummynet statically compiled, cannot unload\n");
 		return (EINVAL);
 #else
 		ip_dn_destroy();
 		return (0);
 #endif
 	}
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module description: the module is called "dummynet" and its events
  * are delivered to dummynet_modevent(); the last initializer is NULL.
  */
 static moduledata_t dummynet_mod = {
 	"dummynet",
 	dummynet_modevent,
 	NULL
 };
 /* Register the module, declare that it depends on ipfw, and version it. */
 DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
 MODULE_DEPEND(dummynet, ipfw, 2, 2, 2);
 MODULE_VERSION(dummynet, 1);