Index: head/sys/netpfil/ipfw/test/Makefile =================================================================== --- head/sys/netpfil/ipfw/test/Makefile (revision 294881) +++ head/sys/netpfil/ipfw/test/Makefile (revision 294882) @@ -1,51 +1,51 @@ # # $FreeBSD$ # # Makefile for building userland tests # this is written in a form compatible with gmake SCHED_SRCS = test_dn_sched.c SCHED_SRCS += dn_sched_fifo.c SCHED_SRCS += dn_sched_prio.c SCHED_SRCS += dn_sched_qfq.c SCHED_SRCS += dn_sched_rr.c SCHED_SRCS += dn_sched_wf2q.c SCHED_SRCS += dn_heap.c SCHED_SRCS += main.c SCHED_OBJS=$(SCHED_SRCS:.c=.o) HEAP_SRCS = dn_heap.c test_dn_heap.c HEAP_OBJS=$(HEAP_SRCS:.c=.o) VPATH= .:.. -CFLAGS = -I.. -I. -Wall -Werror -O3 -DIPFW -Wextra +CFLAGS = -I.. -I. -Wall -Werror -O3 -Wextra TARGETS= test_sched # no test_heap by default all: $(TARGETS) test_heap : $(HEAP_OBJS) $(CC) -o $@ $(HEAP_OBJS) test_sched : $(SCHED_OBJS) $(CC) -o $@ $(SCHED_OBJS) $(SCHED_OBJS): dn_test.h main.o: mylist.h clean: - rm *.o $(TARGETS) *.core ALLSRCS = $(SCHED_SRCS) dn_test.h mylist.h \ dn_sched.h dn_heap.h ip_dn_private.h Makefile TMPBASE = /tmp/testXYZ TMPDIR = $(TMPBASE)/test tgz: -rm -rf $(TMPDIR) mkdir -p $(TMPDIR) -cp -p $(ALLSRCS) $(TMPDIR) -(cd ..; cp -p $(ALLSRCS) $(TMPDIR)) ls -la $(TMPDIR) (cd $(TMPBASE); tar cvzf /tmp/test.tgz test) Index: head/sys/netpfil/ipfw/test/dn_test.h =================================================================== --- head/sys/netpfil/ipfw/test/dn_test.h (revision 294881) +++ head/sys/netpfil/ipfw/test/dn_test.h (revision 294882) @@ -1,184 +1,184 @@ /* * $FreeBSD$ * * userspace compatibility code for dummynet schedulers */ #ifndef _DN_TEST_H #define _DN_TEST_H #ifdef __cplusplus extern "C" { #endif #include #include #include #include /* bzero, ffs, ... */ #include /* strcmp */ #include #include #include extern int debug; #define ND(fmt, args...) do {} while (0) #define D1(fmt, args...) do {} while (0) -#define D(fmt, args...) fprintf(stderr, "%-8s " fmt "\n", \ - __FUNCTION__, ## args) +#define D(fmt, args...) fprintf(stderr, "%-10s %4d %-8s " fmt "\n", \ + __FILE__, __LINE__, __FUNCTION__, ## args) #define DX(lev, fmt, args...) do { \ if (debug > lev) D(fmt, ## args); } while (0) #ifndef offsetof #define offsetof(t,m) (int)(intptr_t)((&((t *)0L)->m)) #endif #if defined(__APPLE__) // XXX osx typedef unsigned int u_int; #endif /* osx */ #include /* prevent include of other system headers */ #define _NETINET_IP_VAR_H_ /* ip_fw_args */ #define _IPFW2_H #define _SYS_MBUF_H_ enum { DN_QUEUE, }; enum { DN_SCHED_FIFO, DN_SCHED_WF2QP, }; +/* from ip_dummynet.h, fields used in ip_dn_private.h */ struct dn_id { - int type, subtype, len, id; + uint16_t len; /* total len inc. this header */ + uint8_t type; + uint8_t subtype; +// uint32_t id; /* generic id */ }; +/* (from ip_dummynet.h) + * A flowset, which is a template for flows. Contains parameters + * from the command line: id, target scheduler, queue sizes, plr, + * flow masks, buckets for the flow hash, and possibly scheduler- + * specific parameters (weight, quantum and so on). + */ struct dn_fs { + /* generic scheduler parameters. Leave them at -1 if unset. + * Now we use 0: weight, 1: lmax, 2: priority + */ int par[4]; /* flowset parameters */ /* simulation entries. * 'index' is not strictly necessary * y is used for the inverse mapping , */ int index; int y; /* inverse mapping */ int base_y; /* inverse mapping */ int next_y; /* inverse mapping */ int n_flows; int first_flow; int next_flow; /* first_flow + n_flows */ /* * when generating, let 'cur' go from 0 to n_flows-1, * then point to flow first_flow + cur */ int cur; }; +/* (ip_dummynet.h) + * scheduler template, indicating nam, number, mask and buckets + */ struct dn_sch { }; +/* (from ip_dummynet.h) + * dn_flow collects flow_id and stats for queues and scheduler + * instances, and is used to pass these info to userland. + * oid.type/oid.subtype describe the object, oid.id is number + * of the parent object. + */ struct dn_flow { struct dn_id oid; - int length; - int len_bytes; - int drops; + uint64_t tot_pkts; uint64_t tot_bytes; - uint32_t flow_id; + uint32_t length; /* Queue length, in packets */ + uint32_t len_bytes; /* Queue length, in bytes */ + uint32_t drops; + //uint32_t flow_id; + + /* the following fields are used by the traffic generator. + */ struct list_head h; /* used by the generator */ /* bytes served by the flow since the last backlog time */ uint64_t bytes; /* bytes served by the system at the last backlog time */ uint64_t sch_bytes; }; +/* the link */ struct dn_link { }; struct ip_fw_args { }; struct mbuf { struct { int len; } m_pkthdr; struct mbuf *m_nextpkt; - int flow_id; /* for testing, index of a flow */ + uint32_t flow_id; /* for testing, index of a flow */ //int flowset_id; /* for testing, index of a flowset */ - void *cfg; /* config args */ + //void *cfg; /* config args */ }; #define MALLOC_DECLARE(x) extern volatile int __dummy__ ## x #define KASSERT(x, y) do { if (!(x)) printf y ; exit(0); } while (0) struct ipfw_flow_id { }; typedef void * module_t; struct _md_t { const char *name; int (*f)(module_t, int, void *); void *p; }; typedef struct _md_t moduledata_t; #define DECLARE_MODULE(name, b, c, d) \ moduledata_t *_g_##name = & b #define MODULE_DEPEND(a, b, c, d, e) -#ifdef IPFW #include #include #include -#else -struct dn_queue { - struct dn_fsk *fs; /* parent flowset. */ - struct dn_sch_inst *_si; /* parent sched instance. */ -}; -struct dn_schk { -}; -struct dn_fsk { - struct dn_fs fs; - struct dn_schk *sched; -}; -struct dn_sch_inst { - struct dn_schk *sched; -}; -struct dn_alg { - int type; - const char *name; - void *enqueue, *dequeue; - int q_datalen, si_datalen, schk_datalen; - int (*config)(struct dn_schk *); - int (*new_sched)(struct dn_sch_inst *); - int (*new_fsk)(struct dn_fsk *); - int (*new_queue)(struct dn_queue *q); -}; - -#endif #ifndef __FreeBSD__ int fls(int); #endif static inline void mq_append(struct mq *q, struct mbuf *m) { if (q->head == NULL) q->head = m; else q->tail->m_nextpkt = m; q->tail = m; m->m_nextpkt = NULL; } #ifdef __cplusplus } #endif #endif /* _DN_TEST_H */ Index: head/sys/netpfil/ipfw/test/main.c =================================================================== --- head/sys/netpfil/ipfw/test/main.c (revision 294881) +++ head/sys/netpfil/ipfw/test/main.c (revision 294882) @@ -1,686 +1,740 @@ /* * $FreeBSD$ * * Testing program for schedulers * * The framework include a simple controller which, at each * iteration, decides whether we can enqueue and/or dequeue. * Then the mainloop runs the required number of tests, * keeping track of statistics. */ +// #define USE_BURST // what is this for ? + #include "dn_test.h" -struct q_list { - struct list_head h; -}; - struct cfg_s { int ac; char * const *av; const char *name; int loops; struct timeval time; /* running counters */ uint32_t _enqueue; uint32_t drop; uint32_t pending; uint32_t dequeue; /* generator parameters */ - int th_min, th_max; + int32_t th_min, th_max; /* thresholds for hysteresis; negative means per flow */ +#ifdef USE_BURST int maxburst; +#endif /* USE_BURST */ int lmin, lmax; /* packet len */ int flows; /* number of flows */ int flowsets; /* number of flowsets */ int wsum; /* sum of weights of all flows */ +#ifdef USE_CUR int max_y; /* max random number in the generation */ - int cur_y, cur_fs; /* used in generation, between 0 and max_y - 1 */ + int cur_y + int cur_fs; /* used in generation, between 0 and max_y - 1 */ +#endif /* USE_CUR */ const char *fs_config; /* flowset config */ int can_dequeue; int burst; /* count of packets sent in a burst */ struct mbuf *tosend; /* packet to send -- also flag to enqueue */ struct mbuf *freelist; struct mbuf *head, *tail; /* a simple tailq */ /* scheduler hooks */ int (*enq)(struct dn_sch_inst *, struct dn_queue *, struct mbuf *); struct mbuf * (*deq)(struct dn_sch_inst *); /* size of the three fields including sched-specific areas */ - int schk_len; - int q_len; /* size of a queue including sched-fields */ - int si_len; /* size of a sch_inst including sched-fields */ + uint32_t schk_len; + uint32_t q_len; /* size of a queue including sched-fields */ + uint32_t si_len; /* size of a sch_inst including sched-fields */ char *q; /* array of flow queues */ /* use a char* because size is variable */ - struct dn_fsk *fs; /* array of flowsets */ - struct dn_sch_inst *si; + /* + * The scheduler template (one) followd by schk_datalen bytes + * for scheduler-specific parameters, total size is schk_len + */ struct dn_schk *sched; + /* + * one scheduler instance, followed by si_datalen bytes + * for scheduler specific parameters of this instance, + * total size is si_len. si->sched points to sched + */ + struct dn_sch_inst *si; + struct dn_fsk *fs; /* array of flowsets */ /* generator state */ - int state; /* 0 = going up, 1: going down */ + int state; /* 0 = going up (enqueue), 1: going down (dequeue) */ /* * We keep lists for each backlog level, and always serve * the one with shortest backlog. llmask contains a bitmap * of lists, and ll are the heads of the lists. The last * entry (BACKLOG) contains all entries considered 'full' * XXX to optimize things, entry i could contain queues with * 2^{i-1}+1 .. 2^i entries. */ -#define BACKLOG 30 - uint32_t llmask; +#define BACKLOG 30 /* this many backlogged classes, we only need BACKLOG+1 */ + uint64_t llmask; struct list_head ll[BACKLOG + 10]; double *q_wfi; /* (byte) Worst-case Fair Index of the flows */ double wfi; /* (byte) Worst-case Fair Index of the system */ }; -/* FI2Q and Q2FI converts from flow_id to dn_queue and back. - * We cannot easily use pointer arithmetic because it is variable size. - */ +/* FI2Q and Q2FI converts from flow_id (i.e. queue index) + * to dn_queue and back. We cannot simply use pointer arithmetic + * because the queu has variable size, q_len + */ #define FI2Q(c, i) ((struct dn_queue *)((c)->q + (c)->q_len * (i))) #define Q2FI(c, q) (((char *)(q) - (c)->q)/(c)->q_len) int debug = 0; struct dn_parms dn_cfg; static void controller(struct cfg_s *c); -/* release a packet: put the mbuf in the freelist, and the queue in - * the bucket. +/* release a packet for a given flow_id. + * Put the mbuf in the freelist, and in case move the + * flow to the end of the bucket. */ -int +static int drop(struct cfg_s *c, struct mbuf *m) { struct dn_queue *q; int i; c->drop++; q = FI2Q(c, m->flow_id); i = q->ni.length; // XXX or ffs... ND("q %p id %d current length %d", q, m->flow_id, i); if (i < BACKLOG) { struct list_head *h = &q->ni.h; c->llmask &= ~(1<<(i+1)); c->llmask |= (1<<(i)); list_del(h); list_add_tail(h, &c->ll[i]); } m->m_nextpkt = c->freelist; c->freelist = m; return 0; } -/* dequeue returns NON-NULL when a packet is dropped */ + +/* + * dn_sch_inst does not have a queue, for the RR we + * allocate a mq right after si + */ static int -enqueue(struct cfg_s *c, void *_m) +default_enqueue(struct dn_sch_inst *si, struct dn_queue *q, struct mbuf *m) { - struct mbuf *m = _m; - if (c->enq) - return c->enq(c->si, FI2Q(c, m->flow_id), m); - if (c->head == NULL) - c->head = m; + struct mq *mq = (struct mq *)si; + + (void)q; + /* this is the default function if no scheduler is provided */ + if (mq->head == NULL) + mq->head = m; else - c->tail->m_nextpkt = m; - c->tail = m; + mq->tail->m_nextpkt = m; + mq->tail = m; return 0; /* default - success */ } -/* dequeue returns NON-NULL when a packet is available */ -static void * -dequeue(struct cfg_s *c) + +static struct mbuf * +default_dequeue(struct dn_sch_inst *si) { + struct mq *mq = (struct mq *)si; struct mbuf *m; - if (c->deq) - return c->deq(c->si); - if ((m = c->head)) { - m = c->head; - c->head = m->m_nextpkt; + /* this is the default function if no scheduler is provided */ + if ((m = mq->head)) { + m = mq->head; + mq->head = m->m_nextpkt; m->m_nextpkt = NULL; } return m; } static void gnet_stats_enq(struct cfg_s *c, struct mbuf *mb) { struct dn_sch_inst *si = c->si; struct dn_queue *_q = FI2Q(c, mb->flow_id); if (_q->ni.length == 1) { _q->ni.bytes = 0; _q->ni.sch_bytes = si->ni.bytes; } } static void gnet_stats_deq(struct cfg_s *c, struct mbuf *mb) { struct dn_sch_inst *si = c->si; struct dn_queue *_q = FI2Q(c, mb->flow_id); int len = mb->m_pkthdr.len; _q->ni.bytes += len; si->ni.bytes += len; if (_q->ni.length == 0) { double bytes = (double)_q->ni.bytes; double sch_bytes = (double)si->ni.bytes - _q->ni.sch_bytes; double weight = (double)_q->fs->fs.par[0] / c->wsum; double wfi = sch_bytes * weight - bytes; if (c->q_wfi[mb->flow_id] < wfi) c->q_wfi[mb->flow_id] = wfi; } } static int mainloop(struct cfg_s *c) { int i; struct mbuf *m; for (i=0; i < c->loops; i++) { /* implement histeresis */ controller(c); DX(3, "loop %d enq %d send %p rx %d", i, c->_enqueue, c->tosend, c->can_dequeue); if ( (m = c->tosend) ) { + int ret; + struct dn_queue *q = FI2Q(c, m->flow_id); c->_enqueue++; - if (enqueue(c, m)) { + ret = c->enq(c->si, q, m); + if (ret) { drop(c, m); - ND("loop %d enqueue fail", i ); + D("loop %d enqueue fail", i ); + /* + * XXX do not insist; rather, try dequeue + */ + goto do_dequeue; } else { ND("enqueue ok"); c->pending++; gnet_stats_enq(c, m); } - } - if (c->can_dequeue) { + } else if (c->can_dequeue) { +do_dequeue: c->dequeue++; - if ((m = dequeue(c))) { + m = c->deq(c->si); + if (m) { c->pending--; drop(c, m); c->drop--; /* compensate */ gnet_stats_deq(c, m); + } else { + D("--- ouch, cannot operate on iteration %d, pending %d", i, c->pending); + break; } } } DX(1, "mainloop ends %d", i); return 0; } int dump(struct cfg_s *c) { int i; - struct dn_queue *q; for (i=0; i < c->flows; i++) { - q = FI2Q(c, i); - DX(1, "queue %4d tot %10llu", i, + //struct dn_queue *q = FI2Q(c, i); + ND(1, "queue %4d tot %10llu", i, (unsigned long long)q->ni.tot_bytes); } DX(1, "done %d loops\n", c->loops); return 0; } /* interpret a number in human form */ static long getnum(const char *s, char **next, const char *key) { char *end = NULL; long l; if (next) /* default */ *next = NULL; if (s && *s) { DX(3, "token is <%s> %s", s, key ? key : "-"); l = strtol(s, &end, 0); } else { DX(3, "empty string"); l = -1; } if (l < 0) { DX(2, "invalid %s for %s", s ? s : "NULL", (key ? key : "") ); return 0; // invalid } if (!end || !*end) return l; if (*end == 'n') l = -l; /* multiply by n */ else if (*end == 'K') l = l*1000; else if (*end == 'M') l = l*1000000; else if (*end == 'k') l = l*1024; else if (*end == 'm') l = l*1024*1024; else if (*end == 'w') ; else {/* not recognized */ D("suffix %s for %s, next %p", end, key, next); end--; } end++; DX(3, "suffix now %s for %s, next %p", end, key, next); if (next && *end) { DX(3, "setting next to %s for %s", end, key); *next = end; } return l; } /* * flowsets are a comma-separated list of * weight:maxlen:flows * indicating how many flows are hooked to that fs. * Both weight and range can be min-max-steps. - * In a first pass we just count the number of flowsets and flows, - * in a second pass we complete the setup. + * The first pass (fs != NULL) justs count the number of flowsets and flows, + * the second pass (fs == NULL) we complete the setup. */ static void -parse_flowsets(struct cfg_s *c, const char *fs, int pass) +parse_flowsets(struct cfg_s *c, const char *fs) { char *s, *cur, *next; int n_flows = 0, n_fs = 0, wsum = 0; int i, j; struct dn_fs *prev = NULL; + int pass = (fs == NULL); DX(3, "--- pass %d flows %d flowsets %d", pass, c->flows, c->flowsets); - if (pass == 0) + if (fs != NULL) { /* first pass */ + if (c->fs_config) + D("warning, overwriting fs %s with %s", + c->fs_config, fs); c->fs_config = fs; + } s = c->fs_config ? strdup(c->fs_config) : NULL; if (s == NULL) { if (pass == 0) D("no fsconfig"); return; } for (next = s; (cur = strsep(&next, ","));) { char *p = NULL; int w, w_h, w_steps, wi; int len, len_h, l_steps, li; int flows; w = getnum(strsep(&cur, ":"), &p, "weight"); if (w <= 0) w = 1; w_h = p ? getnum(p+1, &p, "weight_max") : w; w_steps = p ? getnum(p+1, &p, "w_steps") : (w_h == w ?1:2); len = getnum(strsep(&cur, ":"), &p, "len"); if (len <= 0) len = 1000; len_h = p ? getnum(p+1, &p, "len_max") : len; l_steps = p ? getnum(p+1, &p, "l_steps") : (len_h == len ? 1 : 2); flows = getnum(strsep(&cur, ":"), NULL, "flows"); if (flows == 0) flows = 1; DX(4, "weight %d..%d (%d) len %d..%d (%d) flows %d", w, w_h, w_steps, len, len_h, l_steps, flows); if (w == 0 || w_h < w || len == 0 || len_h < len || flows == 0) { - DX(4,"wrong parameters %s", fs); + DX(4,"wrong parameters %s", s); return; } n_flows += flows * w_steps * l_steps; for (i = 0; i < w_steps; i++) { wi = w + ((w_h - w)* i)/(w_steps == 1 ? 1 : (w_steps-1)); for (j = 0; j < l_steps; j++, n_fs++) { struct dn_fs *fs = &c->fs[n_fs].fs; // tentative int x; li = len + ((len_h - len)* j)/(l_steps == 1 ? 1 : (l_steps-1)); x = (wi*2048)/li; DX(3, "----- fs %4d weight %4d lmax %4d X %4d flows %d", n_fs, wi, li, x, flows); if (pass == 0) continue; if (c->fs == NULL || c->flowsets <= n_fs) { D("error in number of flowsets"); return; } wsum += wi * flows; fs->par[0] = wi; fs->par[1] = li; fs->index = n_fs; fs->n_flows = flows; fs->cur = fs->first_flow = prev==NULL ? 0 : prev->next_flow; fs->next_flow = fs->first_flow + fs->n_flows; fs->y = x * flows; fs->base_y = (prev == NULL) ? 0 : prev->next_y; fs->next_y = fs->base_y + fs->y; prev = fs; } } } - c->max_y = prev ? prev->base_y + prev->y : 0; c->flows = n_flows; c->flowsets = n_fs; c->wsum = wsum; if (pass == 0) return; /* now link all flows to their parent flowsets */ + DX(1,"%d flows on %d flowsets", c->flows, c->flowsets); +#ifdef USE_CUR + c->max_y = prev ? prev->base_y + prev->y : 0; DX(1,"%d flows on %d flowsets max_y %d", c->flows, c->flowsets, c->max_y); +#endif /* USE_CUR */ for (i=0; i < c->flowsets; i++) { struct dn_fs *fs = &c->fs[i].fs; DX(1, "fs %3d w %5d l %4d flow %5d .. %5d y %6d .. %6d", i, fs->par[0], fs->par[1], fs->first_flow, fs->next_flow, fs->base_y, fs->next_y); for (j = fs->first_flow; j < fs->next_flow; j++) { struct dn_queue *q = FI2Q(c, j); q->fs = &c->fs[i]; } } } +/* available schedulers */ +extern moduledata_t *_g_dn_fifo; +extern moduledata_t *_g_dn_wf2qp; +extern moduledata_t *_g_dn_rr; +extern moduledata_t *_g_dn_qfq; +#ifdef WITH_QFQP +extern moduledata_t *_g_dn_qfqp; +#endif +#ifdef WITH_KPS +extern moduledata_t *_g_dn_kps; +#endif + static int init(struct cfg_s *c) { int i; int ac = c->ac; char * const *av = c->av; c->si_len = sizeof(struct dn_sch_inst); c->q_len = sizeof(struct dn_queue); moduledata_t *mod = NULL; struct dn_alg *p = NULL; - c->th_min = 0; + c->th_min = -1; /* 1 packet per flow */ c->th_max = -20;/* 20 packets per flow */ c->lmin = c->lmax = 1280; /* packet len */ c->flows = 1; c->flowsets = 1; c->name = "null"; ac--; av++; while (ac > 1) { if (!strcmp(*av, "-n")) { c->loops = getnum(av[1], NULL, av[0]); } else if (!strcmp(*av, "-d")) { debug = atoi(av[1]); } else if (!strcmp(*av, "-alg")) { - extern moduledata_t *_g_dn_fifo; - extern moduledata_t *_g_dn_wf2qp; - extern moduledata_t *_g_dn_rr; - extern moduledata_t *_g_dn_qfq; -#ifdef WITH_QFQP - extern moduledata_t *_g_dn_qfqp; -#endif -#ifdef WITH_KPS - extern moduledata_t *_g_dn_kps; -#endif if (!strcmp(av[1], "rr")) mod = _g_dn_rr; else if (!strcmp(av[1], "wf2qp")) mod = _g_dn_wf2qp; else if (!strcmp(av[1], "fifo")) mod = _g_dn_fifo; else if (!strcmp(av[1], "qfq")) mod = _g_dn_qfq; #ifdef WITH_QFQP else if (!strcmp(av[1], "qfq+") || !strcmp(av[1], "qfqp") ) mod = _g_dn_qfqp; #endif #ifdef WITH_KPS else if (!strcmp(av[1], "kps")) mod = _g_dn_kps; #endif else mod = NULL; c->name = mod ? mod->name : "NULL"; DX(3, "using scheduler %s", c->name); } else if (!strcmp(*av, "-len")) { c->lmin = getnum(av[1], NULL, av[0]); c->lmax = c->lmin; DX(3, "setting max to %d", c->th_max); +#ifdef USE_BURST } else if (!strcmp(*av, "-burst")) { c->maxburst = getnum(av[1], NULL, av[0]); DX(3, "setting max to %d", c->th_max); +#endif /* USE_BURST */ } else if (!strcmp(*av, "-qmax")) { c->th_max = getnum(av[1], NULL, av[0]); DX(3, "setting max to %d", c->th_max); } else if (!strcmp(*av, "-qmin")) { c->th_min = getnum(av[1], NULL, av[0]); DX(3, "setting min to %d", c->th_min); } else if (!strcmp(*av, "-flows")) { c->flows = getnum(av[1], NULL, av[0]); DX(3, "setting flows to %d", c->flows); } else if (!strcmp(*av, "-flowsets")) { - parse_flowsets(c, av[1], 0); + parse_flowsets(c, av[1]); /* first pass */ DX(3, "setting flowsets to %d", c->flowsets); } else { D("option %s not recognised, ignore", *av); } ac -= 2; av += 2; } +#ifdef USE_BURST if (c->maxburst <= 0) c->maxburst = 1; +#endif /* USE_BURST */ if (c->loops <= 0) c->loops = 1; if (c->flows <= 0) c->flows = 1; if (c->flowsets <= 0) c->flowsets = 1; if (c->lmin <= 0) c->lmin = 1; if (c->lmax <= 0) c->lmax = 1; /* multiply by N */ if (c->th_min < 0) c->th_min = c->flows * -c->th_min; if (c->th_max < 0) c->th_max = c->flows * -c->th_max; if (c->th_max <= c->th_min) c->th_max = c->th_min + 1; + + /* now load parameters from the module */ if (mod) { p = mod->p; DX(3, "using module %s f %p p %p", mod->name, mod->f, mod->p); DX(3, "modname %s ty %d", p->name, p->type); + // XXX check enq and deq not null c->enq = p->enqueue; c->deq = p->dequeue; c->si_len += p->si_datalen; c->q_len += p->q_datalen; c->schk_len += p->schk_datalen; + } else { + /* make sure c->si has room for a queue */ + c->enq = default_enqueue; + c->deq = default_dequeue; } + /* allocate queues, flowsets and one scheduler */ - c->q = calloc(c->flows, c->q_len); - c->q_wfi = (double *)calloc(c->flows, sizeof(double)); + D("using %d flows, %d flowsets", c->flows, c->flowsets); + D("q_len %d dn_fsk %d si %d sched %d", + c->q_len, (int)sizeof(struct dn_fsk), + c->si_len, c->schk_len); + c->sched = calloc(1, c->schk_len); /* one parent scheduler */ + c->si = calloc(1, c->si_len); /* one scheduler instance */ c->fs = calloc(c->flowsets, sizeof(struct dn_fsk)); - c->si = calloc(1, c->si_len); - c->sched = calloc(c->flows, c->schk_len); - if (c->q == NULL || c->fs == NULL || !c->q_wfi) { - D("error allocating memory for flows"); + c->q = calloc(c->flows, c->q_len); /* one queue per flow */ + c->q_wfi = calloc(c->flows, sizeof(double)); /* stats, one per flow */ + if (!c->sched || !c->si || !c->fs || !c->q || !c->q_wfi) { + D("error allocating memory"); exit(1); } - c->si->sched = c->sched; + c->si->sched = c->sched; /* link scheduler instance to template */ if (p) { + /* run initialization code if needed */ if (p->config) - p->config(c->sched); + p->config(c->si->sched); if (p->new_sched) p->new_sched(c->si); } /* parse_flowsets links queues to their flowsets */ - parse_flowsets(c, av[1], 1); + parse_flowsets(c, NULL); /* second pass */ /* complete the work calling new_fsk */ for (i = 0; i < c->flowsets; i++) { - if (c->fs[i].fs.par[1] == 0) - c->fs[i].fs.par[1] = 1000; /* default pkt len */ - c->fs[i].sched = c->sched; + struct dn_fsk *fsk = &c->fs[i]; + if (fsk->fs.par[1] == 0) + fsk->fs.par[1] = 1000; /* default pkt len */ + fsk->sched = c->si->sched; if (p && p->new_fsk) - p->new_fsk(&c->fs[i]); + p->new_fsk(fsk); } + /* --- now the scheduler is initialized --- */ - /* initialize the lists for the generator, and put + /* + * initialize the lists for the generator, and put * all flows in the list for backlog = 0 */ for (i=0; i <= BACKLOG+5; i++) INIT_LIST_HEAD(&c->ll[i]); for (i = 0; i < c->flows; i++) { struct dn_queue *q = FI2Q(c, i); if (q->fs == NULL) q->fs = &c->fs[0]; /* XXX */ q->_si = c->si; if (p && p->new_queue) p->new_queue(q); INIT_LIST_HEAD(&q->ni.h); list_add_tail(&q->ni.h, &c->ll[0]); } - c->llmask = 1; + c->llmask = 1; /* all flows are in the first list */ return 0; } int main(int ac, char *av[]) { struct cfg_s c; - struct timeval end; double ll; int i; char msg[40]; bzero(&c, sizeof(c)); c.ac = ac; c.av = av; init(&c); gettimeofday(&c.time, NULL); + D("th_min %d th_max %d", c.th_min, c.th_max); + mainloop(&c); - gettimeofday(&end, NULL); - end.tv_sec -= c.time.tv_sec; - end.tv_usec -= c.time.tv_usec; - if (end.tv_usec < 0) { - end.tv_usec += 1000000; - end.tv_sec--; + { + struct timeval end; + gettimeofday(&end, NULL); + timersub(&end, &c.time, &c.time); } - c.time = end; - ll = end.tv_sec*1000000 + end.tv_usec; + ll = c.time.tv_sec*1000000 + c.time.tv_usec; ll *= 1000; /* convert to nanoseconds */ ll /= c._enqueue; sprintf(msg, "1::%d", c.flows); for (i = 0; i < c.flows; i++) { if (c.wfi < c.q_wfi[i]) c.wfi = c.q_wfi[i]; } - D("sched=%-12s\ttime=%d.%03d sec (%.0f nsec)\twfi=%.02f\tflow=%-16s", - c.name, (int)c.time.tv_sec, (int)c.time.tv_usec / 1000, ll, c.wfi, + D("sched=%-12s\ttime=%d.%03d sec (%.0f nsec) enq %lu %lu deq\n" + "\twfi=%.02f\tflow=%-16s", + c.name, (int)c.time.tv_sec, (int)c.time.tv_usec / 1000, ll, + (unsigned long)c._enqueue, (unsigned long)c.dequeue, c.wfi, c.fs_config ? c.fs_config : msg); dump(&c); DX(1, "done ac %d av %p", ac, av); for (i=0; i < ac; i++) DX(1, "arg %d %s", i, av[i]); return 0; } /* * The controller decides whether in this iteration we should send * (the packet is in c->tosend) and/or receive (flag c->can_dequeue) */ static void controller(struct cfg_s *c) { struct mbuf *m; struct dn_fs *fs; int flow_id; - /* histeresis between max and min */ + /* hysteresis between max and min */ if (c->state == 0 && c->pending >= (uint32_t)c->th_max) c->state = 1; else if (c->state == 1 && c->pending <= (uint32_t)c->th_min) c->state = 0; ND(1, "state %d pending %2d", c->state, c->pending); c->can_dequeue = c->state; c->tosend = NULL; - if (c->state) + if (c->can_dequeue) return; + /* + * locate the flow to use for enqueueing + * We take the queue with the lowest number of queued packets, + * generate a packet for it, and put the queue in the next highest. + */ if (1) { int i; struct dn_queue *q; struct list_head *h; i = ffs(c->llmask) - 1; if (i < 0) { - DX(2, "no candidate"); + D("no candidate"); c->can_dequeue = 1; return; } h = &c->ll[i]; ND(1, "backlog %d p %p prev %p next %p", i, h, h->prev, h->next); q = list_first_entry(h, struct dn_queue, ni.h); list_del(&q->ni.h); flow_id = Q2FI(c, q); DX(2, "extracted flow %p %d backlog %d", q, flow_id, i); if (list_empty(h)) { ND(2, "backlog %d empty", i); c->llmask &= ~(1<ni.h, h+1); ND(1, " after %d p %p prev %p next %p", i+1, h+1, h[1].prev, h[1].next); if (i < BACKLOG) { ND(2, "backlog %d full", i+1); c->llmask |= 1<<(1+i); } fs = &q->fs->fs; - c->cur_fs = q->fs - c->fs; fs->cur = flow_id; +#ifdef USE_CUR + c->cur_fs = q->fs - c->fs; } else { /* XXX this does not work ? */ /* now decide whom to send the packet, and the length */ /* lookup in the flow table */ if (c->cur_y >= c->max_y) { /* handle wraparound */ c->cur_y = 0; c->cur_fs = 0; } fs = &c->fs[c->cur_fs].fs; flow_id = fs->cur++; if (fs->cur >= fs->next_flow) fs->cur = fs->first_flow; c->cur_y++; if (c->cur_y >= fs->next_y) c->cur_fs++; +#endif /* USE_CUR */ } /* construct a packet */ if (c->freelist) { m = c->tosend = c->freelist; c->freelist = c->freelist->m_nextpkt; } else { m = c->tosend = calloc(1, sizeof(struct mbuf)); } if (m == NULL) return; - m->cfg = c; + //m->cfg = c; m->m_nextpkt = NULL; m->m_pkthdr.len = fs->par[1]; // XXX maxlen m->flow_id = flow_id; ND(2,"y %6d flow %5d fs %3d weight %4d len %4d", c->cur_y, m->flow_id, c->cur_fs, fs->par[0], m->m_pkthdr.len); } - -/* -Packet allocation: -to achieve a distribution that matches weights, for each X=w/lmax class -we should generate a number of packets proportional to Y = X times the number -of flows in the class. -So we construct an array with the cumulative distribution of Y's, -and use it to identify the flow via inverse mapping (if the Y's are -not too many we can use an array for the lookup). In practice, -each flow will have X entries [virtually] pointing to it. - -*/ Index: head/sys/netpfil/ipfw/test/mylist.h =================================================================== --- head/sys/netpfil/ipfw/test/mylist.h (revision 294881) +++ head/sys/netpfil/ipfw/test/mylist.h (revision 294882) @@ -1,49 +1,50 @@ /* * $FreeBSD$ * * linux-like bidirectional lists */ #ifndef _MYLIST_H #define _MYLIST_H +/* not just a head, also the link field for a list entry */ struct list_head { struct list_head *prev, *next; }; #define INIT_LIST_HEAD(l) do { (l)->prev = (l)->next = (l); } while (0) #define list_empty(l) ( (l)->next == l ) static inline void __list_add(struct list_head *o, struct list_head *prev, struct list_head *next) { next->prev = o; o->next = next; o->prev = prev; prev->next = o; } static inline void list_add_tail(struct list_head *o, struct list_head *head) { __list_add(o, head->prev, head); } #define list_first_entry(pL, ty, member) \ (ty *)((char *)((pL)->next) - offsetof(ty, member)) static inline void __list_del(struct list_head *prev, struct list_head *next) { next->prev = prev; prev->next = next; } static inline void list_del(struct list_head *entry) { ND("called on %p", entry); __list_del(entry->prev, entry->next); entry->next = entry->prev = NULL; } #endif /* _MYLIST_H */ Index: head/sys/netpfil/ipfw/test/test_dn_sched.c =================================================================== --- head/sys/netpfil/ipfw/test/test_dn_sched.c (revision 294881) +++ head/sys/netpfil/ipfw/test/test_dn_sched.c (revision 294882) @@ -1,95 +1,96 @@ /* * $FreeBSD$ * * library functions for userland testing of dummynet schedulers */ #include "dn_test.h" void m_freem(struct mbuf *m) { printf("free %p\n", m); } int dn_sched_modevent(module_t mod, int cmd, void *arg) { (void)mod; (void)cmd; (void)arg; return 0; } void dn_free_pkts(struct mbuf *m) { struct mbuf *x; while ( (x = m) ) { m = m->m_nextpkt; m_freem(x); } } int dn_delete_queue(void *_q, void *do_free) { struct dn_queue *q = _q; (void)do_free; if (q->mq.head) dn_free_pkts(q->mq.head); free(q); return 0; } /* * This is a simplified function for testing purposes, which does * not implement statistics or random loss. * Enqueue a packet in q, subject to space and queue management policy * (whose parameters are in q->fs). * Update stats for the queue and the scheduler. * Return 0 on success, 1 on drop. The packet is consumed anyways. */ int dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop) { if (drop) goto drop; if (q->ni.length >= 200) goto drop; mq_append(&q->mq, m); q->ni.length++; q->ni.tot_bytes += m->m_pkthdr.len; + q->ni.tot_pkts++; return 0; drop: q->ni.drops++; return 1; } int ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) { (void)msg; if (*v < lo) { *v = dflt; } else if (*v > hi) { *v = hi; } return *v; } #ifndef __FreeBSD__ int fls(int mask) { int bit; if (mask == 0) return (0); for (bit = 1; mask != 1; bit++) mask = (unsigned int)mask >> 1; return (bit); } #endif