Changeset View
Changeset View
Standalone View
Standalone View
sys/netpfil/ipfw/ip_dn_io.c
Show First 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | |||||
#ifdef NEW_AQM | #ifdef NEW_AQM | ||||
#include <netpfil/ipfw/dn_aqm.h> | #include <netpfil/ipfw/dn_aqm.h> | ||||
#endif | #endif | ||||
#include <netpfil/ipfw/dn_sched.h> | #include <netpfil/ipfw/dn_sched.h> | ||||
/* | /* | ||||
* We keep a private variable for the simulation time, but we could | * We keep a private variable for the simulation time, but we could | ||||
* probably use an existing one ("softticks" in sys/kern/kern_timeout.c) | * probably use an existing one ("softticks" in sys/kern/kern_timeout.c) | ||||
* instead of dn_cfg.curr_time | * instead of V_dn_cfg.curr_time | ||||
*/ | */ | ||||
VNET_DEFINE(struct dn_parms, dn_cfg); | |||||
#define V_dn_cfg VNET(dn_cfg) | |||||
struct dn_parms dn_cfg; | |||||
//VNET_DEFINE(struct dn_parms, _base_dn_cfg); | |||||
/* | /* | ||||
* We use a heap to store entities for which we have pending timer events. | * We use a heap to store entities for which we have pending timer events. | ||||
* The heap is checked at every tick and all entities with expired events | * The heap is checked at every tick and all entities with expired events | ||||
* are extracted. | * are extracted. | ||||
*/ | */ | ||||
MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); | MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); | ||||
extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *); | extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *); | ||||
#ifdef SYSCTL_NODE | #ifdef SYSCTL_NODE | ||||
/* | /* | ||||
* Because of the way the SYSBEGIN/SYSEND macros work on other | * Because of the way the SYSBEGIN/SYSEND macros work on other | ||||
* platforms, there should not be functions between them. | * platforms, there should not be functions between them. | ||||
* So keep the handlers outside the block. | * So keep the handlers outside the block. | ||||
*/ | */ | ||||
static int | static int | ||||
sysctl_hash_size(SYSCTL_HANDLER_ARGS) | sysctl_hash_size(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
int error, value; | int error, value; | ||||
value = dn_cfg.hash_size; | value = V_dn_cfg.hash_size; | ||||
error = sysctl_handle_int(oidp, &value, 0, req); | error = sysctl_handle_int(oidp, &value, 0, req); | ||||
if (error != 0 || req->newptr == NULL) | if (error != 0 || req->newptr == NULL) | ||||
return (error); | return (error); | ||||
if (value < 16 || value > 65536) | if (value < 16 || value > 65536) | ||||
return (EINVAL); | return (EINVAL); | ||||
dn_cfg.hash_size = value; | V_dn_cfg.hash_size = value; | ||||
return (0); | return (0); | ||||
} | } | ||||
static int | static int | ||||
sysctl_limits(SYSCTL_HANDLER_ARGS) | sysctl_limits(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
int error; | int error; | ||||
long value; | long value; | ||||
if (arg2 != 0) | if (arg2 != 0) | ||||
value = dn_cfg.slot_limit; | value = V_dn_cfg.slot_limit; | ||||
else | else | ||||
value = dn_cfg.byte_limit; | value = V_dn_cfg.byte_limit; | ||||
error = sysctl_handle_long(oidp, &value, 0, req); | error = sysctl_handle_long(oidp, &value, 0, req); | ||||
if (error != 0 || req->newptr == NULL) | if (error != 0 || req->newptr == NULL) | ||||
return (error); | return (error); | ||||
if (arg2 != 0) { | if (arg2 != 0) { | ||||
if (value < 1) | if (value < 1) | ||||
return (EINVAL); | return (EINVAL); | ||||
dn_cfg.slot_limit = value; | V_dn_cfg.slot_limit = value; | ||||
} else { | } else { | ||||
if (value < 1500) | if (value < 1500) | ||||
return (EINVAL); | return (EINVAL); | ||||
dn_cfg.byte_limit = value; | V_dn_cfg.byte_limit = value; | ||||
} | } | ||||
return (0); | return (0); | ||||
} | } | ||||
SYSBEGIN(f4) | SYSBEGIN(f4) | ||||
SYSCTL_DECL(_net_inet); | SYSCTL_DECL(_net_inet); | ||||
SYSCTL_DECL(_net_inet_ip); | SYSCTL_DECL(_net_inet_ip); | ||||
#ifdef NEW_AQM | #ifdef NEW_AQM | ||||
SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"Dummynet"); | "Dummynet"); | ||||
#else | #else | ||||
static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, | static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, | ||||
CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"Dummynet"); | "Dummynet"); | ||||
#endif | #endif | ||||
/* wrapper to pass dn_cfg fields to SYSCTL_* */ | /* wrapper to pass V_dn_cfg fields to SYSCTL_* */ | ||||
//#define DC(x) (&(VNET_NAME(_base_dn_cfg).x)) | #define DC(x) (&(VNET_NAME(dn_cfg).x)) | ||||
#define DC(x) (&(dn_cfg.x)) | |||||
/* parameters */ | /* parameters */ | ||||
SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size, | SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size, | ||||
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, | ||||
0, 0, sysctl_hash_size, "I", | 0, 0, sysctl_hash_size, "I", | ||||
"Default hash table size"); | "Default hash table size"); | ||||
SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit, | SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit, | ||||
▲ Show 20 Lines • Show All 179 Lines • ▼ Show 20 Lines | if (q_size != 0) { | ||||
/* | /* | ||||
* Queue is empty, find for how long the queue has been | * Queue is empty, find for how long the queue has been | ||||
* empty and use a lookup table for computing | * empty and use a lookup table for computing | ||||
* (1 - w_q)^(idle_time/s) where s is the time to send a | * (1 - w_q)^(idle_time/s) where s is the time to send a | ||||
* (small) packet. | * (small) packet. | ||||
* XXX check wraps... | * XXX check wraps... | ||||
*/ | */ | ||||
if (q->avg) { | if (q->avg) { | ||||
u_int t = div64((dn_cfg.curr_time - q->q_time), fs->lookup_step); | u_int t = div64((V_dn_cfg.curr_time - q->q_time), fs->lookup_step); | ||||
q->avg = (t < fs->lookup_depth) ? | q->avg = (t < fs->lookup_depth) ? | ||||
SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0; | SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0; | ||||
} | } | ||||
} | } | ||||
/* Should i drop? */ | /* Should i drop? */ | ||||
if (q->avg < fs->min_th) { | if (q->avg < fs->min_th) { | ||||
▲ Show 20 Lines • Show All 158 Lines • ▼ Show 20 Lines | #endif | ||||
mq_append(&q->mq, m); | mq_append(&q->mq, m); | ||||
q->ni.length++; | q->ni.length++; | ||||
q->ni.len_bytes += len; | q->ni.len_bytes += len; | ||||
ni->length++; | ni->length++; | ||||
ni->len_bytes += len; | ni->len_bytes += len; | ||||
return (0); | return (0); | ||||
drop: | drop: | ||||
dn_cfg.io_pkt_drop++; | V_dn_cfg.io_pkt_drop++; | ||||
q->ni.drops++; | q->ni.drops++; | ||||
ni->drops++; | ni->drops++; | ||||
FREE_PKT(m); | FREE_PKT(m); | ||||
return (1); | return (1); | ||||
} | } | ||||
/* | /* | ||||
* Fetch packets from the delay line which are due now. If there are | * Fetch packets from the delay line which are due now. If there are | ||||
Show All 12 Lines | while ((m = dline->mq.head) != NULL) { | ||||
if (!DN_KEY_LEQ(pkt->output_time, now)) | if (!DN_KEY_LEQ(pkt->output_time, now)) | ||||
break; | break; | ||||
dline->mq.head = m->m_nextpkt; | dline->mq.head = m->m_nextpkt; | ||||
dline->mq.count--; | dline->mq.count--; | ||||
mq_append(q, m); | mq_append(q, m); | ||||
} | } | ||||
if (m != NULL) { | if (m != NULL) { | ||||
dline->oid.subtype = 1; /* in heap */ | dline->oid.subtype = 1; /* in heap */ | ||||
heap_insert(&dn_cfg.evheap, pkt->output_time, dline); | heap_insert(&V_dn_cfg.evheap, pkt->output_time, dline); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Convert the additional MAC overheads/delays into an equivalent | * Convert the additional MAC overheads/delays into an equivalent | ||||
* number of bits for the given data rate. The samples are | * number of bits for the given data rate. The samples are | ||||
* in milliseconds so we need to divide by 1000. | * in milliseconds so we need to divide by 1000. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now) | ||||
while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) { | while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) { | ||||
uint64_t len_scaled; | uint64_t len_scaled; | ||||
done++; | done++; | ||||
len_scaled = (bw == 0) ? 0 : hz * | len_scaled = (bw == 0) ? 0 : hz * | ||||
(m->m_pkthdr.len * 8 + extra_bits(m, s)); | (m->m_pkthdr.len * 8 + extra_bits(m, s)); | ||||
si->credit -= len_scaled; | si->credit -= len_scaled; | ||||
/* Move packet in the delay line */ | /* Move packet in the delay line */ | ||||
dn_tag_get(m)->output_time = dn_cfg.curr_time + s->link.delay ; | dn_tag_get(m)->output_time = V_dn_cfg.curr_time + s->link.delay ; | ||||
mq_append(&si->dline.mq, m); | mq_append(&si->dline.mq, m); | ||||
} | } | ||||
/* | /* | ||||
* If credit >= 0 the instance is idle, mark time. | * If credit >= 0 the instance is idle, mark time. | ||||
* Otherwise put back in the heap, and adjust the output | * Otherwise put back in the heap, and adjust the output | ||||
* time of the last inserted packet, m, which was too early. | * time of the last inserted packet, m, which was too early. | ||||
*/ | */ | ||||
if (si->credit >= 0) { | if (si->credit >= 0) { | ||||
si->idle_time = now; | si->idle_time = now; | ||||
} else { | } else { | ||||
uint64_t t; | uint64_t t; | ||||
KASSERT (bw > 0, ("bw=0 and credit<0 ?")); | KASSERT (bw > 0, ("bw=0 and credit<0 ?")); | ||||
t = div64(bw - 1 - si->credit, bw); | t = div64(bw - 1 - si->credit, bw); | ||||
if (m) | if (m) | ||||
dn_tag_get(m)->output_time += t; | dn_tag_get(m)->output_time += t; | ||||
si->kflags |= DN_ACTIVE; | si->kflags |= DN_ACTIVE; | ||||
heap_insert(&dn_cfg.evheap, now + t, si); | heap_insert(&V_dn_cfg.evheap, now + t, si); | ||||
} | } | ||||
if (delay_line_idle && done) | if (delay_line_idle && done) | ||||
transmit_event(q, &si->dline, now); | transmit_event(q, &si->dline, now); | ||||
return q->head; | return q->head; | ||||
} | } | ||||
/* | /* | ||||
* The timer handler for dummynet. Time is computed in ticks, but | * The timer handler for dummynet. Time is computed in ticks, but | ||||
* the code is tolerant to the actual rate at which this is called. | * the code is tolerant to the actual rate at which this is called. | ||||
* Once complete, the function reschedules itself for the next tick. | * Once complete, the function reschedules itself for the next tick. | ||||
*/ | */ | ||||
void | void | ||||
dummynet_task(void *context, int pending) | dummynet_task(void *context, int pending) | ||||
{ | { | ||||
struct timeval t; | struct timeval t; | ||||
struct mq q = { NULL, NULL }; /* queue to accumulate results */ | struct mq q = { NULL, NULL }; /* queue to accumulate results */ | ||||
CURVNET_SET((struct vnet *)context); | /* We're a NET_TASK, so we always will be. */ | ||||
NET_EPOCH_ASSERT(); | |||||
VNET_ITERATOR_DECL(vnet_iter); | |||||
VNET_LIST_RLOCK(); | |||||
VNET_FOREACH(vnet_iter) { | |||||
memset(&q, 0, sizeof(struct mq)); | |||||
kp: Is it worth asserting that it's empty at this point? (I.e. before we memset(0) it?) | |||||
CURVNET_SET(vnet_iter); | |||||
DN_BH_WLOCK(); | DN_BH_WLOCK(); | ||||
/* Update number of lost(coalesced) ticks. */ | /* Update number of lost(coalesced) ticks. */ | ||||
dn_cfg.tick_lost += pending - 1; | V_dn_cfg.tick_lost += pending - 1; | ||||
getmicrouptime(&t); | getmicrouptime(&t); | ||||
/* Last tick duration (usec). */ | /* Last tick duration (usec). */ | ||||
dn_cfg.tick_last = (t.tv_sec - dn_cfg.prev_t.tv_sec) * 1000000 + | V_dn_cfg.tick_last = (t.tv_sec - V_dn_cfg.prev_t.tv_sec) * 1000000 + | ||||
(t.tv_usec - dn_cfg.prev_t.tv_usec); | (t.tv_usec - V_dn_cfg.prev_t.tv_usec); | ||||
/* Last tick vs standard tick difference (usec). */ | /* Last tick vs standard tick difference (usec). */ | ||||
dn_cfg.tick_delta = (dn_cfg.tick_last * hz - 1000000) / hz; | V_dn_cfg.tick_delta = (V_dn_cfg.tick_last * hz - 1000000) / hz; | ||||
/* Accumulated tick difference (usec). */ | /* Accumulated tick difference (usec). */ | ||||
dn_cfg.tick_delta_sum += dn_cfg.tick_delta; | V_dn_cfg.tick_delta_sum += V_dn_cfg.tick_delta; | ||||
dn_cfg.prev_t = t; | V_dn_cfg.prev_t = t; | ||||
/* | /* | ||||
Not Done Inline ActionsShouldn't tick_last be V_tick_last? (I think I've moved it into dn_parms in a different proposed patch, so it's probably academic.) kp: Shouldn't tick_last be V_tick_last?
(I think I've moved it into dn_parms in a different… | |||||
* Adjust curr_time if the accumulated tick difference is | * Adjust curr_time if the accumulated tick difference is | ||||
* greater than the 'standard' tick. Since curr_time should | * greater than the 'standard' tick. Since curr_time should | ||||
* be monotonically increasing, we do positive adjustments | * be monotonically increasing, we do positive adjustments | ||||
* as required, and throttle curr_time in case of negative | * as required, and throttle curr_time in case of negative | ||||
* adjustment. | * adjustment. | ||||
*/ | */ | ||||
dn_cfg.curr_time++; | V_dn_cfg.curr_time++; | ||||
if (dn_cfg.tick_delta_sum - tick >= 0) { | if (V_dn_cfg.tick_delta_sum - tick >= 0) { | ||||
int diff = dn_cfg.tick_delta_sum / tick; | int diff = V_dn_cfg.tick_delta_sum / tick; | ||||
dn_cfg.curr_time += diff; | V_dn_cfg.curr_time += diff; | ||||
dn_cfg.tick_diff += diff; | V_dn_cfg.tick_diff += diff; | ||||
dn_cfg.tick_delta_sum %= tick; | V_dn_cfg.tick_delta_sum %= tick; | ||||
dn_cfg.tick_adjustment++; | V_dn_cfg.tick_adjustment++; | ||||
} else if (dn_cfg.tick_delta_sum + tick <= 0) { | } else if (V_dn_cfg.tick_delta_sum + tick <= 0) { | ||||
dn_cfg.curr_time--; | V_dn_cfg.curr_time--; | ||||
dn_cfg.tick_diff--; | V_dn_cfg.tick_diff--; | ||||
dn_cfg.tick_delta_sum += tick; | V_dn_cfg.tick_delta_sum += tick; | ||||
dn_cfg.tick_adjustment++; | V_dn_cfg.tick_adjustment++; | ||||
} | } | ||||
/* serve pending events, accumulate in q */ | /* serve pending events, accumulate in q */ | ||||
for (;;) { | for (;;) { | ||||
struct dn_id *p; /* generic parameter to handler */ | struct dn_id *p; /* generic parameter to handler */ | ||||
if (dn_cfg.evheap.elements == 0 || | if (V_dn_cfg.evheap.elements == 0 || | ||||
DN_KEY_LT(dn_cfg.curr_time, HEAP_TOP(&dn_cfg.evheap)->key)) | DN_KEY_LT(V_dn_cfg.curr_time, HEAP_TOP(&V_dn_cfg.evheap)->key)) | ||||
break; | break; | ||||
p = HEAP_TOP(&dn_cfg.evheap)->object; | p = HEAP_TOP(&V_dn_cfg.evheap)->object; | ||||
heap_extract(&dn_cfg.evheap, NULL); | heap_extract(&V_dn_cfg.evheap, NULL); | ||||
if (p->type == DN_SCH_I) { | if (p->type == DN_SCH_I) { | ||||
serve_sched(&q, (struct dn_sch_inst *)p, dn_cfg.curr_time); | serve_sched(&q, (struct dn_sch_inst *)p, V_dn_cfg.curr_time); | ||||
} else { /* extracted a delay line */ | } else { /* extracted a delay line */ | ||||
transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time); | transmit_event(&q, (struct delay_line *)p, V_dn_cfg.curr_time); | ||||
} | } | ||||
} | } | ||||
if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) { | if (V_dn_cfg.expire && ++V_dn_cfg.expire_cycle >= V_dn_cfg.expire) { | ||||
dn_cfg.expire_cycle = 0; | V_dn_cfg.expire_cycle = 0; | ||||
dn_drain_scheduler(); | dn_drain_scheduler(); | ||||
dn_drain_queue(); | dn_drain_queue(); | ||||
} | } | ||||
dn_reschedule(); | |||||
DN_BH_WUNLOCK(); | DN_BH_WUNLOCK(); | ||||
if (q.head != NULL) | if (q.head != NULL) | ||||
dummynet_send(q.head); | dummynet_send(q.head); | ||||
CURVNET_RESTORE(); | CURVNET_RESTORE(); | ||||
} | } | ||||
VNET_LIST_RUNLOCK(); | |||||
/* Schedule our next run. */ | |||||
dn_reschedule(); | |||||
} | |||||
Not Done Inline ActionsI think we only want to do this once, not once per vnet. Possibly we'll have to find the minimal timeout though. kp: I think we only want to do this once, not once per vnet.
Possibly we'll have to find the… | |||||
/* | /* | ||||
* forward a chain of packets to the proper destination. | * forward a chain of packets to the proper destination. | ||||
* This runs outside the dummynet lock. | * This runs outside the dummynet lock. | ||||
*/ | */ | ||||
static void | static void | ||||
dummynet_send(struct mbuf *m) | dummynet_send(struct mbuf *m) | ||||
{ | { | ||||
struct mbuf *n; | struct mbuf *n; | ||||
▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines | if (mtag == NULL) | ||||
return 1; /* Cannot allocate packet header. */ | return 1; /* Cannot allocate packet header. */ | ||||
m_tag_prepend(m, mtag); /* Attach to mbuf chain. */ | m_tag_prepend(m, mtag); /* Attach to mbuf chain. */ | ||||
dt = (struct dn_pkt_tag *)(mtag + 1); | dt = (struct dn_pkt_tag *)(mtag + 1); | ||||
dt->rule = fwa->rule; | dt->rule = fwa->rule; | ||||
dt->rule.info &= IPFW_ONEPASS; /* only keep this info */ | dt->rule.info &= IPFW_ONEPASS; /* only keep this info */ | ||||
dt->dn_dir = dir; | dt->dn_dir = dir; | ||||
dt->ifp = fwa->flags & IPFW_ARGS_OUT ? fwa->ifp : NULL; | dt->ifp = fwa->flags & IPFW_ARGS_OUT ? fwa->ifp : NULL; | ||||
/* dt->output time is updated as we move through */ | /* dt->output time is updated as we move through */ | ||||
dt->output_time = dn_cfg.curr_time; | dt->output_time = V_dn_cfg.curr_time; | ||||
dt->iphdr_off = (dir & PROTO_LAYER2) ? ETHER_HDR_LEN : 0; | dt->iphdr_off = (dir & PROTO_LAYER2) ? ETHER_HDR_LEN : 0; | ||||
return 0; | return 0; | ||||
} | } | ||||
/* | /* | ||||
* dummynet hook for packets. | * dummynet hook for packets. | ||||
* We use the argument to locate the flowset fs and the sched_set sch | * We use the argument to locate the flowset fs and the sched_set sch | ||||
* associated to it. Then we apply flow_mask and sched_mask to | * associated to it. Then we apply flow_mask and sched_mask to | ||||
Show All 15 Lines | if (fwa->flags & IPFW_ARGS_IN) | ||||
dir = DIR_IN; | dir = DIR_IN; | ||||
else | else | ||||
dir = DIR_OUT; | dir = DIR_OUT; | ||||
if (fwa->flags & IPFW_ARGS_ETHER) | if (fwa->flags & IPFW_ARGS_ETHER) | ||||
dir |= PROTO_LAYER2; | dir |= PROTO_LAYER2; | ||||
else if (fwa->flags & IPFW_ARGS_IP6) | else if (fwa->flags & IPFW_ARGS_IP6) | ||||
dir |= PROTO_IPV6; | dir |= PROTO_IPV6; | ||||
DN_BH_WLOCK(); | DN_BH_WLOCK(); | ||||
dn_cfg.io_pkt++; | V_dn_cfg.io_pkt++; | ||||
/* we could actually tag outside the lock, but who cares... */ | /* we could actually tag outside the lock, but who cares... */ | ||||
if (tag_mbuf(m, dir, fwa)) | if (tag_mbuf(m, dir, fwa)) | ||||
goto dropit; | goto dropit; | ||||
/* XXX locate_flowset could be optimised with a direct ref. */ | /* XXX locate_flowset could be optimised with a direct ref. */ | ||||
fs = dn_ht_find(dn_cfg.fshash, fs_id, 0, NULL); | fs = dn_ht_find(V_dn_cfg.fshash, fs_id, 0, NULL); | ||||
if (fs == NULL) | if (fs == NULL) | ||||
goto dropit; /* This queue/pipe does not exist! */ | goto dropit; /* This queue/pipe does not exist! */ | ||||
if (fs->sched == NULL) /* should not happen */ | if (fs->sched == NULL) /* should not happen */ | ||||
goto dropit; | goto dropit; | ||||
/* find scheduler instance, possibly applying sched_mask */ | /* find scheduler instance, possibly applying sched_mask */ | ||||
si = ipdn_si_find(fs->sched, &(fwa->f_id)); | si = ipdn_si_find(fs->sched, &(fwa->f_id)); | ||||
if (si == NULL) | if (si == NULL) | ||||
goto dropit; | goto dropit; | ||||
/* | /* | ||||
* If the scheduler supports multiple queues, find the right one | * If the scheduler supports multiple queues, find the right one | ||||
* (otherwise it will be ignored by enqueue). | * (otherwise it will be ignored by enqueue). | ||||
*/ | */ | ||||
if (fs->sched->fp->flags & DN_MULTIQUEUE) { | if (fs->sched->fp->flags & DN_MULTIQUEUE) { | ||||
q = ipdn_q_find(fs, si, &(fwa->f_id)); | q = ipdn_q_find(fs, si, &(fwa->f_id)); | ||||
if (q == NULL) | if (q == NULL) | ||||
goto dropit; | goto dropit; | ||||
} | } | ||||
if (fs->sched->fp->enqueue(si, q, m)) { | if (fs->sched->fp->enqueue(si, q, m)) { | ||||
/* packet was dropped by enqueue() */ | /* packet was dropped by enqueue() */ | ||||
m = *m0 = NULL; | m = *m0 = NULL; | ||||
/* dn_enqueue already increases io_pkt_drop */ | /* dn_enqueue already increases io_pkt_drop */ | ||||
dn_cfg.io_pkt_drop--; | V_dn_cfg.io_pkt_drop--; | ||||
goto dropit; | goto dropit; | ||||
} | } | ||||
if (si->kflags & DN_ACTIVE) { | if (si->kflags & DN_ACTIVE) { | ||||
m = *m0 = NULL; /* consumed */ | m = *m0 = NULL; /* consumed */ | ||||
goto done; /* already active, nothing to do */ | goto done; /* already active, nothing to do */ | ||||
} | } | ||||
/* compute the initial allowance */ | /* compute the initial allowance */ | ||||
if (si->idle_time < dn_cfg.curr_time) { | if (si->idle_time < V_dn_cfg.curr_time) { | ||||
/* Do this only on the first packet on an idle pipe */ | /* Do this only on the first packet on an idle pipe */ | ||||
struct dn_link *p = &fs->sched->link; | struct dn_link *p = &fs->sched->link; | ||||
si->sched_time = dn_cfg.curr_time; | si->sched_time = V_dn_cfg.curr_time; | ||||
si->credit = dn_cfg.io_fast ? p->bandwidth : 0; | si->credit = V_dn_cfg.io_fast ? p->bandwidth : 0; | ||||
if (p->burst) { | if (p->burst) { | ||||
uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth; | uint64_t burst = (V_dn_cfg.curr_time - si->idle_time) * p->bandwidth; | ||||
if (burst > p->burst) | if (burst > p->burst) | ||||
burst = p->burst; | burst = p->burst; | ||||
si->credit += burst; | si->credit += burst; | ||||
} | } | ||||
} | } | ||||
/* pass through scheduler and delay line */ | /* pass through scheduler and delay line */ | ||||
m = serve_sched(NULL, si, dn_cfg.curr_time); | m = serve_sched(NULL, si, V_dn_cfg.curr_time); | ||||
/* optimization -- pass it back to ipfw for immediate send */ | /* optimization -- pass it back to ipfw for immediate send */ | ||||
/* XXX Don't call dummynet_send() if scheduler return the packet | /* XXX Don't call dummynet_send() if scheduler return the packet | ||||
* just enqueued. This avoids a lock order reversal. | * just enqueued. This avoids a lock order reversal. | ||||
* | * | ||||
*/ | */ | ||||
if (/*dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) { | if (/*V_dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) { | ||||
/* fast io, rename the tag * to carry reinject info. */ | /* fast io, rename the tag * to carry reinject info. */ | ||||
struct m_tag *tag = m_tag_first(m); | struct m_tag *tag = m_tag_first(m); | ||||
tag->m_tag_cookie = MTAG_IPFW_RULE; | tag->m_tag_cookie = MTAG_IPFW_RULE; | ||||
tag->m_tag_id = 0; | tag->m_tag_id = 0; | ||||
dn_cfg.io_pkt_fast++; | V_dn_cfg.io_pkt_fast++; | ||||
if (m->m_nextpkt != NULL) { | if (m->m_nextpkt != NULL) { | ||||
printf("dummynet: fast io: pkt chain detected!\n"); | printf("dummynet: fast io: pkt chain detected!\n"); | ||||
m->m_nextpkt = NULL; | m->m_nextpkt = NULL; | ||||
} | } | ||||
m = NULL; | m = NULL; | ||||
} else { | } else { | ||||
*m0 = NULL; | *m0 = NULL; | ||||
} | } | ||||
done: | done: | ||||
DN_BH_WUNLOCK(); | DN_BH_WUNLOCK(); | ||||
if (m) | if (m) | ||||
dummynet_send(m); | dummynet_send(m); | ||||
return 0; | return 0; | ||||
dropit: | dropit: | ||||
dn_cfg.io_pkt_drop++; | V_dn_cfg.io_pkt_drop++; | ||||
DN_BH_WUNLOCK(); | DN_BH_WUNLOCK(); | ||||
if (m) | if (m) | ||||
FREE_PKT(m); | FREE_PKT(m); | ||||
*m0 = NULL; | *m0 = NULL; | ||||
return (fs && (fs->fs.flags & DN_NOERROR)) ? 0 : ENOBUFS; | return (fs && (fs->fs.flags & DN_NOERROR)) ? 0 : ENOBUFS; | ||||
} | } |
Is it worth asserting that it's empty at this point? (I.e. before we memset(0) it?)