Index: head/sys/netpfil/ipfw/dn_sched_fq_pie.c =================================================================== --- head/sys/netpfil/ipfw/dn_sched_fq_pie.c +++ head/sys/netpfil/ipfw/dn_sched_fq_pie.c @@ -111,7 +111,7 @@ int deficit; int active; /* 1: flow is active (in a list) */ struct pie_status pst; /* pie status variables */ - struct fq_pie_si *psi; /* parent scheduler instance */ + struct fq_pie_si_extra *psi_extra; STAILQ_ENTRY(fq_pie_flow) flowchain; }; @@ -120,23 +120,30 @@ struct dn_sch_fq_pie_parms cfg; }; + +/* fq_pie scheduler instance extra state vars. + * The purpose of separation this structure is to preserve number of active + * sub-queues and the flows array pointer even after the scheduler instance + * is destroyed. + * Preserving these varaiables allows freeing the allocated memory by + * fqpie_callout_cleanup() independently from fq_pie_free_sched(). + */ +struct fq_pie_si_extra { + uint32_t nr_active_q; /* number of active queues */ + struct fq_pie_flow *flows; /* array of flows (queues) */ + }; + /* fq_pie scheduler instance */ struct fq_pie_si { - struct dn_sch_inst _si; /* standard scheduler instance */ + struct dn_sch_inst _si; /* standard scheduler instance. SHOULD BE FIRST */ struct dn_queue main_q; /* main queue is after si directly */ - uint32_t nr_active_q; - struct fq_pie_flow *flows; /* array of flows (queues) */ uint32_t perturbation; /* random value */ struct fq_pie_list newflows; /* list of new queues */ struct fq_pie_list oldflows; /* list of old queues */ + struct fq_pie_si_extra *si_extra; /* extra state vars*/ }; -struct mem_to_free { - void *mem_flows; - void *mem_callout; -}; -static struct mtx freemem_mtx; static struct dn_alg fq_pie_desc; /* Default FQ-PIE parameters including PIE */ @@ -371,22 +378,6 @@ int64_t p, prob, oldprob; aqm_time_t now; - /* dealing with race condition */ - if (callout_pending(&pst->aqm_pie_callout)) { - /* callout was reset */ - mtx_unlock(&pst->lock_mtx); - return; - } - - if (!callout_active(&pst->aqm_pie_callout)) { - /* callout was stopped */ - mtx_unlock(&pst->lock_mtx); - mtx_destroy(&pst->lock_mtx); - q->psi->nr_active_q--; - return; - } - callout_deactivate(&pst->aqm_pie_callout); - now = AQM_UNOW; pprms = pst->parms; prob = pst->drop_prob; @@ -524,20 +515,17 @@ * Initialize PIE for sub-queue 'q' */ static int -pie_init(struct fq_pie_flow *q) +pie_init(struct fq_pie_flow *q, struct fq_pie_schk *fqpie_schk) { struct pie_status *pst=&q->pst; struct dn_aqm_pie_parms *pprms = pst->parms; - struct fq_pie_schk *fqpie_schk; - - fqpie_schk = (struct fq_pie_schk *)(q->psi->_si.sched+1); - int err = 0; + int err = 0; if (!pprms){ D("AQM_PIE is not configured"); err = EINVAL; } else { - q->psi->nr_active_q++; + q->psi_extra->nr_active_q++; /* For speed optimization, we caculate 1/3 queue size once here */ // XXX limit divided by number of queues divided by 3 ??? @@ -553,8 +541,36 @@ } /* + * callout function to destroy PIE lock, and free fq_pie flows and fq_pie si + * extra memory when number of active sub-queues reaches zero. + * 'x' is a fq_pie_flow to be destroyed + */ +static void +fqpie_callout_cleanup(void *x) +{ + struct fq_pie_flow *q = x; + struct pie_status *pst = &q->pst; + struct fq_pie_si_extra *psi_extra; + + mtx_unlock(&pst->lock_mtx); + mtx_destroy(&pst->lock_mtx); + psi_extra = q->psi_extra; + + DN_BH_WLOCK(); + psi_extra->nr_active_q--; + + /* when all sub-queues are destroyed, free flows fq_pie extra vars memory */ + if (!psi_extra->nr_active_q) { + free(psi_extra->flows, M_DUMMYNET); + free(psi_extra, M_DUMMYNET); + fq_pie_desc.ref_count--; + } + DN_BH_WUNLOCK(); +} + +/* * Clean up PIE status for sub-queue 'q' - * Stop callout timer and destroy mtx + * Stop callout timer and destroy mtx using fqpie_callout_cleanup() callout. */ static int pie_cleanup(struct fq_pie_flow *q) @@ -562,14 +578,9 @@ struct pie_status *pst = &q->pst; mtx_lock(&pst->lock_mtx); - if (callout_stop(&pst->aqm_pie_callout) || !(pst->sflags & PIE_ACTIVE)) { - mtx_unlock(&pst->lock_mtx); - mtx_destroy(&pst->lock_mtx); - q->psi->nr_active_q--; - } else { - mtx_unlock(&pst->lock_mtx); - return EBUSY; - } + callout_reset_sbt(&pst->aqm_pie_callout, + SBT_1US, 0, fqpie_callout_cleanup, q, 0); + mtx_unlock(&pst->lock_mtx); return 0; } @@ -831,10 +842,12 @@ struct fq_pie_schk *schk; struct dn_sch_fq_pie_parms *param; struct dn_queue *mainq; + struct fq_pie_flow *flows; int idx, drop, i, maxidx; mainq = (struct dn_queue *)(_si + 1); si = (struct fq_pie_si *)_si; + flows = si->si_extra->flows; schk = (struct fq_pie_schk *)(si->_si.sched+1); param = &schk->cfg; @@ -844,7 +857,7 @@ /* enqueue packet into appropriate queue using PIE AQM. * Note: 'pie_enqueue' function returns 1 only when it unable to * add timestamp to packet (no limit check)*/ - drop = pie_enqueue(&si->flows[idx], m, si); + drop = pie_enqueue(&flows[idx], m, si); /* pie unable to timestamp a packet */ if (drop) @@ -853,11 +866,11 @@ /* If the flow (sub-queue) is not active ,then add it to tail of * new flows list, initialize and activate it. */ - if (!si->flows[idx].active) { - STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain); - si->flows[idx].deficit = param->quantum; - fq_activate_pie(&si->flows[idx]); - si->flows[idx].active = 1; + if (!flows[idx].active) { + STAILQ_INSERT_TAIL(&si->newflows, &flows[idx], flowchain); + flows[idx].deficit = param->quantum; + fq_activate_pie(&flows[idx]); + flows[idx].active = 1; } /* check the limit for all queues and remove a packet from the @@ -866,15 +879,15 @@ if (mainq->ni.length > schk->cfg.limit) { /* find first active flow */ for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++) - if (si->flows[maxidx].active) + if (flows[maxidx].active) break; if (maxidx < schk->cfg.flows_cnt) { /* find the largest sub- queue */ for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++) - if (si->flows[i].active && si->flows[i].stats.length > - si->flows[maxidx].stats.length) + if (flows[i].active && flows[i].stats.length > + flows[maxidx].stats.length) maxidx = i; - pie_drop_head(&si->flows[maxidx], si); + pie_drop_head(&flows[maxidx], si); drop = 1; } } @@ -974,12 +987,13 @@ struct fq_pie_si *si; struct dn_queue *q; struct fq_pie_schk *schk; + struct fq_pie_flow *flows; int i; si = (struct fq_pie_si *)_si; schk = (struct fq_pie_schk *)(_si->sched+1); - if(si->flows) { + if(si->si_extra) { D("si already configured!"); return 0; } @@ -990,17 +1004,27 @@ q->_si = _si; q->fs = _si->sched->fs; + /* allocate memory for scheduler instance extra vars */ + si->si_extra = malloc(sizeof(struct fq_pie_si_extra), + M_DUMMYNET, M_NOWAIT | M_ZERO); + if (si->si_extra == NULL) { + D("cannot allocate memory for fq_pie si extra vars"); + return ENOMEM ; + } /* allocate memory for flows array */ - si->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_pie_flow), + si->si_extra->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_pie_flow), M_DUMMYNET, M_NOWAIT | M_ZERO); - if (si->flows == NULL) { - D("cannot allocate memory for fq_pie configuration parameters"); + flows = si->si_extra->flows; + if (flows == NULL) { + free(si->si_extra, M_DUMMYNET); + si->si_extra = NULL; + D("cannot allocate memory for fq_pie flows"); return ENOMEM ; } /* init perturbation for this si */ si->perturbation = random(); - si->nr_active_q = 0; + si->si_extra->nr_active_q = 0; /* init the old and new flows lists */ STAILQ_INIT(&si->newflows); @@ -1008,45 +1032,16 @@ /* init the flows (sub-queues) */ for (i = 0; i < schk->cfg.flows_cnt; i++) { - si->flows[i].pst.parms = &schk->cfg.pcfg; - si->flows[i].psi = si; - pie_init(&si->flows[i]); - } - - /* init mtx lock and callout function for free memory */ - if (!fq_pie_desc.ref_count) { - mtx_init(&freemem_mtx, "mtx_pie", NULL, MTX_DEF); + flows[i].pst.parms = &schk->cfg.pcfg; + flows[i].psi_extra = si->si_extra; + pie_init(&flows[i], schk); } - mtx_lock(&freemem_mtx); fq_pie_desc.ref_count++; - mtx_unlock(&freemem_mtx); return 0; } -/* - * Free FQ-PIE flows memory callout function. - * This function is scheduled when a flow or more still active and - * the scheduer is about to be destroyed, to prevent memory leak. - */ -static void -free_flows(void *_mem) -{ - struct mem_to_free *mem = _mem; - - free(mem->mem_flows, M_DUMMYNET); - free(mem->mem_callout, M_DUMMYNET); - free(_mem, M_DUMMYNET); - - fq_pie_desc.ref_count--; - if (!fq_pie_desc.ref_count) { - mtx_unlock(&freemem_mtx); - mtx_destroy(&freemem_mtx); - } else - mtx_unlock(&freemem_mtx); - //D("mem freed ok!"); -} /* * Free fq_pie scheduler instance. @@ -1056,61 +1051,17 @@ { struct fq_pie_si *si; struct fq_pie_schk *schk; + struct fq_pie_flow *flows; int i; si = (struct fq_pie_si *)_si; schk = (struct fq_pie_schk *)(_si->sched+1); - + flows = si->si_extra->flows; for (i = 0; i < schk->cfg.flows_cnt; i++) { - pie_cleanup(&si->flows[i]); - } - - /* if there are still some queues have a callout going to start, - * we cannot free flows memory. If we do so, a panic can happen - * as prob calculate callout function uses flows memory. - */ - if (!si->nr_active_q) { - /* free the flows array */ - free(si->flows , M_DUMMYNET); - si->flows = NULL; - mtx_lock(&freemem_mtx); - fq_pie_desc.ref_count--; - if (!fq_pie_desc.ref_count) { - mtx_unlock(&freemem_mtx); - mtx_destroy(&freemem_mtx); - } else - mtx_unlock(&freemem_mtx); - //D("ok!"); - return 0; - } else { - /* memory leak happens here. So, we register a callout function to free - * flows memory later. - */ - D("unable to stop all fq_pie sub-queues!"); - mtx_lock(&freemem_mtx); - - struct callout *mem_callout; - struct mem_to_free *mem; - - mem = malloc(sizeof(*mem), M_DUMMYNET, - M_NOWAIT | M_ZERO); - mem_callout = malloc(sizeof(*mem_callout), M_DUMMYNET, - M_NOWAIT | M_ZERO); - - callout_init_mtx(mem_callout, &freemem_mtx, - CALLOUT_RETURNUNLOCKED); - - mem->mem_flows = si->flows; - mem->mem_callout = mem_callout; - callout_reset_sbt(mem_callout, - (uint64_t)(si->flows[0].pst.parms->tupdate + 1000) * SBT_1US, - 0, free_flows, mem, 0); - - si->flows = NULL; - mtx_unlock(&freemem_mtx); - - return EBUSY; + pie_cleanup(&flows[i]); } + si->si_extra = NULL; + return 0; } /*