diff --git a/sys/netpfil/ipfw/dn_aqm_codel.c b/sys/netpfil/ipfw/dn_aqm_codel.c
index a095b33b0833..8e90dcdb1e5b 100644
--- a/sys/netpfil/ipfw/dn_aqm_codel.c
+++ b/sys/netpfil/ipfw/dn_aqm_codel.c
@@ -1,444 +1,442 @@
 /*
  * Codel - The Controlled-Delay Active Queue Management algorithm.
  *
  * $FreeBSD$
  * 
  * Copyright (C) 2016 Centre for Advanced Internet Architectures,
  *  Swinburne University of Technology, Melbourne, Australia.
  * Portions of this code were made possible in part by a gift from 
  *  The Comcast Innovation Fund.
  * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>	/* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
 #include <net/netisr.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>		/* ip_len, ip_off */
 #include <netinet/ip_var.h>	/* ip_output(), IP_FORWARDING */
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 #include <netinet/if_ether.h> /* various ether_* routines */
 #include <netinet/ip6.h>       /* for ip6_input, ip6_output prototypes */
 #include <netinet6/ip6_var.h>
 #include <netpfil/ipfw/dn_heap.h>
 
 #ifdef NEW_AQM
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/ip_dn_private.h>
 #include <netpfil/ipfw/dn_aqm.h>
 #include <netpfil/ipfw/dn_aqm_codel.h>
 #include <netpfil/ipfw/dn_sched.h>
 
 #define DN_AQM_CODEL 1
 
 static struct dn_aqm codel_desc;
 
 /* default codel parameters: target 5ms, interval 100ms, no flags */
 struct dn_aqm_codel_parms codel_sysctl = {5000 * AQM_TIME_1US,
 	100000 * AQM_TIME_1US, 0};
 
 static int
 codel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	long  value;
 
 	value = codel_sysctl.interval;
 	value /= AQM_TIME_1US;
 	error = sysctl_handle_long(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (value < 1 || value > 100 * AQM_TIME_1S)
 		return (EINVAL);
 	codel_sysctl.interval = value * AQM_TIME_1US;
 	return (0);
 }
 
 static int
 codel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	long  value;
 
 	value = codel_sysctl.target;
 	value /= AQM_TIME_1US;
 	error = sysctl_handle_long(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	D("%ld", value);
 	if (value < 1 || value > 5 * AQM_TIME_1S)
 		return (EINVAL);
 	codel_sysctl.target = value * AQM_TIME_1US;
 	return (0);
 }
 
 /* defining Codel sysctl variables */
 SYSBEGIN(f4)
 
 SYSCTL_DECL(_net_inet);
 SYSCTL_DECL(_net_inet_ip);
 SYSCTL_DECL(_net_inet_ip_dummynet);
 static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, 
 	codel, CTLFLAG_RW, 0, "CODEL");
 
 #ifdef SYSCTL_NODE
 SYSCTL_PROC(_net_inet_ip_dummynet_codel, OID_AUTO, target,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, codel_sysctl_target_handler, "L",
 	"CoDel target in microseconds");
 
 SYSCTL_PROC(_net_inet_ip_dummynet_codel, OID_AUTO, interval,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, codel_sysctl_interval_handler, "L",
 	"CoDel interval in microsecond");
 #endif
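 
 /*
  * Usage sketch (illustrative): with the module loaded, the handlers above
  * are reachable from userland via sysctl(8), e.g.
  *
  *   sysctl net.inet.ip.dummynet.codel.target=5000
  *   sysctl net.inet.ip.dummynet.codel.interval=100000
  *
  * Values are in microseconds and subject to the handlers' range checks.
  */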
 
 /* This function computes codel_interval/sqrt(count) 
  *  Newton's method of approximation is used to compute 1/sqrt(count).
  * http://betterexplained.com/articles/
  * 	understanding-quakes-fast-inverse-square-root/ 
  */
 aqm_time_t 
 control_law(struct codel_status *cst, struct dn_aqm_codel_parms *cprms,
 	aqm_time_t t)
 {
 	uint32_t count;
 	uint64_t temp;
 	count = cst->count;
 
 	/* we don't approximate isqrt(1), so the result is more accurate */
 	if (count == 1) {
 		/* prepare isqrt (old guess) for the next iteration i.e. 1/sqrt(2)*/
 		cst->isqrt = (1UL<< FIX_POINT_BITS) * 7/10;
 		/* return time + isqrt(1)*interval */
 		return t + cprms->interval;
 	}
 
 	/* newguess = g(1.5 - 0.5*c*g^2), where g = old guess, c = count.
 	 * We multiply both sides by 2 to make all the constants integers:
 	 * newguess * 2 = g(3 - c*g^2)
 	 * so the result must be halved at the end.
 	 * Fixed-point operations are used here.
 	 */
 
 	/* Calculate g^2 */
 	temp = (uint32_t) cst->isqrt * cst->isqrt;
 	/* Calculate (3 - c*g^2) i.e. (3 - c * temp) */
 	temp = (3ULL<< (FIX_POINT_BITS*2)) - (count * temp);
 
 	/*
 	 * Divide by 2 because we multiplied the original equation by two.
 	 * Also, shift the result right by 8 bits to prevent overflow.
 	 */
 	temp >>= (1 + 8); 
 
 	/*  Now, temp = (1.5 - 0.5*c*g^2)
 	 * Calculate g (1.5 - 0.5*c*g^2) i.e. g * temp 
 	 */
 	temp = (cst->isqrt * temp) >> (FIX_POINT_BITS + FIX_POINT_BITS - 8);
 	cst->isqrt = temp;
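 
 	/*
 	 * Worked example (illustrative values): count = 2, old guess g = 0.7,
 	 * i.e. isqrt = 0.7 * 2^FIX_POINT_BITS:
 	 *   g^2                   -> 0.49 * 2^(2*FIX_POINT_BITS)
 	 *   3 - count*g^2         -> 2.02 * 2^(2*FIX_POINT_BITS)
 	 *   >> (1 + 8)            -> 1.01 * 2^(2*FIX_POINT_BITS - 8)
 	 *   g * temp, final shift -> ~0.707 * 2^FIX_POINT_BITS
 	 * One iteration already gives 1/sqrt(2) ~= 0.7071, so the next drop
 	 * gets scheduled about interval/sqrt(2) after 't'.
 	 */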
 
 	 /* calculate codel_interval/sqrt(count) */
 	 return t + ((cprms->interval * temp) >> FIX_POINT_BITS);
 }
 
 /*
  * Extract a packet from the head of queue 'q'
  * Return a packet or NULL if the queue is empty.
  * Also extract packet's timestamp from mtag.
  */
 struct mbuf *
 codel_extract_head(struct dn_queue *q, aqm_time_t *pkt_ts)
 {
 	struct m_tag *mtag;
 	struct mbuf *m = q->mq.head;
 
 	if (m == NULL)
 		return m;
 	q->mq.head = m->m_nextpkt;
 
 	/* Update stats */
 	update_stats(q, -m->m_pkthdr.len, 0);
 
 	if (q->ni.length == 0) /* queue is now idle */
 			q->q_time = dn_cfg.curr_time;
 
 	/* extract packet TS*/
 	mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
 	if (mtag == NULL) {
 		D("Codel timestamp mtag not found!");
 		*pkt_ts = 0;
 	} else {
 		*pkt_ts = *(aqm_time_t *)(mtag + 1);
 		m_tag_delete(m,mtag); 
 	}
 
 	return m;
 }
 
 /*
  * Enqueue a packet 'm' in queue 'q'
  */
 static int
 aqm_codel_enqueue(struct dn_queue *q, struct mbuf *m)
 {
 	struct dn_fs *f;
 	uint64_t len;
 	struct codel_status *cst;	/*codel status variables */
 	struct m_tag *mtag;
 
 	f = &(q->fs->fs);
 	len = m->m_pkthdr.len;
 	cst = q->aqm_status;
 	if(!cst) {
 		D("Codel queue is not initialized\n");
 		goto drop;
 	}
 
 	/* Finding maximum packet size */
 	// XXX we can get MTU from driver instead 
 	if (len > cst->maxpkt_size)
 		cst->maxpkt_size = len;
 
 	/* check the queue size and tail-drop the packet if the limit is exceeded */
 	if (f->flags & DN_QSIZE_BYTES) {
 		if ( q->ni.len_bytes > f->qsize)
 			goto drop;
 	}
 	else {
 		if ( q->ni.length >= f->qsize)
 			goto drop;
 	}
 
 	/* Add timestamp as mtag */
 	mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
 	if (mtag == NULL)
 		mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
 			sizeof(aqm_time_t), M_NOWAIT);
-	if (mtag == NULL) {
-		m_freem(m); 
+	if (mtag == NULL)
 		goto drop;
-	}
 
 	*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
 	m_tag_prepend(m, mtag);
 
 	mq_append(&q->mq, m);
 	update_stats(q, len, 0);
 	return (0);
 
 drop:
 	update_stats(q, 0, 1);
 	FREE_PKT(m);
 	return (1);
 }
 
 
 /* Dequeue a packet from queue q */
 static struct mbuf * 
 aqm_codel_dequeue(struct dn_queue *q)
 {
 	return codel_dequeue(q);
 }
 
 /* 
  * initialize Codel for queue 'q' 
  * First allocate memory for codel status.
  */
 static int 
 aqm_codel_init(struct dn_queue *q)
 {
 	struct codel_status *cst;
 
 	if (!q->fs->aqmcfg) {
 		D("Codel is not configure!d");
 		return EINVAL;
 	}
 
 	q->aqm_status = malloc(sizeof(struct codel_status),
 			 M_DUMMYNET, M_NOWAIT | M_ZERO);
 	if (q->aqm_status == NULL) {
 		D("Cannot allocate AQM_codel private data");
 		return ENOMEM;
 	}
 
 	/* init codel status variables */
 	cst = q->aqm_status;
 	cst->dropping=0;
 	cst->first_above_time=0;
 	cst->drop_next_time=0;
 	cst->count=0;
 	cst->maxpkt_size = 500;
 
 	/* increase reference counters */
 	codel_desc.ref_count++;
 
 	return 0;
 }
 
 /* 
  * Clean up Codel status for queue 'q' 
  * Destroy memory allocated for codel status.
  */
 static int
 aqm_codel_cleanup(struct dn_queue *q)
 {
 
 	if (q && q->aqm_status) {
 		free(q->aqm_status, M_DUMMYNET);
 		q->aqm_status = NULL;
 		/* decrease reference counters */
 		codel_desc.ref_count--;
 	}
 	else
 		D("Codel already cleaned up");
 	return 0;
 }
 
 /* 
  * Configure codel parameters;
  * also allocate memory for the codel configuration.
  */
 static int
 aqm_codel_config(struct dn_fsk* fs, struct dn_extra_parms *ep, int len)
 {
 	struct dn_aqm_codel_parms *ccfg;
 
 	int l = sizeof(struct dn_extra_parms);
 	if (len < l) {
 		D("invalid sched parms length got %d need %d", len, l);
 		return EINVAL;
 	}
 	/* we free the old cfg because the original allocation may
 	 * not be the same size as the new one (different AQM type).
 	 */
 	if (fs->aqmcfg) {
 		free(fs->aqmcfg, M_DUMMYNET);
 		fs->aqmcfg = NULL;
 	}
 
 	fs->aqmcfg = malloc(sizeof(struct dn_aqm_codel_parms),
 			 M_DUMMYNET, M_NOWAIT | M_ZERO);
 	if (fs->aqmcfg == NULL) {
 		D("cannot allocate AQM_codel configuration parameters");
 		return ENOMEM; 
 	}
 	
 	/* configure codel parameters */
 	ccfg = fs->aqmcfg;
 	
 	if (ep->par[0] < 0)
 		ccfg->target = codel_sysctl.target;
 	else
 		ccfg->target = ep->par[0] * AQM_TIME_1US;
 
 	if (ep->par[1] < 0)
 		ccfg->interval = codel_sysctl.interval;
 	else
 		ccfg->interval = ep->par[1] * AQM_TIME_1US;
 
 	if (ep->par[2] < 0)
 		ccfg->flags = 0;
 	else
 		ccfg->flags = ep->par[2];
 
 	/* bound codel configurations */
 	ccfg->target = BOUND_VAR(ccfg->target, 1, 5 * AQM_TIME_1S);
 	ccfg->interval = BOUND_VAR(ccfg->interval, 1, 5 * AQM_TIME_1S);
 	/* increase config reference counter */
 	codel_desc.cfg_ref_count++;
 
 	return 0;
 }
 
 /*
  * Deconfigure Codel and free memory allocation
  */
 static int
 aqm_codel_deconfig(struct dn_fsk* fs)
 {
 
 	if (fs && fs->aqmcfg) {
 		free(fs->aqmcfg, M_DUMMYNET);
 		fs->aqmcfg = NULL;
 		fs->aqmfp = NULL;
 		/* decrease config reference counter */
 		codel_desc.cfg_ref_count--;
 	}
 
 	return 0;
 }
 
 /* 
  * Retrieve Codel configuration parameters.
  */ 
 static int
 aqm_codel_getconfig(struct dn_fsk *fs, struct dn_extra_parms * ep)
 {
 	struct dn_aqm_codel_parms *ccfg;
 
 	if (fs->aqmcfg) {
 		strlcpy(ep->name, codel_desc.name, sizeof(ep->name));
 		ccfg = fs->aqmcfg;
 		ep->par[0] = ccfg->target / AQM_TIME_1US;
 		ep->par[1] = ccfg->interval / AQM_TIME_1US;
 		ep->par[2] = ccfg->flags;
 		return 0;
 	}
 	return 1;
 }
 
 static struct dn_aqm codel_desc = {
 	_SI( .type = )  DN_AQM_CODEL,
 	_SI( .name = )  "CODEL",
 	_SI( .enqueue = )  aqm_codel_enqueue,
 	_SI( .dequeue = )  aqm_codel_dequeue,
 	_SI( .config = )  aqm_codel_config,
 	_SI( .getconfig = )  aqm_codel_getconfig,
 	_SI( .deconfig = )  aqm_codel_deconfig,
 	_SI( .init = )  aqm_codel_init,
 	_SI( .cleanup = )  aqm_codel_cleanup,
 };
 
 DECLARE_DNAQM_MODULE(dn_aqm_codel, &codel_desc);
 
 
 #endif
diff --git a/sys/netpfil/ipfw/dn_aqm_pie.c b/sys/netpfil/ipfw/dn_aqm_pie.c
index c306a4caac9d..e106fd0121f3 100644
--- a/sys/netpfil/ipfw/dn_aqm_pie.c
+++ b/sys/netpfil/ipfw/dn_aqm_pie.c
@@ -1,810 +1,810 @@
 /*
  * PIE - Proportional Integral controller Enhanced AQM algorithm.
  *
  * $FreeBSD$
  * 
  * Copyright (C) 2016 Centre for Advanced Internet Architectures,
  *  Swinburne University of Technology, Melbourne, Australia.
  * Portions of this code were made possible in part by a gift from 
  *  The Comcast Innovation Fund.
  * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>	/* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
 #include <net/netisr.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>		/* ip_len, ip_off */
 #include <netinet/ip_var.h>	/* ip_output(), IP_FORWARDING */
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 #include <netinet/if_ether.h> /* various ether_* routines */
 #include <netinet/ip6.h>       /* for ip6_input, ip6_output prototypes */
 #include <netinet6/ip6_var.h>
 #include <netpfil/ipfw/dn_heap.h>
 
 #ifdef NEW_AQM
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/ip_dn_private.h>
 #include <netpfil/ipfw/dn_aqm.h>
 #include <netpfil/ipfw/dn_aqm_pie.h>
 #include <netpfil/ipfw/dn_sched.h>
 
 /* for debugging */
 #include <sys/syslog.h>
 
 static struct dn_aqm pie_desc;
 
 /*  PIE defaults
  * target=15ms, tupdate=15ms, max_burst=150ms, 
  * max_ecnth=0.1, alpha=0.125, beta=1.25, 
  */
 struct dn_aqm_pie_parms pie_sysctl = 
 	{ 15 * AQM_TIME_1MS,  15 * AQM_TIME_1MS, 150 * AQM_TIME_1MS,
 	PIE_SCALE/10, PIE_SCALE * 0.125, PIE_SCALE * 1.25,
 	PIE_CAPDROP_ENABLED | PIE_DEPRATEEST_ENABLED | PIE_DERAND_ENABLED };
 
 static int
 pie_sysctl_alpha_beta_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	long  value;
 
 	if (!strcmp(oidp->oid_name,"alpha"))
 		value = pie_sysctl.alpha;
 	else
 		value = pie_sysctl.beta;
 		
 	value = value * 1000 / PIE_SCALE;
 	error = sysctl_handle_long(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (value < 1 || value > 7 * PIE_SCALE)
 		return (EINVAL);
 	value = (value * PIE_SCALE) / 1000;
 	if (!strcmp(oidp->oid_name,"alpha"))
 			pie_sysctl.alpha = value;
 	else
 		pie_sysctl.beta = value;
 	return (0);
 }
 
 static int
 pie_sysctl_target_tupdate_maxb_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	long  value;
 
 	if (!strcmp(oidp->oid_name,"target"))
 		value = pie_sysctl.qdelay_ref;
 	else if (!strcmp(oidp->oid_name,"tupdate"))
 		value = pie_sysctl.tupdate;
 	else
 		value = pie_sysctl.max_burst;
 	
 	value = value / AQM_TIME_1US;
 	error = sysctl_handle_long(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (value < 1 || value > 10 * AQM_TIME_1S)
 		return (EINVAL);
 	value = value * AQM_TIME_1US;
 	
 	if (!strcmp(oidp->oid_name,"target"))
 		pie_sysctl.qdelay_ref  = value;
 	else if (!strcmp(oidp->oid_name,"tupdate"))
 		pie_sysctl.tupdate  = value;
 	else
 		pie_sysctl.max_burst = value;
 	return (0);
 }
 
 static int
 pie_sysctl_max_ecnth_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	long  value;
 
 	value = pie_sysctl.max_ecnth;
 	value = value * 1000 / PIE_SCALE;
 	error = sysctl_handle_long(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (value < 1 || value > PIE_SCALE)
 		return (EINVAL);
 	value = (value * PIE_SCALE) / 1000;
 	pie_sysctl.max_ecnth = value;
 	return (0);
 }
 
 /* define PIE sysctl variables */
 SYSBEGIN(f4)
 SYSCTL_DECL(_net_inet);
 SYSCTL_DECL(_net_inet_ip);
 SYSCTL_DECL(_net_inet_ip_dummynet);
 static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, 
 	pie, CTLFLAG_RW, 0, "PIE");
 
 #ifdef SYSCTL_NODE
 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, target,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, 
 	pie_sysctl_target_tupdate_maxb_handler, "L",
 	"queue target in microsecond");
 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, tupdate,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
 	pie_sysctl_target_tupdate_maxb_handler, "L",
 	"the frequency of drop probability calculation in microsecond");
 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, max_burst,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
 	pie_sysctl_target_tupdate_maxb_handler, "L",
 	"Burst allowance interval in microsecond");
 
 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, max_ecnth,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
 	pie_sysctl_max_ecnth_handler, "L",
 	"ECN safeguard threshold scaled by 1000");
 
 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, alpha,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
 	pie_sysctl_alpha_beta_handler, "L",
 	"PIE alpha scaled by 1000");
 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, beta,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
 	pie_sysctl_alpha_beta_handler, "L",
 	"beta scaled by 1000");
 #endif
 
 
 /*
  * Callout function for drop probability calculation.
  * This function is called every tupdate interval and takes a pointer
  * to the PIE status variables as its argument.
  */
 static void
 calculate_drop_prob(void *x)
 {
 	int64_t p, prob, oldprob;
 	struct dn_aqm_pie_parms *pprms;
 	struct pie_status *pst = (struct pie_status *) x;
 	int p_isneg;
 
 	pprms = pst->parms;
 	prob = pst->drop_prob;
 
 	/* Calculate the current qdelay using the DRE method.
 	 * If timestamps are used instead and there is no data in the queue,
 	 * reset current_qdelay, as it retains its last value during dequeue.
 	 */
 	if (pprms->flags & PIE_DEPRATEEST_ENABLED)
 		pst->current_qdelay = ((uint64_t)pst->pq->ni.len_bytes *
 			pst->avg_dq_time) >> PIE_DQ_THRESHOLD_BITS;
 	else 
 		if (!pst->pq->ni.len_bytes)
 			 pst->current_qdelay = 0;
 
 	/* calculate drop probability */
 	p = (int64_t)pprms->alpha * 
 		((int64_t)pst->current_qdelay - (int64_t)pprms->qdelay_ref); 
 	p +=(int64_t) pprms->beta * 
 		((int64_t)pst->current_qdelay - (int64_t)pst->qdelay_old); 
 
 	/* take absolute value so right shift result is well defined */
 	p_isneg = p < 0;
 	if (p_isneg) {
 		p = -p;
 	}
 		
 	/* We shift PIE_MAX_PROB left by 12 bits to increase the division
 	 * precision */
 	p *= (PIE_MAX_PROB << 12) / AQM_TIME_1S;
 
 	/* auto-tune drop probability */
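 	/*
 	 * The varying shifts below implement PIE auto-tuning (cf. RFC 8033):
 	 * on top of the fixed-point and precision scaling, the adjustment is
 	 * further divided by 2048, 512, 128, 32, 8 or 2 as the current drop
 	 * probability rises from below 0.0001% to below 10%.
 	 */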
 	if (prob < (PIE_MAX_PROB / 1000000)) /* 0.000001 */
 		p >>= 11 + PIE_FIX_POINT_BITS + 12;
 	else if (prob < (PIE_MAX_PROB / 100000)) /* 0.00001 */
 		p >>= 9 + PIE_FIX_POINT_BITS + 12;
 	else if (prob < (PIE_MAX_PROB / 10000)) /* 0.0001 */
 		p >>= 7 + PIE_FIX_POINT_BITS + 12;
 	else if (prob < (PIE_MAX_PROB / 1000)) /* 0.001 */
 		p >>= 5 + PIE_FIX_POINT_BITS + 12;
 	else if (prob < (PIE_MAX_PROB / 100)) /* 0.01 */
 		p >>= 3 + PIE_FIX_POINT_BITS + 12;
 	else if (prob < (PIE_MAX_PROB / 10)) /* 0.1 */
 		p >>= 1 + PIE_FIX_POINT_BITS + 12;
 	else
 		p >>= PIE_FIX_POINT_BITS + 12;
 
 	oldprob = prob;
 
 	if (p_isneg) {
 		prob = prob - p;
 
 		/* check for subtraction underflow */
 		if (prob > oldprob) {
 			prob = 0;
 			D("underflow");
 		}
 	} else {
 		/* Cap Drop adjustment */
 		if ((pprms->flags & PIE_CAPDROP_ENABLED) &&
 		    prob >= PIE_MAX_PROB / 10 &&
 		    p > PIE_MAX_PROB / 50 ) {
 			p = PIE_MAX_PROB / 50;
 		}
 
 		prob = prob + p;
 
 		/* check for addition overflow */
 		if (prob < oldprob) {
 			D("overflow");
 			prob = PIE_MAX_PROB;
 		}
 	}
 
 	/*
 	 * decay the drop probability exponentially
 	 * and restrict it to range 0 to PIE_MAX_PROB
 	 */
 	if (prob < 0) {
 		prob = 0;
 	} else {
 		if (pst->current_qdelay == 0 && pst->qdelay_old == 0) {
 			/* 0.98 ~= 1- 1/64 */
 			prob = prob - (prob >> 6); 
 		}
 
 		if (prob > PIE_MAX_PROB) {
 			prob = PIE_MAX_PROB;
 		}
 	}
 
 	pst->drop_prob = prob;
 	
 	/* store current queue delay value in old queue delay*/
 	pst->qdelay_old = pst->current_qdelay;
 
 	/* update burst allowance */
 	if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance > 0) {
 		if (pst->burst_allowance > pprms->tupdate)
 			pst->burst_allowance -= pprms->tupdate;
 		else 
 			pst->burst_allowance = 0;
 	}
 
 	/* reschedule calculate_drop_prob function */
 	if (pst->sflags & PIE_ACTIVE)
 		callout_reset_sbt(&pst->aqm_pie_callout,
 			(uint64_t)pprms->tupdate * SBT_1US, 0, calculate_drop_prob, pst, 0);
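 
 	/*
 	 * This callout was initialized with callout_init_mtx() and
 	 * CALLOUT_RETURNUNLOCKED (see aqm_pie_init()), so lock_mtx is held on
 	 * entry and must be released here rather than by the callout code.
 	 */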
 
 	mtx_unlock(&pst->lock_mtx);
 }
 
 /*
  * Extract a packet from the head of queue 'q'
  * Return a packet or NULL if the queue is empty.
  * If getts is set, also extract packet's timestamp from mtag.
  */
 static struct mbuf *
 pie_extract_head(struct dn_queue *q, aqm_time_t *pkt_ts, int getts)
 {
 	struct m_tag *mtag;
 	struct mbuf *m = q->mq.head;
 
 	if (m == NULL)
 		return m;
 	q->mq.head = m->m_nextpkt;
 
 	/* Update stats */
 	update_stats(q, -m->m_pkthdr.len, 0);
 
 	if (q->ni.length == 0) /* queue is now idle */
 			q->q_time = dn_cfg.curr_time;
 
 	if (getts) {
 		/* extract packet TS*/
 		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
 		if (mtag == NULL) {
 			D("PIE timestamp mtag not found!");
 			*pkt_ts = 0;
 		} else {
 			*pkt_ts = *(aqm_time_t *)(mtag + 1);
 			m_tag_delete(m,mtag); 
 		}
 	}
 	return m;
 }
 
 /* 
  * Initialize PIE variables and optionally activate it
  */
 __inline static void
 init_activate_pie(struct pie_status *pst, int resettimer)
 {
 	struct dn_aqm_pie_parms *pprms;
 
 	mtx_lock(&pst->lock_mtx);
 	pprms = pst->parms;
 	pst->drop_prob = 0;
 	pst->qdelay_old = 0;
 	pst->burst_allowance = pprms->max_burst;
 	pst->accu_prob = 0;
 	pst->dq_count = 0;
 	pst->avg_dq_time = 0;
 	pst->sflags = PIE_INMEASUREMENT;
 	pst->measurement_start = AQM_UNOW;
 
 	if (resettimer) {
 		pst->sflags |= PIE_ACTIVE;
 		callout_reset_sbt(&pst->aqm_pie_callout,
 			(uint64_t)pprms->tupdate * SBT_1US,
 			0, calculate_drop_prob, pst, 0);
 	}
 	//DX(2, "PIE Activated");
 	mtx_unlock(&pst->lock_mtx);
 }
 
 /* 
  * Deactivate PIE and stop probe update callout 
  */
 __inline static void
 deactivate_pie(struct pie_status *pst)
 {
 	mtx_lock(&pst->lock_mtx);
 	pst->sflags &= ~(PIE_ACTIVE | PIE_INMEASUREMENT);
 	callout_stop(&pst->aqm_pie_callout);
 	//D("PIE Deactivated");
 	mtx_unlock(&pst->lock_mtx);
 }
 
 /* 
  * Dequeue and return a packet from queue 'q', or NULL if 'q' is empty.
  * Also, calculate the departure time or queue delay using the timestamp.
  */
 static struct mbuf *
 aqm_pie_dequeue(struct dn_queue *q)
 {
 	struct mbuf *m;
 	struct dn_flow *ni;	/* stats for scheduler instance */	
 	struct dn_aqm_pie_parms *pprms;
 	struct pie_status *pst;
 	aqm_time_t now;
 	aqm_time_t pkt_ts, dq_time;
 	int32_t w;
 
 	pst  = q->aqm_status;
 	pprms = pst->parms;
 	ni = &q->_si->ni;
 
 	/* we extract the packet ts only when Departure Rate Estimation is not used */
 	m = pie_extract_head(q, &pkt_ts, !(pprms->flags & PIE_DEPRATEEST_ENABLED));
 
 	if (!m || !(pst->sflags & PIE_ACTIVE))
 		return m;
 
 	now = AQM_UNOW;
 	if (pprms->flags & PIE_DEPRATEEST_ENABLED) {
 		/* calculate average departure time */
 		if(pst->sflags & PIE_INMEASUREMENT) {
 			pst->dq_count += m->m_pkthdr.len;
 
 			if (pst->dq_count >= PIE_DQ_THRESHOLD) {
 				dq_time = now - pst->measurement_start;
 
 				/* 
 				 * if we don't have an old avg dq_time, i.e. PIE was just
 				 * (re)initialized, don't use the weight to calculate the
 				 * new avg_dq_time
 				 */
 				if(pst->avg_dq_time == 0)
 					pst->avg_dq_time = dq_time;
 				else {
 					/*
 					 * weight = PIE_DQ_THRESHOLD / 2^6, but we scale the
 					 * weight by 2^8; thus the scaled
 					 * weight = PIE_DQ_THRESHOLD / 2^8
 					 */
 					w = PIE_DQ_THRESHOLD >> 8;
 					pst->avg_dq_time = (dq_time* w
 						+ (pst->avg_dq_time * ((1L << 8) - w))) >> 8;
 					pst->sflags &= ~PIE_INMEASUREMENT;
 				}
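 				/*
 				 * Numeric sketch of the update above (assuming
 				 * PIE_DQ_THRESHOLD = 1 << PIE_DQ_THRESHOLD_BITS with
 				 * PIE_DQ_THRESHOLD_BITS = 14, i.e. 16KB): w = 64, so
 				 * avg_dq_time = (64*dq_time + 192*avg_dq_time) >> 8,
 				 * giving each new measurement a weight of 1/4.
 				 */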
 			}
 		}
 
 		/* 
 		 * Start a new measurement cycle when the queue has
 		 * PIE_DQ_THRESHOLD worth of bytes.
 		 */
 		if(!(pst->sflags & PIE_INMEASUREMENT) && 
 			q->ni.len_bytes >= PIE_DQ_THRESHOLD) {
 			pst->sflags |= PIE_INMEASUREMENT;
 			pst->measurement_start = now;
 			pst->dq_count = 0;
 		}
 	}
 	/* Optionally, use packet timestamp to estimate queue delay */
 	else
 		pst->current_qdelay = now - pkt_ts;
 
 	return m;	
 }
 
 /*
  * Enqueue a packet in q, subject to space and PIE queue management policy
  * (whose parameters are in q->fs).
  * Update stats for the queue and the scheduler.
  * Return 0 on success, 1 on drop. The packet is consumed either way.
  */
 static int
 aqm_pie_enqueue(struct dn_queue *q, struct mbuf* m)
 {
 	struct dn_fs *f;
 	uint64_t len;
 	uint32_t qlen;
 	struct pie_status *pst;
 	struct dn_aqm_pie_parms *pprms;
 	int t;
 
 	len = m->m_pkthdr.len;
 	pst  = q->aqm_status;
 	if(!pst) {
 		DX(2, "PIE queue is not initialized\n");
 		update_stats(q, 0, 1);
 		FREE_PKT(m);
 		return 1;
 	}
 
 	f = &(q->fs->fs);
 	pprms = pst->parms;
 	t = ENQUE;
 
 	/* get current queue length in bytes or packets*/
 	qlen = (f->flags & DN_QSIZE_BYTES) ?
 		q->ni.len_bytes : q->ni.length;
 
 	/* check the queue size and tail-drop the packet if the limit is exceeded */
 	if (qlen >= f->qsize)
 		t = DROP;
 	/* drop/mark the packet when PIE is active and burst time elapsed */
 	else if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance==0
 			&& drop_early(pst, q->ni.len_bytes) == DROP) {
 				/* 
 				 * if drop_prob is below the ECN threshold, mark the
 				 * packet and enqueue it; otherwise drop it.
 				 */
 				if ((pprms->flags & PIE_ECN_ENABLED) && pst->drop_prob <
 					(pprms->max_ecnth << (PIE_PROB_BITS - PIE_FIX_POINT_BITS))
 					&& ecn_mark(m))
 					t = ENQUE;
 				else
 					t = DROP;
 	}
 
 	/* Turn PIE on when 1/3 of the queue is full */ 
 	if (!(pst->sflags & PIE_ACTIVE) && qlen >= pst->one_third_q_size) {
 		init_activate_pie(pst, 1);
 	}
 
 	/* Reset burst tolerance and optionally turn PIE off */
 	if ((pst->sflags & PIE_ACTIVE) && pst->drop_prob == 0 &&
 		pst->current_qdelay < (pprms->qdelay_ref >> 1) &&
 		pst->qdelay_old < (pprms->qdelay_ref >> 1)) {
 
 			pst->burst_allowance = pprms->max_burst;
 			if ((pprms->flags & PIE_ON_OFF_MODE_ENABLED) && qlen<=0)
 				deactivate_pie(pst);
 	}
 
 	/* Timestamp the packet if Departure Rate Estimation is disabled */
 	if (t != DROP && !(pprms->flags & PIE_DEPRATEEST_ENABLED)) {
 		/* Add TS to mbuf as a TAG */
 		struct m_tag *mtag;
 		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
 		if (mtag == NULL)
 			mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
 				sizeof(aqm_time_t), M_NOWAIT);
 		if (mtag == NULL) {
-			m_freem(m); 
 			t = DROP;
+		} else {
+			*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
+			m_tag_prepend(m, mtag);
 		}
-		*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
-		m_tag_prepend(m, mtag);
 	}
 
 	if (t != DROP) {
 		mq_append(&q->mq, m);
 		update_stats(q, len, 0);
 		return (0);
 	} else {
 		update_stats(q, 0, 1);
 
 		/* reset accu_prob after packet drop */
 		pst->accu_prob = 0;
 		FREE_PKT(m);
 		return 1;
 	}
 	return 0;
 }
 
 /* 
  * initialize PIE for queue 'q' 
  * First allocate memory for PIE status.
  */
 static int
 aqm_pie_init(struct dn_queue *q)
 {
 	struct pie_status *pst;
 	struct dn_aqm_pie_parms *pprms;
 	int err = 0;
 	
 	pprms = q->fs->aqmcfg;
 	
 	do { /* exit with break when error occurs*/
 		if (!pprms){
 			DX(2, "AQM_PIE is not configured");
 			err = EINVAL;
 			break;
 		}
 
 		q->aqm_status = malloc(sizeof(struct pie_status),
 				 M_DUMMYNET, M_NOWAIT | M_ZERO);
 		if (q->aqm_status == NULL) {
 			D("cannot allocate PIE private data");
 			err = ENOMEM;
 			break;
 		}
 
 		pst = q->aqm_status;
 		/* increase reference count for PIE module */
 		pie_desc.ref_count++;
 		
 		pst->pq = q;
 		pst->parms = pprms;
 		
 		/* For speed optimization, we calculate 1/3 of the queue size once here */
 		// x/3 can be approximated by (x>>2) + (x>>4) + (x>>7) (~0.32 * x)
 		pst->one_third_q_size = q->fs->fs.qsize/3;
 		
 		mtx_init(&pst->lock_mtx, "mtx_pie", NULL, MTX_DEF);
 		callout_init_mtx(&pst->aqm_pie_callout, &pst->lock_mtx,
 			CALLOUT_RETURNUNLOCKED);
 		
 		pst->current_qdelay = 0;
 		init_activate_pie(pst, !(pprms->flags & PIE_ON_OFF_MODE_ENABLED));
 		
 		//DX(2, "aqm_PIE_init");
 
 	} while(0);
 	
 	return err;
 }
 
 /* 
  * Callout function to destroy pie mtx and free PIE status memory
  */
 static void
 pie_callout_cleanup(void *x)
 {
 	struct pie_status *pst = (struct pie_status *) x;
 
 	mtx_unlock(&pst->lock_mtx);
 	mtx_destroy(&pst->lock_mtx);
 	free(x, M_DUMMYNET);
 	DN_BH_WLOCK();
 	pie_desc.ref_count--;
 	DN_BH_WUNLOCK();
 }
 
 /* 
  * Clean up PIE status for queue 'q' 
  * Destroy memory allocated for PIE status.
  */
 static int
 aqm_pie_cleanup(struct dn_queue *q)
 {
 
 	if(!q) {
 		D("q is null");
 		return 0;
 	}
 	struct pie_status *pst  = q->aqm_status;
 	if(!pst) {
 		//D("queue is already cleaned up");
 		return 0;
 	}
 	if(!q->fs || !q->fs->aqmcfg) {
 		D("fs is null or no cfg");
 		return 1;
 	}
 	if (q->fs->aqmfp && q->fs->aqmfp->type !=DN_AQM_PIE) {
 		D("Not PIE fs (%d)", q->fs->fs.fs_nr);
 		return 1;
 	}
 
 	/* 
 	 * Free the memory allocated for the PIE status using the
 	 * pie_callout_cleanup() callout function, to avoid any potential race.
 	 * We reset aqm_pie_callout to call pie_callout_cleanup() in the next
 	 * 1us. This stops the scheduled calculate_drop_prob() callout and calls
 	 * pie_callout_cleanup(), which frees the memory.
 	 */
 	mtx_lock(&pst->lock_mtx);
 	callout_reset_sbt(&pst->aqm_pie_callout,
 		SBT_1US, 0, pie_callout_cleanup, pst, 0);
 	q->aqm_status = NULL;
 	mtx_unlock(&pst->lock_mtx);
 
 	return 0;
 }
 
 /* 
  * Configure PIE parameters;
  * also allocate memory for the PIE configuration.
  */
 static int 
 aqm_pie_config(struct dn_fsk* fs, struct dn_extra_parms *ep, int len)
 { 
 	struct dn_aqm_pie_parms *pcfg;
 
 	int l = sizeof(struct dn_extra_parms);
 	if (len < l) {
 		D("invalid sched parms length got %d need %d", len, l);
 		return EINVAL;
 	}
 	/* we free the old cfg because the original allocation may
 	 * have been used for a different AQM type.
 	 */
 	if (fs->aqmcfg) {
 		free(fs->aqmcfg, M_DUMMYNET);
 		fs->aqmcfg = NULL;
 	}
 	
 	fs->aqmcfg = malloc(sizeof(struct dn_aqm_pie_parms),
 			 M_DUMMYNET, M_NOWAIT | M_ZERO);
 	if (fs->aqmcfg == NULL) {
 		D("cannot allocate PIE configuration parameters");
 		return ENOMEM; 
 	}
 
 	/* the par array contains the pie configuration as follows:
 	 * 0- qdelay_ref, 1- tupdate, 2- max_burst,
 	 * 3- max_ecnth, 4- alpha, 5- beta, 6- flags
 	 */
 
 	/* configure PIE parameters */
 	pcfg = fs->aqmcfg;
 	
 	if (ep->par[0] < 0)
 		pcfg->qdelay_ref = pie_sysctl.qdelay_ref;
 	else
 		pcfg->qdelay_ref = ep->par[0] * AQM_TIME_1US;
 	if (ep->par[1] < 0)
 		pcfg->tupdate = pie_sysctl.tupdate;
 	else
 		pcfg->tupdate = ep->par[1] * AQM_TIME_1US;
 	if (ep->par[2] < 0)
 		pcfg->max_burst = pie_sysctl.max_burst;
 	else
 		pcfg->max_burst = ep->par[2] * AQM_TIME_1US;
 	if (ep->par[3] < 0)
 		pcfg->max_ecnth = pie_sysctl.max_ecnth;
 	else
 		pcfg->max_ecnth = ep->par[3];
 	if (ep->par[4] < 0)
 		pcfg->alpha = pie_sysctl.alpha;
 	else
 		pcfg->alpha = ep->par[4];
 	if (ep->par[5] < 0)
 		pcfg->beta = pie_sysctl.beta;
 	else
 		pcfg->beta = ep->par[5];
 	if (ep->par[6] < 0)
 		pcfg->flags = pie_sysctl.flags;
 	else
 		pcfg->flags = ep->par[6];
 
 	/* bound PIE configurations */
 	pcfg->qdelay_ref = BOUND_VAR(pcfg->qdelay_ref, 1, 10 * AQM_TIME_1S);
 	pcfg->tupdate = BOUND_VAR(pcfg->tupdate, 1, 10 * AQM_TIME_1S);
 	pcfg->max_burst = BOUND_VAR(pcfg->max_burst, 0, 10 * AQM_TIME_1S);
 	pcfg->max_ecnth = BOUND_VAR(pcfg->max_ecnth, 0, PIE_SCALE);
 	pcfg->alpha = BOUND_VAR(pcfg->alpha, 0, 7 * PIE_SCALE);
 	pcfg->beta = BOUND_VAR(pcfg->beta, 0, 7 * PIE_SCALE);
 
 	pie_desc.cfg_ref_count++;
 	//D("pie cfg_ref_count=%d", pie_desc.cfg_ref_count);
 	return 0;
 }
 
 /*
  * Deconfigure PIE and free memory allocation
  */
 static int
 aqm_pie_deconfig(struct dn_fsk* fs)
 {
 	if (fs && fs->aqmcfg) {
 		free(fs->aqmcfg, M_DUMMYNET);
 		fs->aqmcfg = NULL;
 		pie_desc.cfg_ref_count--;
 	}
 	return 0;
 }
 
 /* 
  * Retrieve PIE configuration parameters.
  */ 
 static int 
 aqm_pie_getconfig (struct dn_fsk *fs, struct dn_extra_parms * ep)
 {
 	struct dn_aqm_pie_parms *pcfg;
 	if (fs->aqmcfg) {
 		strlcpy(ep->name, pie_desc.name, sizeof(ep->name));
 		pcfg = fs->aqmcfg;
 		ep->par[0] = pcfg->qdelay_ref / AQM_TIME_1US;
 		ep->par[1] = pcfg->tupdate / AQM_TIME_1US;
 		ep->par[2] = pcfg->max_burst / AQM_TIME_1US;
 		ep->par[3] = pcfg->max_ecnth;
 		ep->par[4] = pcfg->alpha;
 		ep->par[5] = pcfg->beta;
 		ep->par[6] = pcfg->flags;
 
 		return 0;
 	}
 	return 1;
 }
 
 static struct dn_aqm pie_desc = {
 	_SI( .type = )  DN_AQM_PIE,
 	_SI( .name = )  "PIE",
 	_SI( .ref_count = )  0,
 	_SI( .cfg_ref_count = )  0,
 	_SI( .enqueue = )  aqm_pie_enqueue,
 	_SI( .dequeue = )  aqm_pie_dequeue,
 	_SI( .config = )  aqm_pie_config,
 	_SI( .deconfig = )  aqm_pie_deconfig,
 	_SI( .getconfig = )  aqm_pie_getconfig,
 	_SI( .init = )  aqm_pie_init,
 	_SI( .cleanup = )  aqm_pie_cleanup,
 };
 
 DECLARE_DNAQM_MODULE(dn_aqm_pie, &pie_desc);
 #endif
diff --git a/sys/netpfil/ipfw/dn_sched_fq_codel.c b/sys/netpfil/ipfw/dn_sched_fq_codel.c
index 44610aaf9740..d2fe6a76f1cc 100644
--- a/sys/netpfil/ipfw/dn_sched_fq_codel.c
+++ b/sys/netpfil/ipfw/dn_sched_fq_codel.c
@@ -1,618 +1,616 @@
 /* 
  * FQ_Codel - The FlowQueue-Codel scheduler/AQM
  *
  * $FreeBSD$
  * 
  * Copyright (C) 2016 Centre for Advanced Internet Architectures,
  *  Swinburne University of Technology, Melbourne, Australia.
  * Portions of this code were made possible in part by a gift from 
  *  The Comcast Innovation Fund.
  * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifdef _KERNEL
 #include <sys/malloc.h>
 #include <sys/socket.h>
 //#include <sys/socketvar.h>
 #include <sys/kernel.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <net/if.h>	/* IFNAMSIZ */
 #include <netinet/in.h>
 #include <netinet/ip_var.h>		/* ipfw_rule_ref */
 #include <netinet/ip_fw.h>	/* flow_id */
 #include <netinet/ip_dummynet.h>
 
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <sys/sysctl.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 #include <sys/queue.h>
 #include <sys/hash.h>
 
 #include <netpfil/ipfw/dn_heap.h>
 #include <netpfil/ipfw/ip_dn_private.h>
 
 #include <netpfil/ipfw/dn_aqm.h>
 #include <netpfil/ipfw/dn_aqm_codel.h>
 #include <netpfil/ipfw/dn_sched.h>
 #include <netpfil/ipfw/dn_sched_fq_codel.h>
 #include <netpfil/ipfw/dn_sched_fq_codel_helper.h>
 
 #else
 #include <dn_test.h>
 #endif
 
 /* NOTE: In the fq_codel module, we reimplement the CoDel AQM functions
  * because fq_codel uses a different flow (sub-queue) structure, and
  * dn_queue includes many variables that a flow (sub-queue) does not
  * need, i.e. we avoid extra overhead (88 bytes vs 208 bytes).
  * Also, the CoDel functions manage the stats of the sub-queues as well
  * as the main queue.
  */
 
 #define DN_SCHED_FQ_CODEL 6
 
 static struct dn_alg fq_codel_desc;
 
 /* fq_codel default parameters including codel */
 struct dn_sch_fq_codel_parms 
 fq_codel_sysctl = {{5000 * AQM_TIME_1US, 100000 * AQM_TIME_1US,
 	CODEL_ECN_ENABLED}, 1024, 10240, 1514};
 
 static int
 fqcodel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	long  value;
 
 	value = fq_codel_sysctl.ccfg.interval;
 	value /= AQM_TIME_1US;
 	error = sysctl_handle_long(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (value < 1 || value > 100 * AQM_TIME_1S)
 		return (EINVAL);
 	fq_codel_sysctl.ccfg.interval = value * AQM_TIME_1US;
 
 	return (0);
 }
 
 static int
 fqcodel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	long  value;
 
 	value = fq_codel_sysctl.ccfg.target;
 	value /= AQM_TIME_1US;
 	error = sysctl_handle_long(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (value < 1 || value > 5 * AQM_TIME_1S)
 		return (EINVAL);
 	fq_codel_sysctl.ccfg.target = value * AQM_TIME_1US;
 
 	return (0);
 }
 
 
 SYSBEGIN(f4)
 
 SYSCTL_DECL(_net_inet);
 SYSCTL_DECL(_net_inet_ip);
 SYSCTL_DECL(_net_inet_ip_dummynet);
 static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqcodel,
 	CTLFLAG_RW, 0, "FQ_CODEL");
 
 #ifdef SYSCTL_NODE
 	
 SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, target,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, fqcodel_sysctl_target_handler, "L",
 	"FQ_CoDel target in microsecond");
 SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, interval,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, fqcodel_sysctl_interval_handler, "L",
 	"FQ_CoDel interval in microsecond");
 	
 SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, quantum,
 	CTLFLAG_RW, &fq_codel_sysctl.quantum, 1514, "FQ_CoDel quantum");
 SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, flows,
 	CTLFLAG_RW, &fq_codel_sysctl.flows_cnt, 1024, 
 	"Number of queues for FQ_CoDel");
 SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, limit,
 	CTLFLAG_RW, &fq_codel_sysctl.limit, 10240, "FQ_CoDel queues size limit");
 #endif
 
 /* Drop a packet from the head of codel queue */
 static void
 codel_drop_head(struct fq_codel_flow *q, struct fq_codel_si *si)
 {
 	struct mbuf *m = q->mq.head;
 
 	if (m == NULL)
 		return;
 	q->mq.head = m->m_nextpkt;
 
 	fq_update_stats(q, si, -m->m_pkthdr.len, 1);
 
 	if (si->main_q.ni.length == 0) /* queue is now idle */
 			si->main_q.q_time = dn_cfg.curr_time;
 
 	FREE_PKT(m);
 }
 
 /* Enqueue a packet 'm' into queue 'q', adding a timestamp to the packet.
  * Return 1 when unable to add the timestamp, otherwise return 0.
  */
 static int
 codel_enqueue(struct fq_codel_flow *q, struct mbuf *m, struct fq_codel_si *si)
 {
 	uint64_t len;
 
 	len = m->m_pkthdr.len;
 	/* finding maximum packet size */
 	if (len > q->cst.maxpkt_size)
 		q->cst.maxpkt_size = len;
 
 	/* Add timestamp to mbuf as MTAG */
 	struct m_tag *mtag;
 	mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
 	if (mtag == NULL)
 		mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, sizeof(aqm_time_t),
 			M_NOWAIT);
-	if (mtag == NULL) {
-		m_freem(m); 
+	if (mtag == NULL)
 		goto drop;
-	}
 	*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
 	m_tag_prepend(m, mtag);
 
 	mq_append(&q->mq, m);
 	fq_update_stats(q, si, len, 0);
 	return 0;
 
 drop:
 	fq_update_stats(q, si, len, 1);
 	m_freem(m);
 	return 1;
 }
 
 /*
  * Classify a packet into a queue number using the Jenkins hash function.
  * Return: queue number.
  * The inputs to the hash are the protocol number, perturbation, src IP,
  * dst IP, src port and dst port.
  */
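 /*
  * Hash tuple layout (byte offsets), as filled in below:
  *   IPv4 (17 bytes): [0] proto, [1-4] perturbation, [5-8] src IP,
  *     [9-12] dst IP, [13-14] dst port, [15-16] src port
  *   IPv6 (41 bytes): [0] next header, [1-4] perturbation, [5-20] src IP,
  *     [21-36] dst IP, [37-38] dst port, [39-40] src port
  */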
 static inline int
 fq_codel_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_codel_si *si)
 {
 	struct ip *ip;
 	struct tcphdr *th;
 	struct udphdr *uh;
 	uint8_t tuple[41];
 	uint16_t hash=0;
 
 	ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
 //#ifdef INET6
 	struct ip6_hdr *ip6;
 	int isip6;
 	isip6 = (ip->ip_v == 6);
 
 	if(isip6) {
 		ip6 = (struct ip6_hdr *)ip;
 		*((uint8_t *) &tuple[0]) = ip6->ip6_nxt;
 		*((uint32_t *) &tuple[1]) = si->perturbation;
 		memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16);
 		memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16);
 
 		switch (ip6->ip6_nxt) {
 		case IPPROTO_TCP:
 			th = (struct tcphdr *)(ip6 + 1);
 			*((uint16_t *) &tuple[37]) = th->th_dport;
 			*((uint16_t *) &tuple[39]) = th->th_sport;
 			break;
 
 		case IPPROTO_UDP:
 			uh = (struct udphdr *)(ip6 + 1);
 			*((uint16_t *) &tuple[37]) = uh->uh_dport;
 			*((uint16_t *) &tuple[39]) = uh->uh_sport;
 			break;
 		default:
 			memset(&tuple[37], 0, 4);
 
 		}
 
 		hash = jenkins_hash(tuple, 41, HASHINIT) %  fcount;
 		return hash;
 	} 
 //#endif
 
 	/* IPv4 */
 	*((uint8_t *) &tuple[0]) = ip->ip_p;
 	*((uint32_t *) &tuple[1]) = si->perturbation;
 	*((uint32_t *) &tuple[5]) = ip->ip_src.s_addr;
 	*((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr;
 
 	switch (ip->ip_p) {
 		case IPPROTO_TCP:
 			th = (struct tcphdr *)(ip + 1);
 			*((uint16_t *) &tuple[13]) = th->th_dport;
 			*((uint16_t *) &tuple[15]) = th->th_sport;
 			break;
 
 		case IPPROTO_UDP:
 			uh = (struct udphdr *)(ip + 1);
 			*((uint16_t *) &tuple[13]) = uh->uh_dport;
 			*((uint16_t *) &tuple[15]) = uh->uh_sport;
 			break;
 		default:
 			memset(&tuple[13], 0, 4);
 
 	}
 	hash = jenkins_hash(tuple, 17, HASHINIT) %  fcount;
 
 	return hash;
 }
 
 /*
  * Enqueue a packet into an appropriate queue according to
  * FQ_CODEL algorithm.
  */
 static int 
 fq_codel_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q, 
 	struct mbuf *m)
 {
 	struct fq_codel_si *si;
 	struct fq_codel_schk *schk;
 	struct dn_sch_fq_codel_parms *param;
 	struct dn_queue *mainq;
 	int idx, drop, i, maxidx;
 
 	mainq = (struct dn_queue *)(_si + 1);
 	si = (struct fq_codel_si *)_si;
 	schk = (struct fq_codel_schk *)(si->_si.sched+1);
 	param = &schk->cfg;
 
 	 /* classify a packet to queue number*/
 	idx = fq_codel_classify_flow(m, param->flows_cnt, si);
 	/* enqueue the packet into the appropriate queue using CoDel AQM.
 	 * Note: codel_enqueue() returns 1 only when it is unable to
 	 * add a timestamp to the packet (no limit check). */
 	drop = codel_enqueue(&si->flows[idx], m, si);
 	
 	/* codel unable to timestamp a packet */ 
 	if (drop)
 		return 1;
 	
 	/* If the flow (sub-queue) is not active, add it to the tail of the
 	 * new flows list, then initialize and activate it.
 	 */
 	if (!si->flows[idx].active ) {
 		STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain);
 		si->flows[idx].deficit = param->quantum;
 		si->flows[idx].cst.dropping = false;
 		si->flows[idx].cst.first_above_time = 0;
 		si->flows[idx].active = 1;
 		//D("activate %d",idx);
 	}
 
 	/* check the limit for all queues and remove a packet from the
 	 * largest one 
 	 */
 	if (mainq->ni.length > schk->cfg.limit) {
 		D("over limit");
 		/* find first active flow */
 		for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
 			if (si->flows[maxidx].active)
 				break;
 		if (maxidx < schk->cfg.flows_cnt) {
 			/* find the largest sub-queue */
 			for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++) 
 				if (si->flows[i].active && si->flows[i].stats.length >
 					si->flows[maxidx].stats.length)
 					maxidx = i;
 			codel_drop_head(&si->flows[maxidx], si);
 			D("maxidx = %d",maxidx);
 			drop = 1;
 		}
 	}
 
 	return drop;
 }
 
 /*
  * Dequeue a packet from an appropriate queue according to
  * FQ_CODEL algorithm.
  */
 static struct mbuf *
 fq_codel_dequeue(struct dn_sch_inst *_si)
 {
 	struct fq_codel_si *si;
 	struct fq_codel_schk *schk;
 	struct dn_sch_fq_codel_parms *param;
 	struct fq_codel_flow *f;
 	struct mbuf *mbuf;
 	struct fq_codel_list *fq_codel_flowlist;
 
 	si = (struct fq_codel_si *)_si;
 	schk = (struct fq_codel_schk *)(si->_si.sched+1);
 	param = &schk->cfg;
 
 	do {
 		/* select a list to start with */
 		if (STAILQ_EMPTY(&si->newflows))
 			fq_codel_flowlist = &si->oldflows;
 		else
 			fq_codel_flowlist = &si->newflows;
 
 		/* Both new and old queue lists are empty, return NULL */
 		if (STAILQ_EMPTY(fq_codel_flowlist)) 
 			return NULL;
 
 		f = STAILQ_FIRST(fq_codel_flowlist);
 		while (f != NULL)	{
 			/* if the flow (sub-queue) has a negative deficit, increase
 			 * its deficit by quantum, move the flow to the tail of the
 			 * old flows list and try another flow.
 			 * Otherwise, the flow will be used for dequeue.
 			 */
 			if (f->deficit < 0) {
 				 f->deficit += param->quantum;
 				 STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
 				 STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
 			 } else 
 				 break;
 
 			f = STAILQ_FIRST(fq_codel_flowlist);
 		}
 		
 		/* the new flows list is empty, try old flows list */
 		if (STAILQ_EMPTY(fq_codel_flowlist)) 
 			continue;
 
 		/* Dequeue a packet from the selected flow */
 		mbuf = fqc_codel_dequeue(f, si);
 
 		/* Codel did not return a packet */
 		if (!mbuf) {
 			/* If the selected flow belongs to the new flows list, move
 			 * it to the tail of the old flows list. Otherwise, deactivate
 			 * it and remove it from the old flows list.
 			 */
 			if (fq_codel_flowlist == &si->newflows) {
 				STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
 				STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
 			}	else {
 				f->active = 0;
 				STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
 			}
 			/* start again */
 			continue;
 		}
 
 		/* we have a packet to return, 
 		 * update flow deficit and return the packet*/
 		f->deficit -= mbuf->m_pkthdr.len;
 		return mbuf;
 
 	} while (1);
 	
 	/* unreachable point */
 	return NULL;
 }
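 
 /*
  * Deficit round-robin sketch (illustrative, quantum = 1514): a newly
  * activated flow starts with deficit = 1514.  Dequeueing a 1514-byte
  * packet leaves deficit = 0 (still eligible); the next 1514-byte packet
  * drives it to -1514, so on the following pass the flow is recharged by
  * one quantum and moved to the tail of the old flows list before another
  * flow is tried.
  */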
 
 /*
  * Initialize an fq_codel scheduler instance.
  * Also, allocate memory for the flows array.
  */
 static int
 fq_codel_new_sched(struct dn_sch_inst *_si)
 {
 	struct fq_codel_si *si;
 	struct dn_queue *q;
 	struct fq_codel_schk *schk;
 	int i;
 
 	si = (struct fq_codel_si *)_si;
 	schk = (struct fq_codel_schk *)(_si->sched+1);
 
 	if(si->flows) {
 		D("si already configured!");
 		return 0;
 	}
 
 	/* init the main queue */
 	q = &si->main_q;
 	set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
 	q->_si = _si;
 	q->fs = _si->sched->fs;
 
 	/* allocate memory for flows array */
 	si->flows = mallocarray(schk->cfg.flows_cnt,
 	    sizeof(struct fq_codel_flow), M_DUMMYNET, M_NOWAIT | M_ZERO);
 	if (si->flows == NULL) {
 		D("cannot allocate memory for fq_codel configuration parameters");
 		return ENOMEM ; 
 	}
 
 	/* init perturbation for this si */
 	si->perturbation = random();
 
 	/* init the old and new flows lists */
 	STAILQ_INIT(&si->newflows);
 	STAILQ_INIT(&si->oldflows);
 
 	/* init the flows (sub-queues) */
 	for (i = 0; i < schk->cfg.flows_cnt; i++) {
 		/* init codel */
 		si->flows[i].cst.maxpkt_size = 500;
 	}
 
 	fq_codel_desc.ref_count++;
 	return 0;
 }
 
 /*
  * Free fq_codel scheduler instance.
  */
 static int
 fq_codel_free_sched(struct dn_sch_inst *_si)
 {
 	struct fq_codel_si *si = (struct fq_codel_si *)_si ;
 
 	/* free the flows array */
 	free(si->flows , M_DUMMYNET);
 	si->flows = NULL;
 	fq_codel_desc.ref_count--;
 
 	return 0;
 }
 
 /*
  * Configure the fq_codel scheduler.
  * The configuration for the scheduler is passed from userland.
  */
 static int
 fq_codel_config(struct dn_schk *_schk)
 {
 	struct fq_codel_schk *schk;
 	struct dn_extra_parms *ep;
 	struct dn_sch_fq_codel_parms *fqc_cfg;
 	
 	schk = (struct fq_codel_schk *)(_schk+1);
 	ep = (struct dn_extra_parms *) _schk->cfg;
 
 	/* the par array contains the fq_codel configuration as follows:
 	 * CoDel: 0- target, 1- interval, 2- flags
 	 * FQ_CODEL: 3- quantum, 4- limit, 5- flows
 	 */
 	if (ep && ep->oid.len == sizeof(*ep) &&
 		ep->oid.subtype == DN_SCH_PARAMS) {
 
 		fqc_cfg = &schk->cfg;
 		if (ep->par[0] < 0)
 			fqc_cfg->ccfg.target = fq_codel_sysctl.ccfg.target;
 		else
 			fqc_cfg->ccfg.target = ep->par[0] * AQM_TIME_1US;
 
 		if (ep->par[1] < 0)
 			fqc_cfg->ccfg.interval = fq_codel_sysctl.ccfg.interval;
 		else
 			fqc_cfg->ccfg.interval = ep->par[1] * AQM_TIME_1US;
 
 		if (ep->par[2] < 0)
 			fqc_cfg->ccfg.flags = 0;
 		else
 			fqc_cfg->ccfg.flags = ep->par[2];
 
 		/* FQ configurations */
 		if (ep->par[3] < 0)
 			fqc_cfg->quantum = fq_codel_sysctl.quantum;
 		else
 			fqc_cfg->quantum = ep->par[3];
 
 		if (ep->par[4] < 0)
 			fqc_cfg->limit = fq_codel_sysctl.limit;
 		else
 			fqc_cfg->limit = ep->par[4];
 
 		if (ep->par[5] < 0)
 			fqc_cfg->flows_cnt = fq_codel_sysctl.flows_cnt;
 		else
 			fqc_cfg->flows_cnt = ep->par[5];
 
 		/* Bound the configurations */
 		fqc_cfg->ccfg.target = BOUND_VAR(fqc_cfg->ccfg.target, 1,
 			5 * AQM_TIME_1S);
 		fqc_cfg->ccfg.interval = BOUND_VAR(fqc_cfg->ccfg.interval, 1,
 			100 * AQM_TIME_1S);
 
 		fqc_cfg->quantum = BOUND_VAR(fqc_cfg->quantum, 1, 9000);
 		fqc_cfg->limit = BOUND_VAR(fqc_cfg->limit, 1, 20480);
 		fqc_cfg->flows_cnt = BOUND_VAR(fqc_cfg->flows_cnt, 1, 65536);
 	}
 	else
 		return 1;
 
 	return 0;
 }
 
 /*
  * Return the fq_codel scheduler configuration.
  * The configuration for the scheduler is passed to userland.
  */
 static int 
 fq_codel_getconfig (struct dn_schk *_schk, struct dn_extra_parms *ep) {
 	
 	struct fq_codel_schk *schk = (struct fq_codel_schk *)(_schk+1);
 	struct dn_sch_fq_codel_parms *fqc_cfg;
 
 	fqc_cfg = &schk->cfg;
 
 	strcpy(ep->name, fq_codel_desc.name);
 	ep->par[0] = fqc_cfg->ccfg.target / AQM_TIME_1US;
 	ep->par[1] = fqc_cfg->ccfg.interval / AQM_TIME_1US;
 	ep->par[2] = fqc_cfg->ccfg.flags;
 
 	ep->par[3] = fqc_cfg->quantum;
 	ep->par[4] = fqc_cfg->limit;
 	ep->par[5] = fqc_cfg->flows_cnt;
 
 	return 0;
 }
 
 /*
  * fq_codel scheduler descriptor
  * contains the type of the scheduler, the name, the size of extra
  * data structures, and function pointers.
  */
 static struct dn_alg fq_codel_desc = {
 	_SI( .type = )  DN_SCHED_FQ_CODEL,
 	_SI( .name = ) "FQ_CODEL",
 	_SI( .flags = ) 0,
 
 	_SI( .schk_datalen = ) sizeof(struct fq_codel_schk),
 	_SI( .si_datalen = ) sizeof(struct fq_codel_si) - sizeof(struct dn_sch_inst),
 	_SI( .q_datalen = ) 0,
 
 	_SI( .enqueue = ) fq_codel_enqueue,
 	_SI( .dequeue = ) fq_codel_dequeue,
 	_SI( .config = ) fq_codel_config, /* new sched i.e. sched X config ...*/
 	_SI( .destroy = ) NULL,  /*sched x delete */
 	_SI( .new_sched = ) fq_codel_new_sched, /* new schd instance */
 	_SI( .free_sched = ) fq_codel_free_sched,	/* delete schd instance */
 	_SI( .new_fsk = ) NULL,
 	_SI( .free_fsk = ) NULL,
 	_SI( .new_queue = ) NULL,
 	_SI( .free_queue = ) NULL,
 	_SI( .getconfig = )  fq_codel_getconfig,
 	_SI( .ref_count = ) 0
 };
 
 DECLARE_DNSCHED_MODULE(dn_fq_codel, &fq_codel_desc);
diff --git a/sys/netpfil/ipfw/dn_sched_fq_pie.c b/sys/netpfil/ipfw/dn_sched_fq_pie.c
index 2f61ca3de94e..c791ee333bc6 100644
--- a/sys/netpfil/ipfw/dn_sched_fq_pie.c
+++ b/sys/netpfil/ipfw/dn_sched_fq_pie.c
@@ -1,1235 +1,1235 @@
 /* 
  * FQ_PIE - The FlowQueue-PIE scheduler/AQM
  *
  * $FreeBSD$
  * 
  * Copyright (C) 2016 Centre for Advanced Internet Architectures,
  *  Swinburne University of Technology, Melbourne, Australia.
  * Portions of this code were made possible in part by a gift from 
  *  The Comcast Innovation Fund.
  * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /* Important note:
  * As there is no official document specifying FQ-PIE, we used the
  * FQ-CoDel algorithm with some modifications to implement FQ-PIE.
  * This FQ-PIE implementation is a beta version and has not been tested
  * extensively. Our FQ-PIE uses a stand-alone PIE AQM per sub-queue. By
  * default, a timestamp is used to calculate queue delay instead of the
  * departure rate estimation method. Although departure rate estimation is
  * available as a testing option, the results could be incorrect. Moreover,
  * the option to turn PIE on and off is available, but it does not work
  * properly in this version.
  */
 
 
 #ifdef _KERNEL
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/kernel.h>
 #include <sys/mbuf.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <net/if.h>	/* IFNAMSIZ */
 #include <netinet/in.h>
 #include <netinet/ip_var.h>		/* ipfw_rule_ref */
 #include <netinet/ip_fw.h>	/* flow_id */
 #include <netinet/ip_dummynet.h>
 
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <sys/sysctl.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 #include <sys/queue.h>
 #include <sys/hash.h>
 
 #include <netpfil/ipfw/dn_heap.h>
 #include <netpfil/ipfw/ip_dn_private.h>
 
 #include <netpfil/ipfw/dn_aqm.h>
 #include <netpfil/ipfw/dn_aqm_pie.h>
 #include <netpfil/ipfw/dn_sched.h>
 
 #else
 #include <dn_test.h>
 #endif
 
 #define DN_SCHED_FQ_PIE 7
 
 /* list of queues */
 STAILQ_HEAD(fq_pie_list, fq_pie_flow);
 
 /* FQ_PIE parameters including PIE */
 struct dn_sch_fq_pie_parms {
 	struct dn_aqm_pie_parms	pcfg;	/* PIE configuration Parameters */
 	/* FQ_PIE Parameters */
 	uint32_t flows_cnt;	/* number of flows */
 	uint32_t limit;	/* hard limit of FQ_PIE queue size*/
 	uint32_t quantum;
 };
 
 /* flow (sub-queue) stats */
 struct flow_stats {
 	uint64_t tot_pkts;	/* statistics counters  */
 	uint64_t tot_bytes;
 	uint32_t length;		/* Queue length, in packets */
 	uint32_t len_bytes;	/* Queue length, in bytes */
 	uint32_t drops;
 };
 
 /* A flow of packets (sub-queue)*/
 struct fq_pie_flow {
 	struct mq	mq;	/* list of packets */
 	struct flow_stats stats;	/* statistics */
 	int deficit;
 	int active;		/* 1: flow is active (in a list) */
 	struct pie_status pst;	/* pie status variables */
 	struct fq_pie_si_extra *psi_extra;
 	STAILQ_ENTRY(fq_pie_flow) flowchain;
 };
 
 /* extra fq_pie scheduler configurations */
 struct fq_pie_schk {
 	struct dn_sch_fq_pie_parms cfg;
 };
 
 
 /* fq_pie scheduler instance extra state vars.
  * The purpose of separating out this structure is to preserve the number
  * of active sub-queues and the flows array pointer even after the
  * scheduler instance is destroyed.
  * Preserving these variables allows the allocated memory to be freed by
  * fqpie_callout_cleanup() independently of fq_pie_free_sched().
  */
 struct fq_pie_si_extra {
 	uint32_t nr_active_q;	/* number of active queues */
 	struct fq_pie_flow *flows;	/* array of flows (queues) */
 };
 
 /* fq_pie scheduler instance */
 struct fq_pie_si {
 	struct dn_sch_inst _si;	/* standard scheduler instance. SHOULD BE FIRST */ 
 	struct dn_queue main_q; /* main queue is after si directly */
 	uint32_t perturbation; 	/* random value */
 	struct fq_pie_list newflows;	/* list of new queues */
 	struct fq_pie_list oldflows;	/* list of old queues */
 	struct fq_pie_si_extra *si_extra; /* extra state vars*/
 };
 
 
 static struct dn_alg fq_pie_desc;
 
 /* Default FQ-PIE parameters, including the PIE defaults:
  * target=15ms, max_burst=150ms, max_ecnth=0.1,
  * alpha=0.125, beta=1.25, tupdate=15ms
  * FQ defaults:
  * flows=1024, limit=10240, quantum=1514
  */
 struct dn_sch_fq_pie_parms
 fq_pie_sysctl = {{15000 * AQM_TIME_1US, 15000 * AQM_TIME_1US,
 	150000 * AQM_TIME_1US, PIE_SCALE * 0.1, PIE_SCALE * 0.125, 
 	PIE_SCALE * 1.25,	PIE_CAPDROP_ENABLED | PIE_DERAND_ENABLED},
 	1024, 10240, 1514};
 
 static int
 fqpie_sysctl_alpha_beta_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	long  value;
 
 	if (!strcmp(oidp->oid_name,"alpha"))
 		value = fq_pie_sysctl.pcfg.alpha;
 	else
 		value = fq_pie_sysctl.pcfg.beta;
 		
 	value = value * 1000 / PIE_SCALE;
 	error = sysctl_handle_long(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (value < 1 || value > 7 * PIE_SCALE)
 		return (EINVAL);
 	value = (value * PIE_SCALE) / 1000;
 	if (!strcmp(oidp->oid_name,"alpha"))
 			fq_pie_sysctl.pcfg.alpha = value;
 	else
 		fq_pie_sysctl.pcfg.beta = value;
 	return (0);
 }
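 
 /*
  * Worked example (illustrative): with the default alpha of
  * PIE_SCALE * 0.125, the handler above reports
  * value = (PIE_SCALE * 0.125) * 1000 / PIE_SCALE = 125; conversely,
  * writing 125 stores (125 * PIE_SCALE) / 1000 = PIE_SCALE * 0.125 back.
  */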
 
 static int
 fqpie_sysctl_target_tupdate_maxb_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	long  value;
 
 	if (!strcmp(oidp->oid_name,"target"))
 		value = fq_pie_sysctl.pcfg.qdelay_ref;
 	else if (!strcmp(oidp->oid_name,"tupdate"))
 		value = fq_pie_sysctl.pcfg.tupdate;
 	else
 		value = fq_pie_sysctl.pcfg.max_burst;
 	
 	value = value / AQM_TIME_1US;
 	error = sysctl_handle_long(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (value < 1 || value > 10 * AQM_TIME_1S)
 		return (EINVAL);
 	value = value * AQM_TIME_1US;
 	
 	if (!strcmp(oidp->oid_name,"target"))
 		fq_pie_sysctl.pcfg.qdelay_ref  = value;
 	else if (!strcmp(oidp->oid_name,"tupdate"))
 		fq_pie_sysctl.pcfg.tupdate  = value;
 	else
 		fq_pie_sysctl.pcfg.max_burst = value;
 	return (0);
 }
 
 static int
 fqpie_sysctl_max_ecnth_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	long  value;
 
 	value = fq_pie_sysctl.pcfg.max_ecnth;
 	value = value * 1000 / PIE_SCALE;
 	error = sysctl_handle_long(oidp, &value, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (value < 1 || value > PIE_SCALE)
 		return (EINVAL);
 	value = (value * PIE_SCALE) / 1000;
 	fq_pie_sysctl.pcfg.max_ecnth = value;
 	return (0);
 }
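 
 /*
  * Worked example (illustrative): the default max_ecnth of
  * PIE_SCALE * 0.1 is reported by the handler above as 100, i.e. the
  * sysctl exposes the ECN safeguard threshold as a probability scaled
  * by 1000.
  */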
 
 /* define FQ-PIE sysctl variables */
 SYSBEGIN(f4)
 SYSCTL_DECL(_net_inet);
 SYSCTL_DECL(_net_inet_ip);
 SYSCTL_DECL(_net_inet_ip_dummynet);
 static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqpie,
 	CTLFLAG_RW, 0, "FQ_PIE");
 
 #ifdef SYSCTL_NODE
 	
 SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, target,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
 	fqpie_sysctl_target_tupdate_maxb_handler, "L",
 	"queue target in microsecond");
 
 SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, tupdate,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
 	fqpie_sysctl_target_tupdate_maxb_handler, "L",
 	"the frequency of drop probability calculation in microsecond");
 
 SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, max_burst,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
 	fqpie_sysctl_target_tupdate_maxb_handler, "L",
 	"Burst allowance interval in microsecond");
 
 SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, max_ecnth,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
 	fqpie_sysctl_max_ecnth_handler, "L",
 	"ECN safeguard threshold scaled by 1000");
 
 SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, alpha,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
 	fqpie_sysctl_alpha_beta_handler, "L", "PIE alpha scaled by 1000");
 
 SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, beta,
 	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
 	fqpie_sysctl_alpha_beta_handler, "L", "beta scaled by 1000");
 
 SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, quantum,
 	CTLFLAG_RW, &fq_pie_sysctl.quantum, 1514, "quantum for FQ_PIE");
 SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, flows,
 	CTLFLAG_RW, &fq_pie_sysctl.flows_cnt, 1024, "Number of queues for FQ_PIE");
 SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, limit,
 	CTLFLAG_RW, &fq_pie_sysctl.limit, 10240, "limit for FQ_PIE");
 #endif
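 
 /*
  * Example usage (hypothetical shell session; values are illustrative):
  *   # sysctl net.inet.ip.dummynet.fqpie.target=15000
  *   # sysctl net.inet.ip.dummynet.fqpie.alpha=125
  *   # sysctl net.inet.ip.dummynet.fqpie.flows=1024
  * These sysctls set the defaults that fq_pie_config() below falls back
  * to when a scheduler is configured without explicit parameters.
  */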
 
 /* Helper function to update queue&main-queue and scheduler statistics.
  * negative len & drop -> drop
  * negative len -> dequeue
  * positive len -> enqueue
  * positive len + drop -> drop during enqueue
  */
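 /*
  * For example (taken from the callers below): a successful enqueue is
  * recorded as fq_update_stats(q, si, len, 0), a dequeue as
  * fq_update_stats(q, si, -m->m_pkthdr.len, 0) and a drop of an
  * already-queued packet as fq_update_stats(q, si, -m->m_pkthdr.len, 1).
  */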
 __inline static void
 fq_update_stats(struct fq_pie_flow *q, struct fq_pie_si *si, int len,
 	int drop)
 {
 	int inc = 0;
 
 	if (len < 0) 
 		inc = -1;
 	else if (len > 0)
 		inc = 1;
 
 	if (drop) {
 		si->main_q.ni.drops++;
 		q->stats.drops++;
 		si->_si.ni.drops++;
 		dn_cfg.io_pkt_drop++;
 	}
 
 	if (!drop || (drop && len < 0)) {
 		/* Update stats for the main queue */
 		si->main_q.ni.length += inc;
 		si->main_q.ni.len_bytes += len;
 
 		/*update sub-queue stats */
 		q->stats.length += inc;
 		q->stats.len_bytes += len;
 
 		/*update scheduler instance stats */
 		si->_si.ni.length += inc;
 		si->_si.ni.len_bytes += len;
 	}
 
 	if (inc > 0) {
 		si->main_q.ni.tot_bytes += len;
 		si->main_q.ni.tot_pkts ++;
 		
 		q->stats.tot_bytes +=len;
 		q->stats.tot_pkts++;
 		
 		si->_si.ni.tot_bytes +=len;
 		si->_si.ni.tot_pkts ++;
 	}
 
 }
 
 /*
  * Extract a packet from the head of sub-queue 'q'
  * Return a packet or NULL if the queue is empty.
  * If getts is set, also extract packet's timestamp from mtag.
  */
 __inline static struct mbuf *
 fq_pie_extract_head(struct fq_pie_flow *q, aqm_time_t *pkt_ts,
 	struct fq_pie_si *si, int getts)
 {
 	struct mbuf *m = q->mq.head;
 
 	if (m == NULL)
 		return m;
 	q->mq.head = m->m_nextpkt;
 
 	fq_update_stats(q, si, -m->m_pkthdr.len, 0);
 
 	if (si->main_q.ni.length == 0) /* queue is now idle */
 		si->main_q.q_time = dn_cfg.curr_time;
 
 	if (getts) {
 		/* extract the packet timestamp */
 		struct m_tag *mtag;
 
 		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
 		if (mtag == NULL) {
 			D("PIE timestamp mtag not found!");
 			*pkt_ts = 0;
 		} else {
 			*pkt_ts = *(aqm_time_t *)(mtag + 1);
 			m_tag_delete(m, mtag);
 		}
 	}
 	return m;
 }
 
 /*
  * Callout function for the drop probability calculation.
  * This function is called every tupdate interval and takes a pointer to an
  * FQ-PIE flow as its argument.  It is entered with the PIE mutex held
  * (CALLOUT_RETURNUNLOCKED) and must unlock it before returning.
  */
 static void
 fq_calculate_drop_prob(void *x)
 {
 	struct fq_pie_flow *q = (struct fq_pie_flow *) x;
 	struct pie_status *pst = &q->pst;
 	struct dn_aqm_pie_parms *pprms; 
 	int64_t p, prob, oldprob;
 	aqm_time_t now;
 	int p_isneg;
 
 	now = AQM_UNOW;
 	pprms = pst->parms;
 	prob = pst->drop_prob;
 
 	/*
 	 * Calculate the current qdelay using the DRE method.
 	 * If TS is used and there is no data in the queue, reset current_qdelay,
 	 * since it retains its last value while nothing is dequeued.
 	 */
 	if (pprms->flags & PIE_DEPRATEEST_ENABLED)
 		pst->current_qdelay = ((uint64_t)q->stats.len_bytes  * pst->avg_dq_time)
 			>> PIE_DQ_THRESHOLD_BITS;
 	else
 		if (!q->stats.len_bytes)
 			pst->current_qdelay = 0;
 
 	/* calculate drop probability */
 	p = (int64_t)pprms->alpha *
 		((int64_t)pst->current_qdelay - (int64_t)pprms->qdelay_ref);
 	p += (int64_t)pprms->beta *
 		((int64_t)pst->current_qdelay - (int64_t)pst->qdelay_old);
 
 	/* take absolute value so right shift result is well defined */
 	p_isneg = p < 0;
 	if (p_isneg) {
 		p = -p;
 	}
 		
 	/*
 	 * We scale by PIE_MAX_PROB shifted left by 12 bits to increase the
 	 * division precision; the extra 12 bits are removed in the shifts below.
 	 */
 	p *= (PIE_MAX_PROB << 12) / AQM_TIME_1S;
 
 	/* auto-tune drop probability */
 	if (prob < (PIE_MAX_PROB / 1000000)) /* 0.000001 */
 		p >>= 11 + PIE_FIX_POINT_BITS + 12;
 	else if (prob < (PIE_MAX_PROB / 100000)) /* 0.00001 */
 		p >>= 9 + PIE_FIX_POINT_BITS + 12;
 	else if (prob < (PIE_MAX_PROB / 10000)) /* 0.0001 */
 		p >>= 7 + PIE_FIX_POINT_BITS + 12;
 	else if (prob < (PIE_MAX_PROB / 1000)) /* 0.001 */
 		p >>= 5 + PIE_FIX_POINT_BITS + 12;
 	else if (prob < (PIE_MAX_PROB / 100)) /* 0.01 */
 		p >>= 3 + PIE_FIX_POINT_BITS + 12;
 	else if (prob < (PIE_MAX_PROB / 10)) /* 0.1 */
 		p >>= 1 + PIE_FIX_POINT_BITS + 12;
 	else
 		p >>= PIE_FIX_POINT_BITS + 12;
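 
 	/*
 	 * The shift ladder above mirrors the auto-tuning table of the PIE
 	 * specification (RFC 8033): the adjustment p is divided by 2048, 512,
 	 * 128, 32, 8 or 2 as drop_prob crosses 1e-6 through 0.1, and the
 	 * trailing PIE_FIX_POINT_BITS + 12 shift removes the fixed-point
 	 * scaling applied above.
 	 */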
 
 	oldprob = prob;
 
 	if (p_isneg) {
 		prob = prob - p;
 
 		/* check for subtraction underflow */
 		if (prob > oldprob) {
 			prob = 0;
 			D("underflow");
 		}
 	} else {
 		/* cap the drop probability adjustment */
 		if ((pprms->flags & PIE_CAPDROP_ENABLED) &&
 		    prob >= PIE_MAX_PROB / 10 &&
 		    p > PIE_MAX_PROB / 50) {
 			p = PIE_MAX_PROB / 50;
 		}
 
 		prob = prob + p;
 
 		/* check for addition overflow */
 		if (prob < oldprob) {
 			D("overflow");
 			prob = PIE_MAX_PROB;
 		}
 	}
 
 	/*
 	 * decay the drop probability exponentially
 	 * and restrict it to range 0 to PIE_MAX_PROB
 	 */
 	if (prob < 0) {
 		prob = 0;
 	} else {
 		if (pst->current_qdelay == 0 && pst->qdelay_old == 0) {
 			/* decay by a factor of 1 - 1/64 (~0.984) */
 			prob = prob - (prob >> 6);
 		}
 
 		if (prob > PIE_MAX_PROB) {
 			prob = PIE_MAX_PROB;
 		}
 	}
 
 	pst->drop_prob = prob;
 	
 	/* store current delay value */
 	pst->qdelay_old = pst->current_qdelay;
 
 	/* update burst allowance */
 	if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance) {
 		if (pst->burst_allowance > pprms->tupdate)
 			pst->burst_allowance -= pprms->tupdate;
 		else 
 			pst->burst_allowance = 0;
 	}
 
 	if (pst->sflags & PIE_ACTIVE)
 	callout_reset_sbt(&pst->aqm_pie_callout,
 		(uint64_t)pprms->tupdate * SBT_1US,
 		0, fq_calculate_drop_prob, q, 0);
 
 	mtx_unlock(&pst->lock_mtx);
 }
 
 /* 
  * Reset PIE variables & activate the queue
  */
 __inline static void
 fq_activate_pie(struct fq_pie_flow *q)
 { 
 	struct pie_status *pst = &q->pst;
 	struct dn_aqm_pie_parms *pprms;
 
 	mtx_lock(&pst->lock_mtx);
 	pprms = pst->parms;
 
 	pst->drop_prob = 0;
 	pst->qdelay_old = 0;
 	pst->burst_allowance = pprms->max_burst;
 	pst->accu_prob = 0;
 	pst->dq_count = 0;
 	pst->avg_dq_time = 0;
 	pst->sflags = PIE_INMEASUREMENT | PIE_ACTIVE;
 	pst->measurement_start = AQM_UNOW;
 	
 	callout_reset_sbt(&pst->aqm_pie_callout,
 		(uint64_t)pprms->tupdate * SBT_1US,
 		0, fq_calculate_drop_prob, q, 0);
 
 	mtx_unlock(&pst->lock_mtx);
 }
 
  
 /*
  * Deactivate PIE and stop the drop probability update callout.
  */
 __inline static void
 fq_deactivate_pie(struct pie_status *pst)
 { 
 	mtx_lock(&pst->lock_mtx);
 	pst->sflags &= ~(PIE_ACTIVE | PIE_INMEASUREMENT);
 	callout_stop(&pst->aqm_pie_callout);
 	//D("PIE Deactivated");
 	mtx_unlock(&pst->lock_mtx);
 }
 
 /*
  * Initialize PIE for sub-queue 'q'.
  */
 static int
 pie_init(struct fq_pie_flow *q, struct fq_pie_schk *fqpie_schk)
 {
 	struct pie_status *pst=&q->pst;
 	struct dn_aqm_pie_parms *pprms = pst->parms;
 
 	int err = 0;
 	if (!pprms){
 		D("AQM_PIE is not configured");
 		err = EINVAL;
 	} else {
 		q->psi_extra->nr_active_q++;
 
 		/* For speed optimization, we caculate 1/3 queue size once here */
 		// XXX limit divided by number of queues divided by 3 ??? 
 		pst->one_third_q_size = (fqpie_schk->cfg.limit / 
 			fqpie_schk->cfg.flows_cnt) / 3;
 
 		mtx_init(&pst->lock_mtx, "mtx_pie", NULL, MTX_DEF);
 		callout_init_mtx(&pst->aqm_pie_callout, &pst->lock_mtx,
 			CALLOUT_RETURNUNLOCKED);
 	}
 
 	return err;
 }
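 
 /*
  * Worked example (illustrative, using the defaults above): with
  * limit = 10240 and flows_cnt = 1024, one_third_q_size is
  * (10240 / 1024) / 3 = 3 packets per sub-queue.
  */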
 
 /*
  * Callout function to destroy the PIE lock and free the fq_pie flows and
  * fq_pie si extra memory when the number of active sub-queues reaches
  * zero.  'x' is the fq_pie_flow to be destroyed.
  */
 static void
 fqpie_callout_cleanup(void *x)
 {
 	struct fq_pie_flow *q = x;
 	struct pie_status *pst = &q->pst;
 	struct fq_pie_si_extra *psi_extra;
 
 	mtx_unlock(&pst->lock_mtx);
 	mtx_destroy(&pst->lock_mtx);
 	psi_extra = q->psi_extra;
 	
 	DN_BH_WLOCK();
 	psi_extra->nr_active_q--;
 
 	/* when all sub-queues are destroyed, free the flows array and the si extra vars */
 	if (!psi_extra->nr_active_q) {
 		free(psi_extra->flows, M_DUMMYNET);
 		free(psi_extra, M_DUMMYNET);
 		fq_pie_desc.ref_count--;
 	}
 	DN_BH_WUNLOCK();
 }
 
 /*
  * Clean up the PIE status for sub-queue 'q'.
  * Stop the callout timer and destroy the mtx via the fqpie_callout_cleanup()
  * callout.
  */
 static int
 pie_cleanup(struct fq_pie_flow *q)
 {
 	struct pie_status *pst  = &q->pst;
 
 	mtx_lock(&pst->lock_mtx);
 	callout_reset_sbt(&pst->aqm_pie_callout,
 		SBT_1US, 0, fqpie_callout_cleanup, q, 0);
 	mtx_unlock(&pst->lock_mtx);
 	return 0;
 }
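 
 /*
  * Note: the mutex is not destroyed here because the drop probability
  * callout may still be pending; instead, fqpie_callout_cleanup() above
  * runs as a callout owning the lock (CALLOUT_RETURNUNLOCKED) and performs
  * the actual destruction.
  */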
 
 /*
  * Dequeue and return a packet from sub-queue 'q', or NULL if 'q' is empty.
  * Also calculate the departure time or queue delay using a timestamp.
  */
 static struct mbuf *
 pie_dequeue(struct fq_pie_flow *q, struct fq_pie_si *si)
 {
 	struct mbuf *m;
 	struct dn_aqm_pie_parms *pprms;
 	struct pie_status *pst;
 	aqm_time_t now;
 	aqm_time_t pkt_ts, dq_time;
 	int32_t w;
 
 	pst  = &q->pst;
 	pprms = q->pst.parms;
 
 	/*we extarct packet ts only when Departure Rate Estimation dis not used*/
 	m = fq_pie_extract_head(q, &pkt_ts, si, 
 		!(pprms->flags & PIE_DEPRATEEST_ENABLED));
 	
 	if (!m || !(pst->sflags & PIE_ACTIVE))
 		return m;
 
 	now = AQM_UNOW;
 	if (pprms->flags & PIE_DEPRATEEST_ENABLED) {
 		/* calculate average depature time */
 		if(pst->sflags & PIE_INMEASUREMENT) {
 			pst->dq_count += m->m_pkthdr.len;
 
 			if (pst->dq_count >= PIE_DQ_THRESHOLD) {
 				dq_time = now - pst->measurement_start;
 
 				/* 
 				 * if we don't have old avg dq_time i.e PIE is (re)initialized, 
 				 * don't use weight to calculate new avg_dq_time
 				 */
 				if(pst->avg_dq_time == 0)
 					pst->avg_dq_time = dq_time;
 				else {
 					/* 
 					 * weight = PIE_DQ_THRESHOLD/2^6, but we scaled 
 					 * weight by 2^8. Thus, scaled 
 					 * weight = PIE_DQ_THRESHOLD /2^8 
 					 * */
 					w = PIE_DQ_THRESHOLD >> 8;
 					pst->avg_dq_time = (dq_time* w
 						+ (pst->avg_dq_time * ((1L << 8) - w))) >> 8;
 					pst->sflags &= ~PIE_INMEASUREMENT;
 				}
 			}
 		}
 
 		/*
 		 * Start a new measurement cycle when the queue has
 		 * PIE_DQ_THRESHOLD worth of bytes.
 		 */
 		if (!(pst->sflags & PIE_INMEASUREMENT) &&
 			q->stats.len_bytes >= PIE_DQ_THRESHOLD) {
 			pst->sflags |= PIE_INMEASUREMENT;
 			pst->measurement_start = now;
 			pst->dq_count = 0;
 		}
 	}
 	/* Optionally, use packet timestamp to estimate queue delay */
 	else
 		pst->current_qdelay = now - pkt_ts;
 
 	return m;	
 }
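 
 /*
  * Worked example (illustrative; assumes PIE_DQ_THRESHOLD is
  * 1 << PIE_DQ_THRESHOLD_BITS with PIE_DQ_THRESHOLD_BITS = 14, as in
  * dn_aqm_pie.h): w = 16384 >> 8 = 64, so the update above is the EWMA
  * avg_dq_time = (64 * dq_time + 192 * avg_dq_time) / 256, i.e. a weight
  * of 1/4 on the new sample.
  */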
 
 
 /*
  * Enqueue a packet in sub-queue 'q', subject to space and to the FQ-PIE
  * queue management policy.
  * Update stats for the sub-queue and the scheduler.
  * Return 0 on success, 1 on drop.  The packet is consumed either way.
  */
 static int
 pie_enqueue(struct fq_pie_flow *q, struct mbuf* m, struct fq_pie_si *si)
 {
 	uint64_t len;
 	struct pie_status *pst;
 	struct dn_aqm_pie_parms *pprms;
 	int t;
 
 	len = m->m_pkthdr.len;
 	pst  = &q->pst;
 	pprms = pst->parms;
 	t = ENQUE;
 
 	/* drop/mark the packet when PIE is active and the burst time has elapsed */
 	if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance == 0
 		&& drop_early(pst, q->stats.len_bytes) == DROP) {
 		/*
 		 * If drop_prob is over the ECN threshold, drop the packet;
 		 * otherwise mark and enqueue it.
 		 */
 		if ((pprms->flags & PIE_ECN_ENABLED) && pst->drop_prob <
 			(pprms->max_ecnth << (PIE_PROB_BITS - PIE_FIX_POINT_BITS))
 			&& ecn_mark(m))
 			t = ENQUE;
 		else
 			t = DROP;
 	}
 
 	/* Turn PIE on when 1/3 of the queue is full */ 
 	if (!(pst->sflags & PIE_ACTIVE) && q->stats.len_bytes >= 
 		pst->one_third_q_size) {
 		fq_activate_pie(q);
 	}
 
 	/* reset burst tolerance and optionally turn PIE off */
 	if (pst->drop_prob == 0 && pst->current_qdelay < (pprms->qdelay_ref >> 1)
 		&& pst->qdelay_old < (pprms->qdelay_ref >> 1)) {
 		pst->burst_allowance = pprms->max_burst;
 		if ((pprms->flags & PIE_ON_OFF_MODE_ENABLED) &&
 			q->stats.len_bytes <= 0)
 			fq_deactivate_pie(pst);
 	}
 
 	/* Use timestamp if Departure Rate Estimation mode is disabled */
 	if (t != DROP && !(pprms->flags & PIE_DEPRATEEST_ENABLED)) {
 		/* Add TS to mbuf as a TAG */
 		struct m_tag *mtag;
 		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
 		if (mtag == NULL)
 			mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
 				sizeof(aqm_time_t), M_NOWAIT);
 		if (mtag == NULL) {
-			m_freem(m); 
 			t = DROP;
+		} else {
+			*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
+			m_tag_prepend(m, mtag);
 		}
-		*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
-		m_tag_prepend(m, mtag);
 	}
 
 	if (t != DROP) {
 		mq_append(&q->mq, m);
 		fq_update_stats(q, si, len, 0);
 		return 0;
 	} else {
 		fq_update_stats(q, si, len, 1);
 		pst->accu_prob = 0;
 		FREE_PKT(m);
 		return 1;
 	}
 }
 
 /* Drop a packet from the head of the FQ-PIE sub-queue */
 static void
 pie_drop_head(struct fq_pie_flow *q, struct fq_pie_si *si)
 {
 	struct mbuf *m = q->mq.head;
 
 	if (m == NULL)
 		return;
 	q->mq.head = m->m_nextpkt;
 
 	fq_update_stats(q, si, -m->m_pkthdr.len, 1);
 
 	if (si->main_q.ni.length == 0) /* queue is now idle */
 		si->main_q.q_time = dn_cfg.curr_time;
 	/* reset accu_prob after packet drop */
 	q->pst.accu_prob = 0;
 	
 	FREE_PKT(m);
 }
 
 /*
  * Classify a packet to a queue number using the Jenkins hash function.
  * Return: the queue number.
  * The inputs to the hash are the protocol number, the perturbation value,
  * src IP, dst IP, src port and dst port.
  */
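 /*
  * Hash tuple layout used below (byte offsets into 'tuple'):
  *   IPv4 (17 bytes hashed): [0] proto, [1-4] perturbation, [5-8] src IP,
  *                           [9-12] dst IP, [13-14] dst port, [15-16] src port
  *   IPv6 (41 bytes hashed): [0] next header, [1-4] perturbation,
  *                           [5-20] src IP, [21-36] dst IP,
  *                           [37-38] dst port, [39-40] src port
  */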
 static inline int
 fq_pie_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_pie_si *si)
 {
 	struct ip *ip;
 	struct tcphdr *th;
 	struct udphdr *uh;
 	uint8_t tuple[41];
 	uint16_t hash = 0;
 
 	ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
 //#ifdef INET6
 	struct ip6_hdr *ip6;
 	int isip6;
 	isip6 = (ip->ip_v == 6);
 
 	if (isip6) {
 		ip6 = (struct ip6_hdr *)ip;
 		*((uint8_t *) &tuple[0]) = ip6->ip6_nxt;
 		*((uint32_t *) &tuple[1]) = si->perturbation;
 		memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16);
 		memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16);
 
 		switch (ip6->ip6_nxt) {
 		case IPPROTO_TCP:
 			th = (struct tcphdr *)(ip6 + 1);
 			*((uint16_t *) &tuple[37]) = th->th_dport;
 			*((uint16_t *) &tuple[39]) = th->th_sport;
 			break;
 
 		case IPPROTO_UDP:
 			uh = (struct udphdr *)(ip6 + 1);
 			*((uint16_t *) &tuple[37]) = uh->uh_dport;
 			*((uint16_t *) &tuple[39]) = uh->uh_sport;
 			break;
 		default:
 			memset(&tuple[37], 0, 4);
 		}
 
 		hash = jenkins_hash(tuple, 41, HASHINIT) % fcount;
 		return hash;
 	} 
 //#endif
 
 	/* IPv4 */
 	*((uint8_t *) &tuple[0]) = ip->ip_p;
 	*((uint32_t *) &tuple[1]) = si->perturbation;
 	*((uint32_t *) &tuple[5]) = ip->ip_src.s_addr;
 	*((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr;
 
 	switch (ip->ip_p) {
 	case IPPROTO_TCP:
 		th = (struct tcphdr *)(ip + 1);
 		*((uint16_t *) &tuple[13]) = th->th_dport;
 		*((uint16_t *) &tuple[15]) = th->th_sport;
 		break;
 
 	case IPPROTO_UDP:
 		uh = (struct udphdr *)(ip + 1);
 		*((uint16_t *) &tuple[13]) = uh->uh_dport;
 		*((uint16_t *) &tuple[15]) = uh->uh_sport;
 		break;
 	default:
 		memset(&tuple[13], 0, 4);
 	}
 	hash = jenkins_hash(tuple, 17, HASHINIT) % fcount;
 
 	return hash;
 }
 
 /*
  * Enqueue a packet into the appropriate sub-queue according to the
  * FQ-CoDel algorithm.
  */
 static int 
 fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q, 
 	struct mbuf *m)
 { 
 	struct fq_pie_si *si;
 	struct fq_pie_schk *schk;
 	struct dn_sch_fq_pie_parms *param;
 	struct dn_queue *mainq;
 	struct fq_pie_flow *flows;
 	int idx, drop, i, maxidx;
 
 	mainq = (struct dn_queue *)(_si + 1);
 	si = (struct fq_pie_si *)_si;
 	flows = si->si_extra->flows;
 	schk = (struct fq_pie_schk *)(si->_si.sched+1);
 	param = &schk->cfg;
 
 	/* classify the packet to a queue number */
 	idx = fq_pie_classify_flow(m, param->flows_cnt, si);
 
 	/*
 	 * Enqueue the packet into the appropriate sub-queue using the PIE AQM.
 	 * Note: pie_enqueue() returns 1 when the packet is dropped, either by
 	 * the PIE AQM or because a timestamp mtag could not be allocated; it
 	 * performs no queue-limit check.
 	 */
 	drop = pie_enqueue(&flows[idx], m, si);
 
 	/* the packet was dropped during enqueue */
 	if (drop)
 		return 1;
 	
 	/*
 	 * If the flow (sub-queue) is not active, then add it to the tail of
 	 * the new flows list, initialize its deficit and activate it.
 	 */
 	if (!flows[idx].active) {
 		STAILQ_INSERT_TAIL(&si->newflows, &flows[idx], flowchain);
 		flows[idx].deficit = param->quantum;
 		fq_activate_pie(&flows[idx]);
 		flows[idx].active = 1;
 	}
 
 	/* check the limit for all queues and remove a packet from the
 	 * largest one 
 	 */
 	if (mainq->ni.length > schk->cfg.limit) {
 		/* find first active flow */
 		for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
 			if (flows[maxidx].active)
 				break;
 		if (maxidx < schk->cfg.flows_cnt) {
 			/* find the largest sub-queue */
 			for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++) 
 				if (flows[i].active && flows[i].stats.length >
 					flows[maxidx].stats.length)
 					maxidx = i;
 			pie_drop_head(&flows[maxidx], si);
 			drop = 1;
 		}
 	}
 
 	return drop;
 }
 
 /*
  * Dequeue a packet from the appropriate sub-queue according to the
  * FQ-CoDel algorithm.
  */
 static struct mbuf *
 fq_pie_dequeue(struct dn_sch_inst *_si)
 { 
 	struct fq_pie_si *si;
 	struct fq_pie_schk *schk;
 	struct dn_sch_fq_pie_parms *param;
 	struct fq_pie_flow *f;
 	struct mbuf *mbuf;
 	struct fq_pie_list *fq_pie_flowlist;
 
 	si = (struct fq_pie_si *)_si;
 	schk = (struct fq_pie_schk *)(si->_si.sched+1);
 	param = &schk->cfg;
 
 	do {
 		/* select a list to start with */
 		if (STAILQ_EMPTY(&si->newflows))
 			fq_pie_flowlist = &si->oldflows;
 		else
 			fq_pie_flowlist = &si->newflows;
 
 		/* Both new and old queue lists are empty, return NULL */
 		if (STAILQ_EMPTY(fq_pie_flowlist)) 
 			return NULL;
 
 		f = STAILQ_FIRST(fq_pie_flowlist);
 		while (f != NULL) {
 			/*
 			 * If the flow (sub-queue) has run out of deficit,
 			 * increase its deficit by quantum, move the flow to
 			 * the tail of the old flows list and try another flow.
 			 * Otherwise, the flow will be used for dequeue.
 			 */
 			if (f->deficit < 0) {
 				f->deficit += param->quantum;
 				STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
 				STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
 			} else
 				break;
 
 			f = STAILQ_FIRST(fq_pie_flowlist);
 		}
 		
 		/* the selected flow list became empty, start over */
 		if (STAILQ_EMPTY(fq_pie_flowlist))
 			continue;
 
 		/* Dequeue a packet from the selected flow */
 		mbuf = pie_dequeue(f, si);
 
 		/* pie did not return a packet */
 		if (!mbuf) {
 			/*
 			 * If the selected flow belongs to the new flows list,
 			 * then move it to the tail of the old flows list.
 			 * Otherwise, deactivate it and remove it from the old
 			 * flows list.
 			 */
 			if (fq_pie_flowlist == &si->newflows) {
 				STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
 				STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
 			}	else {
 				f->active = 0;
 				fq_deactivate_pie(&f->pst);
 				STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
 			}
 			/* start again */
 			continue;
 		}
 
 		/*
 		 * We have a packet to return; update the flow deficit and
 		 * return the packet.
 		 */
 		f->deficit -= mbuf->m_pkthdr.len;
 		return mbuf;
 
 	} while (1);
 	
 	/* unreachable point */
 	return NULL;
 }
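 
 /*
  * The loop above implements the deficit round-robin scan used by
  * FQ-CoDel: new flows are served before old flows, a flow with a
  * negative deficit is rotated to the tail of the old flows list with its
  * deficit replenished by 'quantum', and an old flow that yields no
  * packet is deactivated.
  */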
 
 /*
  * Initialize the fq_pie scheduler instance and allocate memory for the
  * flows array.
  */
 static int
 fq_pie_new_sched(struct dn_sch_inst *_si)
 {
 	struct fq_pie_si *si;
 	struct dn_queue *q;
 	struct fq_pie_schk *schk;
 	struct fq_pie_flow *flows;
 	int i;
 
 	si = (struct fq_pie_si *)_si;
 	schk = (struct fq_pie_schk *)(_si->sched+1);
 
 	if (si->si_extra) {
 		D("si already configured!");
 		return 0;
 	}
 
 	/* init the main queue */
 	q = &si->main_q;
 	set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
 	q->_si = _si;
 	q->fs = _si->sched->fs;
 
 	/* allocate memory for scheduler instance extra vars */
 	si->si_extra = malloc(sizeof(struct fq_pie_si_extra),
 		 M_DUMMYNET, M_NOWAIT | M_ZERO);
 	if (si->si_extra == NULL) {
 		D("cannot allocate memory for fq_pie si extra vars");
 		return ENOMEM;
 	}
 	/* allocate memory for flows array */
 	si->si_extra->flows = mallocarray(schk->cfg.flows_cnt,
 	    sizeof(struct fq_pie_flow), M_DUMMYNET, M_NOWAIT | M_ZERO);
 	flows = si->si_extra->flows;
 	if (flows == NULL) {
 		free(si->si_extra, M_DUMMYNET);
 		si->si_extra = NULL;
 		D("cannot allocate memory for fq_pie flows");
 		return ENOMEM;
 	}
 
 	/* init perturbation for this si */
 	si->perturbation = random();
 	si->si_extra->nr_active_q = 0;
 
 	/* init the old and new flows lists */
 	STAILQ_INIT(&si->newflows);
 	STAILQ_INIT(&si->oldflows);
 
 	/* init the flows (sub-queues) */
 	for (i = 0; i < schk->cfg.flows_cnt; i++) {
 		flows[i].pst.parms = &schk->cfg.pcfg;
 		flows[i].psi_extra = si->si_extra;
 		pie_init(&flows[i], schk);
 	}
 
 	fq_pie_desc.ref_count++;
 
 	return 0;
 }
 
 
 /*
  * Free fq_pie scheduler instance.
  */
 static int
 fq_pie_free_sched(struct dn_sch_inst *_si)
 {
 	struct fq_pie_si *si;
 	struct fq_pie_schk *schk;
 	struct fq_pie_flow *flows;
 	int i;
 
 	si = (struct fq_pie_si *)_si;
 	schk = (struct fq_pie_schk *)(_si->sched+1);
 	flows = si->si_extra->flows;
 	for (i = 0; i < schk->cfg.flows_cnt; i++) {
 		pie_cleanup(&flows[i]);
 	}
 	si->si_extra = NULL;
 	return 0;
 }
 
 /*
  * Configure the FQ-PIE scheduler.
  * The scheduler configuration is passed from the ipfw userland utility.
  */
 static int
 fq_pie_config(struct dn_schk *_schk)
 {
 	struct fq_pie_schk *schk;
 	struct dn_extra_parms *ep;
 	struct dn_sch_fq_pie_parms *fqp_cfg;
 	
 	schk = (struct fq_pie_schk *)(_schk+1);
 	ep = (struct dn_extra_parms *) _schk->cfg;
 
 	/*
 	 * The par array contains the fq_pie configuration as follows:
 	 * PIE: 0- qdelay_ref, 1- tupdate, 2- max_burst,
 	 * 3- max_ecnth, 4- alpha, 5- beta, 6- flags
 	 * FQ_PIE: 7- quantum, 8- limit, 9- flows
 	 */
 	if (ep && ep->oid.len ==sizeof(*ep) &&
 		ep->oid.subtype == DN_SCH_PARAMS) {
 
 		fqp_cfg = &schk->cfg;
 		if (ep->par[0] < 0)
 			fqp_cfg->pcfg.qdelay_ref = fq_pie_sysctl.pcfg.qdelay_ref;
 		else
 			fqp_cfg->pcfg.qdelay_ref = ep->par[0];
 		if (ep->par[1] < 0)
 			fqp_cfg->pcfg.tupdate = fq_pie_sysctl.pcfg.tupdate;
 		else
 			fqp_cfg->pcfg.tupdate = ep->par[1];
 		if (ep->par[2] < 0)
 			fqp_cfg->pcfg.max_burst = fq_pie_sysctl.pcfg.max_burst;
 		else
 			fqp_cfg->pcfg.max_burst = ep->par[2];
 		if (ep->par[3] < 0)
 			fqp_cfg->pcfg.max_ecnth = fq_pie_sysctl.pcfg.max_ecnth;
 		else
 			fqp_cfg->pcfg.max_ecnth = ep->par[3];
 		if (ep->par[4] < 0)
 			fqp_cfg->pcfg.alpha = fq_pie_sysctl.pcfg.alpha;
 		else
 			fqp_cfg->pcfg.alpha = ep->par[4];
 		if (ep->par[5] < 0)
 			fqp_cfg->pcfg.beta = fq_pie_sysctl.pcfg.beta;
 		else
 			fqp_cfg->pcfg.beta = ep->par[5];
 		if (ep->par[6] < 0)
 			fqp_cfg->pcfg.flags = 0;
 		else
 			fqp_cfg->pcfg.flags = ep->par[6];
 
 		/* FQ configurations */
 		if (ep->par[7] < 0)
 			fqp_cfg->quantum = fq_pie_sysctl.quantum;
 		else
 			fqp_cfg->quantum = ep->par[7];
 		if (ep->par[8] < 0)
 			fqp_cfg->limit = fq_pie_sysctl.limit;
 		else
 			fqp_cfg->limit = ep->par[8];
 		if (ep->par[9] < 0)
 			fqp_cfg->flows_cnt = fq_pie_sysctl.flows_cnt;
 		else
 			fqp_cfg->flows_cnt = ep->par[9];
 
 		/* Bound the configurations */
 		fqp_cfg->pcfg.qdelay_ref = BOUND_VAR(fqp_cfg->pcfg.qdelay_ref,
 			1, 5 * AQM_TIME_1S);
 		fqp_cfg->pcfg.tupdate = BOUND_VAR(fqp_cfg->pcfg.tupdate,
 			1, 5 * AQM_TIME_1S);
 		fqp_cfg->pcfg.max_burst = BOUND_VAR(fqp_cfg->pcfg.max_burst,
 			0, 5 * AQM_TIME_1S);
 		fqp_cfg->pcfg.max_ecnth = BOUND_VAR(fqp_cfg->pcfg.max_ecnth,
 			0, PIE_SCALE);
 		fqp_cfg->pcfg.alpha = BOUND_VAR(fqp_cfg->pcfg.alpha, 0, 7 * PIE_SCALE);
 		fqp_cfg->pcfg.beta = BOUND_VAR(fqp_cfg->pcfg.beta, 0, 7 * PIE_SCALE);
 
 		fqp_cfg->quantum = BOUND_VAR(fqp_cfg->quantum, 1, 9000);
 		fqp_cfg->limit = BOUND_VAR(fqp_cfg->limit, 1, 20480);
 		fqp_cfg->flows_cnt = BOUND_VAR(fqp_cfg->flows_cnt, 1, 65536);
 	} else {
 		D("Wrong parameters for fq_pie scheduler");
 		return 1;
 	}
 
 	return 0;
 }
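 
 /*
  * Example (hypothetical ipfw(8) invocation; the syntax is illustrative):
  *   ipfw pipe 1 config bw 10Mbit/s
  *   ipfw sched 1 config pipe 1 type fq_pie target 15ms quantum 1514
  * Any parameter not supplied by userland arrives here as a negative
  * par[] entry and falls back to the corresponding fq_pie_sysctl default.
  */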
 
 /*
  * Return the FQ-PIE scheduler configuration; it is passed to userland.
  */
 static int
 fq_pie_getconfig(struct dn_schk *_schk, struct dn_extra_parms *ep)
 {
 	struct fq_pie_schk *schk = (struct fq_pie_schk *)(_schk+1);
 	struct dn_sch_fq_pie_parms *fqp_cfg;
 
 	fqp_cfg = &schk->cfg;
 
 	strcpy(ep->name, fq_pie_desc.name);
 	ep->par[0] = fqp_cfg->pcfg.qdelay_ref;
 	ep->par[1] = fqp_cfg->pcfg.tupdate;
 	ep->par[2] = fqp_cfg->pcfg.max_burst;
 	ep->par[3] = fqp_cfg->pcfg.max_ecnth;
 	ep->par[4] = fqp_cfg->pcfg.alpha;
 	ep->par[5] = fqp_cfg->pcfg.beta;
 	ep->par[6] = fqp_cfg->pcfg.flags;
 	
 	ep->par[7] = fqp_cfg->quantum;
 	ep->par[8] = fqp_cfg->limit;
 	ep->par[9] = fqp_cfg->flows_cnt;
 
 	return 0;
 }
 
 /*
  * FQ-PIE scheduler descriptor.
  * Contains the type of the scheduler, the name, the size of the extra
  * data structures, and function pointers.
  */
 static struct dn_alg fq_pie_desc = {
 	_SI( .type = )  DN_SCHED_FQ_PIE,
 	_SI( .name = ) "FQ_PIE",
 	_SI( .flags = ) 0,
 
 	_SI( .schk_datalen = ) sizeof(struct fq_pie_schk),
 	_SI( .si_datalen = ) sizeof(struct fq_pie_si) - sizeof(struct dn_sch_inst),
 	_SI( .q_datalen = ) 0,
 
 	_SI( .enqueue = ) fq_pie_enqueue,
 	_SI( .dequeue = ) fq_pie_dequeue,
 	_SI( .config = ) fq_pie_config, /* new sched i.e. sched X config ...*/
 	_SI( .destroy = ) NULL,  /*sched x delete */
 	_SI( .new_sched = ) fq_pie_new_sched, /* new schd instance */
 	_SI( .free_sched = ) fq_pie_free_sched,	/* delete schd instance */
 	_SI( .new_fsk = ) NULL,
 	_SI( .free_fsk = ) NULL,
 	_SI( .new_queue = ) NULL,
 	_SI( .free_queue = ) NULL,
 	_SI( .getconfig = )  fq_pie_getconfig,
 	_SI( .ref_count = ) 0
 };
 
 DECLARE_DNSCHED_MODULE(dn_fq_pie, &fq_pie_desc);