diff --git a/sys/net/altq/altq_cbq.c b/sys/net/altq/altq_cbq.c index 6c6557725354..0e267fcb49e2 100644 --- a/sys/net/altq/altq_cbq.c +++ b/sys/net/altq/altq_cbq.c @@ -1,563 +1,563 @@ /*- * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the SMCC Technology * Development Group at Sun Microsystems, Inc. * * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is * provided "as is" without express or implied warranty of any kind. * * These notices must be retained in any copies of any part of this software. * * $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Forward Declarations. */ static int cbq_class_destroy(cbq_state_t *, struct rm_class *); static struct rm_class *clh_to_clp(cbq_state_t *, u_int32_t); static int cbq_clear_interface(cbq_state_t *); static int cbq_request(struct ifaltq *, int, void *); static int cbq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *cbq_dequeue(struct ifaltq *, int); static void cbqrestart(struct ifaltq *); static void get_class_stats(class_stats_t *, struct rm_class *); static void cbq_purge(cbq_state_t *); /* * int * cbq_class_destroy(cbq_mod_state_t *, struct rm_class *) - This * function destroys a given traffic class. Before destroying * the class, all traffic for that class is released. */ static int cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl) { int i; /* delete the class */ rmc_delete_class(&cbqp->ifnp, cl); /* * free the class handle */ for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == cl) cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; return (0); } /* convert class handle to class pointer */ static struct rm_class * clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle) { int i; struct rm_class *cl; if (chandle == 0) return (NULL); /* * first, try optimistically the slot matching the lower bits of * the handle. if it fails, do the linear table search. 
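 * (the optimistic probe usually succeeds because cbq_add_queue() below
 * also prefers the slot at qid % CBQ_MAX_CLASSES when it stores a new
 * class, falling back to the first free slot only on a collision.)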
*/ i = chandle % CBQ_MAX_CLASSES; if ((cl = cbqp->cbq_class_tbl[i]) != NULL && cl->stats_.handle == chandle) return (cl); for (i = 0; i < CBQ_MAX_CLASSES; i++) if ((cl = cbqp->cbq_class_tbl[i]) != NULL && cl->stats_.handle == chandle) return (cl); return (NULL); } static int cbq_clear_interface(cbq_state_t *cbqp) { int again, i; struct rm_class *cl; #ifdef ALTQ3_CLFIER_COMPAT /* free the filters for this interface */ acc_discard_filters(&cbqp->cbq_classifier, NULL, 1); #endif /* clear out the classes now */ do { again = 0; for (i = 0; i < CBQ_MAX_CLASSES; i++) { if ((cl = cbqp->cbq_class_tbl[i]) != NULL) { if (is_a_parent_class(cl)) again++; else { cbq_class_destroy(cbqp, cl); cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; } } } } while (again); return (0); } static int cbq_request(struct ifaltq *ifq, int req, void *arg) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: cbq_purge(cbqp); break; } return (0); } /* copy the stats info in rm_class to class_states_t */ static void get_class_stats(class_stats_t *statsp, struct rm_class *cl) { statsp->xmit_cnt = cl->stats_.xmit_cnt; statsp->drop_cnt = cl->stats_.drop_cnt; statsp->over = cl->stats_.over; statsp->borrows = cl->stats_.borrows; statsp->overactions = cl->stats_.overactions; statsp->delays = cl->stats_.delays; statsp->depth = cl->depth_; statsp->priority = cl->pri_; statsp->maxidle = cl->maxidle_; statsp->minidle = cl->minidle_; statsp->offtime = cl->offtime_; statsp->qmax = qlimit(cl->q_); statsp->ns_per_byte = cl->ns_per_byte_; statsp->wrr_allot = cl->w_allotment_; statsp->qcnt = qlen(cl->q_); statsp->avgidle = cl->avgidle_; statsp->qtype = qtype(cl->q_); #ifdef ALTQ_RED if (q_is_red(cl->q_)) red_getstats(cl->red_, &statsp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) rio_getstats((rio_t *)cl->red_, &statsp->red[0]); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->q_)) codel_getstats(cl->codel_, &statsp->codel); #endif } int cbq_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); s = splnet(); error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc, - cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL); + cbq_enqueue, cbq_dequeue, cbq_request); splx(s); return (error); } int cbq_add_altq(struct ifnet *ifp, struct pf_altq *a) { cbq_state_t *cbqp; if (ifp == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); /* allocate and initialize cbq_state_t */ cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cbqp == NULL) return (ENOMEM); CALLOUT_INIT(&cbqp->cbq_callout); cbqp->cbq_qlen = 0; cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ /* keep the state in pf_altq */ a->altq_disc = cbqp; return (0); } int cbq_remove_altq(struct pf_altq *a) { cbq_state_t *cbqp; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; cbq_clear_interface(cbqp); if (cbqp->ifnp.default_) cbq_class_destroy(cbqp, cbqp->ifnp.default_); if (cbqp->ifnp.root_) cbq_class_destroy(cbqp, cbqp->ifnp.root_); /* deallocate cbq_state_t */ free(cbqp, M_DEVBUF); return (0); } int cbq_add_queue(struct pf_altq *a) { struct rm_class *borrow, *parent; cbq_state_t *cbqp; struct rm_class *cl; struct cbq_opts *opts; int i; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); if (a->qid == 0) return (EINVAL); /* * find a free slot in the class table. 
if the slot matching * the lower bits of qid is free, use this slot. otherwise, * use the first free slot. */ i = a->qid % CBQ_MAX_CLASSES; if (cbqp->cbq_class_tbl[i] != NULL) { for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == NULL) break; if (i == CBQ_MAX_CLASSES) return (EINVAL); } opts = &a->pq_u.cbq_opts; /* check parameters */ if (a->priority >= CBQ_MAXPRI) return (EINVAL); /* Get pointers to parent and borrow classes. */ parent = clh_to_clp(cbqp, a->parent_qid); if (opts->flags & CBQCLF_BORROW) borrow = parent; else borrow = NULL; /* * A class must borrow from it's parent or it can not * borrow at all. Hence, borrow can be null. */ if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) { printf("cbq_add_queue: no parent class!\n"); return (EINVAL); } if ((borrow != parent) && (borrow != NULL)) { printf("cbq_add_class: borrow class != parent\n"); return (EINVAL); } /* * check parameters */ switch (opts->flags & CBQCLF_CLASSMASK) { case CBQCLF_ROOTCLASS: if (parent != NULL) return (EINVAL); if (cbqp->ifnp.root_) return (EINVAL); break; case CBQCLF_DEFCLASS: if (cbqp->ifnp.default_) return (EINVAL); break; case 0: if (a->qid == 0) return (EINVAL); break; default: /* more than two flags bits set */ return (EINVAL); } /* * create a class. if this is a root class, initialize the * interface. */ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte, cbqrestart, a->qlimit, RM_MAXQUEUED, opts->maxidle, opts->minidle, opts->offtime, opts->flags); cl = cbqp->ifnp.root_; } else { cl = rmc_newclass(a->priority, &cbqp->ifnp, opts->ns_per_byte, rmc_delay_action, a->qlimit, parent, borrow, opts->maxidle, opts->minidle, opts->offtime, opts->pktsize, opts->flags); } if (cl == NULL) return (ENOMEM); /* return handle to user space. */ cl->stats_.handle = a->qid; cl->stats_.depth = cl->depth_; /* save the allocated class */ cbqp->cbq_class_tbl[i] = cl; if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) cbqp->ifnp.default_ = cl; return (0); } int cbq_remove_queue(struct pf_altq *a) { struct rm_class *cl; cbq_state_t *cbqp; int i; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) return (EINVAL); /* if we are a parent class, then return an error. */ if (is_a_parent_class(cl)) return (EINVAL); /* delete the class */ rmc_delete_class(&cbqp->ifnp, cl); /* * free the class handle */ for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == cl) { cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; break; } return (0); } int cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { cbq_state_t *cbqp; struct rm_class *cl; class_stats_t stats; int error = 0; if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * int * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr) * - Queue data packets. * * cbq_enqueue is set to ifp->if_altqenqueue and called by an upper * layer (e.g. ether_output). cbq_enqueue queues the given packet * to the cbq, then invokes the driver's start routine. * * Assumptions: called in splimp * Returns: 0 if the queueing is successful. 
* ENOBUFS if a packet dropping occurred as a result of * the queueing. */ static int cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; struct rm_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(cbqp, t->qid); if (cl == NULL) { cl = cbqp->ifnp.default_; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } cl->pktattr_ = NULL; len = m_pktlen(m); if (rmc_queue_packet(cl, m) != 0) { /* drop occurred. some mbuf was freed in rmc_queue_packet. */ PKTCNTR_ADD(&cl->stats_.drop_cnt, len); return (ENOBUFS); } /* successfully queued. */ ++cbqp->cbq_qlen; IFQ_INC_LEN(ifq); return (0); } static struct mbuf * cbq_dequeue(struct ifaltq *ifq, int op) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); m = rmc_dequeue_next(&cbqp->ifnp, op); if (m && op == ALTDQ_REMOVE) { --cbqp->cbq_qlen; /* decrement # of packets in cbq */ IFQ_DEC_LEN(ifq); /* Update the class. */ rmc_update_class_util(&cbqp->ifnp); } return (m); } /* * void * cbqrestart(queue_t *) - Restart sending of data. * called from rmc_restart in splimp via timeout after waking up * a suspended class. * Returns: NONE */ static void cbqrestart(struct ifaltq *ifq) { cbq_state_t *cbqp; struct ifnet *ifp; IFQ_LOCK_ASSERT(ifq); if (!ALTQ_IS_ENABLED(ifq)) /* cbq must have been detached */ return; if ((cbqp = (cbq_state_t *)ifq->altq_disc) == NULL) /* should not happen */ return; ifp = ifq->altq_ifp; if (ifp->if_start && cbqp->cbq_qlen > 0 && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { IFQ_UNLOCK(ifq); (*ifp->if_start)(ifp); IFQ_LOCK(ifq); } } static void cbq_purge(cbq_state_t *cbqp) { struct rm_class *cl; int i; for (i = 0; i < CBQ_MAX_CLASSES; i++) if ((cl = cbqp->cbq_class_tbl[i]) != NULL) rmc_dropall(cl); if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_)) cbqp->ifnp.ifq_->ifq_len = 0; } #endif /* ALTQ_CBQ */ diff --git a/sys/net/altq/altq_codel.c b/sys/net/altq/altq_codel.c index cca75a70b4c6..b9950ed94d2b 100644 --- a/sys/net/altq/altq_codel.c +++ b/sys/net/altq/altq_codel.c @@ -1,475 +1,475 @@ /* * CoDel - The Controlled-Delay Active Queue Management algorithm * * Copyright (C) 2013 Ermal Luçi * Copyright (C) 2011-2012 Kathleen Nichols * Copyright (C) 2011-2012 Van Jacobson * Copyright (C) 2012 Michael D. Taht * Copyright (C) 2012 Eric Dumazet * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_CODEL /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include static int codel_should_drop(struct codel *, class_queue_t *, struct mbuf *, u_int64_t); static void codel_Newton_step(struct codel_vars *); static u_int64_t codel_control_law(u_int64_t t, u_int64_t, u_int32_t); #define codel_time_after(a, b) ((int64_t)(a) - (int64_t)(b) > 0) #define codel_time_after_eq(a, b) ((int64_t)(a) - (int64_t)(b) >= 0) #define codel_time_before(a, b) ((int64_t)(a) - (int64_t)(b) < 0) #define codel_time_before_eq(a, b) ((int64_t)(a) - (int64_t)(b) <= 0) static int codel_request(struct ifaltq *, int, void *); static int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *codel_dequeue(struct ifaltq *, int); int codel_pfattach(struct pf_altq *a) { struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc, - codel_enqueue, codel_dequeue, codel_request, NULL, NULL)); + codel_enqueue, codel_dequeue, codel_request)); } int codel_add_altq(struct ifnet *ifp, struct pf_altq *a) { struct codel_if *cif; struct codel_opts *opts; if (ifp == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); opts = &a->pq_u.codel_opts; cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (cif == NULL) return (ENOMEM); cif->cif_bandwidth = a->ifbandwidth; cif->cif_ifq = &ifp->if_snd; cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cif->cl_q == NULL) { free(cif, M_DEVBUF); return (ENOMEM); } if (a->qlimit == 0) a->qlimit = 50; /* use default. 
*/ qlimit(cif->cl_q) = a->qlimit; qtype(cif->cl_q) = Q_CODEL; qlen(cif->cl_q) = 0; qsize(cif->cl_q) = 0; if (opts->target == 0) opts->target = 5; if (opts->interval == 0) opts->interval = 100; cif->codel.params.target = machclk_freq * opts->target / 1000; cif->codel.params.interval = machclk_freq * opts->interval / 1000; cif->codel.params.ecn = opts->ecn; cif->codel.stats.maxpacket = 256; cif->cl_stats.qlength = qlen(cif->cl_q); cif->cl_stats.qlimit = qlimit(cif->cl_q); /* keep the state in pf_altq */ a->altq_disc = cif; return (0); } int codel_remove_altq(struct pf_altq *a) { struct codel_if *cif; if ((cif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; if (cif->cl_q) free(cif->cl_q, M_DEVBUF); free(cif, M_DEVBUF); return (0); } int codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct codel_if *cif; struct codel_ifstats stats; int error = 0; if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL) return (EBADF); if (*nbytes < sizeof(stats)) return (EINVAL); stats = cif->cl_stats; stats.stats = cif->codel.stats; if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } static int codel_request(struct ifaltq *ifq, int req, void *arg) { struct codel_if *cif = (struct codel_if *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: if (!ALTQ_IS_ENABLED(cif->cif_ifq)) break; if (qempty(cif->cl_q)) break; while ((m = _getq(cif->cl_q)) != NULL) { PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); m_freem(m); IFQ_DEC_LEN(cif->cif_ifq); } cif->cif_ifq->ifq_len = 0; break; } return (0); } static int codel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct codel_if *cif = (struct codel_if *) ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); return (ENOBUFS); } if (codel_addq(&cif->codel, cif->cl_q, m)) { PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); return (ENOBUFS); } IFQ_INC_LEN(ifq); return (0); } static struct mbuf * codel_dequeue(struct ifaltq *ifq, int op) { struct codel_if *cif = (struct codel_if *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); if (IFQ_IS_EMPTY(ifq)) return (NULL); if (op == ALTDQ_POLL) return (qhead(cif->cl_q)); m = codel_getq(&cif->codel, cif->cl_q); if (m != NULL) { IFQ_DEC_LEN(ifq); PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m)); return (m); } return (NULL); } struct codel * codel_alloc(int target, int interval, int ecn) { struct codel *c; c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO); if (c != NULL) { c->params.target = machclk_freq * target / 1000; c->params.interval = machclk_freq * interval / 1000; c->params.ecn = ecn; c->stats.maxpacket = 256; } return (c); } void codel_destroy(struct codel *c) { free(c, M_DEVBUF); } #define MTAG_CODEL 1438031249 int codel_addq(struct codel *c, class_queue_t *q, struct mbuf *m) { struct m_tag *mtag; uint64_t *enqueue_time; if (qlen(q) < qlimit(q)) { mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); if (mtag == NULL) mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t), M_NOWAIT); if (mtag == NULL) { m_freem(m); return (-1); } enqueue_time = (uint64_t *)(mtag + 1); *enqueue_time = read_machclk(); m_tag_prepend(m, mtag); _addq(q, m); return (0); } c->drop_overlimit++; m_freem(m); return (-1); } static int 
codel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m, u_int64_t now) { struct m_tag *mtag; uint64_t *enqueue_time; if (m == NULL) { c->vars.first_above_time = 0; return (0); } mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); if (mtag == NULL) { /* Only one warning per second. */ if (ppsratecheck(&c->last_log, &c->last_pps, 1)) printf("%s: could not found the packet mtag!\n", __func__); c->vars.first_above_time = 0; return (0); } enqueue_time = (uint64_t *)(mtag + 1); c->vars.ldelay = now - *enqueue_time; c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m)); if (codel_time_before(c->vars.ldelay, c->params.target) || qsize(q) <= c->stats.maxpacket) { /* went below - stay below for at least interval */ c->vars.first_above_time = 0; return (0); } if (c->vars.first_above_time == 0) { /* just went above from below. If we stay above * for at least interval we'll say it's ok to drop */ c->vars.first_above_time = now + c->params.interval; return (0); } if (codel_time_after(now, c->vars.first_above_time)) return (1); return (0); } /* * Run a Newton method step: * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) * * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 */ static void codel_Newton_step(struct codel_vars *vars) { uint32_t invsqrt, invsqrt2; uint64_t val; /* sizeof_in_bits(rec_inv_sqrt) */ #define REC_INV_SQRT_BITS (8 * sizeof(u_int16_t)) /* needed shift to get a Q0.32 number from rec_inv_sqrt */ #define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS) invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32; val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2); val >>= 2; /* avoid overflow in following multiply */ val = (val * invsqrt) >> (32 - 2 + 1); vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; } static u_int64_t codel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt) { return (t + (u_int32_t)(((u_int64_t)interval * (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32)); } struct mbuf * codel_getq(struct codel *c, class_queue_t *q) { struct mbuf *m; u_int64_t now; int drop; if ((m = _getq(q)) == NULL) { c->vars.dropping = 0; return (m); } now = read_machclk(); drop = codel_should_drop(c, q, m, now); if (c->vars.dropping) { if (!drop) { /* sojourn time below target - leave dropping state */ c->vars.dropping = 0; } else if (codel_time_after_eq(now, c->vars.drop_next)) { /* It's time for the next drop. Drop the current * packet and dequeue the next. The dequeue might * take us out of dropping state. * If not, schedule the next drop. * A large backlog might result in drop rates so high * that the next drop should happen now, * hence the while loop. 
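 * Each drop taken in the loop increments vars.count, and
 * codel_control_law() spaces the following drop at roughly
 * drop_next + interval/sqrt(count) (rec_inv_sqrt is a fixed-point
 * approximation of 1/sqrt(count), refined by codel_Newton_step()),
 * so the drop rate keeps rising while the sojourn time stays
 * above target.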
*/ while (c->vars.dropping && codel_time_after_eq(now, c->vars.drop_next)) { c->vars.count++; /* don't care of possible wrap * since there is no more * divide */ codel_Newton_step(&c->vars); /* TODO ECN */ PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); m_freem(m); m = _getq(q); if (!codel_should_drop(c, q, m, now)) /* leave dropping state */ c->vars.dropping = 0; else /* and schedule the next drop */ c->vars.drop_next = codel_control_law(c->vars.drop_next, c->params.interval, c->vars.rec_inv_sqrt); } } } else if (drop) { /* TODO ECN */ PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); m_freem(m); m = _getq(q); drop = codel_should_drop(c, q, m, now); c->vars.dropping = 1; /* if min went above target close to when we last went below it * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. */ if (codel_time_before(now - c->vars.drop_next, 16 * c->params.interval)) { c->vars.count = (c->vars.count - c->vars.lastcount) | 1; /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. */ codel_Newton_step(&c->vars); } else { c->vars.count = 1; c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; } c->vars.lastcount = c->vars.count; c->vars.drop_next = codel_control_law(now, c->params.interval, c->vars.rec_inv_sqrt); } return (m); } void codel_getstats(struct codel *c, struct codel_stats *s) { *s = c->stats; } #endif /* ALTQ_CODEL */ diff --git a/sys/net/altq/altq_fairq.c b/sys/net/altq/altq_fairq.c index a8f96daeed36..b4eeb1ddf9b4 100644 --- a/sys/net/altq/altq_fairq.c +++ b/sys/net/altq/altq_fairq.c @@ -1,907 +1,907 @@ /* * Copyright (c) 2008 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * 3. Neither the name of The DragonFly Project nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific, prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $ * $FreeBSD$ */ /* * Matt: I gutted altq_priq.c and used it as a skeleton on which to build * fairq. 
The fairq algorithm is completely different then priq, of course, * but because I used priq's skeleton I believe I should include priq's * copyright. * * Copyright (C) 2000-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * FAIRQ - take traffic classified by keep state (hashed into * mbuf->m_pkthdr.altq_state_hash) and bucketize it. Fairly extract * the first packet from each bucket in a round-robin fashion. * * TODO - better overall qlimit support (right now it is per-bucket). * - NOTE: red etc is per bucket, not overall. * - better service curve support. * * EXAMPLE: * * altq on em0 fairq bandwidth 650Kb queue { std, bulk } * queue std priority 3 bandwidth 400Kb \ * fairq (buckets 64, default, hogs 1Kb) qlimit 50 * queue bulk priority 2 bandwidth 100Kb \ * fairq (buckets 64, hogs 1Kb) qlimit 50 * * pass out on em0 from any to any keep state queue std * pass out on em0 inet proto tcp ..... port ... 
keep state queue bulk */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_FAIRQ /* fairq is enabled in the kernel conf */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * function prototypes */ static int fairq_clear_interface(struct fairq_if *); static int fairq_request(struct ifaltq *, int, void *); static void fairq_purge(struct fairq_if *); static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int); static int fairq_class_destroy(struct fairq_class *); static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *fairq_dequeue(struct ifaltq *, int); static int fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t); static struct mbuf *fairq_getq(struct fairq_class *, uint64_t); static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *); static fairq_bucket_t *fairq_selectq(struct fairq_class *, int); static void fairq_purgeq(struct fairq_class *); static void get_class_stats(struct fairq_classstats *, struct fairq_class *); static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t); int fairq_pfattach(struct pf_altq *a) { struct ifnet *ifp; int error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc, - fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL); + fairq_enqueue, fairq_dequeue, fairq_request); return (error); } int fairq_add_altq(struct ifnet *ifp, struct pf_altq *a) { struct fairq_if *pif; if (ifp == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); pif = malloc(sizeof(struct fairq_if), M_DEVBUF, M_WAITOK | M_ZERO); pif->pif_bandwidth = a->ifbandwidth; pif->pif_maxpri = -1; pif->pif_ifq = &ifp->if_snd; /* keep the state in pf_altq */ a->altq_disc = pif; return (0); } int fairq_remove_altq(struct pf_altq *a) { struct fairq_if *pif; if ((pif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; fairq_clear_interface(pif); free(pif, M_DEVBUF); return (0); } int fairq_add_queue(struct pf_altq *a) { struct fairq_if *pif; struct fairq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); /* check parameters */ if (a->priority >= FAIRQ_MAXPRI) return (EINVAL); if (a->qid == 0) return (EINVAL); if (pif->pif_classes[a->priority] != NULL) return (EBUSY); if (clh_to_clp(pif, a->qid) != NULL) return (EBUSY); cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth, &a->pq_u.fairq_opts, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int fairq_remove_queue(struct pf_altq *a) { struct fairq_if *pif; struct fairq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); return (fairq_class_destroy(cl)); } int fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct fairq_if *pif; struct fairq_class *cl; struct fairq_classstats stats; int error = 0; if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes. 
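 * FAIRQ keeps its classes in a flat per-priority array with no
 * parent/child relationships, so a single pass over pif_classes[] is
 * enough here (contrast cbq_clear_interface() above, which must loop
 * until parent classes have no remaining children).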
*/ static int fairq_clear_interface(struct fairq_if *pif) { struct fairq_class *cl; int pri; /* clear out the classes */ for (pri = 0; pri <= pif->pif_maxpri; pri++) { if ((cl = pif->pif_classes[pri]) != NULL) fairq_class_destroy(cl); } return (0); } static int fairq_request(struct ifaltq *ifq, int req, void *arg) { struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: fairq_purge(pif); break; } return (0); } /* discard all the queued packets on the interface */ static void fairq_purge(struct fairq_if *pif) { struct fairq_class *cl; int pri; for (pri = 0; pri <= pif->pif_maxpri; pri++) { if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head) fairq_purgeq(cl); } if (ALTQ_IS_ENABLED(pif->pif_ifq)) pif->pif_ifq->ifq_len = 0; } static struct fairq_class * fairq_class_create(struct fairq_if *pif, int pri, int qlimit, u_int bandwidth, struct fairq_opts *opts, int qid) { struct fairq_class *cl; int flags = opts->flags; u_int nbuckets = opts->nbuckets; int i; #ifndef ALTQ_RED if (flags & FARF_RED) { #ifdef ALTQ_DEBUG printf("fairq_class_create: RED not configured for FAIRQ!\n"); #endif return (NULL); } #endif #ifndef ALTQ_CODEL if (flags & FARF_CODEL) { #ifdef ALTQ_DEBUG printf("fairq_class_create: CODEL not configured for FAIRQ!\n"); #endif return (NULL); } #endif if (nbuckets == 0) nbuckets = 256; if (nbuckets > FAIRQ_MAX_BUCKETS) nbuckets = FAIRQ_MAX_BUCKETS; /* enforce power-of-2 size */ while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1)) ++nbuckets; if ((cl = pif->pif_classes[pri]) != NULL) { /* modify the class instead of creating a new one */ IFQ_LOCK(cl->cl_pif->pif_ifq); if (cl->cl_head) fairq_purgeq(cl); IFQ_UNLOCK(cl->cl_pif->pif_ifq); #ifdef ALTQ_RIO if (cl->cl_qtype == Q_RIO) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (cl->cl_qtype == Q_RED) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (cl->cl_qtype == Q_CODEL) codel_destroy(cl->cl_codel); #endif } else { cl = malloc(sizeof(struct fairq_class), M_DEVBUF, M_WAITOK | M_ZERO); cl->cl_nbuckets = nbuckets; cl->cl_nbucket_mask = nbuckets - 1; cl->cl_buckets = malloc( sizeof(struct fairq_bucket) * cl->cl_nbuckets, M_DEVBUF, M_WAITOK | M_ZERO); cl->cl_head = NULL; } pif->pif_classes[pri] = cl; if (flags & FARF_DEFAULTCLASS) pif->pif_default = cl; if (qlimit == 0) qlimit = 50; /* use default */ cl->cl_qlimit = qlimit; for (i = 0; i < cl->cl_nbuckets; ++i) { qlimit(&cl->cl_buckets[i].queue) = qlimit; } cl->cl_bandwidth = bandwidth / 8; cl->cl_qtype = Q_DROPTAIL; cl->cl_flags = flags & FARF_USERFLAGS; cl->cl_pri = pri; if (pri > pif->pif_maxpri) pif->pif_maxpri = pri; cl->cl_pif = pif; cl->cl_handle = qid; cl->cl_hogs_m1 = opts->hogs_m1 / 8; cl->cl_lssc_m1 = opts->lssc_m1 / 8; /* NOT YET USED */ #ifdef ALTQ_RED if (flags & (FARF_RED|FARF_RIO)) { int red_flags, red_pkttime; red_flags = 0; if (flags & FARF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & FARF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (pif->pif_bandwidth < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); #ifdef ALTQ_RIO if (flags & FARF_RIO) { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red != NULL) cl->cl_qtype = Q_RIO; } else #endif if (flags & FARF_RED) { cl->cl_red = red_alloc(0, 0, cl->cl_qlimit * 10/100, cl->cl_qlimit * 30/100, red_flags, red_pkttime); if (cl->cl_red != NULL) cl->cl_qtype = Q_RED; } } #endif /* ALTQ_RED */ #ifdef 
ALTQ_CODEL if (flags & FARF_CODEL) { cl->cl_codel = codel_alloc(5, 100, 0); if (cl->cl_codel != NULL) cl->cl_qtype = Q_CODEL; } #endif return (cl); } static int fairq_class_destroy(struct fairq_class *cl) { struct fairq_if *pif; int pri; IFQ_LOCK(cl->cl_pif->pif_ifq); if (cl->cl_head) fairq_purgeq(cl); pif = cl->cl_pif; pif->pif_classes[cl->cl_pri] = NULL; if (pif->pif_poll_cache == cl) pif->pif_poll_cache = NULL; if (pif->pif_maxpri == cl->cl_pri) { for (pri = cl->cl_pri; pri >= 0; pri--) if (pif->pif_classes[pri] != NULL) { pif->pif_maxpri = pri; break; } if (pri < 0) pif->pif_maxpri = -1; } IFQ_UNLOCK(cl->cl_pif->pif_ifq); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (cl->cl_qtype == Q_RIO) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (cl->cl_qtype == Q_RED) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (cl->cl_qtype == Q_CODEL) codel_destroy(cl->cl_codel); #endif } free(cl->cl_buckets, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * fairq_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. */ static int fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; struct fairq_class *cl = NULL; /* Make compiler happy */ struct pf_mtag *t; u_int32_t qid_hash = 0; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } if ((t = pf_find_mtag(m)) != NULL) { cl = clh_to_clp(pif, t->qid); qid_hash = t->qid_hash; } if (cl == NULL) { cl = pif->pif_default; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } cl->cl_flags |= FARF_HAS_PACKETS; cl->cl_pktattr = NULL; len = m_pktlen(m); if (fairq_addq(cl, m, qid_hash) != 0) { /* drop occurred. mbuf was freed in fairq_addq. */ PKTCNTR_ADD(&cl->cl_dropcnt, len); return (ENOBUFS); } IFQ_INC_LEN(ifq); return (0); } /* * fairq_dequeue is a dequeue function to be registered to * (*altq_dequeue) in struct ifaltq. * * note: ALTDQ_POLL returns the next packet without removing the packet * from the queue. ALTDQ_REMOVE is a normal dequeue operation. * ALTDQ_REMOVE must return the same packet if called immediately * after ALTDQ_POLL. */ static struct mbuf * fairq_dequeue(struct ifaltq *ifq, int op) { struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; struct fairq_class *cl; struct fairq_class *best_cl; struct mbuf *best_m; struct mbuf *m = NULL; uint64_t cur_time = read_machclk(); int pri; int hit_limit; IFQ_LOCK_ASSERT(ifq); if (IFQ_IS_EMPTY(ifq)) { return (NULL); } if (pif->pif_poll_cache && op == ALTDQ_REMOVE) { best_cl = pif->pif_poll_cache; m = fairq_getq(best_cl, cur_time); pif->pif_poll_cache = NULL; if (m) { IFQ_DEC_LEN(ifq); PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); return (m); } } else { best_cl = NULL; best_m = NULL; for (pri = pif->pif_maxpri; pri >= 0; pri--) { if ((cl = pif->pif_classes[pri]) == NULL) continue; if ((cl->cl_flags & FARF_HAS_PACKETS) == 0) continue; m = fairq_pollq(cl, cur_time, &hit_limit); if (m == NULL) { cl->cl_flags &= ~FARF_HAS_PACKETS; continue; } /* * Only override the best choice if we are under * the BW limit. */ if (hit_limit == 0 || best_cl == NULL) { best_cl = cl; best_m = m; } /* * Remember the highest priority mbuf in case we * do not find any lower priority mbufs. 
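 * In other words, scanning from the highest priority down, the first
 * class found under its bandwidth limit is selected immediately; an
 * over-limit class is only used as a fallback when every class with
 * pending packets is over its limit.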
*/ if (hit_limit) continue; break; } if (op == ALTDQ_POLL) { pif->pif_poll_cache = best_cl; m = best_m; } else if (best_cl) { m = fairq_getq(best_cl, cur_time); if (m != NULL) { IFQ_DEC_LEN(ifq); PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); } } return (m); } return (NULL); } static int fairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid) { fairq_bucket_t *b; u_int hindex; uint64_t bw; /* * If the packet doesn't have any keep state put it on the end of * our queue. XXX this can result in out of order delivery. */ if (bucketid == 0) { if (cl->cl_head) b = cl->cl_head->prev; else b = &cl->cl_buckets[0]; } else { hindex = bucketid & cl->cl_nbucket_mask; b = &cl->cl_buckets[hindex]; } /* * Add the bucket to the end of the circular list of active buckets. * * As a special case we add the bucket to the beginning of the list * instead of the end if it was not previously on the list and if * its traffic is less then the hog level. */ if (b->in_use == 0) { b->in_use = 1; if (cl->cl_head == NULL) { cl->cl_head = b; b->next = b; b->prev = b; } else { b->next = cl->cl_head; b->prev = cl->cl_head->prev; b->prev->next = b; b->next->prev = b; if (b->bw_delta && cl->cl_hogs_m1) { bw = b->bw_bytes * machclk_freq / b->bw_delta; if (bw < cl->cl_hogs_m1) cl->cl_head = b; } } } #ifdef ALTQ_RIO if (cl->cl_qtype == Q_RIO) return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr); #endif #ifdef ALTQ_RED if (cl->cl_qtype == Q_RED) return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr); #endif #ifdef ALTQ_CODEL if (cl->cl_qtype == Q_CODEL) return codel_addq(cl->cl_codel, &b->queue, m); #endif if (qlen(&b->queue) >= qlimit(&b->queue)) { m_freem(m); return (-1); } if (cl->cl_flags & FARF_CLEARDSCP) write_dsfield(m, cl->cl_pktattr, 0); _addq(&b->queue, m); return (0); } static struct mbuf * fairq_getq(struct fairq_class *cl, uint64_t cur_time) { fairq_bucket_t *b; struct mbuf *m; b = fairq_selectq(cl, 0); if (b == NULL) m = NULL; #ifdef ALTQ_RIO else if (cl->cl_qtype == Q_RIO) m = rio_getq((rio_t *)cl->cl_red, &b->queue); #endif #ifdef ALTQ_RED else if (cl->cl_qtype == Q_RED) m = red_getq(cl->cl_red, &b->queue); #endif #ifdef ALTQ_CODEL else if (cl->cl_qtype == Q_CODEL) m = codel_getq(cl->cl_codel, &b->queue); #endif else m = _getq(&b->queue); /* * Calculate the BW change */ if (m != NULL) { uint64_t delta; /* * Per-class bandwidth calculation */ delta = (cur_time - cl->cl_last_time); if (delta > machclk_freq * 8) delta = machclk_freq * 8; cl->cl_bw_delta += delta; cl->cl_bw_bytes += m->m_pkthdr.len; cl->cl_last_time = cur_time; cl->cl_bw_delta -= cl->cl_bw_delta >> 3; cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3; /* * Per-bucket bandwidth calculation */ delta = (cur_time - b->last_time); if (delta > machclk_freq * 8) delta = machclk_freq * 8; b->bw_delta += delta; b->bw_bytes += m->m_pkthdr.len; b->last_time = cur_time; b->bw_delta -= b->bw_delta >> 3; b->bw_bytes -= b->bw_bytes >> 3; } return(m); } /* * Figure out what the next packet would be if there were no limits. If * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise * it is set to 0. A non-NULL mbuf is returned either way. */ static struct mbuf * fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit) { fairq_bucket_t *b; struct mbuf *m; uint64_t delta; uint64_t bw; *hit_limit = 0; b = fairq_selectq(cl, 1); if (b == NULL) return(NULL); m = qhead(&b->queue); /* * Did this packet exceed the class bandwidth? Calculate the * bandwidth component of the packet. 
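 * (cl_bw_bytes and cl_bw_delta are decayed by 1/8 on every successful
 * dequeue in fairq_getq(), so the bw computed below behaves roughly
 * like an exponential moving average of the class's recent rate.)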
* * - Calculate bytes per second */ delta = cur_time - cl->cl_last_time; if (delta > machclk_freq * 8) delta = machclk_freq * 8; cl->cl_bw_delta += delta; cl->cl_last_time = cur_time; if (cl->cl_bw_delta) { bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta; if (bw > cl->cl_bandwidth) *hit_limit = 1; #ifdef ALTQ_DEBUG printf("BW %6ju relative to %6u %d queue %p\n", (uintmax_t)bw, cl->cl_bandwidth, *hit_limit, b); #endif } return(m); } /* * Locate the next queue we want to pull a packet out of. This code * is also responsible for removing empty buckets from the circular list. */ static fairq_bucket_t * fairq_selectq(struct fairq_class *cl, int ispoll) { fairq_bucket_t *b; uint64_t bw; if (ispoll == 0 && cl->cl_polled) { b = cl->cl_polled; cl->cl_polled = NULL; return(b); } while ((b = cl->cl_head) != NULL) { /* * Remove empty queues from consideration */ if (qempty(&b->queue)) { b->in_use = 0; cl->cl_head = b->next; if (cl->cl_head == b) { cl->cl_head = NULL; } else { b->next->prev = b->prev; b->prev->next = b->next; } continue; } /* * Advance the round robin. Queues with bandwidths less * then the hog bandwidth are allowed to burst. */ if (cl->cl_hogs_m1 == 0) { cl->cl_head = b->next; } else if (b->bw_delta) { bw = b->bw_bytes * machclk_freq / b->bw_delta; if (bw >= cl->cl_hogs_m1) { cl->cl_head = b->next; } /* * XXX TODO - */ } /* * Return bucket b. */ break; } if (ispoll) cl->cl_polled = b; return(b); } static void fairq_purgeq(struct fairq_class *cl) { fairq_bucket_t *b; struct mbuf *m; while ((b = fairq_selectq(cl, 0)) != NULL) { while ((m = _getq(&b->queue)) != NULL) { PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); m_freem(m); } ASSERT(qlen(&b->queue) == 0); } } static void get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl) { fairq_bucket_t *b; sp->class_handle = cl->cl_handle; sp->qlimit = cl->cl_qlimit; sp->xmit_cnt = cl->cl_xmitcnt; sp->drop_cnt = cl->cl_dropcnt; sp->qtype = cl->cl_qtype; sp->qlength = 0; if (cl->cl_head) { b = cl->cl_head; do { sp->qlength += qlen(&b->queue); b = b->next; } while (b != cl->cl_head); } #ifdef ALTQ_RED if (cl->cl_qtype == Q_RED) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (cl->cl_qtype == Q_RIO) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_CODEL if (cl->cl_qtype == Q_CODEL) codel_getstats(cl->cl_codel, &sp->codel); #endif } /* convert a class handle to the corresponding class pointer */ static struct fairq_class * clh_to_clp(struct fairq_if *pif, uint32_t chandle) { struct fairq_class *cl; int idx; if (chandle == 0) return (NULL); for (idx = pif->pif_maxpri; idx >= 0; idx--) if ((cl = pif->pif_classes[idx]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } #endif /* ALTQ_FAIRQ */ diff --git a/sys/net/altq/altq_hfsc.c b/sys/net/altq/altq_hfsc.c index 2dbf571cdbb9..c3e84b345509 100644 --- a/sys/net/altq/altq_hfsc.c +++ b/sys/net/altq/altq_hfsc.c @@ -1,1736 +1,1736 @@ /*- * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. * * Permission to use, copy, modify, and distribute this software and * its documentation is hereby granted (including for commercial or * for-profit use), provided that both the copyright notice and this * permission notice appear in all copies of the software, derivative * works, or modified versions, and any portions thereof. * * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF * WHICH MAY HAVE SERIOUS CONSEQUENCES. 
CARNEGIE MELLON PROVIDES THIS * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * Carnegie Mellon encourages (but does not require) users of this * software to return any improvements or extensions that they make, * and to grant Carnegie Mellon the rights to redistribute these * changes without encumbrance. * * $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ * $FreeBSD$ */ /* * H-FSC is described in Proceedings of SIGCOMM'97, * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing, * Real-Time and Priority Service" * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng. * * Oleg Cherevko added the upperlimit for link-sharing. * when a class has an upperlimit, the fit-time is computed from the * upperlimit service curve. the link-sharing scheduler does not schedule * a class whose fit-time exceeds the current time. */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */ #include #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #endif /* ALTQ3_COMPAT */ #include #include #include #include #include #include #include #include /* * function prototypes */ static int hfsc_clear_interface(struct hfsc_if *); static int hfsc_request(struct ifaltq *, int, void *); static void hfsc_purge(struct hfsc_if *); static struct hfsc_class *hfsc_class_create(struct hfsc_if *, struct service_curve *, struct service_curve *, struct service_curve *, struct hfsc_class *, int, int, int); static int hfsc_class_destroy(struct hfsc_class *); static struct hfsc_class *hfsc_nextclass(struct hfsc_class *); static int hfsc_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *hfsc_dequeue(struct ifaltq *, int); static int hfsc_addq(struct hfsc_class *, struct mbuf *); static struct mbuf *hfsc_getq(struct hfsc_class *); static struct mbuf *hfsc_pollq(struct hfsc_class *); static void hfsc_purgeq(struct hfsc_class *); static void update_cfmin(struct hfsc_class *); static void set_active(struct hfsc_class *, int); static void set_passive(struct hfsc_class *); static void init_ed(struct hfsc_class *, int); static void update_ed(struct hfsc_class *, int); static void update_d(struct hfsc_class *, int); static void init_vf(struct hfsc_class *, int); static void update_vf(struct hfsc_class *, int, u_int64_t); static void ellist_insert(struct hfsc_class *); static void ellist_remove(struct hfsc_class *); static void ellist_update(struct hfsc_class *); struct hfsc_class *hfsc_get_mindl(struct hfsc_if *, u_int64_t); static void actlist_insert(struct hfsc_class *); static void actlist_remove(struct hfsc_class *); static void actlist_update(struct hfsc_class *); static struct hfsc_class *actlist_firstfit(struct hfsc_class *, u_int64_t); static __inline u_int64_t seg_x2y(u_int64_t, 
u_int64_t); static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t); static __inline u_int64_t m2sm(u_int64_t); static __inline u_int64_t m2ism(u_int64_t); static __inline u_int64_t d2dx(u_int); static u_int64_t sm2m(u_int64_t); static u_int dx2d(u_int64_t); static void sc2isc(struct service_curve *, struct internal_sc *); static void rtsc_init(struct runtime_sc *, struct internal_sc *, u_int64_t, u_int64_t); static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t); static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t); static void rtsc_min(struct runtime_sc *, struct internal_sc *, u_int64_t, u_int64_t); static void get_class_stats_v0(struct hfsc_classstats_v0 *, struct hfsc_class *); static void get_class_stats_v1(struct hfsc_classstats_v1 *, struct hfsc_class *); static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t); /* * macros */ #define is_a_parent_class(cl) ((cl)->cl_children != NULL) #define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ int hfsc_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); s = splnet(); error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc, - hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL); + hfsc_enqueue, hfsc_dequeue, hfsc_request); splx(s); return (error); } int hfsc_add_altq(struct ifnet *ifp, struct pf_altq *a) { struct hfsc_if *hif; if (ifp == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (hif == NULL) return (ENOMEM); TAILQ_INIT(&hif->hif_eligible); hif->hif_ifq = &ifp->if_snd; /* keep the state in pf_altq */ a->altq_disc = hif; return (0); } int hfsc_remove_altq(struct pf_altq *a) { struct hfsc_if *hif; if ((hif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; (void)hfsc_clear_interface(hif); (void)hfsc_class_destroy(hif->hif_rootclass); free(hif, M_DEVBUF); return (0); } int hfsc_add_queue(struct pf_altq *a) { struct hfsc_if *hif; struct hfsc_class *cl, *parent; struct hfsc_opts_v1 *opts; struct service_curve rtsc, lssc, ulsc; if ((hif = a->altq_disc) == NULL) return (EINVAL); opts = &a->pq_u.hfsc_opts; if (a->parent_qid == HFSC_NULLCLASS_HANDLE && hif->hif_rootclass == NULL) parent = NULL; else if ((parent = clh_to_clp(hif, a->parent_qid)) == NULL) return (EINVAL); if (a->qid == 0) return (EINVAL); if (clh_to_clp(hif, a->qid) != NULL) return (EBUSY); rtsc.m1 = opts->rtsc_m1; rtsc.d = opts->rtsc_d; rtsc.m2 = opts->rtsc_m2; lssc.m1 = opts->lssc_m1; lssc.d = opts->lssc_d; lssc.m2 = opts->lssc_m2; ulsc.m1 = opts->ulsc_m1; ulsc.d = opts->ulsc_d; ulsc.m2 = opts->ulsc_m2; cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc, parent, a->qlimit, opts->flags, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int hfsc_remove_queue(struct pf_altq *a) { struct hfsc_if *hif; struct hfsc_class *cl; if ((hif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(hif, a->qid)) == NULL) return (EINVAL); return (hfsc_class_destroy(cl)); } int hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct hfsc_if *hif; struct hfsc_class *cl; union { struct hfsc_classstats_v0 v0; struct hfsc_classstats_v1 v1; } stats; size_t stats_size; int error = 0; if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL) return (EBADF); if ((cl = clh_to_clp(hif, a->qid)) == NULL) return (EINVAL); if (version > HFSC_STATS_VERSION) return (EINVAL); memset(&stats, 0, sizeof(stats)); switch (version) { case 0: 
get_class_stats_v0(&stats.v0, cl); stats_size = sizeof(struct hfsc_classstats_v0); break; case 1: get_class_stats_v1(&stats.v1, cl); stats_size = sizeof(struct hfsc_classstats_v1); break; } if (*nbytes < stats_size) return (EINVAL); if ((error = copyout((caddr_t)&stats, ubuf, stats_size)) != 0) return (error); *nbytes = stats_size; return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes except the root class. */ static int hfsc_clear_interface(struct hfsc_if *hif) { struct hfsc_class *cl; /* clear out the classes */ while (hif->hif_rootclass != NULL && (cl = hif->hif_rootclass->cl_children) != NULL) { /* * remove the first leaf class found in the hierarchy * then start over */ for (; cl != NULL; cl = hfsc_nextclass(cl)) { if (!is_a_parent_class(cl)) { (void)hfsc_class_destroy(cl); break; } } } return (0); } static int hfsc_request(struct ifaltq *ifq, int req, void *arg) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: hfsc_purge(hif); break; } return (0); } /* discard all the queued packets on the interface */ static void hfsc_purge(struct hfsc_if *hif) { struct hfsc_class *cl; for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) if (!qempty(cl->cl_q)) hfsc_purgeq(cl); if (ALTQ_IS_ENABLED(hif->hif_ifq)) hif->hif_ifq->ifq_len = 0; } struct hfsc_class * hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, struct service_curve *fsc, struct service_curve *usc, struct hfsc_class *parent, int qlimit, int flags, int qid) { struct hfsc_class *cl, *p; int i, s; if (hif->hif_classes >= HFSC_MAX_CLASSES) return (NULL); #ifndef ALTQ_RED if (flags & HFCF_RED) { #ifdef ALTQ_DEBUG printf("hfsc_class_create: RED not configured for HFSC!\n"); #endif return (NULL); } #endif #ifndef ALTQ_CODEL if (flags & HFCF_CODEL) { #ifdef ALTQ_DEBUG printf("hfsc_class_create: CODEL not configured for HFSC!\n"); #endif return (NULL); } #endif cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->cl_q == NULL) goto err_ret; TAILQ_INIT(&cl->cl_actc); if (qlimit == 0) qlimit = 50; /* use default */ qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; qsize(cl->cl_q) = 0; cl->cl_flags = flags; #ifdef ALTQ_RED if (flags & (HFCF_RED|HFCF_RIO)) { int red_flags, red_pkttime; u_int m2; m2 = 0; if (rsc != NULL && rsc->m2 > m2) m2 = rsc->m2; if (fsc != NULL && fsc->m2 > m2) m2 = fsc->m2; if (usc != NULL && usc->m2 > m2) m2 = usc->m2; red_flags = 0; if (flags & HFCF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & HFCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (m2 < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (m2 / 8); if (flags & HFCF_RED) { cl->cl_red = red_alloc(0, 0, qlimit(cl->cl_q) * 10/100, qlimit(cl->cl_q) * 30/100, red_flags, red_pkttime); if (cl->cl_red != NULL) qtype(cl->cl_q) = Q_RED; } #ifdef ALTQ_RIO else { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red != NULL) qtype(cl->cl_q) = Q_RIO; } #endif } #endif /* ALTQ_RED */ #ifdef ALTQ_CODEL if (flags & HFCF_CODEL) { cl->cl_codel = codel_alloc(5, 100, 0); if (cl->cl_codel != NULL) qtype(cl->cl_q) = Q_CODEL; } #endif if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) { cl->cl_rsc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_rsc == 
NULL) goto err_ret; sc2isc(rsc, cl->cl_rsc); rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0); rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0); } if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) { cl->cl_fsc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_fsc == NULL) goto err_ret; sc2isc(fsc, cl->cl_fsc); rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0); } if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) { cl->cl_usc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_usc == NULL) goto err_ret; sc2isc(usc, cl->cl_usc); rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0); } cl->cl_id = hif->hif_classid++; cl->cl_handle = qid; cl->cl_hif = hif; cl->cl_parent = parent; s = splnet(); IFQ_LOCK(hif->hif_ifq); hif->hif_classes++; /* * find a free slot in the class table. if the slot matching * the lower bits of qid is free, use this slot. otherwise, * use the first free slot. */ i = qid % HFSC_MAX_CLASSES; if (hif->hif_class_tbl[i] == NULL) hif->hif_class_tbl[i] = cl; else { for (i = 0; i < HFSC_MAX_CLASSES; i++) if (hif->hif_class_tbl[i] == NULL) { hif->hif_class_tbl[i] = cl; break; } if (i == HFSC_MAX_CLASSES) { IFQ_UNLOCK(hif->hif_ifq); splx(s); goto err_ret; } } cl->cl_slot = i; if (flags & HFCF_DEFAULTCLASS) hif->hif_defaultclass = cl; if (parent == NULL) { /* this is root class */ hif->hif_rootclass = cl; } else { /* add this class to the children list of the parent */ if ((p = parent->cl_children) == NULL) parent->cl_children = cl; else { while (p->cl_siblings != NULL) p = p->cl_siblings; p->cl_siblings = cl; } } IFQ_UNLOCK(hif->hif_ifq); splx(s); return (cl); err_ret: if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); if (cl->cl_rsc != NULL) free(cl->cl_rsc, M_DEVBUF); if (cl->cl_usc != NULL) free(cl->cl_usc, M_DEVBUF); if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (NULL); } static int hfsc_class_destroy(struct hfsc_class *cl) { int s; if (cl == NULL) return (0); if (is_a_parent_class(cl)) return (EBUSY); s = splnet(); IFQ_LOCK(cl->cl_hif->hif_ifq); if (!qempty(cl->cl_q)) hfsc_purgeq(cl); if (cl->cl_parent == NULL) { /* this is root class */ } else { struct hfsc_class *p = cl->cl_parent->cl_children; if (p == cl) cl->cl_parent->cl_children = cl->cl_siblings; else do { if (p->cl_siblings == cl) { p->cl_siblings = cl->cl_siblings; break; } } while ((p = p->cl_siblings) != NULL); ASSERT(p != NULL); } cl->cl_hif->hif_class_tbl[cl->cl_slot] = NULL; cl->cl_hif->hif_classes--; IFQ_UNLOCK(cl->cl_hif->hif_ifq); splx(s); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } IFQ_LOCK(cl->cl_hif->hif_ifq); if (cl == cl->cl_hif->hif_rootclass) cl->cl_hif->hif_rootclass = NULL; if (cl == cl->cl_hif->hif_defaultclass) cl->cl_hif->hif_defaultclass = NULL; IFQ_UNLOCK(cl->cl_hif->hif_ifq); if (cl->cl_usc != NULL) free(cl->cl_usc, M_DEVBUF); if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); if (cl->cl_rsc != NULL) free(cl->cl_rsc, M_DEVBUF); free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * hfsc_nextclass returns the next class in the tree. 
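 * The traversal is pre-order: descend to a child if there is one,
 * otherwise move to the next sibling, otherwise climb back up to the
 * nearest ancestor that still has an unvisited sibling.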
* usage: * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) * do_something; */ static struct hfsc_class * hfsc_nextclass(struct hfsc_class *cl) { if (cl->cl_children != NULL) cl = cl->cl_children; else if (cl->cl_siblings != NULL) cl = cl->cl_siblings; else { while ((cl = cl->cl_parent) != NULL) if (cl->cl_siblings) { cl = cl->cl_siblings; break; } } return (cl); } /* * hfsc_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. */ static int hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; struct hfsc_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(hif, t->qid); if (cl == NULL || is_a_parent_class(cl)) { cl = hif->hif_defaultclass; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } cl->cl_pktattr = NULL; len = m_pktlen(m); if (hfsc_addq(cl, m) != 0) { /* drop occurred. mbuf was freed in hfsc_addq. */ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len); return (ENOBUFS); } IFQ_INC_LEN(ifq); cl->cl_hif->hif_packets++; /* successfully queued. */ if (qlen(cl->cl_q) == 1) set_active(cl, m_pktlen(m)); return (0); } /* * hfsc_dequeue is a dequeue function to be registered to * (*altq_dequeue) in struct ifaltq. * * note: ALTDQ_POLL returns the next packet without removing the packet * from the queue. ALTDQ_REMOVE is a normal dequeue operation. * ALTDQ_REMOVE must return the same packet if called immediately * after ALTDQ_POLL. */ static struct mbuf * hfsc_dequeue(struct ifaltq *ifq, int op) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; struct hfsc_class *cl; struct mbuf *m; int len, next_len; int realtime = 0; u_int64_t cur_time; IFQ_LOCK_ASSERT(ifq); if (hif->hif_packets == 0) /* no packet in the tree */ return (NULL); cur_time = read_machclk(); if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) { cl = hif->hif_pollcache; hif->hif_pollcache = NULL; /* check if the class was scheduled by real-time criteria */ if (cl->cl_rsc != NULL) realtime = (cl->cl_e <= cur_time); } else { /* * if there are eligible classes, use real-time criteria. * find the class with the minimum deadline among * the eligible classes. */ if ((cl = hfsc_get_mindl(hif, cur_time)) != NULL) { realtime = 1; } else { #ifdef ALTQ_DEBUG int fits = 0; #endif /* * use link-sharing criteria * get the class with the minimum vt in the hierarchy */ cl = hif->hif_rootclass; while (is_a_parent_class(cl)) { cl = actlist_firstfit(cl, cur_time); if (cl == NULL) { #ifdef ALTQ_DEBUG if (fits > 0) printf("%d fit but none found\n",fits); #endif return (NULL); } /* * update parent's cl_cvtmin. * don't update if the new vt is smaller. 
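 *
 * note (added, descriptive): cl_cvtmin tracks the largest virtual time
 * actually scheduled under link-sharing in the current backlog period;
 * update_vf() later pulls a child whose vt lags behind this value
 * forward via cl_vtadj, so a class skipped by the fit test does not
 * keep an unfairly small vt.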
*/ if (cl->cl_parent->cl_cvtmin < cl->cl_vt) cl->cl_parent->cl_cvtmin = cl->cl_vt; #ifdef ALTQ_DEBUG fits++; #endif } } if (op == ALTDQ_POLL) { hif->hif_pollcache = cl; m = hfsc_pollq(cl); return (m); } } m = hfsc_getq(cl); if (m == NULL) panic("hfsc_dequeue:"); len = m_pktlen(m); cl->cl_hif->hif_packets--; IFQ_DEC_LEN(ifq); PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len); update_vf(cl, len, cur_time); if (realtime) cl->cl_cumul += len; if (!qempty(cl->cl_q)) { if (cl->cl_rsc != NULL) { /* update ed */ next_len = m_pktlen(qhead(cl->cl_q)); if (realtime) update_ed(cl, next_len); else update_d(cl, next_len); } } else { /* the class becomes passive */ set_passive(cl); } return (m); } static int hfsc_addq(struct hfsc_class *cl, struct mbuf *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_addq(cl->cl_codel, cl->cl_q, m); #endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); } if (cl->cl_flags & HFCF_CLEARDSCP) write_dsfield(m, cl->cl_pktattr, 0); _addq(cl->cl_q, m); return (0); } static struct mbuf * hfsc_getq(struct hfsc_class *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_getq((rio_t *)cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_getq(cl->cl_codel, cl->cl_q); #endif return _getq(cl->cl_q); } static struct mbuf * hfsc_pollq(struct hfsc_class *cl) { return qhead(cl->cl_q); } static void hfsc_purgeq(struct hfsc_class *cl) { struct mbuf *m; if (qempty(cl->cl_q)) return; while ((m = _getq(cl->cl_q)) != NULL) { PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m)); m_freem(m); cl->cl_hif->hif_packets--; IFQ_DEC_LEN(cl->cl_hif->hif_ifq); } ASSERT(qlen(cl->cl_q) == 0); update_vf(cl, 0, 0); /* remove cl from the actlist */ set_passive(cl); } static void set_active(struct hfsc_class *cl, int len) { if (cl->cl_rsc != NULL) init_ed(cl, len); if (cl->cl_fsc != NULL) init_vf(cl, len); cl->cl_stats.period++; } static void set_passive(struct hfsc_class *cl) { if (cl->cl_rsc != NULL) ellist_remove(cl); /* * actlist is now handled in update_vf() so that update_vf(cl, 0, 0) * needs to be called explicitly to remove a class from actlist */ } static void init_ed(struct hfsc_class *cl, int next_len) { u_int64_t cur_time; cur_time = read_machclk(); /* update the deadline curve */ rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul); /* * update the eligible curve. * for concave, it is equal to the deadline curve. * for convex, it is a linear curve with slope m2. 
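 *
 * note (added, descriptive): "concave" here means sm1 > sm2 (the burst
 * segment is steeper).  in the convex case (sm1 <= sm2) the assignment
 * below zeroes dx/dy, collapsing the two-piece curve to its second
 * segment, so the eligible time e is computed from the m2 slope alone.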
*/ cl->cl_eligible = cl->cl_deadline; if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) { cl->cl_eligible.dx = 0; cl->cl_eligible.dy = 0; } /* compute e and d */ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); ellist_insert(cl); } static void update_ed(struct hfsc_class *cl, int next_len) { cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); ellist_update(cl); } static void update_d(struct hfsc_class *cl, int next_len) { cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); } static void init_vf(struct hfsc_class *cl, int len) { struct hfsc_class *max_cl, *p; u_int64_t vt, f, cur_time; int go_active; cur_time = 0; go_active = 1; for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) { if (go_active && cl->cl_nactive++ == 0) go_active = 1; else go_active = 0; if (go_active) { max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead); if (max_cl != NULL) { /* * set vt to the average of the min and max * classes. if the parent's period didn't * change, don't decrease vt of the class. */ vt = max_cl->cl_vt; if (cl->cl_parent->cl_cvtmin != 0) vt = (cl->cl_parent->cl_cvtmin + vt)/2; if (cl->cl_parent->cl_vtperiod != cl->cl_parentperiod || vt > cl->cl_vt) cl->cl_vt = vt; } else { /* * first child for a new parent backlog period. * add parent's cvtmax to vtoff of children * to make a new vt (vtoff + vt) larger than * the vt in the last period for all children. */ vt = cl->cl_parent->cl_cvtmax; for (p = cl->cl_parent->cl_children; p != NULL; p = p->cl_siblings) p->cl_vtoff += vt; cl->cl_vt = 0; cl->cl_parent->cl_cvtmax = 0; cl->cl_parent->cl_cvtmin = 0; } cl->cl_initvt = cl->cl_vt; /* update the virtual curve */ vt = cl->cl_vt + cl->cl_vtoff; rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total); if (cl->cl_virtual.x == vt) { cl->cl_virtual.x -= cl->cl_vtoff; cl->cl_vtoff = 0; } cl->cl_vtadj = 0; cl->cl_vtperiod++; /* increment vt period */ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod; if (cl->cl_parent->cl_nactive == 0) cl->cl_parentperiod++; cl->cl_f = 0; actlist_insert(cl); if (cl->cl_usc != NULL) { /* class has upper limit curve */ if (cur_time == 0) cur_time = read_machclk(); /* update the ulimit curve */ rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time, cl->cl_total); /* compute myf */ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); cl->cl_myfadj = 0; } } if (cl->cl_myf > cl->cl_cfmin) f = cl->cl_myf; else f = cl->cl_cfmin; if (f != cl->cl_f) { cl->cl_f = f; update_cfmin(cl->cl_parent); } } } static void update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time) { u_int64_t f, myf_bound, delta; int go_passive; go_passive = qempty(cl->cl_q); for (; cl->cl_parent != NULL; cl = cl->cl_parent) { cl->cl_total += len; if (cl->cl_fsc == NULL || cl->cl_nactive == 0) continue; if (go_passive && --cl->cl_nactive == 0) go_passive = 1; else go_passive = 0; if (go_passive) { /* no more active child, going passive */ /* update cvtmax of the parent class */ if (cl->cl_vt > cl->cl_parent->cl_cvtmax) cl->cl_parent->cl_cvtmax = cl->cl_vt; /* remove this class from the vt list */ actlist_remove(cl); update_cfmin(cl->cl_parent); continue; } /* * update vt and f */ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) - cl->cl_vtoff + cl->cl_vtadj; /* * if vt of the class is smaller than cvtmin, * the class was skipped in the past due to non-fit. * if so, we need to adjust vtadj. 
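 *
 * illustrative example (added, values assumed): if the parent's cvtmin
 * is 1000 and this class's freshly computed vt is 800, the code below
 * adds 200 to cl_vtadj and clamps cl_vt to 1000, so the class rejoins
 * the actlist at the point the scheduler has actually reached.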
*/ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; cl->cl_vt = cl->cl_parent->cl_cvtmin; } /* update the vt list */ actlist_update(cl); if (cl->cl_usc != NULL) { cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, cl->cl_total); /* * if myf lags behind by more than one clock tick * from the current time, adjust myfadj to prevent * a rate-limited class from going greedy. * in a steady state under rate-limiting, myf * fluctuates within one clock tick. */ myf_bound = cur_time - machclk_per_tick; if (cl->cl_myf < myf_bound) { delta = cur_time - cl->cl_myf; cl->cl_myfadj += delta; cl->cl_myf += delta; } } /* cl_f is max(cl_myf, cl_cfmin) */ if (cl->cl_myf > cl->cl_cfmin) f = cl->cl_myf; else f = cl->cl_cfmin; if (f != cl->cl_f) { cl->cl_f = f; update_cfmin(cl->cl_parent); } } } static void update_cfmin(struct hfsc_class *cl) { struct hfsc_class *p; u_int64_t cfmin; if (TAILQ_EMPTY(&cl->cl_actc)) { cl->cl_cfmin = 0; return; } cfmin = HT_INFINITY; TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) { if (p->cl_f == 0) { cl->cl_cfmin = 0; return; } if (p->cl_f < cfmin) cfmin = p->cl_f; } cl->cl_cfmin = cfmin; } /* * TAILQ based ellist and actlist implementation * (ion wanted to make a calendar queue based implementation) */ /* * eligible list holds backlogged classes being sorted by their eligible times. * there is one eligible list per interface. */ static void ellist_insert(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; struct hfsc_class *p; /* check the last entry first */ if ((p = TAILQ_LAST(&hif->hif_eligible, elighead)) == NULL || p->cl_e <= cl->cl_e) { TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); return; } TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { if (cl->cl_e < p->cl_e) { TAILQ_INSERT_BEFORE(p, cl, cl_ellist); return; } } ASSERT(0); /* should not reach here */ } static void ellist_remove(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); } static void ellist_update(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; struct hfsc_class *p, *last; /* * the eligible time of a class increases monotonically. * if the next entry has a larger eligible time, nothing to do. */ p = TAILQ_NEXT(cl, cl_ellist); if (p == NULL || cl->cl_e <= p->cl_e) return; /* check the last entry */ last = TAILQ_LAST(&hif->hif_eligible, elighead); ASSERT(last != NULL); if (last->cl_e <= cl->cl_e) { TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); return; } /* * the new position must be between the next entry * and the last entry */ while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) { if (cl->cl_e < p->cl_e) { TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); TAILQ_INSERT_BEFORE(p, cl, cl_ellist); return; } } ASSERT(0); /* should not reach here */ } /* find the class with the minimum deadline among the eligible classes */ struct hfsc_class * hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time) { struct hfsc_class *p, *cl = NULL; TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { if (p->cl_e > cur_time) break; if (cl == NULL || p->cl_d < cl->cl_d) cl = p; } return (cl); } /* * active children list holds backlogged child classes being sorted * by their virtual time. * each intermediate class has one active children list. 
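 *
 * note (added, descriptive): actlist_firstfit() below scans this list in
 * vt order and returns the first child whose fit time has already passed
 * (cl_f <= cur_time); since cl_f is max(cl_myf, cl_cfmin), this is where
 * the upper-limit service curve throttles link-sharing.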
*/ static void actlist_insert(struct hfsc_class *cl) { struct hfsc_class *p; /* check the last entry first */ if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead)) == NULL || p->cl_vt <= cl->cl_vt) { TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); return; } TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) { if (cl->cl_vt < p->cl_vt) { TAILQ_INSERT_BEFORE(p, cl, cl_actlist); return; } } ASSERT(0); /* should not reach here */ } static void actlist_remove(struct hfsc_class *cl) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); } static void actlist_update(struct hfsc_class *cl) { struct hfsc_class *p, *last; /* * the virtual time of a class increases monotonically during its * backlogged period. * if the next entry has a larger virtual time, nothing to do. */ p = TAILQ_NEXT(cl, cl_actlist); if (p == NULL || cl->cl_vt < p->cl_vt) return; /* check the last entry */ last = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead); ASSERT(last != NULL); if (last->cl_vt <= cl->cl_vt) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); return; } /* * the new position must be between the next entry * and the last entry */ while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) { if (cl->cl_vt < p->cl_vt) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); TAILQ_INSERT_BEFORE(p, cl, cl_actlist); return; } } ASSERT(0); /* should not reach here */ } static struct hfsc_class * actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time) { struct hfsc_class *p; TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) { if (p->cl_f <= cur_time) return (p); } return (NULL); } /* * service curve support functions * * external service curve parameters * m: bits/sec * d: msec * internal service curve parameters * sm: (bytes/machclk tick) << SM_SHIFT * ism: (machclk ticks/byte) << ISM_SHIFT * dx: machclk ticks * * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits. we * should be able to handle 100K-100Gbps linkspeed with 256 MHz machclk * frequency and at least 3 effective digits in decimal. 
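 *
 * illustrative example (added, values assumed): with machclk_freq of
 * 256MHz and m = 100Mbps, m2sm() yields
 *	sm = (100000000 << 24) / 8 / 256000000 = 819200,
 * i.e. 819200/2^24 = 0.0488 bytes per machclk tick; over one second
 * (256000000 ticks) seg_x2y() accumulates roughly 12.5MB, which is the
 * expected 100Mbps.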
* */ #define SM_SHIFT 24 #define ISM_SHIFT 14 #define SM_MASK ((1LL << SM_SHIFT) - 1) #define ISM_MASK ((1LL << ISM_SHIFT) - 1) static __inline u_int64_t seg_x2y(u_int64_t x, u_int64_t sm) { u_int64_t y; /* * compute * y = x * sm >> SM_SHIFT * but divide it for the upper and lower bits to avoid overflow */ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT); return (y); } static __inline u_int64_t seg_y2x(u_int64_t y, u_int64_t ism) { u_int64_t x; if (y == 0) x = 0; else if (ism == HT_INFINITY) x = HT_INFINITY; else { x = (y >> ISM_SHIFT) * ism + (((y & ISM_MASK) * ism) >> ISM_SHIFT); } return (x); } static __inline u_int64_t m2sm(u_int64_t m) { u_int64_t sm; sm = (m << SM_SHIFT) / 8 / machclk_freq; return (sm); } static __inline u_int64_t m2ism(u_int64_t m) { u_int64_t ism; if (m == 0) ism = HT_INFINITY; else ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m; return (ism); } static __inline u_int64_t d2dx(u_int d) { u_int64_t dx; dx = ((u_int64_t)d * machclk_freq) / 1000; return (dx); } static u_int64_t sm2m(u_int64_t sm) { u_int64_t m; m = (sm * 8 * machclk_freq) >> SM_SHIFT; return (m); } static u_int dx2d(u_int64_t dx) { u_int64_t d; d = dx * 1000 / machclk_freq; return ((u_int)d); } static void sc2isc(struct service_curve *sc, struct internal_sc *isc) { isc->sm1 = m2sm(sc->m1); isc->ism1 = m2ism(sc->m1); isc->dx = d2dx(sc->d); isc->dy = seg_x2y(isc->dx, isc->sm1); isc->sm2 = m2sm(sc->m2); isc->ism2 = m2ism(sc->m2); } /* * initialize the runtime service curve with the given internal * service curve starting at (x, y). */ static void rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x, u_int64_t y) { rtsc->x = x; rtsc->y = y; rtsc->sm1 = isc->sm1; rtsc->ism1 = isc->ism1; rtsc->dx = isc->dx; rtsc->dy = isc->dy; rtsc->sm2 = isc->sm2; rtsc->ism2 = isc->ism2; } /* * calculate the y-projection of the runtime service curve by the * given x-projection value */ static u_int64_t rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y) { u_int64_t x; if (y < rtsc->y) x = rtsc->x; else if (y <= rtsc->y + rtsc->dy) { /* x belongs to the 1st segment */ if (rtsc->dy == 0) x = rtsc->x + rtsc->dx; else x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1); } else { /* x belongs to the 2nd segment */ x = rtsc->x + rtsc->dx + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2); } return (x); } static u_int64_t rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x) { u_int64_t y; if (x <= rtsc->x) y = rtsc->y; else if (x <= rtsc->x + rtsc->dx) /* y belongs to the 1st segment */ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1); else /* y belongs to the 2nd segment */ y = rtsc->y + rtsc->dy + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2); return (y); } /* * update the runtime service curve by taking the minimum of the current * runtime service curve and the service curve starting at (x, y). 
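 *
 * note (added, derivation): in the concave case (sm1 > sm2) the two
 * curves intersect; solving seg_x2y(dx, sm1) == (y1 - y) + seg_x2y(dx, sm2)
 * for the offset gives dx = ((y1 - y) << SM_SHIFT) / (sm1 - sm2), which is
 * what the body computes before adding the remaining first-segment length
 * of the old rtsc.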
*/ static void rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x, u_int64_t y) { u_int64_t y1, y2, dx, dy; if (isc->sm1 <= isc->sm2) { /* service curve is convex */ y1 = rtsc_x2y(rtsc, x); if (y1 < y) /* the current rtsc is smaller */ return; rtsc->x = x; rtsc->y = y; return; } /* * service curve is concave * compute the two y values of the current rtsc * y1: at x * y2: at (x + dx) */ y1 = rtsc_x2y(rtsc, x); if (y1 <= y) { /* rtsc is below isc, no change to rtsc */ return; } y2 = rtsc_x2y(rtsc, x + isc->dx); if (y2 >= y + isc->dy) { /* rtsc is above isc, replace rtsc by isc */ rtsc->x = x; rtsc->y = y; rtsc->dx = isc->dx; rtsc->dy = isc->dy; return; } /* * the two curves intersect * compute the offsets (dx, dy) using the reverse * function of seg_x2y() * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y) */ dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2); /* * check if (x, y1) belongs to the 1st segment of rtsc. * if so, add the offset. */ if (rtsc->x + rtsc->dx > x) dx += rtsc->x + rtsc->dx - x; dy = seg_x2y(dx, isc->sm1); rtsc->x = x; rtsc->y = y; rtsc->dx = dx; rtsc->dy = dy; return; } static void get_class_stats_v0(struct hfsc_classstats_v0 *sp, struct hfsc_class *cl) { sp->class_id = cl->cl_id; sp->class_handle = cl->cl_handle; #define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX) if (cl->cl_rsc != NULL) { sp->rsc.m1 = SATU32(sm2m(cl->cl_rsc->sm1)); sp->rsc.d = dx2d(cl->cl_rsc->dx); sp->rsc.m2 = SATU32(sm2m(cl->cl_rsc->sm2)); } else { sp->rsc.m1 = 0; sp->rsc.d = 0; sp->rsc.m2 = 0; } if (cl->cl_fsc != NULL) { sp->fsc.m1 = SATU32(sm2m(cl->cl_fsc->sm1)); sp->fsc.d = dx2d(cl->cl_fsc->dx); sp->fsc.m2 = SATU32(sm2m(cl->cl_fsc->sm2)); } else { sp->fsc.m1 = 0; sp->fsc.d = 0; sp->fsc.m2 = 0; } if (cl->cl_usc != NULL) { sp->usc.m1 = SATU32(sm2m(cl->cl_usc->sm1)); sp->usc.d = dx2d(cl->cl_usc->dx); sp->usc.m2 = SATU32(sm2m(cl->cl_usc->sm2)); } else { sp->usc.m1 = 0; sp->usc.d = 0; sp->usc.m2 = 0; } #undef SATU32 sp->total = cl->cl_total; sp->cumul = cl->cl_cumul; sp->d = cl->cl_d; sp->e = cl->cl_e; sp->vt = cl->cl_vt; sp->f = cl->cl_f; sp->initvt = cl->cl_initvt; sp->vtperiod = cl->cl_vtperiod; sp->parentperiod = cl->cl_parentperiod; sp->nactive = cl->cl_nactive; sp->vtoff = cl->cl_vtoff; sp->cvtmax = cl->cl_cvtmax; sp->myf = cl->cl_myf; sp->cfmin = cl->cl_cfmin; sp->cvtmin = cl->cl_cvtmin; sp->myfadj = cl->cl_myfadj; sp->vtadj = cl->cl_vtadj; sp->cur_time = read_machclk(); sp->machclk_freq = machclk_freq; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); sp->xmit_cnt = cl->cl_stats.xmit_cnt; sp->drop_cnt = cl->cl_stats.drop_cnt; sp->period = cl->cl_stats.period; sp->qtype = qtype(cl->cl_q); #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_getstats(cl->cl_codel, &sp->codel); #endif } static void get_class_stats_v1(struct hfsc_classstats_v1 *sp, struct hfsc_class *cl) { sp->class_id = cl->cl_id; sp->class_handle = cl->cl_handle; if (cl->cl_rsc != NULL) { sp->rsc.m1 = sm2m(cl->cl_rsc->sm1); sp->rsc.d = dx2d(cl->cl_rsc->dx); sp->rsc.m2 = sm2m(cl->cl_rsc->sm2); } else { sp->rsc.m1 = 0; sp->rsc.d = 0; sp->rsc.m2 = 0; } if (cl->cl_fsc != NULL) { sp->fsc.m1 = sm2m(cl->cl_fsc->sm1); sp->fsc.d = dx2d(cl->cl_fsc->dx); sp->fsc.m2 = sm2m(cl->cl_fsc->sm2); } else { sp->fsc.m1 = 0; sp->fsc.d = 0; sp->fsc.m2 = 0; } if (cl->cl_usc != NULL) { sp->usc.m1 = sm2m(cl->cl_usc->sm1); sp->usc.d = 
dx2d(cl->cl_usc->dx); sp->usc.m2 = sm2m(cl->cl_usc->sm2); } else { sp->usc.m1 = 0; sp->usc.d = 0; sp->usc.m2 = 0; } sp->total = cl->cl_total; sp->cumul = cl->cl_cumul; sp->d = cl->cl_d; sp->e = cl->cl_e; sp->vt = cl->cl_vt; sp->f = cl->cl_f; sp->initvt = cl->cl_initvt; sp->vtperiod = cl->cl_vtperiod; sp->parentperiod = cl->cl_parentperiod; sp->nactive = cl->cl_nactive; sp->vtoff = cl->cl_vtoff; sp->cvtmax = cl->cl_cvtmax; sp->myf = cl->cl_myf; sp->cfmin = cl->cl_cfmin; sp->cvtmin = cl->cl_cvtmin; sp->myfadj = cl->cl_myfadj; sp->vtadj = cl->cl_vtadj; sp->cur_time = read_machclk(); sp->machclk_freq = machclk_freq; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); sp->xmit_cnt = cl->cl_stats.xmit_cnt; sp->drop_cnt = cl->cl_stats.drop_cnt; sp->period = cl->cl_stats.period; sp->qtype = qtype(cl->cl_q); #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_getstats(cl->cl_codel, &sp->codel); #endif } /* convert a class handle to the corresponding class pointer */ static struct hfsc_class * clh_to_clp(struct hfsc_if *hif, u_int32_t chandle) { int i; struct hfsc_class *cl; if (chandle == 0) return (NULL); /* * first, try optimistically the slot matching the lower bits of * the handle. if it fails, do the linear table search. */ i = chandle % HFSC_MAX_CLASSES; if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) return (cl); for (i = 0; i < HFSC_MAX_CLASSES; i++) if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } #endif /* ALTQ_HFSC */ diff --git a/sys/net/altq/altq_priq.c b/sys/net/altq/altq_priq.c index e0c3e0f6ee80..f0627e2611a7 100644 --- a/sys/net/altq/altq_priq.c +++ b/sys/net/altq/altq_priq.c @@ -1,641 +1,641 @@ /*- * Copyright (C) 2000-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ * $FreeBSD$ */ /* * priority queue */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * function prototypes */ static int priq_clear_interface(struct priq_if *); static int priq_request(struct ifaltq *, int, void *); static void priq_purge(struct priq_if *); static struct priq_class *priq_class_create(struct priq_if *, int, int, int, int); static int priq_class_destroy(struct priq_class *); static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *priq_dequeue(struct ifaltq *, int); static int priq_addq(struct priq_class *, struct mbuf *); static struct mbuf *priq_getq(struct priq_class *); static struct mbuf *priq_pollq(struct priq_class *); static void priq_purgeq(struct priq_class *); static void get_class_stats(struct priq_classstats *, struct priq_class *); static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t); int priq_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); s = splnet(); error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc, - priq_enqueue, priq_dequeue, priq_request, NULL, NULL); + priq_enqueue, priq_dequeue, priq_request); splx(s); return (error); } int priq_add_altq(struct ifnet * ifp, struct pf_altq *a) { struct priq_if *pif; if (ifp == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (pif == NULL) return (ENOMEM); pif->pif_bandwidth = a->ifbandwidth; pif->pif_maxpri = -1; pif->pif_ifq = &ifp->if_snd; /* keep the state in pf_altq */ a->altq_disc = pif; return (0); } int priq_remove_altq(struct pf_altq *a) { struct priq_if *pif; if ((pif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; (void)priq_clear_interface(pif); free(pif, M_DEVBUF); return (0); } int priq_add_queue(struct pf_altq *a) { struct priq_if *pif; struct priq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); /* check parameters */ if (a->priority >= PRIQ_MAXPRI) return (EINVAL); if (a->qid == 0) return (EINVAL); if (pif->pif_classes[a->priority] != NULL) return (EBUSY); if (clh_to_clp(pif, a->qid) != NULL) return (EBUSY); cl = priq_class_create(pif, a->priority, a->qlimit, a->pq_u.priq_opts.flags, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int priq_remove_queue(struct pf_altq *a) { struct priq_if *pif; struct priq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); return (priq_class_destroy(cl)); } int priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct priq_if *pif; struct priq_class *cl; struct priq_classstats stats; int error = 0; if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes. 
*/ static int priq_clear_interface(struct priq_if *pif) { struct priq_class *cl; int pri; #ifdef ALTQ3_CLFIER_COMPAT /* free the filters for this interface */ acc_discard_filters(&pif->pif_classifier, NULL, 1); #endif /* clear out the classes */ for (pri = 0; pri <= pif->pif_maxpri; pri++) if ((cl = pif->pif_classes[pri]) != NULL) priq_class_destroy(cl); return (0); } static int priq_request(struct ifaltq *ifq, int req, void *arg) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: priq_purge(pif); break; } return (0); } /* discard all the queued packets on the interface */ static void priq_purge(struct priq_if *pif) { struct priq_class *cl; int pri; for (pri = 0; pri <= pif->pif_maxpri; pri++) { if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q)) priq_purgeq(cl); } if (ALTQ_IS_ENABLED(pif->pif_ifq)) pif->pif_ifq->ifq_len = 0; } static struct priq_class * priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) { struct priq_class *cl; int s; #ifndef ALTQ_RED if (flags & PRCF_RED) { #ifdef ALTQ_DEBUG printf("priq_class_create: RED not configured for PRIQ!\n"); #endif return (NULL); } #endif #ifndef ALTQ_CODEL if (flags & PRCF_CODEL) { #ifdef ALTQ_DEBUG printf("priq_class_create: CODEL not configured for PRIQ!\n"); #endif return (NULL); } #endif if ((cl = pif->pif_classes[pri]) != NULL) { /* modify the class instead of creating a new one */ s = splnet(); IFQ_LOCK(cl->cl_pif->pif_ifq); if (!qempty(cl->cl_q)) priq_purgeq(cl); IFQ_UNLOCK(cl->cl_pif->pif_ifq); splx(s); #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } else { cl = malloc(sizeof(struct priq_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->cl_q == NULL) goto err_ret; } pif->pif_classes[pri] = cl; if (flags & PRCF_DEFAULTCLASS) pif->pif_default = cl; if (qlimit == 0) qlimit = 50; /* use default */ qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; qsize(cl->cl_q) = 0; cl->cl_flags = flags; cl->cl_pri = pri; if (pri > pif->pif_maxpri) pif->pif_maxpri = pri; cl->cl_pif = pif; cl->cl_handle = qid; #ifdef ALTQ_RED if (flags & (PRCF_RED|PRCF_RIO)) { int red_flags, red_pkttime; red_flags = 0; if (flags & PRCF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & PRCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (pif->pif_bandwidth < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); #ifdef ALTQ_RIO if (flags & PRCF_RIO) { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red == NULL) goto err_ret; qtype(cl->cl_q) = Q_RIO; } else #endif if (flags & PRCF_RED) { cl->cl_red = red_alloc(0, 0, qlimit(cl->cl_q) * 10/100, qlimit(cl->cl_q) * 30/100, red_flags, red_pkttime); if (cl->cl_red == NULL) goto err_ret; qtype(cl->cl_q) = Q_RED; } } #endif /* ALTQ_RED */ #ifdef ALTQ_CODEL if (flags & PRCF_CODEL) { cl->cl_codel = codel_alloc(5, 100, 0); if (cl->cl_codel != NULL) qtype(cl->cl_q) = Q_CODEL; } #endif return (cl); err_ret: if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef 
ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (NULL); } static int priq_class_destroy(struct priq_class *cl) { struct priq_if *pif; int s, pri; s = splnet(); IFQ_LOCK(cl->cl_pif->pif_ifq); #ifdef ALTQ3_CLFIER_COMPAT /* delete filters referencing to this class */ acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0); #endif if (!qempty(cl->cl_q)) priq_purgeq(cl); pif = cl->cl_pif; pif->pif_classes[cl->cl_pri] = NULL; if (pif->pif_maxpri == cl->cl_pri) { for (pri = cl->cl_pri; pri >= 0; pri--) if (pif->pif_classes[pri] != NULL) { pif->pif_maxpri = pri; break; } if (pri < 0) pif->pif_maxpri = -1; } IFQ_UNLOCK(cl->cl_pif->pif_ifq); splx(s); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * priq_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. */ static int priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; struct priq_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(pif, t->qid); if (cl == NULL) { cl = pif->pif_default; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } cl->cl_pktattr = NULL; len = m_pktlen(m); if (priq_addq(cl, m) != 0) { /* drop occurred. mbuf was freed in priq_addq. */ PKTCNTR_ADD(&cl->cl_dropcnt, len); return (ENOBUFS); } IFQ_INC_LEN(ifq); /* successfully queued. */ return (0); } /* * priq_dequeue is a dequeue function to be registered to * (*altq_dequeue) in struct ifaltq. * * note: ALTDQ_POLL returns the next packet without removing the packet * from the queue. ALTDQ_REMOVE is a normal dequeue operation. * ALTDQ_REMOVE must return the same packet if called immediately * after ALTDQ_POLL. 
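 *
 * note (added, descriptive): unlike hfsc, priq keeps no poll cache; the
 * contract relies on the caller holding the ifq lock across the POLL and
 * the immediately following REMOVE, so the head of the highest-priority
 * backlogged class cannot change in between.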
*/ static struct mbuf * priq_dequeue(struct ifaltq *ifq, int op) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; struct priq_class *cl; struct mbuf *m; int pri; IFQ_LOCK_ASSERT(ifq); if (IFQ_IS_EMPTY(ifq)) /* no packet in the queue */ return (NULL); for (pri = pif->pif_maxpri; pri >= 0; pri--) { if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q)) { if (op == ALTDQ_POLL) return (priq_pollq(cl)); m = priq_getq(cl); if (m != NULL) { IFQ_DEC_LEN(ifq); if (qempty(cl->cl_q)) cl->cl_period++; PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(m)); } return (m); } } return (NULL); } static int priq_addq(struct priq_class *cl, struct mbuf *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_addq(cl->cl_codel, cl->cl_q, m); #endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); } if (cl->cl_flags & PRCF_CLEARDSCP) write_dsfield(m, cl->cl_pktattr, 0); _addq(cl->cl_q, m); return (0); } static struct mbuf * priq_getq(struct priq_class *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_getq((rio_t *)cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_getq(cl->cl_codel, cl->cl_q); #endif return _getq(cl->cl_q); } static struct mbuf * priq_pollq(cl) struct priq_class *cl; { return qhead(cl->cl_q); } static void priq_purgeq(struct priq_class *cl) { struct mbuf *m; if (qempty(cl->cl_q)) return; while ((m = _getq(cl->cl_q)) != NULL) { PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); m_freem(m); } ASSERT(qlen(cl->cl_q) == 0); } static void get_class_stats(struct priq_classstats *sp, struct priq_class *cl) { sp->class_handle = cl->cl_handle; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); sp->period = cl->cl_period; sp->xmitcnt = cl->cl_xmitcnt; sp->dropcnt = cl->cl_dropcnt; sp->qtype = qtype(cl->cl_q); #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_getstats(cl->cl_codel, &sp->codel); #endif } /* convert a class handle to the corresponding class pointer */ static struct priq_class * clh_to_clp(struct priq_if *pif, u_int32_t chandle) { struct priq_class *cl; int idx; if (chandle == 0) return (NULL); for (idx = pif->pif_maxpri; idx >= 0; idx--) if ((cl = pif->pif_classes[idx]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } #endif /* ALTQ_PRIQ */ diff --git a/sys/net/altq/altq_subr.c b/sys/net/altq/altq_subr.c index b35637820a47..dff589607ac8 100644 --- a/sys/net/altq/altq_subr.c +++ b/sys/net/altq/altq_subr.c @@ -1,1964 +1,1956 @@ /*- * Copyright (C) 1997-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include /* machine dependent clock related includes */ #include #include #include #include #if defined(__amd64__) || defined(__i386__) #include /* for pentium tsc */ #include /* for CPUID_TSC */ #include /* for cpu_feature */ #endif /* __amd64 || __i386__ */ /* * internal function prototypes */ static void tbr_timeout(void *); int (*altq_input)(struct mbuf *, int) = NULL; static struct mbuf *tbr_dequeue(struct ifaltq *, int); static int tbr_timer = 0; /* token bucket regulator timer */ #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) static struct callout tbr_callout = CALLOUT_INITIALIZER; #else static struct callout tbr_callout; #endif #ifdef ALTQ3_CLFIER_COMPAT static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); #ifdef INET6 static int extract_ports6(struct mbuf *, struct ip6_hdr *, struct flowinfo_in6 *); #endif static int apply_filter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); static int apply_ppfilter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); #ifdef INET6 static int apply_filter6(u_int32_t, struct flow_filter6 *, struct flowinfo_in6 *); #endif static int apply_tosfilter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); static u_long get_filt_handle(struct acc_classifier *, int); static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); static u_int32_t filt2fibmask(struct flow_filter *); static void ip4f_cache(struct ip *, struct flowinfo_in *); static int ip4f_lookup(struct ip *, struct flowinfo_in *); static int ip4f_init(void); static struct ip4_frag *ip4f_alloc(void); static void ip4f_free(struct ip4_frag *); #endif /* ALTQ3_CLFIER_COMPAT */ #ifdef ALTQ SYSCTL_NODE(_kern_features, OID_AUTO, altq, CTLFLAG_RD | CTLFLAG_CAPRD, 0, "ALTQ packet queuing"); #define ALTQ_FEATURE(name, desc) \ SYSCTL_INT_WITH_LABEL(_kern_features_altq, OID_AUTO, name, \ CTLFLAG_RD | CTLFLAG_CAPRD, SYSCTL_NULL_INT_PTR, 1, \ desc, "feature") #ifdef ALTQ_CBQ ALTQ_FEATURE(cbq, "ATLQ Class Based Queuing discipline"); #endif #ifdef ALTQ_CODEL ALTQ_FEATURE(codel, "ALTQ Controlled Delay discipline"); #endif #ifdef ALTQ_RED ALTQ_FEATURE(red, "ALTQ Random Early Detection discipline"); #endif #ifdef ALTQ_RIO ALTQ_FEATURE(rio, "ALTQ Random Early Drop discipline"); #endif #ifdef ALTQ_HFSC ALTQ_FEATURE(hfsc, "ALTQ Hierarchical Packet Scheduler discipline"); #endif #ifdef ALTQ_PRIQ ALTQ_FEATURE(priq, "ATLQ 
Priority Queuing discipline"); #endif #ifdef ALTQ_FAIRQ ALTQ_FEATURE(fairq, "ALTQ Fair Queuing discipline"); #endif #endif /* * alternate queueing support routines */ /* look up the queue state by the interface name and the queueing type. */ void * altq_lookup(name, type) char *name; int type; { struct ifnet *ifp; if ((ifp = ifunit(name)) != NULL) { /* read if_snd unlocked */ if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) return (ifp->if_snd.altq_disc); } return NULL; } int -altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify) +altq_attach(ifq, type, discipline, enqueue, dequeue, request) struct ifaltq *ifq; int type; void *discipline; int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); struct mbuf *(*dequeue)(struct ifaltq *, int); int (*request)(struct ifaltq *, int, void *); - void *clfier; - void *(*classify)(void *, struct mbuf *, int); { IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } ifq->altq_type = type; ifq->altq_disc = discipline; ifq->altq_enqueue = enqueue; ifq->altq_dequeue = dequeue; ifq->altq_request = request; - ifq->altq_clfier = clfier; - ifq->altq_classify = classify; ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED); IFQ_UNLOCK(ifq); return 0; } int altq_detach(ifq) struct ifaltq *ifq; { IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return EBUSY; } if (!ALTQ_IS_ATTACHED(ifq)) { IFQ_UNLOCK(ifq); return (0); } ifq->altq_type = ALTQT_NONE; ifq->altq_disc = NULL; ifq->altq_enqueue = NULL; ifq->altq_dequeue = NULL; ifq->altq_request = NULL; - ifq->altq_clfier = NULL; - ifq->altq_classify = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; IFQ_UNLOCK(ifq); return 0; } int altq_enable(ifq) struct ifaltq *ifq; { int s; IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return 0; } s = splnet(); IFQ_PURGE_NOLOCK(ifq); ASSERT(ifq->ifq_len == 0); ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */ ifq->altq_flags |= ALTQF_ENABLED; - if (ifq->altq_clfier != NULL) - ifq->altq_flags |= ALTQF_CLASSIFY; splx(s); IFQ_UNLOCK(ifq); return 0; } int altq_disable(ifq) struct ifaltq *ifq; { int s; IFQ_LOCK(ifq); if (!ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return 0; } s = splnet(); IFQ_PURGE_NOLOCK(ifq); ASSERT(ifq->ifq_len == 0); - ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY); + ifq->altq_flags &= ~(ALTQF_ENABLED); splx(s); IFQ_UNLOCK(ifq); return 0; } #ifdef ALTQ_DEBUG void altq_assert(file, line, failedexpr) const char *file, *failedexpr; int line; { (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", failedexpr, file, line); panic("altq assertion"); /* NOTREACHED */ } #endif /* * internal representation of token bucket parameters * rate: (byte_per_unittime << TBR_SHIFT) / machclk_freq * (((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq * depth: byte << TBR_SHIFT * */ #define TBR_SHIFT 29 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) static struct mbuf * tbr_dequeue(ifq, op) struct ifaltq *ifq; int op; { struct tb_regulator *tbr; struct mbuf *m; int64_t interval; u_int64_t now; IFQ_LOCK_ASSERT(ifq); tbr = ifq->altq_tbr; if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { /* if this is a remove after poll, bypass tbr check */ } else { /* update token only when it is negative */ if (tbr->tbr_token <= 0) { now = read_machclk(); interval = now - tbr->tbr_last; if (interval >= tbr->tbr_filluptime) 
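				/*
				 * illustrative arithmetic (added, values assumed): with
				 * TBR_SHIFT = 29, a 100Mbps profile and a 256MHz machclk,
				 * tbr_set() computes tbr_rate = ((100000000/8) << 29) /
				 * 256000000 = 26214400, i.e. about 0.049 byte per machclk
				 * tick in the scaled units used for the refill below.
				 */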
tbr->tbr_token = tbr->tbr_depth; else { tbr->tbr_token += interval * tbr->tbr_rate; if (tbr->tbr_token > tbr->tbr_depth) tbr->tbr_token = tbr->tbr_depth; } tbr->tbr_last = now; } /* if token is still negative, don't allow dequeue */ if (tbr->tbr_token <= 0) return (NULL); } if (ALTQ_IS_ENABLED(ifq)) m = (*ifq->altq_dequeue)(ifq, op); else { if (op == ALTDQ_POLL) _IF_POLL(ifq, m); else _IF_DEQUEUE(ifq, m); } if (m != NULL && op == ALTDQ_REMOVE) tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); tbr->tbr_lastop = op; return (m); } /* * set a token bucket regulator. * if the specified rate is zero, the token bucket regulator is deleted. */ int tbr_set(ifq, profile) struct ifaltq *ifq; struct tb_profile *profile; { struct tb_regulator *tbr, *otbr; if (tbr_dequeue_ptr == NULL) tbr_dequeue_ptr = tbr_dequeue; if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) { printf("tbr_set: no cpu clock available!\n"); return (ENXIO); } IFQ_LOCK(ifq); if (profile->rate == 0) { /* delete this tbr */ if ((tbr = ifq->altq_tbr) == NULL) { IFQ_UNLOCK(ifq); return (ENOENT); } ifq->altq_tbr = NULL; free(tbr, M_DEVBUF); IFQ_UNLOCK(ifq); return (0); } tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); if (tbr == NULL) { IFQ_UNLOCK(ifq); return (ENOMEM); } tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; tbr->tbr_depth = TBR_SCALE(profile->depth); if (tbr->tbr_rate > 0) tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; else tbr->tbr_filluptime = LLONG_MAX; /* * The longest time between tbr_dequeue() calls will be about 1 * system tick, as the callout that drives it is scheduled once per * tick. The refill-time detection logic in tbr_dequeue() can only * properly detect the passage of up to LLONG_MAX machclk ticks. * Therefore, in order for this logic to function properly in the * extreme case, the maximum value of tbr_filluptime should be * LLONG_MAX less one system tick's worth of machclk ticks less * some additional slop factor (here one more system tick's worth * of machclk ticks). */ if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick)) tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick; tbr->tbr_token = tbr->tbr_depth; tbr->tbr_last = read_machclk(); tbr->tbr_lastop = ALTDQ_REMOVE; otbr = ifq->altq_tbr; ifq->altq_tbr = tbr; /* set the new tbr */ if (otbr != NULL) free(otbr, M_DEVBUF); else { if (tbr_timer == 0) { CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); tbr_timer = 1; } } IFQ_UNLOCK(ifq); return (0); } /* * tbr_timeout goes through the interface list, and kicks the drivers * if necessary. * * MPSAFE */ static void tbr_timeout(arg) void *arg; { VNET_ITERATOR_DECL(vnet_iter); struct ifnet *ifp; struct epoch_tracker et; int active; active = 0; NET_EPOCH_ENTER(et); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp; ifp = CK_STAILQ_NEXT(ifp, if_link)) { /* read from if_snd unlocked */ if (!TBR_IS_ENABLED(&ifp->if_snd)) continue; active++; if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL) (*ifp->if_start)(ifp); } CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); NET_EPOCH_EXIT(et); if (active > 0) CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); else tbr_timer = 0; /* don't need tbr_timer anymore */ } /* * attach a discipline to the interface. if one already exists, it is * overridden. * Locking is done in the discipline specific attach functions. Basically * they call back to altq_attach which takes care of the attach and locking. 
*/ int altq_pfattach(struct pf_altq *a) { int error = 0; switch (a->scheduler) { case ALTQT_NONE: break; #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_pfattach(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_pfattach(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_pfattach(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_pfattach(a); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: error = codel_pfattach(a); break; #endif default: error = ENXIO; } return (error); } /* * detach a discipline from the interface. * it is possible that the discipline was already overridden by another * discipline. */ int altq_pfdetach(struct pf_altq *a) { struct ifnet *ifp; int s, error = 0; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); /* if this discipline is no longer referenced, just return */ /* read unlocked from if_snd */ if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc) return (0); s = splnet(); /* read unlocked from if_snd, _disable and _detach take care */ if (ALTQ_IS_ENABLED(&ifp->if_snd)) error = altq_disable(&ifp->if_snd); if (error == 0) error = altq_detach(&ifp->if_snd); splx(s); return (error); } /* * add a discipline or a queue * Locking is done in the discipline specific functions with regards to * malloc with WAITOK, also it is not yet clear which lock to use. */ int altq_add(struct ifnet *ifp, struct pf_altq *a) { int error = 0; if (a->qname[0] != 0) return (altq_add_queue(a)); if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) panic("altq_add: no cpu clock"); switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_add_altq(ifp, a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_add_altq(ifp, a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_add_altq(ifp, a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_add_altq(ifp, a); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: error = codel_add_altq(ifp, a); break; #endif default: error = ENXIO; } return (error); } /* * remove a discipline or a queue * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_remove(struct pf_altq *a) { int error = 0; if (a->qname[0] != 0) return (altq_remove_queue(a)); switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_remove_altq(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_remove_altq(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_remove_altq(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_remove_altq(a); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: error = codel_remove_altq(a); break; #endif default: error = ENXIO; } return (error); } /* * add a queue to the discipline * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_add_queue(struct pf_altq *a) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_add_queue(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_add_queue(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_add_queue(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_add_queue(a); break; #endif default: error = ENXIO; } return (error); } /* * remove a queue from the discipline * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_remove_queue(struct 
pf_altq *a) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_remove_queue(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_remove_queue(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_remove_queue(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_remove_queue(a); break; #endif default: error = ENXIO; } return (error); } /* * get queue statistics * Locking is done in the discipline specific functions with regards to * copyout operations, also it is not yet clear which lock to use. */ int altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: error = codel_getqstats(a, ubuf, nbytes, version); break; #endif default: error = ENXIO; } return (error); } /* * read and write diffserv field in IPv4 or IPv6 header */ u_int8_t read_dsfield(m, pktattr) struct mbuf *m; struct altq_pktattr *pktattr; { struct mbuf *m0; u_int8_t ds_field = 0; if (pktattr == NULL || (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) return ((u_int8_t)0); /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if ((pktattr->pattr_hdr >= m0->m_data) && (pktattr->pattr_hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, pattr_hdr is stale */ pktattr->pattr_af = AF_UNSPEC; #ifdef ALTQ_DEBUG printf("read_dsfield: can't locate header!\n"); #endif return ((u_int8_t)0); } if (pktattr->pattr_af == AF_INET) { struct ip *ip = (struct ip *)pktattr->pattr_hdr; if (ip->ip_v != 4) return ((u_int8_t)0); /* version mismatch! */ ds_field = ip->ip_tos; } #ifdef INET6 else if (pktattr->pattr_af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return ((u_int8_t)0); /* version mismatch! */ ds_field = (flowlabel >> 20) & 0xff; } #endif return (ds_field); } void write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield) { struct mbuf *m0; if (pktattr == NULL || (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) return; /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if ((pktattr->pattr_hdr >= m0->m_data) && (pktattr->pattr_hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, pattr_hdr is stale */ pktattr->pattr_af = AF_UNSPEC; #ifdef ALTQ_DEBUG printf("write_dsfield: can't locate header!\n"); #endif return; } if (pktattr->pattr_af == AF_INET) { struct ip *ip = (struct ip *)pktattr->pattr_hdr; u_int8_t old; int32_t sum; if (ip->ip_v != 4) return; /* version mismatch! 
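 *
 * note on the checksum update below (added): RFC 1624 gives
 * HC' = ~(~HC + ~m + m') for old/new 16-bit words m and m'.  only the
 * low (TOS) byte of the first word changes here, and for the unchanged
 * version/IHL byte (~x & 0xff) + x == 0xff, so its contribution is the
 * constant 0xff00 added alongside (~old & 0xff) and the new dsfield.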
*/ old = ip->ip_tos; dsfield |= old & 3; /* leave CU bits */ if (old == dsfield) return; ip->ip_tos = dsfield; /* * update checksum (from RFC1624) * HC' = ~(~HC + ~m + m') */ sum = ~ntohs(ip->ip_sum) & 0xffff; sum += 0xff00 + (~old & 0xff) + dsfield; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); /* add carry */ ip->ip_sum = htons(~sum & 0xffff); } #ifdef INET6 else if (pktattr->pattr_af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return; /* version mismatch! */ flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20); ip6->ip6_flow = htonl(flowlabel); } #endif return; } /* * high resolution clock support taking advantage of a machine dependent * high resolution time counter (e.g., timestamp counter of intel pentium). * we assume * - 64-bit-long monotonically-increasing counter * - frequency range is 100M-4GHz (CPU speed) */ /* if pcc is not available or disabled, emulate 256MHz using microtime() */ #define MACHCLK_SHIFT 8 int machclk_usepcc; u_int32_t machclk_freq; u_int32_t machclk_per_tick; #if defined(__i386__) && defined(__NetBSD__) extern u_int64_t cpu_tsc_freq; #endif #if (__FreeBSD_version >= 700035) /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg, const struct cf_level *level, int status) { /* If there was an error during the transition, don't do anything. */ if (status != 0) return; #if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__)) /* If TSC is P-state invariant, don't do anything. */ if (tsc_is_invariant) return; #endif /* Total setting for this level gives the new frequency in MHz. */ init_machclk(); } EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_LAST); #endif /* __FreeBSD_version >= 700035 */ static void init_machclk_setup(void) { #if (__FreeBSD_version >= 600000) callout_init(&tbr_callout, 0); #endif machclk_usepcc = 1; #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC) machclk_usepcc = 0; #endif #if defined(__FreeBSD__) && defined(SMP) machclk_usepcc = 0; #endif #if defined(__NetBSD__) && defined(MULTIPROCESSOR) machclk_usepcc = 0; #endif #if defined(__amd64__) || defined(__i386__) /* check if TSC is available */ if ((cpu_feature & CPUID_TSC) == 0 || atomic_load_acq_64(&tsc_freq) == 0) machclk_usepcc = 0; #endif } void init_machclk(void) { static int called; /* Call one-time initialization function. */ if (!called) { init_machclk_setup(); called = 1; } if (machclk_usepcc == 0) { /* emulate 256MHz using microtime() */ machclk_freq = 1000000 << MACHCLK_SHIFT; machclk_per_tick = machclk_freq / hz; #ifdef ALTQ_DEBUG printf("altq: emulate %uHz cpu clock\n", machclk_freq); #endif return; } /* * if the clock frequency (of Pentium TSC or Alpha PCC) is * accessible, just use it. */ #if defined(__amd64__) || defined(__i386__) machclk_freq = atomic_load_acq_64(&tsc_freq); #endif /* * if we don't know the clock frequency, measure it. 
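 *
 * illustrative example (added, values assumed): the code below sleeps for
 * about one second (timo = hz) and divides the counter delta by the
 * elapsed microseconds; if the counter advanced by 2400000000 while
 * 1000000us passed, machclk_freq becomes roughly 2.4GHz.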
*/ if (machclk_freq == 0) { static int wait; struct timeval tv_start, tv_end; u_int64_t start, end, diff; int timo; microtime(&tv_start); start = read_machclk(); timo = hz; /* 1 sec */ (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); microtime(&tv_end); end = read_machclk(); diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 + tv_end.tv_usec - tv_start.tv_usec; if (diff != 0) machclk_freq = (u_int)((end - start) * 1000000 / diff); } machclk_per_tick = machclk_freq / hz; #ifdef ALTQ_DEBUG printf("altq: CPU clock: %uHz\n", machclk_freq); #endif } #if defined(__OpenBSD__) && defined(__i386__) static __inline u_int64_t rdtsc(void) { u_int64_t rv; __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv)); return (rv); } #endif /* __OpenBSD__ && __i386__ */ u_int64_t read_machclk(void) { u_int64_t val; if (machclk_usepcc) { #if defined(__amd64__) || defined(__i386__) val = rdtsc(); #else panic("read_machclk"); #endif } else { struct timeval tv, boottime; microtime(&tv); getboottime(&boottime); val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000 + tv.tv_usec) << MACHCLK_SHIFT); } return (val); } #ifdef ALTQ3_CLFIER_COMPAT #ifndef IPPROTO_ESP #define IPPROTO_ESP 50 /* encapsulating security payload */ #endif #ifndef IPPROTO_AH #define IPPROTO_AH 51 /* authentication header */ #endif /* * extract flow information from a given packet. * filt_mask shows flowinfo fields required. * we assume the ip header is in one mbuf, and addresses and ports are * in network byte order. */ int altq_extractflow(m, af, flow, filt_bmask) struct mbuf *m; int af; struct flowinfo *flow; u_int32_t filt_bmask; { switch (af) { case PF_INET: { struct flowinfo_in *fin; struct ip *ip; ip = mtod(m, struct ip *); if (ip->ip_v != 4) break; fin = (struct flowinfo_in *)flow; fin->fi_len = sizeof(struct flowinfo_in); fin->fi_family = AF_INET; fin->fi_proto = ip->ip_p; fin->fi_tos = ip->ip_tos; fin->fi_src.s_addr = ip->ip_src.s_addr; fin->fi_dst.s_addr = ip->ip_dst.s_addr; if (filt_bmask & FIMB4_PORTS) /* if port info is required, extract port numbers */ extract_ports4(m, ip, fin); else { fin->fi_sport = 0; fin->fi_dport = 0; fin->fi_gpi = 0; } return (1); } #ifdef INET6 case PF_INET6: { struct flowinfo_in6 *fin6; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); /* should we check the ip version? */ fin6 = (struct flowinfo_in6 *)flow; fin6->fi6_len = sizeof(struct flowinfo_in6); fin6->fi6_family = AF_INET6; fin6->fi6_proto = ip6->ip6_nxt; fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); fin6->fi6_src = ip6->ip6_src; fin6->fi6_dst = ip6->ip6_dst; if ((filt_bmask & FIMB6_PORTS) || ((filt_bmask & FIMB6_PROTO) && ip6->ip6_nxt > IPPROTO_IPV6)) /* * if port info is required, or proto is required * but there are option headers, extract port * and protocol numbers. */ extract_ports6(m, ip6, fin6); else { fin6->fi6_sport = 0; fin6->fi6_dport = 0; fin6->fi6_gpi = 0; } return (1); } #endif /* INET6 */ default: break; } /* failed */ flow->fi_len = sizeof(struct flowinfo); flow->fi_family = AF_UNSPEC; return (0); } /* * helper routine to extract port numbers */ /* structure for ipsec and ipv6 option header template */ struct _opt6 { u_int8_t opt6_nxt; /* next header */ u_int8_t opt6_hlen; /* header extension length */ u_int16_t _pad; u_int32_t ah_spi; /* security parameter index for authentication header */ }; /* * extract port numbers from a ipv4 packet. 
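 * the transport header is found by stepping over the variable-length IP
 * header; a minimal sketch of the offset arithmetic used below (same
 * names as in this function):
 *
 *	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
 *	udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
 *	fin->fi_sport = udp->uh_sport;	(kept in network byte order)
 *
 * ip_hl counts 32-bit words, so a header without options gives an offset
 * of 5 << 2 = 20 bytes past the start of the IP header.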
*/ static int extract_ports4(m, ip, fin) struct mbuf *m; struct ip *ip; struct flowinfo_in *fin; { struct mbuf *m0; u_short ip_off; u_int8_t proto; int off; fin->fi_sport = 0; fin->fi_dport = 0; fin->fi_gpi = 0; ip_off = ntohs(ip->ip_off); /* if it is a fragment, try cached fragment info */ if (ip_off & IP_OFFMASK) { ip4f_lookup(ip, fin); return (1); } /* locate the mbuf containing the protocol header */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)ip >= m0->m_data) && ((caddr_t)ip < m0->m_data + m0->m_len)) break; if (m0 == NULL) { #ifdef ALTQ_DEBUG printf("extract_ports4: can't locate header! ip=%p\n", ip); #endif return (0); } off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); proto = ip->ip_p; #ifdef ALTQ_IPSEC again: #endif while (off >= m0->m_len) { off -= m0->m_len; m0 = m0->m_next; if (m0 == NULL) return (0); /* bogus ip_hl! */ } if (m0->m_len < off + 4) return (0); switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: { struct udphdr *udp; udp = (struct udphdr *)(mtod(m0, caddr_t) + off); fin->fi_sport = udp->uh_sport; fin->fi_dport = udp->uh_dport; fin->fi_proto = proto; } break; #ifdef ALTQ_IPSEC case IPPROTO_ESP: if (fin->fi_gpi == 0){ u_int32_t *gpi; gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); fin->fi_gpi = *gpi; } fin->fi_proto = proto; break; case IPPROTO_AH: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); proto = opt6->opt6_nxt; off += 8 + (opt6->opt6_hlen * 4); if (fin->fi_gpi == 0 && m0->m_len >= off + 8) fin->fi_gpi = opt6->ah_spi; } /* goto the next header */ goto again; #endif /* ALTQ_IPSEC */ default: fin->fi_proto = proto; return (0); } /* if this is a first fragment, cache it. */ if (ip_off & IP_MF) ip4f_cache(ip, fin); return (1); } #ifdef INET6 static int extract_ports6(m, ip6, fin6) struct mbuf *m; struct ip6_hdr *ip6; struct flowinfo_in6 *fin6; { struct mbuf *m0; int off; u_int8_t proto; fin6->fi6_gpi = 0; fin6->fi6_sport = 0; fin6->fi6_dport = 0; /* locate the mbuf containing the protocol header */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)ip6 >= m0->m_data) && ((caddr_t)ip6 < m0->m_data + m0->m_len)) break; if (m0 == NULL) { #ifdef ALTQ_DEBUG printf("extract_ports6: can't locate header! 
ip6=%p\n", ip6); #endif return (0); } off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); proto = ip6->ip6_nxt; do { while (off >= m0->m_len) { off -= m0->m_len; m0 = m0->m_next; if (m0 == NULL) return (0); } if (m0->m_len < off + 4) return (0); switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: { struct udphdr *udp; udp = (struct udphdr *)(mtod(m0, caddr_t) + off); fin6->fi6_sport = udp->uh_sport; fin6->fi6_dport = udp->uh_dport; fin6->fi6_proto = proto; } return (1); case IPPROTO_ESP: if (fin6->fi6_gpi == 0) { u_int32_t *gpi; gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); fin6->fi6_gpi = *gpi; } fin6->fi6_proto = proto; return (1); case IPPROTO_AH: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8) fin6->fi6_gpi = opt6->ah_spi; proto = opt6->opt6_nxt; off += 8 + (opt6->opt6_hlen * 4); /* goto the next header */ break; } case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); proto = opt6->opt6_nxt; off += (opt6->opt6_hlen + 1) * 8; /* goto the next header */ break; } case IPPROTO_FRAGMENT: /* ipv6 fragmentations are not supported yet */ default: fin6->fi6_proto = proto; return (0); } } while (1); /*NOTREACHED*/ } #endif /* INET6 */ /* * altq common classifier */ int acc_add_filter(classifier, filter, class, phandle) struct acc_classifier *classifier; struct flow_filter *filter; void *class; u_long *phandle; { struct acc_filter *afp, *prev, *tmp; int i, s; #ifdef INET6 if (filter->ff_flow.fi_family != AF_INET && filter->ff_flow.fi_family != AF_INET6) return (EINVAL); #else if (filter->ff_flow.fi_family != AF_INET) return (EINVAL); #endif afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK); if (afp == NULL) return (ENOMEM); bzero(afp, sizeof(struct acc_filter)); afp->f_filter = *filter; afp->f_class = class; i = ACC_WILDCARD_INDEX; if (filter->ff_flow.fi_family == AF_INET) { struct flow_filter *filter4 = &afp->f_filter; /* * if address is 0, it's a wildcard. if address mask * isn't set, use full mask. */ if (filter4->ff_flow.fi_dst.s_addr == 0) filter4->ff_mask.mask_dst.s_addr = 0; else if (filter4->ff_mask.mask_dst.s_addr == 0) filter4->ff_mask.mask_dst.s_addr = 0xffffffff; if (filter4->ff_flow.fi_src.s_addr == 0) filter4->ff_mask.mask_src.s_addr = 0; else if (filter4->ff_mask.mask_src.s_addr == 0) filter4->ff_mask.mask_src.s_addr = 0xffffffff; /* clear extra bits in addresses */ filter4->ff_flow.fi_dst.s_addr &= filter4->ff_mask.mask_dst.s_addr; filter4->ff_flow.fi_src.s_addr &= filter4->ff_mask.mask_src.s_addr; /* * if dst address is a wildcard, use hash-entry * ACC_WILDCARD_INDEX. 
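 *
 * (illustrative example: a filter matching 192.0.2.0/24 keeps a partial
 * destination mask and therefore stays in the shared ACC_WILDCARD_INDEX
 * bucket, while a host filter on 192.0.2.1/32 is hashed with
 *
 *	i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
 *
 * so a later acc_classify() only has to search that bucket plus the
 * wildcard bucket.)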
*/ if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff) i = ACC_WILDCARD_INDEX; else i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr); } #ifdef INET6 else if (filter->ff_flow.fi_family == AF_INET6) { struct flow_filter6 *filter6 = (struct flow_filter6 *)&afp->f_filter; #ifndef IN6MASK0 /* taken from kame ipv6 */ #define IN6MASK0 {{{ 0, 0, 0, 0 }}} #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}} const struct in6_addr in6mask0 = IN6MASK0; const struct in6_addr in6mask128 = IN6MASK128; #endif if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst)) filter6->ff_mask6.mask6_dst = in6mask0; else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst)) filter6->ff_mask6.mask6_dst = in6mask128; if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src)) filter6->ff_mask6.mask6_src = in6mask0; else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src)) filter6->ff_mask6.mask6_src = in6mask128; /* clear extra bits in addresses */ for (i = 0; i < 16; i++) filter6->ff_flow6.fi6_dst.s6_addr[i] &= filter6->ff_mask6.mask6_dst.s6_addr[i]; for (i = 0; i < 16; i++) filter6->ff_flow6.fi6_src.s6_addr[i] &= filter6->ff_mask6.mask6_src.s6_addr[i]; if (filter6->ff_flow6.fi6_flowlabel == 0) i = ACC_WILDCARD_INDEX; else i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel); } #endif /* INET6 */ afp->f_handle = get_filt_handle(classifier, i); /* update filter bitmask */ afp->f_fbmask = filt2fibmask(filter); classifier->acc_fbmask |= afp->f_fbmask; /* * add this filter to the filter list. * filters are ordered from the highest rule number. */ s = splnet(); prev = NULL; LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) prev = tmp; else break; } if (prev == NULL) LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); else LIST_INSERT_AFTER(prev, afp, f_chain); splx(s); *phandle = afp->f_handle; return (0); } int acc_delete_filter(classifier, handle) struct acc_classifier *classifier; u_long handle; { struct acc_filter *afp; int s; if ((afp = filth_to_filtp(classifier, handle)) == NULL) return (EINVAL); s = splnet(); LIST_REMOVE(afp, f_chain); splx(s); free(afp, M_DEVBUF); /* todo: update filt_bmask */ return (0); } /* * delete filters referencing to the specified class. * if the all flag is not 0, delete all the filters. 
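 *
 * typical (illustrative) calls: a discipline tearing down a single class
 * passes that class pointer,
 *
 *	acc_discard_filters(&classifier, (void *)cl, 0);
 *
 * while a full detach passes a non-zero "all" flag to flush every filter,
 *
 *	acc_discard_filters(&classifier, NULL, 1);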
*/ int acc_discard_filters(classifier, class, all) struct acc_classifier *classifier; void *class; int all; { struct acc_filter *afp; int i, s; s = splnet(); for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { do { LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (all || afp->f_class == class) { LIST_REMOVE(afp, f_chain); free(afp, M_DEVBUF); /* start again from the head */ break; } } while (afp != NULL); } splx(s); if (all) classifier->acc_fbmask = 0; return (0); } void * acc_classify(clfier, m, af) void *clfier; struct mbuf *m; int af; { struct acc_classifier *classifier; struct flowinfo flow; struct acc_filter *afp; int i; classifier = (struct acc_classifier *)clfier; altq_extractflow(m, af, &flow, classifier->acc_fbmask); if (flow.fi_family == AF_INET) { struct flowinfo_in *fp = (struct flowinfo_in *)&flow; if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { /* only tos is used */ LIST_FOREACH(afp, &classifier->acc_filters[ACC_WILDCARD_INDEX], f_chain) if (apply_tosfilter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); } else if ((classifier->acc_fbmask & (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) == 0) { /* only proto and ports are used */ LIST_FOREACH(afp, &classifier->acc_filters[ACC_WILDCARD_INDEX], f_chain) if (apply_ppfilter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); } else { /* get the filter hash entry from its dest address */ i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); do { /* * go through this loop twice. first for dst * hash, second for wildcards. */ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (apply_filter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); /* * check again for filters with a dst addr * wildcard. * (daddr == 0 || dmask != 0xffffffff). */ if (i != ACC_WILDCARD_INDEX) i = ACC_WILDCARD_INDEX; else break; } while (1); } } #ifdef INET6 else if (flow.fi_family == AF_INET6) { struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; /* get the filter hash entry from its flow ID */ if (fp6->fi6_flowlabel != 0) i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); else /* flowlable can be zero */ i = ACC_WILDCARD_INDEX; /* go through this loop twice. first for flow hash, second for wildcards. */ do { LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (apply_filter6(afp->f_fbmask, (struct flow_filter6 *)&afp->f_filter, fp6)) /* filter matched */ return (afp->f_class); /* * check again for filters with a wildcard. 
*/ if (i != ACC_WILDCARD_INDEX) i = ACC_WILDCARD_INDEX; else break; } while (1); } #endif /* INET6 */ /* no filter matched */ return (NULL); } static int apply_filter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) return (0); if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) return (0); if ((fbmask & FIMB4_DADDR) && filt->ff_flow.fi_dst.s_addr != (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) return (0); if ((fbmask & FIMB4_SADDR) && filt->ff_flow.fi_src.s_addr != (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) return (0); if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) return (0); if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != (pkt->fi_tos & filt->ff_mask.mask_tos)) return (0); if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) return (0); /* match */ return (1); } /* * filter matching function optimized for a common case that checks * only protocol and port numbers */ static int apply_ppfilter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) return (0); if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) return (0); if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) return (0); /* match */ return (1); } /* * filter matching function only for tos field. */ static int apply_tosfilter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != (pkt->fi_tos & filt->ff_mask.mask_tos)) return (0); /* match */ return (1); } #ifdef INET6 static int apply_filter6(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter6 *filt; struct flowinfo_in6 *pkt; { int i; if (filt->ff_flow6.fi6_family != AF_INET6) return (0); if ((fbmask & FIMB6_FLABEL) && filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) return (0); if ((fbmask & FIMB6_PROTO) && filt->ff_flow6.fi6_proto != pkt->fi6_proto) return (0); if ((fbmask & FIMB6_SPORT) && filt->ff_flow6.fi6_sport != pkt->fi6_sport) return (0); if ((fbmask & FIMB6_DPORT) && filt->ff_flow6.fi6_dport != pkt->fi6_dport) return (0); if (fbmask & FIMB6_SADDR) { for (i = 0; i < 4; i++) if (filt->ff_flow6.fi6_src.s6_addr32[i] != (pkt->fi6_src.s6_addr32[i] & filt->ff_mask6.mask6_src.s6_addr32[i])) return (0); } if (fbmask & FIMB6_DADDR) { for (i = 0; i < 4; i++) if (filt->ff_flow6.fi6_dst.s6_addr32[i] != (pkt->fi6_dst.s6_addr32[i] & filt->ff_mask6.mask6_dst.s6_addr32[i])) return (0); } if ((fbmask & FIMB6_TCLASS) && filt->ff_flow6.fi6_tclass != (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) return (0); if ((fbmask & FIMB6_GPI) && filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) return (0); /* match */ return (1); } #endif /* INET6 */ /* * filter handle: * bit 20-28: index to the filter hash table * bit 0-19: unique id in the hash bucket. 
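 *
 * (illustrative example: a filter stored in hash bucket 5 whose unique id
 * came out as 0x00012 gets the handle
 *
 *	handle = (5 << 20) | 0x00012;		i.e. 0x00500012
 *
 * and filth_to_filtp() later recovers the bucket with ACC_GET_HINDEX(),
 * i.e. from the bits above bit 19.)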
*/ static u_long get_filt_handle(classifier, i) struct acc_classifier *classifier; int i; { static u_long handle_number = 1; u_long handle; struct acc_filter *afp; while (1) { handle = handle_number++ & 0x000fffff; if (LIST_EMPTY(&classifier->acc_filters[i])) break; LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if ((afp->f_handle & 0x000fffff) == handle) break; if (afp == NULL) break; /* this handle is already used, try again */ } return ((i << 20) | handle); } /* convert filter handle to filter pointer */ static struct acc_filter * filth_to_filtp(classifier, handle) struct acc_classifier *classifier; u_long handle; { struct acc_filter *afp; int i; i = ACC_GET_HINDEX(handle); LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (afp->f_handle == handle) return (afp); return (NULL); } /* create flowinfo bitmask */ static u_int32_t filt2fibmask(filt) struct flow_filter *filt; { u_int32_t mask = 0; #ifdef INET6 struct flow_filter6 *filt6; #endif switch (filt->ff_flow.fi_family) { case AF_INET: if (filt->ff_flow.fi_proto != 0) mask |= FIMB4_PROTO; if (filt->ff_flow.fi_tos != 0) mask |= FIMB4_TOS; if (filt->ff_flow.fi_dst.s_addr != 0) mask |= FIMB4_DADDR; if (filt->ff_flow.fi_src.s_addr != 0) mask |= FIMB4_SADDR; if (filt->ff_flow.fi_sport != 0) mask |= FIMB4_SPORT; if (filt->ff_flow.fi_dport != 0) mask |= FIMB4_DPORT; if (filt->ff_flow.fi_gpi != 0) mask |= FIMB4_GPI; break; #ifdef INET6 case AF_INET6: filt6 = (struct flow_filter6 *)filt; if (filt6->ff_flow6.fi6_proto != 0) mask |= FIMB6_PROTO; if (filt6->ff_flow6.fi6_tclass != 0) mask |= FIMB6_TCLASS; if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) mask |= FIMB6_DADDR; if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) mask |= FIMB6_SADDR; if (filt6->ff_flow6.fi6_sport != 0) mask |= FIMB6_SPORT; if (filt6->ff_flow6.fi6_dport != 0) mask |= FIMB6_DPORT; if (filt6->ff_flow6.fi6_gpi != 0) mask |= FIMB6_GPI; if (filt6->ff_flow6.fi6_flowlabel != 0) mask |= FIMB6_FLABEL; break; #endif /* INET6 */ } return (mask); } /* * helper functions to handle IPv4 fragments. * currently only in-sequence fragments are handled. * - fragment info is cached in a LRU list. * - when a first fragment is found, cache its flow info. * - when a non-first fragment is found, lookup the cache. */ struct ip4_frag { TAILQ_ENTRY(ip4_frag) ip4f_chain; char ip4f_valid; u_short ip4f_id; struct flowinfo_in ip4f_info; }; static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ static void ip4f_cache(ip, fin) struct ip *ip; struct flowinfo_in *fin; { struct ip4_frag *fp; if (TAILQ_EMPTY(&ip4f_list)) { /* first time call, allocate fragment cache entries. */ if (ip4f_init() < 0) /* allocation failed! 
 */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi   = fin->fi_gpi;
}

static int
ip4f_lookup(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {
			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi   = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i = 0; i < IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag), M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

static void
ip4f_free(fp)
	struct ip4_frag *fp;
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}

#endif /* ALTQ3_CLFIER_COMPAT */
diff --git a/sys/net/altq/if_altq.h b/sys/net/altq/if_altq.h
index 8d0463e09b3e..e0237ce77fda 100644
--- a/sys/net/altq/if_altq.h
+++ b/sys/net/altq/if_altq.h
@@ -1,185 +1,178 @@
/*-
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $
 * $FreeBSD$
 */
#ifndef _ALTQ_IF_ALTQ_H_
#define	_ALTQ_IF_ALTQ_H_

#include <sys/lock.h>		/* XXX */
#include <sys/mutex.h>		/* XXX */
#include <sys/event.h>		/* XXX */

struct altq_pktattr; struct tb_regulator; struct top_cdnr;

/*
 * Structure defining a queue for a network interface.
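 *
 * a queueing discipline hooks into this structure through altq_attach()
 * (declared near the end of this header): it stores its private state in
 * altq_disc and supplies the enqueue/dequeue/request callbacks kept below.
 * a sketch of an attach call, with disc_private and the xxx_* handlers
 * standing in for a discipline's own state and functions:
 *
 *	error = altq_attach(&ifp->if_snd, ALTQT_XXX, disc_private,
 *	    xxx_enqueue, xxx_dequeue, xxx_request);
 *
 * classification happens in the caller (e.g. pf(4)) before enqueue; the
 * chosen scheduling class reaches the discipline via struct altq_pktattr.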
*/ struct ifaltq { /* fields compatible with struct ifqueue */ struct mbuf *ifq_head; struct mbuf *ifq_tail; int ifq_len; int ifq_maxlen; struct mtx ifq_mtx; /* driver owned queue (used for bulk dequeue and prepend) UNLOCKED */ struct mbuf *ifq_drv_head; struct mbuf *ifq_drv_tail; int ifq_drv_len; int ifq_drv_maxlen; /* alternate queueing related fields */ int altq_type; /* discipline type */ int altq_flags; /* flags (e.g. ready, in-use) */ void *altq_disc; /* for discipline-specific use */ struct ifnet *altq_ifp; /* back pointer to interface */ int (*altq_enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); struct mbuf *(*altq_dequeue)(struct ifaltq *, int); int (*altq_request)(struct ifaltq *, int, void *); - /* classifier fields */ - void *altq_clfier; /* classifier-specific use */ - void *(*altq_classify)(void *, struct mbuf *, int); - /* token bucket regulator */ struct tb_regulator *altq_tbr; /* input traffic conditioner (doesn't belong to the output queue...) */ struct top_cdnr *altq_cdnr; }; #ifdef _KERNEL /* * packet attributes used by queueing disciplines. * pattr_class is a discipline-dependent scheduling class that is * set by a classifier. * pattr_hdr and pattr_af may be used by a discipline to access * the header within a mbuf. (e.g. ECN needs to update the CE bit) * note that pattr_hdr could be stale after m_pullup, though link * layer output routines usually don't use m_pullup. link-level * compression also invalidates these fields. thus, pattr_hdr needs * to be verified when a discipline touches the header. */ struct altq_pktattr { void *pattr_class; /* sched class set by classifier */ int pattr_af; /* address family */ caddr_t pattr_hdr; /* saved header position in mbuf */ }; /* * mbuf tag to carry a queue id (and hints for ECN). */ struct altq_tag { u_int32_t qid; /* queue id */ /* hints for ecn */ int af; /* address family */ void *hdr; /* saved header position in mbuf */ }; /* * a token-bucket regulator limits the rate that a network driver can * dequeue packets from the output queue. * modern cards are able to buffer a large amount of packets and dequeue * too many packets at a time. this bursty dequeue behavior makes it * impossible to schedule packets by queueing disciplines. * a token-bucket is used to control the burst size in a device * independent manner. 
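 *
 * (illustrative numbers, ignoring the fixed-point scaling of the fields
 * below: at a configured rate of 100 Mbit/s the bucket earns 12500
 * byte-tokens per millisecond, and a depth of 1514 bytes lets the driver
 * pull at most one full-sized Ethernet frame ahead of that rate before
 * the next dequeue has to wait for the bucket to refill.)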
*/ struct tb_regulator { int64_t tbr_rate; /* (scaled) token bucket rate */ int64_t tbr_depth; /* (scaled) token bucket depth */ int64_t tbr_token; /* (scaled) current token */ int64_t tbr_filluptime; /* (scaled) time to fill up bucket */ u_int64_t tbr_last; /* last time token was updated */ int tbr_lastop; /* last dequeue operation type needed for poll-and-dequeue */ }; /* if_altqflags */ #define ALTQF_READY 0x01 /* driver supports alternate queueing */ #define ALTQF_ENABLED 0x02 /* altq is in use */ -#define ALTQF_CLASSIFY 0x04 /* classify packets */ +/* ALTQF_CLASSIFY 0x04 obsolete classify packets */ #define ALTQF_CNDTNING 0x08 /* altq traffic conditioning is enabled */ #define ALTQF_DRIVER1 0x40 /* driver specific */ /* if_altqflags set internally only: */ #define ALTQF_CANTCHANGE (ALTQF_READY) /* altq_dequeue 2nd arg */ #define ALTDQ_REMOVE 1 /* dequeue mbuf from the queue */ #define ALTDQ_POLL 2 /* don't dequeue mbuf from the queue */ /* altq request types (currently only purge is defined) */ #define ALTRQ_PURGE 1 /* purge all packets */ #define ALTQ_IS_READY(ifq) ((ifq)->altq_flags & ALTQF_READY) #ifdef ALTQ #define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED) #else #define ALTQ_IS_ENABLED(ifq) 0 #endif -#define ALTQ_NEEDS_CLASSIFY(ifq) ((ifq)->altq_flags & ALTQF_CLASSIFY) #define ALTQ_IS_CNDTNING(ifq) ((ifq)->altq_flags & ALTQF_CNDTNING) #define ALTQ_SET_CNDTNING(ifq) ((ifq)->altq_flags |= ALTQF_CNDTNING) #define ALTQ_CLEAR_CNDTNING(ifq) ((ifq)->altq_flags &= ~ALTQF_CNDTNING) #define ALTQ_IS_ATTACHED(ifq) ((ifq)->altq_disc != NULL) #define ALTQ_ENQUEUE(ifq, m, pa, err) \ (err) = (*(ifq)->altq_enqueue)((ifq),(m),(pa)) #define ALTQ_DEQUEUE(ifq, m) \ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_REMOVE) #define ALTQ_POLL(ifq, m) \ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_POLL) #define ALTQ_PURGE(ifq) \ (void)(*(ifq)->altq_request)((ifq), ALTRQ_PURGE, (void *)0) #define ALTQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0) #define TBR_IS_ENABLED(ifq) ((ifq)->altq_tbr != NULL) extern int altq_attach(struct ifaltq *, int, void *, int (*)(struct ifaltq *, struct mbuf *, struct altq_pktattr *), struct mbuf *(*)(struct ifaltq *, int), - int (*)(struct ifaltq *, int, void *), - void *, - void *(*)(void *, struct mbuf *, int)); + int (*)(struct ifaltq *, int, void *)); extern int altq_detach(struct ifaltq *); extern int altq_enable(struct ifaltq *); extern int altq_disable(struct ifaltq *); extern struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int); extern int (*altq_input)(struct mbuf *, int); #if 0 /* ALTQ3_CLFIER_COMPAT */ void altq_etherclassify(struct ifaltq *, struct mbuf *, struct altq_pktattr *); #endif #endif /* _KERNEL */ #endif /* _ALTQ_IF_ALTQ_H_ */