Index: head/sys/net/altq/altq.h =================================================================== --- head/sys/net/altq/altq.h (revision 341506) +++ head/sys/net/altq/altq.h (revision 341507) @@ -1,231 +1,109 @@ /*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_H_ #define _ALTQ_ALTQ_H_ #if 0 /* * allow altq-3 (altqd(8) and /dev/altq) to coexist with the new pf-based altq. * altq3 is mainly for research experiments. pf-based altq is for daily use. */ #define ALTQ3_COMPAT /* for compatibility with altq-3 */ #define ALTQ3_CLFIER_COMPAT /* for compatibility with altq-3 classifier */ #endif -#ifdef ALTQ3_COMPAT -#include -#include -#include -#include -#ifndef IFNAMSIZ -#define IFNAMSIZ 16 -#endif -#endif /* ALTQ3_COMPAT */ - /* altq discipline type */ #define ALTQT_NONE 0 /* reserved */ #define ALTQT_CBQ 1 /* cbq */ #define ALTQT_WFQ 2 /* wfq */ #define ALTQT_AFMAP 3 /* afmap */ #define ALTQT_FIFOQ 4 /* fifoq */ #define ALTQT_RED 5 /* red */ #define ALTQT_RIO 6 /* rio */ #define ALTQT_LOCALQ 7 /* local use */ #define ALTQT_HFSC 8 /* hfsc */ #define ALTQT_CDNR 9 /* traffic conditioner */ #define ALTQT_BLUE 10 /* blue */ #define ALTQT_PRIQ 11 /* priority queue */ #define ALTQT_JOBS 12 /* JoBS */ #define ALTQT_FAIRQ 13 /* fairq */ #define ALTQT_CODEL 14 /* CoDel */ #define ALTQT_MAX 15 /* should be max discipline type + 1 */ -#ifdef ALTQ3_COMPAT -struct altqreq { - char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */ - u_long arg; /* request-specific argument */ -}; -#endif /* simple token backet meter profile */ struct tb_profile { u_int64_t rate; /* rate in bit-per-sec */ u_int32_t depth; /* depth in bytes */ }; -#ifdef ALTQ3_COMPAT -struct tbrreq { - char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */ - struct tb_profile tb_prof; /* token bucket profile */ -}; -#ifdef ALTQ3_CLFIER_COMPAT /* - * common network flow info structure - */ -struct flowinfo { - u_char fi_len; /* total length */ - u_char fi_family; /* address family */ - u_int8_t fi_data[46]; /* actually longer; address family - specific flow info. */ -}; - -/* - * flow info structure for internet protocol family. - * (currently this is the only protocol family supported) - */ -struct flowinfo_in { - u_char fi_len; /* sizeof(struct flowinfo_in) */ - u_char fi_family; /* AF_INET */ - u_int8_t fi_proto; /* IPPROTO_XXX */ - u_int8_t fi_tos; /* type-of-service */ - struct in_addr fi_dst; /* dest address */ - struct in_addr fi_src; /* src address */ - u_int16_t fi_dport; /* dest port */ - u_int16_t fi_sport; /* src port */ - u_int32_t fi_gpi; /* generalized port id for ipsec */ - u_int8_t _pad[28]; /* make the size equal to - flowinfo_in6 */ -}; - -#ifdef SIN6_LEN -struct flowinfo_in6 { - u_char fi6_len; /* sizeof(struct flowinfo_in6) */ - u_char fi6_family; /* AF_INET6 */ - u_int8_t fi6_proto; /* IPPROTO_XXX */ - u_int8_t fi6_tclass; /* traffic class */ - u_int32_t fi6_flowlabel; /* ipv6 flowlabel */ - u_int16_t fi6_dport; /* dest port */ - u_int16_t fi6_sport; /* src port */ - u_int32_t fi6_gpi; /* generalized port id */ - struct in6_addr fi6_dst; /* dest address */ - struct in6_addr fi6_src; /* src address */ -}; -#endif /* INET6 */ - -/* - * flow filters for AF_INET and AF_INET6 - */ -struct flow_filter { - int ff_ruleno; - struct flowinfo_in ff_flow; - struct { - struct in_addr mask_dst; - struct in_addr mask_src; - u_int8_t mask_tos; - u_int8_t _pad[3]; - } ff_mask; - u_int8_t _pad2[24]; /* make the size equal to flow_filter6 */ -}; - -#ifdef SIN6_LEN -struct flow_filter6 { - int ff_ruleno; - struct flowinfo_in6 ff_flow6; - struct { - struct in6_addr mask6_dst; - struct in6_addr mask6_src; - u_int8_t mask6_tclass; - u_int8_t _pad[3]; - } ff_mask6; -}; -#endif /* INET6 */ -#endif /* ALTQ3_CLFIER_COMPAT */ -#endif /* ALTQ3_COMPAT */ - -/* * generic packet counter */ struct pktcntr { u_int64_t packets; u_int64_t bytes; }; #define PKTCNTR_ADD(cntr, len) \ do { (cntr)->packets++; (cntr)->bytes += len; } while (/*CONSTCOND*/ 0) -#ifdef ALTQ3_COMPAT -/* - * altq related ioctls - */ -#define ALTQGTYPE _IOWR('q', 0, struct altqreq) /* get queue type */ -#if 0 -/* - * these ioctls are currently discipline-specific but could be shared - * in the future. - */ -#define ALTQATTACH _IOW('q', 1, struct altqreq) /* attach discipline */ -#define ALTQDETACH _IOW('q', 2, struct altqreq) /* detach discipline */ -#define ALTQENABLE _IOW('q', 3, struct altqreq) /* enable discipline */ -#define ALTQDISABLE _IOW('q', 4, struct altqreq) /* disable discipline*/ -#define ALTQCLEAR _IOW('q', 5, struct altqreq) /* (re)initialize */ -#define ALTQCONFIG _IOWR('q', 6, struct altqreq) /* set config params */ -#define ALTQADDCLASS _IOWR('q', 7, struct altqreq) /* add a class */ -#define ALTQMODCLASS _IOWR('q', 8, struct altqreq) /* modify a class */ -#define ALTQDELCLASS _IOWR('q', 9, struct altqreq) /* delete a class */ -#define ALTQADDFILTER _IOWR('q', 10, struct altqreq) /* add a filter */ -#define ALTQDELFILTER _IOWR('q', 11, struct altqreq) /* delete a filter */ -#define ALTQGETSTATS _IOWR('q', 12, struct altqreq) /* get statistics */ -#define ALTQGETCNTR _IOWR('q', 13, struct altqreq) /* get a pkt counter */ -#endif /* 0 */ -#define ALTQTBRSET _IOW('q', 14, struct tbrreq) /* set tb regulator */ -#define ALTQTBRGET _IOWR('q', 15, struct tbrreq) /* get tb regulator */ -#endif /* ALTQ3_COMPAT */ #ifdef _KERNEL #include #endif /* * Can't put these versions in the scheduler-specific headers and include * them all here as that will cause build failure due to cross-including * each other scheduler's private bits into each scheduler's * implementation. */ #define CBQ_STATS_VERSION 0 /* Latest version of class_stats_t */ #define CODEL_STATS_VERSION 0 /* Latest version of codel_ifstats */ #define FAIRQ_STATS_VERSION 0 /* Latest version of fairq_classstats */ #define HFSC_STATS_VERSION 1 /* Latest version of hfsc_classstats */ #define PRIQ_STATS_VERSION 0 /* Latest version of priq_classstats */ /* Return the latest stats version for the given scheduler. */ static inline int altq_stats_version(int scheduler) { switch (scheduler) { case ALTQT_CBQ: return (CBQ_STATS_VERSION); case ALTQT_CODEL: return (CODEL_STATS_VERSION); case ALTQT_FAIRQ: return (FAIRQ_STATS_VERSION); case ALTQT_HFSC: return (HFSC_STATS_VERSION); case ALTQT_PRIQ: return (PRIQ_STATS_VERSION); default: return (0); } } #endif /* _ALTQ_ALTQ_H_ */ Index: head/sys/net/altq/altq_cbq.c =================================================================== --- head/sys/net/altq/altq_cbq.c (revision 341506) +++ head/sys/net/altq/altq_cbq.c (revision 341507) @@ -1,1169 +1,565 @@ /*- * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the SMCC Technology * Development Group at Sun Microsystems, Inc. * * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is * provided "as is" without express or implied warranty of any kind. * * These notices must be retained in any copies of any part of this software. * * $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ #include #include #include #include #include #include #include #include -#ifdef ALTQ3_COMPAT -#include -#include -#endif #include #include #include #include #include #include #include #include -#ifdef ALTQ3_COMPAT -#include -#endif -#ifdef ALTQ3_COMPAT -/* - * Local Data structures. - */ -static cbq_state_t *cbq_list = NULL; -#endif /* * Forward Declarations. */ static int cbq_class_destroy(cbq_state_t *, struct rm_class *); static struct rm_class *clh_to_clp(cbq_state_t *, u_int32_t); static int cbq_clear_interface(cbq_state_t *); static int cbq_request(struct ifaltq *, int, void *); static int cbq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *cbq_dequeue(struct ifaltq *, int); static void cbqrestart(struct ifaltq *); static void get_class_stats(class_stats_t *, struct rm_class *); static void cbq_purge(cbq_state_t *); -#ifdef ALTQ3_COMPAT -static int cbq_add_class(struct cbq_add_class *); -static int cbq_delete_class(struct cbq_delete_class *); -static int cbq_modify_class(struct cbq_modify_class *); -static int cbq_class_create(cbq_state_t *, struct cbq_add_class *, - struct rm_class *, struct rm_class *); -static int cbq_clear_hierarchy(struct cbq_interface *); -static int cbq_set_enable(struct cbq_interface *, int); -static int cbq_ifattach(struct cbq_interface *); -static int cbq_ifdetach(struct cbq_interface *); -static int cbq_getstats(struct cbq_getstats *); -static int cbq_add_filter(struct cbq_add_filter *); -static int cbq_delete_filter(struct cbq_delete_filter *); -#endif /* ALTQ3_COMPAT */ - /* * int * cbq_class_destroy(cbq_mod_state_t *, struct rm_class *) - This * function destroys a given traffic class. Before destroying * the class, all traffic for that class is released. */ static int cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl) { int i; /* delete the class */ rmc_delete_class(&cbqp->ifnp, cl); /* * free the class handle */ for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == cl) cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; -#ifdef ALTQ3_COMPAT - if (cl == cbqp->ifnp.ctl_) - cbqp->ifnp.ctl_ = NULL; -#endif return (0); } /* convert class handle to class pointer */ static struct rm_class * clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle) { int i; struct rm_class *cl; if (chandle == 0) return (NULL); /* * first, try optimistically the slot matching the lower bits of * the handle. if it fails, do the linear table search. */ i = chandle % CBQ_MAX_CLASSES; if ((cl = cbqp->cbq_class_tbl[i]) != NULL && cl->stats_.handle == chandle) return (cl); for (i = 0; i < CBQ_MAX_CLASSES; i++) if ((cl = cbqp->cbq_class_tbl[i]) != NULL && cl->stats_.handle == chandle) return (cl); return (NULL); } static int cbq_clear_interface(cbq_state_t *cbqp) { int again, i; struct rm_class *cl; #ifdef ALTQ3_CLFIER_COMPAT /* free the filters for this interface */ acc_discard_filters(&cbqp->cbq_classifier, NULL, 1); #endif /* clear out the classes now */ do { again = 0; for (i = 0; i < CBQ_MAX_CLASSES; i++) { if ((cl = cbqp->cbq_class_tbl[i]) != NULL) { if (is_a_parent_class(cl)) again++; else { cbq_class_destroy(cbqp, cl); cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; -#ifdef ALTQ3_COMPAT - if (cl == cbqp->ifnp.ctl_) - cbqp->ifnp.ctl_ = NULL; -#endif } } } } while (again); return (0); } static int cbq_request(struct ifaltq *ifq, int req, void *arg) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: cbq_purge(cbqp); break; } return (0); } /* copy the stats info in rm_class to class_states_t */ static void get_class_stats(class_stats_t *statsp, struct rm_class *cl) { statsp->xmit_cnt = cl->stats_.xmit_cnt; statsp->drop_cnt = cl->stats_.drop_cnt; statsp->over = cl->stats_.over; statsp->borrows = cl->stats_.borrows; statsp->overactions = cl->stats_.overactions; statsp->delays = cl->stats_.delays; statsp->depth = cl->depth_; statsp->priority = cl->pri_; statsp->maxidle = cl->maxidle_; statsp->minidle = cl->minidle_; statsp->offtime = cl->offtime_; statsp->qmax = qlimit(cl->q_); statsp->ns_per_byte = cl->ns_per_byte_; statsp->wrr_allot = cl->w_allotment_; statsp->qcnt = qlen(cl->q_); statsp->avgidle = cl->avgidle_; statsp->qtype = qtype(cl->q_); #ifdef ALTQ_RED if (q_is_red(cl->q_)) red_getstats(cl->red_, &statsp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) rio_getstats((rio_t *)cl->red_, &statsp->red[0]); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->q_)) codel_getstats(cl->codel_, &statsp->codel); #endif } int cbq_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); s = splnet(); error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc, cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL); splx(s); return (error); } int cbq_add_altq(struct pf_altq *a) { cbq_state_t *cbqp; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); /* allocate and initialize cbq_state_t */ cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cbqp == NULL) return (ENOMEM); CALLOUT_INIT(&cbqp->cbq_callout); cbqp->cbq_qlen = 0; cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ /* keep the state in pf_altq */ a->altq_disc = cbqp; return (0); } int cbq_remove_altq(struct pf_altq *a) { cbq_state_t *cbqp; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; cbq_clear_interface(cbqp); if (cbqp->ifnp.default_) cbq_class_destroy(cbqp, cbqp->ifnp.default_); if (cbqp->ifnp.root_) cbq_class_destroy(cbqp, cbqp->ifnp.root_); /* deallocate cbq_state_t */ free(cbqp, M_DEVBUF); return (0); } int cbq_add_queue(struct pf_altq *a) { struct rm_class *borrow, *parent; cbq_state_t *cbqp; struct rm_class *cl; struct cbq_opts *opts; int i; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); if (a->qid == 0) return (EINVAL); /* * find a free slot in the class table. if the slot matching * the lower bits of qid is free, use this slot. otherwise, * use the first free slot. */ i = a->qid % CBQ_MAX_CLASSES; if (cbqp->cbq_class_tbl[i] != NULL) { for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == NULL) break; if (i == CBQ_MAX_CLASSES) return (EINVAL); } opts = &a->pq_u.cbq_opts; /* check parameters */ if (a->priority >= CBQ_MAXPRI) return (EINVAL); /* Get pointers to parent and borrow classes. */ parent = clh_to_clp(cbqp, a->parent_qid); if (opts->flags & CBQCLF_BORROW) borrow = parent; else borrow = NULL; /* * A class must borrow from it's parent or it can not * borrow at all. Hence, borrow can be null. */ if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) { printf("cbq_add_queue: no parent class!\n"); return (EINVAL); } if ((borrow != parent) && (borrow != NULL)) { printf("cbq_add_class: borrow class != parent\n"); return (EINVAL); } /* * check parameters */ switch (opts->flags & CBQCLF_CLASSMASK) { case CBQCLF_ROOTCLASS: if (parent != NULL) return (EINVAL); if (cbqp->ifnp.root_) return (EINVAL); break; case CBQCLF_DEFCLASS: if (cbqp->ifnp.default_) return (EINVAL); break; case 0: if (a->qid == 0) return (EINVAL); break; default: /* more than two flags bits set */ return (EINVAL); } /* * create a class. if this is a root class, initialize the * interface. */ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte, cbqrestart, a->qlimit, RM_MAXQUEUED, opts->maxidle, opts->minidle, opts->offtime, opts->flags); cl = cbqp->ifnp.root_; } else { cl = rmc_newclass(a->priority, &cbqp->ifnp, opts->ns_per_byte, rmc_delay_action, a->qlimit, parent, borrow, opts->maxidle, opts->minidle, opts->offtime, opts->pktsize, opts->flags); } if (cl == NULL) return (ENOMEM); /* return handle to user space. */ cl->stats_.handle = a->qid; cl->stats_.depth = cl->depth_; /* save the allocated class */ cbqp->cbq_class_tbl[i] = cl; if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) cbqp->ifnp.default_ = cl; return (0); } int cbq_remove_queue(struct pf_altq *a) { struct rm_class *cl; cbq_state_t *cbqp; int i; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) return (EINVAL); /* if we are a parent class, then return an error. */ if (is_a_parent_class(cl)) return (EINVAL); /* delete the class */ rmc_delete_class(&cbqp->ifnp, cl); /* * free the class handle */ for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == cl) { cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; break; } return (0); } int cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { cbq_state_t *cbqp; struct rm_class *cl; class_stats_t stats; int error = 0; if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * int * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr) * - Queue data packets. * * cbq_enqueue is set to ifp->if_altqenqueue and called by an upper * layer (e.g. ether_output). cbq_enqueue queues the given packet * to the cbq, then invokes the driver's start routine. * * Assumptions: called in splimp * Returns: 0 if the queueing is successful. * ENOBUFS if a packet dropping occurred as a result of * the queueing. */ static int cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; struct rm_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(cbqp, t->qid); -#ifdef ALTQ3_COMPAT - else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) - cl = pktattr->pattr_class; -#endif if (cl == NULL) { cl = cbqp->ifnp.default_; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } -#ifdef ALTQ3_COMPAT - if (pktattr != NULL) - cl->pktattr_ = pktattr; /* save proto hdr used by ECN */ - else -#endif cl->pktattr_ = NULL; len = m_pktlen(m); if (rmc_queue_packet(cl, m) != 0) { /* drop occurred. some mbuf was freed in rmc_queue_packet. */ PKTCNTR_ADD(&cl->stats_.drop_cnt, len); return (ENOBUFS); } /* successfully queued. */ ++cbqp->cbq_qlen; IFQ_INC_LEN(ifq); return (0); } static struct mbuf * cbq_dequeue(struct ifaltq *ifq, int op) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); m = rmc_dequeue_next(&cbqp->ifnp, op); if (m && op == ALTDQ_REMOVE) { --cbqp->cbq_qlen; /* decrement # of packets in cbq */ IFQ_DEC_LEN(ifq); /* Update the class. */ rmc_update_class_util(&cbqp->ifnp); } return (m); } /* * void * cbqrestart(queue_t *) - Restart sending of data. * called from rmc_restart in splimp via timeout after waking up * a suspended class. * Returns: NONE */ static void cbqrestart(struct ifaltq *ifq) { cbq_state_t *cbqp; struct ifnet *ifp; IFQ_LOCK_ASSERT(ifq); if (!ALTQ_IS_ENABLED(ifq)) /* cbq must have been detached */ return; if ((cbqp = (cbq_state_t *)ifq->altq_disc) == NULL) /* should not happen */ return; ifp = ifq->altq_ifp; if (ifp->if_start && cbqp->cbq_qlen > 0 && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { IFQ_UNLOCK(ifq); (*ifp->if_start)(ifp); IFQ_LOCK(ifq); } } static void cbq_purge(cbq_state_t *cbqp) { struct rm_class *cl; int i; for (i = 0; i < CBQ_MAX_CLASSES; i++) if ((cl = cbqp->cbq_class_tbl[i]) != NULL) rmc_dropall(cl); if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_)) cbqp->ifnp.ifq_->ifq_len = 0; } -#ifdef ALTQ3_COMPAT - -static int -cbq_add_class(acp) - struct cbq_add_class *acp; -{ - char *ifacename; - struct rm_class *borrow, *parent; - cbq_state_t *cbqp; - - ifacename = acp->cbq_iface.cbq_ifacename; - if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) - return (EBADF); - - /* check parameters */ - if (acp->cbq_class.priority >= CBQ_MAXPRI || - acp->cbq_class.maxq > CBQ_MAXQSIZE) - return (EINVAL); - - /* Get pointers to parent and borrow classes. */ - parent = clh_to_clp(cbqp, acp->cbq_class.parent_class_handle); - borrow = clh_to_clp(cbqp, acp->cbq_class.borrow_class_handle); - - /* - * A class must borrow from it's parent or it can not - * borrow at all. Hence, borrow can be null. - */ - if (parent == NULL && (acp->cbq_class.flags & CBQCLF_ROOTCLASS) == 0) { - printf("cbq_add_class: no parent class!\n"); - return (EINVAL); - } - - if ((borrow != parent) && (borrow != NULL)) { - printf("cbq_add_class: borrow class != parent\n"); - return (EINVAL); - } - - return cbq_class_create(cbqp, acp, parent, borrow); -} - -static int -cbq_delete_class(dcp) - struct cbq_delete_class *dcp; -{ - char *ifacename; - struct rm_class *cl; - cbq_state_t *cbqp; - - ifacename = dcp->cbq_iface.cbq_ifacename; - if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) - return (EBADF); - - if ((cl = clh_to_clp(cbqp, dcp->cbq_class_handle)) == NULL) - return (EINVAL); - - /* if we are a parent class, then return an error. */ - if (is_a_parent_class(cl)) - return (EINVAL); - - /* if a filter has a reference to this class delete the filter */ - acc_discard_filters(&cbqp->cbq_classifier, cl, 0); - - return cbq_class_destroy(cbqp, cl); -} - -static int -cbq_modify_class(acp) - struct cbq_modify_class *acp; -{ - char *ifacename; - struct rm_class *cl; - cbq_state_t *cbqp; - - ifacename = acp->cbq_iface.cbq_ifacename; - if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) - return (EBADF); - - /* Get pointer to this class */ - if ((cl = clh_to_clp(cbqp, acp->cbq_class_handle)) == NULL) - return (EINVAL); - - if (rmc_modclass(cl, acp->cbq_class.nano_sec_per_byte, - acp->cbq_class.maxq, acp->cbq_class.maxidle, - acp->cbq_class.minidle, acp->cbq_class.offtime, - acp->cbq_class.pktsize) < 0) - return (EINVAL); - return (0); -} - -/* - * struct rm_class * - * cbq_class_create(cbq_mod_state_t *cbqp, struct cbq_add_class *acp, - * struct rm_class *parent, struct rm_class *borrow) - * - * This function create a new traffic class in the CBQ class hierarchy of - * given parameters. The class that created is either the root, default, - * or a new dynamic class. If CBQ is not initilaized, the root class - * will be created. - */ -static int -cbq_class_create(cbqp, acp, parent, borrow) - cbq_state_t *cbqp; - struct cbq_add_class *acp; - struct rm_class *parent, *borrow; -{ - struct rm_class *cl; - cbq_class_spec_t *spec = &acp->cbq_class; - u_int32_t chandle; - int i; - - /* - * allocate class handle - */ - for (i = 1; i < CBQ_MAX_CLASSES; i++) - if (cbqp->cbq_class_tbl[i] == NULL) - break; - if (i == CBQ_MAX_CLASSES) - return (EINVAL); - chandle = i; /* use the slot number as class handle */ - - /* - * create a class. if this is a root class, initialize the - * interface. - */ - if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { - rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte, - cbqrestart, spec->maxq, RM_MAXQUEUED, - spec->maxidle, spec->minidle, spec->offtime, - spec->flags); - cl = cbqp->ifnp.root_; - } else { - cl = rmc_newclass(spec->priority, - &cbqp->ifnp, spec->nano_sec_per_byte, - rmc_delay_action, spec->maxq, parent, borrow, - spec->maxidle, spec->minidle, spec->offtime, - spec->pktsize, spec->flags); - } - if (cl == NULL) - return (ENOMEM); - - /* return handle to user space. */ - acp->cbq_class_handle = chandle; - - cl->stats_.handle = chandle; - cl->stats_.depth = cl->depth_; - - /* save the allocated class */ - cbqp->cbq_class_tbl[i] = cl; - - if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) - cbqp->ifnp.default_ = cl; - if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_CTLCLASS) - cbqp->ifnp.ctl_ = cl; - - return (0); -} - -static int -cbq_add_filter(afp) - struct cbq_add_filter *afp; -{ - char *ifacename; - cbq_state_t *cbqp; - struct rm_class *cl; - - ifacename = afp->cbq_iface.cbq_ifacename; - if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) - return (EBADF); - - /* Get the pointer to class. */ - if ((cl = clh_to_clp(cbqp, afp->cbq_class_handle)) == NULL) - return (EINVAL); - - return acc_add_filter(&cbqp->cbq_classifier, &afp->cbq_filter, - cl, &afp->cbq_filter_handle); -} - -static int -cbq_delete_filter(dfp) - struct cbq_delete_filter *dfp; -{ - char *ifacename; - cbq_state_t *cbqp; - - ifacename = dfp->cbq_iface.cbq_ifacename; - if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) - return (EBADF); - - return acc_delete_filter(&cbqp->cbq_classifier, - dfp->cbq_filter_handle); -} - -/* - * cbq_clear_hierarchy deletes all classes and their filters on the - * given interface. - */ -static int -cbq_clear_hierarchy(ifacep) - struct cbq_interface *ifacep; -{ - char *ifacename; - cbq_state_t *cbqp; - - ifacename = ifacep->cbq_ifacename; - if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) - return (EBADF); - - return cbq_clear_interface(cbqp); -} - -/* - * static int - * cbq_set_enable(struct cbq_enable *ep) - this function processed the - * ioctl request to enable class based queueing. It searches the list - * of interfaces for the specified interface and then enables CBQ on - * that interface. - * - * Returns: 0, for no error. - * EBADF, for specified inteface not found. - */ - -static int -cbq_set_enable(ep, enable) - struct cbq_interface *ep; - int enable; -{ - int error = 0; - cbq_state_t *cbqp; - char *ifacename; - - ifacename = ep->cbq_ifacename; - if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) - return (EBADF); - - switch (enable) { - case ENABLE: - if (cbqp->ifnp.root_ == NULL || cbqp->ifnp.default_ == NULL || - cbqp->ifnp.ctl_ == NULL) { - if (cbqp->ifnp.root_ == NULL) - printf("No Root Class for %s\n", ifacename); - if (cbqp->ifnp.default_ == NULL) - printf("No Default Class for %s\n", ifacename); - if (cbqp->ifnp.ctl_ == NULL) - printf("No Control Class for %s\n", ifacename); - error = EINVAL; - } else if ((error = altq_enable(cbqp->ifnp.ifq_)) == 0) { - cbqp->cbq_qlen = 0; - } - break; - - case DISABLE: - error = altq_disable(cbqp->ifnp.ifq_); - break; - } - return (error); -} - -static int -cbq_getstats(gsp) - struct cbq_getstats *gsp; -{ - char *ifacename; - int i, n, nclasses; - cbq_state_t *cbqp; - struct rm_class *cl; - class_stats_t stats, *usp; - int error = 0; - - ifacename = gsp->iface.cbq_ifacename; - nclasses = gsp->nclasses; - usp = gsp->stats; - - if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) - return (EBADF); - if (nclasses <= 0) - return (EINVAL); - - for (n = 0, i = 0; n < nclasses && i < CBQ_MAX_CLASSES; n++, i++) { - while ((cl = cbqp->cbq_class_tbl[i]) == NULL) - if (++i >= CBQ_MAX_CLASSES) - goto out; - - get_class_stats(&stats, cl); - stats.handle = cl->stats_.handle; - - if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, - sizeof(stats))) != 0) - return (error); - } - - out: - gsp->nclasses = n; - return (error); -} - -static int -cbq_ifattach(ifacep) - struct cbq_interface *ifacep; -{ - int error = 0; - char *ifacename; - cbq_state_t *new_cbqp; - struct ifnet *ifp; - - ifacename = ifacep->cbq_ifacename; - if ((ifp = ifunit(ifacename)) == NULL) - return (ENXIO); - if (!ALTQ_IS_READY(&ifp->if_snd)) - return (ENXIO); - - /* allocate and initialize cbq_state_t */ - new_cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK); - if (new_cbqp == NULL) - return (ENOMEM); - bzero(new_cbqp, sizeof(cbq_state_t)); - CALLOUT_INIT(&new_cbqp->cbq_callout); - - new_cbqp->cbq_qlen = 0; - new_cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ - - /* - * set CBQ to this ifnet structure. - */ - error = altq_attach(&ifp->if_snd, ALTQT_CBQ, new_cbqp, - cbq_enqueue, cbq_dequeue, cbq_request, - &new_cbqp->cbq_classifier, acc_classify); - if (error) { - free(new_cbqp, M_DEVBUF); - return (error); - } - - /* prepend to the list of cbq_state_t's. */ - new_cbqp->cbq_next = cbq_list; - cbq_list = new_cbqp; - - return (0); -} - -static int -cbq_ifdetach(ifacep) - struct cbq_interface *ifacep; -{ - char *ifacename; - cbq_state_t *cbqp; - - ifacename = ifacep->cbq_ifacename; - if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) - return (EBADF); - - (void)cbq_set_enable(ifacep, DISABLE); - - cbq_clear_interface(cbqp); - - /* remove CBQ from the ifnet structure. */ - (void)altq_detach(cbqp->ifnp.ifq_); - - /* remove from the list of cbq_state_t's. */ - if (cbq_list == cbqp) - cbq_list = cbqp->cbq_next; - else { - cbq_state_t *cp; - - for (cp = cbq_list; cp != NULL; cp = cp->cbq_next) - if (cp->cbq_next == cbqp) { - cp->cbq_next = cbqp->cbq_next; - break; - } - ASSERT(cp != NULL); - } - - /* deallocate cbq_state_t */ - free(cbqp, M_DEVBUF); - - return (0); -} - -/* - * cbq device interface - */ - -altqdev_decl(cbq); - -int -cbqopen(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - return (0); -} - -int -cbqclose(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - struct ifnet *ifp; - struct cbq_interface iface; - int err, error = 0; - - while (cbq_list) { - ifp = cbq_list->ifnp.ifq_->altq_ifp; - sprintf(iface.cbq_ifacename, "%s", ifp->if_xname); - err = cbq_ifdetach(&iface); - if (err != 0 && error == 0) - error = err; - } - - return (error); -} - -int -cbqioctl(dev, cmd, addr, flag, p) - dev_t dev; - ioctlcmd_t cmd; - caddr_t addr; - int flag; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - int error = 0; - - /* check cmd for superuser only */ - switch (cmd) { - case CBQ_GETSTATS: - /* currently only command that an ordinary user can call */ - break; - default: -#if (__FreeBSD_version > 700000) - error = priv_check(p, PRIV_ALTQ_MANAGE); -#elsif (__FreeBSD_version > 400000) - error = suser(p); -#else - error = suser(p->p_ucred, &p->p_acflag); -#endif - if (error) - return (error); - break; - } - - switch (cmd) { - - case CBQ_ENABLE: - error = cbq_set_enable((struct cbq_interface *)addr, ENABLE); - break; - - case CBQ_DISABLE: - error = cbq_set_enable((struct cbq_interface *)addr, DISABLE); - break; - - case CBQ_ADD_FILTER: - error = cbq_add_filter((struct cbq_add_filter *)addr); - break; - - case CBQ_DEL_FILTER: - error = cbq_delete_filter((struct cbq_delete_filter *)addr); - break; - - case CBQ_ADD_CLASS: - error = cbq_add_class((struct cbq_add_class *)addr); - break; - - case CBQ_DEL_CLASS: - error = cbq_delete_class((struct cbq_delete_class *)addr); - break; - - case CBQ_MODIFY_CLASS: - error = cbq_modify_class((struct cbq_modify_class *)addr); - break; - - case CBQ_CLEAR_HIERARCHY: - error = cbq_clear_hierarchy((struct cbq_interface *)addr); - break; - - case CBQ_IF_ATTACH: - error = cbq_ifattach((struct cbq_interface *)addr); - break; - - case CBQ_IF_DETACH: - error = cbq_ifdetach((struct cbq_interface *)addr); - break; - - case CBQ_GETSTATS: - error = cbq_getstats((struct cbq_getstats *)addr); - break; - - default: - error = EINVAL; - break; - } - - return error; -} - -#if 0 -/* for debug */ -static void cbq_class_dump(int); - -static void cbq_class_dump(i) - int i; -{ - struct rm_class *cl; - rm_class_stats_t *s; - struct _class_queue_ *q; - - if (cbq_list == NULL) { - printf("cbq_class_dump: no cbq_state found\n"); - return; - } - cl = cbq_list->cbq_class_tbl[i]; - - printf("class %d cl=%p\n", i, cl); - if (cl != NULL) { - s = &cl->stats_; - q = cl->q_; - - printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n", - cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_); - printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n", - cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_, - cl->maxidle_); - printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n", - cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_); - printf("handle=%d, depth=%d, packets=%d, bytes=%d\n", - s->handle, s->depth, - (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes); - printf("over=%d\n, borrows=%d, drops=%d, overactions=%d, delays=%d\n", - s->over, s->borrows, (int)s->drop_cnt.packets, - s->overactions, s->delays); - printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n", - q->tail_, q->head_, q->qlen_, q->qlim_, - q->qthresh_, q->qtype_); - } -} -#endif /* 0 */ - -#ifdef KLD_MODULE - -static struct altqsw cbq_sw = - {"cbq", cbqopen, cbqclose, cbqioctl}; - -ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw); -MODULE_DEPEND(altq_cbq, altq_red, 1, 1, 1); -MODULE_DEPEND(altq_cbq, altq_rio, 1, 1, 1); - -#endif /* KLD_MODULE */ -#endif /* ALTQ3_COMPAT */ #endif /* ALTQ_CBQ */ Index: head/sys/net/altq/altq_cbq.h =================================================================== --- head/sys/net/altq/altq_cbq.h (revision 341506) +++ head/sys/net/altq/altq_cbq.h (revision 341507) @@ -1,240 +1,146 @@ /*- * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the SMCC Technology * Development Group at Sun Microsystems, Inc. * * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is * provided "as is" without express or implied warranty of any kind. * * These notices must be retained in any copies of any part of this software. * * $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_CBQ_H_ #define _ALTQ_ALTQ_CBQ_H_ #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif #define NULL_CLASS_HANDLE 0 /* class flags must be same as class flags in altq_rmclass.h */ #define CBQCLF_RED 0x0001 /* use RED */ #define CBQCLF_ECN 0x0002 /* use RED/ECN */ #define CBQCLF_RIO 0x0004 /* use RIO */ #define CBQCLF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */ #define CBQCLF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define CBQCLF_BORROW 0x0020 /* borrow from parent */ #define CBQCLF_CODEL 0x0040 /* use CoDel */ #ifdef _KERNEL CTASSERT(CBQCLF_RED == RMCF_RED); CTASSERT(CBQCLF_ECN == RMCF_ECN); CTASSERT(CBQCLF_RIO == RMCF_RIO); CTASSERT(CBQCLF_FLOWVALVE == RMCF_FLOWVALVE); CTASSERT(CBQCLF_CLEARDSCP == RMCF_CLEARDSCP); CTASSERT(CBQCLF_CODEL == RMCF_CODEL); #endif /* class flags only for root class */ #define CBQCLF_WRR 0x0100 /* weighted-round robin */ #define CBQCLF_EFFICIENT 0x0200 /* work-conserving */ /* class flags for special classes */ #define CBQCLF_ROOTCLASS 0x1000 /* root class */ #define CBQCLF_DEFCLASS 0x2000 /* default class */ -#ifdef ALTQ3_COMPAT -#define CBQCLF_CTLCLASS 0x4000 /* control class */ -#endif #define CBQCLF_CLASSMASK 0xf000 /* class mask */ #define CBQ_MAXQSIZE 200 #define CBQ_MAXPRI RM_MAXPRIO typedef struct _cbq_class_stats_ { u_int32_t handle; u_int depth; struct pktcntr xmit_cnt; /* packets sent in this class */ struct pktcntr drop_cnt; /* dropped packets */ u_int over; /* # times went over limit */ u_int borrows; /* # times tried to borrow */ u_int overactions; /* # times invoked overlimit action */ u_int delays; /* # times invoked delay actions */ /* other static class parameters useful for debugging */ int priority; int maxidle; int minidle; int offtime; int qmax; int ns_per_byte; int wrr_allot; int qcnt; /* # packets in queue */ int avgidle; /* codel, red and rio related info */ int qtype; struct redstats red[3]; struct codel_stats codel; } class_stats_t; /* * CBQ_STATS_VERSION is defined in altq.h to work around issues stemming * from mixing of public-API and internal bits in each scheduler-specific * header. */ -#ifdef ALTQ3_COMPAT -/* - * Define structures associated with IOCTLS for cbq. - */ -/* - * Define the CBQ interface structure. This must be included in all - * IOCTL's such that the CBQ driver may find the appropriate CBQ module - * associated with the network interface to be affected. - */ -struct cbq_interface { - char cbq_ifacename[IFNAMSIZ]; -}; - -typedef struct cbq_class_spec { - u_int priority; - u_int nano_sec_per_byte; - u_int maxq; - u_int maxidle; - int minidle; - u_int offtime; - u_int32_t parent_class_handle; - u_int32_t borrow_class_handle; - - u_int pktsize; - int flags; -} cbq_class_spec_t; - -struct cbq_add_class { - struct cbq_interface cbq_iface; - - cbq_class_spec_t cbq_class; - u_int32_t cbq_class_handle; -}; - -struct cbq_delete_class { - struct cbq_interface cbq_iface; - u_int32_t cbq_class_handle; -}; - -struct cbq_modify_class { - struct cbq_interface cbq_iface; - - cbq_class_spec_t cbq_class; - u_int32_t cbq_class_handle; -}; - -struct cbq_add_filter { - struct cbq_interface cbq_iface; - u_int32_t cbq_class_handle; - struct flow_filter cbq_filter; - - u_long cbq_filter_handle; -}; - -struct cbq_delete_filter { - struct cbq_interface cbq_iface; - u_long cbq_filter_handle; -}; - -/* number of classes are returned in nclasses field */ -struct cbq_getstats { - struct cbq_interface iface; - int nclasses; - class_stats_t *stats; -}; - -/* - * Define IOCTLs for CBQ. - */ -#define CBQ_IF_ATTACH _IOW('Q', 1, struct cbq_interface) -#define CBQ_IF_DETACH _IOW('Q', 2, struct cbq_interface) -#define CBQ_ENABLE _IOW('Q', 3, struct cbq_interface) -#define CBQ_DISABLE _IOW('Q', 4, struct cbq_interface) -#define CBQ_CLEAR_HIERARCHY _IOW('Q', 5, struct cbq_interface) -#define CBQ_ADD_CLASS _IOWR('Q', 7, struct cbq_add_class) -#define CBQ_DEL_CLASS _IOW('Q', 8, struct cbq_delete_class) -#define CBQ_MODIFY_CLASS _IOWR('Q', 9, struct cbq_modify_class) -#define CBQ_ADD_FILTER _IOWR('Q', 10, struct cbq_add_filter) -#define CBQ_DEL_FILTER _IOW('Q', 11, struct cbq_delete_filter) -#define CBQ_GETSTATS _IOWR('Q', 12, struct cbq_getstats) -#endif /* ALTQ3_COMPAT */ - #ifdef _KERNEL /* * Define macros only good for kernel drivers and modules. */ #define CBQ_WATCHDOG (hz / 20) #define CBQ_TIMEOUT 10 #define CBQ_LS_TIMEOUT (20 * hz / 1000) #define CBQ_MAX_CLASSES 256 -#ifdef ALTQ3_COMPAT -#define CBQ_MAX_FILTERS 256 -#define DISABLE 0x00 -#define ENABLE 0x01 -#endif /* ALTQ3_COMPAT */ - /* * Define State structures. */ typedef struct cbqstate { -#ifdef ALTQ3_COMPAT - struct cbqstate *cbq_next; -#endif int cbq_qlen; /* # of packets in cbq */ struct rm_class *cbq_class_tbl[CBQ_MAX_CLASSES]; struct rm_ifdat ifnp; struct callout cbq_callout; /* for timeouts */ #ifdef ALTQ3_CLFIER_COMPAT struct acc_classifier cbq_classifier; #endif } cbq_state_t; #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* !_ALTQ_ALTQ_CBQ_H_ */ Index: head/sys/net/altq/altq_cdnr.c =================================================================== --- head/sys/net/altq/altq_cdnr.c (revision 341506) +++ head/sys/net/altq/altq_cdnr.c (revision 341507) @@ -1,1382 +1,57 @@ /*- * Copyright (C) 1999-2002 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include -#ifdef ALTQ3_COMPAT -#include -#endif #include -#ifdef ALTQ3_COMPAT -/* - * diffserv traffic conditioning module - */ - -int altq_cdnr_enabled = 0; - -/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */ -#ifdef ALTQ_CDNR - -/* cdnr_list keeps all cdnr's allocated. */ -static LIST_HEAD(, top_cdnr) tcb_list; - -static int altq_cdnr_input(struct mbuf *, int); -static struct top_cdnr *tcb_lookup(char *ifname); -static struct cdnr_block *cdnr_handle2cb(u_long); -static u_long cdnr_cb2handle(struct cdnr_block *); -static void *cdnr_cballoc(struct top_cdnr *, int, - struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *)); -static void cdnr_cbdestroy(void *); -static int tca_verify_action(struct tc_action *); -static void tca_import_action(struct tc_action *, struct tc_action *); -static void tca_invalidate_action(struct tc_action *); - -static int generic_element_destroy(struct cdnr_block *); -static struct top_cdnr *top_create(struct ifaltq *); -static int top_destroy(struct top_cdnr *); -static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *); -static int element_destroy(struct cdnr_block *); -static void tb_import_profile(struct tbe *, struct tb_profile *); -static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *, - struct tc_action *, struct tc_action *); -static int tbm_destroy(struct tbmeter *); -static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *); -static struct trtcm *trtcm_create(struct top_cdnr *, - struct tb_profile *, struct tb_profile *, - struct tc_action *, struct tc_action *, struct tc_action *, - int); -static int trtcm_destroy(struct trtcm *); -static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *); -static struct tswtcm *tswtcm_create(struct top_cdnr *, - u_int32_t, u_int32_t, u_int32_t, - struct tc_action *, struct tc_action *, struct tc_action *); -static int tswtcm_destroy(struct tswtcm *); -static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *); - -static int cdnrcmd_if_attach(char *); -static int cdnrcmd_if_detach(char *); -static int cdnrcmd_add_element(struct cdnr_add_element *); -static int cdnrcmd_delete_element(struct cdnr_delete_element *); -static int cdnrcmd_add_filter(struct cdnr_add_filter *); -static int cdnrcmd_delete_filter(struct cdnr_delete_filter *); -static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *); -static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *); -static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *); -static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *); -static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *); -static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *); -static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *); -static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *); -static int cdnrcmd_get_stats(struct cdnr_get_stats *); - -altqdev_decl(cdnr); - -/* - * top level input function called from ip_input. - * should be called before converting header fields to host-byte-order. - */ -int -altq_cdnr_input(m, af) - struct mbuf *m; - int af; /* address family */ -{ - struct ifnet *ifp; - struct ip *ip; - struct top_cdnr *top; - struct tc_action *tca; - struct cdnr_block *cb; - struct cdnr_pktinfo pktinfo; - - ifp = m->m_pkthdr.rcvif; - if (!ALTQ_IS_CNDTNING(&ifp->if_snd)) - /* traffic conditioner is not enabled on this interface */ - return (1); - - top = ifp->if_snd.altq_cdnr; - - ip = mtod(m, struct ip *); -#ifdef INET6 - if (af == AF_INET6) { - u_int32_t flowlabel; - - flowlabel = ((struct ip6_hdr *)ip)->ip6_flow; - pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK; - } else -#endif - pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK; - pktinfo.pkt_len = m_pktlen(m); - - tca = NULL; - - cb = acc_classify(&top->tc_classifier, m, af); - if (cb != NULL) - tca = &cb->cb_action; - - if (tca == NULL) - tca = &top->tc_block.cb_action; - - while (1) { - PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len); - - switch (tca->tca_code) { - case TCACODE_PASS: - return (1); - case TCACODE_DROP: - m_freem(m); - return (0); - case TCACODE_RETURN: - return (0); - case TCACODE_MARK: -#ifdef INET6 - if (af == AF_INET6) { - struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; - u_int32_t flowlabel; - - flowlabel = ntohl(ip6->ip6_flow); - flowlabel = (tca->tca_dscp << 20) | - (flowlabel & ~(DSCP_MASK << 20)); - ip6->ip6_flow = htonl(flowlabel); - } else -#endif - ip->ip_tos = tca->tca_dscp | - (ip->ip_tos & DSCP_CUMASK); - return (1); - case TCACODE_NEXT: - cb = tca->tca_next; - tca = (*cb->cb_input)(cb, &pktinfo); - break; - case TCACODE_NONE: - default: - return (1); - } - } -} - -static struct top_cdnr * -tcb_lookup(ifname) - char *ifname; -{ - struct top_cdnr *top; - struct ifnet *ifp; - - if ((ifp = ifunit(ifname)) != NULL) - LIST_FOREACH(top, &tcb_list, tc_next) - if (top->tc_ifq->altq_ifp == ifp) - return (top); - return (NULL); -} - -static struct cdnr_block * -cdnr_handle2cb(handle) - u_long handle; -{ - struct cdnr_block *cb; - - cb = (struct cdnr_block *)handle; - if (handle != ALIGN(cb)) - return (NULL); - - if (cb == NULL || cb->cb_handle != handle) - return (NULL); - return (cb); -} - -static u_long -cdnr_cb2handle(cb) - struct cdnr_block *cb; -{ - return (cb->cb_handle); -} - -static void * -cdnr_cballoc(top, type, input_func) - struct top_cdnr *top; - int type; - struct tc_action *(*input_func)(struct cdnr_block *, - struct cdnr_pktinfo *); -{ - struct cdnr_block *cb; - int size; - - switch (type) { - case TCETYPE_TOP: - size = sizeof(struct top_cdnr); - break; - case TCETYPE_ELEMENT: - size = sizeof(struct cdnr_block); - break; - case TCETYPE_TBMETER: - size = sizeof(struct tbmeter); - break; - case TCETYPE_TRTCM: - size = sizeof(struct trtcm); - break; - case TCETYPE_TSWTCM: - size = sizeof(struct tswtcm); - break; - default: - return (NULL); - } - - cb = malloc(size, M_DEVBUF, M_WAITOK); - if (cb == NULL) - return (NULL); - bzero(cb, size); - - cb->cb_len = size; - cb->cb_type = type; - cb->cb_ref = 0; - cb->cb_handle = (u_long)cb; - if (top == NULL) - cb->cb_top = (struct top_cdnr *)cb; - else - cb->cb_top = top; - - if (input_func != NULL) { - /* - * if this cdnr has an action function, - * make tc_action to call itself. - */ - cb->cb_action.tca_code = TCACODE_NEXT; - cb->cb_action.tca_next = cb; - cb->cb_input = input_func; - } else - cb->cb_action.tca_code = TCACODE_NONE; - - /* if this isn't top, register the element to the top level cdnr */ - if (top != NULL) - LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next); - - return ((void *)cb); -} - -static void -cdnr_cbdestroy(cblock) - void *cblock; -{ - struct cdnr_block *cb = cblock; - - /* delete filters belonging to this cdnr */ - acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0); - - /* remove from the top level cdnr */ - if (cb->cb_top != cblock) - LIST_REMOVE(cb, cb_next); - - free(cb, M_DEVBUF); -} - -/* - * conditioner common destroy routine - */ -static int -generic_element_destroy(cb) - struct cdnr_block *cb; -{ - int error = 0; - - switch (cb->cb_type) { - case TCETYPE_TOP: - error = top_destroy((struct top_cdnr *)cb); - break; - case TCETYPE_ELEMENT: - error = element_destroy(cb); - break; - case TCETYPE_TBMETER: - error = tbm_destroy((struct tbmeter *)cb); - break; - case TCETYPE_TRTCM: - error = trtcm_destroy((struct trtcm *)cb); - break; - case TCETYPE_TSWTCM: - error = tswtcm_destroy((struct tswtcm *)cb); - break; - default: - error = EINVAL; - } - return (error); -} - -static int -tca_verify_action(utca) - struct tc_action *utca; -{ - switch (utca->tca_code) { - case TCACODE_PASS: - case TCACODE_DROP: - case TCACODE_MARK: - /* these are ok */ - break; - - case TCACODE_HANDLE: - /* verify handle value */ - if (cdnr_handle2cb(utca->tca_handle) == NULL) - return (-1); - break; - - case TCACODE_NONE: - case TCACODE_RETURN: - case TCACODE_NEXT: - default: - /* should not be passed from a user */ - return (-1); - } - return (0); -} - -static void -tca_import_action(ktca, utca) - struct tc_action *ktca, *utca; -{ - struct cdnr_block *cb; - - *ktca = *utca; - if (ktca->tca_code == TCACODE_HANDLE) { - cb = cdnr_handle2cb(ktca->tca_handle); - if (cb == NULL) { - ktca->tca_code = TCACODE_NONE; - return; - } - ktca->tca_code = TCACODE_NEXT; - ktca->tca_next = cb; - cb->cb_ref++; - } else if (ktca->tca_code == TCACODE_MARK) { - ktca->tca_dscp &= DSCP_MASK; - } - return; -} - -static void -tca_invalidate_action(tca) - struct tc_action *tca; -{ - struct cdnr_block *cb; - - if (tca->tca_code == TCACODE_NEXT) { - cb = tca->tca_next; - if (cb == NULL) - return; - cb->cb_ref--; - } - tca->tca_code = TCACODE_NONE; -} - -/* - * top level traffic conditioner - */ -static struct top_cdnr * -top_create(ifq) - struct ifaltq *ifq; -{ - struct top_cdnr *top; - - if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL) - return (NULL); - - top->tc_ifq = ifq; - /* set default action for the top level conditioner */ - top->tc_block.cb_action.tca_code = TCACODE_PASS; - - LIST_INSERT_HEAD(&tcb_list, top, tc_next); - - ifq->altq_cdnr = top; - - return (top); -} - -static int -top_destroy(top) - struct top_cdnr *top; -{ - struct cdnr_block *cb; - - if (ALTQ_IS_CNDTNING(top->tc_ifq)) - ALTQ_CLEAR_CNDTNING(top->tc_ifq); - top->tc_ifq->altq_cdnr = NULL; - - /* - * destroy all the conditioner elements belonging to this interface - */ - while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) { - while (cb != NULL && cb->cb_ref > 0) - cb = LIST_NEXT(cb, cb_next); - if (cb != NULL) - generic_element_destroy(cb); - } - - LIST_REMOVE(top, tc_next); - - cdnr_cbdestroy(top); - - /* if there is no active conditioner, remove the input hook */ - if (altq_input != NULL) { - LIST_FOREACH(top, &tcb_list, tc_next) - if (ALTQ_IS_CNDTNING(top->tc_ifq)) - break; - if (top == NULL) - altq_input = NULL; - } - - return (0); -} - -/* - * simple tc elements without input function (e.g., dropper and makers). - */ -static struct cdnr_block * -element_create(top, action) - struct top_cdnr *top; - struct tc_action *action; -{ - struct cdnr_block *cb; - - if (tca_verify_action(action) < 0) - return (NULL); - - if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL) - return (NULL); - - tca_import_action(&cb->cb_action, action); - - return (cb); -} - -static int -element_destroy(cb) - struct cdnr_block *cb; -{ - if (cb->cb_ref > 0) - return (EBUSY); - - tca_invalidate_action(&cb->cb_action); - - cdnr_cbdestroy(cb); - return (0); -} - -/* - * internal representation of token bucket parameters - * rate: byte_per_unittime << 32 - * (((bits_per_sec) / 8) << 32) / machclk_freq - * depth: byte << 32 - * - */ -#define TB_SHIFT 32 -#define TB_SCALE(x) ((u_int64_t)(x) << TB_SHIFT) -#define TB_UNSCALE(x) ((x) >> TB_SHIFT) - -static void -tb_import_profile(tb, profile) - struct tbe *tb; - struct tb_profile *profile; -{ - tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq; - tb->depth = TB_SCALE(profile->depth); - if (tb->rate > 0) - tb->filluptime = tb->depth / tb->rate; - else - tb->filluptime = 0xffffffffffffffffLL; - tb->token = tb->depth; - tb->last = read_machclk(); -} - -/* - * simple token bucket meter - */ -static struct tbmeter * -tbm_create(top, profile, in_action, out_action) - struct top_cdnr *top; - struct tb_profile *profile; - struct tc_action *in_action, *out_action; -{ - struct tbmeter *tbm = NULL; - - if (tca_verify_action(in_action) < 0 - || tca_verify_action(out_action) < 0) - return (NULL); - - if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER, - tbm_input)) == NULL) - return (NULL); - - tb_import_profile(&tbm->tb, profile); - - tca_import_action(&tbm->in_action, in_action); - tca_import_action(&tbm->out_action, out_action); - - return (tbm); -} - -static int -tbm_destroy(tbm) - struct tbmeter *tbm; -{ - if (tbm->cdnrblk.cb_ref > 0) - return (EBUSY); - - tca_invalidate_action(&tbm->in_action); - tca_invalidate_action(&tbm->out_action); - - cdnr_cbdestroy(tbm); - return (0); -} - -static struct tc_action * -tbm_input(cb, pktinfo) - struct cdnr_block *cb; - struct cdnr_pktinfo *pktinfo; -{ - struct tbmeter *tbm = (struct tbmeter *)cb; - u_int64_t len; - u_int64_t interval, now; - - len = TB_SCALE(pktinfo->pkt_len); - - if (tbm->tb.token < len) { - now = read_machclk(); - interval = now - tbm->tb.last; - if (interval >= tbm->tb.filluptime) - tbm->tb.token = tbm->tb.depth; - else { - tbm->tb.token += interval * tbm->tb.rate; - if (tbm->tb.token > tbm->tb.depth) - tbm->tb.token = tbm->tb.depth; - } - tbm->tb.last = now; - } - - if (tbm->tb.token < len) { - PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len); - return (&tbm->out_action); - } - - tbm->tb.token -= len; - PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len); - return (&tbm->in_action); -} - -/* - * two rate three color marker - * as described in draft-heinanen-diffserv-trtcm-01.txt - */ -static struct trtcm * -trtcm_create(top, cmtd_profile, peak_profile, - green_action, yellow_action, red_action, coloraware) - struct top_cdnr *top; - struct tb_profile *cmtd_profile, *peak_profile; - struct tc_action *green_action, *yellow_action, *red_action; - int coloraware; -{ - struct trtcm *tcm = NULL; - - if (tca_verify_action(green_action) < 0 - || tca_verify_action(yellow_action) < 0 - || tca_verify_action(red_action) < 0) - return (NULL); - - if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM, - trtcm_input)) == NULL) - return (NULL); - - tb_import_profile(&tcm->cmtd_tb, cmtd_profile); - tb_import_profile(&tcm->peak_tb, peak_profile); - - tca_import_action(&tcm->green_action, green_action); - tca_import_action(&tcm->yellow_action, yellow_action); - tca_import_action(&tcm->red_action, red_action); - - /* set dscps to use */ - if (tcm->green_action.tca_code == TCACODE_MARK) - tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK; - else - tcm->green_dscp = DSCP_AF11; - if (tcm->yellow_action.tca_code == TCACODE_MARK) - tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK; - else - tcm->yellow_dscp = DSCP_AF12; - if (tcm->red_action.tca_code == TCACODE_MARK) - tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK; - else - tcm->red_dscp = DSCP_AF13; - - tcm->coloraware = coloraware; - - return (tcm); -} - -static int -trtcm_destroy(tcm) - struct trtcm *tcm; -{ - if (tcm->cdnrblk.cb_ref > 0) - return (EBUSY); - - tca_invalidate_action(&tcm->green_action); - tca_invalidate_action(&tcm->yellow_action); - tca_invalidate_action(&tcm->red_action); - - cdnr_cbdestroy(tcm); - return (0); -} - -static struct tc_action * -trtcm_input(cb, pktinfo) - struct cdnr_block *cb; - struct cdnr_pktinfo *pktinfo; -{ - struct trtcm *tcm = (struct trtcm *)cb; - u_int64_t len; - u_int64_t interval, now; - u_int8_t color; - - len = TB_SCALE(pktinfo->pkt_len); - if (tcm->coloraware) { - color = pktinfo->pkt_dscp; - if (color != tcm->yellow_dscp && color != tcm->red_dscp) - color = tcm->green_dscp; - } else { - /* if color-blind, precolor it as green */ - color = tcm->green_dscp; - } - - now = read_machclk(); - if (tcm->cmtd_tb.token < len) { - interval = now - tcm->cmtd_tb.last; - if (interval >= tcm->cmtd_tb.filluptime) - tcm->cmtd_tb.token = tcm->cmtd_tb.depth; - else { - tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate; - if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth) - tcm->cmtd_tb.token = tcm->cmtd_tb.depth; - } - tcm->cmtd_tb.last = now; - } - if (tcm->peak_tb.token < len) { - interval = now - tcm->peak_tb.last; - if (interval >= tcm->peak_tb.filluptime) - tcm->peak_tb.token = tcm->peak_tb.depth; - else { - tcm->peak_tb.token += interval * tcm->peak_tb.rate; - if (tcm->peak_tb.token > tcm->peak_tb.depth) - tcm->peak_tb.token = tcm->peak_tb.depth; - } - tcm->peak_tb.last = now; - } - - if (color == tcm->red_dscp || tcm->peak_tb.token < len) { - pktinfo->pkt_dscp = tcm->red_dscp; - PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len); - return (&tcm->red_action); - } - - if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) { - pktinfo->pkt_dscp = tcm->yellow_dscp; - tcm->peak_tb.token -= len; - PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len); - return (&tcm->yellow_action); - } - - pktinfo->pkt_dscp = tcm->green_dscp; - tcm->cmtd_tb.token -= len; - tcm->peak_tb.token -= len; - PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len); - return (&tcm->green_action); -} - -/* - * time sliding window three color marker - * as described in draft-fang-diffserv-tc-tswtcm-00.txt - */ -static struct tswtcm * -tswtcm_create(top, cmtd_rate, peak_rate, avg_interval, - green_action, yellow_action, red_action) - struct top_cdnr *top; - u_int32_t cmtd_rate, peak_rate, avg_interval; - struct tc_action *green_action, *yellow_action, *red_action; -{ - struct tswtcm *tsw; - - if (tca_verify_action(green_action) < 0 - || tca_verify_action(yellow_action) < 0 - || tca_verify_action(red_action) < 0) - return (NULL); - - if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM, - tswtcm_input)) == NULL) - return (NULL); - - tca_import_action(&tsw->green_action, green_action); - tca_import_action(&tsw->yellow_action, yellow_action); - tca_import_action(&tsw->red_action, red_action); - - /* set dscps to use */ - if (tsw->green_action.tca_code == TCACODE_MARK) - tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK; - else - tsw->green_dscp = DSCP_AF11; - if (tsw->yellow_action.tca_code == TCACODE_MARK) - tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK; - else - tsw->yellow_dscp = DSCP_AF12; - if (tsw->red_action.tca_code == TCACODE_MARK) - tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK; - else - tsw->red_dscp = DSCP_AF13; - - /* convert rates from bits/sec to bytes/sec */ - tsw->cmtd_rate = cmtd_rate / 8; - tsw->peak_rate = peak_rate / 8; - tsw->avg_rate = 0; - - /* timewin is converted from msec to machine clock unit */ - tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000; - - return (tsw); -} - -static int -tswtcm_destroy(tsw) - struct tswtcm *tsw; -{ - if (tsw->cdnrblk.cb_ref > 0) - return (EBUSY); - - tca_invalidate_action(&tsw->green_action); - tca_invalidate_action(&tsw->yellow_action); - tca_invalidate_action(&tsw->red_action); - - cdnr_cbdestroy(tsw); - return (0); -} - -static struct tc_action * -tswtcm_input(cb, pktinfo) - struct cdnr_block *cb; - struct cdnr_pktinfo *pktinfo; -{ - struct tswtcm *tsw = (struct tswtcm *)cb; - int len; - u_int32_t avg_rate; - u_int64_t interval, now, tmp; - - /* - * rate estimator - */ - len = pktinfo->pkt_len; - now = read_machclk(); - - interval = now - tsw->t_front; - /* - * calculate average rate: - * avg = (avg * timewin + pkt_len)/(timewin + interval) - * pkt_len needs to be multiplied by machclk_freq in order to - * get (bytes/sec). - * note: when avg_rate (bytes/sec) and timewin (machclk unit) are - * less than 32 bits, the following 64-bit operation has enough - * precision. - */ - tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin - + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval); - tsw->avg_rate = avg_rate = (u_int32_t)tmp; - tsw->t_front = now; - - /* - * marker - */ - if (avg_rate > tsw->cmtd_rate) { - u_int32_t randval = arc4random() % avg_rate; - - if (avg_rate > tsw->peak_rate) { - if (randval < avg_rate - tsw->peak_rate) { - /* mark red */ - pktinfo->pkt_dscp = tsw->red_dscp; - PKTCNTR_ADD(&tsw->red_cnt, len); - return (&tsw->red_action); - } else if (randval < avg_rate - tsw->cmtd_rate) - goto mark_yellow; - } else { - /* peak_rate >= avg_rate > cmtd_rate */ - if (randval < avg_rate - tsw->cmtd_rate) { - mark_yellow: - pktinfo->pkt_dscp = tsw->yellow_dscp; - PKTCNTR_ADD(&tsw->yellow_cnt, len); - return (&tsw->yellow_action); - } - } - } - - /* mark green */ - pktinfo->pkt_dscp = tsw->green_dscp; - PKTCNTR_ADD(&tsw->green_cnt, len); - return (&tsw->green_action); -} - -/* - * ioctl requests - */ -static int -cdnrcmd_if_attach(ifname) - char *ifname; -{ - struct ifnet *ifp; - struct top_cdnr *top; - - if ((ifp = ifunit(ifname)) == NULL) - return (EBADF); - - if (ifp->if_snd.altq_cdnr != NULL) - return (EBUSY); - - if ((top = top_create(&ifp->if_snd)) == NULL) - return (ENOMEM); - return (0); -} - -static int -cdnrcmd_if_detach(ifname) - char *ifname; -{ - struct top_cdnr *top; - - if ((top = tcb_lookup(ifname)) == NULL) - return (EBADF); - - return top_destroy(top); -} - -static int -cdnrcmd_add_element(ap) - struct cdnr_add_element *ap; -{ - struct top_cdnr *top; - struct cdnr_block *cb; - - if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) - return (EBADF); - - cb = element_create(top, &ap->action); - if (cb == NULL) - return (EINVAL); - /* return a class handle to the user */ - ap->cdnr_handle = cdnr_cb2handle(cb); - return (0); -} - -static int -cdnrcmd_delete_element(ap) - struct cdnr_delete_element *ap; -{ - struct top_cdnr *top; - struct cdnr_block *cb; - - if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) - return (EBADF); - - if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) - return (EINVAL); - - if (cb->cb_type != TCETYPE_ELEMENT) - return generic_element_destroy(cb); - - return element_destroy(cb); -} - -static int -cdnrcmd_add_filter(ap) - struct cdnr_add_filter *ap; -{ - struct top_cdnr *top; - struct cdnr_block *cb; - - if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) - return (EBADF); - - if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) - return (EINVAL); - - return acc_add_filter(&top->tc_classifier, &ap->filter, - cb, &ap->filter_handle); -} - -static int -cdnrcmd_delete_filter(ap) - struct cdnr_delete_filter *ap; -{ - struct top_cdnr *top; - - if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) - return (EBADF); - - return acc_delete_filter(&top->tc_classifier, ap->filter_handle); -} - -static int -cdnrcmd_add_tbm(ap) - struct cdnr_add_tbmeter *ap; -{ - struct top_cdnr *top; - struct tbmeter *tbm; - - if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) - return (EBADF); - - tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action); - if (tbm == NULL) - return (EINVAL); - /* return a class handle to the user */ - ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk); - return (0); -} - -static int -cdnrcmd_modify_tbm(ap) - struct cdnr_modify_tbmeter *ap; -{ - struct tbmeter *tbm; - - if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) - return (EINVAL); - - tb_import_profile(&tbm->tb, &ap->profile); - - return (0); -} - -static int -cdnrcmd_tbm_stats(ap) - struct cdnr_tbmeter_stats *ap; -{ - struct tbmeter *tbm; - - if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) - return (EINVAL); - - ap->in_cnt = tbm->in_cnt; - ap->out_cnt = tbm->out_cnt; - - return (0); -} - -static int -cdnrcmd_add_trtcm(ap) - struct cdnr_add_trtcm *ap; -{ - struct top_cdnr *top; - struct trtcm *tcm; - - if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) - return (EBADF); - - tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile, - &ap->green_action, &ap->yellow_action, - &ap->red_action, ap->coloraware); - if (tcm == NULL) - return (EINVAL); - - /* return a class handle to the user */ - ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk); - return (0); -} - -static int -cdnrcmd_modify_trtcm(ap) - struct cdnr_modify_trtcm *ap; -{ - struct trtcm *tcm; - - if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) - return (EINVAL); - - tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile); - tb_import_profile(&tcm->peak_tb, &ap->peak_profile); - - return (0); -} - -static int -cdnrcmd_tcm_stats(ap) - struct cdnr_tcm_stats *ap; -{ - struct cdnr_block *cb; - - if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) - return (EINVAL); - - if (cb->cb_type == TCETYPE_TRTCM) { - struct trtcm *tcm = (struct trtcm *)cb; - - ap->green_cnt = tcm->green_cnt; - ap->yellow_cnt = tcm->yellow_cnt; - ap->red_cnt = tcm->red_cnt; - } else if (cb->cb_type == TCETYPE_TSWTCM) { - struct tswtcm *tsw = (struct tswtcm *)cb; - - ap->green_cnt = tsw->green_cnt; - ap->yellow_cnt = tsw->yellow_cnt; - ap->red_cnt = tsw->red_cnt; - } else - return (EINVAL); - - return (0); -} - -static int -cdnrcmd_add_tswtcm(ap) - struct cdnr_add_tswtcm *ap; -{ - struct top_cdnr *top; - struct tswtcm *tsw; - - if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) - return (EBADF); - - if (ap->cmtd_rate > ap->peak_rate) - return (EINVAL); - - tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate, - ap->avg_interval, &ap->green_action, - &ap->yellow_action, &ap->red_action); - if (tsw == NULL) - return (EINVAL); - - /* return a class handle to the user */ - ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk); - return (0); -} - -static int -cdnrcmd_modify_tswtcm(ap) - struct cdnr_modify_tswtcm *ap; -{ - struct tswtcm *tsw; - - if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) - return (EINVAL); - - if (ap->cmtd_rate > ap->peak_rate) - return (EINVAL); - - /* convert rates from bits/sec to bytes/sec */ - tsw->cmtd_rate = ap->cmtd_rate / 8; - tsw->peak_rate = ap->peak_rate / 8; - tsw->avg_rate = 0; - - /* timewin is converted from msec to machine clock unit */ - tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000; - - return (0); -} - -static int -cdnrcmd_get_stats(ap) - struct cdnr_get_stats *ap; -{ - struct top_cdnr *top; - struct cdnr_block *cb; - struct tbmeter *tbm; - struct trtcm *tcm; - struct tswtcm *tsw; - struct tce_stats tce, *usp; - int error, n, nskip, nelements; - - if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) - return (EBADF); - - /* copy action stats */ - bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts)); - - /* stats for each element */ - nelements = ap->nelements; - usp = ap->tce_stats; - if (nelements <= 0 || usp == NULL) - return (0); - - nskip = ap->nskip; - n = 0; - LIST_FOREACH(cb, &top->tc_elements, cb_next) { - if (nskip > 0) { - nskip--; - continue; - } - - bzero(&tce, sizeof(tce)); - tce.tce_handle = cb->cb_handle; - tce.tce_type = cb->cb_type; - switch (cb->cb_type) { - case TCETYPE_TBMETER: - tbm = (struct tbmeter *)cb; - tce.tce_cnts[0] = tbm->in_cnt; - tce.tce_cnts[1] = tbm->out_cnt; - break; - case TCETYPE_TRTCM: - tcm = (struct trtcm *)cb; - tce.tce_cnts[0] = tcm->green_cnt; - tce.tce_cnts[1] = tcm->yellow_cnt; - tce.tce_cnts[2] = tcm->red_cnt; - break; - case TCETYPE_TSWTCM: - tsw = (struct tswtcm *)cb; - tce.tce_cnts[0] = tsw->green_cnt; - tce.tce_cnts[1] = tsw->yellow_cnt; - tce.tce_cnts[2] = tsw->red_cnt; - break; - default: - continue; - } - - if ((error = copyout((caddr_t)&tce, (caddr_t)usp++, - sizeof(tce))) != 0) - return (error); - - if (++n == nelements) - break; - } - ap->nelements = n; - - return (0); -} - -/* - * conditioner device interface - */ -int -cdnropen(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - if (machclk_freq == 0) - init_machclk(); - - if (machclk_freq == 0) { - printf("cdnr: no cpu clock available!\n"); - return (ENXIO); - } - - /* everything will be done when the queueing scheme is attached. */ - return 0; -} - -int -cdnrclose(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - struct top_cdnr *top; - int err, error = 0; - - while ((top = LIST_FIRST(&tcb_list)) != NULL) { - /* destroy all */ - err = top_destroy(top); - if (err != 0 && error == 0) - error = err; - } - altq_input = NULL; - - return (error); -} - -int -cdnrioctl(dev, cmd, addr, flag, p) - dev_t dev; - ioctlcmd_t cmd; - caddr_t addr; - int flag; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - struct top_cdnr *top; - struct cdnr_interface *ifacep; - int s, error = 0; - - /* check super-user privilege */ - switch (cmd) { - case CDNR_GETSTATS: - break; - default: -#if (__FreeBSD_version > 700000) - if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) -#elsif (__FreeBSD_version > 400000) - if ((error = suser(p)) != 0) -#else - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) -#endif - return (error); - break; - } - - s = splnet(); - switch (cmd) { - - case CDNR_IF_ATTACH: - ifacep = (struct cdnr_interface *)addr; - error = cdnrcmd_if_attach(ifacep->cdnr_ifname); - break; - - case CDNR_IF_DETACH: - ifacep = (struct cdnr_interface *)addr; - error = cdnrcmd_if_detach(ifacep->cdnr_ifname); - break; - - case CDNR_ENABLE: - case CDNR_DISABLE: - ifacep = (struct cdnr_interface *)addr; - if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) { - error = EBADF; - break; - } - - switch (cmd) { - - case CDNR_ENABLE: - ALTQ_SET_CNDTNING(top->tc_ifq); - if (altq_input == NULL) - altq_input = altq_cdnr_input; - break; - - case CDNR_DISABLE: - ALTQ_CLEAR_CNDTNING(top->tc_ifq); - LIST_FOREACH(top, &tcb_list, tc_next) - if (ALTQ_IS_CNDTNING(top->tc_ifq)) - break; - if (top == NULL) - altq_input = NULL; - break; - } - break; - - case CDNR_ADD_ELEM: - error = cdnrcmd_add_element((struct cdnr_add_element *)addr); - break; - - case CDNR_DEL_ELEM: - error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr); - break; - - case CDNR_ADD_TBM: - error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr); - break; - - case CDNR_MOD_TBM: - error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr); - break; - - case CDNR_TBM_STATS: - error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr); - break; - - case CDNR_ADD_TCM: - error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr); - break; - - case CDNR_MOD_TCM: - error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr); - break; - - case CDNR_TCM_STATS: - error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr); - break; - - case CDNR_ADD_FILTER: - error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr); - break; - - case CDNR_DEL_FILTER: - error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr); - break; - - case CDNR_GETSTATS: - error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr); - break; - - case CDNR_ADD_TSW: - error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr); - break; - - case CDNR_MOD_TSW: - error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr); - break; - - default: - error = EINVAL; - break; - } - splx(s); - - return error; -} - -#ifdef KLD_MODULE - -static struct altqsw cdnr_sw = - {"cdnr", cdnropen, cdnrclose, cdnrioctl}; - -ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw); - -#endif /* KLD_MODULE */ - -#endif /* ALTQ3_COMPAT */ -#endif /* ALTQ_CDNR */ Index: head/sys/net/altq/altq_hfsc.c =================================================================== --- head/sys/net/altq/altq_hfsc.c (revision 341506) +++ head/sys/net/altq/altq_hfsc.c (revision 341507) @@ -1,2329 +1,1751 @@ /*- * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. * * Permission to use, copy, modify, and distribute this software and * its documentation is hereby granted (including for commercial or * for-profit use), provided that both the copyright notice and this * permission notice appear in all copies of the software, derivative * works, or modified versions, and any portions thereof. * * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * Carnegie Mellon encourages (but does not require) users of this * software to return any improvements or extensions that they make, * and to grant Carnegie Mellon the rights to redistribute these * changes without encumbrance. * * $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ * $FreeBSD$ */ /* * H-FSC is described in Proceedings of SIGCOMM'97, * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing, * Real-Time and Priority Service" * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng. * * Oleg Cherevko added the upperlimit for link-sharing. * when a class has an upperlimit, the fit-time is computed from the * upperlimit service curve. the link-sharing scheduler does not schedule * a class whose fit-time exceeds the current time. */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */ #include #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #endif /* ALTQ3_COMPAT */ #include #include #include #include #include #include #include #include -#ifdef ALTQ3_COMPAT -#include -#endif /* * function prototypes */ static int hfsc_clear_interface(struct hfsc_if *); static int hfsc_request(struct ifaltq *, int, void *); static void hfsc_purge(struct hfsc_if *); static struct hfsc_class *hfsc_class_create(struct hfsc_if *, struct service_curve *, struct service_curve *, struct service_curve *, struct hfsc_class *, int, int, int); static int hfsc_class_destroy(struct hfsc_class *); static struct hfsc_class *hfsc_nextclass(struct hfsc_class *); static int hfsc_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *hfsc_dequeue(struct ifaltq *, int); static int hfsc_addq(struct hfsc_class *, struct mbuf *); static struct mbuf *hfsc_getq(struct hfsc_class *); static struct mbuf *hfsc_pollq(struct hfsc_class *); static void hfsc_purgeq(struct hfsc_class *); static void update_cfmin(struct hfsc_class *); static void set_active(struct hfsc_class *, int); static void set_passive(struct hfsc_class *); static void init_ed(struct hfsc_class *, int); static void update_ed(struct hfsc_class *, int); static void update_d(struct hfsc_class *, int); static void init_vf(struct hfsc_class *, int); static void update_vf(struct hfsc_class *, int, u_int64_t); static void ellist_insert(struct hfsc_class *); static void ellist_remove(struct hfsc_class *); static void ellist_update(struct hfsc_class *); struct hfsc_class *hfsc_get_mindl(struct hfsc_if *, u_int64_t); static void actlist_insert(struct hfsc_class *); static void actlist_remove(struct hfsc_class *); static void actlist_update(struct hfsc_class *); static struct hfsc_class *actlist_firstfit(struct hfsc_class *, u_int64_t); static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t); static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t); static __inline u_int64_t m2sm(u_int64_t); static __inline u_int64_t m2ism(u_int64_t); static __inline u_int64_t d2dx(u_int); static u_int64_t sm2m(u_int64_t); static u_int dx2d(u_int64_t); static void sc2isc(struct service_curve *, struct internal_sc *); static void rtsc_init(struct runtime_sc *, struct internal_sc *, u_int64_t, u_int64_t); static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t); static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t); static void rtsc_min(struct runtime_sc *, struct internal_sc *, u_int64_t, u_int64_t); static void get_class_stats_v0(struct hfsc_classstats_v0 *, struct hfsc_class *); static void get_class_stats_v1(struct hfsc_classstats_v1 *, struct hfsc_class *); static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t); -#ifdef ALTQ3_COMPAT -static struct hfsc_if *hfsc_attach(struct ifaltq *, u_int); -static int hfsc_detach(struct hfsc_if *); -static int hfsc_class_modify(struct hfsc_class *, struct service_curve *, - struct service_curve *, struct service_curve *); -static int hfsccmd_if_attach(struct hfsc_attach *); -static int hfsccmd_if_detach(struct hfsc_interface *); -static int hfsccmd_add_class(struct hfsc_add_class *); -static int hfsccmd_delete_class(struct hfsc_delete_class *); -static int hfsccmd_modify_class(struct hfsc_modify_class *); -static int hfsccmd_add_filter(struct hfsc_add_filter *); -static int hfsccmd_delete_filter(struct hfsc_delete_filter *); -static int hfsccmd_class_stats(struct hfsc_class_stats *); - -altqdev_decl(hfsc); -#endif /* ALTQ3_COMPAT */ - /* * macros */ #define is_a_parent_class(cl) ((cl)->cl_children != NULL) #define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ -#ifdef ALTQ3_COMPAT -/* hif_list keeps all hfsc_if's allocated. */ -static struct hfsc_if *hif_list = NULL; -#endif /* ALTQ3_COMPAT */ int hfsc_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); s = splnet(); error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc, hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL); splx(s); return (error); } int hfsc_add_altq(struct pf_altq *a) { struct hfsc_if *hif; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (hif == NULL) return (ENOMEM); TAILQ_INIT(&hif->hif_eligible); hif->hif_ifq = &ifp->if_snd; /* keep the state in pf_altq */ a->altq_disc = hif; return (0); } int hfsc_remove_altq(struct pf_altq *a) { struct hfsc_if *hif; if ((hif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; (void)hfsc_clear_interface(hif); (void)hfsc_class_destroy(hif->hif_rootclass); free(hif, M_DEVBUF); return (0); } int hfsc_add_queue(struct pf_altq *a) { struct hfsc_if *hif; struct hfsc_class *cl, *parent; struct hfsc_opts_v1 *opts; struct service_curve rtsc, lssc, ulsc; if ((hif = a->altq_disc) == NULL) return (EINVAL); opts = &a->pq_u.hfsc_opts; if (a->parent_qid == HFSC_NULLCLASS_HANDLE && hif->hif_rootclass == NULL) parent = NULL; else if ((parent = clh_to_clp(hif, a->parent_qid)) == NULL) return (EINVAL); if (a->qid == 0) return (EINVAL); if (clh_to_clp(hif, a->qid) != NULL) return (EBUSY); rtsc.m1 = opts->rtsc_m1; rtsc.d = opts->rtsc_d; rtsc.m2 = opts->rtsc_m2; lssc.m1 = opts->lssc_m1; lssc.d = opts->lssc_d; lssc.m2 = opts->lssc_m2; ulsc.m1 = opts->ulsc_m1; ulsc.d = opts->ulsc_d; ulsc.m2 = opts->ulsc_m2; cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc, parent, a->qlimit, opts->flags, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int hfsc_remove_queue(struct pf_altq *a) { struct hfsc_if *hif; struct hfsc_class *cl; if ((hif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(hif, a->qid)) == NULL) return (EINVAL); return (hfsc_class_destroy(cl)); } int hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct hfsc_if *hif; struct hfsc_class *cl; union { struct hfsc_classstats_v0 v0; struct hfsc_classstats_v1 v1; } stats; size_t stats_size; int error = 0; if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL) return (EBADF); if ((cl = clh_to_clp(hif, a->qid)) == NULL) return (EINVAL); if (version > HFSC_STATS_VERSION) return (EINVAL); memset(&stats, 0, sizeof(stats)); switch (version) { case 0: get_class_stats_v0(&stats.v0, cl); stats_size = sizeof(struct hfsc_classstats_v0); break; case 1: get_class_stats_v1(&stats.v1, cl); stats_size = sizeof(struct hfsc_classstats_v1); break; } if (*nbytes < stats_size) return (EINVAL); if ((error = copyout((caddr_t)&stats, ubuf, stats_size)) != 0) return (error); *nbytes = stats_size; return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes except the root class. */ static int hfsc_clear_interface(struct hfsc_if *hif) { struct hfsc_class *cl; -#ifdef ALTQ3_COMPAT - /* free the filters for this interface */ - acc_discard_filters(&hif->hif_classifier, NULL, 1); -#endif /* clear out the classes */ while (hif->hif_rootclass != NULL && (cl = hif->hif_rootclass->cl_children) != NULL) { /* * remove the first leaf class found in the hierarchy * then start over */ for (; cl != NULL; cl = hfsc_nextclass(cl)) { if (!is_a_parent_class(cl)) { (void)hfsc_class_destroy(cl); break; } } } return (0); } static int hfsc_request(struct ifaltq *ifq, int req, void *arg) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: hfsc_purge(hif); break; } return (0); } /* discard all the queued packets on the interface */ static void hfsc_purge(struct hfsc_if *hif) { struct hfsc_class *cl; for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) if (!qempty(cl->cl_q)) hfsc_purgeq(cl); if (ALTQ_IS_ENABLED(hif->hif_ifq)) hif->hif_ifq->ifq_len = 0; } struct hfsc_class * hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, struct service_curve *fsc, struct service_curve *usc, struct hfsc_class *parent, int qlimit, int flags, int qid) { struct hfsc_class *cl, *p; int i, s; if (hif->hif_classes >= HFSC_MAX_CLASSES) return (NULL); #ifndef ALTQ_RED if (flags & HFCF_RED) { #ifdef ALTQ_DEBUG printf("hfsc_class_create: RED not configured for HFSC!\n"); #endif return (NULL); } #endif #ifndef ALTQ_CODEL if (flags & HFCF_CODEL) { #ifdef ALTQ_DEBUG printf("hfsc_class_create: CODEL not configured for HFSC!\n"); #endif return (NULL); } #endif cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->cl_q == NULL) goto err_ret; TAILQ_INIT(&cl->cl_actc); if (qlimit == 0) qlimit = 50; /* use default */ qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; qsize(cl->cl_q) = 0; cl->cl_flags = flags; #ifdef ALTQ_RED if (flags & (HFCF_RED|HFCF_RIO)) { int red_flags, red_pkttime; u_int m2; m2 = 0; if (rsc != NULL && rsc->m2 > m2) m2 = rsc->m2; if (fsc != NULL && fsc->m2 > m2) m2 = fsc->m2; if (usc != NULL && usc->m2 > m2) m2 = usc->m2; red_flags = 0; if (flags & HFCF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & HFCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (m2 < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (m2 / 8); if (flags & HFCF_RED) { cl->cl_red = red_alloc(0, 0, qlimit(cl->cl_q) * 10/100, qlimit(cl->cl_q) * 30/100, red_flags, red_pkttime); if (cl->cl_red != NULL) qtype(cl->cl_q) = Q_RED; } #ifdef ALTQ_RIO else { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red != NULL) qtype(cl->cl_q) = Q_RIO; } #endif } #endif /* ALTQ_RED */ #ifdef ALTQ_CODEL if (flags & HFCF_CODEL) { cl->cl_codel = codel_alloc(5, 100, 0); if (cl->cl_codel != NULL) qtype(cl->cl_q) = Q_CODEL; } #endif if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) { cl->cl_rsc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_rsc == NULL) goto err_ret; sc2isc(rsc, cl->cl_rsc); rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0); rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0); } if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) { cl->cl_fsc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_fsc == NULL) goto err_ret; sc2isc(fsc, cl->cl_fsc); rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0); } if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) { cl->cl_usc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_usc == NULL) goto err_ret; sc2isc(usc, cl->cl_usc); rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0); } cl->cl_id = hif->hif_classid++; cl->cl_handle = qid; cl->cl_hif = hif; cl->cl_parent = parent; s = splnet(); IFQ_LOCK(hif->hif_ifq); hif->hif_classes++; /* * find a free slot in the class table. if the slot matching * the lower bits of qid is free, use this slot. otherwise, * use the first free slot. */ i = qid % HFSC_MAX_CLASSES; if (hif->hif_class_tbl[i] == NULL) hif->hif_class_tbl[i] = cl; else { for (i = 0; i < HFSC_MAX_CLASSES; i++) if (hif->hif_class_tbl[i] == NULL) { hif->hif_class_tbl[i] = cl; break; } if (i == HFSC_MAX_CLASSES) { IFQ_UNLOCK(hif->hif_ifq); splx(s); goto err_ret; } } if (flags & HFCF_DEFAULTCLASS) hif->hif_defaultclass = cl; if (parent == NULL) { /* this is root class */ hif->hif_rootclass = cl; } else { /* add this class to the children list of the parent */ if ((p = parent->cl_children) == NULL) parent->cl_children = cl; else { while (p->cl_siblings != NULL) p = p->cl_siblings; p->cl_siblings = cl; } } IFQ_UNLOCK(hif->hif_ifq); splx(s); return (cl); err_ret: if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); if (cl->cl_rsc != NULL) free(cl->cl_rsc, M_DEVBUF); if (cl->cl_usc != NULL) free(cl->cl_usc, M_DEVBUF); if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (NULL); } static int hfsc_class_destroy(struct hfsc_class *cl) { int i, s; if (cl == NULL) return (0); if (is_a_parent_class(cl)) return (EBUSY); s = splnet(); IFQ_LOCK(cl->cl_hif->hif_ifq); -#ifdef ALTQ3_COMPAT - /* delete filters referencing to this class */ - acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0); -#endif /* ALTQ3_COMPAT */ if (!qempty(cl->cl_q)) hfsc_purgeq(cl); if (cl->cl_parent == NULL) { /* this is root class */ } else { struct hfsc_class *p = cl->cl_parent->cl_children; if (p == cl) cl->cl_parent->cl_children = cl->cl_siblings; else do { if (p->cl_siblings == cl) { p->cl_siblings = cl->cl_siblings; break; } } while ((p = p->cl_siblings) != NULL); ASSERT(p != NULL); } for (i = 0; i < HFSC_MAX_CLASSES; i++) if (cl->cl_hif->hif_class_tbl[i] == cl) { cl->cl_hif->hif_class_tbl[i] = NULL; break; } cl->cl_hif->hif_classes--; IFQ_UNLOCK(cl->cl_hif->hif_ifq); splx(s); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } IFQ_LOCK(cl->cl_hif->hif_ifq); if (cl == cl->cl_hif->hif_rootclass) cl->cl_hif->hif_rootclass = NULL; if (cl == cl->cl_hif->hif_defaultclass) cl->cl_hif->hif_defaultclass = NULL; IFQ_UNLOCK(cl->cl_hif->hif_ifq); if (cl->cl_usc != NULL) free(cl->cl_usc, M_DEVBUF); if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); if (cl->cl_rsc != NULL) free(cl->cl_rsc, M_DEVBUF); free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * hfsc_nextclass returns the next class in the tree. * usage: * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) * do_something; */ static struct hfsc_class * hfsc_nextclass(struct hfsc_class *cl) { if (cl->cl_children != NULL) cl = cl->cl_children; else if (cl->cl_siblings != NULL) cl = cl->cl_siblings; else { while ((cl = cl->cl_parent) != NULL) if (cl->cl_siblings) { cl = cl->cl_siblings; break; } } return (cl); } /* * hfsc_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. */ static int hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; struct hfsc_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(hif, t->qid); -#ifdef ALTQ3_COMPAT - else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) - cl = pktattr->pattr_class; -#endif if (cl == NULL || is_a_parent_class(cl)) { cl = hif->hif_defaultclass; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } -#ifdef ALTQ3_COMPAT - if (pktattr != NULL) - cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */ - else -#endif cl->cl_pktattr = NULL; len = m_pktlen(m); if (hfsc_addq(cl, m) != 0) { /* drop occurred. mbuf was freed in hfsc_addq. */ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len); return (ENOBUFS); } IFQ_INC_LEN(ifq); cl->cl_hif->hif_packets++; /* successfully queued. */ if (qlen(cl->cl_q) == 1) set_active(cl, m_pktlen(m)); return (0); } /* * hfsc_dequeue is a dequeue function to be registered to * (*altq_dequeue) in struct ifaltq. * * note: ALTDQ_POLL returns the next packet without removing the packet * from the queue. ALTDQ_REMOVE is a normal dequeue operation. * ALTDQ_REMOVE must return the same packet if called immediately * after ALTDQ_POLL. */ static struct mbuf * hfsc_dequeue(struct ifaltq *ifq, int op) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; struct hfsc_class *cl; struct mbuf *m; int len, next_len; int realtime = 0; u_int64_t cur_time; IFQ_LOCK_ASSERT(ifq); if (hif->hif_packets == 0) /* no packet in the tree */ return (NULL); cur_time = read_machclk(); if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) { cl = hif->hif_pollcache; hif->hif_pollcache = NULL; /* check if the class was scheduled by real-time criteria */ if (cl->cl_rsc != NULL) realtime = (cl->cl_e <= cur_time); } else { /* * if there are eligible classes, use real-time criteria. * find the class with the minimum deadline among * the eligible classes. */ if ((cl = hfsc_get_mindl(hif, cur_time)) != NULL) { realtime = 1; } else { #ifdef ALTQ_DEBUG int fits = 0; #endif /* * use link-sharing criteria * get the class with the minimum vt in the hierarchy */ cl = hif->hif_rootclass; while (is_a_parent_class(cl)) { cl = actlist_firstfit(cl, cur_time); if (cl == NULL) { #ifdef ALTQ_DEBUG if (fits > 0) printf("%d fit but none found\n",fits); #endif return (NULL); } /* * update parent's cl_cvtmin. * don't update if the new vt is smaller. */ if (cl->cl_parent->cl_cvtmin < cl->cl_vt) cl->cl_parent->cl_cvtmin = cl->cl_vt; #ifdef ALTQ_DEBUG fits++; #endif } } if (op == ALTDQ_POLL) { hif->hif_pollcache = cl; m = hfsc_pollq(cl); return (m); } } m = hfsc_getq(cl); if (m == NULL) panic("hfsc_dequeue:"); len = m_pktlen(m); cl->cl_hif->hif_packets--; IFQ_DEC_LEN(ifq); PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len); update_vf(cl, len, cur_time); if (realtime) cl->cl_cumul += len; if (!qempty(cl->cl_q)) { if (cl->cl_rsc != NULL) { /* update ed */ next_len = m_pktlen(qhead(cl->cl_q)); if (realtime) update_ed(cl, next_len); else update_d(cl, next_len); } } else { /* the class becomes passive */ set_passive(cl); } return (m); } static int hfsc_addq(struct hfsc_class *cl, struct mbuf *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_addq(cl->cl_codel, cl->cl_q, m); #endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); } if (cl->cl_flags & HFCF_CLEARDSCP) write_dsfield(m, cl->cl_pktattr, 0); _addq(cl->cl_q, m); return (0); } static struct mbuf * hfsc_getq(struct hfsc_class *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_getq((rio_t *)cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_getq(cl->cl_codel, cl->cl_q); #endif return _getq(cl->cl_q); } static struct mbuf * hfsc_pollq(struct hfsc_class *cl) { return qhead(cl->cl_q); } static void hfsc_purgeq(struct hfsc_class *cl) { struct mbuf *m; if (qempty(cl->cl_q)) return; while ((m = _getq(cl->cl_q)) != NULL) { PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m)); m_freem(m); cl->cl_hif->hif_packets--; IFQ_DEC_LEN(cl->cl_hif->hif_ifq); } ASSERT(qlen(cl->cl_q) == 0); update_vf(cl, 0, 0); /* remove cl from the actlist */ set_passive(cl); } static void set_active(struct hfsc_class *cl, int len) { if (cl->cl_rsc != NULL) init_ed(cl, len); if (cl->cl_fsc != NULL) init_vf(cl, len); cl->cl_stats.period++; } static void set_passive(struct hfsc_class *cl) { if (cl->cl_rsc != NULL) ellist_remove(cl); /* * actlist is now handled in update_vf() so that update_vf(cl, 0, 0) * needs to be called explicitly to remove a class from actlist */ } static void init_ed(struct hfsc_class *cl, int next_len) { u_int64_t cur_time; cur_time = read_machclk(); /* update the deadline curve */ rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul); /* * update the eligible curve. * for concave, it is equal to the deadline curve. * for convex, it is a linear curve with slope m2. */ cl->cl_eligible = cl->cl_deadline; if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) { cl->cl_eligible.dx = 0; cl->cl_eligible.dy = 0; } /* compute e and d */ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); ellist_insert(cl); } static void update_ed(struct hfsc_class *cl, int next_len) { cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); ellist_update(cl); } static void update_d(struct hfsc_class *cl, int next_len) { cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); } static void init_vf(struct hfsc_class *cl, int len) { struct hfsc_class *max_cl, *p; u_int64_t vt, f, cur_time; int go_active; cur_time = 0; go_active = 1; for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) { if (go_active && cl->cl_nactive++ == 0) go_active = 1; else go_active = 0; if (go_active) { max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead); if (max_cl != NULL) { /* * set vt to the average of the min and max * classes. if the parent's period didn't * change, don't decrease vt of the class. */ vt = max_cl->cl_vt; if (cl->cl_parent->cl_cvtmin != 0) vt = (cl->cl_parent->cl_cvtmin + vt)/2; if (cl->cl_parent->cl_vtperiod != cl->cl_parentperiod || vt > cl->cl_vt) cl->cl_vt = vt; } else { /* * first child for a new parent backlog period. * add parent's cvtmax to vtoff of children * to make a new vt (vtoff + vt) larger than * the vt in the last period for all children. */ vt = cl->cl_parent->cl_cvtmax; for (p = cl->cl_parent->cl_children; p != NULL; p = p->cl_siblings) p->cl_vtoff += vt; cl->cl_vt = 0; cl->cl_parent->cl_cvtmax = 0; cl->cl_parent->cl_cvtmin = 0; } cl->cl_initvt = cl->cl_vt; /* update the virtual curve */ vt = cl->cl_vt + cl->cl_vtoff; rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total); if (cl->cl_virtual.x == vt) { cl->cl_virtual.x -= cl->cl_vtoff; cl->cl_vtoff = 0; } cl->cl_vtadj = 0; cl->cl_vtperiod++; /* increment vt period */ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod; if (cl->cl_parent->cl_nactive == 0) cl->cl_parentperiod++; cl->cl_f = 0; actlist_insert(cl); if (cl->cl_usc != NULL) { /* class has upper limit curve */ if (cur_time == 0) cur_time = read_machclk(); /* update the ulimit curve */ rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time, cl->cl_total); /* compute myf */ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); cl->cl_myfadj = 0; } } if (cl->cl_myf > cl->cl_cfmin) f = cl->cl_myf; else f = cl->cl_cfmin; if (f != cl->cl_f) { cl->cl_f = f; update_cfmin(cl->cl_parent); } } } static void update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time) { u_int64_t f, myf_bound, delta; int go_passive; go_passive = qempty(cl->cl_q); for (; cl->cl_parent != NULL; cl = cl->cl_parent) { cl->cl_total += len; if (cl->cl_fsc == NULL || cl->cl_nactive == 0) continue; if (go_passive && --cl->cl_nactive == 0) go_passive = 1; else go_passive = 0; if (go_passive) { /* no more active child, going passive */ /* update cvtmax of the parent class */ if (cl->cl_vt > cl->cl_parent->cl_cvtmax) cl->cl_parent->cl_cvtmax = cl->cl_vt; /* remove this class from the vt list */ actlist_remove(cl); update_cfmin(cl->cl_parent); continue; } /* * update vt and f */ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) - cl->cl_vtoff + cl->cl_vtadj; /* * if vt of the class is smaller than cvtmin, * the class was skipped in the past due to non-fit. * if so, we need to adjust vtadj. */ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; cl->cl_vt = cl->cl_parent->cl_cvtmin; } /* update the vt list */ actlist_update(cl); if (cl->cl_usc != NULL) { cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, cl->cl_total); /* * if myf lags behind by more than one clock tick * from the current time, adjust myfadj to prevent * a rate-limited class from going greedy. * in a steady state under rate-limiting, myf * fluctuates within one clock tick. */ myf_bound = cur_time - machclk_per_tick; if (cl->cl_myf < myf_bound) { delta = cur_time - cl->cl_myf; cl->cl_myfadj += delta; cl->cl_myf += delta; } } /* cl_f is max(cl_myf, cl_cfmin) */ if (cl->cl_myf > cl->cl_cfmin) f = cl->cl_myf; else f = cl->cl_cfmin; if (f != cl->cl_f) { cl->cl_f = f; update_cfmin(cl->cl_parent); } } } static void update_cfmin(struct hfsc_class *cl) { struct hfsc_class *p; u_int64_t cfmin; if (TAILQ_EMPTY(&cl->cl_actc)) { cl->cl_cfmin = 0; return; } cfmin = HT_INFINITY; TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) { if (p->cl_f == 0) { cl->cl_cfmin = 0; return; } if (p->cl_f < cfmin) cfmin = p->cl_f; } cl->cl_cfmin = cfmin; } /* * TAILQ based ellist and actlist implementation * (ion wanted to make a calendar queue based implementation) */ /* * eligible list holds backlogged classes being sorted by their eligible times. * there is one eligible list per interface. */ static void ellist_insert(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; struct hfsc_class *p; /* check the last entry first */ if ((p = TAILQ_LAST(&hif->hif_eligible, elighead)) == NULL || p->cl_e <= cl->cl_e) { TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); return; } TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { if (cl->cl_e < p->cl_e) { TAILQ_INSERT_BEFORE(p, cl, cl_ellist); return; } } ASSERT(0); /* should not reach here */ } static void ellist_remove(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); } static void ellist_update(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; struct hfsc_class *p, *last; /* * the eligible time of a class increases monotonically. * if the next entry has a larger eligible time, nothing to do. */ p = TAILQ_NEXT(cl, cl_ellist); if (p == NULL || cl->cl_e <= p->cl_e) return; /* check the last entry */ last = TAILQ_LAST(&hif->hif_eligible, elighead); ASSERT(last != NULL); if (last->cl_e <= cl->cl_e) { TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); return; } /* * the new position must be between the next entry * and the last entry */ while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) { if (cl->cl_e < p->cl_e) { TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); TAILQ_INSERT_BEFORE(p, cl, cl_ellist); return; } } ASSERT(0); /* should not reach here */ } /* find the class with the minimum deadline among the eligible classes */ struct hfsc_class * hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time) { struct hfsc_class *p, *cl = NULL; TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { if (p->cl_e > cur_time) break; if (cl == NULL || p->cl_d < cl->cl_d) cl = p; } return (cl); } /* * active children list holds backlogged child classes being sorted * by their virtual time. * each intermediate class has one active children list. */ static void actlist_insert(struct hfsc_class *cl) { struct hfsc_class *p; /* check the last entry first */ if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead)) == NULL || p->cl_vt <= cl->cl_vt) { TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); return; } TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) { if (cl->cl_vt < p->cl_vt) { TAILQ_INSERT_BEFORE(p, cl, cl_actlist); return; } } ASSERT(0); /* should not reach here */ } static void actlist_remove(struct hfsc_class *cl) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); } static void actlist_update(struct hfsc_class *cl) { struct hfsc_class *p, *last; /* * the virtual time of a class increases monotonically during its * backlogged period. * if the next entry has a larger virtual time, nothing to do. */ p = TAILQ_NEXT(cl, cl_actlist); if (p == NULL || cl->cl_vt < p->cl_vt) return; /* check the last entry */ last = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead); ASSERT(last != NULL); if (last->cl_vt <= cl->cl_vt) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); return; } /* * the new position must be between the next entry * and the last entry */ while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) { if (cl->cl_vt < p->cl_vt) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); TAILQ_INSERT_BEFORE(p, cl, cl_actlist); return; } } ASSERT(0); /* should not reach here */ } static struct hfsc_class * actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time) { struct hfsc_class *p; TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) { if (p->cl_f <= cur_time) return (p); } return (NULL); } /* * service curve support functions * * external service curve parameters * m: bits/sec * d: msec * internal service curve parameters * sm: (bytes/machclk tick) << SM_SHIFT * ism: (machclk ticks/byte) << ISM_SHIFT * dx: machclk ticks * * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits. we * should be able to handle 100K-100Gbps linkspeed with 256 MHz machclk * frequency and at least 3 effective digits in decimal. * */ #define SM_SHIFT 24 #define ISM_SHIFT 14 #define SM_MASK ((1LL << SM_SHIFT) - 1) #define ISM_MASK ((1LL << ISM_SHIFT) - 1) static __inline u_int64_t seg_x2y(u_int64_t x, u_int64_t sm) { u_int64_t y; /* * compute * y = x * sm >> SM_SHIFT * but divide it for the upper and lower bits to avoid overflow */ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT); return (y); } static __inline u_int64_t seg_y2x(u_int64_t y, u_int64_t ism) { u_int64_t x; if (y == 0) x = 0; else if (ism == HT_INFINITY) x = HT_INFINITY; else { x = (y >> ISM_SHIFT) * ism + (((y & ISM_MASK) * ism) >> ISM_SHIFT); } return (x); } static __inline u_int64_t m2sm(u_int64_t m) { u_int64_t sm; sm = (m << SM_SHIFT) / 8 / machclk_freq; return (sm); } static __inline u_int64_t m2ism(u_int64_t m) { u_int64_t ism; if (m == 0) ism = HT_INFINITY; else ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m; return (ism); } static __inline u_int64_t d2dx(u_int d) { u_int64_t dx; dx = ((u_int64_t)d * machclk_freq) / 1000; return (dx); } static u_int64_t sm2m(u_int64_t sm) { u_int64_t m; m = (sm * 8 * machclk_freq) >> SM_SHIFT; return (m); } static u_int dx2d(u_int64_t dx) { u_int64_t d; d = dx * 1000 / machclk_freq; return ((u_int)d); } static void sc2isc(struct service_curve *sc, struct internal_sc *isc) { isc->sm1 = m2sm(sc->m1); isc->ism1 = m2ism(sc->m1); isc->dx = d2dx(sc->d); isc->dy = seg_x2y(isc->dx, isc->sm1); isc->sm2 = m2sm(sc->m2); isc->ism2 = m2ism(sc->m2); } /* * initialize the runtime service curve with the given internal * service curve starting at (x, y). */ static void rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x, u_int64_t y) { rtsc->x = x; rtsc->y = y; rtsc->sm1 = isc->sm1; rtsc->ism1 = isc->ism1; rtsc->dx = isc->dx; rtsc->dy = isc->dy; rtsc->sm2 = isc->sm2; rtsc->ism2 = isc->ism2; } /* * calculate the y-projection of the runtime service curve by the * given x-projection value */ static u_int64_t rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y) { u_int64_t x; if (y < rtsc->y) x = rtsc->x; else if (y <= rtsc->y + rtsc->dy) { /* x belongs to the 1st segment */ if (rtsc->dy == 0) x = rtsc->x + rtsc->dx; else x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1); } else { /* x belongs to the 2nd segment */ x = rtsc->x + rtsc->dx + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2); } return (x); } static u_int64_t rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x) { u_int64_t y; if (x <= rtsc->x) y = rtsc->y; else if (x <= rtsc->x + rtsc->dx) /* y belongs to the 1st segment */ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1); else /* y belongs to the 2nd segment */ y = rtsc->y + rtsc->dy + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2); return (y); } /* * update the runtime service curve by taking the minimum of the current * runtime service curve and the service curve starting at (x, y). */ static void rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x, u_int64_t y) { u_int64_t y1, y2, dx, dy; if (isc->sm1 <= isc->sm2) { /* service curve is convex */ y1 = rtsc_x2y(rtsc, x); if (y1 < y) /* the current rtsc is smaller */ return; rtsc->x = x; rtsc->y = y; return; } /* * service curve is concave * compute the two y values of the current rtsc * y1: at x * y2: at (x + dx) */ y1 = rtsc_x2y(rtsc, x); if (y1 <= y) { /* rtsc is below isc, no change to rtsc */ return; } y2 = rtsc_x2y(rtsc, x + isc->dx); if (y2 >= y + isc->dy) { /* rtsc is above isc, replace rtsc by isc */ rtsc->x = x; rtsc->y = y; rtsc->dx = isc->dx; rtsc->dy = isc->dy; return; } /* * the two curves intersect * compute the offsets (dx, dy) using the reverse * function of seg_x2y() * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y) */ dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2); /* * check if (x, y1) belongs to the 1st segment of rtsc. * if so, add the offset. */ if (rtsc->x + rtsc->dx > x) dx += rtsc->x + rtsc->dx - x; dy = seg_x2y(dx, isc->sm1); rtsc->x = x; rtsc->y = y; rtsc->dx = dx; rtsc->dy = dy; return; } static void get_class_stats_v0(struct hfsc_classstats_v0 *sp, struct hfsc_class *cl) { sp->class_id = cl->cl_id; sp->class_handle = cl->cl_handle; #define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX) if (cl->cl_rsc != NULL) { sp->rsc.m1 = SATU32(sm2m(cl->cl_rsc->sm1)); sp->rsc.d = dx2d(cl->cl_rsc->dx); sp->rsc.m2 = SATU32(sm2m(cl->cl_rsc->sm2)); } else { sp->rsc.m1 = 0; sp->rsc.d = 0; sp->rsc.m2 = 0; } if (cl->cl_fsc != NULL) { sp->fsc.m1 = SATU32(sm2m(cl->cl_fsc->sm1)); sp->fsc.d = dx2d(cl->cl_fsc->dx); sp->fsc.m2 = SATU32(sm2m(cl->cl_fsc->sm2)); } else { sp->fsc.m1 = 0; sp->fsc.d = 0; sp->fsc.m2 = 0; } if (cl->cl_usc != NULL) { sp->usc.m1 = SATU32(sm2m(cl->cl_usc->sm1)); sp->usc.d = dx2d(cl->cl_usc->dx); sp->usc.m2 = SATU32(sm2m(cl->cl_usc->sm2)); } else { sp->usc.m1 = 0; sp->usc.d = 0; sp->usc.m2 = 0; } #undef SATU32 sp->total = cl->cl_total; sp->cumul = cl->cl_cumul; sp->d = cl->cl_d; sp->e = cl->cl_e; sp->vt = cl->cl_vt; sp->f = cl->cl_f; sp->initvt = cl->cl_initvt; sp->vtperiod = cl->cl_vtperiod; sp->parentperiod = cl->cl_parentperiod; sp->nactive = cl->cl_nactive; sp->vtoff = cl->cl_vtoff; sp->cvtmax = cl->cl_cvtmax; sp->myf = cl->cl_myf; sp->cfmin = cl->cl_cfmin; sp->cvtmin = cl->cl_cvtmin; sp->myfadj = cl->cl_myfadj; sp->vtadj = cl->cl_vtadj; sp->cur_time = read_machclk(); sp->machclk_freq = machclk_freq; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); sp->xmit_cnt = cl->cl_stats.xmit_cnt; sp->drop_cnt = cl->cl_stats.drop_cnt; sp->period = cl->cl_stats.period; sp->qtype = qtype(cl->cl_q); #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_getstats(cl->cl_codel, &sp->codel); #endif } static void get_class_stats_v1(struct hfsc_classstats_v1 *sp, struct hfsc_class *cl) { sp->class_id = cl->cl_id; sp->class_handle = cl->cl_handle; if (cl->cl_rsc != NULL) { sp->rsc.m1 = sm2m(cl->cl_rsc->sm1); sp->rsc.d = dx2d(cl->cl_rsc->dx); sp->rsc.m2 = sm2m(cl->cl_rsc->sm2); } else { sp->rsc.m1 = 0; sp->rsc.d = 0; sp->rsc.m2 = 0; } if (cl->cl_fsc != NULL) { sp->fsc.m1 = sm2m(cl->cl_fsc->sm1); sp->fsc.d = dx2d(cl->cl_fsc->dx); sp->fsc.m2 = sm2m(cl->cl_fsc->sm2); } else { sp->fsc.m1 = 0; sp->fsc.d = 0; sp->fsc.m2 = 0; } if (cl->cl_usc != NULL) { sp->usc.m1 = sm2m(cl->cl_usc->sm1); sp->usc.d = dx2d(cl->cl_usc->dx); sp->usc.m2 = sm2m(cl->cl_usc->sm2); } else { sp->usc.m1 = 0; sp->usc.d = 0; sp->usc.m2 = 0; } sp->total = cl->cl_total; sp->cumul = cl->cl_cumul; sp->d = cl->cl_d; sp->e = cl->cl_e; sp->vt = cl->cl_vt; sp->f = cl->cl_f; sp->initvt = cl->cl_initvt; sp->vtperiod = cl->cl_vtperiod; sp->parentperiod = cl->cl_parentperiod; sp->nactive = cl->cl_nactive; sp->vtoff = cl->cl_vtoff; sp->cvtmax = cl->cl_cvtmax; sp->myf = cl->cl_myf; sp->cfmin = cl->cl_cfmin; sp->cvtmin = cl->cl_cvtmin; sp->myfadj = cl->cl_myfadj; sp->vtadj = cl->cl_vtadj; sp->cur_time = read_machclk(); sp->machclk_freq = machclk_freq; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); sp->xmit_cnt = cl->cl_stats.xmit_cnt; sp->drop_cnt = cl->cl_stats.drop_cnt; sp->period = cl->cl_stats.period; sp->qtype = qtype(cl->cl_q); #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_getstats(cl->cl_codel, &sp->codel); #endif } /* convert a class handle to the corresponding class pointer */ static struct hfsc_class * clh_to_clp(struct hfsc_if *hif, u_int32_t chandle) { int i; struct hfsc_class *cl; if (chandle == 0) return (NULL); /* * first, try optimistically the slot matching the lower bits of * the handle. if it fails, do the linear table search. */ i = chandle % HFSC_MAX_CLASSES; if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) return (cl); for (i = 0; i < HFSC_MAX_CLASSES; i++) if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } -#ifdef ALTQ3_COMPAT -static struct hfsc_if * -hfsc_attach(ifq, bandwidth) - struct ifaltq *ifq; - u_int bandwidth; -{ - struct hfsc_if *hif; - - hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK); - if (hif == NULL) - return (NULL); - bzero(hif, sizeof(struct hfsc_if)); - - hif->hif_eligible = ellist_alloc(); - if (hif->hif_eligible == NULL) { - free(hif, M_DEVBUF); - return NULL; - } - - hif->hif_ifq = ifq; - - /* add this state to the hfsc list */ - hif->hif_next = hif_list; - hif_list = hif; - - return (hif); -} - -static int -hfsc_detach(hif) - struct hfsc_if *hif; -{ - (void)hfsc_clear_interface(hif); - (void)hfsc_class_destroy(hif->hif_rootclass); - - /* remove this interface from the hif list */ - if (hif_list == hif) - hif_list = hif->hif_next; - else { - struct hfsc_if *h; - - for (h = hif_list; h != NULL; h = h->hif_next) - if (h->hif_next == hif) { - h->hif_next = hif->hif_next; - break; - } - ASSERT(h != NULL); - } - - ellist_destroy(hif->hif_eligible); - - free(hif, M_DEVBUF); - - return (0); -} - -static int -hfsc_class_modify(cl, rsc, fsc, usc) - struct hfsc_class *cl; - struct service_curve *rsc, *fsc, *usc; -{ - struct internal_sc *rsc_tmp, *fsc_tmp, *usc_tmp; - u_int64_t cur_time; - int s; - - rsc_tmp = fsc_tmp = usc_tmp = NULL; - if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) && - cl->cl_rsc == NULL) { - rsc_tmp = malloc(sizeof(struct internal_sc), - M_DEVBUF, M_WAITOK); - if (rsc_tmp == NULL) - return (ENOMEM); - } - if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) && - cl->cl_fsc == NULL) { - fsc_tmp = malloc(sizeof(struct internal_sc), - M_DEVBUF, M_WAITOK); - if (fsc_tmp == NULL) { - free(rsc_tmp); - return (ENOMEM); - } - } - if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) && - cl->cl_usc == NULL) { - usc_tmp = malloc(sizeof(struct internal_sc), - M_DEVBUF, M_WAITOK); - if (usc_tmp == NULL) { - free(rsc_tmp); - free(fsc_tmp); - return (ENOMEM); - } - } - - cur_time = read_machclk(); - s = splnet(); - IFQ_LOCK(cl->cl_hif->hif_ifq); - - if (rsc != NULL) { - if (rsc->m1 == 0 && rsc->m2 == 0) { - if (cl->cl_rsc != NULL) { - if (!qempty(cl->cl_q)) - hfsc_purgeq(cl); - free(cl->cl_rsc, M_DEVBUF); - cl->cl_rsc = NULL; - } - } else { - if (cl->cl_rsc == NULL) - cl->cl_rsc = rsc_tmp; - sc2isc(rsc, cl->cl_rsc); - rtsc_init(&cl->cl_deadline, cl->cl_rsc, cur_time, - cl->cl_cumul); - cl->cl_eligible = cl->cl_deadline; - if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) { - cl->cl_eligible.dx = 0; - cl->cl_eligible.dy = 0; - } - } - } - - if (fsc != NULL) { - if (fsc->m1 == 0 && fsc->m2 == 0) { - if (cl->cl_fsc != NULL) { - if (!qempty(cl->cl_q)) - hfsc_purgeq(cl); - free(cl->cl_fsc, M_DEVBUF); - cl->cl_fsc = NULL; - } - } else { - if (cl->cl_fsc == NULL) - cl->cl_fsc = fsc_tmp; - sc2isc(fsc, cl->cl_fsc); - rtsc_init(&cl->cl_virtual, cl->cl_fsc, cl->cl_vt, - cl->cl_total); - } - } - - if (usc != NULL) { - if (usc->m1 == 0 && usc->m2 == 0) { - if (cl->cl_usc != NULL) { - free(cl->cl_usc, M_DEVBUF); - cl->cl_usc = NULL; - cl->cl_myf = 0; - } - } else { - if (cl->cl_usc == NULL) - cl->cl_usc = usc_tmp; - sc2isc(usc, cl->cl_usc); - rtsc_init(&cl->cl_ulimit, cl->cl_usc, cur_time, - cl->cl_total); - } - } - - if (!qempty(cl->cl_q)) { - if (cl->cl_rsc != NULL) - update_ed(cl, m_pktlen(qhead(cl->cl_q))); - if (cl->cl_fsc != NULL) - update_vf(cl, 0, cur_time); - /* is this enough? */ - } - - IFQ_UNLOCK(cl->cl_hif->hif_ifq); - splx(s); - - return (0); -} - -/* - * hfsc device interface - */ -int -hfscopen(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - if (machclk_freq == 0) - init_machclk(); - - if (machclk_freq == 0) { - printf("hfsc: no cpu clock available!\n"); - return (ENXIO); - } - - /* everything will be done when the queueing scheme is attached. */ - return 0; -} - -int -hfscclose(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - struct hfsc_if *hif; - int err, error = 0; - - while ((hif = hif_list) != NULL) { - /* destroy all */ - if (ALTQ_IS_ENABLED(hif->hif_ifq)) - altq_disable(hif->hif_ifq); - - err = altq_detach(hif->hif_ifq); - if (err == 0) - err = hfsc_detach(hif); - if (err != 0 && error == 0) - error = err; - } - - return error; -} - -int -hfscioctl(dev, cmd, addr, flag, p) - dev_t dev; - ioctlcmd_t cmd; - caddr_t addr; - int flag; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - struct hfsc_if *hif; - struct hfsc_interface *ifacep; - int error = 0; - - /* check super-user privilege */ - switch (cmd) { - case HFSC_GETSTATS: - break; - default: -#if (__FreeBSD_version > 700000) - if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) - return (error); -#elsif (__FreeBSD_version > 400000) - if ((error = suser(p)) != 0) - return (error); -#else - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) - return (error); -#endif - break; - } - - switch (cmd) { - - case HFSC_IF_ATTACH: - error = hfsccmd_if_attach((struct hfsc_attach *)addr); - break; - - case HFSC_IF_DETACH: - error = hfsccmd_if_detach((struct hfsc_interface *)addr); - break; - - case HFSC_ENABLE: - case HFSC_DISABLE: - case HFSC_CLEAR_HIERARCHY: - ifacep = (struct hfsc_interface *)addr; - if ((hif = altq_lookup(ifacep->hfsc_ifname, - ALTQT_HFSC)) == NULL) { - error = EBADF; - break; - } - - switch (cmd) { - - case HFSC_ENABLE: - if (hif->hif_defaultclass == NULL) { -#ifdef ALTQ_DEBUG - printf("hfsc: no default class\n"); -#endif - error = EINVAL; - break; - } - error = altq_enable(hif->hif_ifq); - break; - - case HFSC_DISABLE: - error = altq_disable(hif->hif_ifq); - break; - - case HFSC_CLEAR_HIERARCHY: - hfsc_clear_interface(hif); - break; - } - break; - - case HFSC_ADD_CLASS: - error = hfsccmd_add_class((struct hfsc_add_class *)addr); - break; - - case HFSC_DEL_CLASS: - error = hfsccmd_delete_class((struct hfsc_delete_class *)addr); - break; - - case HFSC_MOD_CLASS: - error = hfsccmd_modify_class((struct hfsc_modify_class *)addr); - break; - - case HFSC_ADD_FILTER: - error = hfsccmd_add_filter((struct hfsc_add_filter *)addr); - break; - - case HFSC_DEL_FILTER: - error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr); - break; - - case HFSC_GETSTATS: - error = hfsccmd_class_stats((struct hfsc_class_stats *)addr); - break; - - default: - error = EINVAL; - break; - } - return error; -} - -static int -hfsccmd_if_attach(ap) - struct hfsc_attach *ap; -{ - struct hfsc_if *hif; - struct ifnet *ifp; - int error; - - if ((ifp = ifunit(ap->iface.hfsc_ifname)) == NULL) - return (ENXIO); - - if ((hif = hfsc_attach(&ifp->if_snd, ap->bandwidth)) == NULL) - return (ENOMEM); - - /* - * set HFSC to this ifnet structure. - */ - if ((error = altq_attach(&ifp->if_snd, ALTQT_HFSC, hif, - hfsc_enqueue, hfsc_dequeue, hfsc_request, - &hif->hif_classifier, acc_classify)) != 0) - (void)hfsc_detach(hif); - - return (error); -} - -static int -hfsccmd_if_detach(ap) - struct hfsc_interface *ap; -{ - struct hfsc_if *hif; - int error; - - if ((hif = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC)) == NULL) - return (EBADF); - - if (ALTQ_IS_ENABLED(hif->hif_ifq)) - altq_disable(hif->hif_ifq); - - if ((error = altq_detach(hif->hif_ifq))) - return (error); - - return hfsc_detach(hif); -} - -static int -hfsccmd_add_class(ap) - struct hfsc_add_class *ap; -{ - struct hfsc_if *hif; - struct hfsc_class *cl, *parent; - int i; - - if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) - return (EBADF); - - if (ap->parent_handle == HFSC_NULLCLASS_HANDLE && - hif->hif_rootclass == NULL) - parent = NULL; - else if ((parent = clh_to_clp(hif, ap->parent_handle)) == NULL) - return (EINVAL); - - /* assign a class handle (use a free slot number for now) */ - for (i = 1; i < HFSC_MAX_CLASSES; i++) - if (hif->hif_class_tbl[i] == NULL) - break; - if (i == HFSC_MAX_CLASSES) - return (EBUSY); - - if ((cl = hfsc_class_create(hif, &ap->service_curve, NULL, NULL, - parent, ap->qlimit, ap->flags, i)) == NULL) - return (ENOMEM); - - /* return a class handle to the user */ - ap->class_handle = i; - - return (0); -} - -static int -hfsccmd_delete_class(ap) - struct hfsc_delete_class *ap; -{ - struct hfsc_if *hif; - struct hfsc_class *cl; - - if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) - return (EBADF); - - if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) - return (EINVAL); - - return hfsc_class_destroy(cl); -} - -static int -hfsccmd_modify_class(ap) - struct hfsc_modify_class *ap; -{ - struct hfsc_if *hif; - struct hfsc_class *cl; - struct service_curve *rsc = NULL; - struct service_curve *fsc = NULL; - struct service_curve *usc = NULL; - - if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) - return (EBADF); - - if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) - return (EINVAL); - - if (ap->sctype & HFSC_REALTIMESC) - rsc = &ap->service_curve; - if (ap->sctype & HFSC_LINKSHARINGSC) - fsc = &ap->service_curve; - if (ap->sctype & HFSC_UPPERLIMITSC) - usc = &ap->service_curve; - - return hfsc_class_modify(cl, rsc, fsc, usc); -} - -static int -hfsccmd_add_filter(ap) - struct hfsc_add_filter *ap; -{ - struct hfsc_if *hif; - struct hfsc_class *cl; - - if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) - return (EBADF); - - if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) - return (EINVAL); - - if (is_a_parent_class(cl)) { -#ifdef ALTQ_DEBUG - printf("hfsccmd_add_filter: not a leaf class!\n"); -#endif - return (EINVAL); - } - - return acc_add_filter(&hif->hif_classifier, &ap->filter, - cl, &ap->filter_handle); -} - -static int -hfsccmd_delete_filter(ap) - struct hfsc_delete_filter *ap; -{ - struct hfsc_if *hif; - - if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) - return (EBADF); - - return acc_delete_filter(&hif->hif_classifier, - ap->filter_handle); -} - -static int -hfsccmd_class_stats(ap) - struct hfsc_class_stats *ap; -{ - struct hfsc_if *hif; - struct hfsc_class *cl; - struct hfsc_classstats stats, *usp; - int n, nclasses, error; - - if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) - return (EBADF); - - ap->cur_time = read_machclk(); - ap->machclk_freq = machclk_freq; - ap->hif_classes = hif->hif_classes; - ap->hif_packets = hif->hif_packets; - - /* skip the first N classes in the tree */ - nclasses = ap->nskip; - for (cl = hif->hif_rootclass, n = 0; cl != NULL && n < nclasses; - cl = hfsc_nextclass(cl), n++) - ; - if (n != nclasses) - return (EINVAL); - - /* then, read the next N classes in the tree */ - nclasses = ap->nclasses; - usp = ap->stats; - for (n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) { - - get_class_stats(&stats, cl); - - if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, - sizeof(stats))) != 0) - return (error); - } - - ap->nclasses = n; - - return (0); -} - -#ifdef KLD_MODULE - -static struct altqsw hfsc_sw = - {"hfsc", hfscopen, hfscclose, hfscioctl}; - -ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw); -MODULE_DEPEND(altq_hfsc, altq_red, 1, 1, 1); -MODULE_DEPEND(altq_hfsc, altq_rio, 1, 1, 1); - -#endif /* KLD_MODULE */ -#endif /* ALTQ3_COMPAT */ #endif /* ALTQ_HFSC */ Index: head/sys/net/altq/altq_hfsc.h =================================================================== --- head/sys/net/altq/altq_hfsc.h (revision 341506) +++ head/sys/net/altq/altq_hfsc.h (revision 341507) @@ -1,406 +1,338 @@ /*- * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. * * Permission to use, copy, modify, and distribute this software and * its documentation is hereby granted (including for commercial or * for-profit use), provided that both the copyright notice and this * permission notice appear in all copies of the software, derivative * works, or modified versions, and any portions thereof. * * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * Carnegie Mellon encourages (but does not require) users of this * software to return any improvements or extensions that they make, * and to grant Carnegie Mellon the rights to redistribute these * changes without encumbrance. * * $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_HFSC_H_ #define _ALTQ_ALTQ_HFSC_H_ #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif struct service_curve_v0 { u_int m1; /* slope of the first segment in bits/sec */ u_int d; /* the x-projection of the first segment in msec */ u_int m2; /* slope of the second segment in bits/sec */ }; struct service_curve_v1 { u_int64_t m1; /* slope of the first segment in bits/sec */ u_int d; /* the x-projection of the first segment in msec */ u_int64_t m2; /* slope of the second segment in bits/sec */ }; /* Latest version of struct service_curve_vX */ #define HFSC_SERVICE_CURVE_VERSION 1 /* special class handles */ #define HFSC_NULLCLASS_HANDLE 0 #define HFSC_MAX_CLASSES 64 /* hfsc class flags */ #define HFCF_RED 0x0001 /* use RED */ #define HFCF_ECN 0x0002 /* use RED/ECN */ #define HFCF_RIO 0x0004 /* use RIO */ #define HFCF_CODEL 0x0008 /* use CoDel */ #define HFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define HFCF_DEFAULTCLASS 0x1000 /* default class */ /* service curve types */ #define HFSC_REALTIMESC 1 #define HFSC_LINKSHARINGSC 2 #define HFSC_UPPERLIMITSC 4 #define HFSC_DEFAULTSC (HFSC_REALTIMESC|HFSC_LINKSHARINGSC) struct hfsc_classstats_v0 { u_int class_id; u_int32_t class_handle; struct service_curve_v0 rsc; struct service_curve_v0 fsc; struct service_curve_v0 usc; /* upper limit service curve */ u_int64_t total; /* total work in bytes */ u_int64_t cumul; /* cumulative work in bytes done by real-time criteria */ u_int64_t d; /* deadline */ u_int64_t e; /* eligible time */ u_int64_t vt; /* virtual time */ u_int64_t f; /* fit time for upper-limit */ /* info helpful for debugging */ u_int64_t initvt; /* init virtual time */ u_int64_t vtoff; /* cl_vt_ipoff */ u_int64_t cvtmax; /* cl_maxvt */ u_int64_t myf; /* cl_myf */ u_int64_t cfmin; /* cl_mincf */ u_int64_t cvtmin; /* cl_mincvt */ u_int64_t myfadj; /* cl_myfadj */ u_int64_t vtadj; /* cl_vtadj */ u_int64_t cur_time; u_int32_t machclk_freq; u_int qlength; u_int qlimit; struct pktcntr xmit_cnt; struct pktcntr drop_cnt; u_int period; u_int vtperiod; /* vt period sequence no */ u_int parentperiod; /* parent's vt period seqno */ int nactive; /* number of active children */ /* codel, red and rio related info */ int qtype; struct redstats red[3]; struct codel_stats codel; }; struct hfsc_classstats_v1 { u_int class_id; u_int32_t class_handle; struct service_curve_v1 rsc; struct service_curve_v1 fsc; struct service_curve_v1 usc; /* upper limit service curve */ u_int64_t total; /* total work in bytes */ u_int64_t cumul; /* cumulative work in bytes done by real-time criteria */ u_int64_t d; /* deadline */ u_int64_t e; /* eligible time */ u_int64_t vt; /* virtual time */ u_int64_t f; /* fit time for upper-limit */ /* info helpful for debugging */ u_int64_t initvt; /* init virtual time */ u_int64_t vtoff; /* cl_vt_ipoff */ u_int64_t cvtmax; /* cl_maxvt */ u_int64_t myf; /* cl_myf */ u_int64_t cfmin; /* cl_mincf */ u_int64_t cvtmin; /* cl_mincvt */ u_int64_t myfadj; /* cl_myfadj */ u_int64_t vtadj; /* cl_vtadj */ u_int64_t cur_time; u_int32_t machclk_freq; u_int qlength; u_int qlimit; struct pktcntr xmit_cnt; struct pktcntr drop_cnt; u_int period; u_int vtperiod; /* vt period sequence no */ u_int parentperiod; /* parent's vt period seqno */ int nactive; /* number of active children */ /* codel, red and rio related info */ int qtype; struct redstats red[3]; struct codel_stats codel; }; /* * HFSC_STATS_VERSION is defined in altq.h to work around issues stemming * from mixing of public-API and internal bits in each scheduler-specific * header. */ -#ifdef ALTQ3_COMPAT -struct hfsc_interface { - char hfsc_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */ -}; - -struct hfsc_attach { - struct hfsc_interface iface; - u_int bandwidth; /* link bandwidth in bits/sec */ -}; - -struct hfsc_add_class { - struct hfsc_interface iface; - u_int32_t parent_handle; - struct service_curve service_curve; - int qlimit; - int flags; - - u_int32_t class_handle; /* return value */ -}; - -struct hfsc_delete_class { - struct hfsc_interface iface; - u_int32_t class_handle; -}; - -struct hfsc_modify_class { - struct hfsc_interface iface; - u_int32_t class_handle; - struct service_curve service_curve; - int sctype; -}; - -struct hfsc_add_filter { - struct hfsc_interface iface; - u_int32_t class_handle; - struct flow_filter filter; - - u_long filter_handle; /* return value */ -}; - -struct hfsc_delete_filter { - struct hfsc_interface iface; - u_long filter_handle; -}; - -struct hfsc_class_stats { - struct hfsc_interface iface; - int nskip; /* skip # of classes */ - int nclasses; /* # of class stats (WR) */ - u_int64_t cur_time; /* current time */ - u_int32_t machclk_freq; /* machine clock frequency */ - u_int hif_classes; /* # of classes in the tree */ - u_int hif_packets; /* # of packets in the tree */ - struct hfsc_classstats *stats; /* pointer to stats array */ -}; - -#define HFSC_IF_ATTACH _IOW('Q', 1, struct hfsc_attach) -#define HFSC_IF_DETACH _IOW('Q', 2, struct hfsc_interface) -#define HFSC_ENABLE _IOW('Q', 3, struct hfsc_interface) -#define HFSC_DISABLE _IOW('Q', 4, struct hfsc_interface) -#define HFSC_CLEAR_HIERARCHY _IOW('Q', 5, struct hfsc_interface) -#define HFSC_ADD_CLASS _IOWR('Q', 7, struct hfsc_add_class) -#define HFSC_DEL_CLASS _IOW('Q', 8, struct hfsc_delete_class) -#define HFSC_MOD_CLASS _IOW('Q', 9, struct hfsc_modify_class) -#define HFSC_ADD_FILTER _IOWR('Q', 10, struct hfsc_add_filter) -#define HFSC_DEL_FILTER _IOW('Q', 11, struct hfsc_delete_filter) -#define HFSC_GETSTATS _IOWR('Q', 12, struct hfsc_class_stats) -#endif /* ALTQ3_COMPAT */ #ifdef _KERNEL /* * kernel internal service curve representation * coordinates are given by 64 bit unsigned integers. * x-axis: unit is clock count. for the intel x86 architecture, * the raw Pentium TSC (Timestamp Counter) value is used. * virtual time is also calculated in this time scale. * y-axis: unit is byte. * * the service curve parameters are converted to the internal * representation. * the slope values are scaled to avoid overflow. * the inverse slope values as well as the y-projection of the 1st * segment are kept in order to avoid 64-bit divide operations * that are expensive on 32-bit architectures. * * note: Intel Pentium TSC never wraps around in several thousands of years. * x-axis doesn't wrap around for 1089 years with 1GHz clock. * y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth. */ /* kernel internal representation of a service curve */ struct internal_sc { u_int64_t sm1; /* scaled slope of the 1st segment */ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */ u_int64_t dx; /* the x-projection of the 1st segment */ u_int64_t dy; /* the y-projection of the 1st segment */ u_int64_t sm2; /* scaled slope of the 2nd segment */ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */ }; /* runtime service curve */ struct runtime_sc { u_int64_t x; /* current starting position on x-axis */ u_int64_t y; /* current starting position on x-axis */ u_int64_t sm1; /* scaled slope of the 1st segment */ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */ u_int64_t dx; /* the x-projection of the 1st segment */ u_int64_t dy; /* the y-projection of the 1st segment */ u_int64_t sm2; /* scaled slope of the 2nd segment */ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */ }; struct hfsc_class { u_int cl_id; /* class id (just for debug) */ u_int32_t cl_handle; /* class handle */ struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */ int cl_flags; /* misc flags */ struct hfsc_class *cl_parent; /* parent class */ struct hfsc_class *cl_siblings; /* sibling classes */ struct hfsc_class *cl_children; /* child classes */ class_queue_t *cl_q; /* class queue structure */ union { struct red *cl_red; /* RED state */ struct codel *cl_codel; /* CoDel state */ } cl_aqm; #define cl_red cl_aqm.cl_red #define cl_codel cl_aqm.cl_codel struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ u_int64_t cl_total; /* total work in bytes */ u_int64_t cl_cumul; /* cumulative work in bytes done by real-time criteria */ u_int64_t cl_d; /* deadline */ u_int64_t cl_e; /* eligible time */ u_int64_t cl_vt; /* virtual time */ u_int64_t cl_f; /* time when this class will fit for link-sharing, max(myf, cfmin) */ u_int64_t cl_myf; /* my fit-time (as calculated from this class's own upperlimit curve) */ u_int64_t cl_myfadj; /* my fit-time adjustment (to cancel history dependence) */ u_int64_t cl_cfmin; /* earliest children's fit-time (used with cl_myf to obtain cl_f) */ u_int64_t cl_cvtmin; /* minimal virtual time among the children fit for link-sharing (monotonic within a period) */ u_int64_t cl_vtadj; /* intra-period cumulative vt adjustment */ u_int64_t cl_vtoff; /* inter-period cumulative vt offset */ u_int64_t cl_cvtmax; /* max child's vt in the last period */ u_int64_t cl_initvt; /* init virtual time (for debugging) */ struct internal_sc *cl_rsc; /* internal real-time service curve */ struct internal_sc *cl_fsc; /* internal fair service curve */ struct internal_sc *cl_usc; /* internal upperlimit service curve */ struct runtime_sc cl_deadline; /* deadline curve */ struct runtime_sc cl_eligible; /* eligible curve */ struct runtime_sc cl_virtual; /* virtual curve */ struct runtime_sc cl_ulimit; /* upperlimit curve */ u_int cl_vtperiod; /* vt period sequence no */ u_int cl_parentperiod; /* parent's vt period seqno */ int cl_nactive; /* number of active children */ TAILQ_HEAD(acthead, hfsc_class) cl_actc; /* active children list */ TAILQ_ENTRY(hfsc_class) cl_actlist; /* active children list entry */ TAILQ_ENTRY(hfsc_class) cl_ellist; /* eligible list entry */ struct { struct pktcntr xmit_cnt; struct pktcntr drop_cnt; u_int period; } cl_stats; }; /* * hfsc interface state */ struct hfsc_if { struct hfsc_if *hif_next; /* interface state list */ struct ifaltq *hif_ifq; /* backpointer to ifaltq */ struct hfsc_class *hif_rootclass; /* root class */ struct hfsc_class *hif_defaultclass; /* default class */ struct hfsc_class *hif_class_tbl[HFSC_MAX_CLASSES]; struct hfsc_class *hif_pollcache; /* cache for poll operation */ u_int hif_classes; /* # of classes in the tree */ u_int hif_packets; /* # of packets in the tree */ u_int hif_classid; /* class id sequence number */ TAILQ_HEAD(elighead, hfsc_class) hif_eligible; /* eligible list */ #ifdef ALTQ3_CLFIER_COMPAT struct acc_classifier hif_classifier; #endif }; /* * Kernel code always wants the latest version - avoid a bunch of renames in * the code to the current latest versioned name. */ #define service_curve __CONCAT(service_curve_v, HFSC_SERVICE_CURVE_VERSION) #else /* _KERNEL */ #ifdef PFIOC_USE_LATEST /* * Maintaining in-tree consumers of the ioctl interface is easier when that * code can be written in terms old names that refer to the latest interface * version as that reduces the required changes in the consumers to those * that are functionally necessary to accommodate a new interface version. */ #define hfsc_classstats __CONCAT(hfsc_classstats_v, HFSC_STATS_VERSION) #define service_curve __CONCAT(service_curve_v, HFSC_SERVICE_CURVE_VERSION) #else /* * When building out-of-tree code that is written for the old interface, * such as may exist in ports for example, resolve the old struct tags to * the v0 versions. */ #define hfsc_classstats __CONCAT(hfsc_classstats_v, 0) #define service_curve __CONCAT(service_curve_v, 0) #endif /* PFIOC_USE_LATEST */ #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _ALTQ_ALTQ_HFSC_H_ */ Index: head/sys/net/altq/altq_priq.c =================================================================== --- head/sys/net/altq/altq_priq.c (revision 341506) +++ head/sys/net/altq/altq_priq.c (revision 341507) @@ -1,1070 +1,645 @@ /*- * Copyright (C) 2000-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ * $FreeBSD$ */ /* * priority queue */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef ALTQ3_COMPAT -#include -#endif #include /* * function prototypes */ -#ifdef ALTQ3_COMPAT -static struct priq_if *priq_attach(struct ifaltq *, u_int); -static int priq_detach(struct priq_if *); -#endif static int priq_clear_interface(struct priq_if *); static int priq_request(struct ifaltq *, int, void *); static void priq_purge(struct priq_if *); static struct priq_class *priq_class_create(struct priq_if *, int, int, int, int); static int priq_class_destroy(struct priq_class *); static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *priq_dequeue(struct ifaltq *, int); static int priq_addq(struct priq_class *, struct mbuf *); static struct mbuf *priq_getq(struct priq_class *); static struct mbuf *priq_pollq(struct priq_class *); static void priq_purgeq(struct priq_class *); -#ifdef ALTQ3_COMPAT -static int priqcmd_if_attach(struct priq_interface *); -static int priqcmd_if_detach(struct priq_interface *); -static int priqcmd_add_class(struct priq_add_class *); -static int priqcmd_delete_class(struct priq_delete_class *); -static int priqcmd_modify_class(struct priq_modify_class *); -static int priqcmd_add_filter(struct priq_add_filter *); -static int priqcmd_delete_filter(struct priq_delete_filter *); -static int priqcmd_class_stats(struct priq_class_stats *); -#endif /* ALTQ3_COMPAT */ static void get_class_stats(struct priq_classstats *, struct priq_class *); static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t); -#ifdef ALTQ3_COMPAT -altqdev_decl(priq); -/* pif_list keeps all priq_if's allocated. */ -static struct priq_if *pif_list = NULL; -#endif /* ALTQ3_COMPAT */ - int priq_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); s = splnet(); error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc, priq_enqueue, priq_dequeue, priq_request, NULL, NULL); splx(s); return (error); } int priq_add_altq(struct pf_altq *a) { struct priq_if *pif; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (pif == NULL) return (ENOMEM); pif->pif_bandwidth = a->ifbandwidth; pif->pif_maxpri = -1; pif->pif_ifq = &ifp->if_snd; /* keep the state in pf_altq */ a->altq_disc = pif; return (0); } int priq_remove_altq(struct pf_altq *a) { struct priq_if *pif; if ((pif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; (void)priq_clear_interface(pif); free(pif, M_DEVBUF); return (0); } int priq_add_queue(struct pf_altq *a) { struct priq_if *pif; struct priq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); /* check parameters */ if (a->priority >= PRIQ_MAXPRI) return (EINVAL); if (a->qid == 0) return (EINVAL); if (pif->pif_classes[a->priority] != NULL) return (EBUSY); if (clh_to_clp(pif, a->qid) != NULL) return (EBUSY); cl = priq_class_create(pif, a->priority, a->qlimit, a->pq_u.priq_opts.flags, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int priq_remove_queue(struct pf_altq *a) { struct priq_if *pif; struct priq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); return (priq_class_destroy(cl)); } int priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct priq_if *pif; struct priq_class *cl; struct priq_classstats stats; int error = 0; if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes. */ static int priq_clear_interface(struct priq_if *pif) { struct priq_class *cl; int pri; #ifdef ALTQ3_CLFIER_COMPAT /* free the filters for this interface */ acc_discard_filters(&pif->pif_classifier, NULL, 1); #endif /* clear out the classes */ for (pri = 0; pri <= pif->pif_maxpri; pri++) if ((cl = pif->pif_classes[pri]) != NULL) priq_class_destroy(cl); return (0); } static int priq_request(struct ifaltq *ifq, int req, void *arg) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: priq_purge(pif); break; } return (0); } /* discard all the queued packets on the interface */ static void priq_purge(struct priq_if *pif) { struct priq_class *cl; int pri; for (pri = 0; pri <= pif->pif_maxpri; pri++) { if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q)) priq_purgeq(cl); } if (ALTQ_IS_ENABLED(pif->pif_ifq)) pif->pif_ifq->ifq_len = 0; } static struct priq_class * priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) { struct priq_class *cl; int s; #ifndef ALTQ_RED if (flags & PRCF_RED) { #ifdef ALTQ_DEBUG printf("priq_class_create: RED not configured for PRIQ!\n"); #endif return (NULL); } #endif #ifndef ALTQ_CODEL if (flags & PRCF_CODEL) { #ifdef ALTQ_DEBUG printf("priq_class_create: CODEL not configured for PRIQ!\n"); #endif return (NULL); } #endif if ((cl = pif->pif_classes[pri]) != NULL) { /* modify the class instead of creating a new one */ s = splnet(); IFQ_LOCK(cl->cl_pif->pif_ifq); if (!qempty(cl->cl_q)) priq_purgeq(cl); IFQ_UNLOCK(cl->cl_pif->pif_ifq); splx(s); #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } else { cl = malloc(sizeof(struct priq_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->cl_q == NULL) goto err_ret; } pif->pif_classes[pri] = cl; if (flags & PRCF_DEFAULTCLASS) pif->pif_default = cl; if (qlimit == 0) qlimit = 50; /* use default */ qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; qsize(cl->cl_q) = 0; cl->cl_flags = flags; cl->cl_pri = pri; if (pri > pif->pif_maxpri) pif->pif_maxpri = pri; cl->cl_pif = pif; cl->cl_handle = qid; #ifdef ALTQ_RED if (flags & (PRCF_RED|PRCF_RIO)) { int red_flags, red_pkttime; red_flags = 0; if (flags & PRCF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & PRCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (pif->pif_bandwidth < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); #ifdef ALTQ_RIO if (flags & PRCF_RIO) { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red == NULL) goto err_ret; qtype(cl->cl_q) = Q_RIO; } else #endif if (flags & PRCF_RED) { cl->cl_red = red_alloc(0, 0, qlimit(cl->cl_q) * 10/100, qlimit(cl->cl_q) * 30/100, red_flags, red_pkttime); if (cl->cl_red == NULL) goto err_ret; qtype(cl->cl_q) = Q_RED; } } #endif /* ALTQ_RED */ #ifdef ALTQ_CODEL if (flags & PRCF_CODEL) { cl->cl_codel = codel_alloc(5, 100, 0); if (cl->cl_codel != NULL) qtype(cl->cl_q) = Q_CODEL; } #endif return (cl); err_ret: if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (NULL); } static int priq_class_destroy(struct priq_class *cl) { struct priq_if *pif; int s, pri; s = splnet(); IFQ_LOCK(cl->cl_pif->pif_ifq); #ifdef ALTQ3_CLFIER_COMPAT /* delete filters referencing to this class */ acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0); #endif if (!qempty(cl->cl_q)) priq_purgeq(cl); pif = cl->cl_pif; pif->pif_classes[cl->cl_pri] = NULL; if (pif->pif_maxpri == cl->cl_pri) { for (pri = cl->cl_pri; pri >= 0; pri--) if (pif->pif_classes[pri] != NULL) { pif->pif_maxpri = pri; break; } if (pri < 0) pif->pif_maxpri = -1; } IFQ_UNLOCK(cl->cl_pif->pif_ifq); splx(s); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * priq_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. */ static int priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; struct priq_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(pif, t->qid); -#ifdef ALTQ3_COMPAT - else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) - cl = pktattr->pattr_class; -#endif if (cl == NULL) { cl = pif->pif_default; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } -#ifdef ALTQ3_COMPAT - if (pktattr != NULL) - cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */ - else -#endif cl->cl_pktattr = NULL; len = m_pktlen(m); if (priq_addq(cl, m) != 0) { /* drop occurred. mbuf was freed in priq_addq. */ PKTCNTR_ADD(&cl->cl_dropcnt, len); return (ENOBUFS); } IFQ_INC_LEN(ifq); /* successfully queued. */ return (0); } /* * priq_dequeue is a dequeue function to be registered to * (*altq_dequeue) in struct ifaltq. * * note: ALTDQ_POLL returns the next packet without removing the packet * from the queue. ALTDQ_REMOVE is a normal dequeue operation. * ALTDQ_REMOVE must return the same packet if called immediately * after ALTDQ_POLL. */ static struct mbuf * priq_dequeue(struct ifaltq *ifq, int op) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; struct priq_class *cl; struct mbuf *m; int pri; IFQ_LOCK_ASSERT(ifq); if (IFQ_IS_EMPTY(ifq)) /* no packet in the queue */ return (NULL); for (pri = pif->pif_maxpri; pri >= 0; pri--) { if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q)) { if (op == ALTDQ_POLL) return (priq_pollq(cl)); m = priq_getq(cl); if (m != NULL) { IFQ_DEC_LEN(ifq); if (qempty(cl->cl_q)) cl->cl_period++; PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(m)); } return (m); } } return (NULL); } static int priq_addq(struct priq_class *cl, struct mbuf *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_addq(cl->cl_codel, cl->cl_q, m); #endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); } if (cl->cl_flags & PRCF_CLEARDSCP) write_dsfield(m, cl->cl_pktattr, 0); _addq(cl->cl_q, m); return (0); } static struct mbuf * priq_getq(struct priq_class *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_getq((rio_t *)cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_getq(cl->cl_codel, cl->cl_q); #endif return _getq(cl->cl_q); } static struct mbuf * priq_pollq(cl) struct priq_class *cl; { return qhead(cl->cl_q); } static void priq_purgeq(struct priq_class *cl) { struct mbuf *m; if (qempty(cl->cl_q)) return; while ((m = _getq(cl->cl_q)) != NULL) { PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); m_freem(m); } ASSERT(qlen(cl->cl_q) == 0); } static void get_class_stats(struct priq_classstats *sp, struct priq_class *cl) { sp->class_handle = cl->cl_handle; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); sp->period = cl->cl_period; sp->xmitcnt = cl->cl_xmitcnt; sp->dropcnt = cl->cl_dropcnt; sp->qtype = qtype(cl->cl_q); #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_getstats(cl->cl_codel, &sp->codel); #endif } /* convert a class handle to the corresponding class pointer */ static struct priq_class * clh_to_clp(struct priq_if *pif, u_int32_t chandle) { struct priq_class *cl; int idx; if (chandle == 0) return (NULL); for (idx = pif->pif_maxpri; idx >= 0; idx--) if ((cl = pif->pif_classes[idx]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } -#ifdef ALTQ3_COMPAT - -static struct priq_if * -priq_attach(ifq, bandwidth) - struct ifaltq *ifq; - u_int bandwidth; -{ - struct priq_if *pif; - - pif = malloc(sizeof(struct priq_if), - M_DEVBUF, M_WAITOK); - if (pif == NULL) - return (NULL); - bzero(pif, sizeof(struct priq_if)); - pif->pif_bandwidth = bandwidth; - pif->pif_maxpri = -1; - pif->pif_ifq = ifq; - - /* add this state to the priq list */ - pif->pif_next = pif_list; - pif_list = pif; - - return (pif); -} - -static int -priq_detach(pif) - struct priq_if *pif; -{ - (void)priq_clear_interface(pif); - - /* remove this interface from the pif list */ - if (pif_list == pif) - pif_list = pif->pif_next; - else { - struct priq_if *p; - - for (p = pif_list; p != NULL; p = p->pif_next) - if (p->pif_next == pif) { - p->pif_next = pif->pif_next; - break; - } - ASSERT(p != NULL); - } - - free(pif, M_DEVBUF); - return (0); -} - -/* - * priq device interface - */ -int -priqopen(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - /* everything will be done when the queueing scheme is attached. */ - return 0; -} - -int -priqclose(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - struct priq_if *pif; - int err, error = 0; - - while ((pif = pif_list) != NULL) { - /* destroy all */ - if (ALTQ_IS_ENABLED(pif->pif_ifq)) - altq_disable(pif->pif_ifq); - - err = altq_detach(pif->pif_ifq); - if (err == 0) - err = priq_detach(pif); - if (err != 0 && error == 0) - error = err; - } - - return error; -} - -int -priqioctl(dev, cmd, addr, flag, p) - dev_t dev; - ioctlcmd_t cmd; - caddr_t addr; - int flag; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - struct priq_if *pif; - struct priq_interface *ifacep; - int error = 0; - - /* check super-user privilege */ - switch (cmd) { - case PRIQ_GETSTATS: - break; - default: -#if (__FreeBSD_version > 700000) - if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) - return (error); -#elsif (__FreeBSD_version > 400000) - if ((error = suser(p)) != 0) - return (error); -#else - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) - return (error); -#endif - break; - } - - switch (cmd) { - - case PRIQ_IF_ATTACH: - error = priqcmd_if_attach((struct priq_interface *)addr); - break; - - case PRIQ_IF_DETACH: - error = priqcmd_if_detach((struct priq_interface *)addr); - break; - - case PRIQ_ENABLE: - case PRIQ_DISABLE: - case PRIQ_CLEAR: - ifacep = (struct priq_interface *)addr; - if ((pif = altq_lookup(ifacep->ifname, - ALTQT_PRIQ)) == NULL) { - error = EBADF; - break; - } - - switch (cmd) { - case PRIQ_ENABLE: - if (pif->pif_default == NULL) { -#ifdef ALTQ_DEBUG - printf("priq: no default class\n"); -#endif - error = EINVAL; - break; - } - error = altq_enable(pif->pif_ifq); - break; - - case PRIQ_DISABLE: - error = altq_disable(pif->pif_ifq); - break; - - case PRIQ_CLEAR: - priq_clear_interface(pif); - break; - } - break; - - case PRIQ_ADD_CLASS: - error = priqcmd_add_class((struct priq_add_class *)addr); - break; - - case PRIQ_DEL_CLASS: - error = priqcmd_delete_class((struct priq_delete_class *)addr); - break; - - case PRIQ_MOD_CLASS: - error = priqcmd_modify_class((struct priq_modify_class *)addr); - break; - - case PRIQ_ADD_FILTER: - error = priqcmd_add_filter((struct priq_add_filter *)addr); - break; - - case PRIQ_DEL_FILTER: - error = priqcmd_delete_filter((struct priq_delete_filter *)addr); - break; - - case PRIQ_GETSTATS: - error = priqcmd_class_stats((struct priq_class_stats *)addr); - break; - - default: - error = EINVAL; - break; - } - return error; -} - -static int -priqcmd_if_attach(ap) - struct priq_interface *ap; -{ - struct priq_if *pif; - struct ifnet *ifp; - int error; - - if ((ifp = ifunit(ap->ifname)) == NULL) - return (ENXIO); - - if ((pif = priq_attach(&ifp->if_snd, ap->arg)) == NULL) - return (ENOMEM); - - /* - * set PRIQ to this ifnet structure. - */ - if ((error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, pif, - priq_enqueue, priq_dequeue, priq_request, - &pif->pif_classifier, acc_classify)) != 0) - (void)priq_detach(pif); - - return (error); -} - -static int -priqcmd_if_detach(ap) - struct priq_interface *ap; -{ - struct priq_if *pif; - int error; - - if ((pif = altq_lookup(ap->ifname, ALTQT_PRIQ)) == NULL) - return (EBADF); - - if (ALTQ_IS_ENABLED(pif->pif_ifq)) - altq_disable(pif->pif_ifq); - - if ((error = altq_detach(pif->pif_ifq))) - return (error); - - return priq_detach(pif); -} - -static int -priqcmd_add_class(ap) - struct priq_add_class *ap; -{ - struct priq_if *pif; - struct priq_class *cl; - int qid; - - if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) - return (EBADF); - - if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI) - return (EINVAL); - if (pif->pif_classes[ap->pri] != NULL) - return (EBUSY); - - qid = ap->pri + 1; - if ((cl = priq_class_create(pif, ap->pri, - ap->qlimit, ap->flags, qid)) == NULL) - return (ENOMEM); - - /* return a class handle to the user */ - ap->class_handle = cl->cl_handle; - - return (0); -} - -static int -priqcmd_delete_class(ap) - struct priq_delete_class *ap; -{ - struct priq_if *pif; - struct priq_class *cl; - - if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) - return (EBADF); - - if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) - return (EINVAL); - - return priq_class_destroy(cl); -} - -static int -priqcmd_modify_class(ap) - struct priq_modify_class *ap; -{ - struct priq_if *pif; - struct priq_class *cl; - - if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) - return (EBADF); - - if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI) - return (EINVAL); - - if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) - return (EINVAL); - - /* - * if priority is changed, move the class to the new priority - */ - if (pif->pif_classes[ap->pri] != cl) { - if (pif->pif_classes[ap->pri] != NULL) - return (EEXIST); - pif->pif_classes[cl->cl_pri] = NULL; - pif->pif_classes[ap->pri] = cl; - cl->cl_pri = ap->pri; - } - - /* call priq_class_create to change class parameters */ - if ((cl = priq_class_create(pif, ap->pri, - ap->qlimit, ap->flags, ap->class_handle)) == NULL) - return (ENOMEM); - return 0; -} - -static int -priqcmd_add_filter(ap) - struct priq_add_filter *ap; -{ - struct priq_if *pif; - struct priq_class *cl; - - if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) - return (EBADF); - - if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) - return (EINVAL); - - return acc_add_filter(&pif->pif_classifier, &ap->filter, - cl, &ap->filter_handle); -} - -static int -priqcmd_delete_filter(ap) - struct priq_delete_filter *ap; -{ - struct priq_if *pif; - - if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) - return (EBADF); - - return acc_delete_filter(&pif->pif_classifier, - ap->filter_handle); -} - -static int -priqcmd_class_stats(ap) - struct priq_class_stats *ap; -{ - struct priq_if *pif; - struct priq_class *cl; - struct priq_classstats stats, *usp; - int pri, error; - - if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) - return (EBADF); - - ap->maxpri = pif->pif_maxpri; - - /* then, read the next N classes in the tree */ - usp = ap->stats; - for (pri = 0; pri <= pif->pif_maxpri; pri++) { - cl = pif->pif_classes[pri]; - if (cl != NULL) - get_class_stats(&stats, cl); - else - bzero(&stats, sizeof(stats)); - if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, - sizeof(stats))) != 0) - return (error); - } - return (0); -} - -#ifdef KLD_MODULE - -static struct altqsw priq_sw = - {"priq", priqopen, priqclose, priqioctl}; - -ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw); -MODULE_DEPEND(altq_priq, altq_red, 1, 1, 1); -MODULE_DEPEND(altq_priq, altq_rio, 1, 1, 1); - -#endif /* KLD_MODULE */ - -#endif /* ALTQ3_COMPAT */ #endif /* ALTQ_PRIQ */ Index: head/sys/net/altq/altq_priq.h =================================================================== --- head/sys/net/altq/altq_priq.h (revision 341506) +++ head/sys/net/altq/altq_priq.h (revision 341507) @@ -1,186 +1,123 @@ /*- * Copyright (C) 2000-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_priq.h,v 1.7 2003/10/03 05:05:15 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_PRIQ_H_ #define _ALTQ_ALTQ_PRIQ_H_ #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif #define PRIQ_MAXPRI 16 /* upper limit of the number of priorities */ -#ifdef ALTQ3_COMPAT -struct priq_interface { - char ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */ - u_long arg; /* request-specific argument */ -}; -struct priq_add_class { - struct priq_interface iface; - int pri; /* priority (0 is the lowest) */ - int qlimit; /* queue size limit */ - int flags; /* misc flags (see below) */ - - u_int32_t class_handle; /* return value */ -}; -#endif /* ALTQ3_COMPAT */ - /* priq class flags */ #define PRCF_RED 0x0001 /* use RED */ #define PRCF_ECN 0x0002 /* use RED/ECN */ #define PRCF_RIO 0x0004 /* use RIO */ #define PRCF_CODEL 0x0008 /* use CoDel */ #define PRCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define PRCF_DEFAULTCLASS 0x1000 /* default class */ /* special class handles */ #define PRIQ_NULLCLASS_HANDLE 0 -#ifdef ALTQ3_COMPAT -struct priq_delete_class { - struct priq_interface iface; - u_int32_t class_handle; -}; -struct priq_modify_class { - struct priq_interface iface; - u_int32_t class_handle; - int pri; - int qlimit; - int flags; -}; - -struct priq_add_filter { - struct priq_interface iface; - u_int32_t class_handle; - struct flow_filter filter; - - u_long filter_handle; /* return value */ -}; - -struct priq_delete_filter { - struct priq_interface iface; - u_long filter_handle; -}; -#endif /* ALTQ3_COMPAT */ - struct priq_classstats { u_int32_t class_handle; u_int qlength; u_int qlimit; u_int period; struct pktcntr xmitcnt; /* transmitted packet counter */ struct pktcntr dropcnt; /* dropped packet counter */ /* codel, red and rio related info */ int qtype; struct redstats red[3]; /* rio has 3 red stats */ struct codel_stats codel; }; /* * PRIQ_STATS_VERSION is defined in altq.h to work around issues stemming * from mixing of public-API and internal bits in each scheduler-specific * header. */ -#ifdef ALTQ3_COMPAT -struct priq_class_stats { - struct priq_interface iface; - int maxpri; /* in/out */ - - struct priq_classstats *stats; /* pointer to stats array */ -}; - -#define PRIQ_IF_ATTACH _IOW('Q', 1, struct priq_interface) -#define PRIQ_IF_DETACH _IOW('Q', 2, struct priq_interface) -#define PRIQ_ENABLE _IOW('Q', 3, struct priq_interface) -#define PRIQ_DISABLE _IOW('Q', 4, struct priq_interface) -#define PRIQ_CLEAR _IOW('Q', 5, struct priq_interface) -#define PRIQ_ADD_CLASS _IOWR('Q', 7, struct priq_add_class) -#define PRIQ_DEL_CLASS _IOW('Q', 8, struct priq_delete_class) -#define PRIQ_MOD_CLASS _IOW('Q', 9, struct priq_modify_class) -#define PRIQ_ADD_FILTER _IOWR('Q', 10, struct priq_add_filter) -#define PRIQ_DEL_FILTER _IOW('Q', 11, struct priq_delete_filter) -#define PRIQ_GETSTATS _IOWR('Q', 12, struct priq_class_stats) - -#endif /* ALTQ3_COMPAT */ #ifdef _KERNEL struct priq_class { u_int32_t cl_handle; /* class handle */ class_queue_t *cl_q; /* class queue structure */ union { struct red *cl_red; /* RED state */ struct codel *cl_codel; /* CoDel state */ } cl_aqm; #define cl_red cl_aqm.cl_red #define cl_codel cl_aqm.cl_codel int cl_pri; /* priority */ int cl_flags; /* class flags */ struct priq_if *cl_pif; /* back pointer to pif */ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ /* statistics */ u_int cl_period; /* backlog period */ struct pktcntr cl_xmitcnt; /* transmitted packet counter */ struct pktcntr cl_dropcnt; /* dropped packet counter */ }; /* * priq interface state */ struct priq_if { struct priq_if *pif_next; /* interface state list */ struct ifaltq *pif_ifq; /* backpointer to ifaltq */ u_int pif_bandwidth; /* link bandwidth in bps */ int pif_maxpri; /* max priority in use */ struct priq_class *pif_default; /* default class */ struct priq_class *pif_classes[PRIQ_MAXPRI]; /* classes */ #ifdef ALTQ3_CLFIER_COMPAT struct acc_classifier pif_classifier; /* classifier */ #endif }; #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _ALTQ_ALTQ_PRIQ_H_ */ Index: head/sys/net/altq/altq_red.c =================================================================== --- head/sys/net/altq/altq_red.c (revision 341506) +++ head/sys/net/altq/altq_red.c (revision 341507) @@ -1,1492 +1,633 @@ /*- * Copyright (C) 1997-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /*- * Copyright (c) 1990-1994 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Computer Systems * Engineering Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #ifdef ALTQ_FLOWVALVE #include #include #endif #endif /* ALTQ3_COMPAT */ #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include -#ifdef ALTQ3_COMPAT -#include -#ifdef ALTQ_FLOWVALVE -#include -#endif -#endif /* * ALTQ/RED (Random Early Detection) implementation using 32-bit * fixed-point calculation. * * written by kjc using the ns code as a reference. * you can learn more about red and ns from Sally's home page at * http://www-nrg.ee.lbl.gov/floyd/ * * most of the red parameter values are fixed in this implementation * to prevent fixed-point overflow/underflow. * if you change the parameters, watch out for overflow/underflow! * * the parameters used are recommended values by Sally. * the corresponding ns config looks: * q_weight=0.00195 * minthresh=5 maxthresh=15 queue-size=60 * linterm=30 * dropmech=drop-tail * bytes=false (can't be handled by 32-bit fixed-point) * doubleq=false dqthresh=false * wait=true */ /* * alternative red parameters for a slow link. * * assume the queue length becomes from zero to L and keeps L, it takes * N packets for q_avg to reach 63% of L. * when q_weight is 0.002, N is about 500 packets. * for a slow link like dial-up, 500 packets takes more than 1 minute! * when q_weight is 0.008, N is about 127 packets. * when q_weight is 0.016, N is about 63 packets. * bursts of 50 packets are allowed for 0.002, bursts of 25 packets * are allowed for 0.016. * see Sally's paper for more details. */ /* normal red parameters */ #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ /* q_weight = 0.00195 */ /* red parameters for a slow link */ #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ /* q_weight = 0.0078125 */ /* red parameters for a very slow link (e.g., dialup) */ #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ /* q_weight = 0.015625 */ /* fixed-point uses 12-bit decimal places */ #define FP_SHIFT 12 /* fixed-point shift */ /* red parameters for drop probability */ #define INV_P_MAX 10 /* inverse of max drop probability */ #define TH_MIN 5 /* min threshold */ #define TH_MAX 15 /* max threshold */ #define RED_LIMIT 60 /* default max queue length */ #define RED_STATS /* collect statistics */ /* * our default policy for forced-drop is drop-tail. * (in altq-1.1.2 or earlier, the default was random-drop. * but it makes more sense to punish the cause of the surge.) * to switch to the random-drop policy, define "RED_RANDOM_DROP". */ -#ifdef ALTQ3_COMPAT -#ifdef ALTQ_FLOWVALVE -/* - * flow-valve is an extension to protect red from unresponsive flows - * and to promote end-to-end congestion control. - * flow-valve observes the average drop rates of the flows that have - * experienced packet drops in the recent past. - * when the average drop rate exceeds the threshold, the flow is - * blocked by the flow-valve. the trapped flow should back off - * exponentially to escape from the flow-valve. - */ -#ifdef RED_RANDOM_DROP -#error "random-drop can't be used with flow-valve!" -#endif -#endif /* ALTQ_FLOWVALVE */ -/* red_list keeps all red_queue_t's allocated. */ -static red_queue_t *red_list = NULL; - -#endif /* ALTQ3_COMPAT */ - /* default red parameter values */ static int default_th_min = TH_MIN; static int default_th_max = TH_MAX; static int default_inv_pmax = INV_P_MAX; -#ifdef ALTQ3_COMPAT -/* internal function prototypes */ -static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); -static struct mbuf *red_dequeue(struct ifaltq *, int); -static int red_request(struct ifaltq *, int, void *); -static void red_purgeq(red_queue_t *); -static int red_detach(red_queue_t *); -#ifdef ALTQ_FLOWVALVE -static __inline struct fve *flowlist_lookup(struct flowvalve *, - struct altq_pktattr *, struct timeval *); -static __inline struct fve *flowlist_reclaim(struct flowvalve *, - struct altq_pktattr *); -static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *); -static __inline int fv_p2f(struct flowvalve *, int); -#if 0 /* XXX: make the compiler happy (fv_alloc unused) */ -static struct flowvalve *fv_alloc(struct red *); -#endif -static void fv_destroy(struct flowvalve *); -static int fv_checkflow(struct flowvalve *, struct altq_pktattr *, - struct fve **); -static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *, - struct fve *); -#endif -#endif /* ALTQ3_COMPAT */ /* * red support routines */ red_t * red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, int pkttime) { red_t *rp; int w, i; int npkts_per_sec; rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (rp == NULL) return (NULL); if (weight == 0) rp->red_weight = W_WEIGHT; else rp->red_weight = weight; /* allocate weight table */ rp->red_wtab = wtab_alloc(rp->red_weight); if (rp->red_wtab == NULL) { free(rp, M_DEVBUF); return (NULL); } rp->red_avg = 0; rp->red_idle = 1; if (inv_pmax == 0) rp->red_inv_pmax = default_inv_pmax; else rp->red_inv_pmax = inv_pmax; if (th_min == 0) rp->red_thmin = default_th_min; else rp->red_thmin = th_min; if (th_max == 0) rp->red_thmax = default_th_max; else rp->red_thmax = th_max; rp->red_flags = flags; if (pkttime == 0) /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ rp->red_pkttime = 800; else rp->red_pkttime = pkttime; if (weight == 0) { /* when the link is very slow, adjust red parameters */ npkts_per_sec = 1000000 / rp->red_pkttime; if (npkts_per_sec < 50) { /* up to about 400Kbps */ rp->red_weight = W_WEIGHT_2; } else if (npkts_per_sec < 300) { /* up to about 2.4Mbps */ rp->red_weight = W_WEIGHT_1; } } /* calculate wshift. weight must be power of 2 */ w = rp->red_weight; for (i = 0; w > 1; i++) w = w >> 1; rp->red_wshift = i; w = 1 << rp->red_wshift; if (w != rp->red_weight) { printf("invalid weight value %d for red! use %d\n", rp->red_weight, w); rp->red_weight = w; } /* * thmin_s and thmax_s are scaled versions of th_min and th_max * to be compared with avg. */ rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); /* * precompute probability denominator * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */ rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) * rp->red_inv_pmax) << FP_SHIFT; microtime(&rp->red_last); return (rp); } void red_destroy(red_t *rp) { -#ifdef ALTQ3_COMPAT -#ifdef ALTQ_FLOWVALVE - if (rp->red_flowvalve != NULL) - fv_destroy(rp->red_flowvalve); -#endif -#endif /* ALTQ3_COMPAT */ wtab_destroy(rp->red_wtab); free(rp, M_DEVBUF); } void red_getstats(red_t *rp, struct redstats *sp) { sp->q_avg = rp->red_avg >> rp->red_wshift; sp->xmit_cnt = rp->red_stats.xmit_cnt; sp->drop_cnt = rp->red_stats.drop_cnt; sp->drop_forced = rp->red_stats.drop_forced; sp->drop_unforced = rp->red_stats.drop_unforced; sp->marked_packets = rp->red_stats.marked_packets; } int red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, struct altq_pktattr *pktattr) { int avg, droptype; int n; -#ifdef ALTQ3_COMPAT -#ifdef ALTQ_FLOWVALVE - struct fve *fve = NULL; - if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) - if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { - m_freem(m); - return (-1); - } -#endif -#endif /* ALTQ3_COMPAT */ - avg = rp->red_avg; /* * if we were idle, we pretend that n packets arrived during * the idle period. */ if (rp->red_idle) { struct timeval now; int t; rp->red_idle = 0; microtime(&now); t = (now.tv_sec - rp->red_last.tv_sec); if (t > 60) { /* * being idle for more than 1 minute, set avg to zero. * this prevents t from overflow. */ avg = 0; } else { t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); n = t / rp->red_pkttime - 1; /* the following line does (avg = (1 - Wq)^n * avg) */ if (n > 0) avg = (avg >> FP_SHIFT) * pow_w(rp->red_wtab, n); } } /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); rp->red_avg = avg; /* save the new value */ /* * red_count keeps a tally of arriving traffic that has not * been dropped. */ rp->red_count++; /* see if we drop early */ droptype = DTYPE_NODROP; if (avg >= rp->red_thmin_s && qlen(q) > 1) { if (avg >= rp->red_thmax_s) { /* avg >= th_max: forced drop */ droptype = DTYPE_FORCED; } else if (rp->red_old == 0) { /* first exceeds th_min */ rp->red_count = 1; rp->red_old = 1; } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, rp->red_probd, rp->red_count)) { /* mark or drop by red */ if ((rp->red_flags & REDF_ECN) && mark_ecn(m, pktattr, rp->red_flags)) { /* successfully marked. do not drop. */ rp->red_count = 0; #ifdef RED_STATS rp->red_stats.marked_packets++; #endif } else { /* unforced drop by red */ droptype = DTYPE_EARLY; } } } else { /* avg < th_min */ rp->red_old = 0; } /* * if the queue length hits the hard limit, it's a forced drop. */ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) droptype = DTYPE_FORCED; #ifdef RED_RANDOM_DROP /* if successful or forced drop, enqueue this packet. */ if (droptype != DTYPE_EARLY) _addq(q, m); #else /* if successful, enqueue this packet. */ if (droptype == DTYPE_NODROP) _addq(q, m); #endif if (droptype != DTYPE_NODROP) { if (droptype == DTYPE_EARLY) { /* drop the incoming packet */ #ifdef RED_STATS rp->red_stats.drop_unforced++; #endif } else { /* forced drop, select a victim packet in the queue. */ #ifdef RED_RANDOM_DROP m = _getq_random(q); #endif #ifdef RED_STATS rp->red_stats.drop_forced++; #endif } #ifdef RED_STATS PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); #endif rp->red_count = 0; -#ifdef ALTQ3_COMPAT -#ifdef ALTQ_FLOWVALVE - if (rp->red_flowvalve != NULL) - fv_dropbyred(rp->red_flowvalve, pktattr, fve); -#endif -#endif /* ALTQ3_COMPAT */ m_freem(m); return (-1); } /* successfully queued */ #ifdef RED_STATS PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); #endif return (0); } /* * early-drop probability is calculated as follows: * prob = p_max * (avg - th_min) / (th_max - th_min) * prob_a = prob / (2 - count*prob) * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) * here prob_a increases as successive undrop count increases. * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), * becomes 1 when (count >= (2 / prob))). */ int drop_early(int fp_len, int fp_probd, int count) { int d; /* denominator of drop-probability */ d = fp_probd - count * fp_len; if (d <= 0) /* count exceeds the hard limit: drop or mark */ return (1); /* * now the range of d is [1..600] in fixed-point. (when * th_max-th_min=10 and p_max=1/30) * drop probability = (avg - TH_MIN) / d */ if ((arc4random() % d) < fp_len) { /* drop or mark */ return (1); } /* no drop/mark */ return (0); } /* * try to mark CE bit to the packet. * returns 1 if successfully marked, 0 otherwise. */ int mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) { struct mbuf *m0; struct pf_mtag *at; void *hdr; at = pf_find_mtag(m); if (at != NULL) { hdr = at->hdr; -#ifdef ALTQ3_COMPAT - } else if (pktattr != NULL) { - af = pktattr->pattr_af; - hdr = pktattr->pattr_hdr; -#endif /* ALTQ3_COMPAT */ } else return (0); /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)hdr >= m0->m_data) && ((caddr_t)hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, tag info is stale */ return (0); } switch (((struct ip *)hdr)->ip_v) { case IPVERSION: if (flags & REDF_ECN4) { struct ip *ip = hdr; u_int8_t otos; int sum; if (ip->ip_v != 4) return (0); /* version mismatch! */ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) return (0); /* not-ECT */ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) return (1); /* already marked */ /* * ecn-capable but not marked, * mark CE and update checksum */ otos = ip->ip_tos; ip->ip_tos |= IPTOS_ECN_CE; /* * update checksum (from RFC1624) * HC' = ~(~HC + ~m + m') */ sum = ~ntohs(ip->ip_sum) & 0xffff; sum += (~otos & 0xffff) + ip->ip_tos; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); /* add carry */ ip->ip_sum = htons(~sum & 0xffff); return (1); } break; #ifdef INET6 case (IPV6_VERSION >> 4): if (flags & REDF_ECN6) { struct ip6_hdr *ip6 = hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return (0); /* version mismatch! */ if ((flowlabel & (IPTOS_ECN_MASK << 20)) == (IPTOS_ECN_NOTECT << 20)) return (0); /* not-ECT */ if ((flowlabel & (IPTOS_ECN_MASK << 20)) == (IPTOS_ECN_CE << 20)) return (1); /* already marked */ /* * ecn-capable but not marked, mark CE */ flowlabel |= (IPTOS_ECN_CE << 20); ip6->ip6_flow = htonl(flowlabel); return (1); } break; #endif /* INET6 */ } /* not marked */ return (0); } struct mbuf * red_getq(rp, q) red_t *rp; class_queue_t *q; { struct mbuf *m; if ((m = _getq(q)) == NULL) { if (rp->red_idle == 0) { rp->red_idle = 1; microtime(&rp->red_last); } return NULL; } rp->red_idle = 0; return (m); } /* * helper routine to calibrate avg during idle. * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point * here Wq = 1/weight and the code assumes Wq is close to zero. * * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. */ static struct wtab *wtab_list = NULL; /* pointer to wtab list */ struct wtab * wtab_alloc(int weight) { struct wtab *w; int i; for (w = wtab_list; w != NULL; w = w->w_next) if (w->w_weight == weight) { w->w_refcount++; return (w); } w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO); if (w == NULL) return (NULL); w->w_weight = weight; w->w_refcount = 1; w->w_next = wtab_list; wtab_list = w; /* initialize the weight table */ w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; for (i = 1; i < 32; i++) { w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; if (w->w_tab[i] == 0 && w->w_param_max == 0) w->w_param_max = 1 << i; } return (w); } int wtab_destroy(struct wtab *w) { struct wtab *prev; if (--w->w_refcount > 0) return (0); if (wtab_list == w) wtab_list = w->w_next; else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) if (prev->w_next == w) { prev->w_next = w->w_next; break; } free(w, M_DEVBUF); return (0); } int32_t pow_w(struct wtab *w, int n) { int i, bit; int32_t val; if (n >= w->w_param_max) return (0); val = 1 << FP_SHIFT; if (n <= 0) return (val); bit = 1; i = 0; while (n) { if (n & bit) { val = (val * w->w_tab[i]) >> FP_SHIFT; n &= ~bit; } i++; bit <<= 1; } return (val); } -#ifdef ALTQ3_COMPAT -/* - * red device interface - */ -altqdev_decl(red); - -int -redopen(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - /* everything will be done when the queueing scheme is attached. */ - return 0; -} - -int -redclose(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - red_queue_t *rqp; - int err, error = 0; - - while ((rqp = red_list) != NULL) { - /* destroy all */ - err = red_detach(rqp); - if (err != 0 && error == 0) - error = err; - } - - return error; -} - -int -redioctl(dev, cmd, addr, flag, p) - dev_t dev; - ioctlcmd_t cmd; - caddr_t addr; - int flag; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - red_queue_t *rqp; - struct red_interface *ifacep; - struct ifnet *ifp; - int error = 0; - - /* check super-user privilege */ - switch (cmd) { - case RED_GETSTATS: - break; - default: -#if (__FreeBSD_version > 700000) - if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) -#elsif (__FreeBSD_version > 400000) - if ((error = suser(p)) != 0) -#else - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) -#endif - return (error); - break; - } - - switch (cmd) { - - case RED_ENABLE: - ifacep = (struct red_interface *)addr; - if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { - error = EBADF; - break; - } - error = altq_enable(rqp->rq_ifq); - break; - - case RED_DISABLE: - ifacep = (struct red_interface *)addr; - if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { - error = EBADF; - break; - } - error = altq_disable(rqp->rq_ifq); - break; - - case RED_IF_ATTACH: - ifp = ifunit(((struct red_interface *)addr)->red_ifname); - if (ifp == NULL) { - error = ENXIO; - break; - } - - /* allocate and initialize red_queue_t */ - rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK); - if (rqp == NULL) { - error = ENOMEM; - break; - } - bzero(rqp, sizeof(red_queue_t)); - - rqp->rq_q = malloc(sizeof(class_queue_t), - M_DEVBUF, M_WAITOK); - if (rqp->rq_q == NULL) { - free(rqp, M_DEVBUF); - error = ENOMEM; - break; - } - bzero(rqp->rq_q, sizeof(class_queue_t)); - - rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); - if (rqp->rq_red == NULL) { - free(rqp->rq_q, M_DEVBUF); - free(rqp, M_DEVBUF); - error = ENOMEM; - break; - } - - rqp->rq_ifq = &ifp->if_snd; - qtail(rqp->rq_q) = NULL; - qlen(rqp->rq_q) = 0; - qlimit(rqp->rq_q) = RED_LIMIT; - qtype(rqp->rq_q) = Q_RED; - - /* - * set RED to this ifnet structure. - */ - error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, - red_enqueue, red_dequeue, red_request, - NULL, NULL); - if (error) { - red_destroy(rqp->rq_red); - free(rqp->rq_q, M_DEVBUF); - free(rqp, M_DEVBUF); - break; - } - - /* add this state to the red list */ - rqp->rq_next = red_list; - red_list = rqp; - break; - - case RED_IF_DETACH: - ifacep = (struct red_interface *)addr; - if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { - error = EBADF; - break; - } - error = red_detach(rqp); - break; - - case RED_GETSTATS: - do { - struct red_stats *q_stats; - red_t *rp; - - q_stats = (struct red_stats *)addr; - if ((rqp = altq_lookup(q_stats->iface.red_ifname, - ALTQT_RED)) == NULL) { - error = EBADF; - break; - } - - q_stats->q_len = qlen(rqp->rq_q); - q_stats->q_limit = qlimit(rqp->rq_q); - - rp = rqp->rq_red; - q_stats->q_avg = rp->red_avg >> rp->red_wshift; - q_stats->xmit_cnt = rp->red_stats.xmit_cnt; - q_stats->drop_cnt = rp->red_stats.drop_cnt; - q_stats->drop_forced = rp->red_stats.drop_forced; - q_stats->drop_unforced = rp->red_stats.drop_unforced; - q_stats->marked_packets = rp->red_stats.marked_packets; - - q_stats->weight = rp->red_weight; - q_stats->inv_pmax = rp->red_inv_pmax; - q_stats->th_min = rp->red_thmin; - q_stats->th_max = rp->red_thmax; - -#ifdef ALTQ_FLOWVALVE - if (rp->red_flowvalve != NULL) { - struct flowvalve *fv = rp->red_flowvalve; - q_stats->fv_flows = fv->fv_flows; - q_stats->fv_pass = fv->fv_stats.pass; - q_stats->fv_predrop = fv->fv_stats.predrop; - q_stats->fv_alloc = fv->fv_stats.alloc; - q_stats->fv_escape = fv->fv_stats.escape; - } else { -#endif /* ALTQ_FLOWVALVE */ - q_stats->fv_flows = 0; - q_stats->fv_pass = 0; - q_stats->fv_predrop = 0; - q_stats->fv_alloc = 0; - q_stats->fv_escape = 0; -#ifdef ALTQ_FLOWVALVE - } -#endif /* ALTQ_FLOWVALVE */ - } while (/*CONSTCOND*/ 0); - break; - - case RED_CONFIG: - do { - struct red_conf *fc; - red_t *new; - int s, limit; - - fc = (struct red_conf *)addr; - if ((rqp = altq_lookup(fc->iface.red_ifname, - ALTQT_RED)) == NULL) { - error = EBADF; - break; - } - new = red_alloc(fc->red_weight, - fc->red_inv_pmax, - fc->red_thmin, - fc->red_thmax, - fc->red_flags, - fc->red_pkttime); - if (new == NULL) { - error = ENOMEM; - break; - } - - s = splnet(); - red_purgeq(rqp); - limit = fc->red_limit; - if (limit < fc->red_thmax) - limit = fc->red_thmax; - qlimit(rqp->rq_q) = limit; - fc->red_limit = limit; /* write back the new value */ - - red_destroy(rqp->rq_red); - rqp->rq_red = new; - - splx(s); - - /* write back new values */ - fc->red_limit = limit; - fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; - fc->red_thmin = rqp->rq_red->red_thmin; - fc->red_thmax = rqp->rq_red->red_thmax; - - } while (/*CONSTCOND*/ 0); - break; - - case RED_SETDEFAULTS: - do { - struct redparams *rp; - - rp = (struct redparams *)addr; - - default_th_min = rp->th_min; - default_th_max = rp->th_max; - default_inv_pmax = rp->inv_pmax; - } while (/*CONSTCOND*/ 0); - break; - - default: - error = EINVAL; - break; - } - return error; -} - -static int -red_detach(rqp) - red_queue_t *rqp; -{ - red_queue_t *tmp; - int error = 0; - - if (ALTQ_IS_ENABLED(rqp->rq_ifq)) - altq_disable(rqp->rq_ifq); - - if ((error = altq_detach(rqp->rq_ifq))) - return (error); - - if (red_list == rqp) - red_list = rqp->rq_next; - else { - for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) - if (tmp->rq_next == rqp) { - tmp->rq_next = rqp->rq_next; - break; - } - if (tmp == NULL) - printf("red_detach: no state found in red_list!\n"); - } - - red_destroy(rqp->rq_red); - free(rqp->rq_q, M_DEVBUF); - free(rqp, M_DEVBUF); - return (error); -} - -/* - * enqueue routine: - * - * returns: 0 when successfully queued. - * ENOBUFS when drop occurs. - */ -static int -red_enqueue(ifq, m, pktattr) - struct ifaltq *ifq; - struct mbuf *m; - struct altq_pktattr *pktattr; -{ - red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; - - IFQ_LOCK_ASSERT(ifq); - - if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0) - return ENOBUFS; - ifq->ifq_len++; - return 0; -} - -/* - * dequeue routine: - * must be called in splimp. - * - * returns: mbuf dequeued. - * NULL when no packet is available in the queue. - */ - -static struct mbuf * -red_dequeue(ifq, op) - struct ifaltq *ifq; - int op; -{ - red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; - struct mbuf *m; - - IFQ_LOCK_ASSERT(ifq); - - if (op == ALTDQ_POLL) - return qhead(rqp->rq_q); - - /* op == ALTDQ_REMOVE */ - m = red_getq(rqp->rq_red, rqp->rq_q); - if (m != NULL) - ifq->ifq_len--; - return (m); -} - -static int -red_request(ifq, req, arg) - struct ifaltq *ifq; - int req; - void *arg; -{ - red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; - - IFQ_LOCK_ASSERT(ifq); - - switch (req) { - case ALTRQ_PURGE: - red_purgeq(rqp); - break; - } - return (0); -} - -static void -red_purgeq(rqp) - red_queue_t *rqp; -{ - _flushq(rqp->rq_q); - if (ALTQ_IS_ENABLED(rqp->rq_ifq)) - rqp->rq_ifq->ifq_len = 0; -} - -#ifdef ALTQ_FLOWVALVE - -#define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ -#define FV_PSCALE(x) ((x) << FV_PSHIFT) -#define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) -#define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ -#define FV_FSCALE(x) ((x) << FV_FSHIFT) -#define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) - -#define FV_TIMER (3 * hz) /* timer value for garbage collector */ -#define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */ - -#define FV_N 10 /* update fve_f every FV_N packets */ - -#define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */ -#define FV_TTHRESH 3 /* time threshold to delete fve */ -#define FV_ALPHA 5 /* extra packet count */ - -#define FV_STATS - -#if (__FreeBSD_version > 300000) -#define FV_TIMESTAMP(tp) getmicrotime(tp) -#else -#define FV_TIMESTAMP(tp) { (*(tp)) = time; } -#endif - -/* - * Brtt table: 127 entry table to convert drop rate (p) to - * the corresponding bandwidth fraction (f) - * the following equation is implemented to use scaled values, - * fve_p and fve_f, in the fixed point format. - * - * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p)) - * f = Brtt(p) / (max_th + alpha) - */ -#define BRTT_SIZE 128 -#define BRTT_SHIFT 12 -#define BRTT_MASK 0x0007f000 -#define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT)) - -const int brtt_tab[BRTT_SIZE] = { - 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728, - 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361, - 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333, - 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612, - 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957, - 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440, - 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184, - 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611, - 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062, - 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487, - 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222, - 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844, - 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079, - 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746, - 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722, - 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924 -}; - -static __inline struct fve * -flowlist_lookup(fv, pktattr, now) - struct flowvalve *fv; - struct altq_pktattr *pktattr; - struct timeval *now; -{ - struct fve *fve; - int flows; - struct ip *ip; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif - struct timeval tthresh; - - if (pktattr == NULL) - return (NULL); - - tthresh.tv_sec = now->tv_sec - FV_TTHRESH; - flows = 0; - /* - * search the flow list - */ - switch (pktattr->pattr_af) { - case AF_INET: - ip = (struct ip *)pktattr->pattr_hdr; - TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ - if (fve->fve_lastdrop.tv_sec == 0) - break; - if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { - fve->fve_lastdrop.tv_sec = 0; - break; - } - if (fve->fve_flow.flow_af == AF_INET && - fve->fve_flow.flow_ip.ip_src.s_addr == - ip->ip_src.s_addr && - fve->fve_flow.flow_ip.ip_dst.s_addr == - ip->ip_dst.s_addr) - return (fve); - flows++; - } - break; -#ifdef INET6 - case AF_INET6: - ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; - TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ - if (fve->fve_lastdrop.tv_sec == 0) - break; - if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { - fve->fve_lastdrop.tv_sec = 0; - break; - } - if (fve->fve_flow.flow_af == AF_INET6 && - IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src, - &ip6->ip6_src) && - IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst, - &ip6->ip6_dst)) - return (fve); - flows++; - } - break; -#endif /* INET6 */ - - default: - /* unknown protocol. no drop. */ - return (NULL); - } - fv->fv_flows = flows; /* save the number of active fve's */ - return (NULL); -} - -static __inline struct fve * -flowlist_reclaim(fv, pktattr) - struct flowvalve *fv; - struct altq_pktattr *pktattr; -{ - struct fve *fve; - struct ip *ip; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif - - /* - * get an entry from the tail of the LRU list. - */ - fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead); - - switch (pktattr->pattr_af) { - case AF_INET: - ip = (struct ip *)pktattr->pattr_hdr; - fve->fve_flow.flow_af = AF_INET; - fve->fve_flow.flow_ip.ip_src = ip->ip_src; - fve->fve_flow.flow_ip.ip_dst = ip->ip_dst; - break; -#ifdef INET6 - case AF_INET6: - ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; - fve->fve_flow.flow_af = AF_INET6; - fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src; - fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst; - break; -#endif - } - - fve->fve_state = Green; - fve->fve_p = 0.0; - fve->fve_f = 0.0; - fve->fve_ifseq = fv->fv_ifseq - 1; - fve->fve_count = 0; - - fv->fv_flows++; -#ifdef FV_STATS - fv->fv_stats.alloc++; -#endif - return (fve); -} - -static __inline void -flowlist_move_to_head(fv, fve) - struct flowvalve *fv; - struct fve *fve; -{ - if (TAILQ_FIRST(&fv->fv_flowlist) != fve) { - TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru); - TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru); - } -} - -#if 0 /* XXX: make the compiler happy (fv_alloc unused) */ -/* - * allocate flowvalve structure - */ -static struct flowvalve * -fv_alloc(rp) - struct red *rp; -{ - struct flowvalve *fv; - struct fve *fve; - int i, num; - - num = FV_FLOWLISTSIZE; - fv = malloc(sizeof(struct flowvalve), - M_DEVBUF, M_WAITOK); - if (fv == NULL) - return (NULL); - bzero(fv, sizeof(struct flowvalve)); - - fv->fv_fves = malloc(sizeof(struct fve) * num, - M_DEVBUF, M_WAITOK); - if (fv->fv_fves == NULL) { - free(fv, M_DEVBUF); - return (NULL); - } - bzero(fv->fv_fves, sizeof(struct fve) * num); - - fv->fv_flows = 0; - TAILQ_INIT(&fv->fv_flowlist); - for (i = 0; i < num; i++) { - fve = &fv->fv_fves[i]; - fve->fve_lastdrop.tv_sec = 0; - TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru); - } - - /* initialize drop rate threshold in scaled fixed-point */ - fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax; - - /* initialize drop rate to fraction table */ - fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, - M_DEVBUF, M_WAITOK); - if (fv->fv_p2ftab == NULL) { - free(fv->fv_fves, M_DEVBUF); - free(fv, M_DEVBUF); - return (NULL); - } - /* - * create the p2f table. - * (shift is used to keep the precision) - */ - for (i = 1; i < BRTT_SIZE; i++) { - int f; - - f = brtt_tab[i] << 8; - fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8; - } - - return (fv); -} -#endif - -static void fv_destroy(fv) - struct flowvalve *fv; -{ - free(fv->fv_p2ftab, M_DEVBUF); - free(fv->fv_fves, M_DEVBUF); - free(fv, M_DEVBUF); -} - -static __inline int -fv_p2f(fv, p) - struct flowvalve *fv; - int p; -{ - int val, f; - - if (p >= BRTT_PMAX) - f = fv->fv_p2ftab[BRTT_SIZE-1]; - else if ((val = (p & BRTT_MASK))) - f = fv->fv_p2ftab[(val >> BRTT_SHIFT)]; - else - f = fv->fv_p2ftab[1]; - return (f); -} - -/* - * check if an arriving packet should be pre-dropped. - * called from red_addq() when a packet arrives. - * returns 1 when the packet should be pre-dropped. - * should be called in splimp. - */ -static int -fv_checkflow(fv, pktattr, fcache) - struct flowvalve *fv; - struct altq_pktattr *pktattr; - struct fve **fcache; -{ - struct fve *fve; - struct timeval now; - - fv->fv_ifseq++; - FV_TIMESTAMP(&now); - - if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) - /* no matching entry in the flowlist */ - return (0); - - *fcache = fve; - - /* update fraction f for every FV_N packets */ - if (++fve->fve_count == FV_N) { - /* - * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f - */ - fve->fve_f = - (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq) - + fve->fve_f - FV_FUNSCALE(fve->fve_f); - fve->fve_ifseq = fv->fv_ifseq; - fve->fve_count = 0; - } - - /* - * overpumping test - */ - if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) { - int fthresh; - - /* calculate a threshold */ - fthresh = fv_p2f(fv, fve->fve_p); - if (fve->fve_f > fthresh) - fve->fve_state = Red; - } - - if (fve->fve_state == Red) { - /* - * backoff test - */ - if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) { - /* no drop for at least FV_BACKOFFTHRESH sec */ - fve->fve_p = 0; - fve->fve_state = Green; -#ifdef FV_STATS - fv->fv_stats.escape++; -#endif - } else { - /* block this flow */ - flowlist_move_to_head(fv, fve); - fve->fve_lastdrop = now; -#ifdef FV_STATS - fv->fv_stats.predrop++; -#endif - return (1); - } - } - - /* - * p = (1 - Wp) * p - */ - fve->fve_p -= FV_PUNSCALE(fve->fve_p); - if (fve->fve_p < 0) - fve->fve_p = 0; -#ifdef FV_STATS - fv->fv_stats.pass++; -#endif - return (0); -} - -/* - * called from red_addq when a packet is dropped by red. - * should be called in splimp. - */ -static void fv_dropbyred(fv, pktattr, fcache) - struct flowvalve *fv; - struct altq_pktattr *pktattr; - struct fve *fcache; -{ - struct fve *fve; - struct timeval now; - - if (pktattr == NULL) - return; - FV_TIMESTAMP(&now); - - if (fcache != NULL) - /* the fve of this packet is already cached */ - fve = fcache; - else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) - fve = flowlist_reclaim(fv, pktattr); - - flowlist_move_to_head(fv, fve); - - /* - * update p: the following line cancels the update - * in fv_checkflow() and calculate - * p = Wp + (1 - Wp) * p - */ - fve->fve_p = (1 << FP_SHIFT) + fve->fve_p; - - fve->fve_lastdrop = now; -} - -#endif /* ALTQ_FLOWVALVE */ - -#ifdef KLD_MODULE - -static struct altqsw red_sw = - {"red", redopen, redclose, redioctl}; - -ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw); -MODULE_VERSION(altq_red, 1); - -#endif /* KLD_MODULE */ -#endif /* ALTQ3_COMPAT */ #endif /* ALTQ_RED */ Index: head/sys/net/altq/altq_red.h =================================================================== --- head/sys/net/altq/altq_red.h (revision 341506) +++ head/sys/net/altq/altq_red.h (revision 341507) @@ -1,199 +1,129 @@ /*- * Copyright (C) 1997-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_red.h,v 1.8 2003/07/10 12:07:49 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_RED_H_ #define _ALTQ_ALTQ_RED_H_ #include -#ifdef ALTQ3_COMPAT -struct red_interface { - char red_ifname[IFNAMSIZ]; -}; -struct red_stats { - struct red_interface iface; - int q_len; - int q_avg; - - struct pktcntr xmit_cnt; - struct pktcntr drop_cnt; - u_int drop_forced; - u_int drop_unforced; - u_int marked_packets; - - /* static red parameters */ - int q_limit; - int weight; - int inv_pmax; - int th_min; - int th_max; - - /* flowvalve related stuff */ - u_int fv_flows; - u_int fv_pass; - u_int fv_predrop; - u_int fv_alloc; - u_int fv_escape; -}; - -struct red_conf { - struct red_interface iface; - int red_weight; /* weight for EWMA */ - int red_inv_pmax; /* inverse of max drop probability */ - int red_thmin; /* red min threshold */ - int red_thmax; /* red max threshold */ - int red_limit; /* max queue length */ - int red_pkttime; /* average packet time in usec */ - int red_flags; /* see below */ -}; -#endif /* ALTQ3_COMPAT */ - /* red flags */ #define REDF_ECN4 0x01 /* use packet marking for IPv4 packets */ #define REDF_ECN6 0x02 /* use packet marking for IPv6 packets */ #define REDF_ECN (REDF_ECN4 | REDF_ECN6) #define REDF_FLOWVALVE 0x04 /* use flowvalve (aka penalty-box) */ /* * simpler versions of red parameters and statistics used by other * disciplines (e.g., CBQ) */ struct redparams { int th_min; /* red min threshold */ int th_max; /* red max threshold */ int inv_pmax; /* inverse of max drop probability */ }; struct redstats { int q_avg; struct pktcntr xmit_cnt; struct pktcntr drop_cnt; u_int drop_forced; u_int drop_unforced; u_int marked_packets; }; -#ifdef ALTQ3_COMPAT -/* - * IOCTLs for RED - */ -#define RED_IF_ATTACH _IOW('Q', 1, struct red_interface) -#define RED_IF_DETACH _IOW('Q', 2, struct red_interface) -#define RED_ENABLE _IOW('Q', 3, struct red_interface) -#define RED_DISABLE _IOW('Q', 4, struct red_interface) -#define RED_CONFIG _IOWR('Q', 6, struct red_conf) -#define RED_GETSTATS _IOWR('Q', 12, struct red_stats) -#define RED_SETDEFAULTS _IOW('Q', 30, struct redparams) -#endif /* ALTQ3_COMPAT */ #ifdef _KERNEL -#ifdef ALTQ3_COMPAT -struct flowvalve; -#endif /* weight table structure for idle time calibration */ struct wtab { struct wtab *w_next; int w_weight; int w_param_max; int w_refcount; int32_t w_tab[32]; }; typedef struct red { int red_pkttime; /* average packet time in micro sec used for idle calibration */ int red_flags; /* red flags */ /* red parameters */ int red_weight; /* weight for EWMA */ int red_inv_pmax; /* inverse of max drop probability */ int red_thmin; /* red min threshold */ int red_thmax; /* red max threshold */ /* variables for internal use */ int red_wshift; /* log(red_weight) */ int red_thmin_s; /* th_min scaled by avgshift */ int red_thmax_s; /* th_max scaled by avgshift */ int red_probd; /* drop probability denominator */ int red_avg; /* queue len avg scaled by avgshift */ int red_count; /* packet count since last dropped/ marked packet */ int red_idle; /* queue was empty */ int red_old; /* avg is above th_min */ struct wtab *red_wtab; /* weight table */ struct timeval red_last; /* time when the queue becomes idle */ -#ifdef ALTQ3_COMPAT - struct flowvalve *red_flowvalve; /* flowvalve state */ -#endif struct { struct pktcntr xmit_cnt; struct pktcntr drop_cnt; u_int drop_forced; u_int drop_unforced; u_int marked_packets; } red_stats; } red_t; -#ifdef ALTQ3_COMPAT -typedef struct red_queue { - struct red_queue *rq_next; /* next red_state in the list */ - struct ifaltq *rq_ifq; /* backpointer to ifaltq */ - - class_queue_t *rq_q; - - red_t *rq_red; -} red_queue_t; -#endif /* ALTQ3_COMPAT */ /* red drop types */ #define DTYPE_NODROP 0 /* no drop */ #define DTYPE_FORCED 1 /* a "forced" drop */ #define DTYPE_EARLY 2 /* an "unforced" (early) drop */ extern red_t *red_alloc(int, int, int, int, int, int); extern void red_destroy(red_t *); extern void red_getstats(red_t *, struct redstats *); extern int red_addq(red_t *, class_queue_t *, struct mbuf *, struct altq_pktattr *); extern struct mbuf *red_getq(red_t *, class_queue_t *); extern int drop_early(int, int, int); extern int mark_ecn(struct mbuf *, struct altq_pktattr *, int); extern struct wtab *wtab_alloc(int); extern int wtab_destroy(struct wtab *); extern int32_t pow_w(struct wtab *, int); #endif /* _KERNEL */ #endif /* _ALTQ_ALTQ_RED_H_ */ Index: head/sys/net/altq/altq_rio.c =================================================================== --- head/sys/net/altq/altq_rio.c (revision 341506) +++ head/sys/net/altq/altq_rio.c (revision 341507) @@ -1,844 +1,451 @@ /*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1990-1994 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Computer Systems * Engineering Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */ #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #endif #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include -#ifdef ALTQ3_COMPAT -#include -#endif /* * RIO: RED with IN/OUT bit * described in * "Explicit Allocation of Best Effort Packet Delivery Service" * David D. Clark and Wenjia Fang, MIT Lab for Computer Science * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf} * * this implementation is extended to support more than 2 drop precedence * values as described in RFC2597 (Assured Forwarding PHB Group). * */ /* * AF DS (differentiated service) codepoints. * (classes can be mapped to CBQ or H-FSC classes.) * * 0 1 2 3 4 5 6 7 * +---+---+---+---+---+---+---+---+ * | CLASS |DropPre| 0 | CU | * +---+---+---+---+---+---+---+---+ * * class 1: 001 * class 2: 010 * class 3: 011 * class 4: 100 * * low drop prec: 01 * medium drop prec: 10 * high drop prec: 01 */ /* normal red parameters */ #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ /* q_weight = 0.00195 */ /* red parameters for a slow link */ #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ /* q_weight = 0.0078125 */ /* red parameters for a very slow link (e.g., dialup) */ #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ /* q_weight = 0.015625 */ /* fixed-point uses 12-bit decimal places */ #define FP_SHIFT 12 /* fixed-point shift */ /* red parameters for drop probability */ #define INV_P_MAX 10 /* inverse of max drop probability */ #define TH_MIN 5 /* min threshold */ #define TH_MAX 15 /* max threshold */ #define RIO_LIMIT 60 /* default max queue length */ #define RIO_STATS /* collect statistics */ #define TV_DELTA(a, b, delta) { \ int xxs; \ \ delta = (a)->tv_usec - (b)->tv_usec; \ if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \ if (xxs < 0) { \ delta = 60000000; \ } else if (xxs > 4) { \ if (xxs > 60) \ delta = 60000000; \ else \ delta += xxs * 1000000; \ } else while (xxs > 0) { \ delta += 1000000; \ xxs--; \ } \ } \ } -#ifdef ALTQ3_COMPAT -/* rio_list keeps all rio_queue_t's allocated. */ -static rio_queue_t *rio_list = NULL; -#endif /* default rio parameter values */ static struct redparams default_rio_params[RIO_NDROPPREC] = { /* th_min, th_max, inv_pmax */ { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */ { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */ { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */ }; /* internal function prototypes */ static int dscp2index(u_int8_t); -#ifdef ALTQ3_COMPAT -static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); -static struct mbuf *rio_dequeue(struct ifaltq *, int); -static int rio_request(struct ifaltq *, int, void *); -static int rio_detach(rio_queue_t *); -/* - * rio device interface - */ -altqdev_decl(rio); - -#endif /* ALTQ3_COMPAT */ - rio_t * rio_alloc(int weight, struct redparams *params, int flags, int pkttime) { rio_t *rp; int w, i; int npkts_per_sec; rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (rp == NULL) return (NULL); rp->rio_flags = flags; if (pkttime == 0) /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ rp->rio_pkttime = 800; else rp->rio_pkttime = pkttime; if (weight != 0) rp->rio_weight = weight; else { /* use default */ rp->rio_weight = W_WEIGHT; /* when the link is very slow, adjust red parameters */ npkts_per_sec = 1000000 / rp->rio_pkttime; if (npkts_per_sec < 50) { /* up to about 400Kbps */ rp->rio_weight = W_WEIGHT_2; } else if (npkts_per_sec < 300) { /* up to about 2.4Mbps */ rp->rio_weight = W_WEIGHT_1; } } /* calculate wshift. weight must be power of 2 */ w = rp->rio_weight; for (i = 0; w > 1; i++) w = w >> 1; rp->rio_wshift = i; w = 1 << rp->rio_wshift; if (w != rp->rio_weight) { printf("invalid weight value %d for red! use %d\n", rp->rio_weight, w); rp->rio_weight = w; } /* allocate weight table */ rp->rio_wtab = wtab_alloc(rp->rio_weight); for (i = 0; i < RIO_NDROPPREC; i++) { struct dropprec_state *prec = &rp->rio_precstate[i]; prec->avg = 0; prec->idle = 1; if (params == NULL || params[i].inv_pmax == 0) prec->inv_pmax = default_rio_params[i].inv_pmax; else prec->inv_pmax = params[i].inv_pmax; if (params == NULL || params[i].th_min == 0) prec->th_min = default_rio_params[i].th_min; else prec->th_min = params[i].th_min; if (params == NULL || params[i].th_max == 0) prec->th_max = default_rio_params[i].th_max; else prec->th_max = params[i].th_max; /* * th_min_s and th_max_s are scaled versions of th_min * and th_max to be compared with avg. */ prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT); prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT); /* * precompute probability denominator * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */ prec->probd = (2 * (prec->th_max - prec->th_min) * prec->inv_pmax) << FP_SHIFT; microtime(&prec->last); } return (rp); } void rio_destroy(rio_t *rp) { wtab_destroy(rp->rio_wtab); free(rp, M_DEVBUF); } void rio_getstats(rio_t *rp, struct redstats *sp) { int i; for (i = 0; i < RIO_NDROPPREC; i++) { bcopy(&rp->q_stats[i], sp, sizeof(struct redstats)); sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift; sp++; } } #if (RIO_NDROPPREC == 3) /* * internally, a drop precedence value is converted to an index * starting from 0. */ static int dscp2index(u_int8_t dscp) { int dpindex = dscp & AF_DROPPRECMASK; if (dpindex == 0) return (0); return ((dpindex >> 3) - 1); } #endif #if 1 /* * kludge: when a packet is dequeued, we need to know its drop precedence * in order to keep the queue length of each drop precedence. * use m_pkthdr.rcvif to pass this info. */ #define RIOM_SET_PRECINDEX(m, idx) \ do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0) #define RIOM_GET_PRECINDEX(m) \ ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \ (m)->m_pkthdr.rcvif = NULL; idx; }) #endif int rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, struct altq_pktattr *pktattr) { int avg, droptype; u_int8_t dsfield, odsfield; int dpindex, i, n, t; struct timeval now; struct dropprec_state *prec; dsfield = odsfield = read_dsfield(m, pktattr); dpindex = dscp2index(dsfield); /* * update avg of the precedence states whose drop precedence * is larger than or equal to the drop precedence of the packet */ now.tv_sec = 0; for (i = dpindex; i < RIO_NDROPPREC; i++) { prec = &rp->rio_precstate[i]; avg = prec->avg; if (prec->idle) { prec->idle = 0; if (now.tv_sec == 0) microtime(&now); t = (now.tv_sec - prec->last.tv_sec); if (t > 60) avg = 0; else { t = t * 1000000 + (now.tv_usec - prec->last.tv_usec); n = t / rp->rio_pkttime; /* calculate (avg = (1 - Wq)^n * avg) */ if (n > 0) avg = (avg >> FP_SHIFT) * pow_w(rp->rio_wtab, n); } } /* run estimator. (avg is scaled by WEIGHT in fixed-point) */ avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift); prec->avg = avg; /* save the new value */ /* * count keeps a tally of arriving traffic that has not * been dropped. */ prec->count++; } prec = &rp->rio_precstate[dpindex]; avg = prec->avg; /* see if we drop early */ droptype = DTYPE_NODROP; if (avg >= prec->th_min_s && prec->qlen > 1) { if (avg >= prec->th_max_s) { /* avg >= th_max: forced drop */ droptype = DTYPE_FORCED; } else if (prec->old == 0) { /* first exceeds th_min */ prec->count = 1; prec->old = 1; } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift, prec->probd, prec->count)) { /* unforced drop by red */ droptype = DTYPE_EARLY; } } else { /* avg < th_min */ prec->old = 0; } /* * if the queue length hits the hard limit, it's a forced drop. */ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) droptype = DTYPE_FORCED; if (droptype != DTYPE_NODROP) { /* always drop incoming packet (as opposed to randomdrop) */ for (i = dpindex; i < RIO_NDROPPREC; i++) rp->rio_precstate[i].count = 0; #ifdef RIO_STATS if (droptype == DTYPE_EARLY) rp->q_stats[dpindex].drop_unforced++; else rp->q_stats[dpindex].drop_forced++; PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m)); #endif m_freem(m); return (-1); } for (i = dpindex; i < RIO_NDROPPREC; i++) rp->rio_precstate[i].qlen++; /* save drop precedence index in mbuf hdr */ RIOM_SET_PRECINDEX(m, dpindex); if (rp->rio_flags & RIOF_CLEARDSCP) dsfield &= ~DSCP_MASK; if (dsfield != odsfield) write_dsfield(m, pktattr, dsfield); _addq(q, m); #ifdef RIO_STATS PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m)); #endif return (0); } struct mbuf * rio_getq(rio_t *rp, class_queue_t *q) { struct mbuf *m; int dpindex, i; if ((m = _getq(q)) == NULL) return NULL; dpindex = RIOM_GET_PRECINDEX(m); for (i = dpindex; i < RIO_NDROPPREC; i++) { if (--rp->rio_precstate[i].qlen == 0) { if (rp->rio_precstate[i].idle == 0) { rp->rio_precstate[i].idle = 1; microtime(&rp->rio_precstate[i].last); } } } return (m); } -#ifdef ALTQ3_COMPAT -int -rioopen(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - /* everything will be done when the queueing scheme is attached. */ - return 0; -} - -int -rioclose(dev, flag, fmt, p) - dev_t dev; - int flag, fmt; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - rio_queue_t *rqp; - int err, error = 0; - - while ((rqp = rio_list) != NULL) { - /* destroy all */ - err = rio_detach(rqp); - if (err != 0 && error == 0) - error = err; - } - - return error; -} - -int -rioioctl(dev, cmd, addr, flag, p) - dev_t dev; - ioctlcmd_t cmd; - caddr_t addr; - int flag; -#if (__FreeBSD_version > 500000) - struct thread *p; -#else - struct proc *p; -#endif -{ - rio_queue_t *rqp; - struct rio_interface *ifacep; - struct ifnet *ifp; - int error = 0; - - /* check super-user privilege */ - switch (cmd) { - case RIO_GETSTATS: - break; - default: -#if (__FreeBSD_version > 700000) - if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) - return (error); -#elsif (__FreeBSD_version > 400000) - if ((error = suser(p)) != 0) - return (error); -#else - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) - return (error); -#endif - break; - } - - switch (cmd) { - - case RIO_ENABLE: - ifacep = (struct rio_interface *)addr; - if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { - error = EBADF; - break; - } - error = altq_enable(rqp->rq_ifq); - break; - - case RIO_DISABLE: - ifacep = (struct rio_interface *)addr; - if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { - error = EBADF; - break; - } - error = altq_disable(rqp->rq_ifq); - break; - - case RIO_IF_ATTACH: - ifp = ifunit(((struct rio_interface *)addr)->rio_ifname); - if (ifp == NULL) { - error = ENXIO; - break; - } - - /* allocate and initialize rio_queue_t */ - rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK); - if (rqp == NULL) { - error = ENOMEM; - break; - } - bzero(rqp, sizeof(rio_queue_t)); - - rqp->rq_q = malloc(sizeof(class_queue_t), - M_DEVBUF, M_WAITOK); - if (rqp->rq_q == NULL) { - free(rqp, M_DEVBUF); - error = ENOMEM; - break; - } - bzero(rqp->rq_q, sizeof(class_queue_t)); - - rqp->rq_rio = rio_alloc(0, NULL, 0, 0); - if (rqp->rq_rio == NULL) { - free(rqp->rq_q, M_DEVBUF); - free(rqp, M_DEVBUF); - error = ENOMEM; - break; - } - - rqp->rq_ifq = &ifp->if_snd; - qtail(rqp->rq_q) = NULL; - qlen(rqp->rq_q) = 0; - qlimit(rqp->rq_q) = RIO_LIMIT; - qtype(rqp->rq_q) = Q_RIO; - - /* - * set RIO to this ifnet structure. - */ - error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp, - rio_enqueue, rio_dequeue, rio_request, - NULL, NULL); - if (error) { - rio_destroy(rqp->rq_rio); - free(rqp->rq_q, M_DEVBUF); - free(rqp, M_DEVBUF); - break; - } - - /* add this state to the rio list */ - rqp->rq_next = rio_list; - rio_list = rqp; - break; - - case RIO_IF_DETACH: - ifacep = (struct rio_interface *)addr; - if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { - error = EBADF; - break; - } - error = rio_detach(rqp); - break; - - case RIO_GETSTATS: - do { - struct rio_stats *q_stats; - rio_t *rp; - int i; - - q_stats = (struct rio_stats *)addr; - if ((rqp = altq_lookup(q_stats->iface.rio_ifname, - ALTQT_RIO)) == NULL) { - error = EBADF; - break; - } - - rp = rqp->rq_rio; - - q_stats->q_limit = qlimit(rqp->rq_q); - q_stats->weight = rp->rio_weight; - q_stats->flags = rp->rio_flags; - - for (i = 0; i < RIO_NDROPPREC; i++) { - q_stats->q_len[i] = rp->rio_precstate[i].qlen; - bcopy(&rp->q_stats[i], &q_stats->q_stats[i], - sizeof(struct redstats)); - q_stats->q_stats[i].q_avg = - rp->rio_precstate[i].avg >> rp->rio_wshift; - - q_stats->q_params[i].inv_pmax - = rp->rio_precstate[i].inv_pmax; - q_stats->q_params[i].th_min - = rp->rio_precstate[i].th_min; - q_stats->q_params[i].th_max - = rp->rio_precstate[i].th_max; - } - } while (/*CONSTCOND*/ 0); - break; - - case RIO_CONFIG: - do { - struct rio_conf *fc; - rio_t *new; - int s, limit, i; - - fc = (struct rio_conf *)addr; - if ((rqp = altq_lookup(fc->iface.rio_ifname, - ALTQT_RIO)) == NULL) { - error = EBADF; - break; - } - - new = rio_alloc(fc->rio_weight, &fc->q_params[0], - fc->rio_flags, fc->rio_pkttime); - if (new == NULL) { - error = ENOMEM; - break; - } - - s = splnet(); - _flushq(rqp->rq_q); - limit = fc->rio_limit; - if (limit < fc->q_params[RIO_NDROPPREC-1].th_max) - limit = fc->q_params[RIO_NDROPPREC-1].th_max; - qlimit(rqp->rq_q) = limit; - - rio_destroy(rqp->rq_rio); - rqp->rq_rio = new; - - splx(s); - - /* write back new values */ - fc->rio_limit = limit; - for (i = 0; i < RIO_NDROPPREC; i++) { - fc->q_params[i].inv_pmax = - rqp->rq_rio->rio_precstate[i].inv_pmax; - fc->q_params[i].th_min = - rqp->rq_rio->rio_precstate[i].th_min; - fc->q_params[i].th_max = - rqp->rq_rio->rio_precstate[i].th_max; - } - } while (/*CONSTCOND*/ 0); - break; - - case RIO_SETDEFAULTS: - do { - struct redparams *rp; - int i; - - rp = (struct redparams *)addr; - for (i = 0; i < RIO_NDROPPREC; i++) - default_rio_params[i] = rp[i]; - } while (/*CONSTCOND*/ 0); - break; - - default: - error = EINVAL; - break; - } - - return error; -} - -static int -rio_detach(rqp) - rio_queue_t *rqp; -{ - rio_queue_t *tmp; - int error = 0; - - if (ALTQ_IS_ENABLED(rqp->rq_ifq)) - altq_disable(rqp->rq_ifq); - - if ((error = altq_detach(rqp->rq_ifq))) - return (error); - - if (rio_list == rqp) - rio_list = rqp->rq_next; - else { - for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next) - if (tmp->rq_next == rqp) { - tmp->rq_next = rqp->rq_next; - break; - } - if (tmp == NULL) - printf("rio_detach: no state found in rio_list!\n"); - } - - rio_destroy(rqp->rq_rio); - free(rqp->rq_q, M_DEVBUF); - free(rqp, M_DEVBUF); - return (error); -} - -/* - * rio support routines - */ -static int -rio_request(ifq, req, arg) - struct ifaltq *ifq; - int req; - void *arg; -{ - rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; - - IFQ_LOCK_ASSERT(ifq); - - switch (req) { - case ALTRQ_PURGE: - _flushq(rqp->rq_q); - if (ALTQ_IS_ENABLED(ifq)) - ifq->ifq_len = 0; - break; - } - return (0); -} - -/* - * enqueue routine: - * - * returns: 0 when successfully queued. - * ENOBUFS when drop occurs. - */ -static int -rio_enqueue(ifq, m, pktattr) - struct ifaltq *ifq; - struct mbuf *m; - struct altq_pktattr *pktattr; -{ - rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; - int error = 0; - - IFQ_LOCK_ASSERT(ifq); - - if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0) - ifq->ifq_len++; - else - error = ENOBUFS; - return error; -} - -/* - * dequeue routine: - * must be called in splimp. - * - * returns: mbuf dequeued. - * NULL when no packet is available in the queue. - */ - -static struct mbuf * -rio_dequeue(ifq, op) - struct ifaltq *ifq; - int op; -{ - rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; - struct mbuf *m = NULL; - - IFQ_LOCK_ASSERT(ifq); - - if (op == ALTDQ_POLL) - return qhead(rqp->rq_q); - - m = rio_getq(rqp->rq_rio, rqp->rq_q); - if (m != NULL) - ifq->ifq_len--; - return m; -} - -#ifdef KLD_MODULE - -static struct altqsw rio_sw = - {"rio", rioopen, rioclose, rioioctl}; - -ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw); -MODULE_VERSION(altq_rio, 1); -MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1); - -#endif /* KLD_MODULE */ -#endif /* ALTQ3_COMPAT */ #endif /* ALTQ_RIO */ Index: head/sys/net/altq/altq_rio.h =================================================================== --- head/sys/net/altq/altq_rio.h (revision 341506) +++ head/sys/net/altq/altq_rio.h (revision 341507) @@ -1,145 +1,97 @@ /*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_rio.h,v 1.9 2003/07/10 12:07:49 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_RIO_H_ #define _ALTQ_ALTQ_RIO_H_ #include /* * RIO: RED with IN/OUT bit * (extended to support more than 2 drop precedence values) */ #define RIO_NDROPPREC 3 /* number of drop precedence values */ -#ifdef ALTQ3_COMPAT -struct rio_interface { - char rio_ifname[IFNAMSIZ]; -}; -struct rio_stats { - struct rio_interface iface; - int q_len[RIO_NDROPPREC]; - struct redstats q_stats[RIO_NDROPPREC]; - - /* static red parameters */ - int q_limit; - int weight; - int flags; - struct redparams q_params[RIO_NDROPPREC]; -}; - -struct rio_conf { - struct rio_interface iface; - struct redparams q_params[RIO_NDROPPREC]; - int rio_weight; /* weight for EWMA */ - int rio_limit; /* max queue length */ - int rio_pkttime; /* average packet time in usec */ - int rio_flags; /* see below */ -}; -#endif /* ALTQ3_COMPAT */ - /* rio flags */ #define RIOF_ECN4 0x01 /* use packet marking for IPv4 packets */ #define RIOF_ECN6 0x02 /* use packet marking for IPv6 packets */ #define RIOF_ECN (RIOF_ECN4 | RIOF_ECN6) #define RIOF_CLEARDSCP 0x200 /* clear diffserv codepoint */ -#ifdef ALTQ3_COMPAT -/* - * IOCTLs for RIO - */ -#define RIO_IF_ATTACH _IOW('Q', 1, struct rio_interface) -#define RIO_IF_DETACH _IOW('Q', 2, struct rio_interface) -#define RIO_ENABLE _IOW('Q', 3, struct rio_interface) -#define RIO_DISABLE _IOW('Q', 4, struct rio_interface) -#define RIO_CONFIG _IOWR('Q', 6, struct rio_conf) -#define RIO_GETSTATS _IOWR('Q', 12, struct rio_stats) -#define RIO_SETDEFAULTS _IOW('Q', 30, struct redparams[RIO_NDROPPREC]) -#endif /* ALTQ3_COMPAT */ #ifdef _KERNEL typedef struct rio { /* per drop precedence structure */ struct dropprec_state { /* red parameters */ int inv_pmax; /* inverse of max drop probability */ int th_min; /* red min threshold */ int th_max; /* red max threshold */ /* variables for internal use */ int th_min_s; /* th_min scaled by avgshift */ int th_max_s; /* th_max scaled by avgshift */ int probd; /* drop probability denominator */ int qlen; /* queue length */ int avg; /* (scaled) queue length average */ int count; /* packet count since the last dropped/ marked packet */ int idle; /* queue was empty */ int old; /* avg is above th_min */ struct timeval last; /* timestamp when queue becomes idle */ } rio_precstate[RIO_NDROPPREC]; int rio_wshift; /* log(red_weight) */ int rio_weight; /* weight for EWMA */ struct wtab *rio_wtab; /* weight table */ int rio_pkttime; /* average packet time in micro sec used for idle calibration */ int rio_flags; /* rio flags */ u_int8_t rio_codepoint; /* codepoint value to tag packets */ u_int8_t rio_codepointmask; /* codepoint mask bits */ struct redstats q_stats[RIO_NDROPPREC]; /* statistics */ } rio_t; -#ifdef ALTQ3_COMPAT -typedef struct rio_queue { - struct rio_queue *rq_next; /* next red_state in the list */ - struct ifaltq *rq_ifq; /* backpointer to ifaltq */ - - class_queue_t *rq_q; - - rio_t *rq_rio; -} rio_queue_t; -#endif /* ALTQ3_COMPAT */ extern rio_t *rio_alloc(int, struct redparams *, int, int); extern void rio_destroy(rio_t *); extern void rio_getstats(rio_t *, struct redstats *); extern int rio_addq(rio_t *, class_queue_t *, struct mbuf *, struct altq_pktattr *); extern struct mbuf *rio_getq(rio_t *, class_queue_t *); #endif /* _KERNEL */ #endif /* _ALTQ_ALTQ_RIO_H_ */ Index: head/sys/net/altq/altq_rmclass.c =================================================================== --- head/sys/net/altq/altq_rmclass.c (revision 341506) +++ head/sys/net/altq/altq_rmclass.c (revision 341507) @@ -1,1839 +1,1831 @@ /*- * Copyright (c) 1991-1997 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Network Research * Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * LBL code modified by speer@eng.sun.com, May 1977. * For questions and/or comments, please send mail to cbq@ee.lbl.gov * * @(#)rm_class.c 1.48 97/12/05 SMI * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ #include #include #include #include #include #include #include -#ifdef ALTQ3_COMPAT -#include -#endif #include #include -#ifdef ALTQ3_COMPAT -#include -#include -#include -#endif #include #include #include #include #include #include #include /* * Local Macros */ #define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; } /* * Local routines. */ static int rmc_satisfied(struct rm_class *, struct timeval *); static void rmc_wrr_set_weights(struct rm_ifdat *); static void rmc_depth_compute(struct rm_class *); static void rmc_depth_recompute(rm_class_t *); static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int); static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int); static int _rmc_addq(rm_class_t *, mbuf_t *); static void _rmc_dropq(rm_class_t *); static mbuf_t *_rmc_getq(rm_class_t *); static mbuf_t *_rmc_pollq(rm_class_t *); static int rmc_under_limit(struct rm_class *, struct timeval *); static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *); static void rmc_drop_action(struct rm_class *); static void rmc_restart(struct rm_class *); static void rmc_root_overlimit(struct rm_class *, struct rm_class *); #define BORROW_OFFTIME /* * BORROW_OFFTIME (experimental): * borrow the offtime of the class borrowing from. * the reason is that when its own offtime is set, the class is unable * to borrow much, especially when cutoff is taking effect. * but when the borrowed class is overloaded (advidle is close to minidle), * use the borrowing class's offtime to avoid overload. */ #define ADJUST_CUTOFF /* * ADJUST_CUTOFF (experimental): * if no underlimit class is found due to cutoff, increase cutoff and * retry the scheduling loop. * also, don't invoke delay_actions while cutoff is taking effect, * since a sleeping class won't have a chance to be scheduled in the * next loop. * * now heuristics for setting the top-level variable (cutoff_) becomes: * 1. if a packet arrives for a not-overlimit class, set cutoff * to the depth of the class. * 2. if cutoff is i, and a packet arrives for an overlimit class * with an underlimit ancestor at a lower level than i (say j), * then set cutoff to j. * 3. at scheduling a packet, if there is no underlimit class * due to the current cutoff level, increase cutoff by 1 and * then try to schedule again. */ /* * rm_class_t * * rmc_newclass(...) - Create a new resource management class at priority * 'pri' on the interface given by 'ifd'. * * nsecPerByte is the data rate of the interface in nanoseconds/byte. * E.g., 800 for a 10Mb/s ethernet. If the class gets less * than 100% of the bandwidth, this number should be the * 'effective' rate for the class. Let f be the * bandwidth fraction allocated to this class, and let * nsPerByte be the data rate of the output link in * nanoseconds/byte. Then nsecPerByte is set to * nsPerByte / f. E.g., 1600 (= 800 / .5) * for a class that gets 50% of an ethernet's bandwidth. * * action the routine to call when the class is over limit. * * maxq max allowable queue size for class (in packets). * * parent parent class pointer. * * borrow class to borrow from (should be either 'parent' or null). * * maxidle max value allowed for class 'idle' time estimate (this * parameter determines how large an initial burst of packets * can be before overlimit action is invoked. * * offtime how long 'delay' action will delay when class goes over * limit (this parameter determines the steady-state burst * size when a class is running over its limit). * * Maxidle and offtime have to be computed from the following: If the * average packet size is s, the bandwidth fraction allocated to this * class is f, we want to allow b packet bursts, and the gain of the * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then: * * ptime = s * nsPerByte * (1 - f) / f * maxidle = ptime * (1 - g^b) / g^b * minidle = -ptime * (1 / (f - 1)) * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1) * * Operationally, it's convenient to specify maxidle & offtime in units * independent of the link bandwidth so the maxidle & offtime passed to * this routine are the above values multiplied by 8*f/(1000*nsPerByte). * (The constant factor is a scale factor needed to make the parameters * integers. This scaling also means that the 'unscaled' values of * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds, * not nanoseconds.) Also note that the 'idle' filter computation keeps * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of * maxidle also must be scaled upward by this value. Thus, the passed * values for maxidle and offtime can be computed as follows: * * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte) * offtime = offtime * 8 / (1000 * nsecPerByte) * * When USE_HRTIME is employed, then maxidle and offtime become: * maxidle = maxilde * (8.0 / nsecPerByte); * offtime = offtime * (8.0 / nsecPerByte); */ struct rm_class * rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, void (*action)(rm_class_t *, rm_class_t *), int maxq, struct rm_class *parent, struct rm_class *borrow, u_int maxidle, int minidle, u_int offtime, int pktsize, int flags) { struct rm_class *cl; struct rm_class *peer; int s; if (pri >= RM_MAXPRIO) return (NULL); #ifndef ALTQ_RED if (flags & RMCF_RED) { #ifdef ALTQ_DEBUG printf("rmc_newclass: RED not configured for CBQ!\n"); #endif return (NULL); } #endif #ifndef ALTQ_RIO if (flags & RMCF_RIO) { #ifdef ALTQ_DEBUG printf("rmc_newclass: RIO not configured for CBQ!\n"); #endif return (NULL); } #endif #ifndef ALTQ_CODEL if (flags & RMCF_CODEL) { #ifdef ALTQ_DEBUG printf("rmc_newclass: CODEL not configured for CBQ!\n"); #endif return (NULL); } #endif cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); CALLOUT_INIT(&cl->callout_); cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->q_ == NULL) { free(cl, M_DEVBUF); return (NULL); } /* * Class initialization. */ cl->children_ = NULL; cl->parent_ = parent; cl->borrow_ = borrow; cl->leaf_ = 1; cl->ifdat_ = ifd; cl->pri_ = pri; cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ cl->depth_ = 0; cl->qthresh_ = 0; cl->ns_per_byte_ = nsecPerByte; qlimit(cl->q_) = maxq; qtype(cl->q_) = Q_DROPHEAD; qlen(cl->q_) = 0; cl->flags_ = flags; #if 1 /* minidle is also scaled in ALTQ */ cl->minidle_ = (minidle * (int)nsecPerByte) / 8; if (cl->minidle_ > 0) cl->minidle_ = 0; #else cl->minidle_ = minidle; #endif cl->maxidle_ = (maxidle * nsecPerByte) / 8; if (cl->maxidle_ == 0) cl->maxidle_ = 1; #if 1 /* offtime is also scaled in ALTQ */ cl->avgidle_ = cl->maxidle_; cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; if (cl->offtime_ == 0) cl->offtime_ = 1; #else cl->avgidle_ = 0; cl->offtime_ = (offtime * nsecPerByte) / 8; #endif cl->overlimit = action; #ifdef ALTQ_RED if (flags & (RMCF_RED|RMCF_RIO)) { int red_flags, red_pkttime; red_flags = 0; if (flags & RMCF_ECN) red_flags |= REDF_ECN; if (flags & RMCF_FLOWVALVE) red_flags |= REDF_FLOWVALVE; #ifdef ALTQ_RIO if (flags & RMCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif red_pkttime = nsecPerByte * pktsize / 1000; if (flags & RMCF_RED) { cl->red_ = red_alloc(0, 0, qlimit(cl->q_) * 10/100, qlimit(cl->q_) * 30/100, red_flags, red_pkttime); if (cl->red_ != NULL) qtype(cl->q_) = Q_RED; } #ifdef ALTQ_RIO else { cl->red_ = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->red_ != NULL) qtype(cl->q_) = Q_RIO; } #endif } #endif /* ALTQ_RED */ #ifdef ALTQ_CODEL if (flags & RMCF_CODEL) { cl->codel_ = codel_alloc(5, 100, 0); if (cl->codel_ != NULL) qtype(cl->q_) = Q_CODEL; } #endif /* * put the class into the class tree */ s = splnet(); IFQ_LOCK(ifd->ifq_); if ((peer = ifd->active_[pri]) != NULL) { /* find the last class at this pri */ cl->peer_ = peer; while (peer->peer_ != ifd->active_[pri]) peer = peer->peer_; peer->peer_ = cl; } else { ifd->active_[pri] = cl; cl->peer_ = cl; } if (cl->parent_) { cl->next_ = parent->children_; parent->children_ = cl; parent->leaf_ = 0; } /* * Compute the depth of this class and its ancestors in the class * hierarchy. */ rmc_depth_compute(cl); /* * If CBQ's WRR is enabled, then initialize the class WRR state. */ if (ifd->wrr_) { ifd->num_[pri]++; ifd->alloc_[pri] += cl->allotment_; rmc_wrr_set_weights(ifd); } IFQ_UNLOCK(ifd->ifq_); splx(s); return (cl); } int rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle, int minidle, u_int offtime, int pktsize) { struct rm_ifdat *ifd; u_int old_allotment; int s; ifd = cl->ifdat_; old_allotment = cl->allotment_; s = splnet(); IFQ_LOCK(ifd->ifq_); cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ cl->qthresh_ = 0; cl->ns_per_byte_ = nsecPerByte; qlimit(cl->q_) = maxq; #if 1 /* minidle is also scaled in ALTQ */ cl->minidle_ = (minidle * nsecPerByte) / 8; if (cl->minidle_ > 0) cl->minidle_ = 0; #else cl->minidle_ = minidle; #endif cl->maxidle_ = (maxidle * nsecPerByte) / 8; if (cl->maxidle_ == 0) cl->maxidle_ = 1; #if 1 /* offtime is also scaled in ALTQ */ cl->avgidle_ = cl->maxidle_; cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; if (cl->offtime_ == 0) cl->offtime_ = 1; #else cl->avgidle_ = 0; cl->offtime_ = (offtime * nsecPerByte) / 8; #endif /* * If CBQ's WRR is enabled, then initialize the class WRR state. */ if (ifd->wrr_) { ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment; rmc_wrr_set_weights(ifd); } IFQ_UNLOCK(ifd->ifq_); splx(s); return (0); } /* * static void * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes * the appropriate run robin weights for the CBQ weighted round robin * algorithm. * * Returns: NONE */ static void rmc_wrr_set_weights(struct rm_ifdat *ifd) { int i; struct rm_class *cl, *clh; for (i = 0; i < RM_MAXPRIO; i++) { /* * This is inverted from that of the simulator to * maintain precision. */ if (ifd->num_[i] == 0) ifd->M_[i] = 0; else ifd->M_[i] = ifd->alloc_[i] / (ifd->num_[i] * ifd->maxpkt_); /* * Compute the weighted allotment for each class. * This takes the expensive div instruction out * of the main loop for the wrr scheduling path. * These only get recomputed when a class comes or * goes. */ if (ifd->active_[i] != NULL) { clh = cl = ifd->active_[i]; do { /* safe-guard for slow link or alloc_ == 0 */ if (ifd->M_[i] == 0) cl->w_allotment_ = 0; else cl->w_allotment_ = cl->allotment_ / ifd->M_[i]; cl = cl->peer_; } while ((cl != NULL) && (cl != clh)); } } } int rmc_get_weight(struct rm_ifdat *ifd, int pri) { if ((pri >= 0) && (pri < RM_MAXPRIO)) return (ifd->M_[pri]); else return (0); } /* * static void * rmc_depth_compute(struct rm_class *cl) - This function computes the * appropriate depth of class 'cl' and its ancestors. * * Returns: NONE */ static void rmc_depth_compute(struct rm_class *cl) { rm_class_t *t = cl, *p; /* * Recompute the depth for the branch of the tree. */ while (t != NULL) { p = t->parent_; if (p && (t->depth_ >= p->depth_)) { p->depth_ = t->depth_ + 1; t = p; } else t = NULL; } } /* * static void * rmc_depth_recompute(struct rm_class *cl) - This function re-computes * the depth of the tree after a class has been deleted. * * Returns: NONE */ static void rmc_depth_recompute(rm_class_t *cl) { #if 1 /* ALTQ */ rm_class_t *p, *t; p = cl; while (p != NULL) { if ((t = p->children_) == NULL) { p->depth_ = 0; } else { int cdepth = 0; while (t != NULL) { if (t->depth_ > cdepth) cdepth = t->depth_; t = t->next_; } if (p->depth_ == cdepth + 1) /* no change to this parent */ return; p->depth_ = cdepth + 1; } p = p->parent_; } #else rm_class_t *t; if (cl->depth_ >= 1) { if (cl->children_ == NULL) { cl->depth_ = 0; } else if ((t = cl->children_) != NULL) { while (t != NULL) { if (t->children_ != NULL) rmc_depth_recompute(t); t = t->next_; } } else rmc_depth_compute(cl); } #endif } /* * void * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This * function deletes a class from the link-sharing structure and frees * all resources associated with the class. * * Returns: NONE */ void rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl) { struct rm_class *p, *head, *previous; int s; ASSERT(cl->children_ == NULL); if (cl->sleeping_) CALLOUT_STOP(&cl->callout_); s = splnet(); IFQ_LOCK(ifd->ifq_); /* * Free packets in the packet queue. * XXX - this may not be a desired behavior. Packets should be * re-queued. */ rmc_dropall(cl); /* * If the class has a parent, then remove the class from the * class from the parent's children chain. */ if (cl->parent_ != NULL) { head = cl->parent_->children_; p = previous = head; if (head->next_ == NULL) { ASSERT(head == cl); cl->parent_->children_ = NULL; cl->parent_->leaf_ = 1; } else while (p != NULL) { if (p == cl) { if (cl == head) cl->parent_->children_ = cl->next_; else previous->next_ = cl->next_; cl->next_ = NULL; p = NULL; } else { previous = p; p = p->next_; } } } /* * Delete class from class priority peer list. */ if ((p = ifd->active_[cl->pri_]) != NULL) { /* * If there is more than one member of this priority * level, then look for class(cl) in the priority level. */ if (p != p->peer_) { while (p->peer_ != cl) p = p->peer_; p->peer_ = cl->peer_; if (ifd->active_[cl->pri_] == cl) ifd->active_[cl->pri_] = cl->peer_; } else { ASSERT(p == cl); ifd->active_[cl->pri_] = NULL; } } /* * Recompute the WRR weights. */ if (ifd->wrr_) { ifd->alloc_[cl->pri_] -= cl->allotment_; ifd->num_[cl->pri_]--; rmc_wrr_set_weights(ifd); } /* * Re-compute the depth of the tree. */ #if 1 /* ALTQ */ rmc_depth_recompute(cl->parent_); #else rmc_depth_recompute(ifd->root_); #endif IFQ_UNLOCK(ifd->ifq_); splx(s); /* * Free the class structure. */ if (cl->red_ != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) rio_destroy((rio_t *)cl->red_); #endif #ifdef ALTQ_RED if (q_is_red(cl->q_)) red_destroy(cl->red_); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->q_)) codel_destroy(cl->codel_); #endif } free(cl->q_, M_DEVBUF); free(cl, M_DEVBUF); } /* * void * rmc_init(...) - Initialize the resource management data structures * associated with the output portion of interface 'ifp'. 'ifd' is * where the structures will be built (for backwards compatibility, the * structures aren't kept in the ifnet struct). 'nsecPerByte' * gives the link speed (inverse of bandwidth) in nanoseconds/byte. * 'restart' is the driver-specific routine that the generic 'delay * until under limit' action will call to restart output. `maxq' * is the queue size of the 'link' & 'default' classes. 'maxqueued' * is the maximum number of packets that the resource management * code will allow to be queued 'downstream' (this is typically 1). * * Returns: NONE */ void rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte, void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle, int minidle, u_int offtime, int flags) { int i, mtu; /* * Initialize the CBQ tracing/debug facility. */ CBQTRACEINIT(); bzero((char *)ifd, sizeof (*ifd)); mtu = ifq->altq_ifp->if_mtu; ifd->ifq_ = ifq; ifd->restart = restart; ifd->maxqueued_ = maxqueued; ifd->ns_per_byte_ = nsecPerByte; ifd->maxpkt_ = mtu; ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0; ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0; #if 1 ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16; if (mtu * nsecPerByte > 10 * 1000000) ifd->maxiftime_ /= 4; #endif reset_cutoff(ifd); CBQTRACE(rmc_init, 'INIT', ifd->cutoff_); /* * Initialize the CBQ's WRR state. */ for (i = 0; i < RM_MAXPRIO; i++) { ifd->alloc_[i] = 0; ifd->M_[i] = 0; ifd->num_[i] = 0; ifd->na_[i] = 0; ifd->active_[i] = NULL; } /* * Initialize current packet state. */ ifd->qi_ = 0; ifd->qo_ = 0; for (i = 0; i < RM_MAXQUEUED; i++) { ifd->class_[i] = NULL; ifd->curlen_[i] = 0; ifd->borrowed_[i] = NULL; } /* * Create the root class of the link-sharing structure. */ if ((ifd->root_ = rmc_newclass(0, ifd, nsecPerByte, rmc_root_overlimit, maxq, 0, 0, maxidle, minidle, offtime, 0, 0)) == NULL) { printf("rmc_init: root class not allocated\n"); return ; } ifd->root_->depth_ = 0; } /* * void * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by * mbuf 'm' to queue for resource class 'cl'. This routine is called * by a driver's if_output routine. This routine must be called with * output packet completion interrupts locked out (to avoid racing with * rmc_dequeue_next). * * Returns: 0 on successful queueing * -1 when packet drop occurs */ int rmc_queue_packet(struct rm_class *cl, mbuf_t *m) { struct timeval now; struct rm_ifdat *ifd = cl->ifdat_; int cpri = cl->pri_; int is_empty = qempty(cl->q_); RM_GETTIME(now); if (ifd->cutoff_ > 0) { if (TV_LT(&cl->undertime_, &now)) { if (ifd->cutoff_ > cl->depth_) ifd->cutoff_ = cl->depth_; CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_); } #if 1 /* ALTQ */ else { /* * the class is overlimit. if the class has * underlimit ancestors, set cutoff to the lowest * depth among them. */ struct rm_class *borrow = cl->borrow_; while (borrow != NULL && borrow->depth_ < ifd->cutoff_) { if (TV_LT(&borrow->undertime_, &now)) { ifd->cutoff_ = borrow->depth_; CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_); break; } borrow = borrow->borrow_; } } #else /* !ALTQ */ else if ((ifd->cutoff_ > 1) && cl->borrow_) { if (TV_LT(&cl->borrow_->undertime_, &now)) { ifd->cutoff_ = cl->borrow_->depth_; CBQTRACE(rmc_queue_packet, 'ffob', cl->borrow_->depth_); } } #endif /* !ALTQ */ } if (_rmc_addq(cl, m) < 0) /* failed */ return (-1); if (is_empty) { CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle); ifd->na_[cpri]++; } if (qlen(cl->q_) > qlimit(cl->q_)) { /* note: qlimit can be set to 0 or 1 */ rmc_drop_action(cl); return (-1); } return (0); } /* * void * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all * classes to see if there are satified. */ static void rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) { int i; rm_class_t *p, *bp; for (i = RM_MAXPRIO - 1; i >= 0; i--) { if ((bp = ifd->active_[i]) != NULL) { p = bp; do { if (!rmc_satisfied(p, now)) { ifd->cutoff_ = p->depth_; return; } p = p->peer_; } while (p != bp); } } reset_cutoff(ifd); } /* * rmc_satisfied - Return 1 of the class is satisfied. O, otherwise. */ static int rmc_satisfied(struct rm_class *cl, struct timeval *now) { rm_class_t *p; if (cl == NULL) return (1); if (TV_LT(now, &cl->undertime_)) return (1); if (cl->depth_ == 0) { if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_)) return (0); else return (1); } if (cl->children_ != NULL) { p = cl->children_; while (p != NULL) { if (!rmc_satisfied(p, now)) return (0); p = p->next_; } } return (1); } /* * Return 1 if class 'cl' is under limit or can borrow from a parent, * 0 if overlimit. As a side-effect, this routine will invoke the * class overlimit action if the class if overlimit. */ static int rmc_under_limit(struct rm_class *cl, struct timeval *now) { rm_class_t *p = cl; rm_class_t *top; struct rm_ifdat *ifd = cl->ifdat_; ifd->borrowed_[ifd->qi_] = NULL; /* * If cl is the root class, then always return that it is * underlimit. Otherwise, check to see if the class is underlimit. */ if (cl->parent_ == NULL) return (1); if (cl->sleeping_) { if (TV_LT(now, &cl->undertime_)) return (0); CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; return (1); } top = NULL; while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) { if (((cl = cl->borrow_) == NULL) || (cl->depth_ > ifd->cutoff_)) { #ifdef ADJUST_CUTOFF if (cl != NULL) /* cutoff is taking effect, just return false without calling the delay action. */ return (0); #endif #ifdef BORROW_OFFTIME /* * check if the class can borrow offtime too. * borrow offtime from the top of the borrow * chain if the top class is not overloaded. */ if (cl != NULL) { /* cutoff is taking effect, use this class as top. */ top = cl; CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_); } if (top != NULL && top->avgidle_ == top->minidle_) top = NULL; p->overtime_ = *now; (p->overlimit)(p, top); #else p->overtime_ = *now; (p->overlimit)(p, NULL); #endif return (0); } top = cl; } if (cl != p) ifd->borrowed_[ifd->qi_] = cl; return (1); } /* * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to * Packet-by-packet round robin. * * The heart of the weighted round-robin scheduler, which decides which * class next gets to send a packet. Highest priority first, then * weighted round-robin within priorites. * * Each able-to-send class gets to send until its byte allocation is * exhausted. Thus, the active pointer is only changed after a class has * exhausted its allocation. * * If the scheduler finds no class that is underlimit or able to borrow, * then the first class found that had a nonzero queue and is allowed to * borrow gets to send. */ static mbuf_t * _rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op) { struct rm_class *cl = NULL, *first = NULL; u_int deficit; int cpri; mbuf_t *m; struct timeval now; RM_GETTIME(now); /* * if the driver polls the top of the queue and then removes * the polled packet, we must return the same packet. */ if (op == ALTDQ_REMOVE && ifd->pollcache_) { cl = ifd->pollcache_; cpri = cl->pri_; if (ifd->efficient_) { /* check if this class is overlimit */ if (cl->undertime_.tv_sec != 0 && rmc_under_limit(cl, &now) == 0) first = cl; } ifd->pollcache_ = NULL; goto _wrr_out; } else { /* mode == ALTDQ_POLL || pollcache == NULL */ ifd->pollcache_ = NULL; ifd->borrowed_[ifd->qi_] = NULL; } #ifdef ADJUST_CUTOFF _again: #endif for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) { if (ifd->na_[cpri] == 0) continue; deficit = 0; /* * Loop through twice for a priority level, if some class * was unable to send a packet the first round because * of the weighted round-robin mechanism. * During the second loop at this level, deficit==2. * (This second loop is not needed if for every class, * "M[cl->pri_])" times "cl->allotment" is greater than * the byte size for the largest packet in the class.) */ _wrr_loop: cl = ifd->active_[cpri]; ASSERT(cl != NULL); do { if ((deficit < 2) && (cl->bytes_alloc_ <= 0)) cl->bytes_alloc_ += cl->w_allotment_; if (!qempty(cl->q_)) { if ((cl->undertime_.tv_sec == 0) || rmc_under_limit(cl, &now)) { if (cl->bytes_alloc_ > 0 || deficit > 1) goto _wrr_out; /* underlimit but no alloc */ deficit = 1; #if 1 ifd->borrowed_[ifd->qi_] = NULL; #endif } else if (first == NULL && cl->borrow_ != NULL) first = cl; /* borrowing candidate */ } cl->bytes_alloc_ = 0; cl = cl->peer_; } while (cl != ifd->active_[cpri]); if (deficit == 1) { /* first loop found an underlimit class with deficit */ /* Loop on same priority level, with new deficit. */ deficit = 2; goto _wrr_loop; } } #ifdef ADJUST_CUTOFF /* * no underlimit class found. if cutoff is taking effect, * increase cutoff and try again. */ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) { ifd->cutoff_++; CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_); goto _again; } #endif /* ADJUST_CUTOFF */ /* * If LINK_EFFICIENCY is turned on, then the first overlimit * class we encounter will send a packet if all the classes * of the link-sharing structure are overlimit. */ reset_cutoff(ifd); CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_); if (!ifd->efficient_ || first == NULL) return (NULL); cl = first; cpri = cl->pri_; #if 0 /* too time-consuming for nothing */ if (cl->sleeping_) CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; #endif ifd->borrowed_[ifd->qi_] = cl->borrow_; ifd->cutoff_ = cl->borrow_->depth_; /* * Deque the packet and do the book keeping... */ _wrr_out: if (op == ALTDQ_REMOVE) { m = _rmc_getq(cl); if (m == NULL) panic("_rmc_wrr_dequeue_next"); if (qempty(cl->q_)) ifd->na_[cpri]--; /* * Update class statistics and link data. */ if (cl->bytes_alloc_ > 0) cl->bytes_alloc_ -= m_pktlen(m); if ((cl->bytes_alloc_ <= 0) || first == cl) ifd->active_[cl->pri_] = cl->peer_; else ifd->active_[cl->pri_] = cl; ifd->class_[ifd->qi_] = cl; ifd->curlen_[ifd->qi_] = m_pktlen(m); ifd->now_[ifd->qi_] = now; ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_; ifd->queued_++; } else { /* mode == ALTDQ_PPOLL */ m = _rmc_pollq(cl); ifd->pollcache_ = cl; } return (m); } /* * Dequeue & return next packet from the highest priority class that * has a packet to send & has enough allocation to send it. This * routine is called by a driver whenever it needs a new packet to * output. */ static mbuf_t * _rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op) { mbuf_t *m; int cpri; struct rm_class *cl, *first = NULL; struct timeval now; RM_GETTIME(now); /* * if the driver polls the top of the queue and then removes * the polled packet, we must return the same packet. */ if (op == ALTDQ_REMOVE && ifd->pollcache_) { cl = ifd->pollcache_; cpri = cl->pri_; ifd->pollcache_ = NULL; goto _prr_out; } else { /* mode == ALTDQ_POLL || pollcache == NULL */ ifd->pollcache_ = NULL; ifd->borrowed_[ifd->qi_] = NULL; } #ifdef ADJUST_CUTOFF _again: #endif for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) { if (ifd->na_[cpri] == 0) continue; cl = ifd->active_[cpri]; ASSERT(cl != NULL); do { if (!qempty(cl->q_)) { if ((cl->undertime_.tv_sec == 0) || rmc_under_limit(cl, &now)) goto _prr_out; if (first == NULL && cl->borrow_ != NULL) first = cl; } cl = cl->peer_; } while (cl != ifd->active_[cpri]); } #ifdef ADJUST_CUTOFF /* * no underlimit class found. if cutoff is taking effect, increase * cutoff and try again. */ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) { ifd->cutoff_++; goto _again; } #endif /* ADJUST_CUTOFF */ /* * If LINK_EFFICIENCY is turned on, then the first overlimit * class we encounter will send a packet if all the classes * of the link-sharing structure are overlimit. */ reset_cutoff(ifd); if (!ifd->efficient_ || first == NULL) return (NULL); cl = first; cpri = cl->pri_; #if 0 /* too time-consuming for nothing */ if (cl->sleeping_) CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; #endif ifd->borrowed_[ifd->qi_] = cl->borrow_; ifd->cutoff_ = cl->borrow_->depth_; /* * Deque the packet and do the book keeping... */ _prr_out: if (op == ALTDQ_REMOVE) { m = _rmc_getq(cl); if (m == NULL) panic("_rmc_prr_dequeue_next"); if (qempty(cl->q_)) ifd->na_[cpri]--; ifd->active_[cpri] = cl->peer_; ifd->class_[ifd->qi_] = cl; ifd->curlen_[ifd->qi_] = m_pktlen(m); ifd->now_[ifd->qi_] = now; ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_; ifd->queued_++; } else { /* mode == ALTDQ_POLL */ m = _rmc_pollq(cl); ifd->pollcache_ = cl; } return (m); } /* * mbuf_t * * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function * is invoked by the packet driver to get the next packet to be * dequeued and output on the link. If WRR is enabled, then the * WRR dequeue next routine will determine the next packet to sent. * Otherwise, packet-by-packet round robin is invoked. * * Returns: NULL, if a packet is not available or if all * classes are overlimit. * * Otherwise, Pointer to the next packet. */ mbuf_t * rmc_dequeue_next(struct rm_ifdat *ifd, int mode) { if (ifd->queued_ >= ifd->maxqueued_) return (NULL); else if (ifd->wrr_) return (_rmc_wrr_dequeue_next(ifd, mode)); else return (_rmc_prr_dequeue_next(ifd, mode)); } /* * Update the utilization estimate for the packet that just completed. * The packet's class & the parent(s) of that class all get their * estimators updated. This routine is called by the driver's output- * packet-completion interrupt service routine. */ /* * a macro to approximate "divide by 1000" that gives 0.000999, * if a value has enough effective digits. * (on pentium, mul takes 9 cycles but div takes 46!) */ #define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17)) void rmc_update_class_util(struct rm_ifdat *ifd) { int idle, avgidle, pktlen; int pkt_time, tidle; rm_class_t *cl, *borrowed; rm_class_t *borrows; struct timeval *nowp; /* * Get the most recent completed class. */ if ((cl = ifd->class_[ifd->qo_]) == NULL) return; pktlen = ifd->curlen_[ifd->qo_]; borrowed = ifd->borrowed_[ifd->qo_]; borrows = borrowed; PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen); /* * Run estimator on class and its ancestors. */ /* * rm_update_class_util is designed to be called when the * transfer is completed from a xmit complete interrupt, * but most drivers don't implement an upcall for that. * so, just use estimated completion time. * as a result, ifd->qi_ and ifd->qo_ are always synced. */ nowp = &ifd->now_[ifd->qo_]; /* get pkt_time (for link) in usec */ #if 1 /* use approximation */ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_; pkt_time = NSEC_TO_USEC(pkt_time); #else pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000; #endif #if 1 /* ALTQ4PPP */ if (TV_LT(nowp, &ifd->ifnow_)) { int iftime; /* * make sure the estimated completion time does not go * too far. it can happen when the link layer supports * data compression or the interface speed is set to * a much lower value. */ TV_DELTA(&ifd->ifnow_, nowp, iftime); if (iftime+pkt_time < ifd->maxiftime_) { TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); } else { TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_); } } else { TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); } #else if (TV_LT(nowp, &ifd->ifnow_)) { TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); } else { TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); } #endif while (cl != NULL) { TV_DELTA(&ifd->ifnow_, &cl->last_, idle); if (idle >= 2000000) /* * this class is idle enough, reset avgidle. * (TV_DELTA returns 2000000 us when delta is large.) */ cl->avgidle_ = cl->maxidle_; /* get pkt_time (for class) in usec */ #if 1 /* use approximation */ pkt_time = pktlen * cl->ns_per_byte_; pkt_time = NSEC_TO_USEC(pkt_time); #else pkt_time = pktlen * cl->ns_per_byte_ / 1000; #endif idle -= pkt_time; avgidle = cl->avgidle_; avgidle += idle - (avgidle >> RM_FILTER_GAIN); cl->avgidle_ = avgidle; /* Are we overlimit ? */ if (avgidle <= 0) { CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle); #if 1 /* ALTQ */ /* * need some lower bound for avgidle, otherwise * a borrowing class gets unbounded penalty. */ if (avgidle < cl->minidle_) avgidle = cl->avgidle_ = cl->minidle_; #endif /* set next idle to make avgidle 0 */ tidle = pkt_time + (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN); TV_ADD_DELTA(nowp, tidle, &cl->undertime_); ++cl->stats_.over; } else { cl->avgidle_ = (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle; cl->undertime_.tv_sec = 0; if (cl->sleeping_) { CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; } } if (borrows != NULL) { if (borrows != cl) ++cl->stats_.borrows; else borrows = NULL; } cl->last_ = ifd->ifnow_; cl->last_pkttime_ = pkt_time; #if 1 if (cl->parent_ == NULL) { /* take stats of root class */ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen); } #endif cl = cl->parent_; } /* * Check to see if cutoff needs to set to a new level. */ cl = ifd->class_[ifd->qo_]; if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) { #if 1 /* ALTQ */ if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) { rmc_tl_satisfied(ifd, nowp); CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_); } else { ifd->cutoff_ = borrowed->depth_; CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_); } #else /* !ALTQ */ if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) { reset_cutoff(ifd); #ifdef notdef rmc_tl_satisfied(ifd, &now); #endif CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_); } else { ifd->cutoff_ = borrowed->depth_; CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_); } #endif /* !ALTQ */ } /* * Release class slot */ ifd->borrowed_[ifd->qo_] = NULL; ifd->class_[ifd->qo_] = NULL; ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_; ifd->queued_--; } /* * void * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific) * over-limit action routines. These get invoked by rmc_under_limit() * if a class with packets to send if over its bandwidth limit & can't * borrow from a parent class. * * Returns: NONE */ static void rmc_drop_action(struct rm_class *cl) { struct rm_ifdat *ifd = cl->ifdat_; ASSERT(qlen(cl->q_) > 0); _rmc_dropq(cl); if (qempty(cl->q_)) ifd->na_[cl->pri_]--; } void rmc_dropall(struct rm_class *cl) { struct rm_ifdat *ifd = cl->ifdat_; if (!qempty(cl->q_)) { _flushq(cl->q_); ifd->na_[cl->pri_]--; } } #if (__FreeBSD_version > 300000) /* hzto() is removed from FreeBSD-3.0 */ static int hzto(struct timeval *); static int hzto(tv) struct timeval *tv; { struct timeval t2; getmicrotime(&t2); t2.tv_sec = tv->tv_sec - t2.tv_sec; t2.tv_usec = tv->tv_usec - t2.tv_usec; return (tvtohz(&t2)); } #endif /* __FreeBSD_version > 300000 */ /* * void * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ * delay action routine. It is invoked via rmc_under_limit when the * packet is discoverd to be overlimit. * * If the delay action is result of borrow class being overlimit, then * delay for the offtime of the borrowing class that is overlimit. * * Returns: NONE */ void rmc_delay_action(struct rm_class *cl, struct rm_class *borrow) { int delay, t, extradelay; cl->stats_.overactions++; TV_DELTA(&cl->undertime_, &cl->overtime_, delay); #ifndef BORROW_OFFTIME delay += cl->offtime_; #endif if (!cl->sleeping_) { CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle); #ifdef BORROW_OFFTIME if (borrow != NULL) extradelay = borrow->offtime_; else #endif extradelay = cl->offtime_; #ifdef ALTQ /* * XXX recalculate suspend time: * current undertime is (tidle + pkt_time) calculated * from the last transmission. * tidle: time required to bring avgidle back to 0 * pkt_time: target waiting time for this class * we need to replace pkt_time by offtime */ extradelay -= cl->last_pkttime_; #endif if (extradelay > 0) { TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_); delay += extradelay; } cl->sleeping_ = 1; cl->stats_.delays++; /* * Since packets are phased randomly with respect to the * clock, 1 tick (the next clock tick) can be an arbitrarily * short time so we have to wait for at least two ticks. * NOTE: If there's no other traffic, we need the timer as * a 'backstop' to restart this class. */ if (delay > tick * 2) { /* FreeBSD rounds up the tick */ t = hzto(&cl->undertime_); } else t = 2; CALLOUT_RESET(&cl->callout_, t, (timeout_t *)rmc_restart, (caddr_t)cl); } } /* * void * rmc_restart() - is just a helper routine for rmc_delay_action -- it is * called by the system timer code & is responsible checking if the * class is still sleeping (it might have been restarted as a side * effect of the queue scan on a packet arrival) and, if so, restarting * output for the class. Inspecting the class state & restarting output * require locking the class structure. In general the driver is * responsible for locking but this is the only routine that is not * called directly or indirectly from the interface driver so it has * know about system locking conventions. Under bsd, locking is done * by raising IPL to splimp so that's what's implemented here. On a * different system this would probably need to be changed. * * Returns: NONE */ static void rmc_restart(struct rm_class *cl) { struct rm_ifdat *ifd = cl->ifdat_; int s; s = splnet(); IFQ_LOCK(ifd->ifq_); if (cl->sleeping_) { cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) { CBQTRACE(rmc_restart, 'trts', cl->stats_.handle); (ifd->restart)(ifd->ifq_); } } IFQ_UNLOCK(ifd->ifq_); splx(s); } /* * void * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit * handling routine for the root class of the link sharing structure. * * Returns: NONE */ static void rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow) { panic("rmc_root_overlimit"); } /* * Packet Queue handling routines. Eventually, this is to localize the * effects on the code whether queues are red queues or droptail * queues. */ static int _rmc_addq(rm_class_t *cl, mbuf_t *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_); #endif #ifdef ALTQ_RED if (q_is_red(cl->q_)) return red_addq(cl->red_, cl->q_, m, cl->pktattr_); #endif /* ALTQ_RED */ #ifdef ALTQ_CODEL if (q_is_codel(cl->q_)) return codel_addq(cl->codel_, cl->q_, m); #endif if (cl->flags_ & RMCF_CLEARDSCP) write_dsfield(m, cl->pktattr_, 0); _addq(cl->q_, m); return (0); } /* note: _rmc_dropq is not called for red */ static void _rmc_dropq(rm_class_t *cl) { mbuf_t *m; if ((m = _getq(cl->q_)) != NULL) m_freem(m); } static mbuf_t * _rmc_getq(rm_class_t *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) return rio_getq((rio_t *)cl->red_, cl->q_); #endif #ifdef ALTQ_RED if (q_is_red(cl->q_)) return red_getq(cl->red_, cl->q_); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->q_)) return codel_getq(cl->codel_, cl->q_); #endif return _getq(cl->q_); } static mbuf_t * _rmc_pollq(rm_class_t *cl) { return qhead(cl->q_); } #ifdef CBQ_TRACE struct cbqtrace cbqtrace_buffer[NCBQTRACE+1]; struct cbqtrace *cbqtrace_ptr = NULL; int cbqtrace_count; /* * DDB hook to trace cbq events: * the last 1024 events are held in a circular buffer. * use "call cbqtrace_dump(N)" to display 20 events from Nth event. */ void cbqtrace_dump(int); static char *rmc_funcname(void *); static struct rmc_funcs { void *func; char *name; } rmc_funcs[] = { rmc_init, "rmc_init", rmc_queue_packet, "rmc_queue_packet", rmc_under_limit, "rmc_under_limit", rmc_update_class_util, "rmc_update_class_util", rmc_delay_action, "rmc_delay_action", rmc_restart, "rmc_restart", _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next", NULL, NULL }; static char *rmc_funcname(void *func) { struct rmc_funcs *fp; for (fp = rmc_funcs; fp->func != NULL; fp++) if (fp->func == func) return (fp->name); return ("unknown"); } void cbqtrace_dump(int counter) { int i, *p; char *cp; counter = counter % NCBQTRACE; p = (int *)&cbqtrace_buffer[counter]; for (i=0; i<20; i++) { printf("[0x%x] ", *p++); printf("%s: ", rmc_funcname((void *)*p++)); cp = (char *)p++; printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]); printf("%d\n",*p++); if (p >= (int *)&cbqtrace_buffer[NCBQTRACE]) p = (int *)cbqtrace_buffer; } } #endif /* CBQ_TRACE */ #endif /* ALTQ_CBQ */ #if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \ defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL) #if !defined(__GNUC__) || defined(ALTQ_DEBUG) void _addq(class_queue_t *q, mbuf_t *m) { mbuf_t *m0; if ((m0 = qtail(q)) != NULL) m->m_nextpkt = m0->m_nextpkt; else m0 = m; m0->m_nextpkt = m; qtail(q) = m; qlen(q)++; } mbuf_t * _getq(class_queue_t *q) { mbuf_t *m, *m0; if ((m = qtail(q)) == NULL) return (NULL); if ((m0 = m->m_nextpkt) != m) m->m_nextpkt = m0->m_nextpkt; else { ASSERT(qlen(q) == 1); qtail(q) = NULL; } qlen(q)--; m0->m_nextpkt = NULL; return (m0); } /* drop a packet at the tail of the queue */ mbuf_t * _getq_tail(class_queue_t *q) { mbuf_t *m, *m0, *prev; if ((m = m0 = qtail(q)) == NULL) return NULL; do { prev = m0; m0 = m0->m_nextpkt; } while (m0 != m); prev->m_nextpkt = m->m_nextpkt; if (prev == m) { ASSERT(qlen(q) == 1); qtail(q) = NULL; } else qtail(q) = prev; qlen(q)--; m->m_nextpkt = NULL; return (m); } /* randomly select a packet in the queue */ mbuf_t * _getq_random(class_queue_t *q) { struct mbuf *m; int i, n; if ((m = qtail(q)) == NULL) return NULL; if (m->m_nextpkt == m) { ASSERT(qlen(q) == 1); qtail(q) = NULL; } else { struct mbuf *prev = NULL; n = arc4random() % qlen(q) + 1; for (i = 0; i < n; i++) { prev = m; m = m->m_nextpkt; } prev->m_nextpkt = m->m_nextpkt; if (m == qtail(q)) qtail(q) = prev; } qlen(q)--; m->m_nextpkt = NULL; return (m); } void _removeq(class_queue_t *q, mbuf_t *m) { mbuf_t *m0, *prev; m0 = qtail(q); do { prev = m0; m0 = m0->m_nextpkt; } while (m0 != m); prev->m_nextpkt = m->m_nextpkt; if (prev == m) qtail(q) = NULL; else if (qtail(q) == m) qtail(q) = prev; qlen(q)--; } void _flushq(class_queue_t *q) { mbuf_t *m; while ((m = _getq(q)) != NULL) m_freem(m); ASSERT(qlen(q) == 0); } #endif /* !__GNUC__ || ALTQ_DEBUG */ #endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */ Index: head/sys/net/altq/altq_subr.c =================================================================== --- head/sys/net/altq/altq_subr.c (revision 341506) +++ head/sys/net/altq/altq_subr.c (revision 341507) @@ -1,1966 +1,1937 @@ /*- * Copyright (C) 1997-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include -#ifdef ALTQ3_COMPAT -#include -#endif /* machine dependent clock related includes */ #include #include #include #include #if defined(__amd64__) || defined(__i386__) #include /* for pentium tsc */ #include /* for CPUID_TSC */ #include /* for cpu_feature */ #endif /* __amd64 || __i386__ */ /* * internal function prototypes */ static void tbr_timeout(void *); int (*altq_input)(struct mbuf *, int) = NULL; static struct mbuf *tbr_dequeue(struct ifaltq *, int); static int tbr_timer = 0; /* token bucket regulator timer */ #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) static struct callout tbr_callout = CALLOUT_INITIALIZER; #else static struct callout tbr_callout; #endif #ifdef ALTQ3_CLFIER_COMPAT static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); #ifdef INET6 static int extract_ports6(struct mbuf *, struct ip6_hdr *, struct flowinfo_in6 *); #endif static int apply_filter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); static int apply_ppfilter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); #ifdef INET6 static int apply_filter6(u_int32_t, struct flow_filter6 *, struct flowinfo_in6 *); #endif static int apply_tosfilter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); static u_long get_filt_handle(struct acc_classifier *, int); static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); static u_int32_t filt2fibmask(struct flow_filter *); static void ip4f_cache(struct ip *, struct flowinfo_in *); static int ip4f_lookup(struct ip *, struct flowinfo_in *); static int ip4f_init(void); static struct ip4_frag *ip4f_alloc(void); static void ip4f_free(struct ip4_frag *); #endif /* ALTQ3_CLFIER_COMPAT */ /* * alternate queueing support routines */ /* look up the queue state by the interface name and the queueing type. */ void * altq_lookup(name, type) char *name; int type; { struct ifnet *ifp; if ((ifp = ifunit(name)) != NULL) { /* read if_snd unlocked */ if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) return (ifp->if_snd.altq_disc); } return NULL; } int altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify) struct ifaltq *ifq; int type; void *discipline; int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); struct mbuf *(*dequeue)(struct ifaltq *, int); int (*request)(struct ifaltq *, int, void *); void *clfier; void *(*classify)(void *, struct mbuf *, int); { IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } -#ifdef ALTQ3_COMPAT - /* - * pfaltq can override the existing discipline, but altq3 cannot. - * check these if clfier is not NULL (which implies altq3). - */ - if (clfier != NULL) { - if (ALTQ_IS_ENABLED(ifq)) { - IFQ_UNLOCK(ifq); - return EBUSY; - } - if (ALTQ_IS_ATTACHED(ifq)) { - IFQ_UNLOCK(ifq); - return EEXIST; - } - } -#endif ifq->altq_type = type; ifq->altq_disc = discipline; ifq->altq_enqueue = enqueue; ifq->altq_dequeue = dequeue; ifq->altq_request = request; ifq->altq_clfier = clfier; ifq->altq_classify = classify; ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED); -#ifdef ALTQ3_COMPAT -#ifdef ALTQ_KLD - altq_module_incref(type); -#endif -#endif IFQ_UNLOCK(ifq); return 0; } int altq_detach(ifq) struct ifaltq *ifq; { IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return EBUSY; } if (!ALTQ_IS_ATTACHED(ifq)) { IFQ_UNLOCK(ifq); return (0); } -#ifdef ALTQ3_COMPAT -#ifdef ALTQ_KLD - altq_module_declref(ifq->altq_type); -#endif -#endif ifq->altq_type = ALTQT_NONE; ifq->altq_disc = NULL; ifq->altq_enqueue = NULL; ifq->altq_dequeue = NULL; ifq->altq_request = NULL; ifq->altq_clfier = NULL; ifq->altq_classify = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; IFQ_UNLOCK(ifq); return 0; } int altq_enable(ifq) struct ifaltq *ifq; { int s; IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return 0; } s = splnet(); IFQ_PURGE_NOLOCK(ifq); ASSERT(ifq->ifq_len == 0); ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */ ifq->altq_flags |= ALTQF_ENABLED; if (ifq->altq_clfier != NULL) ifq->altq_flags |= ALTQF_CLASSIFY; splx(s); IFQ_UNLOCK(ifq); return 0; } int altq_disable(ifq) struct ifaltq *ifq; { int s; IFQ_LOCK(ifq); if (!ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return 0; } s = splnet(); IFQ_PURGE_NOLOCK(ifq); ASSERT(ifq->ifq_len == 0); ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY); splx(s); IFQ_UNLOCK(ifq); return 0; } #ifdef ALTQ_DEBUG void altq_assert(file, line, failedexpr) const char *file, *failedexpr; int line; { (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", failedexpr, file, line); panic("altq assertion"); /* NOTREACHED */ } #endif /* * internal representation of token bucket parameters * rate: (byte_per_unittime << TBR_SHIFT) / machclk_freq * (((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq * depth: byte << TBR_SHIFT * */ #define TBR_SHIFT 29 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) static struct mbuf * tbr_dequeue(ifq, op) struct ifaltq *ifq; int op; { struct tb_regulator *tbr; struct mbuf *m; int64_t interval; u_int64_t now; IFQ_LOCK_ASSERT(ifq); tbr = ifq->altq_tbr; if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { /* if this is a remove after poll, bypass tbr check */ } else { /* update token only when it is negative */ if (tbr->tbr_token <= 0) { now = read_machclk(); interval = now - tbr->tbr_last; if (interval >= tbr->tbr_filluptime) tbr->tbr_token = tbr->tbr_depth; else { tbr->tbr_token += interval * tbr->tbr_rate; if (tbr->tbr_token > tbr->tbr_depth) tbr->tbr_token = tbr->tbr_depth; } tbr->tbr_last = now; } /* if token is still negative, don't allow dequeue */ if (tbr->tbr_token <= 0) return (NULL); } if (ALTQ_IS_ENABLED(ifq)) m = (*ifq->altq_dequeue)(ifq, op); else { if (op == ALTDQ_POLL) _IF_POLL(ifq, m); else _IF_DEQUEUE(ifq, m); } if (m != NULL && op == ALTDQ_REMOVE) tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); tbr->tbr_lastop = op; return (m); } /* * set a token bucket regulator. * if the specified rate is zero, the token bucket regulator is deleted. */ int tbr_set(ifq, profile) struct ifaltq *ifq; struct tb_profile *profile; { struct tb_regulator *tbr, *otbr; if (tbr_dequeue_ptr == NULL) tbr_dequeue_ptr = tbr_dequeue; if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) { printf("tbr_set: no cpu clock available!\n"); return (ENXIO); } IFQ_LOCK(ifq); if (profile->rate == 0) { /* delete this tbr */ if ((tbr = ifq->altq_tbr) == NULL) { IFQ_UNLOCK(ifq); return (ENOENT); } ifq->altq_tbr = NULL; free(tbr, M_DEVBUF); IFQ_UNLOCK(ifq); return (0); } tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); if (tbr == NULL) { IFQ_UNLOCK(ifq); return (ENOMEM); } tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; tbr->tbr_depth = TBR_SCALE(profile->depth); if (tbr->tbr_rate > 0) tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; else tbr->tbr_filluptime = LLONG_MAX; /* * The longest time between tbr_dequeue() calls will be about 1 * system tick, as the callout that drives it is scheduled once per * tick. The refill-time detection logic in tbr_dequeue() can only * properly detect the passage of up to LLONG_MAX machclk ticks. * Therefore, in order for this logic to function properly in the * extreme case, the maximum value of tbr_filluptime should be * LLONG_MAX less one system tick's worth of machclk ticks less * some additional slop factor (here one more system tick's worth * of machclk ticks). */ if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick)) tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick; tbr->tbr_token = tbr->tbr_depth; tbr->tbr_last = read_machclk(); tbr->tbr_lastop = ALTDQ_REMOVE; otbr = ifq->altq_tbr; ifq->altq_tbr = tbr; /* set the new tbr */ if (otbr != NULL) free(otbr, M_DEVBUF); else { if (tbr_timer == 0) { CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); tbr_timer = 1; } } IFQ_UNLOCK(ifq); return (0); } /* * tbr_timeout goes through the interface list, and kicks the drivers * if necessary. * * MPSAFE */ static void tbr_timeout(arg) void *arg; { VNET_ITERATOR_DECL(vnet_iter); struct ifnet *ifp; int active, s; active = 0; s = splnet(); IFNET_RLOCK_NOSLEEP(); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp; ifp = CK_STAILQ_NEXT(ifp, if_link)) { /* read from if_snd unlocked */ if (!TBR_IS_ENABLED(&ifp->if_snd)) continue; active++; if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL) (*ifp->if_start)(ifp); } CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); IFNET_RUNLOCK_NOSLEEP(); splx(s); if (active > 0) CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); else tbr_timer = 0; /* don't need tbr_timer anymore */ } /* * attach a discipline to the interface. if one already exists, it is * overridden. * Locking is done in the discipline specific attach functions. Basically * they call back to altq_attach which takes care of the attach and locking. */ int altq_pfattach(struct pf_altq *a) { int error = 0; switch (a->scheduler) { case ALTQT_NONE: break; #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_pfattach(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_pfattach(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_pfattach(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_pfattach(a); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: error = codel_pfattach(a); break; #endif default: error = ENXIO; } return (error); } /* * detach a discipline from the interface. * it is possible that the discipline was already overridden by another * discipline. */ int altq_pfdetach(struct pf_altq *a) { struct ifnet *ifp; int s, error = 0; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); /* if this discipline is no longer referenced, just return */ /* read unlocked from if_snd */ if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc) return (0); s = splnet(); /* read unlocked from if_snd, _disable and _detach take care */ if (ALTQ_IS_ENABLED(&ifp->if_snd)) error = altq_disable(&ifp->if_snd); if (error == 0) error = altq_detach(&ifp->if_snd); splx(s); return (error); } /* * add a discipline or a queue * Locking is done in the discipline specific functions with regards to * malloc with WAITOK, also it is not yet clear which lock to use. */ int altq_add(struct pf_altq *a) { int error = 0; if (a->qname[0] != 0) return (altq_add_queue(a)); if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) panic("altq_add: no cpu clock"); switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_add_altq(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_add_altq(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_add_altq(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_add_altq(a); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: error = codel_add_altq(a); break; #endif default: error = ENXIO; } return (error); } /* * remove a discipline or a queue * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_remove(struct pf_altq *a) { int error = 0; if (a->qname[0] != 0) return (altq_remove_queue(a)); switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_remove_altq(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_remove_altq(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_remove_altq(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_remove_altq(a); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: error = codel_remove_altq(a); break; #endif default: error = ENXIO; } return (error); } /* * add a queue to the discipline * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_add_queue(struct pf_altq *a) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_add_queue(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_add_queue(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_add_queue(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_add_queue(a); break; #endif default: error = ENXIO; } return (error); } /* * remove a queue from the discipline * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_remove_queue(struct pf_altq *a) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_remove_queue(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_remove_queue(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_remove_queue(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_remove_queue(a); break; #endif default: error = ENXIO; } return (error); } /* * get queue statistics * Locking is done in the discipline specific functions with regards to * copyout operations, also it is not yet clear which lock to use. */ int altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: error = codel_getqstats(a, ubuf, nbytes, version); break; #endif default: error = ENXIO; } return (error); } /* * read and write diffserv field in IPv4 or IPv6 header */ u_int8_t read_dsfield(m, pktattr) struct mbuf *m; struct altq_pktattr *pktattr; { struct mbuf *m0; u_int8_t ds_field = 0; if (pktattr == NULL || (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) return ((u_int8_t)0); /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if ((pktattr->pattr_hdr >= m0->m_data) && (pktattr->pattr_hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, pattr_hdr is stale */ pktattr->pattr_af = AF_UNSPEC; #ifdef ALTQ_DEBUG printf("read_dsfield: can't locate header!\n"); #endif return ((u_int8_t)0); } if (pktattr->pattr_af == AF_INET) { struct ip *ip = (struct ip *)pktattr->pattr_hdr; if (ip->ip_v != 4) return ((u_int8_t)0); /* version mismatch! */ ds_field = ip->ip_tos; } #ifdef INET6 else if (pktattr->pattr_af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return ((u_int8_t)0); /* version mismatch! */ ds_field = (flowlabel >> 20) & 0xff; } #endif return (ds_field); } void write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield) { struct mbuf *m0; if (pktattr == NULL || (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) return; /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if ((pktattr->pattr_hdr >= m0->m_data) && (pktattr->pattr_hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, pattr_hdr is stale */ pktattr->pattr_af = AF_UNSPEC; #ifdef ALTQ_DEBUG printf("write_dsfield: can't locate header!\n"); #endif return; } if (pktattr->pattr_af == AF_INET) { struct ip *ip = (struct ip *)pktattr->pattr_hdr; u_int8_t old; int32_t sum; if (ip->ip_v != 4) return; /* version mismatch! */ old = ip->ip_tos; dsfield |= old & 3; /* leave CU bits */ if (old == dsfield) return; ip->ip_tos = dsfield; /* * update checksum (from RFC1624) * HC' = ~(~HC + ~m + m') */ sum = ~ntohs(ip->ip_sum) & 0xffff; sum += 0xff00 + (~old & 0xff) + dsfield; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); /* add carry */ ip->ip_sum = htons(~sum & 0xffff); } #ifdef INET6 else if (pktattr->pattr_af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return; /* version mismatch! */ flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20); ip6->ip6_flow = htonl(flowlabel); } #endif return; } /* * high resolution clock support taking advantage of a machine dependent * high resolution time counter (e.g., timestamp counter of intel pentium). * we assume * - 64-bit-long monotonically-increasing counter * - frequency range is 100M-4GHz (CPU speed) */ /* if pcc is not available or disabled, emulate 256MHz using microtime() */ #define MACHCLK_SHIFT 8 int machclk_usepcc; u_int32_t machclk_freq; u_int32_t machclk_per_tick; #if defined(__i386__) && defined(__NetBSD__) extern u_int64_t cpu_tsc_freq; #endif #if (__FreeBSD_version >= 700035) /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg, const struct cf_level *level, int status) { /* If there was an error during the transition, don't do anything. */ if (status != 0) return; #if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__)) /* If TSC is P-state invariant, don't do anything. */ if (tsc_is_invariant) return; #endif /* Total setting for this level gives the new frequency in MHz. */ init_machclk(); } EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_LAST); #endif /* __FreeBSD_version >= 700035 */ static void init_machclk_setup(void) { #if (__FreeBSD_version >= 600000) callout_init(&tbr_callout, 0); #endif machclk_usepcc = 1; #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC) machclk_usepcc = 0; #endif #if defined(__FreeBSD__) && defined(SMP) machclk_usepcc = 0; #endif #if defined(__NetBSD__) && defined(MULTIPROCESSOR) machclk_usepcc = 0; #endif #if defined(__amd64__) || defined(__i386__) /* check if TSC is available */ if ((cpu_feature & CPUID_TSC) == 0 || atomic_load_acq_64(&tsc_freq) == 0) machclk_usepcc = 0; #endif } void init_machclk(void) { static int called; /* Call one-time initialization function. */ if (!called) { init_machclk_setup(); called = 1; } if (machclk_usepcc == 0) { /* emulate 256MHz using microtime() */ machclk_freq = 1000000 << MACHCLK_SHIFT; machclk_per_tick = machclk_freq / hz; #ifdef ALTQ_DEBUG printf("altq: emulate %uHz cpu clock\n", machclk_freq); #endif return; } /* * if the clock frequency (of Pentium TSC or Alpha PCC) is * accessible, just use it. */ #if defined(__amd64__) || defined(__i386__) machclk_freq = atomic_load_acq_64(&tsc_freq); #endif /* * if we don't know the clock frequency, measure it. */ if (machclk_freq == 0) { static int wait; struct timeval tv_start, tv_end; u_int64_t start, end, diff; int timo; microtime(&tv_start); start = read_machclk(); timo = hz; /* 1 sec */ (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); microtime(&tv_end); end = read_machclk(); diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 + tv_end.tv_usec - tv_start.tv_usec; if (diff != 0) machclk_freq = (u_int)((end - start) * 1000000 / diff); } machclk_per_tick = machclk_freq / hz; #ifdef ALTQ_DEBUG printf("altq: CPU clock: %uHz\n", machclk_freq); #endif } #if defined(__OpenBSD__) && defined(__i386__) static __inline u_int64_t rdtsc(void) { u_int64_t rv; __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv)); return (rv); } #endif /* __OpenBSD__ && __i386__ */ u_int64_t read_machclk(void) { u_int64_t val; if (machclk_usepcc) { #if defined(__amd64__) || defined(__i386__) val = rdtsc(); #else panic("read_machclk"); #endif } else { struct timeval tv, boottime; microtime(&tv); getboottime(&boottime); val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000 + tv.tv_usec) << MACHCLK_SHIFT); } return (val); } #ifdef ALTQ3_CLFIER_COMPAT #ifndef IPPROTO_ESP #define IPPROTO_ESP 50 /* encapsulating security payload */ #endif #ifndef IPPROTO_AH #define IPPROTO_AH 51 /* authentication header */ #endif /* * extract flow information from a given packet. * filt_mask shows flowinfo fields required. * we assume the ip header is in one mbuf, and addresses and ports are * in network byte order. */ int altq_extractflow(m, af, flow, filt_bmask) struct mbuf *m; int af; struct flowinfo *flow; u_int32_t filt_bmask; { switch (af) { case PF_INET: { struct flowinfo_in *fin; struct ip *ip; ip = mtod(m, struct ip *); if (ip->ip_v != 4) break; fin = (struct flowinfo_in *)flow; fin->fi_len = sizeof(struct flowinfo_in); fin->fi_family = AF_INET; fin->fi_proto = ip->ip_p; fin->fi_tos = ip->ip_tos; fin->fi_src.s_addr = ip->ip_src.s_addr; fin->fi_dst.s_addr = ip->ip_dst.s_addr; if (filt_bmask & FIMB4_PORTS) /* if port info is required, extract port numbers */ extract_ports4(m, ip, fin); else { fin->fi_sport = 0; fin->fi_dport = 0; fin->fi_gpi = 0; } return (1); } #ifdef INET6 case PF_INET6: { struct flowinfo_in6 *fin6; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); /* should we check the ip version? */ fin6 = (struct flowinfo_in6 *)flow; fin6->fi6_len = sizeof(struct flowinfo_in6); fin6->fi6_family = AF_INET6; fin6->fi6_proto = ip6->ip6_nxt; fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); fin6->fi6_src = ip6->ip6_src; fin6->fi6_dst = ip6->ip6_dst; if ((filt_bmask & FIMB6_PORTS) || ((filt_bmask & FIMB6_PROTO) && ip6->ip6_nxt > IPPROTO_IPV6)) /* * if port info is required, or proto is required * but there are option headers, extract port * and protocol numbers. */ extract_ports6(m, ip6, fin6); else { fin6->fi6_sport = 0; fin6->fi6_dport = 0; fin6->fi6_gpi = 0; } return (1); } #endif /* INET6 */ default: break; } /* failed */ flow->fi_len = sizeof(struct flowinfo); flow->fi_family = AF_UNSPEC; return (0); } /* * helper routine to extract port numbers */ /* structure for ipsec and ipv6 option header template */ struct _opt6 { u_int8_t opt6_nxt; /* next header */ u_int8_t opt6_hlen; /* header extension length */ u_int16_t _pad; u_int32_t ah_spi; /* security parameter index for authentication header */ }; /* * extract port numbers from a ipv4 packet. */ static int extract_ports4(m, ip, fin) struct mbuf *m; struct ip *ip; struct flowinfo_in *fin; { struct mbuf *m0; u_short ip_off; u_int8_t proto; int off; fin->fi_sport = 0; fin->fi_dport = 0; fin->fi_gpi = 0; ip_off = ntohs(ip->ip_off); /* if it is a fragment, try cached fragment info */ if (ip_off & IP_OFFMASK) { ip4f_lookup(ip, fin); return (1); } /* locate the mbuf containing the protocol header */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)ip >= m0->m_data) && ((caddr_t)ip < m0->m_data + m0->m_len)) break; if (m0 == NULL) { #ifdef ALTQ_DEBUG printf("extract_ports4: can't locate header! ip=%p\n", ip); #endif return (0); } off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); proto = ip->ip_p; #ifdef ALTQ_IPSEC again: #endif while (off >= m0->m_len) { off -= m0->m_len; m0 = m0->m_next; if (m0 == NULL) return (0); /* bogus ip_hl! */ } if (m0->m_len < off + 4) return (0); switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: { struct udphdr *udp; udp = (struct udphdr *)(mtod(m0, caddr_t) + off); fin->fi_sport = udp->uh_sport; fin->fi_dport = udp->uh_dport; fin->fi_proto = proto; } break; #ifdef ALTQ_IPSEC case IPPROTO_ESP: if (fin->fi_gpi == 0){ u_int32_t *gpi; gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); fin->fi_gpi = *gpi; } fin->fi_proto = proto; break; case IPPROTO_AH: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); proto = opt6->opt6_nxt; off += 8 + (opt6->opt6_hlen * 4); if (fin->fi_gpi == 0 && m0->m_len >= off + 8) fin->fi_gpi = opt6->ah_spi; } /* goto the next header */ goto again; #endif /* ALTQ_IPSEC */ default: fin->fi_proto = proto; return (0); } /* if this is a first fragment, cache it. */ if (ip_off & IP_MF) ip4f_cache(ip, fin); return (1); } #ifdef INET6 static int extract_ports6(m, ip6, fin6) struct mbuf *m; struct ip6_hdr *ip6; struct flowinfo_in6 *fin6; { struct mbuf *m0; int off; u_int8_t proto; fin6->fi6_gpi = 0; fin6->fi6_sport = 0; fin6->fi6_dport = 0; /* locate the mbuf containing the protocol header */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)ip6 >= m0->m_data) && ((caddr_t)ip6 < m0->m_data + m0->m_len)) break; if (m0 == NULL) { #ifdef ALTQ_DEBUG printf("extract_ports6: can't locate header! ip6=%p\n", ip6); #endif return (0); } off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); proto = ip6->ip6_nxt; do { while (off >= m0->m_len) { off -= m0->m_len; m0 = m0->m_next; if (m0 == NULL) return (0); } if (m0->m_len < off + 4) return (0); switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: { struct udphdr *udp; udp = (struct udphdr *)(mtod(m0, caddr_t) + off); fin6->fi6_sport = udp->uh_sport; fin6->fi6_dport = udp->uh_dport; fin6->fi6_proto = proto; } return (1); case IPPROTO_ESP: if (fin6->fi6_gpi == 0) { u_int32_t *gpi; gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); fin6->fi6_gpi = *gpi; } fin6->fi6_proto = proto; return (1); case IPPROTO_AH: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8) fin6->fi6_gpi = opt6->ah_spi; proto = opt6->opt6_nxt; off += 8 + (opt6->opt6_hlen * 4); /* goto the next header */ break; } case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); proto = opt6->opt6_nxt; off += (opt6->opt6_hlen + 1) * 8; /* goto the next header */ break; } case IPPROTO_FRAGMENT: /* ipv6 fragmentations are not supported yet */ default: fin6->fi6_proto = proto; return (0); } } while (1); /*NOTREACHED*/ } #endif /* INET6 */ /* * altq common classifier */ int acc_add_filter(classifier, filter, class, phandle) struct acc_classifier *classifier; struct flow_filter *filter; void *class; u_long *phandle; { struct acc_filter *afp, *prev, *tmp; int i, s; #ifdef INET6 if (filter->ff_flow.fi_family != AF_INET && filter->ff_flow.fi_family != AF_INET6) return (EINVAL); #else if (filter->ff_flow.fi_family != AF_INET) return (EINVAL); #endif afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK); if (afp == NULL) return (ENOMEM); bzero(afp, sizeof(struct acc_filter)); afp->f_filter = *filter; afp->f_class = class; i = ACC_WILDCARD_INDEX; if (filter->ff_flow.fi_family == AF_INET) { struct flow_filter *filter4 = &afp->f_filter; /* * if address is 0, it's a wildcard. if address mask * isn't set, use full mask. */ if (filter4->ff_flow.fi_dst.s_addr == 0) filter4->ff_mask.mask_dst.s_addr = 0; else if (filter4->ff_mask.mask_dst.s_addr == 0) filter4->ff_mask.mask_dst.s_addr = 0xffffffff; if (filter4->ff_flow.fi_src.s_addr == 0) filter4->ff_mask.mask_src.s_addr = 0; else if (filter4->ff_mask.mask_src.s_addr == 0) filter4->ff_mask.mask_src.s_addr = 0xffffffff; /* clear extra bits in addresses */ filter4->ff_flow.fi_dst.s_addr &= filter4->ff_mask.mask_dst.s_addr; filter4->ff_flow.fi_src.s_addr &= filter4->ff_mask.mask_src.s_addr; /* * if dst address is a wildcard, use hash-entry * ACC_WILDCARD_INDEX. */ if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff) i = ACC_WILDCARD_INDEX; else i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr); } #ifdef INET6 else if (filter->ff_flow.fi_family == AF_INET6) { struct flow_filter6 *filter6 = (struct flow_filter6 *)&afp->f_filter; #ifndef IN6MASK0 /* taken from kame ipv6 */ #define IN6MASK0 {{{ 0, 0, 0, 0 }}} #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}} const struct in6_addr in6mask0 = IN6MASK0; const struct in6_addr in6mask128 = IN6MASK128; #endif if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst)) filter6->ff_mask6.mask6_dst = in6mask0; else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst)) filter6->ff_mask6.mask6_dst = in6mask128; if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src)) filter6->ff_mask6.mask6_src = in6mask0; else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src)) filter6->ff_mask6.mask6_src = in6mask128; /* clear extra bits in addresses */ for (i = 0; i < 16; i++) filter6->ff_flow6.fi6_dst.s6_addr[i] &= filter6->ff_mask6.mask6_dst.s6_addr[i]; for (i = 0; i < 16; i++) filter6->ff_flow6.fi6_src.s6_addr[i] &= filter6->ff_mask6.mask6_src.s6_addr[i]; if (filter6->ff_flow6.fi6_flowlabel == 0) i = ACC_WILDCARD_INDEX; else i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel); } #endif /* INET6 */ afp->f_handle = get_filt_handle(classifier, i); /* update filter bitmask */ afp->f_fbmask = filt2fibmask(filter); classifier->acc_fbmask |= afp->f_fbmask; /* * add this filter to the filter list. * filters are ordered from the highest rule number. */ s = splnet(); prev = NULL; LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) prev = tmp; else break; } if (prev == NULL) LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); else LIST_INSERT_AFTER(prev, afp, f_chain); splx(s); *phandle = afp->f_handle; return (0); } int acc_delete_filter(classifier, handle) struct acc_classifier *classifier; u_long handle; { struct acc_filter *afp; int s; if ((afp = filth_to_filtp(classifier, handle)) == NULL) return (EINVAL); s = splnet(); LIST_REMOVE(afp, f_chain); splx(s); free(afp, M_DEVBUF); /* todo: update filt_bmask */ return (0); } /* * delete filters referencing to the specified class. * if the all flag is not 0, delete all the filters. */ int acc_discard_filters(classifier, class, all) struct acc_classifier *classifier; void *class; int all; { struct acc_filter *afp; int i, s; s = splnet(); for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { do { LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (all || afp->f_class == class) { LIST_REMOVE(afp, f_chain); free(afp, M_DEVBUF); /* start again from the head */ break; } } while (afp != NULL); } splx(s); if (all) classifier->acc_fbmask = 0; return (0); } void * acc_classify(clfier, m, af) void *clfier; struct mbuf *m; int af; { struct acc_classifier *classifier; struct flowinfo flow; struct acc_filter *afp; int i; classifier = (struct acc_classifier *)clfier; altq_extractflow(m, af, &flow, classifier->acc_fbmask); if (flow.fi_family == AF_INET) { struct flowinfo_in *fp = (struct flowinfo_in *)&flow; if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { /* only tos is used */ LIST_FOREACH(afp, &classifier->acc_filters[ACC_WILDCARD_INDEX], f_chain) if (apply_tosfilter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); } else if ((classifier->acc_fbmask & (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) == 0) { /* only proto and ports are used */ LIST_FOREACH(afp, &classifier->acc_filters[ACC_WILDCARD_INDEX], f_chain) if (apply_ppfilter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); } else { /* get the filter hash entry from its dest address */ i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); do { /* * go through this loop twice. first for dst * hash, second for wildcards. */ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (apply_filter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); /* * check again for filters with a dst addr * wildcard. * (daddr == 0 || dmask != 0xffffffff). */ if (i != ACC_WILDCARD_INDEX) i = ACC_WILDCARD_INDEX; else break; } while (1); } } #ifdef INET6 else if (flow.fi_family == AF_INET6) { struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; /* get the filter hash entry from its flow ID */ if (fp6->fi6_flowlabel != 0) i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); else /* flowlable can be zero */ i = ACC_WILDCARD_INDEX; /* go through this loop twice. first for flow hash, second for wildcards. */ do { LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (apply_filter6(afp->f_fbmask, (struct flow_filter6 *)&afp->f_filter, fp6)) /* filter matched */ return (afp->f_class); /* * check again for filters with a wildcard. */ if (i != ACC_WILDCARD_INDEX) i = ACC_WILDCARD_INDEX; else break; } while (1); } #endif /* INET6 */ /* no filter matched */ return (NULL); } static int apply_filter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) return (0); if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) return (0); if ((fbmask & FIMB4_DADDR) && filt->ff_flow.fi_dst.s_addr != (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) return (0); if ((fbmask & FIMB4_SADDR) && filt->ff_flow.fi_src.s_addr != (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) return (0); if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) return (0); if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != (pkt->fi_tos & filt->ff_mask.mask_tos)) return (0); if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) return (0); /* match */ return (1); } /* * filter matching function optimized for a common case that checks * only protocol and port numbers */ static int apply_ppfilter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) return (0); if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) return (0); if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) return (0); /* match */ return (1); } /* * filter matching function only for tos field. */ static int apply_tosfilter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != (pkt->fi_tos & filt->ff_mask.mask_tos)) return (0); /* match */ return (1); } #ifdef INET6 static int apply_filter6(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter6 *filt; struct flowinfo_in6 *pkt; { int i; if (filt->ff_flow6.fi6_family != AF_INET6) return (0); if ((fbmask & FIMB6_FLABEL) && filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) return (0); if ((fbmask & FIMB6_PROTO) && filt->ff_flow6.fi6_proto != pkt->fi6_proto) return (0); if ((fbmask & FIMB6_SPORT) && filt->ff_flow6.fi6_sport != pkt->fi6_sport) return (0); if ((fbmask & FIMB6_DPORT) && filt->ff_flow6.fi6_dport != pkt->fi6_dport) return (0); if (fbmask & FIMB6_SADDR) { for (i = 0; i < 4; i++) if (filt->ff_flow6.fi6_src.s6_addr32[i] != (pkt->fi6_src.s6_addr32[i] & filt->ff_mask6.mask6_src.s6_addr32[i])) return (0); } if (fbmask & FIMB6_DADDR) { for (i = 0; i < 4; i++) if (filt->ff_flow6.fi6_dst.s6_addr32[i] != (pkt->fi6_dst.s6_addr32[i] & filt->ff_mask6.mask6_dst.s6_addr32[i])) return (0); } if ((fbmask & FIMB6_TCLASS) && filt->ff_flow6.fi6_tclass != (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) return (0); if ((fbmask & FIMB6_GPI) && filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) return (0); /* match */ return (1); } #endif /* INET6 */ /* * filter handle: * bit 20-28: index to the filter hash table * bit 0-19: unique id in the hash bucket. */ static u_long get_filt_handle(classifier, i) struct acc_classifier *classifier; int i; { static u_long handle_number = 1; u_long handle; struct acc_filter *afp; while (1) { handle = handle_number++ & 0x000fffff; if (LIST_EMPTY(&classifier->acc_filters[i])) break; LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if ((afp->f_handle & 0x000fffff) == handle) break; if (afp == NULL) break; /* this handle is already used, try again */ } return ((i << 20) | handle); } /* convert filter handle to filter pointer */ static struct acc_filter * filth_to_filtp(classifier, handle) struct acc_classifier *classifier; u_long handle; { struct acc_filter *afp; int i; i = ACC_GET_HINDEX(handle); LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (afp->f_handle == handle) return (afp); return (NULL); } /* create flowinfo bitmask */ static u_int32_t filt2fibmask(filt) struct flow_filter *filt; { u_int32_t mask = 0; #ifdef INET6 struct flow_filter6 *filt6; #endif switch (filt->ff_flow.fi_family) { case AF_INET: if (filt->ff_flow.fi_proto != 0) mask |= FIMB4_PROTO; if (filt->ff_flow.fi_tos != 0) mask |= FIMB4_TOS; if (filt->ff_flow.fi_dst.s_addr != 0) mask |= FIMB4_DADDR; if (filt->ff_flow.fi_src.s_addr != 0) mask |= FIMB4_SADDR; if (filt->ff_flow.fi_sport != 0) mask |= FIMB4_SPORT; if (filt->ff_flow.fi_dport != 0) mask |= FIMB4_DPORT; if (filt->ff_flow.fi_gpi != 0) mask |= FIMB4_GPI; break; #ifdef INET6 case AF_INET6: filt6 = (struct flow_filter6 *)filt; if (filt6->ff_flow6.fi6_proto != 0) mask |= FIMB6_PROTO; if (filt6->ff_flow6.fi6_tclass != 0) mask |= FIMB6_TCLASS; if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) mask |= FIMB6_DADDR; if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) mask |= FIMB6_SADDR; if (filt6->ff_flow6.fi6_sport != 0) mask |= FIMB6_SPORT; if (filt6->ff_flow6.fi6_dport != 0) mask |= FIMB6_DPORT; if (filt6->ff_flow6.fi6_gpi != 0) mask |= FIMB6_GPI; if (filt6->ff_flow6.fi6_flowlabel != 0) mask |= FIMB6_FLABEL; break; #endif /* INET6 */ } return (mask); } /* * helper functions to handle IPv4 fragments. * currently only in-sequence fragments are handled. * - fragment info is cached in a LRU list. * - when a first fragment is found, cache its flow info. * - when a non-first fragment is found, lookup the cache. */ struct ip4_frag { TAILQ_ENTRY(ip4_frag) ip4f_chain; char ip4f_valid; u_short ip4f_id; struct flowinfo_in ip4f_info; }; static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ static void ip4f_cache(ip, fin) struct ip *ip; struct flowinfo_in *fin; { struct ip4_frag *fp; if (TAILQ_EMPTY(&ip4f_list)) { /* first time call, allocate fragment cache entries. */ if (ip4f_init() < 0) /* allocation failed! */ return; } fp = ip4f_alloc(); fp->ip4f_id = ip->ip_id; fp->ip4f_info.fi_proto = ip->ip_p; fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr; fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr; /* save port numbers */ fp->ip4f_info.fi_sport = fin->fi_sport; fp->ip4f_info.fi_dport = fin->fi_dport; fp->ip4f_info.fi_gpi = fin->fi_gpi; } static int ip4f_lookup(ip, fin) struct ip *ip; struct flowinfo_in *fin; { struct ip4_frag *fp; for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid; fp = TAILQ_NEXT(fp, ip4f_chain)) if (ip->ip_id == fp->ip4f_id && ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr && ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr && ip->ip_p == fp->ip4f_info.fi_proto) { /* found the matching entry */ fin->fi_sport = fp->ip4f_info.fi_sport; fin->fi_dport = fp->ip4f_info.fi_dport; fin->fi_gpi = fp->ip4f_info.fi_gpi; if ((ntohs(ip->ip_off) & IP_MF) == 0) /* this is the last fragment, release the entry. */ ip4f_free(fp); return (1); } /* no matching entry found */ return (0); } static int ip4f_init(void) { struct ip4_frag *fp; int i; TAILQ_INIT(&ip4f_list); for (i=0; iip4f_valid = 0; TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); } return (0); } static struct ip4_frag * ip4f_alloc(void) { struct ip4_frag *fp; /* reclaim an entry at the tail, put it at the head */ fp = TAILQ_LAST(&ip4f_list, ip4f_list); TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); fp->ip4f_valid = 1; TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain); return (fp); } static void ip4f_free(fp) struct ip4_frag *fp; { TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); fp->ip4f_valid = 0; TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); } #endif /* ALTQ3_CLFIER_COMPAT */