Index: head/sys/net/altq/altqconf.h =================================================================== --- head/sys/net/altq/altqconf.h (revision 281641) +++ head/sys/net/altq/altqconf.h (nonexistent) @@ -1,29 +0,0 @@ -/* $OpenBSD: altqconf.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */ -/* $NetBSD: altqconf.h,v 1.2 2001/05/30 11:57:16 mrg Exp $ */ - -#if defined(_KERNEL_OPT) || defined(__OpenBSD__) - -#if defined(_KERNEL_OPT) -#include "opt_altq_enabled.h" -#endif - -#include - -#ifdef ALTQ -#define NALTQ 1 -#else -#define NALTQ 0 -#endif - -cdev_decl(altq); - -#ifdef __OpenBSD__ -#define cdev_altq_init(c,n) { \ - dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \ - (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \ - (dev_type_stop((*))) enodev, 0, (dev_type_select((*))) enodev, \ - (dev_type_mmap((*))) enodev } -#else -#define cdev_altq_init(x,y) cdev__oci_init(x,y) -#endif -#endif /* defined(_KERNEL_OPT) || defined(__OpenBSD__) */ Property changes on: head/sys/net/altq/altqconf.h ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/net/altq/altq.h =================================================================== --- head/sys/net/altq/altq.h (revision 281641) +++ head/sys/net/altq/altq.h (revision 281642) @@ -1,204 +1,204 @@ -/* $FreeBSD$ */ -/* $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $ */ - -/* +/*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $ + * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_H_ #define _ALTQ_ALTQ_H_ #if 0 /* * allow altq-3 (altqd(8) and /dev/altq) to coexist with the new pf-based altq. * altq3 is mainly for research experiments. pf-based altq is for daily use. 
*/ #define ALTQ3_COMPAT /* for compatibility with altq-3 */ #define ALTQ3_CLFIER_COMPAT /* for compatibility with altq-3 classifier */ #endif #ifdef ALTQ3_COMPAT #include #include #include #include #ifndef IFNAMSIZ #define IFNAMSIZ 16 #endif #endif /* ALTQ3_COMPAT */ /* altq discipline type */ #define ALTQT_NONE 0 /* reserved */ #define ALTQT_CBQ 1 /* cbq */ #define ALTQT_WFQ 2 /* wfq */ #define ALTQT_AFMAP 3 /* afmap */ #define ALTQT_FIFOQ 4 /* fifoq */ #define ALTQT_RED 5 /* red */ #define ALTQT_RIO 6 /* rio */ #define ALTQT_LOCALQ 7 /* local use */ #define ALTQT_HFSC 8 /* hfsc */ #define ALTQT_CDNR 9 /* traffic conditioner */ #define ALTQT_BLUE 10 /* blue */ #define ALTQT_PRIQ 11 /* priority queue */ #define ALTQT_JOBS 12 /* JoBS */ #define ALTQT_MAX 13 /* should be max discipline type + 1 */ #ifdef ALTQ3_COMPAT struct altqreq { char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */ u_long arg; /* request-specific argument */ }; #endif /* simple token bucket meter profile */ struct tb_profile { u_int rate; /* rate in bit-per-sec */ u_int depth; /* depth in bytes */ }; #ifdef ALTQ3_COMPAT struct tbrreq { char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */ struct tb_profile tb_prof; /* token bucket profile */ }; #ifdef ALTQ3_CLFIER_COMPAT /* * common network flow info structure */ struct flowinfo { u_char fi_len; /* total length */ u_char fi_family; /* address family */ u_int8_t fi_data[46]; /* actually longer; address family specific flow info. */ }; /* * flow info structure for internet protocol family. 
* (currently this is the only protocol family supported) */ struct flowinfo_in { u_char fi_len; /* sizeof(struct flowinfo_in) */ u_char fi_family; /* AF_INET */ u_int8_t fi_proto; /* IPPROTO_XXX */ u_int8_t fi_tos; /* type-of-service */ struct in_addr fi_dst; /* dest address */ struct in_addr fi_src; /* src address */ u_int16_t fi_dport; /* dest port */ u_int16_t fi_sport; /* src port */ u_int32_t fi_gpi; /* generalized port id for ipsec */ u_int8_t _pad[28]; /* make the size equal to flowinfo_in6 */ }; #ifdef SIN6_LEN struct flowinfo_in6 { u_char fi6_len; /* sizeof(struct flowinfo_in6) */ u_char fi6_family; /* AF_INET6 */ u_int8_t fi6_proto; /* IPPROTO_XXX */ u_int8_t fi6_tclass; /* traffic class */ u_int32_t fi6_flowlabel; /* ipv6 flowlabel */ u_int16_t fi6_dport; /* dest port */ u_int16_t fi6_sport; /* src port */ u_int32_t fi6_gpi; /* generalized port id */ struct in6_addr fi6_dst; /* dest address */ struct in6_addr fi6_src; /* src address */ }; #endif /* INET6 */ /* * flow filters for AF_INET and AF_INET6 */ struct flow_filter { int ff_ruleno; struct flowinfo_in ff_flow; struct { struct in_addr mask_dst; struct in_addr mask_src; u_int8_t mask_tos; u_int8_t _pad[3]; } ff_mask; u_int8_t _pad2[24]; /* make the size equal to flow_filter6 */ }; #ifdef SIN6_LEN struct flow_filter6 { int ff_ruleno; struct flowinfo_in6 ff_flow6; struct { struct in6_addr mask6_dst; struct in6_addr mask6_src; u_int8_t mask6_tclass; u_int8_t _pad[3]; } ff_mask6; }; #endif /* INET6 */ #endif /* ALTQ3_CLFIER_COMPAT */ #endif /* ALTQ3_COMPAT */ /* * generic packet counter */ struct pktcntr { u_int64_t packets; u_int64_t bytes; }; #define PKTCNTR_ADD(cntr, len) \ do { (cntr)->packets++; (cntr)->bytes += len; } while (/*CONSTCOND*/ 0) #ifdef ALTQ3_COMPAT /* * altq related ioctls */ #define ALTQGTYPE _IOWR('q', 0, struct altqreq) /* get queue type */ #if 0 /* * these ioctls are currently discipline-specific but could be shared * in the future. 
*/ #define ALTQATTACH _IOW('q', 1, struct altqreq) /* attach discipline */ #define ALTQDETACH _IOW('q', 2, struct altqreq) /* detach discipline */ #define ALTQENABLE _IOW('q', 3, struct altqreq) /* enable discipline */ #define ALTQDISABLE _IOW('q', 4, struct altqreq) /* disable discipline*/ #define ALTQCLEAR _IOW('q', 5, struct altqreq) /* (re)initialize */ #define ALTQCONFIG _IOWR('q', 6, struct altqreq) /* set config params */ #define ALTQADDCLASS _IOWR('q', 7, struct altqreq) /* add a class */ #define ALTQMODCLASS _IOWR('q', 8, struct altqreq) /* modify a class */ #define ALTQDELCLASS _IOWR('q', 9, struct altqreq) /* delete a class */ #define ALTQADDFILTER _IOWR('q', 10, struct altqreq) /* add a filter */ #define ALTQDELFILTER _IOWR('q', 11, struct altqreq) /* delete a filter */ #define ALTQGETSTATS _IOWR('q', 12, struct altqreq) /* get statistics */ #define ALTQGETCNTR _IOWR('q', 13, struct altqreq) /* get a pkt counter */ #endif /* 0 */ #define ALTQTBRSET _IOW('q', 14, struct tbrreq) /* set tb regulator */ #define ALTQTBRGET _IOWR('q', 15, struct tbrreq) /* get tb regulator */ #endif /* ALTQ3_COMPAT */ #ifdef _KERNEL #include #endif #endif /* _ALTQ_ALTQ_H_ */ Index: head/sys/net/altq/altq_cbq.c =================================================================== --- head/sys/net/altq/altq_cbq.c (revision 281641) +++ head/sys/net/altq/altq_cbq.c (revision 281642) @@ -1,1173 +1,1165 @@ -/* $FreeBSD$ */ -/* $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ */ - -/* +/*- * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the SMCC Technology * Development Group at Sun Microsystems, Inc. * * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is * provided "as is" without express or implied warranty of any kind. * * These notices must be retained in any copies of any part of this software. + * + * $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #include #endif #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif #ifdef ALTQ3_COMPAT /* * Local Data structures. */ static cbq_state_t *cbq_list = NULL; #endif /* * Forward Declarations. 
*/ static int cbq_class_destroy(cbq_state_t *, struct rm_class *); static struct rm_class *clh_to_clp(cbq_state_t *, u_int32_t); static int cbq_clear_interface(cbq_state_t *); static int cbq_request(struct ifaltq *, int, void *); static int cbq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *cbq_dequeue(struct ifaltq *, int); static void cbqrestart(struct ifaltq *); static void get_class_stats(class_stats_t *, struct rm_class *); static void cbq_purge(cbq_state_t *); #ifdef ALTQ3_COMPAT static int cbq_add_class(struct cbq_add_class *); static int cbq_delete_class(struct cbq_delete_class *); static int cbq_modify_class(struct cbq_modify_class *); static int cbq_class_create(cbq_state_t *, struct cbq_add_class *, struct rm_class *, struct rm_class *); static int cbq_clear_hierarchy(struct cbq_interface *); static int cbq_set_enable(struct cbq_interface *, int); static int cbq_ifattach(struct cbq_interface *); static int cbq_ifdetach(struct cbq_interface *); static int cbq_getstats(struct cbq_getstats *); static int cbq_add_filter(struct cbq_add_filter *); static int cbq_delete_filter(struct cbq_delete_filter *); #endif /* ALTQ3_COMPAT */ /* * int * cbq_class_destroy(cbq_mod_state_t *, struct rm_class *) - This * function destroys a given traffic class. Before destroying * the class, all traffic for that class is released. 
*/ static int cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl) { int i; /* delete the class */ rmc_delete_class(&cbqp->ifnp, cl); /* * free the class handle */ for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == cl) cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; #ifdef ALTQ3_COMPAT if (cl == cbqp->ifnp.ctl_) cbqp->ifnp.ctl_ = NULL; #endif return (0); } /* convert class handle to class pointer */ static struct rm_class * clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle) { int i; struct rm_class *cl; if (chandle == 0) return (NULL); /* * first, try optimistically the slot matching the lower bits of * the handle. if it fails, do the linear table search. */ i = chandle % CBQ_MAX_CLASSES; if ((cl = cbqp->cbq_class_tbl[i]) != NULL && cl->stats_.handle == chandle) return (cl); for (i = 0; i < CBQ_MAX_CLASSES; i++) if ((cl = cbqp->cbq_class_tbl[i]) != NULL && cl->stats_.handle == chandle) return (cl); return (NULL); } static int cbq_clear_interface(cbq_state_t *cbqp) { int again, i; struct rm_class *cl; #ifdef ALTQ3_CLFIER_COMPAT /* free the filters for this interface */ acc_discard_filters(&cbqp->cbq_classifier, NULL, 1); #endif /* clear out the classes now */ do { again = 0; for (i = 0; i < CBQ_MAX_CLASSES; i++) { if ((cl = cbqp->cbq_class_tbl[i]) != NULL) { if (is_a_parent_class(cl)) again++; else { cbq_class_destroy(cbqp, cl); cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; #ifdef ALTQ3_COMPAT if (cl == cbqp->ifnp.ctl_) cbqp->ifnp.ctl_ = NULL; #endif } } } } while (again); return (0); } static int cbq_request(struct ifaltq *ifq, int req, void *arg) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: cbq_purge(cbqp); break; } return (0); } /* copy the stats info in rm_class to class_states_t */ static 
void get_class_stats(class_stats_t *statsp, struct rm_class *cl) { statsp->xmit_cnt = cl->stats_.xmit_cnt; statsp->drop_cnt = cl->stats_.drop_cnt; statsp->over = cl->stats_.over; statsp->borrows = cl->stats_.borrows; statsp->overactions = cl->stats_.overactions; statsp->delays = cl->stats_.delays; statsp->depth = cl->depth_; statsp->priority = cl->pri_; statsp->maxidle = cl->maxidle_; statsp->minidle = cl->minidle_; statsp->offtime = cl->offtime_; statsp->qmax = qlimit(cl->q_); statsp->ns_per_byte = cl->ns_per_byte_; statsp->wrr_allot = cl->w_allotment_; statsp->qcnt = qlen(cl->q_); statsp->avgidle = cl->avgidle_; statsp->qtype = qtype(cl->q_); #ifdef ALTQ_RED if (q_is_red(cl->q_)) red_getstats(cl->red_, &statsp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) rio_getstats((rio_t *)cl->red_, &statsp->red[0]); #endif } int cbq_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc, cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL); splx(s); return (error); } int cbq_add_altq(struct pf_altq *a) { cbq_state_t *cbqp; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); /* allocate and initialize cbq_state_t */ cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cbqp == NULL) return (ENOMEM); CALLOUT_INIT(&cbqp->cbq_callout); cbqp->cbq_qlen = 0; cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ /* keep the state in pf_altq */ a->altq_disc = cbqp; return (0); } int cbq_remove_altq(struct pf_altq *a) { cbq_state_t *cbqp; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; cbq_clear_interface(cbqp); if (cbqp->ifnp.default_) cbq_class_destroy(cbqp, cbqp->ifnp.default_); if (cbqp->ifnp.root_) cbq_class_destroy(cbqp, cbqp->ifnp.root_); /* deallocate 
cbq_state_t */ free(cbqp, M_DEVBUF); return (0); } int cbq_add_queue(struct pf_altq *a) { struct rm_class *borrow, *parent; cbq_state_t *cbqp; struct rm_class *cl; struct cbq_opts *opts; int i; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); if (a->qid == 0) return (EINVAL); /* * find a free slot in the class table. if the slot matching * the lower bits of qid is free, use this slot. otherwise, * use the first free slot. */ i = a->qid % CBQ_MAX_CLASSES; if (cbqp->cbq_class_tbl[i] != NULL) { for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == NULL) break; if (i == CBQ_MAX_CLASSES) return (EINVAL); } opts = &a->pq_u.cbq_opts; /* check parameters */ if (a->priority >= CBQ_MAXPRI) return (EINVAL); /* Get pointers to parent and borrow classes. */ parent = clh_to_clp(cbqp, a->parent_qid); if (opts->flags & CBQCLF_BORROW) borrow = parent; else borrow = NULL; /* * A class must borrow from it's parent or it can not * borrow at all. Hence, borrow can be null. */ if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) { printf("cbq_add_queue: no parent class!\n"); return (EINVAL); } if ((borrow != parent) && (borrow != NULL)) { printf("cbq_add_class: borrow class != parent\n"); return (EINVAL); } /* * check parameters */ switch (opts->flags & CBQCLF_CLASSMASK) { case CBQCLF_ROOTCLASS: if (parent != NULL) return (EINVAL); if (cbqp->ifnp.root_) return (EINVAL); break; case CBQCLF_DEFCLASS: if (cbqp->ifnp.default_) return (EINVAL); break; case 0: if (a->qid == 0) return (EINVAL); break; default: /* more than two flags bits set */ return (EINVAL); } /* * create a class. if this is a root class, initialize the * interface. 
*/ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte, cbqrestart, a->qlimit, RM_MAXQUEUED, opts->maxidle, opts->minidle, opts->offtime, opts->flags); cl = cbqp->ifnp.root_; } else { cl = rmc_newclass(a->priority, &cbqp->ifnp, opts->ns_per_byte, rmc_delay_action, a->qlimit, parent, borrow, opts->maxidle, opts->minidle, opts->offtime, opts->pktsize, opts->flags); } if (cl == NULL) return (ENOMEM); /* return handle to user space. */ cl->stats_.handle = a->qid; cl->stats_.depth = cl->depth_; /* save the allocated class */ cbqp->cbq_class_tbl[i] = cl; if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) cbqp->ifnp.default_ = cl; return (0); } int cbq_remove_queue(struct pf_altq *a) { struct rm_class *cl; cbq_state_t *cbqp; int i; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) return (EINVAL); /* if we are a parent class, then return an error. */ if (is_a_parent_class(cl)) return (EINVAL); /* delete the class */ rmc_delete_class(&cbqp->ifnp, cl); /* * free the class handle */ for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == cl) { cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; break; } return (0); } int cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) { cbq_state_t *cbqp; struct rm_class *cl; class_stats_t stats; int error = 0; if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * int * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr) * - Queue data packets. 
* * cbq_enqueue is set to ifp->if_altqenqueue and called by an upper * layer (e.g. ether_output). cbq_enqueue queues the given packet * to the cbq, then invokes the driver's start routine. * * Assumptions: called in splimp * Returns: 0 if the queueing is successful. * ENOBUFS if a packet dropping occurred as a result of * the queueing. */ static int cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; struct rm_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(cbqp, t->qid); #ifdef ALTQ3_COMPAT else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) cl = pktattr->pattr_class; #endif if (cl == NULL) { cl = cbqp->ifnp.default_; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } #ifdef ALTQ3_COMPAT if (pktattr != NULL) cl->pktattr_ = pktattr; /* save proto hdr used by ECN */ else #endif cl->pktattr_ = NULL; len = m_pktlen(m); if (rmc_queue_packet(cl, m) != 0) { /* drop occurred. some mbuf was freed in rmc_queue_packet. */ PKTCNTR_ADD(&cl->stats_.drop_cnt, len); return (ENOBUFS); } /* successfully queued. */ ++cbqp->cbq_qlen; IFQ_INC_LEN(ifq); return (0); } static struct mbuf * cbq_dequeue(struct ifaltq *ifq, int op) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); m = rmc_dequeue_next(&cbqp->ifnp, op); if (m && op == ALTDQ_REMOVE) { --cbqp->cbq_qlen; /* decrement # of packets in cbq */ IFQ_DEC_LEN(ifq); /* Update the class. */ rmc_update_class_util(&cbqp->ifnp); } return (m); } /* * void * cbqrestart(queue_t *) - Restart sending of data. * called from rmc_restart in splimp via timeout after waking up * a suspended class. 
* Returns: NONE */ static void cbqrestart(struct ifaltq *ifq) { cbq_state_t *cbqp; struct ifnet *ifp; IFQ_LOCK_ASSERT(ifq); if (!ALTQ_IS_ENABLED(ifq)) /* cbq must have been detached */ return; if ((cbqp = (cbq_state_t *)ifq->altq_disc) == NULL) /* should not happen */ return; ifp = ifq->altq_ifp; if (ifp->if_start && cbqp->cbq_qlen > 0 && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { IFQ_UNLOCK(ifq); (*ifp->if_start)(ifp); IFQ_LOCK(ifq); } } static void cbq_purge(cbq_state_t *cbqp) { struct rm_class *cl; int i; for (i = 0; i < CBQ_MAX_CLASSES; i++) if ((cl = cbqp->cbq_class_tbl[i]) != NULL) rmc_dropall(cl); if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_)) cbqp->ifnp.ifq_->ifq_len = 0; } #ifdef ALTQ3_COMPAT static int cbq_add_class(acp) struct cbq_add_class *acp; { char *ifacename; struct rm_class *borrow, *parent; cbq_state_t *cbqp; ifacename = acp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); /* check parameters */ if (acp->cbq_class.priority >= CBQ_MAXPRI || acp->cbq_class.maxq > CBQ_MAXQSIZE) return (EINVAL); /* Get pointers to parent and borrow classes. */ parent = clh_to_clp(cbqp, acp->cbq_class.parent_class_handle); borrow = clh_to_clp(cbqp, acp->cbq_class.borrow_class_handle); /* * A class must borrow from it's parent or it can not * borrow at all. Hence, borrow can be null. 
*/ if (parent == NULL && (acp->cbq_class.flags & CBQCLF_ROOTCLASS) == 0) { printf("cbq_add_class: no parent class!\n"); return (EINVAL); } if ((borrow != parent) && (borrow != NULL)) { printf("cbq_add_class: borrow class != parent\n"); return (EINVAL); } return cbq_class_create(cbqp, acp, parent, borrow); } static int cbq_delete_class(dcp) struct cbq_delete_class *dcp; { char *ifacename; struct rm_class *cl; cbq_state_t *cbqp; ifacename = dcp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(cbqp, dcp->cbq_class_handle)) == NULL) return (EINVAL); /* if we are a parent class, then return an error. */ if (is_a_parent_class(cl)) return (EINVAL); /* if a filter has a reference to this class delete the filter */ acc_discard_filters(&cbqp->cbq_classifier, cl, 0); return cbq_class_destroy(cbqp, cl); } static int cbq_modify_class(acp) struct cbq_modify_class *acp; { char *ifacename; struct rm_class *cl; cbq_state_t *cbqp; ifacename = acp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); /* Get pointer to this class */ if ((cl = clh_to_clp(cbqp, acp->cbq_class_handle)) == NULL) return (EINVAL); if (rmc_modclass(cl, acp->cbq_class.nano_sec_per_byte, acp->cbq_class.maxq, acp->cbq_class.maxidle, acp->cbq_class.minidle, acp->cbq_class.offtime, acp->cbq_class.pktsize) < 0) return (EINVAL); return (0); } /* * static int * cbq_class_create(cbq_state_t *cbqp, struct cbq_add_class *acp, * struct rm_class *parent, struct rm_class *borrow) * * This function creates a new traffic class in the CBQ class hierarchy from * the given parameters. The class that is created is either the root, default, * or a new dynamic class. If CBQ is not initialized, the root class * will be created. 
*/ static int cbq_class_create(cbqp, acp, parent, borrow) cbq_state_t *cbqp; struct cbq_add_class *acp; struct rm_class *parent, *borrow; { struct rm_class *cl; cbq_class_spec_t *spec = &acp->cbq_class; u_int32_t chandle; int i; /* * allocate class handle */ for (i = 1; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == NULL) break; if (i == CBQ_MAX_CLASSES) return (EINVAL); chandle = i; /* use the slot number as class handle */ /* * create a class. if this is a root class, initialize the * interface. */ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte, cbqrestart, spec->maxq, RM_MAXQUEUED, spec->maxidle, spec->minidle, spec->offtime, spec->flags); cl = cbqp->ifnp.root_; } else { cl = rmc_newclass(spec->priority, &cbqp->ifnp, spec->nano_sec_per_byte, rmc_delay_action, spec->maxq, parent, borrow, spec->maxidle, spec->minidle, spec->offtime, spec->pktsize, spec->flags); } if (cl == NULL) return (ENOMEM); /* return handle to user space. */ acp->cbq_class_handle = chandle; cl->stats_.handle = chandle; cl->stats_.depth = cl->depth_; /* save the allocated class */ cbqp->cbq_class_tbl[i] = cl; if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) cbqp->ifnp.default_ = cl; if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_CTLCLASS) cbqp->ifnp.ctl_ = cl; return (0); } static int cbq_add_filter(afp) struct cbq_add_filter *afp; { char *ifacename; cbq_state_t *cbqp; struct rm_class *cl; ifacename = afp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); /* Get the pointer to class. 
*/ if ((cl = clh_to_clp(cbqp, afp->cbq_class_handle)) == NULL) return (EINVAL); return acc_add_filter(&cbqp->cbq_classifier, &afp->cbq_filter, cl, &afp->cbq_filter_handle); } static int cbq_delete_filter(dfp) struct cbq_delete_filter *dfp; { char *ifacename; cbq_state_t *cbqp; ifacename = dfp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); return acc_delete_filter(&cbqp->cbq_classifier, dfp->cbq_filter_handle); } /* * cbq_clear_hierarchy deletes all classes and their filters on the * given interface. */ static int cbq_clear_hierarchy(ifacep) struct cbq_interface *ifacep; { char *ifacename; cbq_state_t *cbqp; ifacename = ifacep->cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); return cbq_clear_interface(cbqp); } /* * static int * cbq_set_enable(struct cbq_interface *ep, int enable) - this function processes the * ioctl request to enable or disable class based queueing. It searches the list * of interfaces for the specified interface and then enables or disables CBQ on * that interface. * * Returns: 0, for no error. * EBADF, for specified interface not found. 
*/ static int cbq_set_enable(ep, enable) struct cbq_interface *ep; int enable; { int error = 0; cbq_state_t *cbqp; char *ifacename; ifacename = ep->cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); switch (enable) { case ENABLE: if (cbqp->ifnp.root_ == NULL || cbqp->ifnp.default_ == NULL || cbqp->ifnp.ctl_ == NULL) { if (cbqp->ifnp.root_ == NULL) printf("No Root Class for %s\n", ifacename); if (cbqp->ifnp.default_ == NULL) printf("No Default Class for %s\n", ifacename); if (cbqp->ifnp.ctl_ == NULL) printf("No Control Class for %s\n", ifacename); error = EINVAL; } else if ((error = altq_enable(cbqp->ifnp.ifq_)) == 0) { cbqp->cbq_qlen = 0; } break; case DISABLE: error = altq_disable(cbqp->ifnp.ifq_); break; } return (error); } static int cbq_getstats(gsp) struct cbq_getstats *gsp; { char *ifacename; int i, n, nclasses; cbq_state_t *cbqp; struct rm_class *cl; class_stats_t stats, *usp; int error = 0; ifacename = gsp->iface.cbq_ifacename; nclasses = gsp->nclasses; usp = gsp->stats; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); if (nclasses <= 0) return (EINVAL); for (n = 0, i = 0; n < nclasses && i < CBQ_MAX_CLASSES; n++, i++) { while ((cl = cbqp->cbq_class_tbl[i]) == NULL) if (++i >= CBQ_MAX_CLASSES) goto out; get_class_stats(&stats, cl); stats.handle = cl->stats_.handle; if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, sizeof(stats))) != 0) return (error); } out: gsp->nclasses = n; return (error); } static int cbq_ifattach(ifacep) struct cbq_interface *ifacep; { int error = 0; char *ifacename; cbq_state_t *new_cbqp; struct ifnet *ifp; ifacename = ifacep->cbq_ifacename; if ((ifp = ifunit(ifacename)) == NULL) return (ENXIO); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENXIO); /* allocate and initialize cbq_state_t */ new_cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK); if (new_cbqp == NULL) return (ENOMEM); bzero(new_cbqp, sizeof(cbq_state_t)); CALLOUT_INIT(&new_cbqp->cbq_callout); 
new_cbqp->cbq_qlen = 0; new_cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ /* * set CBQ to this ifnet structure. */ error = altq_attach(&ifp->if_snd, ALTQT_CBQ, new_cbqp, cbq_enqueue, cbq_dequeue, cbq_request, &new_cbqp->cbq_classifier, acc_classify); if (error) { free(new_cbqp, M_DEVBUF); return (error); } /* prepend to the list of cbq_state_t's. */ new_cbqp->cbq_next = cbq_list; cbq_list = new_cbqp; return (0); } static int cbq_ifdetach(ifacep) struct cbq_interface *ifacep; { char *ifacename; cbq_state_t *cbqp; ifacename = ifacep->cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); (void)cbq_set_enable(ifacep, DISABLE); cbq_clear_interface(cbqp); /* remove CBQ from the ifnet structure. */ (void)altq_detach(cbqp->ifnp.ifq_); /* remove from the list of cbq_state_t's. */ if (cbq_list == cbqp) cbq_list = cbqp->cbq_next; else { cbq_state_t *cp; for (cp = cbq_list; cp != NULL; cp = cp->cbq_next) if (cp->cbq_next == cbqp) { cp->cbq_next = cbqp->cbq_next; break; } ASSERT(cp != NULL); } /* deallocate cbq_state_t */ free(cbqp, M_DEVBUF); return (0); } /* * cbq device interface */ altqdev_decl(cbq); int cbqopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { return (0); } int cbqclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct ifnet *ifp; struct cbq_interface iface; int err, error = 0; while (cbq_list) { ifp = cbq_list->ifnp.ifq_->altq_ifp; sprintf(iface.cbq_ifacename, "%s", ifp->if_xname); err = cbq_ifdetach(&iface); if (err != 0 && error == 0) error = err; } return (error); } int cbqioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { int error = 0; /* check cmd for superuser only */ switch (cmd) { case CBQ_GETSTATS: /* currently only command that an 
ordinary user can call */ break; default: #if (__FreeBSD_version > 700000) error = priv_check(p, PRIV_ALTQ_MANAGE); #elsif (__FreeBSD_version > 400000) error = suser(p); #else error = suser(p->p_ucred, &p->p_acflag); #endif if (error) return (error); break; } switch (cmd) { case CBQ_ENABLE: error = cbq_set_enable((struct cbq_interface *)addr, ENABLE); break; case CBQ_DISABLE: error = cbq_set_enable((struct cbq_interface *)addr, DISABLE); break; case CBQ_ADD_FILTER: error = cbq_add_filter((struct cbq_add_filter *)addr); break; case CBQ_DEL_FILTER: error = cbq_delete_filter((struct cbq_delete_filter *)addr); break; case CBQ_ADD_CLASS: error = cbq_add_class((struct cbq_add_class *)addr); break; case CBQ_DEL_CLASS: error = cbq_delete_class((struct cbq_delete_class *)addr); break; case CBQ_MODIFY_CLASS: error = cbq_modify_class((struct cbq_modify_class *)addr); break; case CBQ_CLEAR_HIERARCHY: error = cbq_clear_hierarchy((struct cbq_interface *)addr); break; case CBQ_IF_ATTACH: error = cbq_ifattach((struct cbq_interface *)addr); break; case CBQ_IF_DETACH: error = cbq_ifdetach((struct cbq_interface *)addr); break; case CBQ_GETSTATS: error = cbq_getstats((struct cbq_getstats *)addr); break; default: error = EINVAL; break; } return error; } #if 0 /* for debug */ static void cbq_class_dump(int); static void cbq_class_dump(i) int i; { struct rm_class *cl; rm_class_stats_t *s; struct _class_queue_ *q; if (cbq_list == NULL) { printf("cbq_class_dump: no cbq_state found\n"); return; } cl = cbq_list->cbq_class_tbl[i]; printf("class %d cl=%p\n", i, cl); if (cl != NULL) { s = &cl->stats_; q = cl->q_; printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n", cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_); printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n", cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_, cl->maxidle_); printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n", cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_); printf("handle=%d, depth=%d, 
packets=%d, bytes=%d\n", s->handle, s->depth, (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes); printf("over=%d\n, borrows=%d, drops=%d, overactions=%d, delays=%d\n", s->over, s->borrows, (int)s->drop_cnt.packets, s->overactions, s->delays); printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n", q->tail_, q->head_, q->qlen_, q->qlim_, q->qthresh_, q->qtype_); } } #endif /* 0 */ #ifdef KLD_MODULE static struct altqsw cbq_sw = {"cbq", cbqopen, cbqclose, cbqioctl}; ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw); MODULE_DEPEND(altq_cbq, altq_red, 1, 1, 1); MODULE_DEPEND(altq_cbq, altq_rio, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_CBQ */ Index: head/sys/net/altq/altq_cdnr.c =================================================================== --- head/sys/net/altq/altq_cdnr.c (revision 281641) +++ head/sys/net/altq/altq_cdnr.c (revision 281642) @@ -1,1390 +1,1382 @@ -/* $FreeBSD$ */ -/* $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */ - -/* +/*- * Copyright (C) 1999-2002 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #ifdef ALTQ3_COMPAT #include #endif #include #ifdef ALTQ3_COMPAT /* * diffserv traffic conditioning module */ int altq_cdnr_enabled = 0; /* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */ #ifdef ALTQ_CDNR /* cdnr_list keeps all cdnr's allocated. 
*/ static LIST_HEAD(, top_cdnr) tcb_list; static int altq_cdnr_input(struct mbuf *, int); static struct top_cdnr *tcb_lookup(char *ifname); static struct cdnr_block *cdnr_handle2cb(u_long); static u_long cdnr_cb2handle(struct cdnr_block *); static void *cdnr_cballoc(struct top_cdnr *, int, struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *)); static void cdnr_cbdestroy(void *); static int tca_verify_action(struct tc_action *); static void tca_import_action(struct tc_action *, struct tc_action *); static void tca_invalidate_action(struct tc_action *); static int generic_element_destroy(struct cdnr_block *); static struct top_cdnr *top_create(struct ifaltq *); static int top_destroy(struct top_cdnr *); static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *); static int element_destroy(struct cdnr_block *); static void tb_import_profile(struct tbe *, struct tb_profile *); static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *, struct tc_action *, struct tc_action *); static int tbm_destroy(struct tbmeter *); static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *); static struct trtcm *trtcm_create(struct top_cdnr *, struct tb_profile *, struct tb_profile *, struct tc_action *, struct tc_action *, struct tc_action *, int); static int trtcm_destroy(struct trtcm *); static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *); static struct tswtcm *tswtcm_create(struct top_cdnr *, u_int32_t, u_int32_t, u_int32_t, struct tc_action *, struct tc_action *, struct tc_action *); static int tswtcm_destroy(struct tswtcm *); static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *); static int cdnrcmd_if_attach(char *); static int cdnrcmd_if_detach(char *); static int cdnrcmd_add_element(struct cdnr_add_element *); static int cdnrcmd_delete_element(struct cdnr_delete_element *); static int cdnrcmd_add_filter(struct cdnr_add_filter *); static int 
cdnrcmd_delete_filter(struct cdnr_delete_filter *); static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *); static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *); static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *); static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *); static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *); static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *); static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *); static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *); static int cdnrcmd_get_stats(struct cdnr_get_stats *); altqdev_decl(cdnr); /* * top level input function called from ip_input. * should be called before converting header fields to host-byte-order. */ int altq_cdnr_input(m, af) struct mbuf *m; int af; /* address family */ { struct ifnet *ifp; struct ip *ip; struct top_cdnr *top; struct tc_action *tca; struct cdnr_block *cb; struct cdnr_pktinfo pktinfo; ifp = m->m_pkthdr.rcvif; if (!ALTQ_IS_CNDTNING(&ifp->if_snd)) /* traffic conditioner is not enabled on this interface */ return (1); top = ifp->if_snd.altq_cdnr; ip = mtod(m, struct ip *); #ifdef INET6 if (af == AF_INET6) { u_int32_t flowlabel; flowlabel = ((struct ip6_hdr *)ip)->ip6_flow; pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK; } else #endif pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK; pktinfo.pkt_len = m_pktlen(m); tca = NULL; cb = acc_classify(&top->tc_classifier, m, af); if (cb != NULL) tca = &cb->cb_action; if (tca == NULL) tca = &top->tc_block.cb_action; while (1) { PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len); switch (tca->tca_code) { case TCACODE_PASS: return (1); case TCACODE_DROP: m_freem(m); return (0); case TCACODE_RETURN: return (0); case TCACODE_MARK: #ifdef INET6 if (af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); flowlabel = (tca->tca_dscp << 20) | (flowlabel & ~(DSCP_MASK << 20)); ip6->ip6_flow = htonl(flowlabel); } else #endif ip->ip_tos = 
tca->tca_dscp | (ip->ip_tos & DSCP_CUMASK); return (1); case TCACODE_NEXT: cb = tca->tca_next; tca = (*cb->cb_input)(cb, &pktinfo); break; case TCACODE_NONE: default: return (1); } } } static struct top_cdnr * tcb_lookup(ifname) char *ifname; { struct top_cdnr *top; struct ifnet *ifp; if ((ifp = ifunit(ifname)) != NULL) LIST_FOREACH(top, &tcb_list, tc_next) if (top->tc_ifq->altq_ifp == ifp) return (top); return (NULL); } static struct cdnr_block * cdnr_handle2cb(handle) u_long handle; { struct cdnr_block *cb; cb = (struct cdnr_block *)handle; if (handle != ALIGN(cb)) return (NULL); if (cb == NULL || cb->cb_handle != handle) return (NULL); return (cb); } static u_long cdnr_cb2handle(cb) struct cdnr_block *cb; { return (cb->cb_handle); } static void * cdnr_cballoc(top, type, input_func) struct top_cdnr *top; int type; struct tc_action *(*input_func)(struct cdnr_block *, struct cdnr_pktinfo *); { struct cdnr_block *cb; int size; switch (type) { case TCETYPE_TOP: size = sizeof(struct top_cdnr); break; case TCETYPE_ELEMENT: size = sizeof(struct cdnr_block); break; case TCETYPE_TBMETER: size = sizeof(struct tbmeter); break; case TCETYPE_TRTCM: size = sizeof(struct trtcm); break; case TCETYPE_TSWTCM: size = sizeof(struct tswtcm); break; default: return (NULL); } cb = malloc(size, M_DEVBUF, M_WAITOK); if (cb == NULL) return (NULL); bzero(cb, size); cb->cb_len = size; cb->cb_type = type; cb->cb_ref = 0; cb->cb_handle = (u_long)cb; if (top == NULL) cb->cb_top = (struct top_cdnr *)cb; else cb->cb_top = top; if (input_func != NULL) { /* * if this cdnr has an action function, * make tc_action to call itself. 
*/ cb->cb_action.tca_code = TCACODE_NEXT; cb->cb_action.tca_next = cb; cb->cb_input = input_func; } else cb->cb_action.tca_code = TCACODE_NONE; /* if this isn't top, register the element to the top level cdnr */ if (top != NULL) LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next); return ((void *)cb); } static void cdnr_cbdestroy(cblock) void *cblock; { struct cdnr_block *cb = cblock; /* delete filters belonging to this cdnr */ acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0); /* remove from the top level cdnr */ if (cb->cb_top != cblock) LIST_REMOVE(cb, cb_next); free(cb, M_DEVBUF); } /* * conditioner common destroy routine */ static int generic_element_destroy(cb) struct cdnr_block *cb; { int error = 0; switch (cb->cb_type) { case TCETYPE_TOP: error = top_destroy((struct top_cdnr *)cb); break; case TCETYPE_ELEMENT: error = element_destroy(cb); break; case TCETYPE_TBMETER: error = tbm_destroy((struct tbmeter *)cb); break; case TCETYPE_TRTCM: error = trtcm_destroy((struct trtcm *)cb); break; case TCETYPE_TSWTCM: error = tswtcm_destroy((struct tswtcm *)cb); break; default: error = EINVAL; } return (error); } static int tca_verify_action(utca) struct tc_action *utca; { switch (utca->tca_code) { case TCACODE_PASS: case TCACODE_DROP: case TCACODE_MARK: /* these are ok */ break; case TCACODE_HANDLE: /* verify handle value */ if (cdnr_handle2cb(utca->tca_handle) == NULL) return (-1); break; case TCACODE_NONE: case TCACODE_RETURN: case TCACODE_NEXT: default: /* should not be passed from a user */ return (-1); } return (0); } static void tca_import_action(ktca, utca) struct tc_action *ktca, *utca; { struct cdnr_block *cb; *ktca = *utca; if (ktca->tca_code == TCACODE_HANDLE) { cb = cdnr_handle2cb(ktca->tca_handle); if (cb == NULL) { ktca->tca_code = TCACODE_NONE; return; } ktca->tca_code = TCACODE_NEXT; ktca->tca_next = cb; cb->cb_ref++; } else if (ktca->tca_code == TCACODE_MARK) { ktca->tca_dscp &= DSCP_MASK; } return; } static void tca_invalidate_action(tca) struct 
tc_action *tca;
{
	struct cdnr_block *cb;

	/* an action that points at another block holds a reference on it */
	if (tca->tca_code == TCACODE_NEXT) {
		cb = tca->tca_next;
		if (cb == NULL)
			return;
		cb->cb_ref--;
	}
	tca->tca_code = TCACODE_NONE;
}

/*
 * top level traffic conditioner
 */

/*
 * top_create: allocate the top level conditioner block for ifq, give it
 * TCACODE_PASS as its default action, link it onto tcb_list and record
 * it in ifq->altq_cdnr.  Returns NULL when the allocation fails.
 */
static struct top_cdnr *
top_create(ifq)
	struct ifaltq *ifq;
{
	struct top_cdnr *top;

	if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL)
		return (NULL);

	top->tc_ifq = ifq;
	/* set default action for the top level conditioner */
	top->tc_block.cb_action.tca_code = TCACODE_PASS;

	LIST_INSERT_HEAD(&tcb_list, top, tc_next);

	ifq->altq_cdnr = top;

	return (top);
}

/*
 * top_destroy: tear down a top level conditioner.
 *
 * Clears the conditioning flag on the queue, detaches the conditioner
 * from it, destroys the elements on tc_elements, unlinks the block from
 * tcb_list and frees it.  Afterwards the global input hook is cleared
 * when no remaining conditioner is enabled.  Always returns 0.
 */
static int
top_destroy(top)
	struct top_cdnr *top;
{
	struct cdnr_block *cb;

	if (ALTQ_IS_CNDTNING(top->tc_ifq))
		ALTQ_CLEAR_CNDTNING(top->tc_ifq);
	top->tc_ifq->altq_cdnr = NULL;

	/*
	 * destroy all the conditioner elements belonging to this interface
	 *
	 * each pass destroys the first element that nothing references
	 * (cb_ref == 0); destroying it releases the references it held,
	 * which lets later passes make progress.
	 *
	 * NOTE(review): if every remaining element has cb_ref > 0 (for
	 * instance a reference held by a block that is not on this list),
	 * a pass destroys nothing and this loop does not terminate --
	 * confirm that such references cannot exist.
	 */
	while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
		while (cb != NULL && cb->cb_ref > 0)
			cb = LIST_NEXT(cb, cb_next);
		if (cb != NULL)
			generic_element_destroy(cb);
	}

	LIST_REMOVE(top, tc_next);

	cdnr_cbdestroy(top);

	/* if there is no active conditioner, remove the input hook */
	if (altq_input != NULL) {
		/* top is reused here as the list cursor */
		LIST_FOREACH(top, &tcb_list, tc_next)
			if (ALTQ_IS_CNDTNING(top->tc_ifq))
				break;
		if (top == NULL)
			altq_input = NULL;
	}

	return (0);
}

/*
 * simple tc elements without input function (e.g., dropper and makers).
*/ static struct cdnr_block * element_create(top, action) struct top_cdnr *top; struct tc_action *action; { struct cdnr_block *cb; if (tca_verify_action(action) < 0) return (NULL); if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL) return (NULL); tca_import_action(&cb->cb_action, action); return (cb); } static int element_destroy(cb) struct cdnr_block *cb; { if (cb->cb_ref > 0) return (EBUSY); tca_invalidate_action(&cb->cb_action); cdnr_cbdestroy(cb); return (0); } /* * internal representation of token bucket parameters * rate: byte_per_unittime << 32 * (((bits_per_sec) / 8) << 32) / machclk_freq * depth: byte << 32 * */ #define TB_SHIFT 32 #define TB_SCALE(x) ((u_int64_t)(x) << TB_SHIFT) #define TB_UNSCALE(x) ((x) >> TB_SHIFT) static void tb_import_profile(tb, profile) struct tbe *tb; struct tb_profile *profile; { tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq; tb->depth = TB_SCALE(profile->depth); if (tb->rate > 0) tb->filluptime = tb->depth / tb->rate; else tb->filluptime = 0xffffffffffffffffLL; tb->token = tb->depth; tb->last = read_machclk(); } /* * simple token bucket meter */ static struct tbmeter * tbm_create(top, profile, in_action, out_action) struct top_cdnr *top; struct tb_profile *profile; struct tc_action *in_action, *out_action; { struct tbmeter *tbm = NULL; if (tca_verify_action(in_action) < 0 || tca_verify_action(out_action) < 0) return (NULL); if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER, tbm_input)) == NULL) return (NULL); tb_import_profile(&tbm->tb, profile); tca_import_action(&tbm->in_action, in_action); tca_import_action(&tbm->out_action, out_action); return (tbm); } static int tbm_destroy(tbm) struct tbmeter *tbm; { if (tbm->cdnrblk.cb_ref > 0) return (EBUSY); tca_invalidate_action(&tbm->in_action); tca_invalidate_action(&tbm->out_action); cdnr_cbdestroy(tbm); return (0); } static struct tc_action * tbm_input(cb, pktinfo) struct cdnr_block *cb; struct cdnr_pktinfo *pktinfo; { struct tbmeter *tbm = (struct tbmeter *)cb; 
u_int64_t len; u_int64_t interval, now; len = TB_SCALE(pktinfo->pkt_len); if (tbm->tb.token < len) { now = read_machclk(); interval = now - tbm->tb.last; if (interval >= tbm->tb.filluptime) tbm->tb.token = tbm->tb.depth; else { tbm->tb.token += interval * tbm->tb.rate; if (tbm->tb.token > tbm->tb.depth) tbm->tb.token = tbm->tb.depth; } tbm->tb.last = now; } if (tbm->tb.token < len) { PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len); return (&tbm->out_action); } tbm->tb.token -= len; PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len); return (&tbm->in_action); } /* * two rate three color marker * as described in draft-heinanen-diffserv-trtcm-01.txt */ static struct trtcm * trtcm_create(top, cmtd_profile, peak_profile, green_action, yellow_action, red_action, coloraware) struct top_cdnr *top; struct tb_profile *cmtd_profile, *peak_profile; struct tc_action *green_action, *yellow_action, *red_action; int coloraware; { struct trtcm *tcm = NULL; if (tca_verify_action(green_action) < 0 || tca_verify_action(yellow_action) < 0 || tca_verify_action(red_action) < 0) return (NULL); if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM, trtcm_input)) == NULL) return (NULL); tb_import_profile(&tcm->cmtd_tb, cmtd_profile); tb_import_profile(&tcm->peak_tb, peak_profile); tca_import_action(&tcm->green_action, green_action); tca_import_action(&tcm->yellow_action, yellow_action); tca_import_action(&tcm->red_action, red_action); /* set dscps to use */ if (tcm->green_action.tca_code == TCACODE_MARK) tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK; else tcm->green_dscp = DSCP_AF11; if (tcm->yellow_action.tca_code == TCACODE_MARK) tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK; else tcm->yellow_dscp = DSCP_AF12; if (tcm->red_action.tca_code == TCACODE_MARK) tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK; else tcm->red_dscp = DSCP_AF13; tcm->coloraware = coloraware; return (tcm); } static int trtcm_destroy(tcm) struct trtcm *tcm; { if (tcm->cdnrblk.cb_ref > 0) return (EBUSY); 
tca_invalidate_action(&tcm->green_action); tca_invalidate_action(&tcm->yellow_action); tca_invalidate_action(&tcm->red_action); cdnr_cbdestroy(tcm); return (0); } static struct tc_action * trtcm_input(cb, pktinfo) struct cdnr_block *cb; struct cdnr_pktinfo *pktinfo; { struct trtcm *tcm = (struct trtcm *)cb; u_int64_t len; u_int64_t interval, now; u_int8_t color; len = TB_SCALE(pktinfo->pkt_len); if (tcm->coloraware) { color = pktinfo->pkt_dscp; if (color != tcm->yellow_dscp && color != tcm->red_dscp) color = tcm->green_dscp; } else { /* if color-blind, precolor it as green */ color = tcm->green_dscp; } now = read_machclk(); if (tcm->cmtd_tb.token < len) { interval = now - tcm->cmtd_tb.last; if (interval >= tcm->cmtd_tb.filluptime) tcm->cmtd_tb.token = tcm->cmtd_tb.depth; else { tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate; if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth) tcm->cmtd_tb.token = tcm->cmtd_tb.depth; } tcm->cmtd_tb.last = now; } if (tcm->peak_tb.token < len) { interval = now - tcm->peak_tb.last; if (interval >= tcm->peak_tb.filluptime) tcm->peak_tb.token = tcm->peak_tb.depth; else { tcm->peak_tb.token += interval * tcm->peak_tb.rate; if (tcm->peak_tb.token > tcm->peak_tb.depth) tcm->peak_tb.token = tcm->peak_tb.depth; } tcm->peak_tb.last = now; } if (color == tcm->red_dscp || tcm->peak_tb.token < len) { pktinfo->pkt_dscp = tcm->red_dscp; PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len); return (&tcm->red_action); } if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) { pktinfo->pkt_dscp = tcm->yellow_dscp; tcm->peak_tb.token -= len; PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len); return (&tcm->yellow_action); } pktinfo->pkt_dscp = tcm->green_dscp; tcm->cmtd_tb.token -= len; tcm->peak_tb.token -= len; PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len); return (&tcm->green_action); } /* * time sliding window three color marker * as described in draft-fang-diffserv-tc-tswtcm-00.txt */ static struct tswtcm * tswtcm_create(top, cmtd_rate, peak_rate, 
avg_interval, green_action, yellow_action, red_action) struct top_cdnr *top; u_int32_t cmtd_rate, peak_rate, avg_interval; struct tc_action *green_action, *yellow_action, *red_action; { struct tswtcm *tsw; if (tca_verify_action(green_action) < 0 || tca_verify_action(yellow_action) < 0 || tca_verify_action(red_action) < 0) return (NULL); if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM, tswtcm_input)) == NULL) return (NULL); tca_import_action(&tsw->green_action, green_action); tca_import_action(&tsw->yellow_action, yellow_action); tca_import_action(&tsw->red_action, red_action); /* set dscps to use */ if (tsw->green_action.tca_code == TCACODE_MARK) tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK; else tsw->green_dscp = DSCP_AF11; if (tsw->yellow_action.tca_code == TCACODE_MARK) tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK; else tsw->yellow_dscp = DSCP_AF12; if (tsw->red_action.tca_code == TCACODE_MARK) tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK; else tsw->red_dscp = DSCP_AF13; /* convert rates from bits/sec to bytes/sec */ tsw->cmtd_rate = cmtd_rate / 8; tsw->peak_rate = peak_rate / 8; tsw->avg_rate = 0; /* timewin is converted from msec to machine clock unit */ tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000; return (tsw); } static int tswtcm_destroy(tsw) struct tswtcm *tsw; { if (tsw->cdnrblk.cb_ref > 0) return (EBUSY); tca_invalidate_action(&tsw->green_action); tca_invalidate_action(&tsw->yellow_action); tca_invalidate_action(&tsw->red_action); cdnr_cbdestroy(tsw); return (0); } static struct tc_action * tswtcm_input(cb, pktinfo) struct cdnr_block *cb; struct cdnr_pktinfo *pktinfo; { struct tswtcm *tsw = (struct tswtcm *)cb; int len; u_int32_t avg_rate; u_int64_t interval, now, tmp; /* * rate estimator */ len = pktinfo->pkt_len; now = read_machclk(); interval = now - tsw->t_front; /* * calculate average rate: * avg = (avg * timewin + pkt_len)/(timewin + interval) * pkt_len needs to be multiplied by machclk_freq in order to 
* get (bytes/sec). * note: when avg_rate (bytes/sec) and timewin (machclk unit) are * less than 32 bits, the following 64-bit operation has enough * precision. */ tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval); tsw->avg_rate = avg_rate = (u_int32_t)tmp; tsw->t_front = now; /* * marker */ if (avg_rate > tsw->cmtd_rate) { u_int32_t randval = arc4random() % avg_rate; if (avg_rate > tsw->peak_rate) { if (randval < avg_rate - tsw->peak_rate) { /* mark red */ pktinfo->pkt_dscp = tsw->red_dscp; PKTCNTR_ADD(&tsw->red_cnt, len); return (&tsw->red_action); } else if (randval < avg_rate - tsw->cmtd_rate) goto mark_yellow; } else { /* peak_rate >= avg_rate > cmtd_rate */ if (randval < avg_rate - tsw->cmtd_rate) { mark_yellow: pktinfo->pkt_dscp = tsw->yellow_dscp; PKTCNTR_ADD(&tsw->yellow_cnt, len); return (&tsw->yellow_action); } } } /* mark green */ pktinfo->pkt_dscp = tsw->green_dscp; PKTCNTR_ADD(&tsw->green_cnt, len); return (&tsw->green_action); } /* * ioctl requests */ static int cdnrcmd_if_attach(ifname) char *ifname; { struct ifnet *ifp; struct top_cdnr *top; if ((ifp = ifunit(ifname)) == NULL) return (EBADF); if (ifp->if_snd.altq_cdnr != NULL) return (EBUSY); if ((top = top_create(&ifp->if_snd)) == NULL) return (ENOMEM); return (0); } static int cdnrcmd_if_detach(ifname) char *ifname; { struct top_cdnr *top; if ((top = tcb_lookup(ifname)) == NULL) return (EBADF); return top_destroy(top); } static int cdnrcmd_add_element(ap) struct cdnr_add_element *ap; { struct top_cdnr *top; struct cdnr_block *cb; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); cb = element_create(top, &ap->action); if (cb == NULL) return (EINVAL); /* return a class handle to the user */ ap->cdnr_handle = cdnr_cb2handle(cb); return (0); } static int cdnrcmd_delete_element(ap) struct cdnr_delete_element *ap; { struct top_cdnr *top; struct cdnr_block *cb; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return 
(EBADF); if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); if (cb->cb_type != TCETYPE_ELEMENT) return generic_element_destroy(cb); return element_destroy(cb); } static int cdnrcmd_add_filter(ap) struct cdnr_add_filter *ap; { struct top_cdnr *top; struct cdnr_block *cb; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); return acc_add_filter(&top->tc_classifier, &ap->filter, cb, &ap->filter_handle); } static int cdnrcmd_delete_filter(ap) struct cdnr_delete_filter *ap; { struct top_cdnr *top; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); return acc_delete_filter(&top->tc_classifier, ap->filter_handle); } static int cdnrcmd_add_tbm(ap) struct cdnr_add_tbmeter *ap; { struct top_cdnr *top; struct tbmeter *tbm; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action); if (tbm == NULL) return (EINVAL); /* return a class handle to the user */ ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk); return (0); } static int cdnrcmd_modify_tbm(ap) struct cdnr_modify_tbmeter *ap; { struct tbmeter *tbm; if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); tb_import_profile(&tbm->tb, &ap->profile); return (0); } static int cdnrcmd_tbm_stats(ap) struct cdnr_tbmeter_stats *ap; { struct tbmeter *tbm; if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); ap->in_cnt = tbm->in_cnt; ap->out_cnt = tbm->out_cnt; return (0); } static int cdnrcmd_add_trtcm(ap) struct cdnr_add_trtcm *ap; { struct top_cdnr *top; struct trtcm *tcm; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile, &ap->green_action, &ap->yellow_action, &ap->red_action, ap->coloraware); if (tcm == NULL) return (EINVAL); /* return a class handle to the user */ 
ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk); return (0); } static int cdnrcmd_modify_trtcm(ap) struct cdnr_modify_trtcm *ap; { struct trtcm *tcm; if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile); tb_import_profile(&tcm->peak_tb, &ap->peak_profile); return (0); } static int cdnrcmd_tcm_stats(ap) struct cdnr_tcm_stats *ap; { struct cdnr_block *cb; if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); if (cb->cb_type == TCETYPE_TRTCM) { struct trtcm *tcm = (struct trtcm *)cb; ap->green_cnt = tcm->green_cnt; ap->yellow_cnt = tcm->yellow_cnt; ap->red_cnt = tcm->red_cnt; } else if (cb->cb_type == TCETYPE_TSWTCM) { struct tswtcm *tsw = (struct tswtcm *)cb; ap->green_cnt = tsw->green_cnt; ap->yellow_cnt = tsw->yellow_cnt; ap->red_cnt = tsw->red_cnt; } else return (EINVAL); return (0); } static int cdnrcmd_add_tswtcm(ap) struct cdnr_add_tswtcm *ap; { struct top_cdnr *top; struct tswtcm *tsw; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); if (ap->cmtd_rate > ap->peak_rate) return (EINVAL); tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate, ap->avg_interval, &ap->green_action, &ap->yellow_action, &ap->red_action); if (tsw == NULL) return (EINVAL); /* return a class handle to the user */ ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk); return (0); } static int cdnrcmd_modify_tswtcm(ap) struct cdnr_modify_tswtcm *ap; { struct tswtcm *tsw; if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) return (EINVAL); if (ap->cmtd_rate > ap->peak_rate) return (EINVAL); /* convert rates from bits/sec to bytes/sec */ tsw->cmtd_rate = ap->cmtd_rate / 8; tsw->peak_rate = ap->peak_rate / 8; tsw->avg_rate = 0; /* timewin is converted from msec to machine clock unit */ tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000; return (0); } static int cdnrcmd_get_stats(ap) struct cdnr_get_stats *ap; { struct top_cdnr *top; struct 
cdnr_block *cb; struct tbmeter *tbm; struct trtcm *tcm; struct tswtcm *tsw; struct tce_stats tce, *usp; int error, n, nskip, nelements; if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) return (EBADF); /* copy action stats */ bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts)); /* stats for each element */ nelements = ap->nelements; usp = ap->tce_stats; if (nelements <= 0 || usp == NULL) return (0); nskip = ap->nskip; n = 0; LIST_FOREACH(cb, &top->tc_elements, cb_next) { if (nskip > 0) { nskip--; continue; } bzero(&tce, sizeof(tce)); tce.tce_handle = cb->cb_handle; tce.tce_type = cb->cb_type; switch (cb->cb_type) { case TCETYPE_TBMETER: tbm = (struct tbmeter *)cb; tce.tce_cnts[0] = tbm->in_cnt; tce.tce_cnts[1] = tbm->out_cnt; break; case TCETYPE_TRTCM: tcm = (struct trtcm *)cb; tce.tce_cnts[0] = tcm->green_cnt; tce.tce_cnts[1] = tcm->yellow_cnt; tce.tce_cnts[2] = tcm->red_cnt; break; case TCETYPE_TSWTCM: tsw = (struct tswtcm *)cb; tce.tce_cnts[0] = tsw->green_cnt; tce.tce_cnts[1] = tsw->yellow_cnt; tce.tce_cnts[2] = tsw->red_cnt; break; default: continue; } if ((error = copyout((caddr_t)&tce, (caddr_t)usp++, sizeof(tce))) != 0) return (error); if (++n == nelements) break; } ap->nelements = n; return (0); } /* * conditioner device interface */ int cdnropen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) { printf("cdnr: no cpu clock available!\n"); return (ENXIO); } /* everything will be done when the queueing scheme is attached. 
*/ return 0; } int cdnrclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct top_cdnr *top; int err, error = 0; while ((top = LIST_FIRST(&tcb_list)) != NULL) { /* destroy all */ err = top_destroy(top); if (err != 0 && error == 0) error = err; } altq_input = NULL; return (error); } int cdnrioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct top_cdnr *top; struct cdnr_interface *ifacep; int s, error = 0; /* check super-user privilege */ switch (cmd) { case CDNR_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) #elsif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) #endif return (error); break; } -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif switch (cmd) { case CDNR_IF_ATTACH: ifacep = (struct cdnr_interface *)addr; error = cdnrcmd_if_attach(ifacep->cdnr_ifname); break; case CDNR_IF_DETACH: ifacep = (struct cdnr_interface *)addr; error = cdnrcmd_if_detach(ifacep->cdnr_ifname); break; case CDNR_ENABLE: case CDNR_DISABLE: ifacep = (struct cdnr_interface *)addr; if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) { error = EBADF; break; } switch (cmd) { case CDNR_ENABLE: ALTQ_SET_CNDTNING(top->tc_ifq); if (altq_input == NULL) altq_input = altq_cdnr_input; break; case CDNR_DISABLE: ALTQ_CLEAR_CNDTNING(top->tc_ifq); LIST_FOREACH(top, &tcb_list, tc_next) if (ALTQ_IS_CNDTNING(top->tc_ifq)) break; if (top == NULL) altq_input = NULL; break; } break; case CDNR_ADD_ELEM: error = cdnrcmd_add_element((struct cdnr_add_element *)addr); break; case CDNR_DEL_ELEM: error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr); break; case CDNR_ADD_TBM: error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr); break; case CDNR_MOD_TBM: 
error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr); break; case CDNR_TBM_STATS: error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr); break; case CDNR_ADD_TCM: error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr); break; case CDNR_MOD_TCM: error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr); break; case CDNR_TCM_STATS: error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr); break; case CDNR_ADD_FILTER: error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr); break; case CDNR_DEL_FILTER: error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr); break; case CDNR_GETSTATS: error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr); break; case CDNR_ADD_TSW: error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr); break; case CDNR_MOD_TSW: error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr); break; default: error = EINVAL; break; } splx(s); return error; } #ifdef KLD_MODULE static struct altqsw cdnr_sw = {"cdnr", cdnropen, cdnrclose, cdnrioctl}; ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_CDNR */ Index: head/sys/net/altq/altq_classq.h =================================================================== --- head/sys/net/altq/altq_classq.h (revision 281641) +++ head/sys/net/altq/altq_classq.h (revision 281642) @@ -1,206 +1,207 @@ -/* $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $ */ - -/* +/*- * Copyright (c) 1991-1997 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Network Research * Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $ + * $FreeBSD$ */ /* * class queue definitions extracted from rm_class.h. */ #ifndef _ALTQ_ALTQ_CLASSQ_H_ #define _ALTQ_ALTQ_CLASSQ_H_ #ifdef __cplusplus extern "C" { #endif /* * Packet Queue types: RED or DROPHEAD. */ #define Q_DROPHEAD 0x00 #define Q_RED 0x01 #define Q_RIO 0x02 #define Q_DROPTAIL 0x03 #ifdef _KERNEL /* * Packet Queue structures and macros to manipulate them. 
*/

/*
 * A class queue is a circular, singly linked ring of packets chained
 * through m_nextpkt.  Only the tail is stored; the head is reached as
 * tail_->m_nextpkt (see qhead()).  A one-packet queue points at itself.
 */
struct _class_queue_ {
	struct mbuf *tail_;	/* Tail of packet queue */
	int qlen_;		/* Queue length (in number of packets) */
	int qlim_;		/* Queue limit (in number of packets) */
	int qtype_;		/* Queue type */
};

typedef struct _class_queue_ class_queue_t;

/*
 * Field accessors.  qtype(), qlimit(), qlen() and qtail() expand to the
 * member itself, so they are also used as assignment targets below.
 */
#define qtype(q) (q)->qtype_ /* Get queue type */
#define qlimit(q) (q)->qlim_ /* Max packets to be queued */
#define qlen(q) (q)->qlen_ /* Current queue length. */
#define qtail(q) (q)->tail_ /* Tail of the queue */
/* First packet of the ring, or NULL when there is no tail. */
#define qhead(q) ((q)->tail_ ? (q)->tail_->m_nextpkt : NULL)

#define qempty(q) ((q)->qlen_ == 0) /* Is the queue empty? */
#define q_is_red(q) ((q)->qtype_ == Q_RED) /* Is the queue a red queue */
#define q_is_rio(q) ((q)->qtype_ == Q_RIO) /* Is the queue a rio queue */
#define q_is_red_or_rio(q) ((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO)

/*
 * Without GCC, or with ALTQ_DEBUG, the queue operations are only declared
 * here; otherwise the inline versions below are used.
 */
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)

extern void _addq(class_queue_t *, struct mbuf *);
extern struct mbuf *_getq(class_queue_t *);
extern struct mbuf *_getq_tail(class_queue_t *);
extern struct mbuf *_getq_random(class_queue_t *);
extern void _removeq(class_queue_t *, struct mbuf *);
extern void _flushq(class_queue_t *);

#else /* __GNUC__ && !ALTQ_DEBUG */
/*
 * inlined versions
 */

/*
 * Append m to the queue: m becomes the new tail and its m_nextpkt is
 * pointed at the head (itself when the queue was empty).  The queue
 * limit is not checked here.
 */
static __inline void
_addq(class_queue_t *q, struct mbuf *m)
{
	struct mbuf *m0;

	if ((m0 = qtail(q)) != NULL)
		m->m_nextpkt = m0->m_nextpkt;
	else
		m0 = m;
	m0->m_nextpkt = m;
	qtail(q) = m;
	qlen(q)++;
}

/*
 * Remove and return the packet at the head of the queue, or NULL when
 * the queue has no tail.  The returned packet's m_nextpkt is cleared.
 */
static __inline struct mbuf *
_getq(class_queue_t *q)
{
	struct mbuf *m, *m0;

	if ((m = qtail(q)) == NULL)
		return (NULL);
	if ((m0 = m->m_nextpkt) != m)
		m->m_nextpkt = m0->m_nextpkt;
	else
		qtail(q) = NULL;	/* that was the only packet */
	qlen(q)--;
	m0->m_nextpkt = NULL;
	return (m0);
}

/*
 * Remove and return the packet at the tail of the queue, or NULL when
 * the queue has no tail.  The ring is walked once to find the tail's
 * predecessor, which becomes the new tail.
 */
static __inline struct mbuf *
_getq_tail(class_queue_t *q)
{
	struct mbuf *m, *m0, *prev;

	if ((m = m0 = qtail(q)) == NULL)
		return NULL;
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)
		qtail(q) = NULL;	/* that was the only packet */
	else
		qtail(q) = prev;
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

/*
 * Remove and return a randomly chosen packet, or NULL when the queue has
 * no tail.  The victim is found by stepping random() % qlen + 1 links
 * forward from the tail.
 */
static __inline struct mbuf *
_getq_random(class_queue_t *q)
{
	struct mbuf *m;
	int i, n;

	if ((m = qtail(q)) == NULL)
		return NULL;
	if (m->m_nextpkt == m)
		qtail(q) = NULL;	/* single packet: the queue becomes empty */
	else {
		struct mbuf *prev = NULL;

		n = random() % qlen(q) + 1;
		for (i = 0; i < n; i++) {
			prev = m;
			m = m->m_nextpkt;
		}
		prev->m_nextpkt = m->m_nextpkt;
		if (m == qtail(q))
			qtail(q) = prev;
	}
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

/*
 * Unlink packet m from the queue.
 * NOTE(review): there is no empty-queue check, and the search loop only
 * ends when m is found, so m must currently be on q.  m->m_nextpkt is
 * left unchanged, unlike in the _getq*() functions.
 */
static __inline void
_removeq(class_queue_t *q, struct mbuf *m)
{
	struct mbuf *m0, *prev;

	m0 = qtail(q);
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)
		qtail(q) = NULL;
	else if (qtail(q) == m)
		qtail(q) = prev;
	qlen(q)--;
}

/* Dequeue every packet with _getq() and release it with m_freem(). */
static __inline void
_flushq(class_queue_t *q)
{
	struct mbuf *m;

	while ((m = _getq(q)) != NULL)
		m_freem(m);
}

#endif /* __GNUC__ && !ALTQ_DEBUG */

#endif /* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif /* _ALTQ_ALTQ_CLASSQ_H_ */
Index: head/sys/net/altq/altq_hfsc.c
===================================================================
--- head/sys/net/altq/altq_hfsc.c (revision 281641)
+++ head/sys/net/altq/altq_hfsc.c (revision 281642)
@@ -1,2222 +1,2202 @@
-/* $FreeBSD$ */
-/* $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ */
-
-/*
+/*-
 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation is hereby granted (including for commercial or
 * for-profit use), provided that both the copyright notice and this
 * permission notice appear in all copies of the software, derivative
 * works, or modified versions, and any portions thereof.
 *
 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
 * WHICH MAY HAVE SERIOUS CONSEQUENCES.
CARNEGIE MELLON PROVIDES THIS * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * Carnegie Mellon encourages (but does not require) users of this * software to return any improvements or extensions that they make, * and to grant Carnegie Mellon the rights to redistribute these * changes without encumbrance. + * + * $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ + * $FreeBSD$ */ /* * H-FSC is described in Proceedings of SIGCOMM'97, * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing, * Real-Time and Priority Service" * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng. * * Oleg Cherevko added the upperlimit for link-sharing. * when a class has an upperlimit, the fit-time is computed from the * upperlimit service curve. the link-sharing scheduler does not schedule * a class whose fit-time exceeds the current time. 
*/ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */ #include #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #endif /* ALTQ3_COMPAT */ #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif /* * function prototypes */ static int hfsc_clear_interface(struct hfsc_if *); static int hfsc_request(struct ifaltq *, int, void *); static void hfsc_purge(struct hfsc_if *); static struct hfsc_class *hfsc_class_create(struct hfsc_if *, struct service_curve *, struct service_curve *, struct service_curve *, struct hfsc_class *, int, int, int); static int hfsc_class_destroy(struct hfsc_class *); static struct hfsc_class *hfsc_nextclass(struct hfsc_class *); static int hfsc_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *hfsc_dequeue(struct ifaltq *, int); static int hfsc_addq(struct hfsc_class *, struct mbuf *); static struct mbuf *hfsc_getq(struct hfsc_class *); static struct mbuf *hfsc_pollq(struct hfsc_class *); static void hfsc_purgeq(struct hfsc_class *); static void update_cfmin(struct hfsc_class *); static void set_active(struct hfsc_class *, int); static void set_passive(struct hfsc_class *); static void init_ed(struct hfsc_class *, int); static void update_ed(struct hfsc_class *, int); static void update_d(struct hfsc_class *, int); static void init_vf(struct hfsc_class *, int); static void update_vf(struct hfsc_class *, int, u_int64_t); static void ellist_insert(struct hfsc_class *); static void ellist_remove(struct hfsc_class *); static void ellist_update(struct hfsc_class *); struct hfsc_class *hfsc_get_mindl(struct hfsc_if *, u_int64_t); static void actlist_insert(struct hfsc_class *); static void 
actlist_remove(struct hfsc_class *); static void actlist_update(struct hfsc_class *); static struct hfsc_class *actlist_firstfit(struct hfsc_class *, u_int64_t); static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t); static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t); static __inline u_int64_t m2sm(u_int); static __inline u_int64_t m2ism(u_int); static __inline u_int64_t d2dx(u_int); static u_int sm2m(u_int64_t); static u_int dx2d(u_int64_t); static void sc2isc(struct service_curve *, struct internal_sc *); static void rtsc_init(struct runtime_sc *, struct internal_sc *, u_int64_t, u_int64_t); static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t); static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t); static void rtsc_min(struct runtime_sc *, struct internal_sc *, u_int64_t, u_int64_t); static void get_class_stats(struct hfsc_classstats *, struct hfsc_class *); static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t); #ifdef ALTQ3_COMPAT static struct hfsc_if *hfsc_attach(struct ifaltq *, u_int); static int hfsc_detach(struct hfsc_if *); static int hfsc_class_modify(struct hfsc_class *, struct service_curve *, struct service_curve *, struct service_curve *); static int hfsccmd_if_attach(struct hfsc_attach *); static int hfsccmd_if_detach(struct hfsc_interface *); static int hfsccmd_add_class(struct hfsc_add_class *); static int hfsccmd_delete_class(struct hfsc_delete_class *); static int hfsccmd_modify_class(struct hfsc_modify_class *); static int hfsccmd_add_filter(struct hfsc_add_filter *); static int hfsccmd_delete_filter(struct hfsc_delete_filter *); static int hfsccmd_class_stats(struct hfsc_class_stats *); altqdev_decl(hfsc); #endif /* ALTQ3_COMPAT */ /* * macros */ #define is_a_parent_class(cl) ((cl)->cl_children != NULL) #define HT_INFINITY 0xffffffffffffffffLL /* infinite time value */ #ifdef ALTQ3_COMPAT /* hif_list keeps all hfsc_if's allocated. 
*/ static struct hfsc_if *hif_list = NULL; #endif /* ALTQ3_COMPAT */ int hfsc_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc, hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL); splx(s); return (error); } int hfsc_add_altq(struct pf_altq *a) { struct hfsc_if *hif; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (hif == NULL) return (ENOMEM); TAILQ_INIT(&hif->hif_eligible); hif->hif_ifq = &ifp->if_snd; /* keep the state in pf_altq */ a->altq_disc = hif; return (0); } int hfsc_remove_altq(struct pf_altq *a) { struct hfsc_if *hif; if ((hif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; (void)hfsc_clear_interface(hif); (void)hfsc_class_destroy(hif->hif_rootclass); free(hif, M_DEVBUF); return (0); } int hfsc_add_queue(struct pf_altq *a) { struct hfsc_if *hif; struct hfsc_class *cl, *parent; struct hfsc_opts *opts; struct service_curve rtsc, lssc, ulsc; if ((hif = a->altq_disc) == NULL) return (EINVAL); opts = &a->pq_u.hfsc_opts; if (a->parent_qid == HFSC_NULLCLASS_HANDLE && hif->hif_rootclass == NULL) parent = NULL; else if ((parent = clh_to_clp(hif, a->parent_qid)) == NULL) return (EINVAL); if (a->qid == 0) return (EINVAL); if (clh_to_clp(hif, a->qid) != NULL) return (EBUSY); rtsc.m1 = opts->rtsc_m1; rtsc.d = opts->rtsc_d; rtsc.m2 = opts->rtsc_m2; lssc.m1 = opts->lssc_m1; lssc.d = opts->lssc_d; lssc.m2 = opts->lssc_m2; ulsc.m1 = opts->ulsc_m1; ulsc.d = opts->ulsc_d; ulsc.m2 = opts->ulsc_m2; cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc, parent, a->qlimit, opts->flags, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int hfsc_remove_queue(struct pf_altq *a) { struct hfsc_if *hif; 
struct hfsc_class *cl; if ((hif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(hif, a->qid)) == NULL) return (EINVAL); return (hfsc_class_destroy(cl)); } int hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) { struct hfsc_if *hif; struct hfsc_class *cl; struct hfsc_classstats stats; int error = 0; if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL) return (EBADF); if ((cl = clh_to_clp(hif, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes except the root class. */ static int hfsc_clear_interface(struct hfsc_if *hif) { struct hfsc_class *cl; #ifdef ALTQ3_COMPAT /* free the filters for this interface */ acc_discard_filters(&hif->hif_classifier, NULL, 1); #endif /* clear out the classes */ while (hif->hif_rootclass != NULL && (cl = hif->hif_rootclass->cl_children) != NULL) { /* * remove the first leaf class found in the hierarchy * then start over */ for (; cl != NULL; cl = hfsc_nextclass(cl)) { if (!is_a_parent_class(cl)) { (void)hfsc_class_destroy(cl); break; } } } return (0); } static int hfsc_request(struct ifaltq *ifq, int req, void *arg) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: hfsc_purge(hif); break; } return (0); } /* discard all the queued packets on the interface */ static void hfsc_purge(struct hfsc_if *hif) { struct hfsc_class *cl; for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) if (!qempty(cl->cl_q)) hfsc_purgeq(cl); if (ALTQ_IS_ENABLED(hif->hif_ifq)) hif->hif_ifq->ifq_len = 0; } struct hfsc_class * hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, struct service_curve *fsc, struct service_curve *usc, struct hfsc_class *parent, int qlimit, int flags, 
int qid) { struct hfsc_class *cl, *p; int i, s; if (hif->hif_classes >= HFSC_MAX_CLASSES) return (NULL); #ifndef ALTQ_RED if (flags & HFCF_RED) { #ifdef ALTQ_DEBUG printf("hfsc_class_create: RED not configured for HFSC!\n"); #endif return (NULL); } #endif cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->cl_q == NULL) goto err_ret; TAILQ_INIT(&cl->cl_actc); if (qlimit == 0) qlimit = 50; /* use default */ qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; cl->cl_flags = flags; #ifdef ALTQ_RED if (flags & (HFCF_RED|HFCF_RIO)) { int red_flags, red_pkttime; u_int m2; m2 = 0; if (rsc != NULL && rsc->m2 > m2) m2 = rsc->m2; if (fsc != NULL && fsc->m2 > m2) m2 = fsc->m2; if (usc != NULL && usc->m2 > m2) m2 = usc->m2; red_flags = 0; if (flags & HFCF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & HFCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (m2 < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (m2 / 8); if (flags & HFCF_RED) { cl->cl_red = red_alloc(0, 0, qlimit(cl->cl_q) * 10/100, qlimit(cl->cl_q) * 30/100, red_flags, red_pkttime); if (cl->cl_red != NULL) qtype(cl->cl_q) = Q_RED; } #ifdef ALTQ_RIO else { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red != NULL) qtype(cl->cl_q) = Q_RIO; } #endif } #endif /* ALTQ_RED */ if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) { cl->cl_rsc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_rsc == NULL) goto err_ret; sc2isc(rsc, cl->cl_rsc); rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0); rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0); } if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) { cl->cl_fsc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_fsc == NULL) goto err_ret; sc2isc(fsc, cl->cl_fsc); 
rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0); } if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) { cl->cl_usc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_usc == NULL) goto err_ret; sc2isc(usc, cl->cl_usc); rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0); } cl->cl_id = hif->hif_classid++; cl->cl_handle = qid; cl->cl_hif = hif; cl->cl_parent = parent; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(hif->hif_ifq); hif->hif_classes++; /* * find a free slot in the class table. if the slot matching * the lower bits of qid is free, use this slot. otherwise, * use the first free slot. */ i = qid % HFSC_MAX_CLASSES; if (hif->hif_class_tbl[i] == NULL) hif->hif_class_tbl[i] = cl; else { for (i = 0; i < HFSC_MAX_CLASSES; i++) if (hif->hif_class_tbl[i] == NULL) { hif->hif_class_tbl[i] = cl; break; } if (i == HFSC_MAX_CLASSES) { IFQ_UNLOCK(hif->hif_ifq); splx(s); goto err_ret; } } if (flags & HFCF_DEFAULTCLASS) hif->hif_defaultclass = cl; if (parent == NULL) { /* this is root class */ hif->hif_rootclass = cl; } else { /* add this class to the children list of the parent */ if ((p = parent->cl_children) == NULL) parent->cl_children = cl; else { while (p->cl_siblings != NULL) p = p->cl_siblings; p->cl_siblings = cl; } } IFQ_UNLOCK(hif->hif_ifq); splx(s); return (cl); err_ret: if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif } if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); if (cl->cl_rsc != NULL) free(cl->cl_rsc, M_DEVBUF); if (cl->cl_usc != NULL) free(cl->cl_usc, M_DEVBUF); if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (NULL); } static int hfsc_class_destroy(struct hfsc_class *cl) { int i, s; if (cl == NULL) return (0); if (is_a_parent_class(cl)) return (EBUSY); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(cl->cl_hif->hif_ifq); #ifdef 
ALTQ3_COMPAT /* delete filters referencing to this class */ acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0); #endif /* ALTQ3_COMPAT */ if (!qempty(cl->cl_q)) hfsc_purgeq(cl); if (cl->cl_parent == NULL) { /* this is root class */ } else { struct hfsc_class *p = cl->cl_parent->cl_children; if (p == cl) cl->cl_parent->cl_children = cl->cl_siblings; else do { if (p->cl_siblings == cl) { p->cl_siblings = cl->cl_siblings; break; } } while ((p = p->cl_siblings) != NULL); ASSERT(p != NULL); } for (i = 0; i < HFSC_MAX_CLASSES; i++) if (cl->cl_hif->hif_class_tbl[i] == cl) { cl->cl_hif->hif_class_tbl[i] = NULL; break; } cl->cl_hif->hif_classes--; IFQ_UNLOCK(cl->cl_hif->hif_ifq); splx(s); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif } IFQ_LOCK(cl->cl_hif->hif_ifq); if (cl == cl->cl_hif->hif_rootclass) cl->cl_hif->hif_rootclass = NULL; if (cl == cl->cl_hif->hif_defaultclass) cl->cl_hif->hif_defaultclass = NULL; IFQ_UNLOCK(cl->cl_hif->hif_ifq); if (cl->cl_usc != NULL) free(cl->cl_usc, M_DEVBUF); if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); if (cl->cl_rsc != NULL) free(cl->cl_rsc, M_DEVBUF); free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * hfsc_nextclass returns the next class in the tree. * usage: * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) * do_something; */ static struct hfsc_class * hfsc_nextclass(struct hfsc_class *cl) { if (cl->cl_children != NULL) cl = cl->cl_children; else if (cl->cl_siblings != NULL) cl = cl->cl_siblings; else { while ((cl = cl->cl_parent) != NULL) if (cl->cl_siblings) { cl = cl->cl_siblings; break; } } return (cl); } /* * hfsc_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. 
*/ static int hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; struct hfsc_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(hif, t->qid); #ifdef ALTQ3_COMPAT else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) cl = pktattr->pattr_class; #endif if (cl == NULL || is_a_parent_class(cl)) { cl = hif->hif_defaultclass; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } #ifdef ALTQ3_COMPAT if (pktattr != NULL) cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */ else #endif cl->cl_pktattr = NULL; len = m_pktlen(m); if (hfsc_addq(cl, m) != 0) { /* drop occurred. mbuf was freed in hfsc_addq. */ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len); return (ENOBUFS); } IFQ_INC_LEN(ifq); cl->cl_hif->hif_packets++; /* successfully queued. */ if (qlen(cl->cl_q) == 1) set_active(cl, m_pktlen(m)); return (0); } /* * hfsc_dequeue is a dequeue function to be registered to * (*altq_dequeue) in struct ifaltq. * * note: ALTDQ_POLL returns the next packet without removing the packet * from the queue. ALTDQ_REMOVE is a normal dequeue operation. * ALTDQ_REMOVE must return the same packet if called immediately * after ALTDQ_POLL. 
*/
/*
 * Returns NULL when the hierarchy holds no packet, or when no class is
 * eligible and no active class fits under the link-sharing criteria.
 * For ALTDQ_POLL the chosen class is remembered in hif_pollcache so that
 * the following ALTDQ_REMOVE dequeues from that same class.
 */
static struct mbuf *
hfsc_dequeue(struct ifaltq *ifq, int op)
{
	struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
	struct hfsc_class *cl;
	struct mbuf *m;
	int len, next_len;
	int realtime = 0;	/* set when cl was picked by real-time criteria */
	u_int64_t cur_time;

	IFQ_LOCK_ASSERT(ifq);

	if (hif->hif_packets == 0)
		/* no packet in the tree */
		return (NULL);

	cur_time = read_machclk();

	if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) {

		/* reuse the class selected by the preceding poll */
		cl = hif->hif_pollcache;
		hif->hif_pollcache = NULL;
		/* check if the class was scheduled by real-time criteria */
		if (cl->cl_rsc != NULL)
			realtime = (cl->cl_e <= cur_time);
	} else {
		/*
		 * if there are eligible classes, use real-time criteria.
		 * find the class with the minimum deadline among
		 * the eligible classes.
		 */
		if ((cl = hfsc_get_mindl(hif, cur_time))
		    != NULL) {
			realtime = 1;
		} else {
#ifdef ALTQ_DEBUG
			int fits = 0;
#endif
			/*
			 * use link-sharing criteria
			 * get the class with the minimum vt in the hierarchy
			 */
			cl = hif->hif_rootclass;
			while (is_a_parent_class(cl)) {

				cl = actlist_firstfit(cl, cur_time);
				if (cl == NULL) {
#ifdef ALTQ_DEBUG
					if (fits > 0)
						printf("%d fit but none found\n",fits);
#endif
					return (NULL);
				}
				/*
				 * update parent's cl_cvtmin.
				 * don't update if the new vt is smaller.
				 */
				if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
					cl->cl_parent->cl_cvtmin = cl->cl_vt;
#ifdef ALTQ_DEBUG
				fits++;
#endif
			}
		}

		if (op == ALTDQ_POLL) {
			hif->hif_pollcache = cl;
			m = hfsc_pollq(cl);
			return (m);
		}
	}

	m = hfsc_getq(cl);
	if (m == NULL)
		panic("hfsc_dequeue:");
	len = m_pktlen(m);
	cl->cl_hif->hif_packets--;
	IFQ_DEC_LEN(ifq);
	PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len);

	update_vf(cl, len, cur_time);
	/* cl_cumul only counts bytes sent under real-time criteria */
	if (realtime)
		cl->cl_cumul += len;

	if (!qempty(cl->cl_q)) {
		if (cl->cl_rsc != NULL) {
			/* update ed */
			next_len = m_pktlen(qhead(cl->cl_q));

			if (realtime)
				update_ed(cl, next_len);
			else
				update_d(cl, next_len);
		}
	} else {
		/* the class becomes passive */
		set_passive(cl);
	}

	return (m);
}

/*
 * Put m on the class queue.
 * RIO and RED queues delegate to rio_addq()/red_addq() and return their
 * result.  Otherwise the queue is drop-tail: when it is at its limit the
 * mbuf is freed and -1 is returned, else 0.
 */
static int
hfsc_addq(struct hfsc_class *cl, struct mbuf *m)
{

#ifdef ALTQ_RIO
	if (q_is_rio(cl->cl_q))
		return rio_addq((rio_t *)cl->cl_red, cl->cl_q,
				m, cl->cl_pktattr);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->cl_q))
		return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
#endif
	if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
		m_freem(m);
		return (-1);
	}

	if (cl->cl_flags & HFCF_CLEARDSCP)
		write_dsfield(m, cl->cl_pktattr, 0);

	_addq(cl->cl_q, m);

	return (0);
}

/* Dequeue one packet through the getq routine matching the queue type. */
static struct mbuf *
hfsc_getq(struct hfsc_class *cl)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->cl_q))
		return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->cl_q))
		return red_getq(cl->cl_red, cl->cl_q);
#endif
	return _getq(cl->cl_q);
}

/* Return the packet at the head of the class queue without removing it. */
static struct mbuf *
hfsc_pollq(struct hfsc_class *cl)
{
	return qhead(cl->cl_q);
}

/*
 * Drop every packet queued on the class.  Each packet is counted as a
 * drop and taken off the interface totals; afterwards the class is
 * removed from the active and eligible lists.
 */
static void
hfsc_purgeq(struct hfsc_class *cl)
{
	struct mbuf *m;

	if (qempty(cl->cl_q))
		return;

	while ((m = _getq(cl->cl_q)) != NULL) {
		PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m));
		m_freem(m);
		cl->cl_hif->hif_packets--;
		IFQ_DEC_LEN(cl->cl_hif->hif_ifq);
	}
	ASSERT(qlen(cl->cl_q) == 0);

	update_vf(cl, 0, 0);	/* remove cl from the actlist */
	set_passive(cl);
}

/*
 * Called when a class gets its first packet: initialise the eligible time
 * and deadline (real-time curve present) and the virtual time state
 * (link-sharing curve present), and count a new backlog period.
 */
static void
set_active(struct hfsc_class *cl, int len)
{
	if (cl->cl_rsc != NULL)
		init_ed(cl, len);
	if (cl->cl_fsc != NULL)
		init_vf(cl, len);

	cl->cl_stats.period++;
}

/* Take a class with a real-time curve off the eligible list. */
static void
set_passive(struct hfsc_class *cl)
{
	if (cl->cl_rsc != NULL)
		ellist_remove(cl);

	/*
	 * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
	 * needs to be called explicitly to remove a class from actlist
	 */
}

/*
 * Start a real-time backlog period: refresh the deadline and eligible
 * curves at the current time, compute cl_e and cl_d for a head packet of
 * next_len bytes, and insert the class into the eligible list.
 */
static void
init_ed(struct hfsc_class *cl, int next_len)
{
	u_int64_t cur_time;

	cur_time = read_machclk();

	/* update the deadline curve */
	rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);

	/*
	 * update the eligible curve.
	 * for concave, it is equal to the deadline curve.
	 * for convex, it is a linear curve with slope m2.
	 */
	cl->cl_eligible = cl->cl_deadline;
	if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
		cl->cl_eligible.dx = 0;
		cl->cl_eligible.dy = 0;
	}

	/* compute e and d */
	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	ellist_insert(cl);
}

/* Recompute cl_e and cl_d for the next packet and re-sort the eligible list. */
static void
update_ed(struct hfsc_class *cl, int next_len)
{
	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	ellist_update(cl);
}

/* Recompute only the deadline cl_d for the next packet. */
static void
update_d(struct hfsc_class *cl, int next_len)
{
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
}

/*
 * Start a link-sharing backlog period for cl and for every ancestor that
 * becomes active with it, walking up towards (but excluding) the root.
 * For each newly active class the virtual time, virtual curve and, when
 * an upper-limit curve exists, cl_myf are initialised; cl_f is refreshed
 * on every level visited.  The len argument is not used in the body.
 */
static void
init_vf(struct hfsc_class *cl, int len)
{
	struct hfsc_class *max_cl, *p;
	u_int64_t vt, f, cur_time;
	int go_active;

	cur_time = 0;	/* the clock is read lazily, only if a ulimit curve needs it */
	go_active = 1;
	for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {

		/* a level goes active only when its first child does */
		if (go_active && cl->cl_nactive++ == 0)
			go_active = 1;
		else
			go_active = 0;

		if (go_active) {
			max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
			if (max_cl != NULL) {
				/*
				 * set vt to the average of the min and max
				 * classes.  if the parent's period didn't
				 * change, don't decrease vt of the class.
				 */
				vt = max_cl->cl_vt;
				if (cl->cl_parent->cl_cvtmin != 0)
					vt = (cl->cl_parent->cl_cvtmin + vt)/2;

				if (cl->cl_parent->cl_vtperiod !=
				    cl->cl_parentperiod || vt > cl->cl_vt)
					cl->cl_vt = vt;
			} else {
				/*
				 * first child for a new parent backlog period.
				 * add parent's cvtmax to vtoff of children
				 * to make a new vt (vtoff + vt) larger than
				 * the vt in the last period for all children.
				 */
				vt = cl->cl_parent->cl_cvtmax;
				for (p = cl->cl_parent->cl_children; p != NULL;
				     p = p->cl_siblings)
					p->cl_vtoff += vt;
				cl->cl_vt = 0;
				cl->cl_parent->cl_cvtmax = 0;
				cl->cl_parent->cl_cvtmin = 0;
			}
			cl->cl_initvt = cl->cl_vt;

			/* update the virtual curve */
			vt = cl->cl_vt + cl->cl_vtoff;
			rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total);
			if (cl->cl_virtual.x == vt) {
				cl->cl_virtual.x -= cl->cl_vtoff;
				cl->cl_vtoff = 0;
			}
			cl->cl_vtadj = 0;

			cl->cl_vtperiod++;  /* increment vt period */
			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
			if (cl->cl_parent->cl_nactive == 0)
				cl->cl_parentperiod++;
			cl->cl_f = 0;

			actlist_insert(cl);

			if (cl->cl_usc != NULL) {
				/* class has upper limit curve */
				if (cur_time == 0)
					cur_time = read_machclk();

				/* update the ulimit curve */
				rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
				    cl->cl_total);
				/* compute myf */
				cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
				    cl->cl_total);
				cl->cl_myfadj = 0;
			}
		}

		/* cl_f is max(cl_myf, cl_cfmin) */
		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			update_cfmin(cl->cl_parent);
		}
	}
}

/*
 * Account len bytes of service to cl and to each of its ancestors below
 * the root, then update virtual time (cl_vt) and fit time (cl_f).
 * A class whose queue has just drained, and each ancestor left without an
 * active child as a result, is taken off its parent's active list.
 * hfsc_purgeq() calls this with len == 0 and cur_time == 0 for exactly
 * that removal.
 */
static void
update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
{
	u_int64_t f, myf_bound, delta;
	int go_passive;

	go_passive = qempty(cl->cl_q);

	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {

		cl->cl_total += len;

		/* levels without a link-sharing curve or already idle: skip */
		if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
			continue;

		if (go_passive && --cl->cl_nactive == 0)
			go_passive = 1;
		else
			go_passive = 0;

		if (go_passive) {
			/* no more active child, going passive */

			/* update cvtmax of the parent class */
			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
				cl->cl_parent->cl_cvtmax = cl->cl_vt;

			/* remove this class from the vt list */
			actlist_remove(cl);

			update_cfmin(cl->cl_parent);

			continue;
		}

		/*
		 * update vt and f
		 */
		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
		    - cl->cl_vtoff + cl->cl_vtadj;

		/*
		 * if vt of the class is smaller than cvtmin,
		 * the class was skipped in the past due to non-fit.
		 * if so, we need to adjust vtadj.
		 */
		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
			cl->cl_vt = cl->cl_parent->cl_cvtmin;
		}

		/* update the vt list */
		actlist_update(cl);

		if (cl->cl_usc != NULL) {
			cl->cl_myf = cl->cl_myfadj
			    + rtsc_y2x(&cl->cl_ulimit, cl->cl_total);

			/*
			 * if myf lags behind by more than one clock tick
			 * from the current time, adjust myfadj to prevent
			 * a rate-limited class from going greedy.
			 * in a steady state under rate-limiting, myf
			 * fluctuates within one clock tick.
			 */
			myf_bound = cur_time - machclk_per_tick;
			if (cl->cl_myf < myf_bound) {
				delta = cur_time - cl->cl_myf;
				cl->cl_myfadj += delta;
				cl->cl_myf += delta;
			}
		}

		/* cl_f is max(cl_myf, cl_cfmin) */
		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			update_cfmin(cl->cl_parent);
		}
	}
}

/*
 * Recompute cl_cfmin as the smallest cl_f among the active children of
 * cl.  The result is 0 when there is no active child or when any active
 * child has cl_f == 0.
 */
static void
update_cfmin(struct hfsc_class *cl)
{
	struct hfsc_class *p;
	u_int64_t cfmin;

	if (TAILQ_EMPTY(&cl->cl_actc)) {
		cl->cl_cfmin = 0;
		return;
	}
	cfmin = HT_INFINITY;
	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
		if (p->cl_f == 0) {
			cl->cl_cfmin = 0;
			return;
		}
		if (p->cl_f < cfmin)
			cfmin = p->cl_f;
	}
	cl->cl_cfmin = cfmin;
}

/*
 * TAILQ based ellist and actlist implementation
 * (ion wanted to make a calendar queue based implementation)
 */
/*
 * eligible list holds backlogged classes being sorted by their eligible times.
 * there is one eligible list per interface.
*/ static void ellist_insert(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; struct hfsc_class *p; /* check the last entry first */ if ((p = TAILQ_LAST(&hif->hif_eligible, elighead)) == NULL || p->cl_e <= cl->cl_e) { TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); return; } TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { if (cl->cl_e < p->cl_e) { TAILQ_INSERT_BEFORE(p, cl, cl_ellist); return; } } ASSERT(0); /* should not reach here */ } static void ellist_remove(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); } static void ellist_update(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; struct hfsc_class *p, *last; /* * the eligible time of a class increases monotonically. * if the next entry has a larger eligible time, nothing to do. */ p = TAILQ_NEXT(cl, cl_ellist); if (p == NULL || cl->cl_e <= p->cl_e) return; /* check the last entry */ last = TAILQ_LAST(&hif->hif_eligible, elighead); ASSERT(last != NULL); if (last->cl_e <= cl->cl_e) { TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); return; } /* * the new position must be between the next entry * and the last entry */ while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) { if (cl->cl_e < p->cl_e) { TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); TAILQ_INSERT_BEFORE(p, cl, cl_ellist); return; } } ASSERT(0); /* should not reach here */ } /* find the class with the minimum deadline among the eligible classes */ struct hfsc_class * hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time) { struct hfsc_class *p, *cl = NULL; TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { if (p->cl_e > cur_time) break; if (cl == NULL || p->cl_d < cl->cl_d) cl = p; } return (cl); } /* * active children list holds backlogged child classes being sorted * by their virtual time. * each intermediate class has one active children list. 
*/ static void actlist_insert(struct hfsc_class *cl) { struct hfsc_class *p; /* check the last entry first */ if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead)) == NULL || p->cl_vt <= cl->cl_vt) { TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); return; } TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) { if (cl->cl_vt < p->cl_vt) { TAILQ_INSERT_BEFORE(p, cl, cl_actlist); return; } } ASSERT(0); /* should not reach here */ } static void actlist_remove(struct hfsc_class *cl) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); } static void actlist_update(struct hfsc_class *cl) { struct hfsc_class *p, *last; /* * the virtual time of a class increases monotonically during its * backlogged period. * if the next entry has a larger virtual time, nothing to do. */ p = TAILQ_NEXT(cl, cl_actlist); if (p == NULL || cl->cl_vt < p->cl_vt) return; /* check the last entry */ last = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead); ASSERT(last != NULL); if (last->cl_vt <= cl->cl_vt) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); return; } /* * the new position must be between the next entry * and the last entry */ while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) { if (cl->cl_vt < p->cl_vt) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); TAILQ_INSERT_BEFORE(p, cl, cl_actlist); return; } } ASSERT(0); /* should not reach here */ } static struct hfsc_class * actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time) { struct hfsc_class *p; TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) { if (p->cl_f <= cur_time) return (p); } return (NULL); } /* * service curve support functions * * external service curve parameters * m: bits/sec * d: msec * internal service curve parameters * sm: (bytes/tsc_interval) << SM_SHIFT * ism: (tsc_count/byte) << ISM_SHIFT * dx: tsc_count * * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits. 
* we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective * digits in decimal using the following table. * * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps * ----------+------------------------------------------------------- * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6 * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6 * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6 * * nsec/byte 80000 8000 800 80 8 * ism(500MHz) 40000 4000 400 40 4 * ism(200MHz) 16000 1600 160 16 1.6 */ #define SM_SHIFT 24 #define ISM_SHIFT 10 #define SM_MASK ((1LL << SM_SHIFT) - 1) #define ISM_MASK ((1LL << ISM_SHIFT) - 1) static __inline u_int64_t seg_x2y(u_int64_t x, u_int64_t sm) { u_int64_t y; /* * compute * y = x * sm >> SM_SHIFT * but divide it for the upper and lower bits to avoid overflow */ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT); return (y); } static __inline u_int64_t seg_y2x(u_int64_t y, u_int64_t ism) { u_int64_t x; if (y == 0) x = 0; else if (ism == HT_INFINITY) x = HT_INFINITY; else { x = (y >> ISM_SHIFT) * ism + (((y & ISM_MASK) * ism) >> ISM_SHIFT); } return (x); } static __inline u_int64_t m2sm(u_int m) { u_int64_t sm; sm = ((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq; return (sm); } static __inline u_int64_t m2ism(u_int m) { u_int64_t ism; if (m == 0) ism = HT_INFINITY; else ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m; return (ism); } static __inline u_int64_t d2dx(u_int d) { u_int64_t dx; dx = ((u_int64_t)d * machclk_freq) / 1000; return (dx); } static u_int sm2m(u_int64_t sm) { u_int64_t m; m = (sm * 8 * machclk_freq) >> SM_SHIFT; return ((u_int)m); } static u_int dx2d(u_int64_t dx) { u_int64_t d; d = dx * 1000 / machclk_freq; return ((u_int)d); } static void sc2isc(struct service_curve *sc, struct internal_sc *isc) { isc->sm1 = m2sm(sc->m1); isc->ism1 = m2ism(sc->m1); isc->dx = d2dx(sc->d); isc->dy = seg_x2y(isc->dx, isc->sm1); isc->sm2 = 
m2sm(sc->m2); isc->ism2 = m2ism(sc->m2); } /* * initialize the runtime service curve with the given internal * service curve starting at (x, y). */ static void rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x, u_int64_t y) { rtsc->x = x; rtsc->y = y; rtsc->sm1 = isc->sm1; rtsc->ism1 = isc->ism1; rtsc->dx = isc->dx; rtsc->dy = isc->dy; rtsc->sm2 = isc->sm2; rtsc->ism2 = isc->ism2; } /* * calculate the y-projection of the runtime service curve by the * given x-projection value */ static u_int64_t rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y) { u_int64_t x; if (y < rtsc->y) x = rtsc->x; else if (y <= rtsc->y + rtsc->dy) { /* x belongs to the 1st segment */ if (rtsc->dy == 0) x = rtsc->x + rtsc->dx; else x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1); } else { /* x belongs to the 2nd segment */ x = rtsc->x + rtsc->dx + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2); } return (x); } static u_int64_t rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x) { u_int64_t y; if (x <= rtsc->x) y = rtsc->y; else if (x <= rtsc->x + rtsc->dx) /* y belongs to the 1st segment */ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1); else /* y belongs to the 2nd segment */ y = rtsc->y + rtsc->dy + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2); return (y); } /* * update the runtime service curve by taking the minimum of the current * runtime service curve and the service curve starting at (x, y). 
*/
static void
rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
    u_int64_t y)
{
	u_int64_t y1, y2, dx, dy;

	if (isc->sm1 <= isc->sm2) {
		/* service curve is convex */
		y1 = rtsc_x2y(rtsc, x);
		if (y1 < y)
			/* the current rtsc is smaller */
			return;
		/* only the anchor moves; the segment parameters are kept */
		rtsc->x = x;
		rtsc->y = y;
		return;
	}

	/*
	 * service curve is concave
	 * compute the two y values of the current rtsc
	 *	y1: at x
	 *	y2: at (x + dx)
	 */
	y1 = rtsc_x2y(rtsc, x);
	if (y1 <= y) {
		/* rtsc is below isc, no change to rtsc */
		return;
	}

	y2 = rtsc_x2y(rtsc, x + isc->dx);
	if (y2 >= y + isc->dy) {
		/* rtsc is above isc, replace rtsc by isc */
		rtsc->x = x;
		rtsc->y = y;
		rtsc->dx = isc->dx;
		rtsc->dy = isc->dy;
		return;
	}

	/*
	 * the two curves intersect
	 * compute the offsets (dx, dy) using the reverse
	 * function of seg_x2y()
	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
	 */
	/* sm1 > sm2 on this (concave) path, so the divisor is non-zero */
	dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
	/*
	 * check if (x, y1) belongs to the 1st segment of rtsc.
	 * if so, add the offset.
	 */
	if (rtsc->x + rtsc->dx > x)
		dx += rtsc->x + rtsc->dx - x;
	dy = seg_x2y(dx, isc->sm1);

	rtsc->x = x;
	rtsc->y = y;
	rtsc->dx = dx;
	rtsc->dy = dy;
	return;
}

/*
 * Fill *sp with a snapshot of class cl: identifiers, the three service
 * curves converted back to external units (all zero when a curve is not
 * set), scheduler state, and queue counters.
 */
static void
get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl)
{
	sp->class_id = cl->cl_id;
	sp->class_handle = cl->cl_handle;

	/* curve taken from cl_rsc */
	if (cl->cl_rsc != NULL) {
		sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
		sp->rsc.d = dx2d(cl->cl_rsc->dx);
		sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
	} else {
		sp->rsc.m1 = 0;
		sp->rsc.d = 0;
		sp->rsc.m2 = 0;
	}
	/* curve taken from cl_fsc */
	if (cl->cl_fsc != NULL) {
		sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
		sp->fsc.d = dx2d(cl->cl_fsc->dx);
		sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
	} else {
		sp->fsc.m1 = 0;
		sp->fsc.d = 0;
		sp->fsc.m2 = 0;
	}
	/* curve taken from cl_usc */
	if (cl->cl_usc != NULL) {
		sp->usc.m1 = sm2m(cl->cl_usc->sm1);
		sp->usc.d = dx2d(cl->cl_usc->dx);
		sp->usc.m2 = sm2m(cl->cl_usc->sm2);
	} else {
		sp->usc.m1 = 0;
		sp->usc.d = 0;
		sp->usc.m2 = 0;
	}

	/* raw scheduler state, copied field by field */
	sp->total = cl->cl_total;
	sp->cumul = cl->cl_cumul;

	sp->d = cl->cl_d;
	sp->e = cl->cl_e;
	sp->vt = cl->cl_vt;
	sp->f = cl->cl_f;

	sp->initvt = cl->cl_initvt;
sp->vtperiod = cl->cl_vtperiod;
	sp->parentperiod = cl->cl_parentperiod;
	sp->nactive = cl->cl_nactive;
	sp->vtoff = cl->cl_vtoff;
	sp->cvtmax = cl->cl_cvtmax;
	sp->myf = cl->cl_myf;
	sp->cfmin = cl->cl_cfmin;
	sp->cvtmin = cl->cl_cvtmin;
	sp->myfadj = cl->cl_myfadj;
	sp->vtadj = cl->cl_vtadj;

	/* clock reading and frequency at the time of the snapshot */
	sp->cur_time = read_machclk();
	sp->machclk_freq = machclk_freq;

	/* queue occupancy and per-class counters */
	sp->qlength = qlen(cl->cl_q);
	sp->qlimit = qlimit(cl->cl_q);
	sp->xmit_cnt = cl->cl_stats.xmit_cnt;
	sp->drop_cnt = cl->cl_stats.drop_cnt;
	sp->period = cl->cl_stats.period;

	sp->qtype = qtype(cl->cl_q);
	/* sp->red[] is written only when the queue is a RED or RIO queue */
#ifdef ALTQ_RED
	if (q_is_red(cl->cl_q))
		red_getstats(cl->cl_red, &sp->red[0]);
#endif
#ifdef ALTQ_RIO
	if (q_is_rio(cl->cl_q))
		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
#endif
}

/* convert a class handle to the corresponding class pointer */
/* returns NULL for handle 0 (checked before any table lookup) */
static struct hfsc_class *
clh_to_clp(struct hfsc_if *hif, u_int32_t chandle)
{
	int i;
	struct hfsc_class *cl;

	if (chandle == 0)
		return (NULL);
	/*
	 * first, try optimistically the slot matching the lower bits of
	 * the handle.  if it fails, do the linear table search.
*/ i = chandle % HFSC_MAX_CLASSES; if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) return (cl); for (i = 0; i < HFSC_MAX_CLASSES; i++) if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } #ifdef ALTQ3_COMPAT static struct hfsc_if * hfsc_attach(ifq, bandwidth) struct ifaltq *ifq; u_int bandwidth; { struct hfsc_if *hif; hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK); if (hif == NULL) return (NULL); bzero(hif, sizeof(struct hfsc_if)); hif->hif_eligible = ellist_alloc(); if (hif->hif_eligible == NULL) { free(hif, M_DEVBUF); return NULL; } hif->hif_ifq = ifq; /* add this state to the hfsc list */ hif->hif_next = hif_list; hif_list = hif; return (hif); } static int hfsc_detach(hif) struct hfsc_if *hif; { (void)hfsc_clear_interface(hif); (void)hfsc_class_destroy(hif->hif_rootclass); /* remove this interface from the hif list */ if (hif_list == hif) hif_list = hif->hif_next; else { struct hfsc_if *h; for (h = hif_list; h != NULL; h = h->hif_next) if (h->hif_next == hif) { h->hif_next = hif->hif_next; break; } ASSERT(h != NULL); } ellist_destroy(hif->hif_eligible); free(hif, M_DEVBUF); return (0); } static int hfsc_class_modify(cl, rsc, fsc, usc) struct hfsc_class *cl; struct service_curve *rsc, *fsc, *usc; { struct internal_sc *rsc_tmp, *fsc_tmp, *usc_tmp; u_int64_t cur_time; int s; rsc_tmp = fsc_tmp = usc_tmp = NULL; if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) && cl->cl_rsc == NULL) { rsc_tmp = malloc(sizeof(struct internal_sc), M_DEVBUF, M_WAITOK); if (rsc_tmp == NULL) return (ENOMEM); } if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) && cl->cl_fsc == NULL) { fsc_tmp = malloc(sizeof(struct internal_sc), M_DEVBUF, M_WAITOK); if (fsc_tmp == NULL) { free(rsc_tmp); return (ENOMEM); } } if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) && cl->cl_usc == NULL) { usc_tmp = malloc(sizeof(struct internal_sc), M_DEVBUF, M_WAITOK); if (usc_tmp == NULL) { free(rsc_tmp); free(fsc_tmp); return 
(ENOMEM);
		}
	}

	cur_time = read_machclk();
-#ifdef __NetBSD__
	s = splnet();
-#else
-	s = splimp();
-#endif
	IFQ_LOCK(cl->cl_hif->hif_ifq);

	/* cl_rsc: m1 == m2 == 0 removes the curve, anything else (re)sets it */
	if (rsc != NULL) {
		if (rsc->m1 == 0 && rsc->m2 == 0) {
			if (cl->cl_rsc != NULL) {
				if (!qempty(cl->cl_q))
					hfsc_purgeq(cl);
				free(cl->cl_rsc, M_DEVBUF);
				cl->cl_rsc = NULL;
			}
		} else {
			if (cl->cl_rsc == NULL)
				cl->cl_rsc = rsc_tmp;
			sc2isc(rsc, cl->cl_rsc);
			rtsc_init(&cl->cl_deadline, cl->cl_rsc, cur_time,
			    cl->cl_cumul);
			/* the eligible curve starts as a copy of the deadline curve */
			cl->cl_eligible = cl->cl_deadline;
			/* sm1 <= sm2: drop the first segment of the eligible curve */
			if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
				cl->cl_eligible.dx = 0;
				cl->cl_eligible.dy = 0;
			}
		}
	}

	/* cl_fsc: same remove/(re)set handling; the virtual curve is re-anchored */
	if (fsc != NULL) {
		if (fsc->m1 == 0 && fsc->m2 == 0) {
			if (cl->cl_fsc != NULL) {
				if (!qempty(cl->cl_q))
					hfsc_purgeq(cl);
				free(cl->cl_fsc, M_DEVBUF);
				cl->cl_fsc = NULL;
			}
		} else {
			if (cl->cl_fsc == NULL)
				cl->cl_fsc = fsc_tmp;
			sc2isc(fsc, cl->cl_fsc);
			rtsc_init(&cl->cl_virtual, cl->cl_fsc, cl->cl_vt,
			    cl->cl_total);
		}
	}

	/* cl_usc: removal also resets cl_myf; the queue is not purged here */
	if (usc != NULL) {
		if (usc->m1 == 0 && usc->m2 == 0) {
			if (cl->cl_usc != NULL) {
				free(cl->cl_usc, M_DEVBUF);
				cl->cl_usc = NULL;
				cl->cl_myf = 0;
			}
		} else {
			if (cl->cl_usc == NULL)
				cl->cl_usc = usc_tmp;
			sc2isc(usc, cl->cl_usc);
			rtsc_init(&cl->cl_ulimit, cl->cl_usc, cur_time,
			    cl->cl_total);
		}
	}

	/* packets still queued: refresh the scheduling state for the head */
	if (!qempty(cl->cl_q)) {
		if (cl->cl_rsc != NULL)
			update_ed(cl, m_pktlen(qhead(cl->cl_q)));
		if (cl->cl_fsc != NULL)
			update_vf(cl, 0, cur_time);
		/* is this enough? */
	}

	IFQ_UNLOCK(cl->cl_hif->hif_ifq);
	splx(s);

	return (0);
}

/*
 * hfsc device interface
 */
/*
 * Open handler: initialises the machine clock on first use and returns
 * ENXIO when no clock frequency is available afterwards.
 */
int
hfscopen(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	if (machclk_freq == 0)
		init_machclk();

	if (machclk_freq == 0) {
		printf("hfsc: no cpu clock available!\n");
		return (ENXIO);
	}

	/* everything will be done when the queueing scheme is attached.
*/ return 0; } int hfscclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct hfsc_if *hif; int err, error = 0; while ((hif = hif_list) != NULL) { /* destroy all */ if (ALTQ_IS_ENABLED(hif->hif_ifq)) altq_disable(hif->hif_ifq); err = altq_detach(hif->hif_ifq); if (err == 0) err = hfsc_detach(hif); if (err != 0 && error == 0) error = err; } return error; } int hfscioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct hfsc_if *hif; struct hfsc_interface *ifacep; int error = 0; /* check super-user privilege */ switch (cmd) { case HFSC_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) return (error); #elsif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) return (error); #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) return (error); #endif break; } switch (cmd) { case HFSC_IF_ATTACH: error = hfsccmd_if_attach((struct hfsc_attach *)addr); break; case HFSC_IF_DETACH: error = hfsccmd_if_detach((struct hfsc_interface *)addr); break; case HFSC_ENABLE: case HFSC_DISABLE: case HFSC_CLEAR_HIERARCHY: ifacep = (struct hfsc_interface *)addr; if ((hif = altq_lookup(ifacep->hfsc_ifname, ALTQT_HFSC)) == NULL) { error = EBADF; break; } switch (cmd) { case HFSC_ENABLE: if (hif->hif_defaultclass == NULL) { #ifdef ALTQ_DEBUG printf("hfsc: no default class\n"); #endif error = EINVAL; break; } error = altq_enable(hif->hif_ifq); break; case HFSC_DISABLE: error = altq_disable(hif->hif_ifq); break; case HFSC_CLEAR_HIERARCHY: hfsc_clear_interface(hif); break; } break; case HFSC_ADD_CLASS: error = hfsccmd_add_class((struct hfsc_add_class *)addr); break; case HFSC_DEL_CLASS: error = hfsccmd_delete_class((struct hfsc_delete_class *)addr); break; case HFSC_MOD_CLASS: error = hfsccmd_modify_class((struct 
hfsc_modify_class *)addr);
		break;

	case HFSC_ADD_FILTER:
		error = hfsccmd_add_filter((struct hfsc_add_filter *)addr);
		break;

	case HFSC_DEL_FILTER:
		error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr);
		break;

	case HFSC_GETSTATS:
		error = hfsccmd_class_stats((struct hfsc_class_stats *)addr);
		break;

	default:
		/* unknown command */
		error = EINVAL;
		break;
	}
	return error;
}

/*
 * HFSC_IF_ATTACH: create hfsc state for the named interface and attach
 * it to the interface's send queue.  Returns ENXIO for an unknown
 * interface, ENOMEM if the state cannot be created, or the
 * altq_attach() error (the state is torn down again in that case).
 */
static int
hfsccmd_if_attach(ap)
	struct hfsc_attach *ap;
{
	struct hfsc_if *hif;
	struct ifnet *ifp;
	int error;

	if ((ifp = ifunit(ap->iface.hfsc_ifname)) == NULL)
		return (ENXIO);

	if ((hif = hfsc_attach(&ifp->if_snd, ap->bandwidth)) == NULL)
		return (ENOMEM);

	/*
	 * set HFSC to this ifnet structure.
	 */
	if ((error = altq_attach(&ifp->if_snd, ALTQT_HFSC, hif,
				 hfsc_enqueue, hfsc_dequeue, hfsc_request,
				 &hif->hif_classifier, acc_classify)) != 0)
		(void)hfsc_detach(hif);

	return (error);
}

/*
 * HFSC_IF_DETACH: disable (if enabled) and detach the discipline from
 * the named interface, then destroy its hfsc state.  Returns EBADF if
 * the interface has no hfsc state.
 */
static int
hfsccmd_if_detach(ap)
	struct hfsc_interface *ap;
{
	struct hfsc_if *hif;
	int error;

	if ((hif = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC)) == NULL)
		return (EBADF);

	if (ALTQ_IS_ENABLED(hif->hif_ifq))
		altq_disable(hif->hif_ifq);

	/* on failure the hfsc state is kept */
	if ((error = altq_detach(hif->hif_ifq)))
		return (error);

	return hfsc_detach(hif);
}

/*
 * HFSC_ADD_CLASS: create a class under ap->parent_handle and report the
 * handle chosen for it in ap->class_handle.  Returns EBADF (no hfsc
 * state), EINVAL (parent not found), EBUSY (class table full) or ENOMEM
 * (creation failed).
 */
static int
hfsccmd_add_class(ap)
	struct hfsc_add_class *ap;
{
	struct hfsc_if *hif;
	struct hfsc_class *cl, *parent;
	int	i;

	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
		return (EBADF);

	/* a null parent handle is accepted only while there is no root class */
	if (ap->parent_handle == HFSC_NULLCLASS_HANDLE &&
	    hif->hif_rootclass == NULL)
		parent = NULL;
	else if ((parent = clh_to_clp(hif, ap->parent_handle)) == NULL)
		return (EINVAL);

	/* assign a class handle (use a free slot number for now) */
	/* slot 0 is skipped: handle 0 is rejected by clh_to_clp() */
	for (i = 1; i < HFSC_MAX_CLASSES; i++)
		if (hif->hif_class_tbl[i] == NULL)
			break;
	if (i == HFSC_MAX_CLASSES)
		return (EBUSY);

	if ((cl = hfsc_class_create(hif, &ap->service_curve, NULL, NULL,
	    parent, ap->qlimit, ap->flags, i)) == NULL)
		return (ENOMEM);

	/* return a class handle to the user */
	ap->class_handle = i;

	return (0);
}

static int
hfsccmd_delete_class(ap)
	struct hfsc_delete_class *ap;
{
	struct
hfsc_if *hif;
	struct hfsc_class *cl;

	/* EBADF: no hfsc state on the interface; EINVAL: unknown handle */
	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
		return (EBADF);

	if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
		return (EINVAL);

	return hfsc_class_destroy(cl);
}

/*
 * HFSC_MOD_CLASS: apply ap->service_curve to each curve type selected
 * in ap->sctype.  The same curve is used for every selected type.
 */
static int
hfsccmd_modify_class(ap)
	struct hfsc_modify_class *ap;
{
	struct hfsc_if *hif;
	struct hfsc_class *cl;
	struct service_curve *rsc = NULL;
	struct service_curve *fsc = NULL;
	struct service_curve *usc = NULL;

	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
		return (EBADF);

	if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
		return (EINVAL);

	/* a pointer left NULL tells hfsc_class_modify() to skip that curve */
	if (ap->sctype & HFSC_REALTIMESC)
		rsc = &ap->service_curve;
	if (ap->sctype & HFSC_LINKSHARINGSC)
		fsc = &ap->service_curve;
	if (ap->sctype & HFSC_UPPERLIMITSC)
		usc = &ap->service_curve;

	return hfsc_class_modify(cl, rsc, fsc, usc);
}

/*
 * HFSC_ADD_FILTER: add ap->filter for the class named by
 * ap->class_handle.  Classes for which is_a_parent_class() is true are
 * rejected with EINVAL.
 */
static int
hfsccmd_add_filter(ap)
	struct hfsc_add_filter *ap;
{
	struct hfsc_if *hif;
	struct hfsc_class *cl;

	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
		return (EBADF);

	if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
		return (EINVAL);

	if (is_a_parent_class(cl)) {
#ifdef ALTQ_DEBUG
		printf("hfsccmd_add_filter: not a leaf class!\n");
#endif
		return (EINVAL);
	}

	return acc_add_filter(&hif->hif_classifier, &ap->filter,
			      cl, &ap->filter_handle);
}

/* HFSC_DEL_FILTER: remove the filter identified by ap->filter_handle. */
static int
hfsccmd_delete_filter(ap)
	struct hfsc_delete_filter *ap;
{
	struct hfsc_if *hif;

	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
		return (EBADF);

	return acc_delete_filter(&hif->hif_classifier,
				 ap->filter_handle);
}

/*
 * HFSC_GETSTATS: fill in the interface-wide fields of *ap, skip the
 * first ap->nskip classes and copy out the statistics of up to
 * ap->nclasses further classes.
 */
static int
hfsccmd_class_stats(ap)
	struct hfsc_class_stats *ap;
{
	struct hfsc_if *hif;
	struct hfsc_class *cl;
	struct hfsc_classstats stats, *usp;
	int	n, nclasses, error;

	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
		return (EBADF);

	ap->cur_time = read_machclk();
	ap->machclk_freq = machclk_freq;
	ap->hif_classes = hif->hif_classes;
	ap->hif_packets = hif->hif_packets;

	/* skip the first N classes in the tree */
	nclasses = ap->nskip;
	for
(cl = hif->hif_rootclass, n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) ; if (n != nclasses) return (EINVAL); /* then, read the next N classes in the tree */ nclasses = ap->nclasses; usp = ap->stats; for (n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) { get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, sizeof(stats))) != 0) return (error); } ap->nclasses = n; return (0); } #ifdef KLD_MODULE static struct altqsw hfsc_sw = {"hfsc", hfscopen, hfscclose, hfscioctl}; ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw); MODULE_DEPEND(altq_hfsc, altq_red, 1, 1, 1); MODULE_DEPEND(altq_hfsc, altq_rio, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_HFSC */ Index: head/sys/net/altq/altq_priq.c =================================================================== --- head/sys/net/altq/altq_priq.c (revision 281641) +++ head/sys/net/altq/altq_priq.c (revision 281642) @@ -1,1046 +1,1031 @@ -/* $FreeBSD$ */ -/* $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ */ -/* +/*- * Copyright (C) 2000-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ + * $FreeBSD$ */ /* * priority queue */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif #include /* * function prototypes */ #ifdef ALTQ3_COMPAT static struct priq_if *priq_attach(struct ifaltq *, u_int); static int priq_detach(struct priq_if *); #endif static int priq_clear_interface(struct priq_if *); static int priq_request(struct ifaltq *, int, void *); static void priq_purge(struct priq_if *); static struct priq_class *priq_class_create(struct priq_if *, int, int, int, int); static int priq_class_destroy(struct priq_class *); static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *priq_dequeue(struct ifaltq *, int); static int priq_addq(struct priq_class *, struct mbuf *); static struct mbuf *priq_getq(struct priq_class *); static struct mbuf *priq_pollq(struct priq_class *); static void priq_purgeq(struct priq_class *); #ifdef ALTQ3_COMPAT static int priqcmd_if_attach(struct priq_interface *); static int priqcmd_if_detach(struct priq_interface *); static 
int priqcmd_add_class(struct priq_add_class *);
static int priqcmd_delete_class(struct priq_delete_class *);
static int priqcmd_modify_class(struct priq_modify_class *);
static int priqcmd_add_filter(struct priq_add_filter *);
static int priqcmd_delete_filter(struct priq_delete_filter *);
static int priqcmd_class_stats(struct priq_class_stats *);
#endif /* ALTQ3_COMPAT */

static void get_class_stats(struct priq_classstats *, struct priq_class *);
static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t);

#ifdef ALTQ3_COMPAT
altqdev_decl(priq);

/* pif_list keeps all priq_if's allocated. */
static struct priq_if *pif_list = NULL;
#endif /* ALTQ3_COMPAT */

/*
 * Attach the PRIQ discipline stored in a->altq_disc to the send queue
 * of the interface named a->ifname.  Returns EINVAL if the interface is
 * unknown or no discipline state is set, otherwise the altq_attach()
 * result.
 */
int
priq_pfattach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error;

	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
		return (EINVAL);
-#ifdef __NetBSD__
	s = splnet();
-#else
-	s = splimp();
-#endif
	error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc,
	    priq_enqueue, priq_dequeue, priq_request, NULL, NULL);
	splx(s);
	return (error);
}

/*
 * Allocate zeroed PRIQ state for the interface named a->ifname and
 * store it in a->altq_disc.  Returns EINVAL (unknown interface), ENODEV
 * (send queue not ALTQ-ready) or ENOMEM.
 */
int
priq_add_altq(struct pf_altq *a)
{
	struct priq_if	*pif;
	struct ifnet	*ifp;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);
	if (!ALTQ_IS_READY(&ifp->if_snd))
		return (ENODEV);

	pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (pif == NULL)
		return (ENOMEM);
	pif->pif_bandwidth = a->ifbandwidth;
	/* -1: no class is installed yet */
	pif->pif_maxpri = -1;
	pif->pif_ifq = &ifp->if_snd;

	/* keep the state in pf_altq */
	a->altq_disc = pif;

	return (0);
}

/*
 * Release the PRIQ state held in a->altq_disc: destroy all classes and
 * free the state.  Returns EINVAL if there is none.
 */
int
priq_remove_altq(struct pf_altq *a)
{
	struct priq_if *pif;

	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	/* detach the pointer from pf_altq before the state is freed */
	a->altq_disc = NULL;

	(void)priq_clear_interface(pif);

	free(pif, M_DEVBUF);
	return (0);
}

/*
 * Create the class described by a (priority, qlimit, flags, qid).
 * Returns EINVAL for bad parameters and EBUSY when the priority slot or
 * the queue id is already in use.
 */
int
priq_add_queue(struct pf_altq *a)
{
	struct priq_if *pif;
	struct priq_class *cl;

	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);

	/* check parameters */
	if (a->priority >= PRIQ_MAXPRI)
		return (EINVAL);
	if (a->qid == 0)
		return (EINVAL);
	if (pif->pif_classes[a->priority] != NULL)
		return (EBUSY);
	if (clh_to_clp(pif, a->qid)
!= NULL) return (EBUSY); cl = priq_class_create(pif, a->priority, a->qlimit, a->pq_u.priq_opts.flags, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int priq_remove_queue(struct pf_altq *a) { struct priq_if *pif; struct priq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); return (priq_class_destroy(cl)); } int priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) { struct priq_if *pif; struct priq_class *cl; struct priq_classstats stats; int error = 0; if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes. */ static int priq_clear_interface(struct priq_if *pif) { struct priq_class *cl; int pri; #ifdef ALTQ3_CLFIER_COMPAT /* free the filters for this interface */ acc_discard_filters(&pif->pif_classifier, NULL, 1); #endif /* clear out the classes */ for (pri = 0; pri <= pif->pif_maxpri; pri++) if ((cl = pif->pif_classes[pri]) != NULL) priq_class_destroy(cl); return (0); } static int priq_request(struct ifaltq *ifq, int req, void *arg) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: priq_purge(pif); break; } return (0); } /* discard all the queued packets on the interface */ static void priq_purge(struct priq_if *pif) { struct priq_class *cl; int pri; for (pri = 0; pri <= pif->pif_maxpri; pri++) { if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q)) priq_purgeq(cl); } if (ALTQ_IS_ENABLED(pif->pif_ifq)) pif->pif_ifq->ifq_len = 0; } static struct priq_class * priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) { struct 
priq_class *cl; int s; #ifndef ALTQ_RED if (flags & PRCF_RED) { #ifdef ALTQ_DEBUG printf("priq_class_create: RED not configured for PRIQ!\n"); #endif return (NULL); } #endif if ((cl = pif->pif_classes[pri]) != NULL) { /* modify the class instead of creating a new one */ -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(cl->cl_pif->pif_ifq); if (!qempty(cl->cl_q)) priq_purgeq(cl); IFQ_UNLOCK(cl->cl_pif->pif_ifq); splx(s); #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif } else { cl = malloc(sizeof(struct priq_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->cl_q == NULL) goto err_ret; } pif->pif_classes[pri] = cl; if (flags & PRCF_DEFAULTCLASS) pif->pif_default = cl; if (qlimit == 0) qlimit = 50; /* use default */ qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; cl->cl_flags = flags; cl->cl_pri = pri; if (pri > pif->pif_maxpri) pif->pif_maxpri = pri; cl->cl_pif = pif; cl->cl_handle = qid; #ifdef ALTQ_RED if (flags & (PRCF_RED|PRCF_RIO)) { int red_flags, red_pkttime; red_flags = 0; if (flags & PRCF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & PRCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (pif->pif_bandwidth < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); #ifdef ALTQ_RIO if (flags & PRCF_RIO) { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red == NULL) goto err_ret; qtype(cl->cl_q) = Q_RIO; } else #endif if (flags & PRCF_RED) { cl->cl_red = red_alloc(0, 0, qlimit(cl->cl_q) * 10/100, qlimit(cl->cl_q) * 30/100, red_flags, red_pkttime); if (cl->cl_red == NULL) goto err_ret; qtype(cl->cl_q) = Q_RED; } } #endif /* ALTQ_RED */ return (cl); err_ret: if (cl->cl_red != 
NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif } if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (NULL); } static int priq_class_destroy(struct priq_class *cl) { struct priq_if *pif; int s, pri; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(cl->cl_pif->pif_ifq); #ifdef ALTQ3_CLFIER_COMPAT /* delete filters referencing to this class */ acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0); #endif if (!qempty(cl->cl_q)) priq_purgeq(cl); pif = cl->cl_pif; pif->pif_classes[cl->cl_pri] = NULL; if (pif->pif_maxpri == cl->cl_pri) { for (pri = cl->cl_pri; pri >= 0; pri--) if (pif->pif_classes[pri] != NULL) { pif->pif_maxpri = pri; break; } if (pri < 0) pif->pif_maxpri = -1; } IFQ_UNLOCK(cl->cl_pif->pif_ifq); splx(s); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif } free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * priq_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. 
*/
/*
 * Returns 0 when m was queued.  Returns ENOBUFS, with m already freed,
 * when m has no packet header, when no class (not even a default one)
 * can be found for it, or when priq_addq() drops it.
 */
static int
priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
{
	struct priq_if	*pif = (struct priq_if *)ifq->altq_disc;
	struct priq_class *cl;
	struct pf_mtag *t;
	int len;

	IFQ_LOCK_ASSERT(ifq);

	/* grab class set by classifier */
	if ((m->m_flags & M_PKTHDR) == 0) {
		/* should not happen */
		printf("altq: packet for %s does not have pkthdr\n",
		    ifq->altq_ifp->if_xname);
		m_freem(m);
		return (ENOBUFS);
	}
	/* the queue id from the pf mbuf tag, when present, selects the class */
	cl = NULL;
	if ((t = pf_find_mtag(m)) != NULL)
		cl = clh_to_clp(pif, t->qid);
#ifdef ALTQ3_COMPAT
	else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
		cl = pktattr->pattr_class;
#endif
	/* no class selected: fall back to the default class, if any */
	if (cl == NULL) {
		cl = pif->pif_default;
		if (cl == NULL) {
			m_freem(m);
			return (ENOBUFS);
		}
	}
#ifdef ALTQ3_COMPAT
	if (pktattr != NULL)
		cl->cl_pktattr = pktattr;  /* save proto hdr used by ECN */
	else
#endif
		cl->cl_pktattr = NULL;
	/* remember the length now: priq_addq() may free m */
	len = m_pktlen(m);
	if (priq_addq(cl, m) != 0) {
		/* drop occurred.  mbuf was freed in priq_addq. */
		PKTCNTR_ADD(&cl->cl_dropcnt, len);
		return (ENOBUFS);
	}
	IFQ_INC_LEN(ifq);

	/* successfully queued. */
	return (0);
}

/*
 * priq_dequeue is a dequeue function to be registered to
 * (*altq_dequeue) in struct ifaltq.
 *
 * note: ALTDQ_POLL returns the next packet without removing the packet
 *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
 *	ALTDQ_REMOVE must return the same packet if called immediately
 *	after ALTDQ_POLL.
*/ static struct mbuf * priq_dequeue(struct ifaltq *ifq, int op) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; struct priq_class *cl; struct mbuf *m; int pri; IFQ_LOCK_ASSERT(ifq); if (IFQ_IS_EMPTY(ifq)) /* no packet in the queue */ return (NULL); for (pri = pif->pif_maxpri; pri >= 0; pri--) { if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q)) { if (op == ALTDQ_POLL) return (priq_pollq(cl)); m = priq_getq(cl); if (m != NULL) { IFQ_DEC_LEN(ifq); if (qempty(cl->cl_q)) cl->cl_period++; PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(m)); } return (m); } } return (NULL); } static int priq_addq(struct priq_class *cl, struct mbuf *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); } if (cl->cl_flags & PRCF_CLEARDSCP) write_dsfield(m, cl->cl_pktattr, 0); _addq(cl->cl_q, m); return (0); } static struct mbuf * priq_getq(struct priq_class *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_getq((rio_t *)cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif return _getq(cl->cl_q); } static struct mbuf * priq_pollq(cl) struct priq_class *cl; { return qhead(cl->cl_q); } static void priq_purgeq(struct priq_class *cl) { struct mbuf *m; if (qempty(cl->cl_q)) return; while ((m = _getq(cl->cl_q)) != NULL) { PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); m_freem(m); } ASSERT(qlen(cl->cl_q) == 0); } static void get_class_stats(struct priq_classstats *sp, struct priq_class *cl) { sp->class_handle = cl->cl_handle; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); sp->period = cl->cl_period; sp->xmitcnt = cl->cl_xmitcnt; sp->dropcnt = cl->cl_dropcnt; sp->qtype = qtype(cl->cl_q); #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_getstats(cl->cl_red, &sp->red[0]); #endif 
#ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif } /* convert a class handle to the corresponding class pointer */ static struct priq_class * clh_to_clp(struct priq_if *pif, u_int32_t chandle) { struct priq_class *cl; int idx; if (chandle == 0) return (NULL); for (idx = pif->pif_maxpri; idx >= 0; idx--) if ((cl = pif->pif_classes[idx]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } #ifdef ALTQ3_COMPAT static struct priq_if * priq_attach(ifq, bandwidth) struct ifaltq *ifq; u_int bandwidth; { struct priq_if *pif; pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_WAITOK); if (pif == NULL) return (NULL); bzero(pif, sizeof(struct priq_if)); pif->pif_bandwidth = bandwidth; pif->pif_maxpri = -1; pif->pif_ifq = ifq; /* add this state to the priq list */ pif->pif_next = pif_list; pif_list = pif; return (pif); } static int priq_detach(pif) struct priq_if *pif; { (void)priq_clear_interface(pif); /* remove this interface from the pif list */ if (pif_list == pif) pif_list = pif->pif_next; else { struct priq_if *p; for (p = pif_list; p != NULL; p = p->pif_next) if (p->pif_next == pif) { p->pif_next = pif->pif_next; break; } ASSERT(p != NULL); } free(pif, M_DEVBUF); return (0); } /* * priq device interface */ int priqopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { /* everything will be done when the queueing scheme is attached. 
*/ return 0; } int priqclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct priq_if *pif; int err, error = 0; while ((pif = pif_list) != NULL) { /* destroy all */ if (ALTQ_IS_ENABLED(pif->pif_ifq)) altq_disable(pif->pif_ifq); err = altq_detach(pif->pif_ifq); if (err == 0) err = priq_detach(pif); if (err != 0 && error == 0) error = err; } return error; } int priqioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct priq_if *pif; struct priq_interface *ifacep; int error = 0; /* check super-user privilege */ switch (cmd) { case PRIQ_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) return (error); #elsif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) return (error); #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) return (error); #endif break; } switch (cmd) { case PRIQ_IF_ATTACH: error = priqcmd_if_attach((struct priq_interface *)addr); break; case PRIQ_IF_DETACH: error = priqcmd_if_detach((struct priq_interface *)addr); break; case PRIQ_ENABLE: case PRIQ_DISABLE: case PRIQ_CLEAR: ifacep = (struct priq_interface *)addr; if ((pif = altq_lookup(ifacep->ifname, ALTQT_PRIQ)) == NULL) { error = EBADF; break; } switch (cmd) { case PRIQ_ENABLE: if (pif->pif_default == NULL) { #ifdef ALTQ_DEBUG printf("priq: no default class\n"); #endif error = EINVAL; break; } error = altq_enable(pif->pif_ifq); break; case PRIQ_DISABLE: error = altq_disable(pif->pif_ifq); break; case PRIQ_CLEAR: priq_clear_interface(pif); break; } break; case PRIQ_ADD_CLASS: error = priqcmd_add_class((struct priq_add_class *)addr); break; case PRIQ_DEL_CLASS: error = priqcmd_delete_class((struct priq_delete_class *)addr); break; case PRIQ_MOD_CLASS: error = priqcmd_modify_class((struct priq_modify_class *)addr); break; case 
PRIQ_ADD_FILTER: error = priqcmd_add_filter((struct priq_add_filter *)addr); break; case PRIQ_DEL_FILTER: error = priqcmd_delete_filter((struct priq_delete_filter *)addr); break; case PRIQ_GETSTATS: error = priqcmd_class_stats((struct priq_class_stats *)addr); break; default: error = EINVAL; break; } return error; } static int priqcmd_if_attach(ap) struct priq_interface *ap; { struct priq_if *pif; struct ifnet *ifp; int error; if ((ifp = ifunit(ap->ifname)) == NULL) return (ENXIO); if ((pif = priq_attach(&ifp->if_snd, ap->arg)) == NULL) return (ENOMEM); /* * set PRIQ to this ifnet structure. */ if ((error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, pif, priq_enqueue, priq_dequeue, priq_request, &pif->pif_classifier, acc_classify)) != 0) (void)priq_detach(pif); return (error); } static int priqcmd_if_detach(ap) struct priq_interface *ap; { struct priq_if *pif; int error; if ((pif = altq_lookup(ap->ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if (ALTQ_IS_ENABLED(pif->pif_ifq)) altq_disable(pif->pif_ifq); if ((error = altq_detach(pif->pif_ifq))) return (error); return priq_detach(pif); } static int priqcmd_add_class(ap) struct priq_add_class *ap; { struct priq_if *pif; struct priq_class *cl; int qid; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI) return (EINVAL); if (pif->pif_classes[ap->pri] != NULL) return (EBUSY); qid = ap->pri + 1; if ((cl = priq_class_create(pif, ap->pri, ap->qlimit, ap->flags, qid)) == NULL) return (ENOMEM); /* return a class handle to the user */ ap->class_handle = cl->cl_handle; return (0); } static int priqcmd_delete_class(ap) struct priq_delete_class *ap; { struct priq_if *pif; struct priq_class *cl; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) return (EINVAL); return priq_class_destroy(cl); } static int priqcmd_modify_class(ap) struct priq_modify_class *ap; { struct priq_if *pif; 
struct priq_class *cl; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI) return (EINVAL); if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) return (EINVAL); /* * if priority is changed, move the class to the new priority */ if (pif->pif_classes[ap->pri] != cl) { if (pif->pif_classes[ap->pri] != NULL) return (EEXIST); pif->pif_classes[cl->cl_pri] = NULL; pif->pif_classes[ap->pri] = cl; cl->cl_pri = ap->pri; } /* call priq_class_create to change class parameters */ if ((cl = priq_class_create(pif, ap->pri, ap->qlimit, ap->flags, ap->class_handle)) == NULL) return (ENOMEM); return 0; } static int priqcmd_add_filter(ap) struct priq_add_filter *ap; { struct priq_if *pif; struct priq_class *cl; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) return (EINVAL); return acc_add_filter(&pif->pif_classifier, &ap->filter, cl, &ap->filter_handle); } static int priqcmd_delete_filter(ap) struct priq_delete_filter *ap; { struct priq_if *pif; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); return acc_delete_filter(&pif->pif_classifier, ap->filter_handle); } static int priqcmd_class_stats(ap) struct priq_class_stats *ap; { struct priq_if *pif; struct priq_class *cl; struct priq_classstats stats, *usp; int pri, error; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); ap->maxpri = pif->pif_maxpri; /* then, read the next N classes in the tree */ usp = ap->stats; for (pri = 0; pri <= pif->pif_maxpri; pri++) { cl = pif->pif_classes[pri]; if (cl != NULL) get_class_stats(&stats, cl); else bzero(&stats, sizeof(stats)); if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, sizeof(stats))) != 0) return (error); } return (0); } #ifdef KLD_MODULE static struct altqsw priq_sw = {"priq", priqopen, priqclose, priqioctl}; ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw); 
MODULE_DEPEND(altq_priq, altq_red, 1, 1, 1); MODULE_DEPEND(altq_priq, altq_rio, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_PRIQ */ Index: head/sys/net/altq/altq_red.c =================================================================== --- head/sys/net/altq/altq_red.c (revision 281641) +++ head/sys/net/altq/altq_red.c (revision 281642) @@ -1,1500 +1,1492 @@ -/* $FreeBSD$ */ -/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */ - -/* +/*- * Copyright (C) 1997-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ -/* +/*- * Copyright (c) 1990-1994 Regents of the University of California. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Computer Systems * Engineering Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
+ * + * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #ifdef ALTQ_FLOWVALVE #include #include #endif #endif /* ALTQ3_COMPAT */ #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #ifdef ALTQ_FLOWVALVE #include #endif #endif /* * ALTQ/RED (Random Early Detection) implementation using 32-bit * fixed-point calculation. * * written by kjc using the ns code as a reference. * you can learn more about red and ns from Sally's home page at * http://www-nrg.ee.lbl.gov/floyd/ * * most of the red parameter values are fixed in this implementation * to prevent fixed-point overflow/underflow. * if you change the parameters, watch out for overflow/underflow! * * the parameters used are recommended values by Sally. * the corresponding ns config looks: * q_weight=0.00195 * minthresh=5 maxthresh=15 queue-size=60 * linterm=30 * dropmech=drop-tail * bytes=false (can't be handled by 32-bit fixed-point) * doubleq=false dqthresh=false * wait=true */ /* * alternative red parameters for a slow link. * * assume the queue length becomes from zero to L and keeps L, it takes * N packets for q_avg to reach 63% of L. * when q_weight is 0.002, N is about 500 packets. * for a slow link like dial-up, 500 packets takes more than 1 minute! * when q_weight is 0.008, N is about 127 packets. * when q_weight is 0.016, N is about 63 packets. * bursts of 50 packets are allowed for 0.002, bursts of 25 packets * are allowed for 0.016. * see Sally's paper for more details. 
/* normal red parameters */
#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
				/* q_weight = 0.00195 */

/* red parameters for a slow link */
#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
				/* q_weight = 0.0078125 */

/* red parameters for a very slow link (e.g., dialup) */
#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
				/* q_weight = 0.015625 */

/* fixed-point values carry 12 fractional bits */
#define	FP_SHIFT	12	/* fixed-point shift */

/* red parameters for drop probability */
#define	INV_P_MAX	10	/* inverse of max drop probability */
#define	TH_MIN		5	/* min threshold */
#define	TH_MAX		15	/* max threshold */

#define	RED_LIMIT	60	/* default max queue length */
#define	RED_STATS		/* collect statistics */

/*
 * our default policy for forced-drop is drop-tail.
 * (in altq-1.1.2 or earlier, the default was random-drop.
 * but it makes more sense to punish the cause of the surge.)
 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
 */

#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
/*
 * flow-valve is an extension to protect red from unresponsive flows
 * and to promote end-to-end congestion control.
 * flow-valve observes the average drop rates of the flows that have
 * experienced packet drops in the recent past.
 * when the average drop rate exceeds the threshold, the flow is
 * blocked by the flow-valve.  the trapped flow should back off
 * exponentially to escape from the flow-valve.
 */
#ifdef RED_RANDOM_DROP
#error "random-drop can't be used with flow-valve!"
#endif
#endif /* ALTQ_FLOWVALVE */
*/ static red_queue_t *red_list = NULL; #endif /* ALTQ3_COMPAT */ /* default red parameter values */ static int default_th_min = TH_MIN; static int default_th_max = TH_MAX; static int default_inv_pmax = INV_P_MAX; #ifdef ALTQ3_COMPAT /* internal function prototypes */ static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *red_dequeue(struct ifaltq *, int); static int red_request(struct ifaltq *, int, void *); static void red_purgeq(red_queue_t *); static int red_detach(red_queue_t *); #ifdef ALTQ_FLOWVALVE static __inline struct fve *flowlist_lookup(struct flowvalve *, struct altq_pktattr *, struct timeval *); static __inline struct fve *flowlist_reclaim(struct flowvalve *, struct altq_pktattr *); static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *); static __inline int fv_p2f(struct flowvalve *, int); #if 0 /* XXX: make the compiler happy (fv_alloc unused) */ static struct flowvalve *fv_alloc(struct red *); #endif static void fv_destroy(struct flowvalve *); static int fv_checkflow(struct flowvalve *, struct altq_pktattr *, struct fve **); static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *, struct fve *); #endif #endif /* ALTQ3_COMPAT */ /* * red support routines */ red_t * red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, int pkttime) { red_t *rp; int w, i; int npkts_per_sec; rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (rp == NULL) return (NULL); if (weight == 0) rp->red_weight = W_WEIGHT; else rp->red_weight = weight; /* allocate weight table */ rp->red_wtab = wtab_alloc(rp->red_weight); if (rp->red_wtab == NULL) { free(rp, M_DEVBUF); return (NULL); } rp->red_avg = 0; rp->red_idle = 1; if (inv_pmax == 0) rp->red_inv_pmax = default_inv_pmax; else rp->red_inv_pmax = inv_pmax; if (th_min == 0) rp->red_thmin = default_th_min; else rp->red_thmin = th_min; if (th_max == 0) rp->red_thmax = default_th_max; else rp->red_thmax = th_max; rp->red_flags = 
flags; if (pkttime == 0) /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ rp->red_pkttime = 800; else rp->red_pkttime = pkttime; if (weight == 0) { /* when the link is very slow, adjust red parameters */ npkts_per_sec = 1000000 / rp->red_pkttime; if (npkts_per_sec < 50) { /* up to about 400Kbps */ rp->red_weight = W_WEIGHT_2; } else if (npkts_per_sec < 300) { /* up to about 2.4Mbps */ rp->red_weight = W_WEIGHT_1; } } /* calculate wshift. weight must be power of 2 */ w = rp->red_weight; for (i = 0; w > 1; i++) w = w >> 1; rp->red_wshift = i; w = 1 << rp->red_wshift; if (w != rp->red_weight) { printf("invalid weight value %d for red! use %d\n", rp->red_weight, w); rp->red_weight = w; } /* * thmin_s and thmax_s are scaled versions of th_min and th_max * to be compared with avg. */ rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); /* * precompute probability denominator * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */ rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) * rp->red_inv_pmax) << FP_SHIFT; microtime(&rp->red_last); return (rp); } void red_destroy(red_t *rp) { #ifdef ALTQ3_COMPAT #ifdef ALTQ_FLOWVALVE if (rp->red_flowvalve != NULL) fv_destroy(rp->red_flowvalve); #endif #endif /* ALTQ3_COMPAT */ wtab_destroy(rp->red_wtab); free(rp, M_DEVBUF); } void red_getstats(red_t *rp, struct redstats *sp) { sp->q_avg = rp->red_avg >> rp->red_wshift; sp->xmit_cnt = rp->red_stats.xmit_cnt; sp->drop_cnt = rp->red_stats.drop_cnt; sp->drop_forced = rp->red_stats.drop_forced; sp->drop_unforced = rp->red_stats.drop_unforced; sp->marked_packets = rp->red_stats.marked_packets; } int red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, struct altq_pktattr *pktattr) { int avg, droptype; int n; #ifdef ALTQ3_COMPAT #ifdef ALTQ_FLOWVALVE struct fve *fve = NULL; if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { 
m_freem(m); return (-1); } #endif #endif /* ALTQ3_COMPAT */ avg = rp->red_avg; /* * if we were idle, we pretend that n packets arrived during * the idle period. */ if (rp->red_idle) { struct timeval now; int t; rp->red_idle = 0; microtime(&now); t = (now.tv_sec - rp->red_last.tv_sec); if (t > 60) { /* * being idle for more than 1 minute, set avg to zero. * this prevents t from overflow. */ avg = 0; } else { t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); n = t / rp->red_pkttime - 1; /* the following line does (avg = (1 - Wq)^n * avg) */ if (n > 0) avg = (avg >> FP_SHIFT) * pow_w(rp->red_wtab, n); } } /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); rp->red_avg = avg; /* save the new value */ /* * red_count keeps a tally of arriving traffic that has not * been dropped. */ rp->red_count++; /* see if we drop early */ droptype = DTYPE_NODROP; if (avg >= rp->red_thmin_s && qlen(q) > 1) { if (avg >= rp->red_thmax_s) { /* avg >= th_max: forced drop */ droptype = DTYPE_FORCED; } else if (rp->red_old == 0) { /* first exceeds th_min */ rp->red_count = 1; rp->red_old = 1; } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, rp->red_probd, rp->red_count)) { /* mark or drop by red */ if ((rp->red_flags & REDF_ECN) && mark_ecn(m, pktattr, rp->red_flags)) { /* successfully marked. do not drop. */ rp->red_count = 0; #ifdef RED_STATS rp->red_stats.marked_packets++; #endif } else { /* unforced drop by red */ droptype = DTYPE_EARLY; } } } else { /* avg < th_min */ rp->red_old = 0; } /* * if the queue length hits the hard limit, it's a forced drop. */ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) droptype = DTYPE_FORCED; #ifdef RED_RANDOM_DROP /* if successful or forced drop, enqueue this packet. */ if (droptype != DTYPE_EARLY) _addq(q, m); #else /* if successful, enqueue this packet. 
/*
 * drop_early: random early-drop decision.
 *
 * The denominator of the drop probability is fp_probd - count * fp_len.
 * When it is not positive the packet is always dropped or marked;
 * otherwise a value drawn with arc4random() modulo the denominator is
 * compared against fp_len.
 *
 * Returns 1 to drop or mark the packet, 0 to let it pass.
 */
int
drop_early(int fp_len, int fp_probd, int count)
{
	int denom;	/* denominator of drop-probability */

	denom = fp_probd - count * fp_len;

	/*
	 * denom <= 0: count exceeds the hard limit.
	 * otherwise: drop probability = fp_len / denom.
	 * The random number is only drawn when denom is positive.
	 */
	if (denom <= 0 || (arc4random() % denom) < fp_len)
		return (1);	/* drop or mark */

	/* no drop/mark */
	return (0);
}
*/ int mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) { struct mbuf *m0; struct pf_mtag *at; void *hdr; at = pf_find_mtag(m); if (at != NULL) { hdr = at->hdr; #ifdef ALTQ3_COMPAT } else if (pktattr != NULL) { af = pktattr->pattr_af; hdr = pktattr->pattr_hdr; #endif /* ALTQ3_COMPAT */ } else return (0); /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)hdr >= m0->m_data) && ((caddr_t)hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, tag info is stale */ return (0); } switch (((struct ip *)hdr)->ip_v) { case IPVERSION: if (flags & REDF_ECN4) { struct ip *ip = hdr; u_int8_t otos; int sum; if (ip->ip_v != 4) return (0); /* version mismatch! */ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) return (0); /* not-ECT */ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) return (1); /* already marked */ /* * ecn-capable but not marked, * mark CE and update checksum */ otos = ip->ip_tos; ip->ip_tos |= IPTOS_ECN_CE; /* * update checksum (from RFC1624) * HC' = ~(~HC + ~m + m') */ sum = ~ntohs(ip->ip_sum) & 0xffff; sum += (~otos & 0xffff) + ip->ip_tos; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); /* add carry */ ip->ip_sum = htons(~sum & 0xffff); return (1); } break; #ifdef INET6 case (IPV6_VERSION >> 4): if (flags & REDF_ECN6) { struct ip6_hdr *ip6 = hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return (0); /* version mismatch! 
*/ if ((flowlabel & (IPTOS_ECN_MASK << 20)) == (IPTOS_ECN_NOTECT << 20)) return (0); /* not-ECT */ if ((flowlabel & (IPTOS_ECN_MASK << 20)) == (IPTOS_ECN_CE << 20)) return (1); /* already marked */ /* * ecn-capable but not marked, mark CE */ flowlabel |= (IPTOS_ECN_CE << 20); ip6->ip6_flow = htonl(flowlabel); return (1); } break; #endif /* INET6 */ } /* not marked */ return (0); } struct mbuf * red_getq(rp, q) red_t *rp; class_queue_t *q; { struct mbuf *m; if ((m = _getq(q)) == NULL) { if (rp->red_idle == 0) { rp->red_idle = 1; microtime(&rp->red_last); } return NULL; } rp->red_idle = 0; return (m); } /* * helper routine to calibrate avg during idle. * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point * here Wq = 1/weight and the code assumes Wq is close to zero. * * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. */ static struct wtab *wtab_list = NULL; /* pointer to wtab list */ struct wtab * wtab_alloc(int weight) { struct wtab *w; int i; for (w = wtab_list; w != NULL; w = w->w_next) if (w->w_weight == weight) { w->w_refcount++; return (w); } w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO); if (w == NULL) return (NULL); w->w_weight = weight; w->w_refcount = 1; w->w_next = wtab_list; wtab_list = w; /* initialize the weight table */ w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; for (i = 1; i < 32; i++) { w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; if (w->w_tab[i] == 0 && w->w_param_max == 0) w->w_param_max = 1 << i; } return (w); } int wtab_destroy(struct wtab *w) { struct wtab *prev; if (--w->w_refcount > 0) return (0); if (wtab_list == w) wtab_list = w->w_next; else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) if (prev->w_next == w) { prev->w_next = w->w_next; break; } free(w, M_DEVBUF); return (0); } int32_t pow_w(struct wtab *w, int n) { int i, bit; int32_t val; if (n >= w->w_param_max) return (0); val = 1 << FP_SHIFT; if (n <= 0) return (val); bit = 1; i = 0; while (n) { if (n & bit) { val = (val * 
w->w_tab[i]) >> FP_SHIFT; n &= ~bit; } i++; bit <<= 1; } return (val); } #ifdef ALTQ3_COMPAT /* * red device interface */ altqdev_decl(red); int redopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { /* everything will be done when the queueing scheme is attached. */ return 0; } int redclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { red_queue_t *rqp; int err, error = 0; while ((rqp = red_list) != NULL) { /* destroy all */ err = red_detach(rqp); if (err != 0 && error == 0) error = err; } return error; } int redioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { red_queue_t *rqp; struct red_interface *ifacep; struct ifnet *ifp; int error = 0; /* check super-user privilege */ switch (cmd) { case RED_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) #elsif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) #endif return (error); break; } switch (cmd) { case RED_ENABLE: ifacep = (struct red_interface *)addr; if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { error = EBADF; break; } error = altq_enable(rqp->rq_ifq); break; case RED_DISABLE: ifacep = (struct red_interface *)addr; if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { error = EBADF; break; } error = altq_disable(rqp->rq_ifq); break; case RED_IF_ATTACH: ifp = ifunit(((struct red_interface *)addr)->red_ifname); if (ifp == NULL) { error = ENXIO; break; } /* allocate and initialize red_queue_t */ rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK); if (rqp == NULL) { error = ENOMEM; break; } bzero(rqp, sizeof(red_queue_t)); rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_WAITOK); 
if (rqp->rq_q == NULL) { free(rqp, M_DEVBUF); error = ENOMEM; break; } bzero(rqp->rq_q, sizeof(class_queue_t)); rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); if (rqp->rq_red == NULL) { free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); error = ENOMEM; break; } rqp->rq_ifq = &ifp->if_snd; qtail(rqp->rq_q) = NULL; qlen(rqp->rq_q) = 0; qlimit(rqp->rq_q) = RED_LIMIT; qtype(rqp->rq_q) = Q_RED; /* * set RED to this ifnet structure. */ error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, red_enqueue, red_dequeue, red_request, NULL, NULL); if (error) { red_destroy(rqp->rq_red); free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); break; } /* add this state to the red list */ rqp->rq_next = red_list; red_list = rqp; break; case RED_IF_DETACH: ifacep = (struct red_interface *)addr; if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { error = EBADF; break; } error = red_detach(rqp); break; case RED_GETSTATS: do { struct red_stats *q_stats; red_t *rp; q_stats = (struct red_stats *)addr; if ((rqp = altq_lookup(q_stats->iface.red_ifname, ALTQT_RED)) == NULL) { error = EBADF; break; } q_stats->q_len = qlen(rqp->rq_q); q_stats->q_limit = qlimit(rqp->rq_q); rp = rqp->rq_red; q_stats->q_avg = rp->red_avg >> rp->red_wshift; q_stats->xmit_cnt = rp->red_stats.xmit_cnt; q_stats->drop_cnt = rp->red_stats.drop_cnt; q_stats->drop_forced = rp->red_stats.drop_forced; q_stats->drop_unforced = rp->red_stats.drop_unforced; q_stats->marked_packets = rp->red_stats.marked_packets; q_stats->weight = rp->red_weight; q_stats->inv_pmax = rp->red_inv_pmax; q_stats->th_min = rp->red_thmin; q_stats->th_max = rp->red_thmax; #ifdef ALTQ_FLOWVALVE if (rp->red_flowvalve != NULL) { struct flowvalve *fv = rp->red_flowvalve; q_stats->fv_flows = fv->fv_flows; q_stats->fv_pass = fv->fv_stats.pass; q_stats->fv_predrop = fv->fv_stats.predrop; q_stats->fv_alloc = fv->fv_stats.alloc; q_stats->fv_escape = fv->fv_stats.escape; } else { #endif /* ALTQ_FLOWVALVE */ q_stats->fv_flows = 0; q_stats->fv_pass = 0; 
q_stats->fv_predrop = 0; q_stats->fv_alloc = 0; q_stats->fv_escape = 0; #ifdef ALTQ_FLOWVALVE } #endif /* ALTQ_FLOWVALVE */ } while (/*CONSTCOND*/ 0); break; case RED_CONFIG: do { struct red_conf *fc; red_t *new; int s, limit; fc = (struct red_conf *)addr; if ((rqp = altq_lookup(fc->iface.red_ifname, ALTQT_RED)) == NULL) { error = EBADF; break; } new = red_alloc(fc->red_weight, fc->red_inv_pmax, fc->red_thmin, fc->red_thmax, fc->red_flags, fc->red_pkttime); if (new == NULL) { error = ENOMEM; break; } -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif red_purgeq(rqp); limit = fc->red_limit; if (limit < fc->red_thmax) limit = fc->red_thmax; qlimit(rqp->rq_q) = limit; fc->red_limit = limit; /* write back the new value */ red_destroy(rqp->rq_red); rqp->rq_red = new; splx(s); /* write back new values */ fc->red_limit = limit; fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; fc->red_thmin = rqp->rq_red->red_thmin; fc->red_thmax = rqp->rq_red->red_thmax; } while (/*CONSTCOND*/ 0); break; case RED_SETDEFAULTS: do { struct redparams *rp; rp = (struct redparams *)addr; default_th_min = rp->th_min; default_th_max = rp->th_max; default_inv_pmax = rp->inv_pmax; } while (/*CONSTCOND*/ 0); break; default: error = EINVAL; break; } return error; } static int red_detach(rqp) red_queue_t *rqp; { red_queue_t *tmp; int error = 0; if (ALTQ_IS_ENABLED(rqp->rq_ifq)) altq_disable(rqp->rq_ifq); if ((error = altq_detach(rqp->rq_ifq))) return (error); if (red_list == rqp) red_list = rqp->rq_next; else { for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) if (tmp->rq_next == rqp) { tmp->rq_next = rqp->rq_next; break; } if (tmp == NULL) printf("red_detach: no state found in red_list!\n"); } red_destroy(rqp->rq_red); free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); return (error); } /* * enqueue routine: * * returns: 0 when successfully queued. * ENOBUFS when drop occurs. 
*/ static int red_enqueue(ifq, m, pktattr) struct ifaltq *ifq; struct mbuf *m; struct altq_pktattr *pktattr; { red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0) return ENOBUFS; ifq->ifq_len++; return 0; } /* * dequeue routine: * must be called in splimp. * * returns: mbuf dequeued. * NULL when no packet is available in the queue. */ static struct mbuf * red_dequeue(ifq, op) struct ifaltq *ifq; int op; { red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); if (op == ALTDQ_POLL) return qhead(rqp->rq_q); /* op == ALTDQ_REMOVE */ m = red_getq(rqp->rq_red, rqp->rq_q); if (m != NULL) ifq->ifq_len--; return (m); } static int red_request(ifq, req, arg) struct ifaltq *ifq; int req; void *arg; { red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: red_purgeq(rqp); break; } return (0); } static void red_purgeq(rqp) red_queue_t *rqp; { _flushq(rqp->rq_q); if (ALTQ_IS_ENABLED(rqp->rq_ifq)) rqp->rq_ifq->ifq_len = 0; } #ifdef ALTQ_FLOWVALVE #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ #define FV_PSCALE(x) ((x) << FV_PSHIFT) #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ #define FV_FSCALE(x) ((x) << FV_FSHIFT) #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) #define FV_TIMER (3 * hz) /* timer value for garbage collector */ #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */ #define FV_N 10 /* update fve_f every FV_N packets */ #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */ #define FV_TTHRESH 3 /* time threshold to delete fve */ #define FV_ALPHA 5 /* extra packet count */ #define FV_STATS #if (__FreeBSD_version > 300000) #define FV_TIMESTAMP(tp) getmicrotime(tp) #else #define FV_TIMESTAMP(tp) { (*(tp)) = time; } #endif /* * Brtt table: 127 entry table to convert drop rate (p) to * the corresponding bandwidth fraction (f) * 
the following equation is implemented to use scaled values, * fve_p and fve_f, in the fixed point format. * * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p)) * f = Brtt(p) / (max_th + alpha) */ #define BRTT_SIZE 128 #define BRTT_SHIFT 12 #define BRTT_MASK 0x0007f000 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT)) const int brtt_tab[BRTT_SIZE] = { 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728, 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361, 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333, 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612, 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957, 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440, 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184, 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611, 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062, 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487, 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222, 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844, 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079, 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746, 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722, 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924 }; static __inline struct fve * flowlist_lookup(fv, pktattr, now) struct flowvalve *fv; struct altq_pktattr *pktattr; struct timeval *now; { struct fve *fve; int flows; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; #endif struct timeval tthresh; if (pktattr == NULL) return (NULL); tthresh.tv_sec = now->tv_sec - FV_TTHRESH; flows = 0; /* * search the flow list */ switch (pktattr->pattr_af) { case AF_INET: ip = (struct ip *)pktattr->pattr_hdr; TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ if (fve->fve_lastdrop.tv_sec == 0) break; if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { fve->fve_lastdrop.tv_sec = 0; break; } if (fve->fve_flow.flow_af == AF_INET && fve->fve_flow.flow_ip.ip_src.s_addr == ip->ip_src.s_addr && 
fve->fve_flow.flow_ip.ip_dst.s_addr == ip->ip_dst.s_addr) return (fve); flows++; } break; #ifdef INET6 case AF_INET6: ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ if (fve->fve_lastdrop.tv_sec == 0) break; if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { fve->fve_lastdrop.tv_sec = 0; break; } if (fve->fve_flow.flow_af == AF_INET6 && IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src, &ip6->ip6_src) && IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst, &ip6->ip6_dst)) return (fve); flows++; } break; #endif /* INET6 */ default: /* unknown protocol. no drop. */ return (NULL); } fv->fv_flows = flows; /* save the number of active fve's */ return (NULL); } static __inline struct fve * flowlist_reclaim(fv, pktattr) struct flowvalve *fv; struct altq_pktattr *pktattr; { struct fve *fve; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; #endif /* * get an entry from the tail of the LRU list. */ fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead); switch (pktattr->pattr_af) { case AF_INET: ip = (struct ip *)pktattr->pattr_hdr; fve->fve_flow.flow_af = AF_INET; fve->fve_flow.flow_ip.ip_src = ip->ip_src; fve->fve_flow.flow_ip.ip_dst = ip->ip_dst; break; #ifdef INET6 case AF_INET6: ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; fve->fve_flow.flow_af = AF_INET6; fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src; fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst; break; #endif } fve->fve_state = Green; fve->fve_p = 0.0; fve->fve_f = 0.0; fve->fve_ifseq = fv->fv_ifseq - 1; fve->fve_count = 0; fv->fv_flows++; #ifdef FV_STATS fv->fv_stats.alloc++; #endif return (fve); } static __inline void flowlist_move_to_head(fv, fve) struct flowvalve *fv; struct fve *fve; { if (TAILQ_FIRST(&fv->fv_flowlist) != fve) { TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru); TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru); } } #if 0 /* XXX: make the compiler happy (fv_alloc unused) */ /* * allocate flowvalve structure */ static struct flowvalve * fv_alloc(rp) struct red *rp; { 
struct flowvalve *fv; struct fve *fve; int i, num; num = FV_FLOWLISTSIZE; fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK); if (fv == NULL) return (NULL); bzero(fv, sizeof(struct flowvalve)); fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF, M_WAITOK); if (fv->fv_fves == NULL) { free(fv, M_DEVBUF); return (NULL); } bzero(fv->fv_fves, sizeof(struct fve) * num); fv->fv_flows = 0; TAILQ_INIT(&fv->fv_flowlist); for (i = 0; i < num; i++) { fve = &fv->fv_fves[i]; fve->fve_lastdrop.tv_sec = 0; TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru); } /* initialize drop rate threshold in scaled fixed-point */ fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax; /* initialize drop rate to fraction table */ fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK); if (fv->fv_p2ftab == NULL) { free(fv->fv_fves, M_DEVBUF); free(fv, M_DEVBUF); return (NULL); } /* * create the p2f table. * (shift is used to keep the precision) */ for (i = 1; i < BRTT_SIZE; i++) { int f; f = brtt_tab[i] << 8; fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8; } return (fv); } #endif static void fv_destroy(fv) struct flowvalve *fv; { free(fv->fv_p2ftab, M_DEVBUF); free(fv->fv_fves, M_DEVBUF); free(fv, M_DEVBUF); } static __inline int fv_p2f(fv, p) struct flowvalve *fv; int p; { int val, f; if (p >= BRTT_PMAX) f = fv->fv_p2ftab[BRTT_SIZE-1]; else if ((val = (p & BRTT_MASK))) f = fv->fv_p2ftab[(val >> BRTT_SHIFT)]; else f = fv->fv_p2ftab[1]; return (f); } /* * check if an arriving packet should be pre-dropped. * called from red_addq() when a packet arrives. * returns 1 when the packet should be pre-dropped. * should be called in splimp. 
*/ static int fv_checkflow(fv, pktattr, fcache) struct flowvalve *fv; struct altq_pktattr *pktattr; struct fve **fcache; { struct fve *fve; struct timeval now; fv->fv_ifseq++; FV_TIMESTAMP(&now); if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) /* no matching entry in the flowlist */ return (0); *fcache = fve; /* update fraction f for every FV_N packets */ if (++fve->fve_count == FV_N) { /* * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f */ fve->fve_f = (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq) + fve->fve_f - FV_FUNSCALE(fve->fve_f); fve->fve_ifseq = fv->fv_ifseq; fve->fve_count = 0; } /* * overpumping test */ if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) { int fthresh; /* calculate a threshold */ fthresh = fv_p2f(fv, fve->fve_p); if (fve->fve_f > fthresh) fve->fve_state = Red; } if (fve->fve_state == Red) { /* * backoff test */ if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) { /* no drop for at least FV_BACKOFFTHRESH sec */ fve->fve_p = 0; fve->fve_state = Green; #ifdef FV_STATS fv->fv_stats.escape++; #endif } else { /* block this flow */ flowlist_move_to_head(fv, fve); fve->fve_lastdrop = now; #ifdef FV_STATS fv->fv_stats.predrop++; #endif return (1); } } /* * p = (1 - Wp) * p */ fve->fve_p -= FV_PUNSCALE(fve->fve_p); if (fve->fve_p < 0) fve->fve_p = 0; #ifdef FV_STATS fv->fv_stats.pass++; #endif return (0); } /* * called from red_addq when a packet is dropped by red. * should be called in splimp. 
*/ static void fv_dropbyred(fv, pktattr, fcache) struct flowvalve *fv; struct altq_pktattr *pktattr; struct fve *fcache; { struct fve *fve; struct timeval now; if (pktattr == NULL) return; FV_TIMESTAMP(&now); if (fcache != NULL) /* the fve of this packet is already cached */ fve = fcache; else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) fve = flowlist_reclaim(fv, pktattr); flowlist_move_to_head(fv, fve); /* * update p: the following line cancels the update * in fv_checkflow() and calculate * p = Wp + (1 - Wp) * p */ fve->fve_p = (1 << FP_SHIFT) + fve->fve_p; fve->fve_lastdrop = now; } #endif /* ALTQ_FLOWVALVE */ #ifdef KLD_MODULE static struct altqsw red_sw = {"red", redopen, redclose, redioctl}; ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw); MODULE_VERSION(altq_red, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_RED */ Index: head/sys/net/altq/altq_rio.c =================================================================== --- head/sys/net/altq/altq_rio.c (revision 281641) +++ head/sys/net/altq/altq_rio.c (revision 281642) @@ -1,852 +1,844 @@ -/* $FreeBSD$ */ -/* $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ */ - -/* +/*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -/* +/*- * Copyright (c) 1990-1994 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Computer Systems * Engineering Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */ #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #endif #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif /* * RIO: RED with IN/OUT bit * described in * "Explicit Allocation of Best Effort Packet Delivery Service" * David D. Clark and Wenjia Fang, MIT Lab for Computer Science * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf} * * this implementation is extended to support more than 2 drop precedence * values as described in RFC2597 (Assured Forwarding PHB Group). * */ /* * AF DS (differentiated service) codepoints. * (classes can be mapped to CBQ or H-FSC classes.) 
* * 0 1 2 3 4 5 6 7 * +---+---+---+---+---+---+---+---+ * | CLASS |DropPre| 0 | CU | * +---+---+---+---+---+---+---+---+ * * class 1: 001 * class 2: 010 * class 3: 011 * class 4: 100 * * low drop prec: 01 * medium drop prec: 10 * high drop prec: 11 */ /* normal red parameters */ #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ /* q_weight = 0.00195 */ /* red parameters for a slow link */ #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ /* q_weight = 0.0078125 */ /* red parameters for a very slow link (e.g., dialup) */ #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ /* q_weight = 0.015625 */ /* fixed-point uses 12-bit decimal places */ #define FP_SHIFT 12 /* fixed-point shift */ /* red parameters for drop probability */ #define INV_P_MAX 10 /* inverse of max drop probability */ #define TH_MIN 5 /* min threshold */ #define TH_MAX 15 /* max threshold */ #define RIO_LIMIT 60 /* default max queue length */ #define RIO_STATS /* collect statistics */ #define TV_DELTA(a, b, delta) { \ register int xxs; \ \ delta = (a)->tv_usec - (b)->tv_usec; \ if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \ if (xxs < 0) { \ delta = 60000000; \ } else if (xxs > 4) { \ if (xxs > 60) \ delta = 60000000; \ else \ delta += xxs * 1000000; \ } else while (xxs > 0) { \ delta += 1000000; \ xxs--; \ } \ } \ } #ifdef ALTQ3_COMPAT /* rio_list keeps all rio_queue_t's allocated.
*/ static rio_queue_t *rio_list = NULL; #endif /* default rio parameter values */ static struct redparams default_rio_params[RIO_NDROPPREC] = { /* th_min, th_max, inv_pmax */ { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */ { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */ { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */ }; /* internal function prototypes */ static int dscp2index(u_int8_t); #ifdef ALTQ3_COMPAT static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *rio_dequeue(struct ifaltq *, int); static int rio_request(struct ifaltq *, int, void *); static int rio_detach(rio_queue_t *); /* * rio device interface */ altqdev_decl(rio); #endif /* ALTQ3_COMPAT */ rio_t * rio_alloc(int weight, struct redparams *params, int flags, int pkttime) { rio_t *rp; int w, i; int npkts_per_sec; rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (rp == NULL) return (NULL); rp->rio_flags = flags; if (pkttime == 0) /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ rp->rio_pkttime = 800; else rp->rio_pkttime = pkttime; if (weight != 0) rp->rio_weight = weight; else { /* use default */ rp->rio_weight = W_WEIGHT; /* when the link is very slow, adjust red parameters */ npkts_per_sec = 1000000 / rp->rio_pkttime; if (npkts_per_sec < 50) { /* up to about 400Kbps */ rp->rio_weight = W_WEIGHT_2; } else if (npkts_per_sec < 300) { /* up to about 2.4Mbps */ rp->rio_weight = W_WEIGHT_1; } } /* calculate wshift. weight must be power of 2 */ w = rp->rio_weight; for (i = 0; w > 1; i++) w = w >> 1; rp->rio_wshift = i; w = 1 << rp->rio_wshift; if (w != rp->rio_weight) { printf("invalid weight value %d for red! 
use %d\n", rp->rio_weight, w); rp->rio_weight = w; } /* allocate weight table */ rp->rio_wtab = wtab_alloc(rp->rio_weight); for (i = 0; i < RIO_NDROPPREC; i++) { struct dropprec_state *prec = &rp->rio_precstate[i]; prec->avg = 0; prec->idle = 1; if (params == NULL || params[i].inv_pmax == 0) prec->inv_pmax = default_rio_params[i].inv_pmax; else prec->inv_pmax = params[i].inv_pmax; if (params == NULL || params[i].th_min == 0) prec->th_min = default_rio_params[i].th_min; else prec->th_min = params[i].th_min; if (params == NULL || params[i].th_max == 0) prec->th_max = default_rio_params[i].th_max; else prec->th_max = params[i].th_max; /* * th_min_s and th_max_s are scaled versions of th_min * and th_max to be compared with avg. */ prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT); prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT); /* * precompute probability denominator * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */ prec->probd = (2 * (prec->th_max - prec->th_min) * prec->inv_pmax) << FP_SHIFT; microtime(&prec->last); } return (rp); } void rio_destroy(rio_t *rp) { wtab_destroy(rp->rio_wtab); free(rp, M_DEVBUF); } void rio_getstats(rio_t *rp, struct redstats *sp) { int i; for (i = 0; i < RIO_NDROPPREC; i++) { bcopy(&rp->q_stats[i], sp, sizeof(struct redstats)); sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift; sp++; } } #if (RIO_NDROPPREC == 3) /* * internally, a drop precedence value is converted to an index * starting from 0. */ static int dscp2index(u_int8_t dscp) { int dpindex = dscp & AF_DROPPRECMASK; if (dpindex == 0) return (0); return ((dpindex >> 3) - 1); } #endif #if 1 /* * kludge: when a packet is dequeued, we need to know its drop precedence * in order to keep the queue length of each drop precedence. * use m_pkthdr.rcvif to pass this info. 
*/ #define RIOM_SET_PRECINDEX(m, idx) \ do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0) #define RIOM_GET_PRECINDEX(m) \ ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \ (m)->m_pkthdr.rcvif = NULL; idx; }) #endif int rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, struct altq_pktattr *pktattr) { int avg, droptype; u_int8_t dsfield, odsfield; int dpindex, i, n, t; struct timeval now; struct dropprec_state *prec; dsfield = odsfield = read_dsfield(m, pktattr); dpindex = dscp2index(dsfield); /* * update avg of the precedence states whose drop precedence * is larger than or equal to the drop precedence of the packet */ now.tv_sec = 0; for (i = dpindex; i < RIO_NDROPPREC; i++) { prec = &rp->rio_precstate[i]; avg = prec->avg; if (prec->idle) { prec->idle = 0; if (now.tv_sec == 0) microtime(&now); t = (now.tv_sec - prec->last.tv_sec); if (t > 60) avg = 0; else { t = t * 1000000 + (now.tv_usec - prec->last.tv_usec); n = t / rp->rio_pkttime; /* calculate (avg = (1 - Wq)^n * avg) */ if (n > 0) avg = (avg >> FP_SHIFT) * pow_w(rp->rio_wtab, n); } } /* run estimator. (avg is scaled by WEIGHT in fixed-point) */ avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift); prec->avg = avg; /* save the new value */ /* * count keeps a tally of arriving traffic that has not * been dropped. */ prec->count++; } prec = &rp->rio_precstate[dpindex]; avg = prec->avg; /* see if we drop early */ droptype = DTYPE_NODROP; if (avg >= prec->th_min_s && prec->qlen > 1) { if (avg >= prec->th_max_s) { /* avg >= th_max: forced drop */ droptype = DTYPE_FORCED; } else if (prec->old == 0) { /* first exceeds th_min */ prec->count = 1; prec->old = 1; } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift, prec->probd, prec->count)) { /* unforced drop by red */ droptype = DTYPE_EARLY; } } else { /* avg < th_min */ prec->old = 0; } /* * if the queue length hits the hard limit, it's a forced drop. 
*/ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) droptype = DTYPE_FORCED; if (droptype != DTYPE_NODROP) { /* always drop incoming packet (as opposed to randomdrop) */ for (i = dpindex; i < RIO_NDROPPREC; i++) rp->rio_precstate[i].count = 0; #ifdef RIO_STATS if (droptype == DTYPE_EARLY) rp->q_stats[dpindex].drop_unforced++; else rp->q_stats[dpindex].drop_forced++; PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m)); #endif m_freem(m); return (-1); } for (i = dpindex; i < RIO_NDROPPREC; i++) rp->rio_precstate[i].qlen++; /* save drop precedence index in mbuf hdr */ RIOM_SET_PRECINDEX(m, dpindex); if (rp->rio_flags & RIOF_CLEARDSCP) dsfield &= ~DSCP_MASK; if (dsfield != odsfield) write_dsfield(m, pktattr, dsfield); _addq(q, m); #ifdef RIO_STATS PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m)); #endif return (0); } struct mbuf * rio_getq(rio_t *rp, class_queue_t *q) { struct mbuf *m; int dpindex, i; if ((m = _getq(q)) == NULL) return NULL; dpindex = RIOM_GET_PRECINDEX(m); for (i = dpindex; i < RIO_NDROPPREC; i++) { if (--rp->rio_precstate[i].qlen == 0) { if (rp->rio_precstate[i].idle == 0) { rp->rio_precstate[i].idle = 1; microtime(&rp->rio_precstate[i].last); } } } return (m); } #ifdef ALTQ3_COMPAT int rioopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { /* everything will be done when the queueing scheme is attached. 
*/ return 0; } int rioclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { rio_queue_t *rqp; int err, error = 0; while ((rqp = rio_list) != NULL) { /* destroy all */ err = rio_detach(rqp); if (err != 0 && error == 0) error = err; } return error; } int rioioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { rio_queue_t *rqp; struct rio_interface *ifacep; struct ifnet *ifp; int error = 0; /* check super-user privilege */ switch (cmd) { case RIO_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) return (error); #elsif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) return (error); #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) return (error); #endif break; } switch (cmd) { case RIO_ENABLE: ifacep = (struct rio_interface *)addr; if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } error = altq_enable(rqp->rq_ifq); break; case RIO_DISABLE: ifacep = (struct rio_interface *)addr; if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } error = altq_disable(rqp->rq_ifq); break; case RIO_IF_ATTACH: ifp = ifunit(((struct rio_interface *)addr)->rio_ifname); if (ifp == NULL) { error = ENXIO; break; } /* allocate and initialize rio_queue_t */ rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK); if (rqp == NULL) { error = ENOMEM; break; } bzero(rqp, sizeof(rio_queue_t)); rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_WAITOK); if (rqp->rq_q == NULL) { free(rqp, M_DEVBUF); error = ENOMEM; break; } bzero(rqp->rq_q, sizeof(class_queue_t)); rqp->rq_rio = rio_alloc(0, NULL, 0, 0); if (rqp->rq_rio == NULL) { free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); error = ENOMEM; break; } rqp->rq_ifq = &ifp->if_snd; qtail(rqp->rq_q) = NULL; 
qlen(rqp->rq_q) = 0; qlimit(rqp->rq_q) = RIO_LIMIT; qtype(rqp->rq_q) = Q_RIO; /* * set RIO to this ifnet structure. */ error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp, rio_enqueue, rio_dequeue, rio_request, NULL, NULL); if (error) { rio_destroy(rqp->rq_rio); free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); break; } /* add this state to the rio list */ rqp->rq_next = rio_list; rio_list = rqp; break; case RIO_IF_DETACH: ifacep = (struct rio_interface *)addr; if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } error = rio_detach(rqp); break; case RIO_GETSTATS: do { struct rio_stats *q_stats; rio_t *rp; int i; q_stats = (struct rio_stats *)addr; if ((rqp = altq_lookup(q_stats->iface.rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } rp = rqp->rq_rio; q_stats->q_limit = qlimit(rqp->rq_q); q_stats->weight = rp->rio_weight; q_stats->flags = rp->rio_flags; for (i = 0; i < RIO_NDROPPREC; i++) { q_stats->q_len[i] = rp->rio_precstate[i].qlen; bcopy(&rp->q_stats[i], &q_stats->q_stats[i], sizeof(struct redstats)); q_stats->q_stats[i].q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift; q_stats->q_params[i].inv_pmax = rp->rio_precstate[i].inv_pmax; q_stats->q_params[i].th_min = rp->rio_precstate[i].th_min; q_stats->q_params[i].th_max = rp->rio_precstate[i].th_max; } } while (/*CONSTCOND*/ 0); break; case RIO_CONFIG: do { struct rio_conf *fc; rio_t *new; int s, limit, i; fc = (struct rio_conf *)addr; if ((rqp = altq_lookup(fc->iface.rio_ifname, ALTQT_RIO)) == NULL) { error = EBADF; break; } new = rio_alloc(fc->rio_weight, &fc->q_params[0], fc->rio_flags, fc->rio_pkttime); if (new == NULL) { error = ENOMEM; break; } -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif _flushq(rqp->rq_q); limit = fc->rio_limit; if (limit < fc->q_params[RIO_NDROPPREC-1].th_max) limit = fc->q_params[RIO_NDROPPREC-1].th_max; qlimit(rqp->rq_q) = limit; rio_destroy(rqp->rq_rio); rqp->rq_rio = new; splx(s); /* write back new values */ fc->rio_limit = 
limit; for (i = 0; i < RIO_NDROPPREC; i++) { fc->q_params[i].inv_pmax = rqp->rq_rio->rio_precstate[i].inv_pmax; fc->q_params[i].th_min = rqp->rq_rio->rio_precstate[i].th_min; fc->q_params[i].th_max = rqp->rq_rio->rio_precstate[i].th_max; } } while (/*CONSTCOND*/ 0); break; case RIO_SETDEFAULTS: do { struct redparams *rp; int i; rp = (struct redparams *)addr; for (i = 0; i < RIO_NDROPPREC; i++) default_rio_params[i] = rp[i]; } while (/*CONSTCOND*/ 0); break; default: error = EINVAL; break; } return error; } static int rio_detach(rqp) rio_queue_t *rqp; { rio_queue_t *tmp; int error = 0; if (ALTQ_IS_ENABLED(rqp->rq_ifq)) altq_disable(rqp->rq_ifq); if ((error = altq_detach(rqp->rq_ifq))) return (error); if (rio_list == rqp) rio_list = rqp->rq_next; else { for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next) if (tmp->rq_next == rqp) { tmp->rq_next = rqp->rq_next; break; } if (tmp == NULL) printf("rio_detach: no state found in rio_list!\n"); } rio_destroy(rqp->rq_rio); free(rqp->rq_q, M_DEVBUF); free(rqp, M_DEVBUF); return (error); } /* * rio support routines */ static int rio_request(ifq, req, arg) struct ifaltq *ifq; int req; void *arg; { rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: _flushq(rqp->rq_q); if (ALTQ_IS_ENABLED(ifq)) ifq->ifq_len = 0; break; } return (0); } /* * enqueue routine: * * returns: 0 when successfully queued. * ENOBUFS when drop occurs. */ static int rio_enqueue(ifq, m, pktattr) struct ifaltq *ifq; struct mbuf *m; struct altq_pktattr *pktattr; { rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; int error = 0; IFQ_LOCK_ASSERT(ifq); if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0) ifq->ifq_len++; else error = ENOBUFS; return error; } /* * dequeue routine: * must be called in splimp. * * returns: mbuf dequeued. * NULL when no packet is available in the queue. 
*/ static struct mbuf * rio_dequeue(ifq, op) struct ifaltq *ifq; int op; { rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; struct mbuf *m = NULL; IFQ_LOCK_ASSERT(ifq); if (op == ALTDQ_POLL) return qhead(rqp->rq_q); m = rio_getq(rqp->rq_rio, rqp->rq_q); if (m != NULL) ifq->ifq_len--; return m; } #ifdef KLD_MODULE static struct altqsw rio_sw = {"rio", rioopen, rioclose, rioioctl}; ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw); MODULE_VERSION(altq_rio, 1); MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_RIO */ Index: head/sys/net/altq/altq_rmclass.c =================================================================== --- head/sys/net/altq/altq_rmclass.c (revision 281641) +++ head/sys/net/altq/altq_rmclass.c (revision 281642) @@ -1,1836 +1,1810 @@ -/* $FreeBSD$ */ -/* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ */ - -/* +/*- * Copyright (c) 1991-1997 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Network Research * Group at Lawrence Berkeley Laboratory. * 4. Neither the name of the University nor of the Laboratory may be used * to endorse or promote products derived from this software without * specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * LBL code modified by speer@eng.sun.com, May 1977. * For questions and/or comments, please send mail to cbq@ee.lbl.gov * * @(#)rm_class.c 1.48 97/12/05 SMI + * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif #include #include #ifdef ALTQ3_COMPAT #include #include #include #endif #include #include #include #include #include #include /* * Local Macros */ #define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; } /* * Local routines. 
*/ static int rmc_satisfied(struct rm_class *, struct timeval *); static void rmc_wrr_set_weights(struct rm_ifdat *); static void rmc_depth_compute(struct rm_class *); static void rmc_depth_recompute(rm_class_t *); static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int); static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int); static int _rmc_addq(rm_class_t *, mbuf_t *); static void _rmc_dropq(rm_class_t *); static mbuf_t *_rmc_getq(rm_class_t *); static mbuf_t *_rmc_pollq(rm_class_t *); static int rmc_under_limit(struct rm_class *, struct timeval *); static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *); static void rmc_drop_action(struct rm_class *); static void rmc_restart(struct rm_class *); static void rmc_root_overlimit(struct rm_class *, struct rm_class *); #define BORROW_OFFTIME /* * BORROW_OFFTIME (experimental): * borrow the offtime of the class borrowing from. * the reason is that when its own offtime is set, the class is unable * to borrow much, especially when cutoff is taking effect. * but when the borrowed class is overloaded (advidle is close to minidle), * use the borrowing class's offtime to avoid overload. */ #define ADJUST_CUTOFF /* * ADJUST_CUTOFF (experimental): * if no underlimit class is found due to cutoff, increase cutoff and * retry the scheduling loop. * also, don't invoke delay_actions while cutoff is taking effect, * since a sleeping class won't have a chance to be scheduled in the * next loop. * * now heuristics for setting the top-level variable (cutoff_) becomes: * 1. if a packet arrives for a not-overlimit class, set cutoff * to the depth of the class. * 2. if cutoff is i, and a packet arrives for an overlimit class * with an underlimit ancestor at a lower level than i (say j), * then set cutoff to j. * 3. at scheduling a packet, if there is no underlimit class * due to the current cutoff level, increase cutoff by 1 and * then try to schedule again. */ /* * rm_class_t * * rmc_newclass(...) 
- Create a new resource management class at priority * 'pri' on the interface given by 'ifd'. * * nsecPerByte is the data rate of the interface in nanoseconds/byte. * E.g., 800 for a 10Mb/s ethernet. If the class gets less * than 100% of the bandwidth, this number should be the * 'effective' rate for the class. Let f be the * bandwidth fraction allocated to this class, and let * nsPerByte be the data rate of the output link in * nanoseconds/byte. Then nsecPerByte is set to * nsPerByte / f. E.g., 1600 (= 800 / .5) * for a class that gets 50% of an ethernet's bandwidth. * * action the routine to call when the class is over limit. * * maxq max allowable queue size for class (in packets). * * parent parent class pointer. * * borrow class to borrow from (should be either 'parent' or null). * * maxidle max value allowed for class 'idle' time estimate (this * parameter determines how large an initial burst of packets * can be before overlimit action is invoked). * * offtime how long 'delay' action will delay when class goes over * limit (this parameter determines the steady-state burst * size when a class is running over its limit). * * Maxidle and offtime have to be computed from the following: If the * average packet size is s, the bandwidth fraction allocated to this * class is f, we want to allow b packet bursts, and the gain of the * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then: * * ptime = s * nsPerByte * (1 - f) / f * maxidle = ptime * (1 - g^b) / g^b * minidle = -ptime * (1 / (f - 1)) * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1)) * * Operationally, it's convenient to specify maxidle & offtime in units * independent of the link bandwidth so the maxidle & offtime passed to * this routine are the above values multiplied by 8*f/(1000*nsPerByte). * (The constant factor is a scale factor needed to make the parameters * integers. 
This scaling also means that the 'unscaled' values of * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds, * not nanoseconds.) Also note that the 'idle' filter computation keeps * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of * maxidle also must be scaled upward by this value. Thus, the passed * values for maxidle and offtime can be computed as follows: * * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte) * offtime = offtime * 8 / (1000 * nsecPerByte) * * When USE_HRTIME is employed, then maxidle and offtime become: * maxidle = maxilde * (8.0 / nsecPerByte); * offtime = offtime * (8.0 / nsecPerByte); */ struct rm_class * rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, void (*action)(rm_class_t *, rm_class_t *), int maxq, struct rm_class *parent, struct rm_class *borrow, u_int maxidle, int minidle, u_int offtime, int pktsize, int flags) { struct rm_class *cl; struct rm_class *peer; int s; if (pri >= RM_MAXPRIO) return (NULL); #ifndef ALTQ_RED if (flags & RMCF_RED) { #ifdef ALTQ_DEBUG printf("rmc_newclass: RED not configured for CBQ!\n"); #endif return (NULL); } #endif #ifndef ALTQ_RIO if (flags & RMCF_RIO) { #ifdef ALTQ_DEBUG printf("rmc_newclass: RIO not configured for CBQ!\n"); #endif return (NULL); } #endif cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); CALLOUT_INIT(&cl->callout_); cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->q_ == NULL) { free(cl, M_DEVBUF); return (NULL); } /* * Class initialization. 
*/ cl->children_ = NULL; cl->parent_ = parent; cl->borrow_ = borrow; cl->leaf_ = 1; cl->ifdat_ = ifd; cl->pri_ = pri; cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ cl->depth_ = 0; cl->qthresh_ = 0; cl->ns_per_byte_ = nsecPerByte; qlimit(cl->q_) = maxq; qtype(cl->q_) = Q_DROPHEAD; qlen(cl->q_) = 0; cl->flags_ = flags; #if 1 /* minidle is also scaled in ALTQ */ cl->minidle_ = (minidle * (int)nsecPerByte) / 8; if (cl->minidle_ > 0) cl->minidle_ = 0; #else cl->minidle_ = minidle; #endif cl->maxidle_ = (maxidle * nsecPerByte) / 8; if (cl->maxidle_ == 0) cl->maxidle_ = 1; #if 1 /* offtime is also scaled in ALTQ */ cl->avgidle_ = cl->maxidle_; cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; if (cl->offtime_ == 0) cl->offtime_ = 1; #else cl->avgidle_ = 0; cl->offtime_ = (offtime * nsecPerByte) / 8; #endif cl->overlimit = action; #ifdef ALTQ_RED if (flags & (RMCF_RED|RMCF_RIO)) { int red_flags, red_pkttime; red_flags = 0; if (flags & RMCF_ECN) red_flags |= REDF_ECN; if (flags & RMCF_FLOWVALVE) red_flags |= REDF_FLOWVALVE; #ifdef ALTQ_RIO if (flags & RMCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif red_pkttime = nsecPerByte * pktsize / 1000; if (flags & RMCF_RED) { cl->red_ = red_alloc(0, 0, qlimit(cl->q_) * 10/100, qlimit(cl->q_) * 30/100, red_flags, red_pkttime); if (cl->red_ != NULL) qtype(cl->q_) = Q_RED; } #ifdef ALTQ_RIO else { cl->red_ = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->red_ != NULL) qtype(cl->q_) = Q_RIO; } #endif } #endif /* ALTQ_RED */ /* * put the class into the class tree */ -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(ifd->ifq_); if ((peer = ifd->active_[pri]) != NULL) { /* find the last class at this pri */ cl->peer_ = peer; while (peer->peer_ != ifd->active_[pri]) peer = peer->peer_; peer->peer_ = cl; } else { ifd->active_[pri] = cl; cl->peer_ = cl; } if (cl->parent_) { cl->next_ = parent->children_; parent->children_ = cl; parent->leaf_ = 0; } /* * Compute the depth 
of this class and its ancestors in the class * hierarchy. */ rmc_depth_compute(cl); /* * If CBQ's WRR is enabled, then initialize the class WRR state. */ if (ifd->wrr_) { ifd->num_[pri]++; ifd->alloc_[pri] += cl->allotment_; rmc_wrr_set_weights(ifd); } IFQ_UNLOCK(ifd->ifq_); splx(s); return (cl); } /* * int * rmc_modclass(...) - Re-parameterize the existing class 'cl': new rate * (nsecPerByte), queue limit (maxq) and maxidle/minidle/offtime, scaled * the same way rmc_newclass() scales them. avgidle_ is reset to the new * maxidle_. 'pktsize' is accepted but not used here. * * Returns: 0 */ int rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle, int minidle, u_int offtime, int pktsize) { struct rm_ifdat *ifd; u_int old_allotment; int s; ifd = cl->ifdat_; old_allotment = cl->allotment_; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(ifd->ifq_); cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ cl->qthresh_ = 0; cl->ns_per_byte_ = nsecPerByte; qlimit(cl->q_) = maxq; #if 1 /* minidle is also scaled in ALTQ */ /* * minidle is expected to be <= 0. Cast nsecPerByte to int, as * rmc_newclass() does; otherwise the product is evaluated as * unsigned, a negative minidle turns into a large positive value * and the clamp below silently forces minidle_ to 0. */ cl->minidle_ = (minidle * (int)nsecPerByte) / 8; if (cl->minidle_ > 0) cl->minidle_ = 0; #else cl->minidle_ = minidle; #endif cl->maxidle_ = (maxidle * nsecPerByte) / 8; if (cl->maxidle_ == 0) cl->maxidle_ = 1; #if 1 /* offtime is also scaled in ALTQ */ cl->avgidle_ = cl->maxidle_; cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; if (cl->offtime_ == 0) cl->offtime_ = 1; #else cl->avgidle_ = 0; cl->offtime_ = (offtime * nsecPerByte) / 8; #endif /* * If CBQ's WRR is enabled, then apply the change in allotment to the * class's priority level and recompute the WRR weights. */ if (ifd->wrr_) { ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment; rmc_wrr_set_weights(ifd); } IFQ_UNLOCK(ifd->ifq_); splx(s); return (0); } /* * static void * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes * the appropriate round robin weights for the CBQ weighted round robin * algorithm. * * Returns: NONE */ static void rmc_wrr_set_weights(struct rm_ifdat *ifd) { int i; struct rm_class *cl, *clh; for (i = 0; i < RM_MAXPRIO; i++) { /* * This is inverted from that of the simulator to * maintain precision. 
*/ if (ifd->num_[i] == 0) ifd->M_[i] = 0; else ifd->M_[i] = ifd->alloc_[i] / (ifd->num_[i] * ifd->maxpkt_); /* * Compute the weighted allotment for each class. * This takes the expensive div instruction out * of the main loop for the wrr scheduling path. * These only get recomputed when a class comes or * goes. */ if (ifd->active_[i] != NULL) { clh = cl = ifd->active_[i]; do { /* safe-guard for slow link or alloc_ == 0 */ if (ifd->M_[i] == 0) cl->w_allotment_ = 0; else cl->w_allotment_ = cl->allotment_ / ifd->M_[i]; cl = cl->peer_; } while ((cl != NULL) && (cl != clh)); } } } /* * int * rmc_get_weight(struct rm_ifdat *ifd, int pri) - Look up the per-priority * weight M_[pri] kept in 'ifd'. * * Returns: M_[pri], or 0 when 'pri' is outside 0 .. RM_MAXPRIO - 1 */ int rmc_get_weight(struct rm_ifdat *ifd, int pri) { if ((pri >= 0) && (pri < RM_MAXPRIO)) return (ifd->M_[pri]); else return (0); } /* * static void * rmc_depth_compute(struct rm_class *cl) - This function computes the * appropriate depth of class 'cl' and its ancestors. * * Returns: NONE */ static void rmc_depth_compute(struct rm_class *cl) { rm_class_t *t = cl, *p; /* * Recompute the depth for the branch of the tree: walk up the * parent chain, raising each parent to (child depth + 1), and stop * at the first parent that is already deeper than its child. */ while (t != NULL) { p = t->parent_; if (p && (t->depth_ >= p->depth_)) { p->depth_ = t->depth_ + 1; t = p; } else t = NULL; } } /* * static void * rmc_depth_recompute(struct rm_class *cl) - This function re-computes * the depth of the tree after a class has been deleted. 
* * Returns: NONE */ static void rmc_depth_recompute(rm_class_t *cl) { #if 1 /* ALTQ */ rm_class_t *p, *t; /* * Walk from 'cl' toward the root. A class without children gets * depth 0; a class with children gets one more than its deepest * child. The walk stops early once a class with children already * has that depth. */ p = cl; while (p != NULL) { if ((t = p->children_) == NULL) { p->depth_ = 0; } else { int cdepth = 0; while (t != NULL) { if (t->depth_ > cdepth) cdepth = t->depth_; t = t->next_; } if (p->depth_ == cdepth + 1) /* no change to this parent */ return; p->depth_ = cdepth + 1; } p = p->parent_; } #else rm_class_t *t; if (cl->depth_ >= 1) { if (cl->children_ == NULL) { cl->depth_ = 0; } else if ((t = cl->children_) != NULL) { while (t != NULL) { if (t->children_ != NULL) rmc_depth_recompute(t); t = t->next_; } } else rmc_depth_compute(cl); } #endif } /* * void * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This * function deletes a class from the link-sharing structure and frees * all resources associated with the class. * * Returns: NONE */ void rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl) { struct rm_class *p, *head, *previous; int s; ASSERT(cl->children_ == NULL); if (cl->sleeping_) CALLOUT_STOP(&cl->callout_); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(ifd->ifq_); /* * Free packets in the packet queue. * XXX - this may not be a desired behavior. Packets should be * re-queued. */ rmc_dropall(cl); /* * If the class has a parent, then remove the class from the * parent's children chain. */ if (cl->parent_ != NULL) { head = cl->parent_->children_; p = previous = head; if (head->next_ == NULL) { ASSERT(head == cl); cl->parent_->children_ = NULL; cl->parent_->leaf_ = 1; } else while (p != NULL) { if (p == cl) { if (cl == head) cl->parent_->children_ = cl->next_; else previous->next_ = cl->next_; cl->next_ = NULL; p = NULL; } else { previous = p; p = p->next_; } } } /* * Delete class from class priority peer list. */ if ((p = ifd->active_[cl->pri_]) != NULL) { /* * If there is more than one member of this priority * level, then look for class(cl) in the priority level. 
*/ if (p != p->peer_) { while (p->peer_ != cl) p = p->peer_; p->peer_ = cl->peer_; if (ifd->active_[cl->pri_] == cl) ifd->active_[cl->pri_] = cl->peer_; } else { ASSERT(p == cl); ifd->active_[cl->pri_] = NULL; } } /* * Recompute the WRR weights. */ if (ifd->wrr_) { ifd->alloc_[cl->pri_] -= cl->allotment_; ifd->num_[cl->pri_]--; rmc_wrr_set_weights(ifd); } /* * Re-compute the depth of the tree. */ #if 1 /* ALTQ */ rmc_depth_recompute(cl->parent_); #else rmc_depth_recompute(ifd->root_); #endif IFQ_UNLOCK(ifd->ifq_); splx(s); /* * Free the class structure. */ if (cl->red_ != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) rio_destroy((rio_t *)cl->red_); #endif #ifdef ALTQ_RED if (q_is_red(cl->q_)) red_destroy(cl->red_); #endif } free(cl->q_, M_DEVBUF); free(cl, M_DEVBUF); } /* * void * rmc_init(...) - Initialize the resource management data structures * associated with the output portion of interface 'ifp'. 'ifd' is * where the structures will be built (for backwards compatibility, the * structures aren't kept in the ifnet struct). 'nsecPerByte' * gives the link speed (inverse of bandwidth) in nanoseconds/byte. * 'restart' is the driver-specific routine that the generic 'delay * until under limit' action will call to restart output. `maxq' * is the queue size of the 'link' & 'default' classes. 'maxqueued' * is the maximum number of packets that the resource management * code will allow to be queued 'downstream' (this is typically 1). * * Returns: NONE */ void rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte, void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle, int minidle, u_int offtime, int flags) { int i, mtu; /* * Initialize the CBQ tracing/debug facility. */ CBQTRACEINIT(); bzero((char *)ifd, sizeof (*ifd)); mtu = ifq->altq_ifp->if_mtu; ifd->ifq_ = ifq; ifd->restart = restart; ifd->maxqueued_ = maxqueued; ifd->ns_per_byte_ = nsecPerByte; ifd->maxpkt_ = mtu; ifd->wrr_ = (flags & RMCF_WRR) ? 
1 : 0; ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0; #if 1 ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16; if (mtu * nsecPerByte > 10 * 1000000) ifd->maxiftime_ /= 4; #endif reset_cutoff(ifd); CBQTRACE(rmc_init, 'INIT', ifd->cutoff_); /* * Initialize the CBQ's WRR state. */ for (i = 0; i < RM_MAXPRIO; i++) { ifd->alloc_[i] = 0; ifd->M_[i] = 0; ifd->num_[i] = 0; ifd->na_[i] = 0; ifd->active_[i] = NULL; } /* * Initialize current packet state. */ ifd->qi_ = 0; ifd->qo_ = 0; for (i = 0; i < RM_MAXQUEUED; i++) { ifd->class_[i] = NULL; ifd->curlen_[i] = 0; ifd->borrowed_[i] = NULL; } /* * Create the root class of the link-sharing structure. */ if ((ifd->root_ = rmc_newclass(0, ifd, nsecPerByte, rmc_root_overlimit, maxq, 0, 0, maxidle, minidle, offtime, 0, 0)) == NULL) { printf("rmc_init: root class not allocated\n"); return ; } ifd->root_->depth_ = 0; } /* * void * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by * mbuf 'm' to queue for resource class 'cl'. This routine is called * by a driver's if_output routine. This routine must be called with * output packet completion interrupts locked out (to avoid racing with * rmc_dequeue_next). * * Returns: 0 on successful queueing * -1 when packet drop occurs */ int rmc_queue_packet(struct rm_class *cl, mbuf_t *m) { struct timeval now; struct rm_ifdat *ifd = cl->ifdat_; int cpri = cl->pri_; int is_empty = qempty(cl->q_); RM_GETTIME(now); if (ifd->cutoff_ > 0) { if (TV_LT(&cl->undertime_, &now)) { if (ifd->cutoff_ > cl->depth_) ifd->cutoff_ = cl->depth_; CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_); } #if 1 /* ALTQ */ else { /* * the class is overlimit. if the class has * underlimit ancestors, set cutoff to the lowest * depth among them. 
*/ struct rm_class *borrow = cl->borrow_; while (borrow != NULL && borrow->depth_ < ifd->cutoff_) { if (TV_LT(&borrow->undertime_, &now)) { ifd->cutoff_ = borrow->depth_; CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_); break; } borrow = borrow->borrow_; } } #else /* !ALTQ */ else if ((ifd->cutoff_ > 1) && cl->borrow_) { if (TV_LT(&cl->borrow_->undertime_, &now)) { ifd->cutoff_ = cl->borrow_->depth_; CBQTRACE(rmc_queue_packet, 'ffob', cl->borrow_->depth_); } } #endif /* !ALTQ */ } if (_rmc_addq(cl, m) < 0) /* failed */ return (-1); if (is_empty) { CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle); ifd->na_[cpri]++; } if (qlen(cl->q_) > qlimit(cl->q_)) { /* note: qlimit can be set to 0 or 1 */ rmc_drop_action(cl); return (-1); } return (0); } /* * void * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all * classes to see if they are satisfied. Priorities are scanned from * highest to lowest; the cutoff is set to the depth of the first * unsatisfied class found. If every class is satisfied the cutoff is * reset. * * Returns: NONE */ static void rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) { int i; rm_class_t *p, *bp; for (i = RM_MAXPRIO - 1; i >= 0; i--) { if ((bp = ifd->active_[i]) != NULL) { p = bp; do { if (!rmc_satisfied(p, now)) { ifd->cutoff_ = p->depth_; return; } p = p->peer_; } while (p != bp); } } reset_cutoff(ifd); } /* * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise. * A NULL class, or one for which TV_LT(now, undertime_) holds, counts * as satisfied. A class at depth 0 is unsatisfied when it is not * sleeping and has more than qthresh_ packets queued. Any other class * is satisfied only if all of its children are. */ static int rmc_satisfied(struct rm_class *cl, struct timeval *now) { rm_class_t *p; if (cl == NULL) return (1); if (TV_LT(now, &cl->undertime_)) return (1); if (cl->depth_ == 0) { if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_)) return (0); else return (1); } if (cl->children_ != NULL) { p = cl->children_; while (p != NULL) { if (!rmc_satisfied(p, now)) return (0); p = p->next_; } } return (1); } /* * Return 1 if class 'cl' is under limit or can borrow from a parent, * 0 if overlimit. As a side-effect, this routine will invoke the * class overlimit action if the class if overlimit. 
*/ static int rmc_under_limit(struct rm_class *cl, struct timeval *now) { rm_class_t *p = cl; rm_class_t *top; struct rm_ifdat *ifd = cl->ifdat_; ifd->borrowed_[ifd->qi_] = NULL; /* * If cl is the root class, then always return that it is * underlimit. Otherwise, check to see if the class is underlimit. */ if (cl->parent_ == NULL) return (1); /* * A sleeping class is reported overlimit while TV_LT(now, undertime_) * holds. After that the pending callout is stopped, the sleeping * state is cleared and the class is reported underlimit. */ if (cl->sleeping_) { if (TV_LT(now, &cl->undertime_)) return (0); CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; return (1); } /* * Climb the borrow chain for as long as the current class is still * overlimit. 'p' keeps the class we were called with; 'top' is the * last ancestor reached on the chain. */ top = NULL; while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) { if (((cl = cl->borrow_) == NULL) || (cl->depth_ > ifd->cutoff_)) { #ifdef ADJUST_CUTOFF if (cl != NULL) /* cutoff is taking effect, just return false without calling the delay action. */ return (0); #endif #ifdef BORROW_OFFTIME /* * check if the class can borrow offtime too. * borrow offtime from the top of the borrow * chain if the top class is not overloaded. */ if (cl != NULL) { /* cutoff is taking effect, use this class as top. */ top = cl; CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_); } if (top != NULL && top->avgidle_ == top->minidle_) top = NULL; p->overtime_ = *now; (p->overlimit)(p, top); #else p->overtime_ = *now; (p->overlimit)(p, NULL); #endif return (0); } top = cl; } /* record the ancestor that the class ended up borrowing from */ if (cl != p) ifd->borrowed_[ifd->qi_] = cl; return (1); } /* * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to * Packet-by-packet round robin. * * The heart of the weighted round-robin scheduler, which decides which * class next gets to send a packet. Highest priority first, then * weighted round-robin within priorites. * * Each able-to-send class gets to send until its byte allocation is * exhausted. Thus, the active pointer is only changed after a class has * exhausted its allocation. * * If the scheduler finds no class that is underlimit or able to borrow, * then the first class found that had a nonzero queue and is allowed to * borrow gets to send. 
*/ static mbuf_t * _rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op) { struct rm_class *cl = NULL, *first = NULL; u_int deficit; int cpri; mbuf_t *m; struct timeval now; RM_GETTIME(now); /* * if the driver polls the top of the queue and then removes * the polled packet, we must return the same packet. */ if (op == ALTDQ_REMOVE && ifd->pollcache_) { cl = ifd->pollcache_; cpri = cl->pri_; if (ifd->efficient_) { /* check if this class is overlimit */ if (cl->undertime_.tv_sec != 0 && rmc_under_limit(cl, &now) == 0) first = cl; } ifd->pollcache_ = NULL; goto _wrr_out; } else { /* mode == ALTDQ_POLL || pollcache == NULL */ ifd->pollcache_ = NULL; ifd->borrowed_[ifd->qi_] = NULL; } #ifdef ADJUST_CUTOFF _again: #endif for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) { if (ifd->na_[cpri] == 0) continue; deficit = 0; /* * Loop through twice for a priority level, if some class * was unable to send a packet the first round because * of the weighted round-robin mechanism. * During the second loop at this level, deficit==2. * (This second loop is not needed if for every class, * "M[cl->pri_])" times "cl->allotment" is greater than * the byte size for the largest packet in the class.) */ _wrr_loop: cl = ifd->active_[cpri]; ASSERT(cl != NULL); do { if ((deficit < 2) && (cl->bytes_alloc_ <= 0)) cl->bytes_alloc_ += cl->w_allotment_; if (!qempty(cl->q_)) { if ((cl->undertime_.tv_sec == 0) || rmc_under_limit(cl, &now)) { if (cl->bytes_alloc_ > 0 || deficit > 1) goto _wrr_out; /* underlimit but no alloc */ deficit = 1; #if 1 ifd->borrowed_[ifd->qi_] = NULL; #endif } else if (first == NULL && cl->borrow_ != NULL) first = cl; /* borrowing candidate */ } cl->bytes_alloc_ = 0; cl = cl->peer_; } while (cl != ifd->active_[cpri]); if (deficit == 1) { /* first loop found an underlimit class with deficit */ /* Loop on same priority level, with new deficit. */ deficit = 2; goto _wrr_loop; } } #ifdef ADJUST_CUTOFF /* * no underlimit class found. 
if cutoff is taking effect, * increase cutoff and try again. */ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) { ifd->cutoff_++; CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_); goto _again; } #endif /* ADJUST_CUTOFF */ /* * If LINK_EFFICIENCY is turned on, then the first overlimit * class we encounter will send a packet if all the classes * of the link-sharing structure are overlimit. */ reset_cutoff(ifd); CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_); if (!ifd->efficient_ || first == NULL) return (NULL); cl = first; cpri = cl->pri_; #if 0 /* too time-consuming for nothing */ if (cl->sleeping_) CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; #endif ifd->borrowed_[ifd->qi_] = cl->borrow_; ifd->cutoff_ = cl->borrow_->depth_; /* * Deque the packet and do the book keeping... */ _wrr_out: if (op == ALTDQ_REMOVE) { m = _rmc_getq(cl); if (m == NULL) panic("_rmc_wrr_dequeue_next"); if (qempty(cl->q_)) ifd->na_[cpri]--; /* * Update class statistics and link data. */ if (cl->bytes_alloc_ > 0) cl->bytes_alloc_ -= m_pktlen(m); if ((cl->bytes_alloc_ <= 0) || first == cl) ifd->active_[cl->pri_] = cl->peer_; else ifd->active_[cl->pri_] = cl; ifd->class_[ifd->qi_] = cl; ifd->curlen_[ifd->qi_] = m_pktlen(m); ifd->now_[ifd->qi_] = now; ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_; ifd->queued_++; } else { /* mode == ALTDQ_PPOLL */ m = _rmc_pollq(cl); ifd->pollcache_ = cl; } return (m); } /* * Dequeue & return next packet from the highest priority class that * has a packet to send & has enough allocation to send it. This * routine is called by a driver whenever it needs a new packet to * output. */ static mbuf_t * _rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op) { mbuf_t *m; int cpri; struct rm_class *cl, *first = NULL; struct timeval now; RM_GETTIME(now); /* * if the driver polls the top of the queue and then removes * the polled packet, we must return the same packet. 
*/ if (op == ALTDQ_REMOVE && ifd->pollcache_) { cl = ifd->pollcache_; cpri = cl->pri_; ifd->pollcache_ = NULL; goto _prr_out; } else { /* mode == ALTDQ_POLL || pollcache == NULL */ ifd->pollcache_ = NULL; ifd->borrowed_[ifd->qi_] = NULL; } #ifdef ADJUST_CUTOFF _again: #endif for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) { if (ifd->na_[cpri] == 0) continue; cl = ifd->active_[cpri]; ASSERT(cl != NULL); do { if (!qempty(cl->q_)) { if ((cl->undertime_.tv_sec == 0) || rmc_under_limit(cl, &now)) goto _prr_out; if (first == NULL && cl->borrow_ != NULL) first = cl; } cl = cl->peer_; } while (cl != ifd->active_[cpri]); } #ifdef ADJUST_CUTOFF /* * no underlimit class found. if cutoff is taking effect, increase * cutoff and try again. */ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) { ifd->cutoff_++; goto _again; } #endif /* ADJUST_CUTOFF */ /* * If LINK_EFFICIENCY is turned on, then the first overlimit * class we encounter will send a packet if all the classes * of the link-sharing structure are overlimit. */ reset_cutoff(ifd); if (!ifd->efficient_ || first == NULL) return (NULL); cl = first; cpri = cl->pri_; #if 0 /* too time-consuming for nothing */ if (cl->sleeping_) CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; #endif ifd->borrowed_[ifd->qi_] = cl->borrow_; ifd->cutoff_ = cl->borrow_->depth_; /* * Deque the packet and do the book keeping... 
*/ _prr_out: if (op == ALTDQ_REMOVE) { m = _rmc_getq(cl); if (m == NULL) panic("_rmc_prr_dequeue_next"); if (qempty(cl->q_)) ifd->na_[cpri]--; ifd->active_[cpri] = cl->peer_; ifd->class_[ifd->qi_] = cl; ifd->curlen_[ifd->qi_] = m_pktlen(m); ifd->now_[ifd->qi_] = now; ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_; ifd->queued_++; } else { /* mode == ALTDQ_POLL */ m = _rmc_pollq(cl); ifd->pollcache_ = cl; } return (m); } /* * mbuf_t * * rmc_dequeue_next(struct rm_ifdat *ifd, int mode) - this function * is invoked by the packet driver to get the next packet to be * dequeued and output on the link. If WRR is enabled, then the * WRR dequeue next routine will determine the next packet to be sent. * Otherwise, packet-by-packet round robin is invoked. 'mode' is * handed unchanged to the selected routine. * * Returns: NULL, if a packet is not available, if all * classes are overlimit, or if queued_ has already * reached maxqueued_. * * Otherwise, Pointer to the next packet. */ mbuf_t * rmc_dequeue_next(struct rm_ifdat *ifd, int mode) { if (ifd->queued_ >= ifd->maxqueued_) return (NULL); else if (ifd->wrr_) return (_rmc_wrr_dequeue_next(ifd, mode)); else return (_rmc_prr_dequeue_next(ifd, mode)); } /* * Update the utilization estimate for the packet that just completed. * The packet's class & the parent(s) of that class all get their * estimators updated. This routine is called by the driver's output- * packet-completion interrupt service routine. */ /* * a macro to approximate "divide by 1000" that gives 0.000999, * if a value has enough effective digits. * (on pentium, mul takes 9 cycles but div takes 46!) */ #define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17)) void rmc_update_class_util(struct rm_ifdat *ifd) { int idle, avgidle, pktlen; int pkt_time, tidle; rm_class_t *cl, *borrowed; rm_class_t *borrows; struct timeval *nowp; /* * Get the most recent completed class. 
*/ if ((cl = ifd->class_[ifd->qo_]) == NULL) return; pktlen = ifd->curlen_[ifd->qo_]; borrowed = ifd->borrowed_[ifd->qo_]; borrows = borrowed; PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen); /* * Run estimator on class and its ancestors. */ /* * rm_update_class_util is designed to be called when the * transfer is completed from a xmit complete interrupt, * but most drivers don't implement an upcall for that. * so, just use estimated completion time. * as a result, ifd->qi_ and ifd->qo_ are always synced. */ nowp = &ifd->now_[ifd->qo_]; /* get pkt_time (for link) in usec */ #if 1 /* use approximation */ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_; pkt_time = NSEC_TO_USEC(pkt_time); #else pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000; #endif #if 1 /* ALTQ4PPP */ if (TV_LT(nowp, &ifd->ifnow_)) { int iftime; /* * make sure the estimated completion time does not go * too far. it can happen when the link layer supports * data compression or the interface speed is set to * a much lower value. */ TV_DELTA(&ifd->ifnow_, nowp, iftime); if (iftime+pkt_time < ifd->maxiftime_) { TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); } else { TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_); } } else { TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); } #else if (TV_LT(nowp, &ifd->ifnow_)) { TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); } else { TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); } #endif while (cl != NULL) { TV_DELTA(&ifd->ifnow_, &cl->last_, idle); if (idle >= 2000000) /* * this class is idle enough, reset avgidle. * (TV_DELTA returns 2000000 us when delta is large.) */ cl->avgidle_ = cl->maxidle_; /* get pkt_time (for class) in usec */ #if 1 /* use approximation */ pkt_time = pktlen * cl->ns_per_byte_; pkt_time = NSEC_TO_USEC(pkt_time); #else pkt_time = pktlen * cl->ns_per_byte_ / 1000; #endif idle -= pkt_time; avgidle = cl->avgidle_; avgidle += idle - (avgidle >> RM_FILTER_GAIN); cl->avgidle_ = avgidle; /* Are we overlimit ? 
*/ if (avgidle <= 0) { CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle); #if 1 /* ALTQ */ /* * need some lower bound for avgidle, otherwise * a borrowing class gets unbounded penalty. */ if (avgidle < cl->minidle_) avgidle = cl->avgidle_ = cl->minidle_; #endif /* set next idle to make avgidle 0 */ tidle = pkt_time + (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN); TV_ADD_DELTA(nowp, tidle, &cl->undertime_); ++cl->stats_.over; } else { cl->avgidle_ = (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle; cl->undertime_.tv_sec = 0; if (cl->sleeping_) { CALLOUT_STOP(&cl->callout_); cl->sleeping_ = 0; } } if (borrows != NULL) { if (borrows != cl) ++cl->stats_.borrows; else borrows = NULL; } cl->last_ = ifd->ifnow_; cl->last_pkttime_ = pkt_time; #if 1 if (cl->parent_ == NULL) { /* take stats of root class */ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen); } #endif cl = cl->parent_; } /* * Check to see if cutoff needs to set to a new level. */ cl = ifd->class_[ifd->qo_]; if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) { #if 1 /* ALTQ */ if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) { rmc_tl_satisfied(ifd, nowp); CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_); } else { ifd->cutoff_ = borrowed->depth_; CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_); } #else /* !ALTQ */ if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) { reset_cutoff(ifd); #ifdef notdef rmc_tl_satisfied(ifd, &now); #endif CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_); } else { ifd->cutoff_ = borrowed->depth_; CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_); } #endif /* !ALTQ */ } /* * Release class slot */ ifd->borrowed_[ifd->qo_] = NULL; ifd->class_[ifd->qo_] = NULL; ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_; ifd->queued_--; } /* * void * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific) * over-limit action routines. 
These get invoked by rmc_under_limit() * if a class with packets to send if over its bandwidth limit & can't * borrow from a parent class. * * Returns: NONE */ static void rmc_drop_action(struct rm_class *cl) { struct rm_ifdat *ifd = cl->ifdat_; ASSERT(qlen(cl->q_) > 0); _rmc_dropq(cl); /* queue drained by the drop: adjust the na_ count of this priority */ if (qempty(cl->q_)) ifd->na_[cl->pri_]--; } /* * void * rmc_dropall(struct rm_class *cl) - Flush every packet queued on class * 'cl'. If the queue was not empty, the na_ counter of the class's * priority is decremented once. * * Returns: NONE */ void rmc_dropall(struct rm_class *cl) { struct rm_ifdat *ifd = cl->ifdat_; if (!qempty(cl->q_)) { _flushq(cl->q_); ifd->na_[cl->pri_]--; } } #if (__FreeBSD_version > 300000) /* * hzto() is removed from FreeBSD-3.0. This local replacement subtracts * the current time (getmicrotime) from *tv and converts the difference * to ticks with tvtohz(). */ static int hzto(struct timeval *); static int hzto(tv) struct timeval *tv; { struct timeval t2; getmicrotime(&t2); t2.tv_sec = tv->tv_sec - t2.tv_sec; t2.tv_usec = tv->tv_usec - t2.tv_usec; return (tvtohz(&t2)); } #endif /* __FreeBSD_version > 300000 */ /* * void * rmc_delay_action(struct rm_class *cl, struct rm_class *borrow) - This * function is the generic CBQ delay action routine. It is invoked via * rmc_under_limit when the packet is discovered to be overlimit. * * If the delay action is result of borrow class being overlimit, then * delay for the offtime of the borrowing class that is overlimit. * * Returns: NONE */ void rmc_delay_action(struct rm_class *cl, struct rm_class *borrow) { int delay, t, extradelay; cl->stats_.overactions++; TV_DELTA(&cl->undertime_, &cl->overtime_, delay); #ifndef BORROW_OFFTIME delay += cl->offtime_; #endif if (!cl->sleeping_) { CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle); #ifdef BORROW_OFFTIME if (borrow != NULL) extradelay = borrow->offtime_; else #endif extradelay = cl->offtime_; #ifdef ALTQ /* * XXX recalculate suspend time: * current undertime is (tidle + pkt_time) calculated * from the last transmission. 
* tidle: time required to bring avgidle back to 0 * pkt_time: target waiting time for this class * we need to replace pkt_time by offtime */ extradelay -= cl->last_pkttime_; #endif if (extradelay > 0) { TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_); delay += extradelay; } cl->sleeping_ = 1; cl->stats_.delays++; /* * Since packets are phased randomly with respect to the * clock, 1 tick (the next clock tick) can be an arbitrarily * short time so we have to wait for at least two ticks. * NOTE: If there's no other traffic, we need the timer as * a 'backstop' to restart this class. */ if (delay > tick * 2) { -#ifdef __FreeBSD__ /* FreeBSD rounds up the tick */ t = hzto(&cl->undertime_); -#else - /* other BSDs round down the tick */ - t = hzto(&cl->undertime_) + 1; -#endif } else t = 2; CALLOUT_RESET(&cl->callout_, t, (timeout_t *)rmc_restart, (caddr_t)cl); } } /* * void * rmc_restart() - is just a helper routine for rmc_delay_action -- it is * called by the system timer code & is responsible checking if the * class is still sleeping (it might have been restarted as a side * effect of the queue scan on a packet arrival) and, if so, restarting * output for the class. Inspecting the class state & restarting output * require locking the class structure. In general the driver is * responsible for locking but this is the only routine that is not * called directly or indirectly from the interface driver so it has * know about system locking conventions. Under bsd, locking is done * by raising IPL to splimp so that's what's implemented here. On a * different system this would probably need to be changed. 
* * Returns: NONE */ static void rmc_restart(struct rm_class *cl) { struct rm_ifdat *ifd = cl->ifdat_; int s; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_LOCK(ifd->ifq_); if (cl->sleeping_) { cl->sleeping_ = 0; cl->undertime_.tv_sec = 0; if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) { CBQTRACE(rmc_restart, 'trts', cl->stats_.handle); (ifd->restart)(ifd->ifq_); } } IFQ_UNLOCK(ifd->ifq_); splx(s); } /* * void * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit * handling routine for the root class of the link sharing structure. * * Returns: NONE */ static void rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow) { panic("rmc_root_overlimit"); } /* * Packet Queue handling routines. Eventually, this is to localize the * effects on the code whether queues are red queues or droptail * queues. */ static int _rmc_addq(rm_class_t *cl, mbuf_t *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_); #endif #ifdef ALTQ_RED if (q_is_red(cl->q_)) return red_addq(cl->red_, cl->q_, m, cl->pktattr_); #endif /* ALTQ_RED */ if (cl->flags_ & RMCF_CLEARDSCP) write_dsfield(m, cl->pktattr_, 0); _addq(cl->q_, m); return (0); } /* note: _rmc_dropq is not called for red */ static void _rmc_dropq(rm_class_t *cl) { mbuf_t *m; if ((m = _getq(cl->q_)) != NULL) m_freem(m); } static mbuf_t * _rmc_getq(rm_class_t *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) return rio_getq((rio_t *)cl->red_, cl->q_); #endif #ifdef ALTQ_RED if (q_is_red(cl->q_)) return red_getq(cl->red_, cl->q_); #endif return _getq(cl->q_); } static mbuf_t * _rmc_pollq(rm_class_t *cl) { return qhead(cl->q_); } #ifdef CBQ_TRACE struct cbqtrace cbqtrace_buffer[NCBQTRACE+1]; struct cbqtrace *cbqtrace_ptr = NULL; int cbqtrace_count; /* * DDB hook to trace cbq events: * the last 1024 events are held in a circular buffer. * use "call cbqtrace_dump(N)" to display 20 events from Nth event. 
*/ void cbqtrace_dump(int); static char *rmc_funcname(void *); static struct rmc_funcs { void *func; char *name; } rmc_funcs[] = { rmc_init, "rmc_init", rmc_queue_packet, "rmc_queue_packet", rmc_under_limit, "rmc_under_limit", rmc_update_class_util, "rmc_update_class_util", rmc_delay_action, "rmc_delay_action", rmc_restart, "rmc_restart", _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next", NULL, NULL }; static char *rmc_funcname(void *func) { struct rmc_funcs *fp; for (fp = rmc_funcs; fp->func != NULL; fp++) if (fp->func == func) return (fp->name); return ("unknown"); } void cbqtrace_dump(int counter) { int i, *p; char *cp; counter = counter % NCBQTRACE; p = (int *)&cbqtrace_buffer[counter]; for (i=0; i<20; i++) { printf("[0x%x] ", *p++); printf("%s: ", rmc_funcname((void *)*p++)); cp = (char *)p++; printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]); printf("%d\n",*p++); if (p >= (int *)&cbqtrace_buffer[NCBQTRACE]) p = (int *)cbqtrace_buffer; } } #endif /* CBQ_TRACE */ #endif /* ALTQ_CBQ */ #if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) #if !defined(__GNUC__) || defined(ALTQ_DEBUG) void _addq(class_queue_t *q, mbuf_t *m) { mbuf_t *m0; if ((m0 = qtail(q)) != NULL) m->m_nextpkt = m0->m_nextpkt; else m0 = m; m0->m_nextpkt = m; qtail(q) = m; qlen(q)++; } mbuf_t * _getq(class_queue_t *q) { mbuf_t *m, *m0; if ((m = qtail(q)) == NULL) return (NULL); if ((m0 = m->m_nextpkt) != m) m->m_nextpkt = m0->m_nextpkt; else { ASSERT(qlen(q) == 1); qtail(q) = NULL; } qlen(q)--; m0->m_nextpkt = NULL; return (m0); } /* drop a packet at the tail of the queue */ mbuf_t * _getq_tail(class_queue_t *q) { mbuf_t *m, *m0, *prev; if ((m = m0 = qtail(q)) == NULL) return NULL; do { prev = m0; m0 = m0->m_nextpkt; } while (m0 != m); prev->m_nextpkt = m->m_nextpkt; if (prev == m) { ASSERT(qlen(q) == 1); qtail(q) = NULL; } else qtail(q) = prev; qlen(q)--; m->m_nextpkt = NULL; return (m); } /* randomly select a packet in the queue */ mbuf_t * 
_getq_random(class_queue_t *q) { struct mbuf *m; int i, n; if ((m = qtail(q)) == NULL) return NULL; if (m->m_nextpkt == m) { ASSERT(qlen(q) == 1); qtail(q) = NULL; } else { struct mbuf *prev = NULL; n = arc4random() % qlen(q) + 1; for (i = 0; i < n; i++) { prev = m; m = m->m_nextpkt; } prev->m_nextpkt = m->m_nextpkt; if (m == qtail(q)) qtail(q) = prev; } qlen(q)--; m->m_nextpkt = NULL; return (m); } void _removeq(class_queue_t *q, mbuf_t *m) { mbuf_t *m0, *prev; m0 = qtail(q); do { prev = m0; m0 = m0->m_nextpkt; } while (m0 != m); prev->m_nextpkt = m->m_nextpkt; if (prev == m) qtail(q) = NULL; else if (qtail(q) == m) qtail(q) = prev; qlen(q)--; } void _flushq(class_queue_t *q) { mbuf_t *m; while ((m = _getq(q)) != NULL) m_freem(m); ASSERT(qlen(q) == 0); } #endif /* !__GNUC__ || ALTQ_DEBUG */ #endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */ Index: head/sys/net/altq/altq_rmclass_debug.h =================================================================== --- head/sys/net/altq/altq_rmclass_debug.h (revision 281641) +++ head/sys/net/altq/altq_rmclass_debug.h (revision 281642) @@ -1,112 +1,113 @@ -/* $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $ */ - -/* +/*- * Copyright (c) Sun Microsystems, Inc. 1998 All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the SMCC Technology * Development Group at Sun Microsystems, Inc. * * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is * provided "as is" without express or implied warranty of any kind. * * These notices must be retained in any copies of any part of this software. + * + * $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $ + * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_RMCLASS_DEBUG_H_ #define _ALTQ_ALTQ_RMCLASS_DEBUG_H_ /* #pragma ident "@(#)rm_class_debug.h 1.7 98/05/04 SMI" */ /* * Cbq debugging macros */ #ifdef __cplusplus extern "C" { #endif #ifdef CBQ_TRACE #ifndef NCBQTRACE #define NCBQTRACE (16 * 1024) #endif /* * To view the trace output, using adb, type: * adb -k /dev/ksyms /dev/mem , then type * cbqtrace_count/D to get the count, then type * cbqtrace_buffer,0tcount/Dp4C" "Xn * This will dump the trace buffer from 0 to count. */ /* * in ALTQ, "call cbqtrace_dump(N)" from DDB to display 20 events * from Nth event in the circular buffer. 
*/ struct cbqtrace { int count; int function; /* address of function */ int trace_action; /* descriptive 4 characters */ int object; /* object operated on */ }; extern struct cbqtrace cbqtrace_buffer[]; extern struct cbqtrace *cbqtrace_ptr; extern int cbqtrace_count; #define CBQTRACEINIT() { \ if (cbqtrace_ptr == NULL) \ cbqtrace_ptr = cbqtrace_buffer; \ else { \ cbqtrace_ptr = cbqtrace_buffer; \ bzero((void *)cbqtrace_ptr, sizeof(cbqtrace_buffer)); \ cbqtrace_count = 0; \ } \ } #define LOCK_TRACE() splimp() #define UNLOCK_TRACE(x) splx(x) #define CBQTRACE(func, act, obj) { \ int __s = LOCK_TRACE(); \ int *_p = &cbqtrace_ptr->count; \ *_p++ = ++cbqtrace_count; \ *_p++ = (int)(func); \ *_p++ = (int)(act); \ *_p++ = (int)(obj); \ if ((struct cbqtrace *)(void *)_p >= &cbqtrace_buffer[NCBQTRACE])\ cbqtrace_ptr = cbqtrace_buffer; \ else \ cbqtrace_ptr = (struct cbqtrace *)(void *)_p; \ UNLOCK_TRACE(__s); \ } #else /* If no tracing, define no-ops */ #define CBQTRACEINIT() #define CBQTRACE(a, b, c) #endif /* !CBQ_TRACE */ #ifdef __cplusplus } #endif #endif /* _ALTQ_ALTQ_RMCLASS_DEBUG_H_ */ Index: head/sys/net/altq/altq_subr.c =================================================================== --- head/sys/net/altq/altq_subr.c (revision 281641) +++ head/sys/net/altq/altq_subr.c (revision 281642) @@ -1,1981 +1,1925 @@ -/* $FreeBSD$ */ -/* $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ */ - -/* +/*- * Copyright (C) 1997-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ + * $FreeBSD$ */ -#if defined(__FreeBSD__) || defined(__NetBSD__) #include "opt_altq.h" #include "opt_inet.h" -#ifdef __FreeBSD__ #include "opt_inet6.h" -#endif -#endif /* __FreeBSD__ || __NetBSD__ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef __FreeBSD__ #include -#endif #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif /* machine dependent clock related includes */ -#ifdef __FreeBSD__ #include #include #include #include -#endif #if defined(__amd64__) || defined(__i386__) #include /* for pentium tsc */ #include /* for CPUID_TSC */ -#ifdef __FreeBSD__ #include /* for cpu_feature */ -#elif defined(__NetBSD__) || defined(__OpenBSD__) -#include /* for cpu_feature */ -#endif #endif /* __amd64 || __i386__ */ /* * internal function prototypes */ static void 
tbr_timeout(void *); int (*altq_input)(struct mbuf *, int) = NULL; static struct mbuf *tbr_dequeue(struct ifaltq *, int); static int tbr_timer = 0; /* token bucket regulator timer */ #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) static struct callout tbr_callout = CALLOUT_INITIALIZER; #else static struct callout tbr_callout; #endif #ifdef ALTQ3_CLFIER_COMPAT static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); #ifdef INET6 static int extract_ports6(struct mbuf *, struct ip6_hdr *, struct flowinfo_in6 *); #endif static int apply_filter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); static int apply_ppfilter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); #ifdef INET6 static int apply_filter6(u_int32_t, struct flow_filter6 *, struct flowinfo_in6 *); #endif static int apply_tosfilter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); static u_long get_filt_handle(struct acc_classifier *, int); static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); static u_int32_t filt2fibmask(struct flow_filter *); static void ip4f_cache(struct ip *, struct flowinfo_in *); static int ip4f_lookup(struct ip *, struct flowinfo_in *); static int ip4f_init(void); static struct ip4_frag *ip4f_alloc(void); static void ip4f_free(struct ip4_frag *); #endif /* ALTQ3_CLFIER_COMPAT */ /* * alternate queueing support routines */ /* look up the queue state by the interface name and the queueing type. 
*/ void * altq_lookup(name, type) char *name; int type; { struct ifnet *ifp; if ((ifp = ifunit(name)) != NULL) { /* read if_snd unlocked */ if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) return (ifp->if_snd.altq_disc); } return NULL; } int altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify) struct ifaltq *ifq; int type; void *discipline; int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); struct mbuf *(*dequeue)(struct ifaltq *, int); int (*request)(struct ifaltq *, int, void *); void *clfier; void *(*classify)(void *, struct mbuf *, int); { IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } #ifdef ALTQ3_COMPAT /* * pfaltq can override the existing discipline, but altq3 cannot. * check these if clfier is not NULL (which implies altq3). */ if (clfier != NULL) { if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return EBUSY; } if (ALTQ_IS_ATTACHED(ifq)) { IFQ_UNLOCK(ifq); return EEXIST; } } #endif ifq->altq_type = type; ifq->altq_disc = discipline; ifq->altq_enqueue = enqueue; ifq->altq_dequeue = dequeue; ifq->altq_request = request; ifq->altq_clfier = clfier; ifq->altq_classify = classify; ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED); #ifdef ALTQ3_COMPAT #ifdef ALTQ_KLD altq_module_incref(type); #endif #endif IFQ_UNLOCK(ifq); return 0; } int altq_detach(ifq) struct ifaltq *ifq; { IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return EBUSY; } if (!ALTQ_IS_ATTACHED(ifq)) { IFQ_UNLOCK(ifq); return (0); } #ifdef ALTQ3_COMPAT #ifdef ALTQ_KLD altq_module_declref(ifq->altq_type); #endif #endif ifq->altq_type = ALTQT_NONE; ifq->altq_disc = NULL; ifq->altq_enqueue = NULL; ifq->altq_dequeue = NULL; ifq->altq_request = NULL; ifq->altq_clfier = NULL; ifq->altq_classify = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; IFQ_UNLOCK(ifq); return 0; } int altq_enable(ifq) struct ifaltq *ifq; { int s; IFQ_LOCK(ifq); if 
(!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return 0; } -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_PURGE_NOLOCK(ifq); ASSERT(ifq->ifq_len == 0); ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */ ifq->altq_flags |= ALTQF_ENABLED; if (ifq->altq_clfier != NULL) ifq->altq_flags |= ALTQF_CLASSIFY; splx(s); IFQ_UNLOCK(ifq); return 0; } int altq_disable(ifq) struct ifaltq *ifq; { int s; IFQ_LOCK(ifq); if (!ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return 0; } -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif IFQ_PURGE_NOLOCK(ifq); ASSERT(ifq->ifq_len == 0); ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY); splx(s); IFQ_UNLOCK(ifq); return 0; } #ifdef ALTQ_DEBUG void altq_assert(file, line, failedexpr) const char *file, *failedexpr; int line; { (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", failedexpr, file, line); panic("altq assertion"); /* NOTREACHED */ } #endif /* * internal representation of token bucket parameters * rate: byte_per_unittime << 32 * (((bits_per_sec) / 8) << 32) / machclk_freq * depth: byte << 32 * */ #define TBR_SHIFT 32 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) static struct mbuf * tbr_dequeue(ifq, op) struct ifaltq *ifq; int op; { struct tb_regulator *tbr; struct mbuf *m; int64_t interval; u_int64_t now; IFQ_LOCK_ASSERT(ifq); tbr = ifq->altq_tbr; if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { /* if this is a remove after poll, bypass tbr check */ } else { /* update token only when it is negative */ if (tbr->tbr_token <= 0) { now = read_machclk(); interval = now - tbr->tbr_last; if (interval >= tbr->tbr_filluptime) tbr->tbr_token = tbr->tbr_depth; else { tbr->tbr_token += interval * tbr->tbr_rate; if (tbr->tbr_token > tbr->tbr_depth) tbr->tbr_token = tbr->tbr_depth; } tbr->tbr_last = now; } /* if token is still negative, don't allow dequeue */ if (tbr->tbr_token <= 0) 
return (NULL); } if (ALTQ_IS_ENABLED(ifq)) m = (*ifq->altq_dequeue)(ifq, op); else { if (op == ALTDQ_POLL) _IF_POLL(ifq, m); else _IF_DEQUEUE(ifq, m); } if (m != NULL && op == ALTDQ_REMOVE) tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); tbr->tbr_lastop = op; return (m); } /* * set a token bucket regulator. * if the specified rate is zero, the token bucket regulator is deleted. */ int tbr_set(ifq, profile) struct ifaltq *ifq; struct tb_profile *profile; { struct tb_regulator *tbr, *otbr; if (tbr_dequeue_ptr == NULL) tbr_dequeue_ptr = tbr_dequeue; if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) { printf("tbr_set: no cpu clock available!\n"); return (ENXIO); } IFQ_LOCK(ifq); if (profile->rate == 0) { /* delete this tbr */ if ((tbr = ifq->altq_tbr) == NULL) { IFQ_UNLOCK(ifq); return (ENOENT); } ifq->altq_tbr = NULL; free(tbr, M_DEVBUF); IFQ_UNLOCK(ifq); return (0); } tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); if (tbr == NULL) { IFQ_UNLOCK(ifq); return (ENOMEM); } tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; tbr->tbr_depth = TBR_SCALE(profile->depth); if (tbr->tbr_rate > 0) tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; else tbr->tbr_filluptime = 0xffffffffffffffffLL; tbr->tbr_token = tbr->tbr_depth; tbr->tbr_last = read_machclk(); tbr->tbr_lastop = ALTDQ_REMOVE; otbr = ifq->altq_tbr; ifq->altq_tbr = tbr; /* set the new tbr */ if (otbr != NULL) free(otbr, M_DEVBUF); else { if (tbr_timer == 0) { CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); tbr_timer = 1; } } IFQ_UNLOCK(ifq); return (0); } /* * tbr_timeout goes through the interface list, and kicks the drivers * if necessary. 
* * MPSAFE */ static void tbr_timeout(arg) void *arg; { -#ifdef __FreeBSD__ VNET_ITERATOR_DECL(vnet_iter); -#endif struct ifnet *ifp; int active, s; active = 0; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif -#ifdef __FreeBSD__ IFNET_RLOCK_NOSLEEP(); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); -#endif for (ifp = TAILQ_FIRST(&V_ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { /* read from if_snd unlocked */ if (!TBR_IS_ENABLED(&ifp->if_snd)) continue; active++; if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL) (*ifp->if_start)(ifp); } -#ifdef __FreeBSD__ CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); IFNET_RUNLOCK_NOSLEEP(); -#endif splx(s); if (active > 0) CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); else tbr_timer = 0; /* don't need tbr_timer anymore */ } /* * get token bucket regulator profile */ int tbr_get(ifq, profile) struct ifaltq *ifq; struct tb_profile *profile; { struct tb_regulator *tbr; IFQ_LOCK(ifq); if ((tbr = ifq->altq_tbr) == NULL) { profile->rate = 0; profile->depth = 0; } else { profile->rate = (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq); profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth); } IFQ_UNLOCK(ifq); return (0); } /* * attach a discipline to the interface. if one already exists, it is * overridden. * Locking is done in the discipline specific attach functions. Basically * they call back to altq_attach which takes care of the attach and locking. */ int altq_pfattach(struct pf_altq *a) { int error = 0; switch (a->scheduler) { case ALTQT_NONE: break; #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_pfattach(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_pfattach(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_pfattach(a); break; #endif default: error = ENXIO; } return (error); } /* * detach a discipline from the interface. * it is possible that the discipline was already overridden by another * discipline. 
*/ int altq_pfdetach(struct pf_altq *a) { struct ifnet *ifp; int s, error = 0; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); /* if this discipline is no longer referenced, just return */ /* read unlocked from if_snd */ if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc) return (0); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif /* read unlocked from if_snd, _disable and _detach take care */ if (ALTQ_IS_ENABLED(&ifp->if_snd)) error = altq_disable(&ifp->if_snd); if (error == 0) error = altq_detach(&ifp->if_snd); splx(s); return (error); } /* * add a discipline or a queue * Locking is done in the discipline specific functions with regards to * malloc with WAITOK, also it is not yet clear which lock to use. */ int altq_add(struct pf_altq *a) { int error = 0; if (a->qname[0] != 0) return (altq_add_queue(a)); if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) panic("altq_add: no cpu clock"); switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_add_altq(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_add_altq(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_add_altq(a); break; #endif default: error = ENXIO; } return (error); } /* * remove a discipline or a queue * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_remove(struct pf_altq *a) { int error = 0; if (a->qname[0] != 0) return (altq_remove_queue(a)); switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_remove_altq(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_remove_altq(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_remove_altq(a); break; #endif default: error = ENXIO; } return (error); } /* * add a queue to the discipline * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_add_queue(struct 
pf_altq *a) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_add_queue(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_add_queue(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_add_queue(a); break; #endif default: error = ENXIO; } return (error); } /* * remove a queue from the discipline * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_remove_queue(struct pf_altq *a) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_remove_queue(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_remove_queue(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_remove_queue(a); break; #endif default: error = ENXIO; } return (error); } /* * get queue statistics * Locking is done in the discipline specific functions with regards to * copyout operations, also it is not yet clear which lock to use. 
*/ int altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_getqstats(a, ubuf, nbytes); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_getqstats(a, ubuf, nbytes); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_getqstats(a, ubuf, nbytes); break; #endif default: error = ENXIO; } return (error); } /* * read and write diffserv field in IPv4 or IPv6 header */ u_int8_t read_dsfield(m, pktattr) struct mbuf *m; struct altq_pktattr *pktattr; { struct mbuf *m0; u_int8_t ds_field = 0; if (pktattr == NULL || (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) return ((u_int8_t)0); /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if ((pktattr->pattr_hdr >= m0->m_data) && (pktattr->pattr_hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, pattr_hdr is stale */ pktattr->pattr_af = AF_UNSPEC; #ifdef ALTQ_DEBUG printf("read_dsfield: can't locate header!\n"); #endif return ((u_int8_t)0); } if (pktattr->pattr_af == AF_INET) { struct ip *ip = (struct ip *)pktattr->pattr_hdr; if (ip->ip_v != 4) return ((u_int8_t)0); /* version mismatch! */ ds_field = ip->ip_tos; } #ifdef INET6 else if (pktattr->pattr_af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return ((u_int8_t)0); /* version mismatch! 
*/ ds_field = (flowlabel >> 20) & 0xff; } #endif return (ds_field); } void write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield) { struct mbuf *m0; if (pktattr == NULL || (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) return; /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if ((pktattr->pattr_hdr >= m0->m_data) && (pktattr->pattr_hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, pattr_hdr is stale */ pktattr->pattr_af = AF_UNSPEC; #ifdef ALTQ_DEBUG printf("write_dsfield: can't locate header!\n"); #endif return; } if (pktattr->pattr_af == AF_INET) { struct ip *ip = (struct ip *)pktattr->pattr_hdr; u_int8_t old; int32_t sum; if (ip->ip_v != 4) return; /* version mismatch! */ old = ip->ip_tos; dsfield |= old & 3; /* leave CU bits */ if (old == dsfield) return; ip->ip_tos = dsfield; /* * update checksum (from RFC1624) * HC' = ~(~HC + ~m + m') */ sum = ~ntohs(ip->ip_sum) & 0xffff; sum += 0xff00 + (~old & 0xff) + dsfield; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); /* add carry */ ip->ip_sum = htons(~sum & 0xffff); } #ifdef INET6 else if (pktattr->pattr_af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return; /* version mismatch! */ flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20); ip6->ip6_flow = htonl(flowlabel); } #endif return; } /* * high resolution clock support taking advantage of a machine dependent * high resolution time counter (e.g., timestamp counter of intel pentium). 
* we assume * - 64-bit-long monotonically-increasing counter * - frequency range is 100M-4GHz (CPU speed) */ /* if pcc is not available or disabled, emulate 256MHz using microtime() */ #define MACHCLK_SHIFT 8 int machclk_usepcc; u_int32_t machclk_freq; u_int32_t machclk_per_tick; #if defined(__i386__) && defined(__NetBSD__) extern u_int64_t cpu_tsc_freq; #endif #if (__FreeBSD_version >= 700035) /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg, const struct cf_level *level, int status) { /* If there was an error during the transition, don't do anything. */ if (status != 0) return; #if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__)) /* If TSC is P-state invariant, don't do anything. */ if (tsc_is_invariant) return; #endif /* Total setting for this level gives the new frequency in MHz. */ init_machclk(); } EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_LAST); #endif /* __FreeBSD_version >= 700035 */ static void init_machclk_setup(void) { #if (__FreeBSD_version >= 600000) callout_init(&tbr_callout, 0); #endif machclk_usepcc = 1; #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC) machclk_usepcc = 0; #endif #if defined(__FreeBSD__) && defined(SMP) machclk_usepcc = 0; #endif #if defined(__NetBSD__) && defined(MULTIPROCESSOR) machclk_usepcc = 0; #endif #if defined(__amd64__) || defined(__i386__) /* check if TSC is available */ -#ifdef __FreeBSD__ if ((cpu_feature & CPUID_TSC) == 0 || atomic_load_acq_64(&tsc_freq) == 0) -#else - if ((cpu_feature & CPUID_TSC) == 0) -#endif machclk_usepcc = 0; #endif } void init_machclk(void) { static int called; /* Call one-time initialization function. 
*/ if (!called) { init_machclk_setup(); called = 1; } if (machclk_usepcc == 0) { /* emulate 256MHz using microtime() */ machclk_freq = 1000000 << MACHCLK_SHIFT; machclk_per_tick = machclk_freq / hz; #ifdef ALTQ_DEBUG printf("altq: emulate %uHz cpu clock\n", machclk_freq); #endif return; } /* * if the clock frequency (of Pentium TSC or Alpha PCC) is * accessible, just use it. */ #if defined(__amd64__) || defined(__i386__) -#ifdef __FreeBSD__ machclk_freq = atomic_load_acq_64(&tsc_freq); -#elif defined(__NetBSD__) - machclk_freq = (u_int32_t)cpu_tsc_freq; -#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU)) - machclk_freq = pentium_mhz * 1000000; #endif -#endif /* * if we don't know the clock frequency, measure it. */ if (machclk_freq == 0) { static int wait; struct timeval tv_start, tv_end; u_int64_t start, end, diff; int timo; microtime(&tv_start); start = read_machclk(); timo = hz; /* 1 sec */ (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); microtime(&tv_end); end = read_machclk(); diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 + tv_end.tv_usec - tv_start.tv_usec; if (diff != 0) machclk_freq = (u_int)((end - start) * 1000000 / diff); } machclk_per_tick = machclk_freq / hz; #ifdef ALTQ_DEBUG printf("altq: CPU clock: %uHz\n", machclk_freq); #endif } #if defined(__OpenBSD__) && defined(__i386__) static __inline u_int64_t rdtsc(void) { u_int64_t rv; __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv)); return (rv); } #endif /* __OpenBSD__ && __i386__ */ u_int64_t read_machclk(void) { u_int64_t val; if (machclk_usepcc) { #if defined(__amd64__) || defined(__i386__) val = rdtsc(); #else panic("read_machclk"); #endif } else { struct timeval tv; microtime(&tv); val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000 + tv.tv_usec) << MACHCLK_SHIFT); } return (val); } #ifdef ALTQ3_CLFIER_COMPAT #ifndef IPPROTO_ESP #define IPPROTO_ESP 50 /* encapsulating security payload */ #endif #ifndef IPPROTO_AH #define IPPROTO_AH 51 /* 
authentication header */ #endif /* * extract flow information from a given packet. * filt_mask shows flowinfo fields required. * we assume the ip header is in one mbuf, and addresses and ports are * in network byte order. */ int altq_extractflow(m, af, flow, filt_bmask) struct mbuf *m; int af; struct flowinfo *flow; u_int32_t filt_bmask; { switch (af) { case PF_INET: { struct flowinfo_in *fin; struct ip *ip; ip = mtod(m, struct ip *); if (ip->ip_v != 4) break; fin = (struct flowinfo_in *)flow; fin->fi_len = sizeof(struct flowinfo_in); fin->fi_family = AF_INET; fin->fi_proto = ip->ip_p; fin->fi_tos = ip->ip_tos; fin->fi_src.s_addr = ip->ip_src.s_addr; fin->fi_dst.s_addr = ip->ip_dst.s_addr; if (filt_bmask & FIMB4_PORTS) /* if port info is required, extract port numbers */ extract_ports4(m, ip, fin); else { fin->fi_sport = 0; fin->fi_dport = 0; fin->fi_gpi = 0; } return (1); } #ifdef INET6 case PF_INET6: { struct flowinfo_in6 *fin6; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); /* should we check the ip version? */ fin6 = (struct flowinfo_in6 *)flow; fin6->fi6_len = sizeof(struct flowinfo_in6); fin6->fi6_family = AF_INET6; fin6->fi6_proto = ip6->ip6_nxt; fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); fin6->fi6_src = ip6->ip6_src; fin6->fi6_dst = ip6->ip6_dst; if ((filt_bmask & FIMB6_PORTS) || ((filt_bmask & FIMB6_PROTO) && ip6->ip6_nxt > IPPROTO_IPV6)) /* * if port info is required, or proto is required * but there are option headers, extract port * and protocol numbers. 
*/ extract_ports6(m, ip6, fin6); else { fin6->fi6_sport = 0; fin6->fi6_dport = 0; fin6->fi6_gpi = 0; } return (1); } #endif /* INET6 */ default: break; } /* failed */ flow->fi_len = sizeof(struct flowinfo); flow->fi_family = AF_UNSPEC; return (0); } /* * helper routine to extract port numbers */ /* structure for ipsec and ipv6 option header template */ struct _opt6 { u_int8_t opt6_nxt; /* next header */ u_int8_t opt6_hlen; /* header extension length */ u_int16_t _pad; u_int32_t ah_spi; /* security parameter index for authentication header */ }; /* * extract port numbers from a ipv4 packet. */ static int extract_ports4(m, ip, fin) struct mbuf *m; struct ip *ip; struct flowinfo_in *fin; { struct mbuf *m0; u_short ip_off; u_int8_t proto; int off; fin->fi_sport = 0; fin->fi_dport = 0; fin->fi_gpi = 0; ip_off = ntohs(ip->ip_off); /* if it is a fragment, try cached fragment info */ if (ip_off & IP_OFFMASK) { ip4f_lookup(ip, fin); return (1); } /* locate the mbuf containing the protocol header */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)ip >= m0->m_data) && ((caddr_t)ip < m0->m_data + m0->m_len)) break; if (m0 == NULL) { #ifdef ALTQ_DEBUG printf("extract_ports4: can't locate header! ip=%p\n", ip); #endif return (0); } off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); proto = ip->ip_p; #ifdef ALTQ_IPSEC again: #endif while (off >= m0->m_len) { off -= m0->m_len; m0 = m0->m_next; if (m0 == NULL) return (0); /* bogus ip_hl! 
*/ } if (m0->m_len < off + 4) return (0); switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: { struct udphdr *udp; udp = (struct udphdr *)(mtod(m0, caddr_t) + off); fin->fi_sport = udp->uh_sport; fin->fi_dport = udp->uh_dport; fin->fi_proto = proto; } break; #ifdef ALTQ_IPSEC case IPPROTO_ESP: if (fin->fi_gpi == 0){ u_int32_t *gpi; gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); fin->fi_gpi = *gpi; } fin->fi_proto = proto; break; case IPPROTO_AH: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); proto = opt6->opt6_nxt; off += 8 + (opt6->opt6_hlen * 4); if (fin->fi_gpi == 0 && m0->m_len >= off + 8) fin->fi_gpi = opt6->ah_spi; } /* goto the next header */ goto again; #endif /* ALTQ_IPSEC */ default: fin->fi_proto = proto; return (0); } /* if this is a first fragment, cache it. */ if (ip_off & IP_MF) ip4f_cache(ip, fin); return (1); } #ifdef INET6 static int extract_ports6(m, ip6, fin6) struct mbuf *m; struct ip6_hdr *ip6; struct flowinfo_in6 *fin6; { struct mbuf *m0; int off; u_int8_t proto; fin6->fi6_gpi = 0; fin6->fi6_sport = 0; fin6->fi6_dport = 0; /* locate the mbuf containing the protocol header */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)ip6 >= m0->m_data) && ((caddr_t)ip6 < m0->m_data + m0->m_len)) break; if (m0 == NULL) { #ifdef ALTQ_DEBUG printf("extract_ports6: can't locate header! 
ip6=%p\n", ip6); #endif return (0); } off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); proto = ip6->ip6_nxt; do { while (off >= m0->m_len) { off -= m0->m_len; m0 = m0->m_next; if (m0 == NULL) return (0); } if (m0->m_len < off + 4) return (0); switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: { struct udphdr *udp; udp = (struct udphdr *)(mtod(m0, caddr_t) + off); fin6->fi6_sport = udp->uh_sport; fin6->fi6_dport = udp->uh_dport; fin6->fi6_proto = proto; } return (1); case IPPROTO_ESP: if (fin6->fi6_gpi == 0) { u_int32_t *gpi; gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); fin6->fi6_gpi = *gpi; } fin6->fi6_proto = proto; return (1); case IPPROTO_AH: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8) fin6->fi6_gpi = opt6->ah_spi; proto = opt6->opt6_nxt; off += 8 + (opt6->opt6_hlen * 4); /* goto the next header */ break; } case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); proto = opt6->opt6_nxt; off += (opt6->opt6_hlen + 1) * 8; /* goto the next header */ break; } case IPPROTO_FRAGMENT: /* ipv6 fragmentations are not supported yet */ default: fin6->fi6_proto = proto; return (0); } } while (1); /*NOTREACHED*/ } #endif /* INET6 */ /* * altq common classifier */ int acc_add_filter(classifier, filter, class, phandle) struct acc_classifier *classifier; struct flow_filter *filter; void *class; u_long *phandle; { struct acc_filter *afp, *prev, *tmp; int i, s; #ifdef INET6 if (filter->ff_flow.fi_family != AF_INET && filter->ff_flow.fi_family != AF_INET6) return (EINVAL); #else if (filter->ff_flow.fi_family != AF_INET) return (EINVAL); #endif afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK); if (afp == NULL) return (ENOMEM); bzero(afp, sizeof(struct acc_filter)); afp->f_filter = *filter; afp->f_class = class; i = 
ACC_WILDCARD_INDEX; if (filter->ff_flow.fi_family == AF_INET) { struct flow_filter *filter4 = &afp->f_filter; /* * if address is 0, it's a wildcard. if address mask * isn't set, use full mask. */ if (filter4->ff_flow.fi_dst.s_addr == 0) filter4->ff_mask.mask_dst.s_addr = 0; else if (filter4->ff_mask.mask_dst.s_addr == 0) filter4->ff_mask.mask_dst.s_addr = 0xffffffff; if (filter4->ff_flow.fi_src.s_addr == 0) filter4->ff_mask.mask_src.s_addr = 0; else if (filter4->ff_mask.mask_src.s_addr == 0) filter4->ff_mask.mask_src.s_addr = 0xffffffff; /* clear extra bits in addresses */ filter4->ff_flow.fi_dst.s_addr &= filter4->ff_mask.mask_dst.s_addr; filter4->ff_flow.fi_src.s_addr &= filter4->ff_mask.mask_src.s_addr; /* * if dst address is a wildcard, use hash-entry * ACC_WILDCARD_INDEX. */ if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff) i = ACC_WILDCARD_INDEX; else i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr); } #ifdef INET6 else if (filter->ff_flow.fi_family == AF_INET6) { struct flow_filter6 *filter6 = (struct flow_filter6 *)&afp->f_filter; #ifndef IN6MASK0 /* taken from kame ipv6 */ #define IN6MASK0 {{{ 0, 0, 0, 0 }}} #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}} const struct in6_addr in6mask0 = IN6MASK0; const struct in6_addr in6mask128 = IN6MASK128; #endif if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst)) filter6->ff_mask6.mask6_dst = in6mask0; else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst)) filter6->ff_mask6.mask6_dst = in6mask128; if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src)) filter6->ff_mask6.mask6_src = in6mask0; else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src)) filter6->ff_mask6.mask6_src = in6mask128; /* clear extra bits in addresses */ for (i = 0; i < 16; i++) filter6->ff_flow6.fi6_dst.s6_addr[i] &= filter6->ff_mask6.mask6_dst.s6_addr[i]; for (i = 0; i < 16; i++) filter6->ff_flow6.fi6_src.s6_addr[i] &= filter6->ff_mask6.mask6_src.s6_addr[i]; if 
(filter6->ff_flow6.fi6_flowlabel == 0) i = ACC_WILDCARD_INDEX; else i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel); } #endif /* INET6 */ afp->f_handle = get_filt_handle(classifier, i); /* update filter bitmask */ afp->f_fbmask = filt2fibmask(filter); classifier->acc_fbmask |= afp->f_fbmask; /* * add this filter to the filter list. * filters are ordered from the highest rule number. */ -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif prev = NULL; LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) prev = tmp; else break; } if (prev == NULL) LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); else LIST_INSERT_AFTER(prev, afp, f_chain); splx(s); *phandle = afp->f_handle; return (0); } int acc_delete_filter(classifier, handle) struct acc_classifier *classifier; u_long handle; { struct acc_filter *afp; int s; if ((afp = filth_to_filtp(classifier, handle)) == NULL) return (EINVAL); -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif LIST_REMOVE(afp, f_chain); splx(s); free(afp, M_DEVBUF); /* todo: update filt_bmask */ return (0); } /* * delete filters referencing to the specified class. * if the all flag is not 0, delete all the filters. 
*/ int acc_discard_filters(classifier, class, all) struct acc_classifier *classifier; void *class; int all; { struct acc_filter *afp; int i, s; -#ifdef __NetBSD__ s = splnet(); -#else - s = splimp(); -#endif for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { do { LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (all || afp->f_class == class) { LIST_REMOVE(afp, f_chain); free(afp, M_DEVBUF); /* start again from the head */ break; } } while (afp != NULL); } splx(s); if (all) classifier->acc_fbmask = 0; return (0); } void * acc_classify(clfier, m, af) void *clfier; struct mbuf *m; int af; { struct acc_classifier *classifier; struct flowinfo flow; struct acc_filter *afp; int i; classifier = (struct acc_classifier *)clfier; altq_extractflow(m, af, &flow, classifier->acc_fbmask); if (flow.fi_family == AF_INET) { struct flowinfo_in *fp = (struct flowinfo_in *)&flow; if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { /* only tos is used */ LIST_FOREACH(afp, &classifier->acc_filters[ACC_WILDCARD_INDEX], f_chain) if (apply_tosfilter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); } else if ((classifier->acc_fbmask & (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) == 0) { /* only proto and ports are used */ LIST_FOREACH(afp, &classifier->acc_filters[ACC_WILDCARD_INDEX], f_chain) if (apply_ppfilter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); } else { /* get the filter hash entry from its dest address */ i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); do { /* * go through this loop twice. first for dst * hash, second for wildcards. */ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (apply_filter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); /* * check again for filters with a dst addr * wildcard. * (daddr == 0 || dmask != 0xffffffff). 
*/ if (i != ACC_WILDCARD_INDEX) i = ACC_WILDCARD_INDEX; else break; } while (1); } } #ifdef INET6 else if (flow.fi_family == AF_INET6) { struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; /* get the filter hash entry from its flow ID */ if (fp6->fi6_flowlabel != 0) i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); else /* flowlable can be zero */ i = ACC_WILDCARD_INDEX; /* go through this loop twice. first for flow hash, second for wildcards. */ do { LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (apply_filter6(afp->f_fbmask, (struct flow_filter6 *)&afp->f_filter, fp6)) /* filter matched */ return (afp->f_class); /* * check again for filters with a wildcard. */ if (i != ACC_WILDCARD_INDEX) i = ACC_WILDCARD_INDEX; else break; } while (1); } #endif /* INET6 */ /* no filter matched */ return (NULL); } static int apply_filter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) return (0); if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) return (0); if ((fbmask & FIMB4_DADDR) && filt->ff_flow.fi_dst.s_addr != (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) return (0); if ((fbmask & FIMB4_SADDR) && filt->ff_flow.fi_src.s_addr != (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) return (0); if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) return (0); if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != (pkt->fi_tos & filt->ff_mask.mask_tos)) return (0); if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) return (0); /* match */ return (1); } /* * filter matching function optimized for a common case that checks * only protocol and port numbers */ static int apply_ppfilter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & 
FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) return (0); if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) return (0); if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) return (0); /* match */ return (1); } /* * filter matching function only for tos field. */ static int apply_tosfilter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != (pkt->fi_tos & filt->ff_mask.mask_tos)) return (0); /* match */ return (1); } #ifdef INET6 static int apply_filter6(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter6 *filt; struct flowinfo_in6 *pkt; { int i; if (filt->ff_flow6.fi6_family != AF_INET6) return (0); if ((fbmask & FIMB6_FLABEL) && filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) return (0); if ((fbmask & FIMB6_PROTO) && filt->ff_flow6.fi6_proto != pkt->fi6_proto) return (0); if ((fbmask & FIMB6_SPORT) && filt->ff_flow6.fi6_sport != pkt->fi6_sport) return (0); if ((fbmask & FIMB6_DPORT) && filt->ff_flow6.fi6_dport != pkt->fi6_dport) return (0); if (fbmask & FIMB6_SADDR) { for (i = 0; i < 4; i++) if (filt->ff_flow6.fi6_src.s6_addr32[i] != (pkt->fi6_src.s6_addr32[i] & filt->ff_mask6.mask6_src.s6_addr32[i])) return (0); } if (fbmask & FIMB6_DADDR) { for (i = 0; i < 4; i++) if (filt->ff_flow6.fi6_dst.s6_addr32[i] != (pkt->fi6_dst.s6_addr32[i] & filt->ff_mask6.mask6_dst.s6_addr32[i])) return (0); } if ((fbmask & FIMB6_TCLASS) && filt->ff_flow6.fi6_tclass != (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) return (0); if ((fbmask & FIMB6_GPI) && filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) return (0); /* match */ return (1); } #endif /* INET6 */ /* * filter handle: * bit 20-28: index to the filter hash table * bit 0-19: unique id in the hash bucket. 
*/ static u_long get_filt_handle(classifier, i) struct acc_classifier *classifier; int i; { static u_long handle_number = 1; u_long handle; struct acc_filter *afp; while (1) { handle = handle_number++ & 0x000fffff; if (LIST_EMPTY(&classifier->acc_filters[i])) break; LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if ((afp->f_handle & 0x000fffff) == handle) break; if (afp == NULL) break; /* this handle is already used, try again */ } return ((i << 20) | handle); } /* convert filter handle to filter pointer */ static struct acc_filter * filth_to_filtp(classifier, handle) struct acc_classifier *classifier; u_long handle; { struct acc_filter *afp; int i; i = ACC_GET_HINDEX(handle); LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (afp->f_handle == handle) return (afp); return (NULL); } /* create flowinfo bitmask */ static u_int32_t filt2fibmask(filt) struct flow_filter *filt; { u_int32_t mask = 0; #ifdef INET6 struct flow_filter6 *filt6; #endif switch (filt->ff_flow.fi_family) { case AF_INET: if (filt->ff_flow.fi_proto != 0) mask |= FIMB4_PROTO; if (filt->ff_flow.fi_tos != 0) mask |= FIMB4_TOS; if (filt->ff_flow.fi_dst.s_addr != 0) mask |= FIMB4_DADDR; if (filt->ff_flow.fi_src.s_addr != 0) mask |= FIMB4_SADDR; if (filt->ff_flow.fi_sport != 0) mask |= FIMB4_SPORT; if (filt->ff_flow.fi_dport != 0) mask |= FIMB4_DPORT; if (filt->ff_flow.fi_gpi != 0) mask |= FIMB4_GPI; break; #ifdef INET6 case AF_INET6: filt6 = (struct flow_filter6 *)filt; if (filt6->ff_flow6.fi6_proto != 0) mask |= FIMB6_PROTO; if (filt6->ff_flow6.fi6_tclass != 0) mask |= FIMB6_TCLASS; if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) mask |= FIMB6_DADDR; if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) mask |= FIMB6_SADDR; if (filt6->ff_flow6.fi6_sport != 0) mask |= FIMB6_SPORT; if (filt6->ff_flow6.fi6_dport != 0) mask |= FIMB6_DPORT; if (filt6->ff_flow6.fi6_gpi != 0) mask |= FIMB6_GPI; if (filt6->ff_flow6.fi6_flowlabel != 0) mask |= FIMB6_FLABEL; break; #endif /* INET6 */ 
} return (mask); } /* * helper functions to handle IPv4 fragments. * currently only in-sequence fragments are handled. * - fragment info is cached in a LRU list. * - when a first fragment is found, cache its flow info. * - when a non-first fragment is found, lookup the cache. */ struct ip4_frag { TAILQ_ENTRY(ip4_frag) ip4f_chain; char ip4f_valid; u_short ip4f_id; struct flowinfo_in ip4f_info; }; static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ static void ip4f_cache(ip, fin) struct ip *ip; struct flowinfo_in *fin; { struct ip4_frag *fp; if (TAILQ_EMPTY(&ip4f_list)) { /* first time call, allocate fragment cache entries. */ if (ip4f_init() < 0) /* allocation failed! */ return; } fp = ip4f_alloc(); fp->ip4f_id = ip->ip_id; fp->ip4f_info.fi_proto = ip->ip_p; fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr; fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr; /* save port numbers */ fp->ip4f_info.fi_sport = fin->fi_sport; fp->ip4f_info.fi_dport = fin->fi_dport; fp->ip4f_info.fi_gpi = fin->fi_gpi; } static int ip4f_lookup(ip, fin) struct ip *ip; struct flowinfo_in *fin; { struct ip4_frag *fp; for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid; fp = TAILQ_NEXT(fp, ip4f_chain)) if (ip->ip_id == fp->ip4f_id && ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr && ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr && ip->ip_p == fp->ip4f_info.fi_proto) { /* found the matching entry */ fin->fi_sport = fp->ip4f_info.fi_sport; fin->fi_dport = fp->ip4f_info.fi_dport; fin->fi_gpi = fp->ip4f_info.fi_gpi; if ((ntohs(ip->ip_off) & IP_MF) == 0) /* this is the last fragment, release the entry. 
*/ ip4f_free(fp); return (1); } /* no matching entry found */ return (0); } static int ip4f_init(void) { struct ip4_frag *fp; int i; TAILQ_INIT(&ip4f_list); for (i=0; iip4f_valid = 0; TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); } return (0); } static struct ip4_frag * ip4f_alloc(void) { struct ip4_frag *fp; /* reclaim an entry at the tail, put it at the head */ fp = TAILQ_LAST(&ip4f_list, ip4f_list); TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); fp->ip4f_valid = 1; TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain); return (fp); } static void ip4f_free(fp) struct ip4_frag *fp; { TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); fp->ip4f_valid = 0; TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); } #endif /* ALTQ3_CLFIER_COMPAT */ Index: head/sys/net/altq/altq_var.h =================================================================== --- head/sys/net/altq/altq_var.h (revision 281641) +++ head/sys/net/altq/altq_var.h (revision 281642) @@ -1,261 +1,231 @@ -/* $FreeBSD$ */ -/* $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $ */ - -/* +/*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $ + * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_VAR_H_ #define _ALTQ_ALTQ_VAR_H_ #ifdef _KERNEL #include #include #include #ifdef ALTQ3_CLFIER_COMPAT /* * filter structure for altq common classifier */ struct acc_filter { LIST_ENTRY(acc_filter) f_chain; void *f_class; /* pointer to the class */ u_long f_handle; /* filter id */ u_int32_t f_fbmask; /* filter bitmask */ struct flow_filter f_filter; /* filter value */ }; /* * XXX ACC_FILTER_TABLESIZE can't be larger than 2048 unless we fix * the handle assignment. 
*/ #define ACC_FILTER_TABLESIZE (256+1) #define ACC_FILTER_MASK (ACC_FILTER_TABLESIZE - 2) #define ACC_WILDCARD_INDEX (ACC_FILTER_TABLESIZE - 1) #ifdef __GNUC__ #define ACC_GET_HASH_INDEX(addr) \ ({int x = (addr) + ((addr) >> 16); (x + (x >> 8)) & ACC_FILTER_MASK;}) #else #define ACC_GET_HASH_INDEX(addr) \ (((addr) + ((addr) >> 8) + ((addr) >> 16) + ((addr) >> 24)) \ & ACC_FILTER_MASK) #endif #define ACC_GET_HINDEX(handle) ((handle) >> 20) #if (__FreeBSD_version > 500000) #define ACC_LOCK_INIT(ac) mtx_init(&(ac)->acc_mtx, "classifier", MTX_DEF) #define ACC_LOCK_DESTROY(ac) mtx_destroy(&(ac)->acc_mtx) #define ACC_LOCK(ac) mtx_lock(&(ac)->acc_mtx) #define ACC_UNLOCK(ac) mtx_unlock(&(ac)->acc_mtx) #else #define ACC_LOCK_INIT(ac) #define ACC_LOCK_DESTROY(ac) #define ACC_LOCK(ac) #define ACC_UNLOCK(ac) #endif struct acc_classifier { u_int32_t acc_fbmask; LIST_HEAD(filt, acc_filter) acc_filters[ACC_FILTER_TABLESIZE]; #if (__FreeBSD_version > 500000) struct mtx acc_mtx; #endif }; /* * flowinfo mask bits used by classifier */ /* for ipv4 */ #define FIMB4_PROTO 0x0001 #define FIMB4_TOS 0x0002 #define FIMB4_DADDR 0x0004 #define FIMB4_SADDR 0x0008 #define FIMB4_DPORT 0x0010 #define FIMB4_SPORT 0x0020 #define FIMB4_GPI 0x0040 #define FIMB4_ALL 0x007f /* for ipv6 */ #define FIMB6_PROTO 0x0100 #define FIMB6_TCLASS 0x0200 #define FIMB6_DADDR 0x0400 #define FIMB6_SADDR 0x0800 #define FIMB6_DPORT 0x1000 #define FIMB6_SPORT 0x2000 #define FIMB6_GPI 0x4000 #define FIMB6_FLABEL 0x8000 #define FIMB6_ALL 0xff00 #define FIMB_ALL (FIMB4_ALL|FIMB6_ALL) #define FIMB4_PORTS (FIMB4_DPORT|FIMB4_SPORT|FIMB4_GPI) #define FIMB6_PORTS (FIMB6_DPORT|FIMB6_SPORT|FIMB6_GPI) #endif /* ALTQ3_CLFIER_COMPAT */ /* * machine dependent clock * a 64bit high resolution time counter. 
*/ extern int machclk_usepcc; extern u_int32_t machclk_freq; extern u_int32_t machclk_per_tick; extern void init_machclk(void); extern u_int64_t read_machclk(void); /* * debug support */ #ifdef ALTQ_DEBUG #ifdef __STDC__ #define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, #e)) #else /* PCC */ #define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, "e")) #endif #else #define ASSERT(e) ((void)0) #endif /* * misc stuff for compatibility */ /* ioctl cmd type */ typedef u_long ioctlcmd_t; /* * queue macros: * the interface of TAILQ_LAST macro changed after the introduction * of softupdate. redefine it here to make it work with pre-2.2.7. */ #undef TAILQ_LAST #define TAILQ_LAST(head, headname) \ (*(((struct headname *)((head)->tqh_last))->tqh_last)) #ifndef TAILQ_EMPTY #define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) #endif #ifndef TAILQ_FOREACH #define TAILQ_FOREACH(var, head, field) \ for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field)) #endif /* macro for timeout/untimeout */ -#if (__FreeBSD_version > 300000) || defined(__NetBSD__) /* use callout */ #include #if (__FreeBSD_version > 500000) #define CALLOUT_INIT(c) callout_init((c), 0) #else #define CALLOUT_INIT(c) callout_init((c)) #endif #define CALLOUT_RESET(c,t,f,a) callout_reset((c),(t),(f),(a)) #define CALLOUT_STOP(c) callout_stop((c)) #if !defined(CALLOUT_INITIALIZER) && (__FreeBSD_version < 600000) #define CALLOUT_INITIALIZER { { { NULL } }, 0, NULL, NULL, 0 } -#endif -#elif defined(__OpenBSD__) -#include -/* callout structure as a wrapper of struct timeout */ -struct callout { - struct timeout c_to; -}; -#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0) -#define CALLOUT_RESET(c,t,f,a) do { if (!timeout_initialized(&(c)->c_to)) \ - timeout_set(&(c)->c_to, (f), (a)); \ - timeout_add(&(c)->c_to, (t)); } while (/*CONSTCOND*/ 0) -#define CALLOUT_STOP(c) timeout_del(&(c)->c_to) -#define CALLOUT_INITIALIZER { { { NULL }, NULL, NULL, 0, 0 } } 
-#else -/* use old-style timeout/untimeout */ -/* dummy callout structure */ -struct callout { - void *c_arg; /* function argument */ - void (*c_func)(void *); /* functiuon to call */ -}; -#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0) -#define CALLOUT_RESET(c,t,f,a) do { (c)->c_arg = (a); \ - (c)->c_func = (f); \ - timeout((f),(a),(t)); } while (/*CONSTCOND*/ 0) -#define CALLOUT_STOP(c) untimeout((c)->c_func,(c)->c_arg) -#define CALLOUT_INITIALIZER { NULL, NULL } -#endif -#if !defined(__FreeBSD__) -typedef void (timeout_t)(void *); #endif #define m_pktlen(m) ((m)->m_pkthdr.len) struct ifnet; struct mbuf; struct pf_altq; #ifdef ALTQ3_CLFIER_COMPAT struct flowinfo; #endif void *altq_lookup(char *, int); #ifdef ALTQ3_CLFIER_COMPAT int altq_extractflow(struct mbuf *, int, struct flowinfo *, u_int32_t); int acc_add_filter(struct acc_classifier *, struct flow_filter *, void *, u_long *); int acc_delete_filter(struct acc_classifier *, u_long); int acc_discard_filters(struct acc_classifier *, void *, int); void *acc_classify(void *, struct mbuf *, int); #endif u_int8_t read_dsfield(struct mbuf *, struct altq_pktattr *); void write_dsfield(struct mbuf *, struct altq_pktattr *, u_int8_t); void altq_assert(const char *, int, const char *); int tbr_set(struct ifaltq *, struct tb_profile *); int tbr_get(struct ifaltq *, struct tb_profile *); int altq_pfattach(struct pf_altq *); int altq_pfdetach(struct pf_altq *); int altq_add(struct pf_altq *); int altq_remove(struct pf_altq *); int altq_add_queue(struct pf_altq *); int altq_remove_queue(struct pf_altq *); int altq_getqstats(struct pf_altq *, void *, int *); int cbq_pfattach(struct pf_altq *); int cbq_add_altq(struct pf_altq *); int cbq_remove_altq(struct pf_altq *); int cbq_add_queue(struct pf_altq *); int cbq_remove_queue(struct pf_altq *); int cbq_getqstats(struct pf_altq *, void *, int *); int priq_pfattach(struct pf_altq *); int priq_add_altq(struct pf_altq *); int 
priq_remove_altq(struct pf_altq *); int priq_add_queue(struct pf_altq *); int priq_remove_queue(struct pf_altq *); int priq_getqstats(struct pf_altq *, void *, int *); int hfsc_pfattach(struct pf_altq *); int hfsc_add_altq(struct pf_altq *); int hfsc_remove_altq(struct pf_altq *); int hfsc_add_queue(struct pf_altq *); int hfsc_remove_queue(struct pf_altq *); int hfsc_getqstats(struct pf_altq *, void *, int *); #endif /* _KERNEL */ #endif /* _ALTQ_ALTQ_VAR_H_ */ Index: head/sys/net/altq/if_altq.h =================================================================== --- head/sys/net/altq/if_altq.h (revision 281641) +++ head/sys/net/altq/if_altq.h (revision 281642) @@ -1,190 +1,182 @@ -/* $FreeBSD$ */ -/* $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $ */ - -/* +/*- * Copyright (C) 1997-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $ + * $FreeBSD$ */ #ifndef _ALTQ_IF_ALTQ_H_ #define _ALTQ_IF_ALTQ_H_ -#ifdef __FreeBSD__ #include /* XXX */ #include /* XXX */ #include /* XXX */ -#endif -#ifdef _KERNEL_OPT -#include -#endif - struct altq_pktattr; struct tb_regulator; struct top_cdnr; /* * Structure defining a queue for a network interface. */ struct ifaltq { /* fields compatible with struct ifqueue */ struct mbuf *ifq_head; struct mbuf *ifq_tail; int ifq_len; int ifq_maxlen; -#ifdef __FreeBSD__ struct mtx ifq_mtx; -#endif /* driver owned queue (used for bulk dequeue and prepend) UNLOCKED */ struct mbuf *ifq_drv_head; struct mbuf *ifq_drv_tail; int ifq_drv_len; int ifq_drv_maxlen; /* alternate queueing related fields */ int altq_type; /* discipline type */ int altq_flags; /* flags (e.g. ready, in-use) */ void *altq_disc; /* for discipline-specific use */ struct ifnet *altq_ifp; /* back pointer to interface */ int (*altq_enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); struct mbuf *(*altq_dequeue)(struct ifaltq *, int); int (*altq_request)(struct ifaltq *, int, void *); /* classifier fields */ void *altq_clfier; /* classifier-specific use */ void *(*altq_classify)(void *, struct mbuf *, int); /* token bucket regulator */ struct tb_regulator *altq_tbr; /* input traffic conditioner (doesn't belong to the output queue...) 
*/ struct top_cdnr *altq_cdnr; }; #ifdef _KERNEL /* * packet attributes used by queueing disciplines. * pattr_class is a discipline-dependent scheduling class that is * set by a classifier. * pattr_hdr and pattr_af may be used by a discipline to access * the header within a mbuf. (e.g. ECN needs to update the CE bit) * note that pattr_hdr could be stale after m_pullup, though link * layer output routines usually don't use m_pullup. link-level * compression also invalidates these fields. thus, pattr_hdr needs * to be verified when a discipline touches the header. */ struct altq_pktattr { void *pattr_class; /* sched class set by classifier */ int pattr_af; /* address family */ caddr_t pattr_hdr; /* saved header position in mbuf */ }; /* * mbuf tag to carry a queue id (and hints for ECN). */ struct altq_tag { u_int32_t qid; /* queue id */ /* hints for ecn */ int af; /* address family */ void *hdr; /* saved header position in mbuf */ }; /* * a token-bucket regulator limits the rate that a network driver can * dequeue packets from the output queue. * modern cards are able to buffer a large amount of packets and dequeue * too many packets at a time. this bursty dequeue behavior makes it * impossible to schedule packets by queueing disciplines. * a token-bucket is used to control the burst size in a device * independent manner. 
*/ struct tb_regulator { int64_t tbr_rate; /* (scaled) token bucket rate */ int64_t tbr_depth; /* (scaled) token bucket depth */ int64_t tbr_token; /* (scaled) current token */ int64_t tbr_filluptime; /* (scaled) time to fill up bucket */ u_int64_t tbr_last; /* last time token was updated */ int tbr_lastop; /* last dequeue operation type needed for poll-and-dequeue */ }; /* if_altqflags */ #define ALTQF_READY 0x01 /* driver supports alternate queueing */ #define ALTQF_ENABLED 0x02 /* altq is in use */ #define ALTQF_CLASSIFY 0x04 /* classify packets */ #define ALTQF_CNDTNING 0x08 /* altq traffic conditioning is enabled */ #define ALTQF_DRIVER1 0x40 /* driver specific */ /* if_altqflags set internally only: */ #define ALTQF_CANTCHANGE (ALTQF_READY) /* altq_dequeue 2nd arg */ #define ALTDQ_REMOVE 1 /* dequeue mbuf from the queue */ #define ALTDQ_POLL 2 /* don't dequeue mbuf from the queue */ /* altq request types (currently only purge is defined) */ #define ALTRQ_PURGE 1 /* purge all packets */ #define ALTQ_IS_READY(ifq) ((ifq)->altq_flags & ALTQF_READY) #define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED) #define ALTQ_NEEDS_CLASSIFY(ifq) ((ifq)->altq_flags & ALTQF_CLASSIFY) #define ALTQ_IS_CNDTNING(ifq) ((ifq)->altq_flags & ALTQF_CNDTNING) #define ALTQ_SET_CNDTNING(ifq) ((ifq)->altq_flags |= ALTQF_CNDTNING) #define ALTQ_CLEAR_CNDTNING(ifq) ((ifq)->altq_flags &= ~ALTQF_CNDTNING) #define ALTQ_IS_ATTACHED(ifq) ((ifq)->altq_disc != NULL) #define ALTQ_ENQUEUE(ifq, m, pa, err) \ (err) = (*(ifq)->altq_enqueue)((ifq),(m),(pa)) #define ALTQ_DEQUEUE(ifq, m) \ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_REMOVE) #define ALTQ_POLL(ifq, m) \ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_POLL) #define ALTQ_PURGE(ifq) \ (void)(*(ifq)->altq_request)((ifq), ALTRQ_PURGE, (void *)0) #define ALTQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0) #define TBR_IS_ENABLED(ifq) ((ifq)->altq_tbr != NULL) extern int altq_attach(struct ifaltq *, int, void *, int (*)(struct ifaltq *, struct mbuf *, 
struct altq_pktattr *), struct mbuf *(*)(struct ifaltq *, int), int (*)(struct ifaltq *, int, void *), void *, void *(*)(void *, struct mbuf *, int)); extern int altq_detach(struct ifaltq *); extern int altq_enable(struct ifaltq *); extern int altq_disable(struct ifaltq *); extern struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int); extern int (*altq_input)(struct mbuf *, int); #if 0 /* ALTQ3_CLFIER_COMPAT */ void altq_etherclassify(struct ifaltq *, struct mbuf *, struct altq_pktattr *); #endif #endif /* _KERNEL */ #endif /* _ALTQ_IF_ALTQ_H_ */