Index: head/sbin/ipfw/altq.c
===================================================================
--- head/sbin/ipfw/altq.c	(revision 338208)
+++ head/sbin/ipfw/altq.c	(revision 338209)
@@ -1,151 +1,154 @@
 /*-
  * Copyright (c) 2002-2003 Luigi Rizzo
  * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
  * Copyright (c) 1994 Ugen J.S.Antsilevich
  *
  * Idea and grammar partially left from:
  * Copyright (c) 1993 Daniel Boulet
  *
  * Redistribution and use in source forms, with and without modification,
  * are permitted provided that this entire comment appears intact.
  *
  * Redistribution in binary form may occur without any restrictions.
  * Obviously, it would be nice if you gave credit where credit is due
  * but requiring it would be too onerous.
  *
  * This software is provided ``AS IS'' without any warranties of any kind.
  *
  * NEW command line interface for IP firewall facility
  *
  * $FreeBSD$
  *
  * altq interface
  */
 
+#define PFIOC_USE_LATEST
+
 #include <sys/types.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
 
 #include "ipfw2.h"
 
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sysexits.h>
 #include <unistd.h>
 
 #include <net/if.h>		/* IFNAMSIZ */
 #include <net/pfvar.h>
 #include <netinet/in.h>		/* in_addr */
 #include <netinet/ip_fw.h>
 
 /*
  * Map between current altq queue id numbers and names.
  */
 static TAILQ_HEAD(, pf_altq) altq_entries =
 	TAILQ_HEAD_INITIALIZER(altq_entries);
 
 void
 altq_set_enabled(int enabled)
 {
 	int pffd;
 
 	pffd = open("/dev/pf", O_RDWR);
 	if (pffd == -1)
 		err(EX_UNAVAILABLE,
 		    "altq support opening pf(4) control device");
 	if (enabled) {
 		if (ioctl(pffd, DIOCSTARTALTQ) != 0 && errno != EEXIST)
 			err(EX_UNAVAILABLE, "enabling altq");
 	} else {
 		if (ioctl(pffd, DIOCSTOPALTQ) != 0 && errno != ENOENT)
 			err(EX_UNAVAILABLE, "disabling altq");
 	}
 	close(pffd);
 }
 
 static void
 altq_fetch(void)
 {
 	struct pfioc_altq pfioc;
 	struct pf_altq *altq;
 	int pffd;
 	unsigned int mnr;
 	static int altq_fetched = 0;
 
 	if (altq_fetched)
 		return;
 	altq_fetched = 1;
 	pffd = open("/dev/pf", O_RDONLY);
 	if (pffd == -1) {
 		warn("altq support opening pf(4) control device");
 		return;
 	}
 	bzero(&pfioc, sizeof(pfioc));
+	pfioc.version = PFIOC_ALTQ_VERSION;
 	if (ioctl(pffd, DIOCGETALTQS, &pfioc) != 0) {
 		warn("altq support getting queue list");
 		close(pffd);
 		return;
 	}
 	mnr = pfioc.nr;
 	for (pfioc.nr = 0; pfioc.nr < mnr; pfioc.nr++) {
 		if (ioctl(pffd, DIOCGETALTQ, &pfioc) != 0) {
 			if (errno == EBUSY)
 				break;
 			warn("altq support getting queue list");
 			close(pffd);
 			return;
 		}
 		if (pfioc.altq.qid == 0)
 			continue;
 		altq = safe_calloc(1, sizeof(*altq));
 		*altq = pfioc.altq;
 		TAILQ_INSERT_TAIL(&altq_entries, altq, entries);
 	}
 	close(pffd);
 }
 
 u_int32_t
 altq_name_to_qid(const char *name)
 {
 	struct pf_altq *altq;
 
 	altq_fetch();
 	TAILQ_FOREACH(altq, &altq_entries, entries)
 		if (strcmp(name, altq->qname) == 0)
 			break;
 	if (altq == NULL)
 		errx(EX_DATAERR, "altq has no queue named `%s'", name);
 	return altq->qid;
 }
 
 static const char *
 altq_qid_to_name(u_int32_t qid)
 {
 	struct pf_altq *altq;
 
 	altq_fetch();
 	TAILQ_FOREACH(altq, &altq_entries, entries)
 		if (qid == altq->qid)
 			break;
 	if (altq == NULL)
 		return NULL;
 	return altq->qname;
 }
 
 void
 print_altq_cmd(struct buf_pr *bp, ipfw_insn_altq *altqptr)
 {
 	if (altqptr) {
 		const char *qname;
 
 		qname = altq_qid_to_name(altqptr->qid);
 		if (qname == NULL)
 			bprintf(bp, " altq ?<%u>", altqptr->qid);
 		else
 			bprintf(bp, " altq %s", qname);
 	}
 }
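
The altq.c hunks above show the versioned-ioctl pattern this commit introduces: define PFIOC_USE_LATEST before including the pf headers so the tool compiles against the newest ioctl ABI, then stamp each pfioc_altq request with PFIOC_ALTQ_VERSION so the kernel knows which struct layout the caller expects. The following is a minimal standalone sketch of a consumer following that pattern; it is not part of the commit, and assumes only the identifiers visible in this diff (PFIOC_USE_LATEST, PFIOC_ALTQ_VERSION, DIOCGETALTQS, DIOCGETALTQ) plus standard FreeBSD headers.

/*
 * Sketch (not from the commit): list ALTQ queue names and ids via the
 * versioned pf(4) ioctl interface, mirroring altq_fetch() above.
 */
#define PFIOC_USE_LATEST	/* select the latest ioctl ABI in pfvar.h */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>

#include <net/if.h>
#include <netinet/in.h>
#include <net/pfvar.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct pfioc_altq pa;
	unsigned int i, n;
	int fd;

	fd = open("/dev/pf", O_RDONLY);
	if (fd == -1)
		err(1, "open(/dev/pf)");

	memset(&pa, 0, sizeof(pa));
	/* Tell the kernel which struct pf_altq layout we compiled against. */
	pa.version = PFIOC_ALTQ_VERSION;
	if (ioctl(fd, DIOCGETALTQS, &pa) != 0)
		err(1, "DIOCGETALTQS");

	/* pa keeps the ticket (and version) from DIOCGETALTQS across calls. */
	n = pa.nr;
	for (i = 0; i < n; i++) {
		pa.nr = i;
		if (ioctl(fd, DIOCGETALTQ, &pa) != 0)
			err(1, "DIOCGETALTQ");
		if (pa.altq.qid != 0)
			printf("queue %s: qid %u\n", pa.altq.qname,
			    pa.altq.qid);
	}
	close(fd);
	return (0);
}
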
Index: head/sbin/pfctl/parse.y
===================================================================
--- head/sbin/pfctl/parse.y	(revision 338208)
+++ head/sbin/pfctl/parse.y	(revision 338209)
@@ -1,6352 +1,6354 @@
 /*	$OpenBSD: parse.y,v 1.554 2008/10/17 12:59:53 henning Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2001 Markus Friedl. All rights reserved.
  * Copyright (c) 2001 Daniel Hartmeier. All rights reserved.
  * Copyright (c) 2001 Theo de Raadt. All rights reserved.
  * Copyright (c) 2002,2003 Henning Brauer. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 %{
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#define PFIOC_USE_LATEST
+
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #ifdef __FreeBSD__
 #include <sys/sysctl.h>
 #endif
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/icmp6.h>
 #include <net/if.h>
 #include <net/pfvar.h>
 #include <arpa/inet.h>
 #include <net/altq/altq.h>
 #include <net/altq/altq_cbq.h>
 #include <net/altq/altq_codel.h>
 #include <net/altq/altq_priq.h>
 #include <net/altq/altq_hfsc.h>
 #include <net/altq/altq_fairq.h>
 
 #include <stdio.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <netdb.h>
 #include <stdarg.h>
 #include <errno.h>
 #include <string.h>
 #include <ctype.h>
 #include <math.h>
 #include <err.h>
 #include <limits.h>
 #include <pwd.h>
 #include <grp.h>
 
 #include "pfctl_parser.h"
 #include "pfctl.h"
 
 static struct pfctl	*pf = NULL;
 static int		 debug = 0;
 static int		 rulestate = 0;
 static u_int16_t	 returnicmpdefault =
 			    (ICMP_UNREACH << 8) | ICMP_UNREACH_PORT;
 static u_int16_t	 returnicmp6default =
 			    (ICMP6_DST_UNREACH << 8) | ICMP6_DST_UNREACH_NOPORT;
 static int		 blockpolicy = PFRULE_DROP;
 static int		 failpolicy = PFRULE_DROP;
 static int		 require_order = 1;
 static int		 default_statelock;
 
 static TAILQ_HEAD(files, file)	 files = TAILQ_HEAD_INITIALIZER(files);
 static struct file {
 	TAILQ_ENTRY(file)	 entry;
 	FILE			*stream;
 	char			*name;
 	int			 lineno;
 	int			 errors;
 } *file;
 struct file	*pushfile(const char *, int);
 int		 popfile(void);
 int		 check_file_secrecy(int, const char *);
 int		 yyparse(void);
 int		 yylex(void);
 int		 yyerror(const char *, ...);
 int		 kw_cmp(const void *, const void *);
 int		 lookup(char *);
 int		 lgetc(int);
 int		 lungetc(int);
 int		 findeol(void);
 
 static TAILQ_HEAD(symhead, sym)	 symhead = TAILQ_HEAD_INITIALIZER(symhead);
 struct sym {
 	TAILQ_ENTRY(sym)	 entry;
 	int			 used;
 	int			 persist;
 	char			*nam;
 	char			*val;
 };
 int		 symset(const char *, const char *, int);
 char		*symget(const char *);
 
 int		 atoul(char *, u_long *);
 
 enum {
 	PFCTL_STATE_NONE,
 	PFCTL_STATE_OPTION,
 	PFCTL_STATE_SCRUB,
 	PFCTL_STATE_QUEUE,
 	PFCTL_STATE_NAT,
 	PFCTL_STATE_FILTER
 };
 
 struct node_proto {
 	u_int8_t		 proto;
 	struct node_proto	*next;
 	struct node_proto	*tail;
 };
 
 struct node_port {
 	u_int16_t		 port[2];
 	u_int8_t		 op;
 	struct node_port	*next;
 	struct node_port	*tail;
 };
 
 struct node_uid {
 	uid_t			 uid[2];
 	u_int8_t		 op;
 	struct node_uid		*next;
 	struct node_uid		*tail;
 };
 
 struct node_gid {
 	gid_t			 gid[2];
 	u_int8_t		 op;
 	struct node_gid		*next;
 	struct node_gid		*tail;
 };
 
 struct node_icmp {
 	u_int8_t		 code;
 	u_int8_t		 type;
 	u_int8_t		 proto;
 	struct node_icmp	*next;
 	struct node_icmp	*tail;
 };
 
 enum	{ PF_STATE_OPT_MAX, PF_STATE_OPT_NOSYNC, PF_STATE_OPT_SRCTRACK,
 	    PF_STATE_OPT_MAX_SRC_STATES, PF_STATE_OPT_MAX_SRC_CONN,
 	    PF_STATE_OPT_MAX_SRC_CONN_RATE, PF_STATE_OPT_MAX_SRC_NODES,
 	    PF_STATE_OPT_OVERLOAD, PF_STATE_OPT_STATELOCK,
 	    PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY, };
 
 enum	{ PF_SRCTRACK_NONE, PF_SRCTRACK, PF_SRCTRACK_GLOBAL, PF_SRCTRACK_RULE };
 
 struct node_state_opt {
 	int			 type;
 	union {
 		u_int32_t	 max_states;
 		u_int32_t	 max_src_states;
 		u_int32_t	 max_src_conn;
 		struct {
 			u_int32_t	limit;
 			u_int32_t	seconds;
 		}		 max_src_conn_rate;
 		struct {
 			u_int8_t	flush;
 			char		tblname[PF_TABLE_NAME_SIZE];
 		}		 overload;
 		u_int32_t	 max_src_nodes;
 		u_int8_t	 src_track;
 		u_int32_t	 statelock;
 		struct {
 			int		number;
 			u_int32_t	seconds;
 		}		 timeout;
 	}			 data;
 	struct node_state_opt	*next;
 	struct node_state_opt	*tail;
 };
 
 struct peer {
 	struct node_host	*host;
 	struct node_port	*port;
 };
 
 static struct node_queue {
 	char			 queue[PF_QNAME_SIZE];
 	char			 parent[PF_QNAME_SIZE];
 	char			 ifname[IFNAMSIZ];
 	int			 scheduler;
 	struct node_queue	*next;
 	struct node_queue	*tail;
 }	*queues = NULL;
 
 struct node_qassign {
 	char		*qname;
 	char		*pqname;
 };
 
 static struct filter_opts {
 	int			 marker;
 #define FOM_FLAGS	0x01
 #define FOM_ICMP	0x02
 #define FOM_TOS		0x04
 #define FOM_KEEP	0x08
 #define FOM_SRCTRACK	0x10
 #define FOM_SETPRIO	0x0400
 #define FOM_PRIO	0x2000
 	struct node_uid		*uid;
 	struct node_gid		*gid;
 	struct {
 		u_int8_t	 b1;
 		u_int8_t	 b2;
 		u_int16_t	 w;
 		u_int16_t	 w2;
 	} flags;
 	struct node_icmp	*icmpspec;
 	u_int32_t		 tos;
 	u_int32_t		 prob;
 	struct {
 		int			 action;
 		struct node_state_opt	*options;
 	} keep;
 	int			 fragment;
 	int			 allowopts;
 	char			*label;
 	struct node_qassign	 queues;
 	char			*tag;
 	char			*match_tag;
 	u_int8_t		 match_tag_not;
 	u_int			 rtableid;
 	u_int8_t		 prio;
 	u_int8_t		 set_prio[2];
 	struct {
 		struct node_host	*addr;
 		u_int16_t		 port;
 	}			 divert;
 } filter_opts;
 
 static struct antispoof_opts {
 	char			*label;
 	u_int			 rtableid;
 } antispoof_opts;
 
 static struct scrub_opts {
 	int			 marker;
 #define SOM_MINTTL	0x01
 #define SOM_MAXMSS	0x02
 #define SOM_FRAGCACHE	0x04
 #define SOM_SETTOS	0x08
 	int			 nodf;
 	int			 minttl;
 	int			 maxmss;
 	int			 settos;
 	int			 fragcache;
 	int			 randomid;
 	int			 reassemble_tcp;
 	char			*match_tag;
 	u_int8_t		 match_tag_not;
 	u_int			 rtableid;
 } scrub_opts;
 
 static struct queue_opts {
 	int			 marker;
 #define QOM_BWSPEC	0x01
 #define QOM_SCHEDULER	0x02
 #define QOM_PRIORITY	0x04
 #define QOM_TBRSIZE	0x08
 #define QOM_QLIMIT	0x10
 	struct node_queue_bw	 queue_bwspec;
 	struct node_queue_opt	 scheduler;
 	int			 priority;
-	int			 tbrsize;
+	unsigned int		 tbrsize;
 	int			 qlimit;
 } queue_opts;
 
 static struct table_opts {
 	int			 flags;
 	int			 init_addr;
 	struct node_tinithead	 init_nodes;
 } table_opts;
 
 static struct pool_opts {
 	int			 marker;
 #define POM_TYPE		0x01
 #define POM_STICKYADDRESS	0x02
 	u_int8_t		 opts;
 	int			 type;
 	int			 staticport;
 	struct pf_poolhashkey	*key;
 } pool_opts;
 
 static struct codel_opts	 codel_opts;
 static struct node_hfsc_opts	 hfsc_opts;
 static struct node_fairq_opts	 fairq_opts;
 static struct node_state_opt	*keep_state_defaults = NULL;
 
 int		 disallow_table(struct node_host *, const char *);
 int		 disallow_urpf_failed(struct node_host *, const char *);
 int		 disallow_alias(struct node_host *, const char *);
 int		 rule_consistent(struct pf_rule *, int);
 int		 filter_consistent(struct pf_rule *, int);
 int		 nat_consistent(struct pf_rule *);
 int		 rdr_consistent(struct pf_rule *);
 int		 process_tabledef(char *, struct table_opts *);
 void		 expand_label_str(char *, size_t, const char *, const char *);
 void		 expand_label_if(const char *, char *, size_t, const char *);
 void		 expand_label_addr(const char *, char *, size_t,
u_int8_t, struct node_host *); void expand_label_port(const char *, char *, size_t, struct node_port *); void expand_label_proto(const char *, char *, size_t, u_int8_t); void expand_label_nr(const char *, char *, size_t); void expand_label(char *, size_t, const char *, u_int8_t, struct node_host *, struct node_port *, struct node_host *, struct node_port *, u_int8_t); void expand_rule(struct pf_rule *, struct node_if *, struct node_host *, struct node_proto *, struct node_os *, struct node_host *, struct node_port *, struct node_host *, struct node_port *, struct node_uid *, struct node_gid *, struct node_icmp *, const char *); int expand_altq(struct pf_altq *, struct node_if *, struct node_queue *, struct node_queue_bw bwspec, struct node_queue_opt *); int expand_queue(struct pf_altq *, struct node_if *, struct node_queue *, struct node_queue_bw, struct node_queue_opt *); int expand_skip_interface(struct node_if *); int check_rulestate(int); int getservice(char *); int rule_label(struct pf_rule *, char *); int rt_tableid_max(void); void mv_rules(struct pf_ruleset *, struct pf_ruleset *); void decide_address_family(struct node_host *, sa_family_t *); void remove_invalid_hosts(struct node_host **, sa_family_t *); int invalid_redirect(struct node_host *, sa_family_t); u_int16_t parseicmpspec(char *, sa_family_t); int kw_casecmp(const void *, const void *); int map_tos(char *string, int *); static TAILQ_HEAD(loadanchorshead, loadanchors) loadanchorshead = TAILQ_HEAD_INITIALIZER(loadanchorshead); struct loadanchors { TAILQ_ENTRY(loadanchors) entries; char *anchorname; char *filename; }; typedef struct { union { int64_t number; double probability; int i; char *string; u_int rtableid; struct { u_int8_t b1; u_int8_t b2; u_int16_t w; u_int16_t w2; } b; struct range { int a; int b; int t; } range; struct node_if *interface; struct node_proto *proto; struct node_icmp *icmp; struct node_host *host; struct node_os *os; struct node_port *port; struct node_uid *uid; struct node_gid *gid; struct node_state_opt *state_opt; struct peer peer; struct { struct peer src, dst; struct node_os *src_os; } fromto; struct { struct node_host *host; u_int8_t rt; u_int8_t pool_opts; sa_family_t af; struct pf_poolhashkey *key; } route; struct redirection { struct node_host *host; struct range rport; } *redirection; struct { int action; struct node_state_opt *options; } keep_state; struct { u_int8_t log; u_int8_t logif; u_int8_t quick; } logquick; struct { int neg; char *name; } tagged; struct pf_poolhashkey *hashkey; struct node_queue *queue; struct node_queue_opt queue_options; struct node_queue_bw queue_bwspec; struct node_qassign qassign; struct filter_opts filter_opts; struct antispoof_opts antispoof_opts; struct queue_opts queue_opts; struct scrub_opts scrub_opts; struct table_opts table_opts; struct pool_opts pool_opts; struct node_hfsc_opts hfsc_opts; struct node_fairq_opts fairq_opts; struct codel_opts codel_opts; } v; int lineno; } YYSTYPE; #define PPORT_RANGE 1 #define PPORT_STAR 2 int parseport(char *, struct range *r, int); #define DYNIF_MULTIADDR(addr) ((addr).type == PF_ADDR_DYNIFTL && \ (!((addr).iflags & PFI_AFLAG_NOALIAS) || \ !isdigit((addr).v.ifname[strlen((addr).v.ifname)-1]))) %} %token PASS BLOCK SCRUB RETURN IN OS OUT LOG QUICK ON FROM TO FLAGS %token RETURNRST RETURNICMP RETURNICMP6 PROTO INET INET6 ALL ANY ICMPTYPE %token ICMP6TYPE CODE KEEP MODULATE STATE PORT RDR NAT BINAT ARROW NODF %token MINTTL ERROR ALLOWOPTS FASTROUTE FILENAME ROUTETO DUPTO REPLYTO NO LABEL %token NOROUTE URPFFAILED 
FRAGMENT USER GROUP MAXMSS MAXIMUM TTL TOS DROP TABLE %token REASSEMBLE FRAGDROP FRAGCROP ANCHOR NATANCHOR RDRANCHOR BINATANCHOR %token SET OPTIMIZATION TIMEOUT LIMIT LOGINTERFACE BLOCKPOLICY FAILPOLICY %token RANDOMID REQUIREORDER SYNPROXY FINGERPRINTS NOSYNC DEBUG SKIP HOSTID %token ANTISPOOF FOR INCLUDE %token BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT PROBABILITY %token ALTQ CBQ CODEL PRIQ HFSC FAIRQ BANDWIDTH TBRSIZE LINKSHARE REALTIME %token UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL %token LOAD RULESET_OPTIMIZATION PRIO %token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE %token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY %token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS %token DIVERTTO DIVERTREPLY %token STRING %token NUMBER %token PORTBINARY %type interface if_list if_item_not if_item %type number icmptype icmp6type uid gid %type tos not yesno %type probability %type no dir af fragcache optimizer %type sourcetrack flush unaryop statelock %type action nataction natpasslog scrubaction %type flags flag blockspec prio %type portplain portstar portrange %type hashkey %type proto proto_list proto_item %type protoval %type icmpspec %type icmp_list icmp_item %type icmp6_list icmp6_item %type reticmpspec reticmp6spec %type fromto %type ipportspec from to %type ipspec toipspec xhost host dynaddr host_list %type redir_host_list redirspec %type route_host route_host_list routespec %type os xos os_list %type portspec port_list port_item %type uids uid_list uid_item %type gids gid_list gid_item %type route %type redirection redirpool %type label stringall tag anchorname %type string varstring numberstring %type keep %type state_opt_spec state_opt_list state_opt_item %type logquick quick log logopts logopt %type antispoof_ifspc antispoof_iflst antispoof_if %type qname %type qassign qassign_list qassign_item %type scheduler %type cbqflags_list cbqflags_item %type priqflags_list priqflags_item %type hfscopts_list hfscopts_item hfsc_opts %type fairqopts_list fairqopts_item fairq_opts %type codelopts_list codelopts_item codel_opts %type bandwidth %type filter_opts filter_opt filter_opts_l %type filter_sets filter_set filter_sets_l %type antispoof_opts antispoof_opt antispoof_opts_l %type queue_opts queue_opt queue_opts_l %type scrub_opts scrub_opt scrub_opts_l %type table_opts table_opt table_opts_l %type pool_opts pool_opt pool_opts_l %type tagged %type rtable %% ruleset : /* empty */ | ruleset include '\n' | ruleset '\n' | ruleset option '\n' | ruleset scrubrule '\n' | ruleset natrule '\n' | ruleset binatrule '\n' | ruleset pfrule '\n' | ruleset anchorrule '\n' | ruleset loadrule '\n' | ruleset altqif '\n' | ruleset queuespec '\n' | ruleset varset '\n' | ruleset antispoof '\n' | ruleset tabledef '\n' | '{' fakeanchor '}' '\n'; | ruleset error '\n' { file->errors++; } ; include : INCLUDE STRING { struct file *nfile; if ((nfile = pushfile($2, 0)) == NULL) { yyerror("failed to include file %s", $2); free($2); YYERROR; } free($2); file = nfile; lungetc('\n'); } ; /* * apply to previouslys specified rule: must be careful to note * what that is: pf or nat or binat or rdr */ fakeanchor : fakeanchor '\n' | fakeanchor anchorrule '\n' | fakeanchor binatrule '\n' | fakeanchor natrule '\n' | fakeanchor pfrule '\n' | fakeanchor error '\n' ; optimizer : string { if (!strcmp($1, "none")) $$ = 0; else if (!strcmp($1, "basic")) $$ = PF_OPTIMIZE_BASIC; else if (!strcmp($1, "profile")) $$ = PF_OPTIMIZE_BASIC | PF_OPTIMIZE_PROFILE; else { 
yyerror("unknown ruleset-optimization %s", $1); YYERROR; } } ; option : SET OPTIMIZATION STRING { if (check_rulestate(PFCTL_STATE_OPTION)) { free($3); YYERROR; } if (pfctl_set_optimization(pf, $3) != 0) { yyerror("unknown optimization %s", $3); free($3); YYERROR; } free($3); } | SET RULESET_OPTIMIZATION optimizer { if (!(pf->opts & PF_OPT_OPTIMIZE)) { pf->opts |= PF_OPT_OPTIMIZE; pf->optimize = $3; } } | SET TIMEOUT timeout_spec | SET TIMEOUT '{' optnl timeout_list '}' | SET LIMIT limit_spec | SET LIMIT '{' optnl limit_list '}' | SET LOGINTERFACE stringall { if (check_rulestate(PFCTL_STATE_OPTION)) { free($3); YYERROR; } if (pfctl_set_logif(pf, $3) != 0) { yyerror("error setting loginterface %s", $3); free($3); YYERROR; } free($3); } | SET HOSTID number { if ($3 == 0 || $3 > UINT_MAX) { yyerror("hostid must be non-zero"); YYERROR; } if (pfctl_set_hostid(pf, $3) != 0) { yyerror("error setting hostid %08x", $3); YYERROR; } } | SET BLOCKPOLICY DROP { if (pf->opts & PF_OPT_VERBOSE) printf("set block-policy drop\n"); if (check_rulestate(PFCTL_STATE_OPTION)) YYERROR; blockpolicy = PFRULE_DROP; } | SET BLOCKPOLICY RETURN { if (pf->opts & PF_OPT_VERBOSE) printf("set block-policy return\n"); if (check_rulestate(PFCTL_STATE_OPTION)) YYERROR; blockpolicy = PFRULE_RETURN; } | SET FAILPOLICY DROP { if (pf->opts & PF_OPT_VERBOSE) printf("set fail-policy drop\n"); if (check_rulestate(PFCTL_STATE_OPTION)) YYERROR; failpolicy = PFRULE_DROP; } | SET FAILPOLICY RETURN { if (pf->opts & PF_OPT_VERBOSE) printf("set fail-policy return\n"); if (check_rulestate(PFCTL_STATE_OPTION)) YYERROR; failpolicy = PFRULE_RETURN; } | SET REQUIREORDER yesno { if (pf->opts & PF_OPT_VERBOSE) printf("set require-order %s\n", $3 == 1 ? "yes" : "no"); require_order = $3; } | SET FINGERPRINTS STRING { if (pf->opts & PF_OPT_VERBOSE) printf("set fingerprints \"%s\"\n", $3); if (check_rulestate(PFCTL_STATE_OPTION)) { free($3); YYERROR; } if (!pf->anchor->name[0]) { if (pfctl_file_fingerprints(pf->dev, pf->opts, $3)) { yyerror("error loading " "fingerprints %s", $3); free($3); YYERROR; } } free($3); } | SET STATEPOLICY statelock { if (pf->opts & PF_OPT_VERBOSE) switch ($3) { case 0: printf("set state-policy floating\n"); break; case PFRULE_IFBOUND: printf("set state-policy if-bound\n"); break; } default_statelock = $3; } | SET DEBUG STRING { if (check_rulestate(PFCTL_STATE_OPTION)) { free($3); YYERROR; } if (pfctl_set_debug(pf, $3) != 0) { yyerror("error setting debuglevel %s", $3); free($3); YYERROR; } free($3); } | SET SKIP interface { if (expand_skip_interface($3) != 0) { yyerror("error setting skip interface(s)"); YYERROR; } } | SET STATEDEFAULTS state_opt_list { if (keep_state_defaults != NULL) { yyerror("cannot redefine state-defaults"); YYERROR; } keep_state_defaults = $3; } ; stringall : STRING { $$ = $1; } | ALL { if (($$ = strdup("all")) == NULL) { err(1, "stringall: strdup"); } } ; string : STRING string { if (asprintf(&$$, "%s %s", $1, $2) == -1) err(1, "string: asprintf"); free($1); free($2); } | STRING ; varstring : numberstring varstring { if (asprintf(&$$, "%s %s", $1, $2) == -1) err(1, "string: asprintf"); free($1); free($2); } | numberstring ; numberstring : NUMBER { char *s; if (asprintf(&s, "%lld", (long long)$1) == -1) { yyerror("string: asprintf"); YYERROR; } $$ = s; } | STRING ; varset : STRING '=' varstring { if (pf->opts & PF_OPT_VERBOSE) printf("%s = \"%s\"\n", $1, $3); if (symset($1, $3, 0) == -1) err(1, "cannot store variable %s", $1); free($1); free($3); } ; anchorname : STRING { $$ = $1; } | /* empty */ { 
$$ = NULL; } ; pfa_anchorlist : /* empty */ | pfa_anchorlist '\n' | pfa_anchorlist pfrule '\n' | pfa_anchorlist anchorrule '\n' ; pfa_anchor : '{' { char ta[PF_ANCHOR_NAME_SIZE]; struct pf_ruleset *rs; /* steping into a brace anchor */ pf->asd++; pf->bn++; pf->brace = 1; /* create a holding ruleset in the root */ snprintf(ta, PF_ANCHOR_NAME_SIZE, "_%d", pf->bn); rs = pf_find_or_create_ruleset(ta); if (rs == NULL) err(1, "pfa_anchor: pf_find_or_create_ruleset"); pf->astack[pf->asd] = rs->anchor; pf->anchor = rs->anchor; } '\n' pfa_anchorlist '}' { pf->alast = pf->anchor; pf->asd--; pf->anchor = pf->astack[pf->asd]; } | /* empty */ ; anchorrule : ANCHOR anchorname dir quick interface af proto fromto filter_opts pfa_anchor { struct pf_rule r; struct node_proto *proto; if (check_rulestate(PFCTL_STATE_FILTER)) { if ($2) free($2); YYERROR; } if ($2 && ($2[0] == '_' || strstr($2, "/_") != NULL)) { free($2); yyerror("anchor names beginning with '_' " "are reserved for internal use"); YYERROR; } memset(&r, 0, sizeof(r)); if (pf->astack[pf->asd + 1]) { /* move inline rules into relative location */ pf_anchor_setup(&r, &pf->astack[pf->asd]->ruleset, $2 ? $2 : pf->alast->name); if (r.anchor == NULL) err(1, "anchorrule: unable to " "create ruleset"); if (pf->alast != r.anchor) { if (r.anchor->match) { yyerror("inline anchor '%s' " "already exists", r.anchor->name); YYERROR; } mv_rules(&pf->alast->ruleset, &r.anchor->ruleset); } pf_remove_if_empty_ruleset(&pf->alast->ruleset); pf->alast = r.anchor; } else { if (!$2) { yyerror("anchors without explicit " "rules must specify a name"); YYERROR; } } r.direction = $3; r.quick = $4.quick; r.af = $6; r.prob = $9.prob; r.rtableid = $9.rtableid; if ($9.tag) if (strlcpy(r.tagname, $9.tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } if ($9.match_tag) if (strlcpy(r.match_tagname, $9.match_tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } r.match_tag_not = $9.match_tag_not; if (rule_label(&r, $9.label)) YYERROR; free($9.label); r.flags = $9.flags.b1; r.flagset = $9.flags.b2; if (($9.flags.b1 & $9.flags.b2) != $9.flags.b1) { yyerror("flags always false"); YYERROR; } if ($9.flags.b1 || $9.flags.b2 || $8.src_os) { for (proto = $7; proto != NULL && proto->proto != IPPROTO_TCP; proto = proto->next) ; /* nothing */ if (proto == NULL && $7 != NULL) { if ($9.flags.b1 || $9.flags.b2) yyerror( "flags only apply to tcp"); if ($8.src_os) yyerror( "OS fingerprinting only " "applies to tcp"); YYERROR; } } r.tos = $9.tos; if ($9.keep.action) { yyerror("cannot specify state handling " "on anchors"); YYERROR; } if ($9.match_tag) if (strlcpy(r.match_tagname, $9.match_tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } r.match_tag_not = $9.match_tag_not; if ($9.marker & FOM_PRIO) { if ($9.prio == 0) r.prio = PF_PRIO_ZERO; else r.prio = $9.prio; } if ($9.marker & FOM_SETPRIO) { r.set_prio[0] = $9.set_prio[0]; r.set_prio[1] = $9.set_prio[1]; r.scrub_flags |= PFSTATE_SETPRIO; } decide_address_family($8.src.host, &r.af); decide_address_family($8.dst.host, &r.af); expand_rule(&r, $5, NULL, $7, $8.src_os, $8.src.host, $8.src.port, $8.dst.host, $8.dst.port, $9.uid, $9.gid, $9.icmpspec, pf->astack[pf->asd + 1] ? 
pf->alast->name : $2); free($2); pf->astack[pf->asd + 1] = NULL; } | NATANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { free($2); YYERROR; } memset(&r, 0, sizeof(r)); r.action = PF_NAT; r.af = $4; r.rtableid = $7; decide_address_family($6.src.host, &r.af); decide_address_family($6.dst.host, &r.af); expand_rule(&r, $3, NULL, $5, $6.src_os, $6.src.host, $6.src.port, $6.dst.host, $6.dst.port, 0, 0, 0, $2); free($2); } | RDRANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { free($2); YYERROR; } memset(&r, 0, sizeof(r)); r.action = PF_RDR; r.af = $4; r.rtableid = $7; decide_address_family($6.src.host, &r.af); decide_address_family($6.dst.host, &r.af); if ($6.src.port != NULL) { yyerror("source port parameter not supported" " in rdr-anchor"); YYERROR; } if ($6.dst.port != NULL) { if ($6.dst.port->next != NULL) { yyerror("destination port list " "expansion not supported in " "rdr-anchor"); YYERROR; } else if ($6.dst.port->op != PF_OP_EQ) { yyerror("destination port operators" " not supported in rdr-anchor"); YYERROR; } r.dst.port[0] = $6.dst.port->port[0]; r.dst.port[1] = $6.dst.port->port[1]; r.dst.port_op = $6.dst.port->op; } expand_rule(&r, $3, NULL, $5, $6.src_os, $6.src.host, $6.src.port, $6.dst.host, $6.dst.port, 0, 0, 0, $2); free($2); } | BINATANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { free($2); YYERROR; } memset(&r, 0, sizeof(r)); r.action = PF_BINAT; r.af = $4; r.rtableid = $7; if ($5 != NULL) { if ($5->next != NULL) { yyerror("proto list expansion" " not supported in binat-anchor"); YYERROR; } r.proto = $5->proto; free($5); } if ($6.src.host != NULL || $6.src.port != NULL || $6.dst.host != NULL || $6.dst.port != NULL) { yyerror("fromto parameter not supported" " in binat-anchor"); YYERROR; } decide_address_family($6.src.host, &r.af); decide_address_family($6.dst.host, &r.af); pfctl_add_rule(pf, &r, $2); free($2); } ; loadrule : LOAD ANCHOR string FROM string { struct loadanchors *loadanchor; if (strlen(pf->anchor->name) + 1 + strlen($3) >= MAXPATHLEN) { yyerror("anchorname %s too long, max %u\n", $3, MAXPATHLEN - 1); free($3); YYERROR; } loadanchor = calloc(1, sizeof(struct loadanchors)); if (loadanchor == NULL) err(1, "loadrule: calloc"); if ((loadanchor->anchorname = malloc(MAXPATHLEN)) == NULL) err(1, "loadrule: malloc"); if (pf->anchor->name[0]) snprintf(loadanchor->anchorname, MAXPATHLEN, "%s/%s", pf->anchor->name, $3); else strlcpy(loadanchor->anchorname, $3, MAXPATHLEN); if ((loadanchor->filename = strdup($5)) == NULL) err(1, "loadrule: strdup"); TAILQ_INSERT_TAIL(&loadanchorshead, loadanchor, entries); free($3); free($5); }; scrubaction : no SCRUB { $$.b2 = $$.w = 0; if ($1) $$.b1 = PF_NOSCRUB; else $$.b1 = PF_SCRUB; } ; scrubrule : scrubaction dir logquick interface af proto fromto scrub_opts { struct pf_rule r; if (check_rulestate(PFCTL_STATE_SCRUB)) YYERROR; memset(&r, 0, sizeof(r)); r.action = $1.b1; r.direction = $2; r.log = $3.log; r.logif = $3.logif; if ($3.quick) { yyerror("scrub rules do not support 'quick'"); YYERROR; } r.af = $5; if ($8.nodf) r.rule_flag |= PFRULE_NODF; if ($8.randomid) r.rule_flag |= PFRULE_RANDOMID; if ($8.reassemble_tcp) { if (r.direction != PF_INOUT) { yyerror("reassemble tcp rules can not " "specify direction"); YYERROR; } r.rule_flag |= PFRULE_REASSEMBLE_TCP; } if ($8.minttl) r.min_ttl = $8.minttl; if ($8.maxmss) r.max_mss = $8.maxmss; if ($8.marker & SOM_SETTOS) { 
r.rule_flag |= PFRULE_SET_TOS; r.set_tos = $8.settos; } if ($8.fragcache) r.rule_flag |= $8.fragcache; if ($8.match_tag) if (strlcpy(r.match_tagname, $8.match_tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } r.match_tag_not = $8.match_tag_not; r.rtableid = $8.rtableid; expand_rule(&r, $4, NULL, $6, $7.src_os, $7.src.host, $7.src.port, $7.dst.host, $7.dst.port, NULL, NULL, NULL, ""); } ; scrub_opts : { bzero(&scrub_opts, sizeof scrub_opts); scrub_opts.rtableid = -1; } scrub_opts_l { $$ = scrub_opts; } | /* empty */ { bzero(&scrub_opts, sizeof scrub_opts); scrub_opts.rtableid = -1; $$ = scrub_opts; } ; scrub_opts_l : scrub_opts_l scrub_opt | scrub_opt ; scrub_opt : NODF { if (scrub_opts.nodf) { yyerror("no-df cannot be respecified"); YYERROR; } scrub_opts.nodf = 1; } | MINTTL NUMBER { if (scrub_opts.marker & SOM_MINTTL) { yyerror("min-ttl cannot be respecified"); YYERROR; } if ($2 < 0 || $2 > 255) { yyerror("illegal min-ttl value %d", $2); YYERROR; } scrub_opts.marker |= SOM_MINTTL; scrub_opts.minttl = $2; } | MAXMSS NUMBER { if (scrub_opts.marker & SOM_MAXMSS) { yyerror("max-mss cannot be respecified"); YYERROR; } if ($2 < 0 || $2 > 65535) { yyerror("illegal max-mss value %d", $2); YYERROR; } scrub_opts.marker |= SOM_MAXMSS; scrub_opts.maxmss = $2; } | SETTOS tos { if (scrub_opts.marker & SOM_SETTOS) { yyerror("set-tos cannot be respecified"); YYERROR; } scrub_opts.marker |= SOM_SETTOS; scrub_opts.settos = $2; } | fragcache { if (scrub_opts.marker & SOM_FRAGCACHE) { yyerror("fragcache cannot be respecified"); YYERROR; } scrub_opts.marker |= SOM_FRAGCACHE; scrub_opts.fragcache = $1; } | REASSEMBLE STRING { if (strcasecmp($2, "tcp") != 0) { yyerror("scrub reassemble supports only tcp, " "not '%s'", $2); free($2); YYERROR; } free($2); if (scrub_opts.reassemble_tcp) { yyerror("reassemble tcp cannot be respecified"); YYERROR; } scrub_opts.reassemble_tcp = 1; } | RANDOMID { if (scrub_opts.randomid) { yyerror("random-id cannot be respecified"); YYERROR; } scrub_opts.randomid = 1; } | RTABLE NUMBER { if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); YYERROR; } scrub_opts.rtableid = $2; } | not TAGGED string { scrub_opts.match_tag = $3; scrub_opts.match_tag_not = $1; } ; fragcache : FRAGMENT REASSEMBLE { $$ = 0; /* default */ } | FRAGMENT FRAGCROP { $$ = 0; } | FRAGMENT FRAGDROP { $$ = 0; } ; antispoof : ANTISPOOF logquick antispoof_ifspc af antispoof_opts { struct pf_rule r; struct node_host *h = NULL, *hh; struct node_if *i, *j; if (check_rulestate(PFCTL_STATE_FILTER)) YYERROR; for (i = $3; i; i = i->next) { bzero(&r, sizeof(r)); r.action = PF_DROP; r.direction = PF_IN; r.log = $2.log; r.logif = $2.logif; r.quick = $2.quick; r.af = $4; if (rule_label(&r, $5.label)) YYERROR; r.rtableid = $5.rtableid; j = calloc(1, sizeof(struct node_if)); if (j == NULL) err(1, "antispoof: calloc"); if (strlcpy(j->ifname, i->ifname, sizeof(j->ifname)) >= sizeof(j->ifname)) { free(j); yyerror("interface name too long"); YYERROR; } j->not = 1; if (i->dynamic) { h = calloc(1, sizeof(*h)); if (h == NULL) err(1, "address: calloc"); h->addr.type = PF_ADDR_DYNIFTL; set_ipmask(h, 128); if (strlcpy(h->addr.v.ifname, i->ifname, sizeof(h->addr.v.ifname)) >= sizeof(h->addr.v.ifname)) { free(h); yyerror( "interface name too long"); YYERROR; } hh = malloc(sizeof(*hh)); if (hh == NULL) err(1, "address: malloc"); bcopy(h, hh, sizeof(*hh)); h->addr.iflags = PFI_AFLAG_NETWORK; } else { h = ifa_lookup(j->ifname, PFI_AFLAG_NETWORK); hh = NULL; } if 
(h != NULL) expand_rule(&r, j, NULL, NULL, NULL, h, NULL, NULL, NULL, NULL, NULL, NULL, ""); if ((i->ifa_flags & IFF_LOOPBACK) == 0) { bzero(&r, sizeof(r)); r.action = PF_DROP; r.direction = PF_IN; r.log = $2.log; r.logif = $2.logif; r.quick = $2.quick; r.af = $4; if (rule_label(&r, $5.label)) YYERROR; r.rtableid = $5.rtableid; if (hh != NULL) h = hh; else h = ifa_lookup(i->ifname, 0); if (h != NULL) expand_rule(&r, NULL, NULL, NULL, NULL, h, NULL, NULL, NULL, NULL, NULL, NULL, ""); } else free(hh); } free($5.label); } ; antispoof_ifspc : FOR antispoof_if { $$ = $2; } | FOR '{' optnl antispoof_iflst '}' { $$ = $4; } ; antispoof_iflst : antispoof_if optnl { $$ = $1; } | antispoof_iflst comma antispoof_if optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; antispoof_if : if_item { $$ = $1; } | '(' if_item ')' { $2->dynamic = 1; $$ = $2; } ; antispoof_opts : { bzero(&antispoof_opts, sizeof antispoof_opts); antispoof_opts.rtableid = -1; } antispoof_opts_l { $$ = antispoof_opts; } | /* empty */ { bzero(&antispoof_opts, sizeof antispoof_opts); antispoof_opts.rtableid = -1; $$ = antispoof_opts; } ; antispoof_opts_l : antispoof_opts_l antispoof_opt | antispoof_opt ; antispoof_opt : label { if (antispoof_opts.label) { yyerror("label cannot be redefined"); YYERROR; } antispoof_opts.label = $1; } | RTABLE NUMBER { if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); YYERROR; } antispoof_opts.rtableid = $2; } ; not : '!' { $$ = 1; } | /* empty */ { $$ = 0; } ; tabledef : TABLE '<' STRING '>' table_opts { struct node_host *h, *nh; struct node_tinit *ti, *nti; if (strlen($3) >= PF_TABLE_NAME_SIZE) { yyerror("table name too long, max %d chars", PF_TABLE_NAME_SIZE - 1); free($3); YYERROR; } if (pf->loadopt & PFCTL_FLAG_TABLE) if (process_tabledef($3, &$5)) { free($3); YYERROR; } free($3); for (ti = SIMPLEQ_FIRST(&$5.init_nodes); ti != SIMPLEQ_END(&$5.init_nodes); ti = nti) { if (ti->file) free(ti->file); for (h = ti->host; h != NULL; h = nh) { nh = h->next; free(h); } nti = SIMPLEQ_NEXT(ti, entries); free(ti); } } ; table_opts : { bzero(&table_opts, sizeof table_opts); SIMPLEQ_INIT(&table_opts.init_nodes); } table_opts_l { $$ = table_opts; } | /* empty */ { bzero(&table_opts, sizeof table_opts); SIMPLEQ_INIT(&table_opts.init_nodes); $$ = table_opts; } ; table_opts_l : table_opts_l table_opt | table_opt ; table_opt : STRING { if (!strcmp($1, "const")) table_opts.flags |= PFR_TFLAG_CONST; else if (!strcmp($1, "persist")) table_opts.flags |= PFR_TFLAG_PERSIST; else if (!strcmp($1, "counters")) table_opts.flags |= PFR_TFLAG_COUNTERS; else { yyerror("invalid table option '%s'", $1); free($1); YYERROR; } free($1); } | '{' optnl '}' { table_opts.init_addr = 1; } | '{' optnl host_list '}' { struct node_host *n; struct node_tinit *ti; for (n = $3; n != NULL; n = n->next) { switch (n->addr.type) { case PF_ADDR_ADDRMASK: continue; /* ok */ case PF_ADDR_RANGE: yyerror("address ranges are not " "permitted inside tables"); break; case PF_ADDR_DYNIFTL: yyerror("dynamic addresses are not " "permitted inside tables"); break; case PF_ADDR_TABLE: yyerror("tables cannot contain tables"); break; case PF_ADDR_NOROUTE: yyerror("\"no-route\" is not permitted " "inside tables"); break; case PF_ADDR_URPFFAILED: yyerror("\"urpf-failed\" is not " "permitted inside tables"); break; default: yyerror("unknown address type %d", n->addr.type); } YYERROR; } if (!(ti = calloc(1, sizeof(*ti)))) err(1, "table_opt: calloc"); ti->host = $3; SIMPLEQ_INSERT_TAIL(&table_opts.init_nodes, ti, entries); table_opts.init_addr = 1; 
 		}
 		| FILENAME STRING	{
 			struct node_tinit	*ti;
 
 			if (!(ti = calloc(1, sizeof(*ti))))
 				err(1, "table_opt: calloc");
 			ti->file = $2;
 			SIMPLEQ_INSERT_TAIL(&table_opts.init_nodes, ti,
 			    entries);
 			table_opts.init_addr = 1;
 		}
 		;
 
 altqif		: ALTQ interface queue_opts QUEUE qassign {
 			struct pf_altq	a;
 
 			if (check_rulestate(PFCTL_STATE_QUEUE))
 				YYERROR;
 
 			memset(&a, 0, sizeof(a));
 			if ($3.scheduler.qtype == ALTQT_NONE) {
 				yyerror("no scheduler specified!");
 				YYERROR;
 			}
 			a.scheduler = $3.scheduler.qtype;
 			a.qlimit = $3.qlimit;
 			a.tbrsize = $3.tbrsize;
 			if ($5 == NULL && $3.scheduler.qtype != ALTQT_CODEL) {
 				yyerror("no child queues specified");
 				YYERROR;
 			}
 			if (expand_altq(&a, $2, $5, $3.queue_bwspec,
 			    &$3.scheduler))
 				YYERROR;
 		}
 		;
 
 queuespec	: QUEUE STRING interface queue_opts qassign {
 			struct pf_altq	a;
 
 			if (check_rulestate(PFCTL_STATE_QUEUE)) {
 				free($2);
 				YYERROR;
 			}
 
 			memset(&a, 0, sizeof(a));
 			if (strlcpy(a.qname, $2, sizeof(a.qname)) >=
 			    sizeof(a.qname)) {
 				yyerror("queue name too long (max "
 				    "%d chars)", PF_QNAME_SIZE-1);
 				free($2);
 				YYERROR;
 			}
 			free($2);
 			if ($4.tbrsize) {
 				yyerror("cannot specify tbrsize for queue");
 				YYERROR;
 			}
 			if ($4.priority > 255) {
 				yyerror("priority out of range: max 255");
 				YYERROR;
 			}
 			a.priority = $4.priority;
 			a.qlimit = $4.qlimit;
 			a.scheduler = $4.scheduler.qtype;
 			if (expand_queue(&a, $3, $5, $4.queue_bwspec,
 			    &$4.scheduler)) {
 				yyerror("errors in queue definition");
 				YYERROR;
 			}
 		}
 		;
 
 queue_opts	:	{
 			bzero(&queue_opts, sizeof queue_opts);
 			queue_opts.priority = DEFAULT_PRIORITY;
 			queue_opts.qlimit = DEFAULT_QLIMIT;
 			queue_opts.scheduler.qtype = ALTQT_NONE;
 			queue_opts.queue_bwspec.bw_percent = 100;
 		}
 		    queue_opts_l
 			{ $$ = queue_opts; }
 		| /* empty */ {
 			bzero(&queue_opts, sizeof queue_opts);
 			queue_opts.priority = DEFAULT_PRIORITY;
 			queue_opts.qlimit = DEFAULT_QLIMIT;
 			queue_opts.scheduler.qtype = ALTQT_NONE;
 			queue_opts.queue_bwspec.bw_percent = 100;
 			$$ = queue_opts;
 		}
 		;
 
 queue_opts_l	: queue_opts_l queue_opt
 		| queue_opt
 		;
 
 queue_opt	: BANDWIDTH bandwidth	{
 			if (queue_opts.marker & QOM_BWSPEC) {
 				yyerror("bandwidth cannot be respecified");
 				YYERROR;
 			}
 			queue_opts.marker |= QOM_BWSPEC;
 			queue_opts.queue_bwspec = $2;
 		}
 		| PRIORITY NUMBER	{
 			if (queue_opts.marker & QOM_PRIORITY) {
 				yyerror("priority cannot be respecified");
 				YYERROR;
 			}
 			if ($2 < 0 || $2 > 255) {
 				yyerror("priority out of range: max 255");
 				YYERROR;
 			}
 			queue_opts.marker |= QOM_PRIORITY;
 			queue_opts.priority = $2;
 		}
 		| QLIMIT NUMBER	{
 			if (queue_opts.marker & QOM_QLIMIT) {
 				yyerror("qlimit cannot be respecified");
 				YYERROR;
 			}
 			if ($2 < 0 || $2 > 65535) {
 				yyerror("qlimit out of range: max 65535");
 				YYERROR;
 			}
 			queue_opts.marker |= QOM_QLIMIT;
 			queue_opts.qlimit = $2;
 		}
 		| scheduler	{
 			if (queue_opts.marker & QOM_SCHEDULER) {
 				yyerror("scheduler cannot be respecified");
 				YYERROR;
 			}
 			queue_opts.marker |= QOM_SCHEDULER;
 			queue_opts.scheduler = $1;
 		}
 		| TBRSIZE NUMBER	{
 			if (queue_opts.marker & QOM_TBRSIZE) {
 				yyerror("tbrsize cannot be respecified");
 				YYERROR;
 			}
-			if ($2 < 0 || $2 > 65535) {
-				yyerror("tbrsize too big: max 65535");
+			if ($2 < 0 || $2 > UINT_MAX) {
+				yyerror("tbrsize too big: max %u", UINT_MAX);
 				YYERROR;
 			}
 			queue_opts.marker |= QOM_TBRSIZE;
 			queue_opts.tbrsize = $2;
 		}
 		;
 
 bandwidth	: STRING {
 			double	 bps;
 			char	*cp;
 
 			$$.bw_percent = 0;
 
 			bps = strtod($1, &cp);
 			if (cp != NULL) {
 				if (strlen(cp) > 1) {
 					char *cu = cp + 1;
 					if (!strcmp(cu, "Bit") ||
 					    !strcmp(cu, "B") ||
 					    !strcmp(cu, "bit") ||
 					    !strcmp(cu, "b")) {
 						*cu = 0;
 					}
 				}
 				if (!strcmp(cp, "b"))
 					; /* nothing */
 				else if (!strcmp(cp, "K"))
 					bps *= 1000;
 				else if (!strcmp(cp, "M"))
 					bps *= 1000 * 1000;
 				else if (!strcmp(cp, "G"))
 					bps *= 1000 * 1000 * 1000;
 				else if (!strcmp(cp, "%")) {
 					if (bps < 0 || bps > 100) {
 						yyerror("bandwidth spec "
 						    "out of range");
 						free($1);
 						YYERROR;
 					}
 					$$.bw_percent = bps;
 					bps = 0;
 				} else {
 					yyerror("unknown unit %s", cp);
 					free($1);
 					YYERROR;
 				}
 			}
 			free($1);
-			$$.bw_absolute = (u_int32_t)bps;
+			$$.bw_absolute = (u_int64_t)bps;
 		}
 		| NUMBER {
-			if ($1 < 0 || $1 > UINT_MAX) {
+			if ($1 < 0 || $1 >= LLONG_MAX) {
 				yyerror("bandwidth number too big");
 				YYERROR;
 			}
 			$$.bw_percent = 0;
 			$$.bw_absolute = $1;
 		}
 		;
 
 scheduler	: CBQ				{
 			$$.qtype = ALTQT_CBQ;
 			$$.data.cbq_opts.flags = 0;
 		}
 		| CBQ '(' cbqflags_list ')'	{
 			$$.qtype = ALTQT_CBQ;
 			$$.data.cbq_opts.flags = $3;
 		}
 		| PRIQ				{
 			$$.qtype = ALTQT_PRIQ;
 			$$.data.priq_opts.flags = 0;
 		}
 		| PRIQ '(' priqflags_list ')'	{
 			$$.qtype = ALTQT_PRIQ;
 			$$.data.priq_opts.flags = $3;
 		}
 		| HFSC				{
 			$$.qtype = ALTQT_HFSC;
 			bzero(&$$.data.hfsc_opts,
 			    sizeof(struct node_hfsc_opts));
 		}
 		| HFSC '(' hfsc_opts ')'	{
 			$$.qtype = ALTQT_HFSC;
 			$$.data.hfsc_opts = $3;
 		}
 		| FAIRQ				{
 			$$.qtype = ALTQT_FAIRQ;
 			bzero(&$$.data.fairq_opts,
 			    sizeof(struct node_fairq_opts));
 		}
 		| FAIRQ '(' fairq_opts ')'	{
 			$$.qtype = ALTQT_FAIRQ;
 			$$.data.fairq_opts = $3;
 		}
 		| CODEL				{
 			$$.qtype = ALTQT_CODEL;
 			bzero(&$$.data.codel_opts,
 			    sizeof(struct codel_opts));
 		}
 		| CODEL '(' codel_opts ')'	{
 			$$.qtype = ALTQT_CODEL;
 			$$.data.codel_opts = $3;
 		}
 		;
 
 cbqflags_list	: cbqflags_item				{ $$ |= $1; }
 		| cbqflags_list comma cbqflags_item	{ $$ |= $3; }
 		;
 
 cbqflags_item	: STRING {
 			if (!strcmp($1, "default"))
 				$$ = CBQCLF_DEFCLASS;
 			else if (!strcmp($1, "borrow"))
 				$$ = CBQCLF_BORROW;
 			else if (!strcmp($1, "red"))
 				$$ = CBQCLF_RED;
 			else if (!strcmp($1, "ecn"))
 				$$ = CBQCLF_RED|CBQCLF_ECN;
 			else if (!strcmp($1, "rio"))
 				$$ = CBQCLF_RIO;
 			else if (!strcmp($1, "codel"))
 				$$ = CBQCLF_CODEL;
 			else {
 				yyerror("unknown cbq flag \"%s\"", $1);
 				free($1);
 				YYERROR;
 			}
 			free($1);
 		}
 		;
 
 priqflags_list	: priqflags_item			{ $$ |= $1; }
 		| priqflags_list comma priqflags_item	{ $$ |= $3; }
 		;
 
 priqflags_item	: STRING {
 			if (!strcmp($1, "default"))
 				$$ = PRCF_DEFAULTCLASS;
 			else if (!strcmp($1, "red"))
 				$$ = PRCF_RED;
 			else if (!strcmp($1, "ecn"))
 				$$ = PRCF_RED|PRCF_ECN;
 			else if (!strcmp($1, "rio"))
 				$$ = PRCF_RIO;
 			else if (!strcmp($1, "codel"))
 				$$ = PRCF_CODEL;
 			else {
 				yyerror("unknown priq flag \"%s\"", $1);
 				free($1);
 				YYERROR;
 			}
 			free($1);
 		}
 		;
 
 hfsc_opts	:	{
 				bzero(&hfsc_opts,
 				    sizeof(struct node_hfsc_opts));
 			}
 		    hfscopts_list {
 			$$ = hfsc_opts;
 		}
 		;
 
 hfscopts_list	: hfscopts_item
 		| hfscopts_list comma hfscopts_item
 		;
 
 hfscopts_item	: LINKSHARE bandwidth {
 			if (hfsc_opts.linkshare.used) {
 				yyerror("linkshare already specified");
 				YYERROR;
 			}
 			hfsc_opts.linkshare.m2 = $2;
 			hfsc_opts.linkshare.used = 1;
 		}
 		| LINKSHARE '(' bandwidth comma NUMBER comma bandwidth ')'
 		    {
 			if ($5 < 0 || $5 > INT_MAX) {
 				yyerror("timing in curve out of range");
 				YYERROR;
 			}
 			if (hfsc_opts.linkshare.used) {
 				yyerror("linkshare already specified");
 				YYERROR;
 			}
 			hfsc_opts.linkshare.m1 = $3;
 			hfsc_opts.linkshare.d = $5;
 			hfsc_opts.linkshare.m2 = $7;
 			hfsc_opts.linkshare.used = 1;
 		}
 		| REALTIME bandwidth {
 			if (hfsc_opts.realtime.used) {
 				yyerror("realtime already specified");
 				YYERROR;
 			}
 			hfsc_opts.realtime.m2 = $2;
 			hfsc_opts.realtime.used = 1;
 		}
 		| REALTIME '(' bandwidth comma NUMBER comma bandwidth ')'
 		    {
 			if ($5 < 0 || $5 > INT_MAX) {
 				yyerror("timing in curve out of range");
 				YYERROR;
 			}
 			if (hfsc_opts.realtime.used) {
 				yyerror("realtime already specified");
 				YYERROR;
 			}
 			hfsc_opts.realtime.m1 = $3;
 			hfsc_opts.realtime.d = $5;
 			hfsc_opts.realtime.m2 = $7;
 			hfsc_opts.realtime.used = 1;
 		}
 		| UPPERLIMIT bandwidth {
 			if (hfsc_opts.upperlimit.used) {
 				yyerror("upperlimit already specified");
 				YYERROR;
 			}
hfsc_opts.upperlimit.m2 = $2; hfsc_opts.upperlimit.used = 1; } | UPPERLIMIT '(' bandwidth comma NUMBER comma bandwidth ')' { if ($5 < 0 || $5 > INT_MAX) { yyerror("timing in curve out of range"); YYERROR; } if (hfsc_opts.upperlimit.used) { yyerror("upperlimit already specified"); YYERROR; } hfsc_opts.upperlimit.m1 = $3; hfsc_opts.upperlimit.d = $5; hfsc_opts.upperlimit.m2 = $7; hfsc_opts.upperlimit.used = 1; } | STRING { if (!strcmp($1, "default")) hfsc_opts.flags |= HFCF_DEFAULTCLASS; else if (!strcmp($1, "red")) hfsc_opts.flags |= HFCF_RED; else if (!strcmp($1, "ecn")) hfsc_opts.flags |= HFCF_RED|HFCF_ECN; else if (!strcmp($1, "rio")) hfsc_opts.flags |= HFCF_RIO; else if (!strcmp($1, "codel")) hfsc_opts.flags |= HFCF_CODEL; else { yyerror("unknown hfsc flag \"%s\"", $1); free($1); YYERROR; } free($1); } ; fairq_opts : { bzero(&fairq_opts, sizeof(struct node_fairq_opts)); } fairqopts_list { $$ = fairq_opts; } ; fairqopts_list : fairqopts_item | fairqopts_list comma fairqopts_item ; fairqopts_item : LINKSHARE bandwidth { if (fairq_opts.linkshare.used) { yyerror("linkshare already specified"); YYERROR; } fairq_opts.linkshare.m2 = $2; fairq_opts.linkshare.used = 1; } | LINKSHARE '(' bandwidth number bandwidth ')' { if (fairq_opts.linkshare.used) { yyerror("linkshare already specified"); YYERROR; } fairq_opts.linkshare.m1 = $3; fairq_opts.linkshare.d = $4; fairq_opts.linkshare.m2 = $5; fairq_opts.linkshare.used = 1; } | HOGS bandwidth { fairq_opts.hogs_bw = $2; } | BUCKETS number { fairq_opts.nbuckets = $2; } | STRING { if (!strcmp($1, "default")) fairq_opts.flags |= FARF_DEFAULTCLASS; else if (!strcmp($1, "red")) fairq_opts.flags |= FARF_RED; else if (!strcmp($1, "ecn")) fairq_opts.flags |= FARF_RED|FARF_ECN; else if (!strcmp($1, "rio")) fairq_opts.flags |= FARF_RIO; else if (!strcmp($1, "codel")) fairq_opts.flags |= FARF_CODEL; else { yyerror("unknown fairq flag \"%s\"", $1); free($1); YYERROR; } free($1); } ; codel_opts : { bzero(&codel_opts, sizeof(struct codel_opts)); } codelopts_list { $$ = codel_opts; } ; codelopts_list : codelopts_item | codelopts_list comma codelopts_item ; codelopts_item : INTERVAL number { if (codel_opts.interval) { yyerror("interval already specified"); YYERROR; } codel_opts.interval = $2; } | TARGET number { if (codel_opts.target) { yyerror("target already specified"); YYERROR; } codel_opts.target = $2; } | STRING { if (!strcmp($1, "ecn")) codel_opts.ecn = 1; else { yyerror("unknown codel option \"%s\"", $1); free($1); YYERROR; } free($1); } ; qassign : /* empty */ { $$ = NULL; } | qassign_item { $$ = $1; } | '{' optnl qassign_list '}' { $$ = $3; } ; qassign_list : qassign_item optnl { $$ = $1; } | qassign_list comma qassign_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; qassign_item : STRING { $$ = calloc(1, sizeof(struct node_queue)); if ($$ == NULL) err(1, "qassign_item: calloc"); if (strlcpy($$->queue, $1, sizeof($$->queue)) >= sizeof($$->queue)) { yyerror("queue name '%s' too long (max " "%d chars)", $1, sizeof($$->queue)-1); free($1); free($$); YYERROR; } free($1); $$->next = NULL; $$->tail = $$; } ; pfrule : action dir logquick interface route af proto fromto filter_opts { struct pf_rule r; struct node_state_opt *o; struct node_proto *proto; int srctrack = 0; int statelock = 0; int adaptive = 0; int defaults = 0; if (check_rulestate(PFCTL_STATE_FILTER)) YYERROR; memset(&r, 0, sizeof(r)); r.action = $1.b1; switch ($1.b2) { case PFRULE_RETURNRST: r.rule_flag |= PFRULE_RETURNRST; r.return_ttl = $1.w; break; case PFRULE_RETURNICMP: r.rule_flag 
|= PFRULE_RETURNICMP; r.return_icmp = $1.w; r.return_icmp6 = $1.w2; break; case PFRULE_RETURN: r.rule_flag |= PFRULE_RETURN; r.return_icmp = $1.w; r.return_icmp6 = $1.w2; break; } r.direction = $2; r.log = $3.log; r.logif = $3.logif; r.quick = $3.quick; r.prob = $9.prob; r.rtableid = $9.rtableid; if ($9.marker & FOM_PRIO) { if ($9.prio == 0) r.prio = PF_PRIO_ZERO; else r.prio = $9.prio; } if ($9.marker & FOM_SETPRIO) { r.set_prio[0] = $9.set_prio[0]; r.set_prio[1] = $9.set_prio[1]; r.scrub_flags |= PFSTATE_SETPRIO; } r.af = $6; if ($9.tag) if (strlcpy(r.tagname, $9.tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } if ($9.match_tag) if (strlcpy(r.match_tagname, $9.match_tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } r.match_tag_not = $9.match_tag_not; if (rule_label(&r, $9.label)) YYERROR; free($9.label); r.flags = $9.flags.b1; r.flagset = $9.flags.b2; if (($9.flags.b1 & $9.flags.b2) != $9.flags.b1) { yyerror("flags always false"); YYERROR; } if ($9.flags.b1 || $9.flags.b2 || $8.src_os) { for (proto = $7; proto != NULL && proto->proto != IPPROTO_TCP; proto = proto->next) ; /* nothing */ if (proto == NULL && $7 != NULL) { if ($9.flags.b1 || $9.flags.b2) yyerror( "flags only apply to tcp"); if ($8.src_os) yyerror( "OS fingerprinting only " "apply to tcp"); YYERROR; } #if 0 if (($9.flags.b1 & parse_flags("S")) == 0 && $8.src_os) { yyerror("OS fingerprinting requires " "the SYN TCP flag (flags S/SA)"); YYERROR; } #endif } r.tos = $9.tos; r.keep_state = $9.keep.action; o = $9.keep.options; /* 'keep state' by default on pass rules. */ if (!r.keep_state && !r.action && !($9.marker & FOM_KEEP)) { r.keep_state = PF_STATE_NORMAL; o = keep_state_defaults; defaults = 1; } while (o) { struct node_state_opt *p = o; switch (o->type) { case PF_STATE_OPT_MAX: if (r.max_states) { yyerror("state option 'max' " "multiple definitions"); YYERROR; } r.max_states = o->data.max_states; break; case PF_STATE_OPT_NOSYNC: if (r.rule_flag & PFRULE_NOSYNC) { yyerror("state option 'sync' " "multiple definitions"); YYERROR; } r.rule_flag |= PFRULE_NOSYNC; break; case PF_STATE_OPT_SRCTRACK: if (srctrack) { yyerror("state option " "'source-track' " "multiple definitions"); YYERROR; } srctrack = o->data.src_track; r.rule_flag |= PFRULE_SRCTRACK; break; case PF_STATE_OPT_MAX_SRC_STATES: if (r.max_src_states) { yyerror("state option " "'max-src-states' " "multiple definitions"); YYERROR; } if (o->data.max_src_states == 0) { yyerror("'max-src-states' must " "be > 0"); YYERROR; } r.max_src_states = o->data.max_src_states; r.rule_flag |= PFRULE_SRCTRACK; break; case PF_STATE_OPT_OVERLOAD: if (r.overload_tblname[0]) { yyerror("multiple 'overload' " "table definitions"); YYERROR; } if (strlcpy(r.overload_tblname, o->data.overload.tblname, PF_TABLE_NAME_SIZE) >= PF_TABLE_NAME_SIZE) { yyerror("state option: " "strlcpy"); YYERROR; } r.flush = o->data.overload.flush; break; case PF_STATE_OPT_MAX_SRC_CONN: if (r.max_src_conn) { yyerror("state option " "'max-src-conn' " "multiple definitions"); YYERROR; } if (o->data.max_src_conn == 0) { yyerror("'max-src-conn' " "must be > 0"); YYERROR; } r.max_src_conn = o->data.max_src_conn; r.rule_flag |= PFRULE_SRCTRACK | PFRULE_RULESRCTRACK; break; case PF_STATE_OPT_MAX_SRC_CONN_RATE: if (r.max_src_conn_rate.limit) { yyerror("state option " "'max-src-conn-rate' " "multiple definitions"); YYERROR; } if (!o->data.max_src_conn_rate.limit || 
!o->data.max_src_conn_rate.seconds) { yyerror("'max-src-conn-rate' " "values must be > 0"); YYERROR; } if (o->data.max_src_conn_rate.limit > PF_THRESHOLD_MAX) { yyerror("'max-src-conn-rate' " "maximum rate must be < %u", PF_THRESHOLD_MAX); YYERROR; } r.max_src_conn_rate.limit = o->data.max_src_conn_rate.limit; r.max_src_conn_rate.seconds = o->data.max_src_conn_rate.seconds; r.rule_flag |= PFRULE_SRCTRACK | PFRULE_RULESRCTRACK; break; case PF_STATE_OPT_MAX_SRC_NODES: if (r.max_src_nodes) { yyerror("state option " "'max-src-nodes' " "multiple definitions"); YYERROR; } if (o->data.max_src_nodes == 0) { yyerror("'max-src-nodes' must " "be > 0"); YYERROR; } r.max_src_nodes = o->data.max_src_nodes; r.rule_flag |= PFRULE_SRCTRACK | PFRULE_RULESRCTRACK; break; case PF_STATE_OPT_STATELOCK: if (statelock) { yyerror("state locking option: " "multiple definitions"); YYERROR; } statelock = 1; r.rule_flag |= o->data.statelock; break; case PF_STATE_OPT_SLOPPY: if (r.rule_flag & PFRULE_STATESLOPPY) { yyerror("state sloppy option: " "multiple definitions"); YYERROR; } r.rule_flag |= PFRULE_STATESLOPPY; break; case PF_STATE_OPT_TIMEOUT: if (o->data.timeout.number == PFTM_ADAPTIVE_START || o->data.timeout.number == PFTM_ADAPTIVE_END) adaptive = 1; if (r.timeout[o->data.timeout.number]) { yyerror("state timeout %s " "multiple definitions", pf_timeouts[o->data. timeout.number].name); YYERROR; } r.timeout[o->data.timeout.number] = o->data.timeout.seconds; } o = o->next; if (!defaults) free(p); } /* 'flags S/SA' by default on stateful rules */ if (!r.action && !r.flags && !r.flagset && !$9.fragment && !($9.marker & FOM_FLAGS) && r.keep_state) { r.flags = parse_flags("S"); r.flagset = parse_flags("SA"); } if (!adaptive && r.max_states) { r.timeout[PFTM_ADAPTIVE_START] = (r.max_states / 10) * 6; r.timeout[PFTM_ADAPTIVE_END] = (r.max_states / 10) * 12; } if (r.rule_flag & PFRULE_SRCTRACK) { if (srctrack == PF_SRCTRACK_GLOBAL && r.max_src_nodes) { yyerror("'max-src-nodes' is " "incompatible with " "'source-track global'"); YYERROR; } if (srctrack == PF_SRCTRACK_GLOBAL && r.max_src_conn) { yyerror("'max-src-conn' is " "incompatible with " "'source-track global'"); YYERROR; } if (srctrack == PF_SRCTRACK_GLOBAL && r.max_src_conn_rate.seconds) { yyerror("'max-src-conn-rate' is " "incompatible with " "'source-track global'"); YYERROR; } if (r.timeout[PFTM_SRC_NODE] < r.max_src_conn_rate.seconds) r.timeout[PFTM_SRC_NODE] = r.max_src_conn_rate.seconds; r.rule_flag |= PFRULE_SRCTRACK; if (srctrack == PF_SRCTRACK_RULE) r.rule_flag |= PFRULE_RULESRCTRACK; } if (r.keep_state && !statelock) r.rule_flag |= default_statelock; if ($9.fragment) r.rule_flag |= PFRULE_FRAGMENT; r.allow_opts = $9.allowopts; decide_address_family($8.src.host, &r.af); decide_address_family($8.dst.host, &r.af); if ($5.rt) { if (!r.direction) { yyerror("direction must be explicit " "with rules that specify routing"); YYERROR; } r.rt = $5.rt; r.rpool.opts = $5.pool_opts; if ($5.key != NULL) memcpy(&r.rpool.key, $5.key, sizeof(struct pf_poolhashkey)); } if (r.rt) { decide_address_family($5.host, &r.af); remove_invalid_hosts(&$5.host, &r.af); if ($5.host == NULL) { yyerror("no routing address with " "matching address family found."); YYERROR; } if ((r.rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_NONE && ($5.host->next != NULL || $5.host->addr.type == PF_ADDR_TABLE || DYNIF_MULTIADDR($5.host->addr))) r.rpool.opts |= PF_POOL_ROUNDROBIN; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && disallow_table($5.host, "tables are only " "supported in 
round-robin routing pools")) YYERROR; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && disallow_alias($5.host, "interface (%s) " "is only supported in round-robin " "routing pools")) YYERROR; if ($5.host->next != NULL) { if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) { yyerror("r.rpool.opts must " "be PF_POOL_ROUNDROBIN"); YYERROR; } } } if ($9.queues.qname != NULL) { if (strlcpy(r.qname, $9.queues.qname, sizeof(r.qname)) >= sizeof(r.qname)) { yyerror("rule qname too long (max " "%d chars)", sizeof(r.qname)-1); YYERROR; } free($9.queues.qname); } if ($9.queues.pqname != NULL) { if (strlcpy(r.pqname, $9.queues.pqname, sizeof(r.pqname)) >= sizeof(r.pqname)) { yyerror("rule pqname too long (max " "%d chars)", sizeof(r.pqname)-1); YYERROR; } free($9.queues.pqname); } #ifdef __FreeBSD__ r.divert.port = $9.divert.port; #else if ((r.divert.port = $9.divert.port)) { if (r.direction == PF_OUT) { if ($9.divert.addr) { yyerror("address specified " "for outgoing divert"); YYERROR; } bzero(&r.divert.addr, sizeof(r.divert.addr)); } else { if (!$9.divert.addr) { yyerror("no address specified " "for incoming divert"); YYERROR; } if ($9.divert.addr->af != r.af) { yyerror("address family " "mismatch for divert"); YYERROR; } r.divert.addr = $9.divert.addr->addr.v.a.addr; } } #endif expand_rule(&r, $4, $5.host, $7, $8.src_os, $8.src.host, $8.src.port, $8.dst.host, $8.dst.port, $9.uid, $9.gid, $9.icmpspec, ""); } ; filter_opts : { bzero(&filter_opts, sizeof filter_opts); filter_opts.rtableid = -1; } filter_opts_l { $$ = filter_opts; } | /* empty */ { bzero(&filter_opts, sizeof filter_opts); filter_opts.rtableid = -1; $$ = filter_opts; } ; filter_opts_l : filter_opts_l filter_opt | filter_opt ; filter_opt : USER uids { if (filter_opts.uid) $2->tail->next = filter_opts.uid; filter_opts.uid = $2; } | GROUP gids { if (filter_opts.gid) $2->tail->next = filter_opts.gid; filter_opts.gid = $2; } | flags { if (filter_opts.marker & FOM_FLAGS) { yyerror("flags cannot be redefined"); YYERROR; } filter_opts.marker |= FOM_FLAGS; filter_opts.flags.b1 |= $1.b1; filter_opts.flags.b2 |= $1.b2; filter_opts.flags.w |= $1.w; filter_opts.flags.w2 |= $1.w2; } | icmpspec { if (filter_opts.marker & FOM_ICMP) { yyerror("icmp-type cannot be redefined"); YYERROR; } filter_opts.marker |= FOM_ICMP; filter_opts.icmpspec = $1; } | PRIO NUMBER { if (filter_opts.marker & FOM_PRIO) { yyerror("prio cannot be redefined"); YYERROR; } if ($2 < 0 || $2 > PF_PRIO_MAX) { yyerror("prio must be 0 - %u", PF_PRIO_MAX); YYERROR; } filter_opts.marker |= FOM_PRIO; filter_opts.prio = $2; } | TOS tos { if (filter_opts.marker & FOM_TOS) { yyerror("tos cannot be redefined"); YYERROR; } filter_opts.marker |= FOM_TOS; filter_opts.tos = $2; } | keep { if (filter_opts.marker & FOM_KEEP) { yyerror("modulate or keep cannot be redefined"); YYERROR; } filter_opts.marker |= FOM_KEEP; filter_opts.keep.action = $1.action; filter_opts.keep.options = $1.options; } | FRAGMENT { filter_opts.fragment = 1; } | ALLOWOPTS { filter_opts.allowopts = 1; } | label { if (filter_opts.label) { yyerror("label cannot be redefined"); YYERROR; } filter_opts.label = $1; } | qname { if (filter_opts.queues.qname) { yyerror("queue cannot be redefined"); YYERROR; } filter_opts.queues = $1; } | TAG string { filter_opts.tag = $2; } | not TAGGED string { filter_opts.match_tag = $3; filter_opts.match_tag_not = $1; } | PROBABILITY probability { double p; p = floor($2 * UINT_MAX + 0.5); if (p < 0.0 || p > UINT_MAX) { yyerror("invalid probability: %lf", p); YYERROR; } 
filter_opts.prob = (u_int32_t)p; if (filter_opts.prob == 0) filter_opts.prob = 1; } | RTABLE NUMBER { if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); YYERROR; } filter_opts.rtableid = $2; } | DIVERTTO portplain { #ifdef __FreeBSD__ filter_opts.divert.port = $2.a; if (!filter_opts.divert.port) { yyerror("invalid divert port: %u", ntohs($2.a)); YYERROR; } #endif } | DIVERTTO STRING PORT portplain { #ifndef __FreeBSD__ if ((filter_opts.divert.addr = host($2)) == NULL) { yyerror("could not parse divert address: %s", $2); free($2); YYERROR; } #else if ($2) #endif free($2); filter_opts.divert.port = $4.a; if (!filter_opts.divert.port) { yyerror("invalid divert port: %u", ntohs($4.a)); YYERROR; } } | DIVERTREPLY { #ifdef __FreeBSD__ yyerror("divert-reply has no meaning in FreeBSD pf(4)"); YYERROR; #else filter_opts.divert.port = 1; /* some random value */ #endif } | filter_sets ; filter_sets : SET '(' filter_sets_l ')' { $$ = filter_opts; } | SET filter_set { $$ = filter_opts; } ; filter_sets_l : filter_sets_l comma filter_set | filter_set ; filter_set : prio { if (filter_opts.marker & FOM_SETPRIO) { yyerror("prio cannot be redefined"); YYERROR; } filter_opts.marker |= FOM_SETPRIO; filter_opts.set_prio[0] = $1.b1; filter_opts.set_prio[1] = $1.b2; } prio : PRIO NUMBER { if ($2 < 0 || $2 > PF_PRIO_MAX) { yyerror("prio must be 0 - %u", PF_PRIO_MAX); YYERROR; } $$.b1 = $$.b2 = $2; } | PRIO '(' NUMBER comma NUMBER ')' { if ($3 < 0 || $3 > PF_PRIO_MAX || $5 < 0 || $5 > PF_PRIO_MAX) { yyerror("prio must be 0 - %u", PF_PRIO_MAX); YYERROR; } $$.b1 = $3; $$.b2 = $5; } ; probability : STRING { char *e; double p = strtod($1, &e); if (*e == '%') { p *= 0.01; e++; } if (*e) { yyerror("invalid probability: %s", $1); free($1); YYERROR; } free($1); $$ = p; } | NUMBER { $$ = (double)$1; } ; action : PASS { $$.b1 = PF_PASS; $$.b2 = failpolicy; $$.w = returnicmpdefault; $$.w2 = returnicmp6default; } | BLOCK blockspec { $$ = $2; $$.b1 = PF_DROP; } ; blockspec : /* empty */ { $$.b2 = blockpolicy; $$.w = returnicmpdefault; $$.w2 = returnicmp6default; } | DROP { $$.b2 = PFRULE_DROP; $$.w = 0; $$.w2 = 0; } | RETURNRST { $$.b2 = PFRULE_RETURNRST; $$.w = 0; $$.w2 = 0; } | RETURNRST '(' TTL NUMBER ')' { if ($4 < 0 || $4 > 255) { yyerror("illegal ttl value %d", $4); YYERROR; } $$.b2 = PFRULE_RETURNRST; $$.w = $4; $$.w2 = 0; } | RETURNICMP { $$.b2 = PFRULE_RETURNICMP; $$.w = returnicmpdefault; $$.w2 = returnicmp6default; } | RETURNICMP6 { $$.b2 = PFRULE_RETURNICMP; $$.w = returnicmpdefault; $$.w2 = returnicmp6default; } | RETURNICMP '(' reticmpspec ')' { $$.b2 = PFRULE_RETURNICMP; $$.w = $3; $$.w2 = returnicmpdefault; } | RETURNICMP6 '(' reticmp6spec ')' { $$.b2 = PFRULE_RETURNICMP; $$.w = returnicmpdefault; $$.w2 = $3; } | RETURNICMP '(' reticmpspec comma reticmp6spec ')' { $$.b2 = PFRULE_RETURNICMP; $$.w = $3; $$.w2 = $5; } | RETURN { $$.b2 = PFRULE_RETURN; $$.w = returnicmpdefault; $$.w2 = returnicmp6default; } ; reticmpspec : STRING { if (!($$ = parseicmpspec($1, AF_INET))) { free($1); YYERROR; } free($1); } | NUMBER { u_int8_t icmptype; if ($1 < 0 || $1 > 255) { yyerror("invalid icmp code %lu", $1); YYERROR; } icmptype = returnicmpdefault >> 8; $$ = (icmptype << 8 | $1); } ; reticmp6spec : STRING { if (!($$ = parseicmpspec($1, AF_INET6))) { free($1); YYERROR; } free($1); } | NUMBER { u_int8_t icmptype; if ($1 < 0 || $1 > 255) { yyerror("invalid icmp code %lu", $1); YYERROR; } icmptype = returnicmp6default >> 8; $$ = (icmptype << 8 | $1); } ; dir : /* empty */ { $$ = PF_INOUT; } | IN { $$ = PF_IN; } | 
OUT { $$ = PF_OUT; } ; quick : /* empty */ { $$.quick = 0; } | QUICK { $$.quick = 1; } ; logquick : /* empty */ { $$.log = 0; $$.quick = 0; $$.logif = 0; } | log { $$ = $1; $$.quick = 0; } | QUICK { $$.quick = 1; $$.log = 0; $$.logif = 0; } | log QUICK { $$ = $1; $$.quick = 1; } | QUICK log { $$ = $2; $$.quick = 1; } ; log : LOG { $$.log = PF_LOG; $$.logif = 0; } | LOG '(' logopts ')' { $$.log = PF_LOG | $3.log; $$.logif = $3.logif; } ; logopts : logopt { $$ = $1; } | logopts comma logopt { $$.log = $1.log | $3.log; $$.logif = $3.logif; if ($$.logif == 0) $$.logif = $1.logif; } ; logopt : ALL { $$.log = PF_LOG_ALL; $$.logif = 0; } | USER { $$.log = PF_LOG_SOCKET_LOOKUP; $$.logif = 0; } | GROUP { $$.log = PF_LOG_SOCKET_LOOKUP; $$.logif = 0; } | TO string { const char *errstr; u_int i; $$.log = 0; if (strncmp($2, "pflog", 5)) { yyerror("%s: should be a pflog interface", $2); free($2); YYERROR; } i = strtonum($2 + 5, 0, 255, &errstr); if (errstr) { yyerror("%s: %s", $2, errstr); free($2); YYERROR; } free($2); $$.logif = i; } ; interface : /* empty */ { $$ = NULL; } | ON if_item_not { $$ = $2; } | ON '{' optnl if_list '}' { $$ = $4; } ; if_list : if_item_not optnl { $$ = $1; } | if_list comma if_item_not optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; if_item_not : not if_item { $$ = $2; $$->not = $1; } ; if_item : STRING { struct node_host *n; $$ = calloc(1, sizeof(struct node_if)); if ($$ == NULL) err(1, "if_item: calloc"); if (strlcpy($$->ifname, $1, sizeof($$->ifname)) >= sizeof($$->ifname)) { free($1); free($$); yyerror("interface name too long"); YYERROR; } if ((n = ifa_exists($1)) != NULL) $$->ifa_flags = n->ifa_flags; free($1); $$->not = 0; $$->next = NULL; $$->tail = $$; } ; af : /* empty */ { $$ = 0; } | INET { $$ = AF_INET; } | INET6 { $$ = AF_INET6; } ; proto : /* empty */ { $$ = NULL; } | PROTO proto_item { $$ = $2; } | PROTO '{' optnl proto_list '}' { $$ = $4; } ; proto_list : proto_item optnl { $$ = $1; } | proto_list comma proto_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; proto_item : protoval { u_int8_t pr; pr = (u_int8_t)$1; if (pr == 0) { yyerror("proto 0 cannot be used"); YYERROR; } $$ = calloc(1, sizeof(struct node_proto)); if ($$ == NULL) err(1, "proto_item: calloc"); $$->proto = pr; $$->next = NULL; $$->tail = $$; } ; protoval : STRING { struct protoent *p; p = getprotobyname($1); if (p == NULL) { yyerror("unknown protocol %s", $1); free($1); YYERROR; } $$ = p->p_proto; free($1); } | NUMBER { if ($1 < 0 || $1 > 255) { yyerror("protocol outside range"); YYERROR; } } ; fromto : ALL { $$.src.host = NULL; $$.src.port = NULL; $$.dst.host = NULL; $$.dst.port = NULL; $$.src_os = NULL; } | from os to { $$.src = $1; $$.src_os = $2; $$.dst = $3; } ; os : /* empty */ { $$ = NULL; } | OS xos { $$ = $2; } | OS '{' optnl os_list '}' { $$ = $4; } ; xos : STRING { $$ = calloc(1, sizeof(struct node_os)); if ($$ == NULL) err(1, "os: calloc"); $$->os = $1; $$->tail = $$; } ; os_list : xos optnl { $$ = $1; } | os_list comma xos optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; from : /* empty */ { $$.host = NULL; $$.port = NULL; } | FROM ipportspec { $$ = $2; } ; to : /* empty */ { $$.host = NULL; $$.port = NULL; } | TO ipportspec { if (disallow_urpf_failed($2.host, "\"urpf-failed\" is " "not permitted in a destination address")) YYERROR; $$ = $2; } ; ipportspec : ipspec { $$.host = $1; $$.port = NULL; } | ipspec PORT portspec { $$.host = $1; $$.port = $3; } | PORT portspec { $$.host = NULL; $$.port = $2; } ; optnl : '\n' optnl | ; ipspec : ANY { $$ = NULL; 
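/*
 * "any" deliberately yields a NULL node_host list; the LOOP_THROUGH
 * macro further down substitutes a single zeroed node for a NULL list
 * when rules are expanded.  Together with the brace lists above this
 * covers forms like "pass in on { em0, em1 } proto { tcp, udp } from any"
 * (interface names here are illustrative only).
 */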
} | xhost { $$ = $1; } | '{' optnl host_list '}' { $$ = $3; } ; toipspec : TO ipspec { $$ = $2; } | /* empty */ { $$ = NULL; } ; host_list : ipspec optnl { $$ = $1; } | host_list comma ipspec optnl { if ($3 == NULL) $$ = $1; else if ($1 == NULL) $$ = $3; else { $1->tail->next = $3; $1->tail = $3->tail; $$ = $1; } } ; xhost : not host { struct node_host *n; for (n = $2; n != NULL; n = n->next) n->not = $1; $$ = $2; } | not NOROUTE { $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "xhost: calloc"); $$->addr.type = PF_ADDR_NOROUTE; $$->next = NULL; $$->not = $1; $$->tail = $$; } | not URPFFAILED { $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "xhost: calloc"); $$->addr.type = PF_ADDR_URPFFAILED; $$->next = NULL; $$->not = $1; $$->tail = $$; } ; host : STRING { if (($$ = host($1)) == NULL) { /* error. "any" is handled elsewhere */ free($1); yyerror("could not parse host specification"); YYERROR; } free($1); } | STRING '-' STRING { struct node_host *b, *e; if ((b = host($1)) == NULL || (e = host($3)) == NULL) { free($1); free($3); yyerror("could not parse host specification"); YYERROR; } if (b->af != e->af || b->addr.type != PF_ADDR_ADDRMASK || e->addr.type != PF_ADDR_ADDRMASK || unmask(&b->addr.v.a.mask, b->af) != (b->af == AF_INET ? 32 : 128) || unmask(&e->addr.v.a.mask, e->af) != (e->af == AF_INET ? 32 : 128) || b->next != NULL || b->not || e->next != NULL || e->not) { free(b); free(e); free($1); free($3); yyerror("invalid address range"); YYERROR; } memcpy(&b->addr.v.a.mask, &e->addr.v.a.addr, sizeof(b->addr.v.a.mask)); b->addr.type = PF_ADDR_RANGE; $$ = b; free(e); free($1); free($3); } | STRING '/' NUMBER { char *buf; if (asprintf(&buf, "%s/%lld", $1, (long long)$3) == -1) err(1, "host: asprintf"); free($1); if (($$ = host(buf)) == NULL) { /* error. "any" is handled elsewhere */ free(buf); yyerror("could not parse host specification"); YYERROR; } free(buf); } | NUMBER '/' NUMBER { char *buf; /* ie. for 10/8 parsing */ #ifdef __FreeBSD__ if (asprintf(&buf, "%lld/%lld", (long long)$1, (long long)$3) == -1) #else if (asprintf(&buf, "%lld/%lld", $1, $3) == -1) #endif err(1, "host: asprintf"); if (($$ = host(buf)) == NULL) { /* error. 
"any" is handled elsewhere */ free(buf); yyerror("could not parse host specification"); YYERROR; } free(buf); } | dynaddr | dynaddr '/' NUMBER { struct node_host *n; if ($3 < 0 || $3 > 128) { yyerror("bit number too big"); YYERROR; } $$ = $1; for (n = $1; n != NULL; n = n->next) set_ipmask(n, $3); } | '<' STRING '>' { if (strlen($2) >= PF_TABLE_NAME_SIZE) { yyerror("table name '%s' too long", $2); free($2); YYERROR; } $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "host: calloc"); $$->addr.type = PF_ADDR_TABLE; if (strlcpy($$->addr.v.tblname, $2, sizeof($$->addr.v.tblname)) >= sizeof($$->addr.v.tblname)) errx(1, "host: strlcpy"); free($2); $$->next = NULL; $$->tail = $$; } ; number : NUMBER | STRING { u_long ulval; if (atoul($1, &ulval) == -1) { yyerror("%s is not a number", $1); free($1); YYERROR; } else $$ = ulval; free($1); } ; dynaddr : '(' STRING ')' { int flags = 0; char *p, *op; op = $2; if (!isalpha(op[0])) { yyerror("invalid interface name '%s'", op); free(op); YYERROR; } while ((p = strrchr($2, ':')) != NULL) { if (!strcmp(p+1, "network")) flags |= PFI_AFLAG_NETWORK; else if (!strcmp(p+1, "broadcast")) flags |= PFI_AFLAG_BROADCAST; else if (!strcmp(p+1, "peer")) flags |= PFI_AFLAG_PEER; else if (!strcmp(p+1, "0")) flags |= PFI_AFLAG_NOALIAS; else { yyerror("interface %s has bad modifier", $2); free(op); YYERROR; } *p = '\0'; } if (flags & (flags - 1) & PFI_AFLAG_MODEMASK) { free(op); yyerror("illegal combination of " "interface modifiers"); YYERROR; } $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "address: calloc"); $$->af = 0; set_ipmask($$, 128); $$->addr.type = PF_ADDR_DYNIFTL; $$->addr.iflags = flags; if (strlcpy($$->addr.v.ifname, $2, sizeof($$->addr.v.ifname)) >= sizeof($$->addr.v.ifname)) { free(op); free($$); yyerror("interface name too long"); YYERROR; } free(op); $$->next = NULL; $$->tail = $$; } ; portspec : port_item { $$ = $1; } | '{' optnl port_list '}' { $$ = $3; } ; port_list : port_item optnl { $$ = $1; } | port_list comma port_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; port_item : portrange { $$ = calloc(1, sizeof(struct node_port)); if ($$ == NULL) err(1, "port_item: calloc"); $$->port[0] = $1.a; $$->port[1] = $1.b; if ($1.t) $$->op = PF_OP_RRG; else $$->op = PF_OP_EQ; $$->next = NULL; $$->tail = $$; } | unaryop portrange { if ($2.t) { yyerror("':' cannot be used with an other " "port operator"); YYERROR; } $$ = calloc(1, sizeof(struct node_port)); if ($$ == NULL) err(1, "port_item: calloc"); $$->port[0] = $2.a; $$->port[1] = $2.b; $$->op = $1; $$->next = NULL; $$->tail = $$; } | portrange PORTBINARY portrange { if ($1.t || $3.t) { yyerror("':' cannot be used with an other " "port operator"); YYERROR; } $$ = calloc(1, sizeof(struct node_port)); if ($$ == NULL) err(1, "port_item: calloc"); $$->port[0] = $1.a; $$->port[1] = $3.a; $$->op = $2; $$->next = NULL; $$->tail = $$; } ; portplain : numberstring { if (parseport($1, &$$, 0) == -1) { free($1); YYERROR; } free($1); } ; portrange : numberstring { if (parseport($1, &$$, PPORT_RANGE) == -1) { free($1); YYERROR; } free($1); } ; uids : uid_item { $$ = $1; } | '{' optnl uid_list '}' { $$ = $3; } ; uid_list : uid_item optnl { $$ = $1; } | uid_list comma uid_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; uid_item : uid { $$ = calloc(1, sizeof(struct node_uid)); if ($$ == NULL) err(1, "uid_item: calloc"); $$->uid[0] = $1; $$->uid[1] = $1; $$->op = PF_OP_EQ; $$->next = NULL; $$->tail = $$; } | unaryop uid { if ($2 == UID_MAX && $1 != PF_OP_EQ && $1 
!= PF_OP_NE) { yyerror("user unknown requires operator = or " "!="); YYERROR; } $$ = calloc(1, sizeof(struct node_uid)); if ($$ == NULL) err(1, "uid_item: calloc"); $$->uid[0] = $2; $$->uid[1] = $2; $$->op = $1; $$->next = NULL; $$->tail = $$; } | uid PORTBINARY uid { if ($1 == UID_MAX || $3 == UID_MAX) { yyerror("user unknown requires operator = or " "!="); YYERROR; } $$ = calloc(1, sizeof(struct node_uid)); if ($$ == NULL) err(1, "uid_item: calloc"); $$->uid[0] = $1; $$->uid[1] = $3; $$->op = $2; $$->next = NULL; $$->tail = $$; } ; uid : STRING { if (!strcmp($1, "unknown")) $$ = UID_MAX; else { struct passwd *pw; if ((pw = getpwnam($1)) == NULL) { yyerror("unknown user %s", $1); free($1); YYERROR; } $$ = pw->pw_uid; } free($1); } | NUMBER { if ($1 < 0 || $1 >= UID_MAX) { yyerror("illegal uid value %lu", $1); YYERROR; } $$ = $1; } ; gids : gid_item { $$ = $1; } | '{' optnl gid_list '}' { $$ = $3; } ; gid_list : gid_item optnl { $$ = $1; } | gid_list comma gid_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; gid_item : gid { $$ = calloc(1, sizeof(struct node_gid)); if ($$ == NULL) err(1, "gid_item: calloc"); $$->gid[0] = $1; $$->gid[1] = $1; $$->op = PF_OP_EQ; $$->next = NULL; $$->tail = $$; } | unaryop gid { if ($2 == GID_MAX && $1 != PF_OP_EQ && $1 != PF_OP_NE) { yyerror("group unknown requires operator = or " "!="); YYERROR; } $$ = calloc(1, sizeof(struct node_gid)); if ($$ == NULL) err(1, "gid_item: calloc"); $$->gid[0] = $2; $$->gid[1] = $2; $$->op = $1; $$->next = NULL; $$->tail = $$; } | gid PORTBINARY gid { if ($1 == GID_MAX || $3 == GID_MAX) { yyerror("group unknown requires operator = or " "!="); YYERROR; } $$ = calloc(1, sizeof(struct node_gid)); if ($$ == NULL) err(1, "gid_item: calloc"); $$->gid[0] = $1; $$->gid[1] = $3; $$->op = $2; $$->next = NULL; $$->tail = $$; } ; gid : STRING { if (!strcmp($1, "unknown")) $$ = GID_MAX; else { struct group *grp; if ((grp = getgrnam($1)) == NULL) { yyerror("unknown group %s", $1); free($1); YYERROR; } $$ = grp->gr_gid; } free($1); } | NUMBER { if ($1 < 0 || $1 >= GID_MAX) { yyerror("illegal gid value %lu", $1); YYERROR; } $$ = $1; } ; flag : STRING { int f; if ((f = parse_flags($1)) < 0) { yyerror("bad flags %s", $1); free($1); YYERROR; } free($1); $$.b1 = f; } ; flags : FLAGS flag '/' flag { $$.b1 = $2.b1; $$.b2 = $4.b1; } | FLAGS '/' flag { $$.b1 = 0; $$.b2 = $3.b1; } | FLAGS ANY { $$.b1 = 0; $$.b2 = 0; } ; icmpspec : ICMPTYPE icmp_item { $$ = $2; } | ICMPTYPE '{' optnl icmp_list '}' { $$ = $4; } | ICMP6TYPE icmp6_item { $$ = $2; } | ICMP6TYPE '{' optnl icmp6_list '}' { $$ = $4; } ; icmp_list : icmp_item optnl { $$ = $1; } | icmp_list comma icmp_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; icmp6_list : icmp6_item optnl { $$ = $1; } | icmp6_list comma icmp6_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; icmp_item : icmptype { $$ = calloc(1, sizeof(struct node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = 0; $$->proto = IPPROTO_ICMP; $$->next = NULL; $$->tail = $$; } | icmptype CODE STRING { const struct icmpcodeent *p; if ((p = geticmpcodebyname($1-1, $3, AF_INET)) == NULL) { yyerror("unknown icmp-code %s", $3); free($3); YYERROR; } free($3); $$ = calloc(1, sizeof(struct node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = p->code + 1; $$->proto = IPPROTO_ICMP; $$->next = NULL; $$->tail = $$; } | icmptype CODE NUMBER { if ($3 < 0 || $3 > 255) { yyerror("illegal icmp-code %lu", $3); YYERROR; } $$ = calloc(1, sizeof(struct 
node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = $3 + 1; $$->proto = IPPROTO_ICMP; $$->next = NULL; $$->tail = $$; } ; icmp6_item : icmp6type { $$ = calloc(1, sizeof(struct node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = 0; $$->proto = IPPROTO_ICMPV6; $$->next = NULL; $$->tail = $$; } | icmp6type CODE STRING { const struct icmpcodeent *p; if ((p = geticmpcodebyname($1-1, $3, AF_INET6)) == NULL) { yyerror("unknown icmp6-code %s", $3); free($3); YYERROR; } free($3); $$ = calloc(1, sizeof(struct node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = p->code + 1; $$->proto = IPPROTO_ICMPV6; $$->next = NULL; $$->tail = $$; } | icmp6type CODE NUMBER { if ($3 < 0 || $3 > 255) { yyerror("illegal icmp-code %lu", $3); YYERROR; } $$ = calloc(1, sizeof(struct node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = $3 + 1; $$->proto = IPPROTO_ICMPV6; $$->next = NULL; $$->tail = $$; } ; icmptype : STRING { const struct icmptypeent *p; if ((p = geticmptypebyname($1, AF_INET)) == NULL) { yyerror("unknown icmp-type %s", $1); free($1); YYERROR; } $$ = p->type + 1; free($1); } | NUMBER { if ($1 < 0 || $1 > 255) { yyerror("illegal icmp-type %lu", $1); YYERROR; } $$ = $1 + 1; } ; icmp6type : STRING { const struct icmptypeent *p; if ((p = geticmptypebyname($1, AF_INET6)) == NULL) { yyerror("unknown icmp6-type %s", $1); free($1); YYERROR; } $$ = p->type + 1; free($1); } | NUMBER { if ($1 < 0 || $1 > 255) { yyerror("illegal icmp6-type %lu", $1); YYERROR; } $$ = $1 + 1; } ; tos : STRING { int val; char *end; if (map_tos($1, &val)) $$ = val; else if ($1[0] == '0' && $1[1] == 'x') { errno = 0; $$ = strtoul($1, &end, 16); if (errno || *end != '\0') $$ = 256; } else $$ = 256; /* flag bad argument */ if ($$ < 0 || $$ > 255) { yyerror("illegal tos value %s", $1); free($1); YYERROR; } free($1); } | NUMBER { $$ = $1; if ($$ < 0 || $$ > 255) { yyerror("illegal tos value %s", $1); YYERROR; } } ; sourcetrack : SOURCETRACK { $$ = PF_SRCTRACK; } | SOURCETRACK GLOBAL { $$ = PF_SRCTRACK_GLOBAL; } | SOURCETRACK RULE { $$ = PF_SRCTRACK_RULE; } ; statelock : IFBOUND { $$ = PFRULE_IFBOUND; } | FLOATING { $$ = 0; } ; keep : NO STATE { $$.action = 0; $$.options = NULL; } | KEEP STATE state_opt_spec { $$.action = PF_STATE_NORMAL; $$.options = $3; } | MODULATE STATE state_opt_spec { $$.action = PF_STATE_MODULATE; $$.options = $3; } | SYNPROXY STATE state_opt_spec { $$.action = PF_STATE_SYNPROXY; $$.options = $3; } ; flush : /* empty */ { $$ = 0; } | FLUSH { $$ = PF_FLUSH; } | FLUSH GLOBAL { $$ = PF_FLUSH | PF_FLUSH_GLOBAL; } ; state_opt_spec : '(' state_opt_list ')' { $$ = $2; } | /* empty */ { $$ = NULL; } ; state_opt_list : state_opt_item { $$ = $1; } | state_opt_list comma state_opt_item { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; state_opt_item : MAXIMUM NUMBER { if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_MAX; $$->data.max_states = $2; $$->next = NULL; $$->tail = $$; } | NOSYNC { $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_NOSYNC; $$->next = NULL; $$->tail = $$; } | MAXSRCSTATES NUMBER { if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: 
calloc"); $$->type = PF_STATE_OPT_MAX_SRC_STATES; $$->data.max_src_states = $2; $$->next = NULL; $$->tail = $$; } | MAXSRCCONN NUMBER { if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_MAX_SRC_CONN; $$->data.max_src_conn = $2; $$->next = NULL; $$->tail = $$; } | MAXSRCCONNRATE NUMBER '/' NUMBER { if ($2 < 0 || $2 > UINT_MAX || $4 < 0 || $4 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_MAX_SRC_CONN_RATE; $$->data.max_src_conn_rate.limit = $2; $$->data.max_src_conn_rate.seconds = $4; $$->next = NULL; $$->tail = $$; } | OVERLOAD '<' STRING '>' flush { if (strlen($3) >= PF_TABLE_NAME_SIZE) { yyerror("table name '%s' too long", $3); free($3); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); if (strlcpy($$->data.overload.tblname, $3, PF_TABLE_NAME_SIZE) >= PF_TABLE_NAME_SIZE) errx(1, "state_opt_item: strlcpy"); free($3); $$->type = PF_STATE_OPT_OVERLOAD; $$->data.overload.flush = $5; $$->next = NULL; $$->tail = $$; } | MAXSRCNODES NUMBER { if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_MAX_SRC_NODES; $$->data.max_src_nodes = $2; $$->next = NULL; $$->tail = $$; } | sourcetrack { $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_SRCTRACK; $$->data.src_track = $1; $$->next = NULL; $$->tail = $$; } | statelock { $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_STATELOCK; $$->data.statelock = $1; $$->next = NULL; $$->tail = $$; } | SLOPPY { $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_SLOPPY; $$->next = NULL; $$->tail = $$; } | STRING NUMBER { int i; if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } for (i = 0; pf_timeouts[i].name && strcmp(pf_timeouts[i].name, $1); ++i) ; /* nothing */ if (!pf_timeouts[i].name) { yyerror("illegal timeout name %s", $1); free($1); YYERROR; } if (strchr(pf_timeouts[i].name, '.') == NULL) { yyerror("illegal state timeout %s", $1); free($1); YYERROR; } free($1); $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_TIMEOUT; $$->data.timeout.number = pf_timeouts[i].timeout; $$->data.timeout.seconds = $2; $$->next = NULL; $$->tail = $$; } ; label : LABEL STRING { $$ = $2; } ; qname : QUEUE STRING { $$.qname = $2; $$.pqname = NULL; } | QUEUE '(' STRING ')' { $$.qname = $3; $$.pqname = NULL; } | QUEUE '(' STRING comma STRING ')' { $$.qname = $3; $$.pqname = $5; } ; no : /* empty */ { $$ = 0; } | NO { $$ = 1; } ; portstar : numberstring { if (parseport($1, &$$, PPORT_RANGE|PPORT_STAR) == -1) { free($1); YYERROR; } free($1); } ; redirspec : host { $$ = $1; } | '{' optnl redir_host_list '}' { $$ = $3; } ; redir_host_list : host optnl { $$ = $1; } | redir_host_list comma host optnl { $1->tail->next = $3; $1->tail = $3->tail; $$ = $1; } ; redirpool : /* empty */ { $$ = NULL; } | ARROW redirspec { $$ = calloc(1, sizeof(struct redirection)); if ($$ == NULL) 
err(1, "redirection: calloc"); $$->host = $2; $$->rport.a = $$->rport.b = $$->rport.t = 0; } | ARROW redirspec PORT portstar { $$ = calloc(1, sizeof(struct redirection)); if ($$ == NULL) err(1, "redirection: calloc"); $$->host = $2; $$->rport = $4; } ; hashkey : /* empty */ { $$ = calloc(1, sizeof(struct pf_poolhashkey)); if ($$ == NULL) err(1, "hashkey: calloc"); $$->key32[0] = arc4random(); $$->key32[1] = arc4random(); $$->key32[2] = arc4random(); $$->key32[3] = arc4random(); } | string { if (!strncmp($1, "0x", 2)) { if (strlen($1) != 34) { free($1); yyerror("hex key must be 128 bits " "(32 hex digits) long"); YYERROR; } $$ = calloc(1, sizeof(struct pf_poolhashkey)); if ($$ == NULL) err(1, "hashkey: calloc"); if (sscanf($1, "0x%8x%8x%8x%8x", &$$->key32[0], &$$->key32[1], &$$->key32[2], &$$->key32[3]) != 4) { free($$); free($1); yyerror("invalid hex key"); YYERROR; } } else { MD5_CTX context; $$ = calloc(1, sizeof(struct pf_poolhashkey)); if ($$ == NULL) err(1, "hashkey: calloc"); MD5Init(&context); MD5Update(&context, (unsigned char *)$1, strlen($1)); MD5Final((unsigned char *)$$, &context); HTONL($$->key32[0]); HTONL($$->key32[1]); HTONL($$->key32[2]); HTONL($$->key32[3]); } free($1); } ; pool_opts : { bzero(&pool_opts, sizeof pool_opts); } pool_opts_l { $$ = pool_opts; } | /* empty */ { bzero(&pool_opts, sizeof pool_opts); $$ = pool_opts; } ; pool_opts_l : pool_opts_l pool_opt | pool_opt ; pool_opt : BITMASK { if (pool_opts.type) { yyerror("pool type cannot be redefined"); YYERROR; } pool_opts.type = PF_POOL_BITMASK; } | RANDOM { if (pool_opts.type) { yyerror("pool type cannot be redefined"); YYERROR; } pool_opts.type = PF_POOL_RANDOM; } | SOURCEHASH hashkey { if (pool_opts.type) { yyerror("pool type cannot be redefined"); YYERROR; } pool_opts.type = PF_POOL_SRCHASH; pool_opts.key = $2; } | ROUNDROBIN { if (pool_opts.type) { yyerror("pool type cannot be redefined"); YYERROR; } pool_opts.type = PF_POOL_ROUNDROBIN; } | STATICPORT { if (pool_opts.staticport) { yyerror("static-port cannot be redefined"); YYERROR; } pool_opts.staticport = 1; } | STICKYADDRESS { if (filter_opts.marker & POM_STICKYADDRESS) { yyerror("sticky-address cannot be redefined"); YYERROR; } pool_opts.marker |= POM_STICKYADDRESS; pool_opts.opts |= PF_POOL_STICKYADDR; } ; redirection : /* empty */ { $$ = NULL; } | ARROW host { $$ = calloc(1, sizeof(struct redirection)); if ($$ == NULL) err(1, "redirection: calloc"); $$->host = $2; $$->rport.a = $$->rport.b = $$->rport.t = 0; } | ARROW host PORT portstar { $$ = calloc(1, sizeof(struct redirection)); if ($$ == NULL) err(1, "redirection: calloc"); $$->host = $2; $$->rport = $4; } ; natpasslog : /* empty */ { $$.b1 = $$.b2 = 0; $$.w2 = 0; } | PASS { $$.b1 = 1; $$.b2 = 0; $$.w2 = 0; } | PASS log { $$.b1 = 1; $$.b2 = $2.log; $$.w2 = $2.logif; } | log { $$.b1 = 0; $$.b2 = $1.log; $$.w2 = $1.logif; } ; nataction : no NAT natpasslog { if ($1 && $3.b1) { yyerror("\"pass\" not valid with \"no\""); YYERROR; } if ($1) $$.b1 = PF_NONAT; else $$.b1 = PF_NAT; $$.b2 = $3.b1; $$.w = $3.b2; $$.w2 = $3.w2; } | no RDR natpasslog { if ($1 && $3.b1) { yyerror("\"pass\" not valid with \"no\""); YYERROR; } if ($1) $$.b1 = PF_NORDR; else $$.b1 = PF_RDR; $$.b2 = $3.b1; $$.w = $3.b2; $$.w2 = $3.w2; } ; natrule : nataction interface af proto fromto tag tagged rtable redirpool pool_opts { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) YYERROR; memset(&r, 0, sizeof(r)); r.action = $1.b1; r.natpass = $1.b2; r.log = $1.w; r.logif = $1.w2; r.af = $3; if (!r.af) { if ($5.src.host && 
$5.src.host->af && !$5.src.host->ifindex) r.af = $5.src.host->af; else if ($5.dst.host && $5.dst.host->af && !$5.dst.host->ifindex) r.af = $5.dst.host->af; } if ($6 != NULL) if (strlcpy(r.tagname, $6, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } if ($7.name) if (strlcpy(r.match_tagname, $7.name, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } r.match_tag_not = $7.neg; r.rtableid = $8; if (r.action == PF_NONAT || r.action == PF_NORDR) { if ($9 != NULL) { yyerror("translation rule with 'no' " "does not need '->'"); YYERROR; } } else { if ($9 == NULL || $9->host == NULL) { yyerror("translation rule requires '-> " "address'"); YYERROR; } if (!r.af && ! $9->host->ifindex) r.af = $9->host->af; remove_invalid_hosts(&$9->host, &r.af); if (invalid_redirect($9->host, r.af)) YYERROR; if (check_netmask($9->host, r.af)) YYERROR; r.rpool.proxy_port[0] = ntohs($9->rport.a); switch (r.action) { case PF_RDR: if (!$9->rport.b && $9->rport.t && $5.dst.port != NULL) { r.rpool.proxy_port[1] = ntohs($9->rport.a) + (ntohs( $5.dst.port->port[1]) - ntohs( $5.dst.port->port[0])); } else r.rpool.proxy_port[1] = ntohs($9->rport.b); break; case PF_NAT: r.rpool.proxy_port[1] = ntohs($9->rport.b); if (!r.rpool.proxy_port[0] && !r.rpool.proxy_port[1]) { r.rpool.proxy_port[0] = PF_NAT_PROXY_PORT_LOW; r.rpool.proxy_port[1] = PF_NAT_PROXY_PORT_HIGH; } else if (!r.rpool.proxy_port[1]) r.rpool.proxy_port[1] = r.rpool.proxy_port[0]; break; default: break; } r.rpool.opts = $10.type; if ((r.rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_NONE && ($9->host->next != NULL || $9->host->addr.type == PF_ADDR_TABLE || DYNIF_MULTIADDR($9->host->addr))) r.rpool.opts = PF_POOL_ROUNDROBIN; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && disallow_table($9->host, "tables are only " "supported in round-robin redirection " "pools")) YYERROR; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && disallow_alias($9->host, "interface (%s) " "is only supported in round-robin " "redirection pools")) YYERROR; if ($9->host->next != NULL) { if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) { yyerror("only round-robin " "valid for multiple " "redirection addresses"); YYERROR; } } } if ($10.key != NULL) memcpy(&r.rpool.key, $10.key, sizeof(struct pf_poolhashkey)); if ($10.opts) r.rpool.opts |= $10.opts; if ($10.staticport) { if (r.action != PF_NAT) { yyerror("the 'static-port' option is " "only valid with nat rules"); YYERROR; } if (r.rpool.proxy_port[0] != PF_NAT_PROXY_PORT_LOW && r.rpool.proxy_port[1] != PF_NAT_PROXY_PORT_HIGH) { yyerror("the 'static-port' option can't" " be used when specifying a port" " range"); YYERROR; } r.rpool.proxy_port[0] = 0; r.rpool.proxy_port[1] = 0; } expand_rule(&r, $2, $9 == NULL ? 
NULL : $9->host, $4, $5.src_os, $5.src.host, $5.src.port, $5.dst.host, $5.dst.port, 0, 0, 0, ""); free($9); } ; binatrule : no BINAT natpasslog interface af proto FROM ipspec toipspec tag tagged rtable redirection { struct pf_rule binat; struct pf_pooladdr *pa; if (check_rulestate(PFCTL_STATE_NAT)) YYERROR; if (disallow_urpf_failed($9, "\"urpf-failed\" is not " "permitted as a binat destination")) YYERROR; memset(&binat, 0, sizeof(binat)); if ($1 && $3.b1) { yyerror("\"pass\" not valid with \"no\""); YYERROR; } if ($1) binat.action = PF_NOBINAT; else binat.action = PF_BINAT; binat.natpass = $3.b1; binat.log = $3.b2; binat.logif = $3.w2; binat.af = $5; if (!binat.af && $8 != NULL && $8->af) binat.af = $8->af; if (!binat.af && $9 != NULL && $9->af) binat.af = $9->af; if (!binat.af && $13 != NULL && $13->host) binat.af = $13->host->af; if (!binat.af) { yyerror("address family (inet/inet6) " "undefined"); YYERROR; } if ($4 != NULL) { memcpy(binat.ifname, $4->ifname, sizeof(binat.ifname)); binat.ifnot = $4->not; free($4); } if ($10 != NULL) if (strlcpy(binat.tagname, $10, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } if ($11.name) if (strlcpy(binat.match_tagname, $11.name, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } binat.match_tag_not = $11.neg; binat.rtableid = $12; if ($6 != NULL) { binat.proto = $6->proto; free($6); } if ($8 != NULL && disallow_table($8, "invalid use of " "table <%s> as the source address of a binat rule")) YYERROR; if ($8 != NULL && disallow_alias($8, "invalid use of " "interface (%s) as the source address of a binat " "rule")) YYERROR; if ($13 != NULL && $13->host != NULL && disallow_table( $13->host, "invalid use of table <%s> as the " "redirect address of a binat rule")) YYERROR; if ($13 != NULL && $13->host != NULL && disallow_alias( $13->host, "invalid use of interface (%s) as the " "redirect address of a binat rule")) YYERROR; if ($8 != NULL) { if ($8->next) { yyerror("multiple binat ip addresses"); YYERROR; } if ($8->addr.type == PF_ADDR_DYNIFTL) $8->af = binat.af; if ($8->af != binat.af) { yyerror("binat ip versions must match"); YYERROR; } if (check_netmask($8, binat.af)) YYERROR; memcpy(&binat.src.addr, &$8->addr, sizeof(binat.src.addr)); free($8); } if ($9 != NULL) { if ($9->next) { yyerror("multiple binat ip addresses"); YYERROR; } if ($9->af != binat.af && $9->af) { yyerror("binat ip versions must match"); YYERROR; } if (check_netmask($9, binat.af)) YYERROR; memcpy(&binat.dst.addr, &$9->addr, sizeof(binat.dst.addr)); binat.dst.neg = $9->not; free($9); } if (binat.action == PF_NOBINAT) { if ($13 != NULL) { yyerror("'no binat' rule does not need" " '->'"); YYERROR; } } else { if ($13 == NULL || $13->host == NULL) { yyerror("'binat' rule requires" " '-> address'"); YYERROR; } remove_invalid_hosts(&$13->host, &binat.af); if (invalid_redirect($13->host, binat.af)) YYERROR; if ($13->host->next != NULL) { yyerror("binat rule must redirect to " "a single address"); YYERROR; } if (check_netmask($13->host, binat.af)) YYERROR; if (!PF_AZERO(&binat.src.addr.v.a.mask, binat.af) && !PF_AEQ(&binat.src.addr.v.a.mask, &$13->host->addr.v.a.mask, binat.af)) { yyerror("'binat' source mask and " "redirect mask must be the same"); YYERROR; } TAILQ_INIT(&binat.rpool.list); pa = calloc(1, sizeof(struct pf_pooladdr)); if (pa == NULL) err(1, "binat: calloc"); pa->addr = $13->host->addr; pa->ifname[0] = 0; TAILQ_INSERT_TAIL(&binat.rpool.list, pa, 
entries); free($13); } pfctl_add_rule(pf, &binat, ""); } ; tag : /* empty */ { $$ = NULL; } | TAG STRING { $$ = $2; } ; tagged : /* empty */ { $$.neg = 0; $$.name = NULL; } | not TAGGED string { $$.neg = $1; $$.name = $3; } ; rtable : /* empty */ { $$ = -1; } | RTABLE NUMBER { if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); YYERROR; } $$ = $2; } ; route_host : STRING { $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "route_host: calloc"); $$->ifname = $1; set_ipmask($$, 128); $$->next = NULL; $$->tail = $$; } | '(' STRING host ')' { struct node_host *n; $$ = $3; for (n = $3; n != NULL; n = n->next) n->ifname = $2; } ; route_host_list : route_host optnl { $$ = $1; } | route_host_list comma route_host optnl { if ($1->af == 0) $1->af = $3->af; if ($1->af != $3->af) { yyerror("all pool addresses must be in the " "same address family"); YYERROR; } $1->tail->next = $3; $1->tail = $3->tail; $$ = $1; } ; routespec : route_host { $$ = $1; } | '{' optnl route_host_list '}' { $$ = $3; } ; route : /* empty */ { $$.host = NULL; $$.rt = 0; $$.pool_opts = 0; } | FASTROUTE { /* backwards-compat */ $$.host = NULL; $$.rt = 0; $$.pool_opts = 0; } | ROUTETO routespec pool_opts { $$.host = $2; $$.rt = PF_ROUTETO; $$.pool_opts = $3.type | $3.opts; if ($3.key != NULL) $$.key = $3.key; } | REPLYTO routespec pool_opts { $$.host = $2; $$.rt = PF_REPLYTO; $$.pool_opts = $3.type | $3.opts; if ($3.key != NULL) $$.key = $3.key; } | DUPTO routespec pool_opts { $$.host = $2; $$.rt = PF_DUPTO; $$.pool_opts = $3.type | $3.opts; if ($3.key != NULL) $$.key = $3.key; } ; timeout_spec : STRING NUMBER { if (check_rulestate(PFCTL_STATE_OPTION)) { free($1); YYERROR; } if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } if (pfctl_set_timeout(pf, $1, $2, 0) != 0) { yyerror("unknown timeout %s", $1); free($1); YYERROR; } free($1); } | INTERVAL NUMBER { if (check_rulestate(PFCTL_STATE_OPTION)) YYERROR; if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } if (pfctl_set_timeout(pf, "interval", $2, 0) != 0) YYERROR; } ; timeout_list : timeout_list comma timeout_spec optnl | timeout_spec optnl ; limit_spec : STRING NUMBER { if (check_rulestate(PFCTL_STATE_OPTION)) { free($1); YYERROR; } if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } if (pfctl_set_limit(pf, $1, $2) != 0) { yyerror("unable to set limit %s %u", $1, $2); free($1); YYERROR; } free($1); } ; limit_list : limit_list comma limit_spec optnl | limit_spec optnl ; comma : ',' | /* empty */ ; yesno : NO { $$ = 0; } | STRING { if (!strcmp($1, "yes")) $$ = 1; else { yyerror("invalid value '%s', expected 'yes' " "or 'no'", $1); free($1); YYERROR; } free($1); } ; unaryop : '=' { $$ = PF_OP_EQ; } | '!' '=' { $$ = PF_OP_NE; } | '<' '=' { $$ = PF_OP_LE; } | '<' { $$ = PF_OP_LT; } | '>' '=' { $$ = PF_OP_GE; } | '>' { $$ = PF_OP_GT; } ; %% int yyerror(const char *fmt, ...) 
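/*
 * yyerror() bumps file->errors and returns 0, so the parse continues
 * past a bad statement; popfile() later folds each include file's error
 * count into its parent, and parse_config() reports the total.
 */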
{ va_list ap; file->errors++; va_start(ap, fmt); fprintf(stderr, "%s:%d: ", file->name, yylval.lineno); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); va_end(ap); return (0); } int disallow_table(struct node_host *h, const char *fmt) { for (; h != NULL; h = h->next) if (h->addr.type == PF_ADDR_TABLE) { yyerror(fmt, h->addr.v.tblname); return (1); } return (0); } int disallow_urpf_failed(struct node_host *h, const char *fmt) { for (; h != NULL; h = h->next) if (h->addr.type == PF_ADDR_URPFFAILED) { yyerror(fmt); return (1); } return (0); } int disallow_alias(struct node_host *h, const char *fmt) { for (; h != NULL; h = h->next) if (DYNIF_MULTIADDR(h->addr)) { yyerror(fmt, h->addr.v.tblname); return (1); } return (0); } int rule_consistent(struct pf_rule *r, int anchor_call) { int problems = 0; switch (r->action) { case PF_PASS: case PF_DROP: case PF_SCRUB: case PF_NOSCRUB: problems = filter_consistent(r, anchor_call); break; case PF_NAT: case PF_NONAT: problems = nat_consistent(r); break; case PF_RDR: case PF_NORDR: problems = rdr_consistent(r); break; case PF_BINAT: case PF_NOBINAT: default: break; } return (problems); } int filter_consistent(struct pf_rule *r, int anchor_call) { int problems = 0; if (r->proto != IPPROTO_TCP && r->proto != IPPROTO_UDP && (r->src.port_op || r->dst.port_op)) { yyerror("port only applies to tcp/udp"); problems++; } if (r->proto != IPPROTO_ICMP && r->proto != IPPROTO_ICMPV6 && (r->type || r->code)) { yyerror("icmp-type/code only applies to icmp"); problems++; } if (!r->af && (r->type || r->code)) { yyerror("must indicate address family with icmp-type/code"); problems++; } if (r->overload_tblname[0] && r->max_src_conn == 0 && r->max_src_conn_rate.seconds == 0) { yyerror("'overload' requires 'max-src-conn' " "or 'max-src-conn-rate'"); problems++; } if ((r->proto == IPPROTO_ICMP && r->af == AF_INET6) || (r->proto == IPPROTO_ICMPV6 && r->af == AF_INET)) { yyerror("proto %s doesn't match address family %s", r->proto == IPPROTO_ICMP ? "icmp" : "icmp6", r->af == AF_INET ? "inet" : "inet6"); problems++; } if (r->allow_opts && r->action != PF_PASS) { yyerror("allow-opts can only be specified for pass rules"); problems++; } if (r->rule_flag & PFRULE_FRAGMENT && (r->src.port_op || r->dst.port_op || r->flagset || r->type || r->code)) { yyerror("fragments can be filtered only on IP header fields"); problems++; } if (r->rule_flag & PFRULE_RETURNRST && r->proto != IPPROTO_TCP) { yyerror("return-rst can only be applied to TCP rules"); problems++; } if (r->max_src_nodes && !(r->rule_flag & PFRULE_RULESRCTRACK)) { yyerror("max-src-nodes requires 'source-track rule'"); problems++; } if (r->action == PF_DROP && r->keep_state) { yyerror("keep state on block rules doesn't make sense"); problems++; } if (r->rule_flag & PFRULE_STATESLOPPY && (r->keep_state == PF_STATE_MODULATE || r->keep_state == PF_STATE_SYNPROXY)) { yyerror("sloppy state matching cannot be used with " "synproxy state or modulate state"); problems++; } return (-problems); } int nat_consistent(struct pf_rule *r) { return (0); /* yeah! 
*/ } int rdr_consistent(struct pf_rule *r) { int problems = 0; if (r->proto != IPPROTO_TCP && r->proto != IPPROTO_UDP) { if (r->src.port_op) { yyerror("src port only applies to tcp/udp"); problems++; } if (r->dst.port_op) { yyerror("dst port only applies to tcp/udp"); problems++; } if (r->rpool.proxy_port[0]) { yyerror("rpool port only applies to tcp/udp"); problems++; } } if (r->dst.port_op && r->dst.port_op != PF_OP_EQ && r->dst.port_op != PF_OP_RRG) { yyerror("invalid port operator for rdr destination port"); problems++; } return (-problems); } int process_tabledef(char *name, struct table_opts *opts) { struct pfr_buffer ab; struct node_tinit *ti; bzero(&ab, sizeof(ab)); ab.pfrb_type = PFRB_ADDRS; SIMPLEQ_FOREACH(ti, &opts->init_nodes, entries) { if (ti->file) if (pfr_buf_load(&ab, ti->file, 0, append_addr)) { if (errno) yyerror("cannot load \"%s\": %s", ti->file, strerror(errno)); else yyerror("file \"%s\" contains bad data", ti->file); goto _error; } if (ti->host) if (append_addr_host(&ab, ti->host, 0, 0)) { yyerror("cannot create address buffer: %s", strerror(errno)); goto _error; } } if (pf->opts & PF_OPT_VERBOSE) print_tabledef(name, opts->flags, opts->init_addr, &opts->init_nodes); if (!(pf->opts & PF_OPT_NOACTION) && pfctl_define_table(name, opts->flags, opts->init_addr, pf->anchor->name, &ab, pf->anchor->ruleset.tticket)) { yyerror("cannot define table %s: %s", name, pfr_strerror(errno)); goto _error; } pf->tdirty = 1; pfr_buf_clear(&ab); return (0); _error: pfr_buf_clear(&ab); return (-1); } struct keywords { const char *k_name; int k_val; }; /* macro gore, but you should've seen the prior indentation nightmare... */ #define FREE_LIST(T,r) \ do { \ T *p, *node = r; \ while (node != NULL) { \ p = node; \ node = node->next; \ free(p); \ } \ } while (0) #define LOOP_THROUGH(T,n,r,C) \ do { \ T *n; \ if (r == NULL) { \ r = calloc(1, sizeof(T)); \ if (r == NULL) \ err(1, "LOOP: calloc"); \ r->next = NULL; \ } \ n = r; \ while (n != NULL) { \ do { \ C; \ } while (0); \ n = n->next; \ } \ } while (0) void expand_label_str(char *label, size_t len, const char *srch, const char *repl) { char *tmp; char *p, *q; if ((tmp = calloc(1, len)) == NULL) err(1, "expand_label_str: calloc"); p = q = label; while ((q = strstr(p, srch)) != NULL) { *q = '\0'; if ((strlcat(tmp, p, len) >= len) || (strlcat(tmp, repl, len) >= len)) errx(1, "expand_label: label too long"); q += strlen(srch); p = q; } if (strlcat(tmp, p, len) >= len) errx(1, "expand_label: label too long"); strlcpy(label, tmp, len); /* always fits */ free(tmp); } void expand_label_if(const char *name, char *label, size_t len, const char *ifname) { if (strstr(label, name) != NULL) { if (!*ifname) expand_label_str(label, len, name, "any"); else expand_label_str(label, len, name, ifname); } } void expand_label_addr(const char *name, char *label, size_t len, sa_family_t af, struct node_host *h) { char tmp[64], tmp_not[66]; if (strstr(label, name) != NULL) { switch (h->addr.type) { case PF_ADDR_DYNIFTL: snprintf(tmp, sizeof(tmp), "(%s)", h->addr.v.ifname); break; case PF_ADDR_TABLE: snprintf(tmp, sizeof(tmp), "<%s>", h->addr.v.tblname); break; case PF_ADDR_NOROUTE: snprintf(tmp, sizeof(tmp), "no-route"); break; case PF_ADDR_URPFFAILED: snprintf(tmp, sizeof(tmp), "urpf-failed"); break; case PF_ADDR_ADDRMASK: if (!af || (PF_AZERO(&h->addr.v.a.addr, af) && PF_AZERO(&h->addr.v.a.mask, af))) snprintf(tmp, sizeof(tmp), "any"); else { char a[48]; int bits; if (inet_ntop(af, &h->addr.v.a.addr, a, sizeof(a)) == NULL) snprintf(tmp, sizeof(tmp), "?"); 
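/* unmask() reduces the stored mask to a prefix length; only non-host
 * masks are rendered as "addr/bits" in the expanded label. */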
else { bits = unmask(&h->addr.v.a.mask, af); if ((af == AF_INET && bits < 32) || (af == AF_INET6 && bits < 128)) snprintf(tmp, sizeof(tmp), "%s/%d", a, bits); else snprintf(tmp, sizeof(tmp), "%s", a); } } break; default: snprintf(tmp, sizeof(tmp), "?"); break; } if (h->not) { snprintf(tmp_not, sizeof(tmp_not), "! %s", tmp); expand_label_str(label, len, name, tmp_not); } else expand_label_str(label, len, name, tmp); } } void expand_label_port(const char *name, char *label, size_t len, struct node_port *port) { char a1[6], a2[6], op[13] = ""; if (strstr(label, name) != NULL) { snprintf(a1, sizeof(a1), "%u", ntohs(port->port[0])); snprintf(a2, sizeof(a2), "%u", ntohs(port->port[1])); if (!port->op) ; else if (port->op == PF_OP_IRG) snprintf(op, sizeof(op), "%s><%s", a1, a2); else if (port->op == PF_OP_XRG) snprintf(op, sizeof(op), "%s<>%s", a1, a2); else if (port->op == PF_OP_EQ) snprintf(op, sizeof(op), "%s", a1); else if (port->op == PF_OP_NE) snprintf(op, sizeof(op), "!=%s", a1); else if (port->op == PF_OP_LT) snprintf(op, sizeof(op), "<%s", a1); else if (port->op == PF_OP_LE) snprintf(op, sizeof(op), "<=%s", a1); else if (port->op == PF_OP_GT) snprintf(op, sizeof(op), ">%s", a1); else if (port->op == PF_OP_GE) snprintf(op, sizeof(op), ">=%s", a1); expand_label_str(label, len, name, op); } } void expand_label_proto(const char *name, char *label, size_t len, u_int8_t proto) { struct protoent *pe; char n[4]; if (strstr(label, name) != NULL) { pe = getprotobynumber(proto); if (pe != NULL) expand_label_str(label, len, name, pe->p_name); else { snprintf(n, sizeof(n), "%u", proto); expand_label_str(label, len, name, n); } } } void expand_label_nr(const char *name, char *label, size_t len) { char n[11]; if (strstr(label, name) != NULL) { snprintf(n, sizeof(n), "%u", pf->anchor->match); expand_label_str(label, len, name, n); } } void expand_label(char *label, size_t len, const char *ifname, sa_family_t af, struct node_host *src_host, struct node_port *src_port, struct node_host *dst_host, struct node_port *dst_port, u_int8_t proto) { expand_label_if("$if", label, len, ifname); expand_label_addr("$srcaddr", label, len, af, src_host); expand_label_addr("$dstaddr", label, len, af, dst_host); expand_label_port("$srcport", label, len, src_port); expand_label_port("$dstport", label, len, dst_port); expand_label_proto("$proto", label, len, proto); expand_label_nr("$nr", label, len); } int expand_altq(struct pf_altq *a, struct node_if *interfaces, struct node_queue *nqueues, struct node_queue_bw bwspec, struct node_queue_opt *opts) { struct pf_altq pa, pb; char qname[PF_QNAME_SIZE]; struct node_queue *n; struct node_queue_bw bw; int errs = 0; if ((pf->loadopt & PFCTL_FLAG_ALTQ) == 0) { FREE_LIST(struct node_if, interfaces); if (nqueues) FREE_LIST(struct node_queue, nqueues); return (0); } LOOP_THROUGH(struct node_if, interface, interfaces, memcpy(&pa, a, sizeof(struct pf_altq)); if (strlcpy(pa.ifname, interface->ifname, sizeof(pa.ifname)) >= sizeof(pa.ifname)) errx(1, "expand_altq: strlcpy"); if (interface->not) { yyerror("altq on ! 
is not supported"); errs++; } else { if (eval_pfaltq(pf, &pa, &bwspec, opts)) errs++; else if (pfctl_add_altq(pf, &pa)) errs++; if (pf->opts & PF_OPT_VERBOSE) { print_altq(&pf->paltq->altq, 0, &bwspec, opts); if (nqueues && nqueues->tail) { printf("queue { "); LOOP_THROUGH(struct node_queue, queue, nqueues, printf("%s ", queue->queue); ); printf("}"); } printf("\n"); } if (pa.scheduler == ALTQT_CBQ || pa.scheduler == ALTQT_HFSC) { /* now create a root queue */ memset(&pb, 0, sizeof(struct pf_altq)); if (strlcpy(qname, "root_", sizeof(qname)) >= sizeof(qname)) errx(1, "expand_altq: strlcpy"); if (strlcat(qname, interface->ifname, sizeof(qname)) >= sizeof(qname)) errx(1, "expand_altq: strlcat"); if (strlcpy(pb.qname, qname, sizeof(pb.qname)) >= sizeof(pb.qname)) errx(1, "expand_altq: strlcpy"); if (strlcpy(pb.ifname, interface->ifname, sizeof(pb.ifname)) >= sizeof(pb.ifname)) errx(1, "expand_altq: strlcpy"); pb.qlimit = pa.qlimit; pb.scheduler = pa.scheduler; bw.bw_absolute = pa.ifbandwidth; bw.bw_percent = 0; if (eval_pfqueue(pf, &pb, &bw, opts)) errs++; else if (pfctl_add_altq(pf, &pb)) errs++; } LOOP_THROUGH(struct node_queue, queue, nqueues, n = calloc(1, sizeof(struct node_queue)); if (n == NULL) err(1, "expand_altq: calloc"); if (pa.scheduler == ALTQT_CBQ || pa.scheduler == ALTQT_HFSC) if (strlcpy(n->parent, qname, sizeof(n->parent)) >= sizeof(n->parent)) errx(1, "expand_altq: strlcpy"); if (strlcpy(n->queue, queue->queue, sizeof(n->queue)) >= sizeof(n->queue)) errx(1, "expand_altq: strlcpy"); if (strlcpy(n->ifname, interface->ifname, sizeof(n->ifname)) >= sizeof(n->ifname)) errx(1, "expand_altq: strlcpy"); n->scheduler = pa.scheduler; n->next = NULL; n->tail = n; if (queues == NULL) queues = n; else { queues->tail->next = n; queues->tail = n; } ); } ); FREE_LIST(struct node_if, interfaces); if (nqueues) FREE_LIST(struct node_queue, nqueues); return (errs); } int expand_queue(struct pf_altq *a, struct node_if *interfaces, struct node_queue *nqueues, struct node_queue_bw bwspec, struct node_queue_opt *opts) { struct node_queue *n, *nq; struct pf_altq pa; u_int8_t found = 0; u_int8_t errs = 0; if ((pf->loadopt & PFCTL_FLAG_ALTQ) == 0) { FREE_LIST(struct node_queue, nqueues); return (0); } if (queues == NULL) { yyerror("queue %s has no parent", a->qname); FREE_LIST(struct node_queue, nqueues); return (1); } LOOP_THROUGH(struct node_if, interface, interfaces, LOOP_THROUGH(struct node_queue, tqueue, queues, if (!strncmp(a->qname, tqueue->queue, PF_QNAME_SIZE) && (interface->ifname[0] == 0 || (!interface->not && !strncmp(interface->ifname, tqueue->ifname, IFNAMSIZ)) || (interface->not && strncmp(interface->ifname, tqueue->ifname, IFNAMSIZ)))) { /* found ourself in queues */ found++; memcpy(&pa, a, sizeof(struct pf_altq)); if (pa.scheduler != ALTQT_NONE && pa.scheduler != tqueue->scheduler) { yyerror("exactly one scheduler type " "per interface allowed"); return (1); } pa.scheduler = tqueue->scheduler; /* scheduler dependent error checking */ switch (pa.scheduler) { case ALTQT_PRIQ: if (nqueues != NULL) { yyerror("priq queues cannot " "have child queues"); return (1); } if (bwspec.bw_absolute > 0 || bwspec.bw_percent < 100) { yyerror("priq doesn't take " "bandwidth"); return (1); } break; default: break; } if (strlcpy(pa.ifname, tqueue->ifname, sizeof(pa.ifname)) >= sizeof(pa.ifname)) errx(1, "expand_queue: strlcpy"); if (strlcpy(pa.parent, tqueue->parent, sizeof(pa.parent)) >= sizeof(pa.parent)) errx(1, "expand_queue: strlcpy"); if (eval_pfqueue(pf, &pa, &bwspec, opts)) errs++; else if 
(pfctl_add_altq(pf, &pa)) errs++; for (nq = nqueues; nq != NULL; nq = nq->next) { if (!strcmp(a->qname, nq->queue)) { yyerror("queue cannot have " "itself as child"); errs++; continue; } n = calloc(1, sizeof(struct node_queue)); if (n == NULL) err(1, "expand_queue: calloc"); if (strlcpy(n->parent, a->qname, sizeof(n->parent)) >= sizeof(n->parent)) errx(1, "expand_queue strlcpy"); if (strlcpy(n->queue, nq->queue, sizeof(n->queue)) >= sizeof(n->queue)) errx(1, "expand_queue strlcpy"); if (strlcpy(n->ifname, tqueue->ifname, sizeof(n->ifname)) >= sizeof(n->ifname)) errx(1, "expand_queue strlcpy"); n->scheduler = tqueue->scheduler; n->next = NULL; n->tail = n; if (queues == NULL) queues = n; else { queues->tail->next = n; queues->tail = n; } } if ((pf->opts & PF_OPT_VERBOSE) && ( (found == 1 && interface->ifname[0] == 0) || (found > 0 && interface->ifname[0] != 0))) { print_queue(&pf->paltq->altq, 0, &bwspec, interface->ifname[0] != 0, opts); if (nqueues && nqueues->tail) { printf("{ "); LOOP_THROUGH(struct node_queue, queue, nqueues, printf("%s ", queue->queue); ); printf("}"); } printf("\n"); } } ); ); FREE_LIST(struct node_queue, nqueues); FREE_LIST(struct node_if, interfaces); if (!found) { yyerror("queue %s has no parent", a->qname); errs++; } if (errs) return (1); else return (0); } void expand_rule(struct pf_rule *r, struct node_if *interfaces, struct node_host *rpool_hosts, struct node_proto *protos, struct node_os *src_oses, struct node_host *src_hosts, struct node_port *src_ports, struct node_host *dst_hosts, struct node_port *dst_ports, struct node_uid *uids, struct node_gid *gids, struct node_icmp *icmp_types, const char *anchor_call) { sa_family_t af = r->af; int added = 0, error = 0; char ifname[IF_NAMESIZE]; char label[PF_RULE_LABEL_SIZE]; char tagname[PF_TAG_NAME_SIZE]; char match_tagname[PF_TAG_NAME_SIZE]; struct pf_pooladdr *pa; struct node_host *h; u_int8_t flags, flagset, keep_state; if (strlcpy(label, r->label, sizeof(label)) >= sizeof(label)) errx(1, "expand_rule: strlcpy"); if (strlcpy(tagname, r->tagname, sizeof(tagname)) >= sizeof(tagname)) errx(1, "expand_rule: strlcpy"); if (strlcpy(match_tagname, r->match_tagname, sizeof(match_tagname)) >= sizeof(match_tagname)) errx(1, "expand_rule: strlcpy"); flags = r->flags; flagset = r->flagset; keep_state = r->keep_state; LOOP_THROUGH(struct node_if, interface, interfaces, LOOP_THROUGH(struct node_proto, proto, protos, LOOP_THROUGH(struct node_icmp, icmp_type, icmp_types, LOOP_THROUGH(struct node_host, src_host, src_hosts, LOOP_THROUGH(struct node_port, src_port, src_ports, LOOP_THROUGH(struct node_os, src_os, src_oses, LOOP_THROUGH(struct node_host, dst_host, dst_hosts, LOOP_THROUGH(struct node_port, dst_port, dst_ports, LOOP_THROUGH(struct node_uid, uid, uids, LOOP_THROUGH(struct node_gid, gid, gids, r->af = af; /* for link-local IPv6 address, interface must match up */ if ((r->af && src_host->af && r->af != src_host->af) || (r->af && dst_host->af && r->af != dst_host->af) || (src_host->af && dst_host->af && src_host->af != dst_host->af) || (src_host->ifindex && dst_host->ifindex && src_host->ifindex != dst_host->ifindex) || (src_host->ifindex && *interface->ifname && src_host->ifindex != if_nametoindex(interface->ifname)) || (dst_host->ifindex && *interface->ifname && dst_host->ifindex != if_nametoindex(interface->ifname))) continue; if (!r->af && src_host->af) r->af = src_host->af; else if (!r->af && dst_host->af) r->af = dst_host->af; if (*interface->ifname) strlcpy(r->ifname, interface->ifname, sizeof(r->ifname)); else if 
(if_indextoname(src_host->ifindex, ifname)) strlcpy(r->ifname, ifname, sizeof(r->ifname)); else if (if_indextoname(dst_host->ifindex, ifname)) strlcpy(r->ifname, ifname, sizeof(r->ifname)); else memset(r->ifname, '\0', sizeof(r->ifname)); if (strlcpy(r->label, label, sizeof(r->label)) >= sizeof(r->label)) errx(1, "expand_rule: strlcpy"); if (strlcpy(r->tagname, tagname, sizeof(r->tagname)) >= sizeof(r->tagname)) errx(1, "expand_rule: strlcpy"); if (strlcpy(r->match_tagname, match_tagname, sizeof(r->match_tagname)) >= sizeof(r->match_tagname)) errx(1, "expand_rule: strlcpy"); expand_label(r->label, PF_RULE_LABEL_SIZE, r->ifname, r->af, src_host, src_port, dst_host, dst_port, proto->proto); expand_label(r->tagname, PF_TAG_NAME_SIZE, r->ifname, r->af, src_host, src_port, dst_host, dst_port, proto->proto); expand_label(r->match_tagname, PF_TAG_NAME_SIZE, r->ifname, r->af, src_host, src_port, dst_host, dst_port, proto->proto); error += check_netmask(src_host, r->af); error += check_netmask(dst_host, r->af); r->ifnot = interface->not; r->proto = proto->proto; r->src.addr = src_host->addr; r->src.neg = src_host->not; r->src.port[0] = src_port->port[0]; r->src.port[1] = src_port->port[1]; r->src.port_op = src_port->op; r->dst.addr = dst_host->addr; r->dst.neg = dst_host->not; r->dst.port[0] = dst_port->port[0]; r->dst.port[1] = dst_port->port[1]; r->dst.port_op = dst_port->op; r->uid.op = uid->op; r->uid.uid[0] = uid->uid[0]; r->uid.uid[1] = uid->uid[1]; r->gid.op = gid->op; r->gid.gid[0] = gid->gid[0]; r->gid.gid[1] = gid->gid[1]; r->type = icmp_type->type; r->code = icmp_type->code; if ((keep_state == PF_STATE_MODULATE || keep_state == PF_STATE_SYNPROXY) && r->proto && r->proto != IPPROTO_TCP) r->keep_state = PF_STATE_NORMAL; else r->keep_state = keep_state; if (r->proto && r->proto != IPPROTO_TCP) { r->flags = 0; r->flagset = 0; } else { r->flags = flags; r->flagset = flagset; } if (icmp_type->proto && r->proto != icmp_type->proto) { yyerror("icmp-type mismatch"); error++; } if (src_os && src_os->os) { r->os_fingerprint = pfctl_get_fingerprint(src_os->os); if ((pf->opts & PF_OPT_VERBOSE2) && r->os_fingerprint == PF_OSFP_NOMATCH) fprintf(stderr, "warning: unknown '%s' OS fingerprint\n", src_os->os); } else { r->os_fingerprint = PF_OSFP_ANY; } TAILQ_INIT(&r->rpool.list); for (h = rpool_hosts; h != NULL; h = h->next) { pa = calloc(1, sizeof(struct pf_pooladdr)); if (pa == NULL) err(1, "expand_rule: calloc"); pa->addr = h->addr; if (h->ifname != NULL) { if (strlcpy(pa->ifname, h->ifname, sizeof(pa->ifname)) >= sizeof(pa->ifname)) errx(1, "expand_rule: strlcpy"); } else pa->ifname[0] = 0; TAILQ_INSERT_TAIL(&r->rpool.list, pa, entries); } if (rule_consistent(r, anchor_call[0]) < 0 || error) yyerror("skipping rule due to errors"); else { r->nr = pf->astack[pf->asd]->match++; pfctl_add_rule(pf, r, anchor_call); added++; } )))))))))); FREE_LIST(struct node_if, interfaces); FREE_LIST(struct node_proto, protos); FREE_LIST(struct node_host, src_hosts); FREE_LIST(struct node_port, src_ports); FREE_LIST(struct node_os, src_oses); FREE_LIST(struct node_host, dst_hosts); FREE_LIST(struct node_port, dst_ports); FREE_LIST(struct node_uid, uids); FREE_LIST(struct node_gid, gids); FREE_LIST(struct node_icmp, icmp_types); FREE_LIST(struct node_host, rpool_hosts); if (!added) yyerror("rule expands to no valid combination"); } int expand_skip_interface(struct node_if *interfaces) { int errs = 0; if (!interfaces || (!interfaces->next && !interfaces->not && !strcmp(interfaces->ifname, "none"))) { if (pf->opts & 
PF_OPT_VERBOSE) printf("set skip on none\n"); errs = pfctl_set_interface_flags(pf, "", PFI_IFLAG_SKIP, 0); return (errs); } if (pf->opts & PF_OPT_VERBOSE) printf("set skip on {"); LOOP_THROUGH(struct node_if, interface, interfaces, if (pf->opts & PF_OPT_VERBOSE) printf(" %s", interface->ifname); if (interface->not) { yyerror("skip on ! is not supported"); errs++; } else errs += pfctl_set_interface_flags(pf, interface->ifname, PFI_IFLAG_SKIP, 1); ); if (pf->opts & PF_OPT_VERBOSE) printf(" }\n"); FREE_LIST(struct node_if, interfaces); if (errs) return (1); else return (0); } #undef FREE_LIST #undef LOOP_THROUGH int check_rulestate(int desired_state) { if (require_order && (rulestate > desired_state)) { yyerror("Rules must be in order: options, normalization, " "queueing, translation, filtering"); return (1); } rulestate = desired_state; return (0); } int kw_cmp(const void *k, const void *e) { return (strcmp(k, ((const struct keywords *)e)->k_name)); } int lookup(char *s) { /* this has to be sorted always */ static const struct keywords keywords[] = { { "all", ALL}, { "allow-opts", ALLOWOPTS}, { "altq", ALTQ}, { "anchor", ANCHOR}, { "antispoof", ANTISPOOF}, { "any", ANY}, { "bandwidth", BANDWIDTH}, { "binat", BINAT}, { "binat-anchor", BINATANCHOR}, { "bitmask", BITMASK}, { "block", BLOCK}, { "block-policy", BLOCKPOLICY}, { "buckets", BUCKETS}, { "cbq", CBQ}, { "code", CODE}, { "codelq", CODEL}, { "crop", FRAGCROP}, { "debug", DEBUG}, { "divert-reply", DIVERTREPLY}, { "divert-to", DIVERTTO}, { "drop", DROP}, { "drop-ovl", FRAGDROP}, { "dup-to", DUPTO}, { "fail-policy", FAILPOLICY}, { "fairq", FAIRQ}, { "fastroute", FASTROUTE}, { "file", FILENAME}, { "fingerprints", FINGERPRINTS}, { "flags", FLAGS}, { "floating", FLOATING}, { "flush", FLUSH}, { "for", FOR}, { "fragment", FRAGMENT}, { "from", FROM}, { "global", GLOBAL}, { "group", GROUP}, { "hfsc", HFSC}, { "hogs", HOGS}, { "hostid", HOSTID}, { "icmp-type", ICMPTYPE}, { "icmp6-type", ICMP6TYPE}, { "if-bound", IFBOUND}, { "in", IN}, { "include", INCLUDE}, { "inet", INET}, { "inet6", INET6}, { "interval", INTERVAL}, { "keep", KEEP}, { "label", LABEL}, { "limit", LIMIT}, { "linkshare", LINKSHARE}, { "load", LOAD}, { "log", LOG}, { "loginterface", LOGINTERFACE}, { "max", MAXIMUM}, { "max-mss", MAXMSS}, { "max-src-conn", MAXSRCCONN}, { "max-src-conn-rate", MAXSRCCONNRATE}, { "max-src-nodes", MAXSRCNODES}, { "max-src-states", MAXSRCSTATES}, { "min-ttl", MINTTL}, { "modulate", MODULATE}, { "nat", NAT}, { "nat-anchor", NATANCHOR}, { "no", NO}, { "no-df", NODF}, { "no-route", NOROUTE}, { "no-sync", NOSYNC}, { "on", ON}, { "optimization", OPTIMIZATION}, { "os", OS}, { "out", OUT}, { "overload", OVERLOAD}, { "pass", PASS}, { "port", PORT}, { "prio", PRIO}, { "priority", PRIORITY}, { "priq", PRIQ}, { "probability", PROBABILITY}, { "proto", PROTO}, { "qlimit", QLIMIT}, { "queue", QUEUE}, { "quick", QUICK}, { "random", RANDOM}, { "random-id", RANDOMID}, { "rdr", RDR}, { "rdr-anchor", RDRANCHOR}, { "realtime", REALTIME}, { "reassemble", REASSEMBLE}, { "reply-to", REPLYTO}, { "require-order", REQUIREORDER}, { "return", RETURN}, { "return-icmp", RETURNICMP}, { "return-icmp6", RETURNICMP6}, { "return-rst", RETURNRST}, { "round-robin", ROUNDROBIN}, { "route", ROUTE}, { "route-to", ROUTETO}, { "rtable", RTABLE}, { "rule", RULE}, { "ruleset-optimization", RULESET_OPTIMIZATION}, { "scrub", SCRUB}, { "set", SET}, { "set-tos", SETTOS}, { "skip", SKIP}, { "sloppy", SLOPPY}, { "source-hash", SOURCEHASH}, { "source-track", SOURCETRACK}, { "state", STATE}, { 
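/* (table continues; unquoted words that miss the bsearch() below come
 * back to yylex() as plain STRING tokens, which is how interface names,
 * macro names and other bare words pass through the lexer) */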
"state-defaults", STATEDEFAULTS}, { "state-policy", STATEPOLICY}, { "static-port", STATICPORT}, { "sticky-address", STICKYADDRESS}, { "synproxy", SYNPROXY}, { "table", TABLE}, { "tag", TAG}, { "tagged", TAGGED}, { "target", TARGET}, { "tbrsize", TBRSIZE}, { "timeout", TIMEOUT}, { "to", TO}, { "tos", TOS}, { "ttl", TTL}, { "upperlimit", UPPERLIMIT}, { "urpf-failed", URPFFAILED}, { "user", USER}, }; const struct keywords *p; p = bsearch(s, keywords, sizeof(keywords)/sizeof(keywords[0]), sizeof(keywords[0]), kw_cmp); if (p) { if (debug > 1) fprintf(stderr, "%s: %d\n", s, p->k_val); return (p->k_val); } else { if (debug > 1) fprintf(stderr, "string: %s\n", s); return (STRING); } } #define MAXPUSHBACK 128 static char *parsebuf; static int parseindex; static char pushback_buffer[MAXPUSHBACK]; static int pushback_index = 0; int lgetc(int quotec) { int c, next; if (parsebuf) { /* Read character from the parsebuffer instead of input. */ if (parseindex >= 0) { c = parsebuf[parseindex++]; if (c != '\0') return (c); parsebuf = NULL; } else parseindex++; } if (pushback_index) return (pushback_buffer[--pushback_index]); if (quotec) { if ((c = getc(file->stream)) == EOF) { yyerror("reached end of file while parsing quoted string"); if (popfile() == EOF) return (EOF); return (quotec); } return (c); } while ((c = getc(file->stream)) == '\\') { next = getc(file->stream); if (next != '\n') { c = next; break; } yylval.lineno = file->lineno; file->lineno++; } while (c == EOF) { if (popfile() == EOF) return (EOF); c = getc(file->stream); } return (c); } int lungetc(int c) { if (c == EOF) return (EOF); if (parsebuf) { parseindex--; if (parseindex >= 0) return (c); } if (pushback_index < MAXPUSHBACK-1) return (pushback_buffer[pushback_index++] = c); else return (EOF); } int findeol(void) { int c; parsebuf = NULL; /* skip to either EOF or the first real EOL */ while (1) { if (pushback_index) c = pushback_buffer[--pushback_index]; else c = lgetc(0); if (c == '\n') { file->lineno++; break; } if (c == EOF) break; } return (ERROR); } int yylex(void) { char buf[8096]; char *p, *val; int quotec, next, c; int token; top: p = buf; while ((c = lgetc(0)) == ' ' || c == '\t') ; /* nothing */ yylval.lineno = file->lineno; if (c == '#') while ((c = lgetc(0)) != '\n' && c != EOF) ; /* nothing */ if (c == '$' && parsebuf == NULL) { while (1) { if ((c = lgetc(0)) == EOF) return (0); if (p + 1 >= buf + sizeof(buf) - 1) { yyerror("string too long"); return (findeol()); } if (isalnum(c) || c == '_') { *p++ = (char)c; continue; } *p = '\0'; lungetc(c); break; } val = symget(buf); if (val == NULL) { yyerror("macro '%s' not defined", buf); return (findeol()); } parsebuf = val; parseindex = 0; goto top; } switch (c) { case '\'': case '"': quotec = c; while (1) { if ((c = lgetc(quotec)) == EOF) return (0); if (c == '\n') { file->lineno++; continue; } else if (c == '\\') { if ((next = lgetc(quotec)) == EOF) return (0); if (next == quotec || c == ' ' || c == '\t') c = next; else if (next == '\n') continue; else lungetc(next); } else if (c == quotec) { *p = '\0'; break; } if (p + 1 >= buf + sizeof(buf) - 1) { yyerror("string too long"); return (findeol()); } *p++ = (char)c; } yylval.v.string = strdup(buf); if (yylval.v.string == NULL) err(1, "yylex: strdup"); return (STRING); case '<': next = lgetc(0); if (next == '>') { yylval.v.i = PF_OP_XRG; return (PORTBINARY); } lungetc(next); break; case '>': next = lgetc(0); if (next == '<') { yylval.v.i = PF_OP_IRG; return (PORTBINARY); } lungetc(next); break; case '-': next = lgetc(0); if (next == 
'>') return (ARROW); lungetc(next); break; } #define allowed_to_end_number(x) \ (isspace(x) || x == ')' || x ==',' || x == '/' || x == '}' || x == '=') if (c == '-' || isdigit(c)) { do { *p++ = c; if ((unsigned)(p-buf) >= sizeof(buf)) { yyerror("string too long"); return (findeol()); } } while ((c = lgetc(0)) != EOF && isdigit(c)); lungetc(c); if (p == buf + 1 && buf[0] == '-') goto nodigits; if (c == EOF || allowed_to_end_number(c)) { const char *errstr = NULL; *p = '\0'; yylval.v.number = strtonum(buf, LLONG_MIN, LLONG_MAX, &errstr); if (errstr) { yyerror("\"%s\" invalid number: %s", buf, errstr); return (findeol()); } return (NUMBER); } else { nodigits: while (p > buf + 1) lungetc(*--p); c = *--p; if (c == '-') return (c); } } #define allowed_in_string(x) \ (isalnum(x) || (ispunct(x) && x != '(' && x != ')' && \ x != '{' && x != '}' && x != '<' && x != '>' && \ x != '!' && x != '=' && x != '/' && x != '#' && \ x != ',')) if (isalnum(c) || c == ':' || c == '_') { do { *p++ = c; if ((unsigned)(p-buf) >= sizeof(buf)) { yyerror("string too long"); return (findeol()); } } while ((c = lgetc(0)) != EOF && (allowed_in_string(c))); lungetc(c); *p = '\0'; if ((token = lookup(buf)) == STRING) if ((yylval.v.string = strdup(buf)) == NULL) err(1, "yylex: strdup"); return (token); } if (c == '\n') { yylval.lineno = file->lineno; file->lineno++; } if (c == EOF) return (0); return (c); } int check_file_secrecy(int fd, const char *fname) { struct stat st; if (fstat(fd, &st)) { warn("cannot stat %s", fname); return (-1); } if (st.st_uid != 0 && st.st_uid != getuid()) { warnx("%s: owner not root or current user", fname); return (-1); } if (st.st_mode & (S_IRWXG | S_IRWXO)) { warnx("%s: group/world readable/writeable", fname); return (-1); } return (0); } struct file * pushfile(const char *name, int secret) { struct file *nfile; if ((nfile = calloc(1, sizeof(struct file))) == NULL || (nfile->name = strdup(name)) == NULL) { warn("malloc"); return (NULL); } if (TAILQ_FIRST(&files) == NULL && strcmp(nfile->name, "-") == 0) { nfile->stream = stdin; free(nfile->name); if ((nfile->name = strdup("stdin")) == NULL) { warn("strdup"); free(nfile); return (NULL); } } else if ((nfile->stream = fopen(nfile->name, "r")) == NULL) { warn("%s", nfile->name); free(nfile->name); free(nfile); return (NULL); } else if (secret && check_file_secrecy(fileno(nfile->stream), nfile->name)) { fclose(nfile->stream); free(nfile->name); free(nfile); return (NULL); } nfile->lineno = 1; TAILQ_INSERT_TAIL(&files, nfile, entry); return (nfile); } int popfile(void) { struct file *prev; if ((prev = TAILQ_PREV(file, files, entry)) != NULL) { prev->errors += file->errors; TAILQ_REMOVE(&files, file, entry); fclose(file->stream); free(file->name); free(file); file = prev; return (0); } return (EOF); } int parse_config(char *filename, struct pfctl *xpf) { int errors = 0; struct sym *sym; pf = xpf; errors = 0; rulestate = PFCTL_STATE_NONE; returnicmpdefault = (ICMP_UNREACH << 8) | ICMP_UNREACH_PORT; returnicmp6default = (ICMP6_DST_UNREACH << 8) | ICMP6_DST_UNREACH_NOPORT; blockpolicy = PFRULE_DROP; failpolicy = PFRULE_DROP; require_order = 1; if ((file = pushfile(filename, 0)) == NULL) { warn("cannot open the main config file!"); return (-1); } yyparse(); errors = file->errors; popfile(); /* Free macros and check which have not been used. 
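 * Macros enter this table via symset() -- from name=value definitions in
 * the config or from the -D command-line flag -- and symget() marks an
 * entry used when the lexer expands $name, so the warning below fires
 * only under -vv for definitions that nothing referenced.  Illustrative
 * invocation (ext_if is a hypothetical macro name):
 *
 *   pfctl -vv -D ext_if=em0 -nf /etc/pf.conf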
*/ while ((sym = TAILQ_FIRST(&symhead))) { if ((pf->opts & PF_OPT_VERBOSE2) && !sym->used) fprintf(stderr, "warning: macro '%s' not " "used\n", sym->nam); free(sym->nam); free(sym->val); TAILQ_REMOVE(&symhead, sym, entry); free(sym); } return (errors ? -1 : 0); } int symset(const char *nam, const char *val, int persist) { struct sym *sym; for (sym = TAILQ_FIRST(&symhead); sym && strcmp(nam, sym->nam); sym = TAILQ_NEXT(sym, entry)) ; /* nothing */ if (sym != NULL) { if (sym->persist == 1) return (0); else { free(sym->nam); free(sym->val); TAILQ_REMOVE(&symhead, sym, entry); free(sym); } } if ((sym = calloc(1, sizeof(*sym))) == NULL) return (-1); sym->nam = strdup(nam); if (sym->nam == NULL) { free(sym); return (-1); } sym->val = strdup(val); if (sym->val == NULL) { free(sym->nam); free(sym); return (-1); } sym->used = 0; sym->persist = persist; TAILQ_INSERT_TAIL(&symhead, sym, entry); return (0); } int pfctl_cmdline_symset(char *s) { char *sym, *val; int ret; if ((val = strrchr(s, '=')) == NULL) return (-1); if ((sym = malloc(strlen(s) - strlen(val) + 1)) == NULL) err(1, "pfctl_cmdline_symset: malloc"); strlcpy(sym, s, strlen(s) - strlen(val) + 1); ret = symset(sym, val + 1, 1); free(sym); return (ret); } char * symget(const char *nam) { struct sym *sym; TAILQ_FOREACH(sym, &symhead, entry) if (strcmp(nam, sym->nam) == 0) { sym->used = 1; return (sym->val); } return (NULL); } void mv_rules(struct pf_ruleset *src, struct pf_ruleset *dst) { int i; struct pf_rule *r; for (i = 0; i < PF_RULESET_MAX; ++i) { while ((r = TAILQ_FIRST(src->rules[i].active.ptr)) != NULL) { TAILQ_REMOVE(src->rules[i].active.ptr, r, entries); TAILQ_INSERT_TAIL(dst->rules[i].active.ptr, r, entries); dst->anchor->match++; } src->anchor->match = 0; while ((r = TAILQ_FIRST(src->rules[i].inactive.ptr)) != NULL) { TAILQ_REMOVE(src->rules[i].inactive.ptr, r, entries); TAILQ_INSERT_TAIL(dst->rules[i].inactive.ptr, r, entries); } } } void decide_address_family(struct node_host *n, sa_family_t *af) { if (*af != 0 || n == NULL) return; *af = n->af; while ((n = n->next) != NULL) { if (n->af != *af) { *af = 0; return; } } } void remove_invalid_hosts(struct node_host **nh, sa_family_t *af) { struct node_host *n = *nh, *prev = NULL; while (n != NULL) { if (*af && n->af && n->af != *af) { /* unlink and free n */ struct node_host *next = n->next; /* adjust tail pointer */ if (n == (*nh)->tail) (*nh)->tail = prev; /* adjust previous node's next pointer */ if (prev == NULL) *nh = next; else prev->next = next; /* free node */ if (n->ifname != NULL) free(n->ifname); free(n); n = next; } else { if (n->af && !*af) *af = n->af; prev = n; n = n->next; } } } int invalid_redirect(struct node_host *nh, sa_family_t af) { if (!af) { struct node_host *n; /* tables and dyniftl are ok without an address family */ for (n = nh; n != NULL; n = n->next) { if (n->addr.type != PF_ADDR_TABLE && n->addr.type != PF_ADDR_DYNIFTL) { yyerror("address family not given and " "translation address expands to multiple " "address families"); return (1); } } } if (nh == NULL) { yyerror("no translation address with matching address family " "found."); return (1); } return (0); } int atoul(char *s, u_long *ulvalp) { u_long ulval; char *ep; errno = 0; ulval = strtoul(s, &ep, 0); if (s[0] == '\0' || *ep != '\0') return (-1); if (errno == ERANGE && ulval == ULONG_MAX) return (-1); *ulvalp = ulval; return (0); } int getservice(char *n) { struct servent *s; u_long ulval; if (atoul(n, &ulval) == 0) { if (ulval > 65535) { yyerror("illegal port value %lu", ulval); return (-1); } 
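/*
 * Both branches return the port in network byte order: the numeric
 * branch converts with htons() below, matching the s_port convention of
 * getservbyname(3) in the fallback branch.  Caller-side sketch with
 * hypothetical values (parseport() is defined further down):
 *
 *   struct range r;
 *   if (parseport("www:8080", &r, PPORT_RANGE) == 0)
 *           printf("%u-%u\n", ntohs(r.a), ntohs(r.b));   prints 80-8080
 */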
return (htons(ulval)); } else { s = getservbyname(n, "tcp"); if (s == NULL) s = getservbyname(n, "udp"); if (s == NULL) { yyerror("unknown port %s", n); return (-1); } return (s->s_port); } } int rule_label(struct pf_rule *r, char *s) { if (s) { if (strlcpy(r->label, s, sizeof(r->label)) >= sizeof(r->label)) { yyerror("rule label too long (max %d chars)", sizeof(r->label)-1); return (-1); } } return (0); } u_int16_t parseicmpspec(char *w, sa_family_t af) { const struct icmpcodeent *p; u_long ulval; u_int8_t icmptype; if (af == AF_INET) icmptype = returnicmpdefault >> 8; else icmptype = returnicmp6default >> 8; if (atoul(w, &ulval) == -1) { if ((p = geticmpcodebyname(icmptype, w, af)) == NULL) { yyerror("unknown icmp code %s", w); return (0); } ulval = p->code; } if (ulval > 255) { yyerror("invalid icmp code %lu", ulval); return (0); } return (icmptype << 8 | ulval); } int parseport(char *port, struct range *r, int extensions) { char *p = strchr(port, ':'); if (p == NULL) { if ((r->a = getservice(port)) == -1) return (-1); r->b = 0; r->t = PF_OP_NONE; return (0); } if ((extensions & PPORT_STAR) && !strcmp(p+1, "*")) { *p = 0; if ((r->a = getservice(port)) == -1) return (-1); r->b = 0; r->t = PF_OP_IRG; return (0); } if ((extensions & PPORT_RANGE)) { *p++ = 0; if ((r->a = getservice(port)) == -1 || (r->b = getservice(p)) == -1) return (-1); if (r->a == r->b) { r->b = 0; r->t = PF_OP_NONE; } else r->t = PF_OP_RRG; return (0); } return (-1); } int pfctl_load_anchors(int dev, struct pfctl *pf, struct pfr_buffer *trans) { struct loadanchors *la; TAILQ_FOREACH(la, &loadanchorshead, entries) { if (pf->opts & PF_OPT_VERBOSE) fprintf(stderr, "\nLoading anchor %s from %s\n", la->anchorname, la->filename); if (pfctl_rules(dev, la->filename, pf->opts, pf->optimize, la->anchorname, trans) == -1) return (-1); } return (0); } int kw_casecmp(const void *k, const void *e) { return (strcasecmp(k, ((const struct keywords *)e)->k_name)); } int map_tos(char *s, int *val) { /* DiffServ Codepoints and other TOS mappings */ const struct keywords toswords[] = { { "af11", IPTOS_DSCP_AF11 }, { "af12", IPTOS_DSCP_AF12 }, { "af13", IPTOS_DSCP_AF13 }, { "af21", IPTOS_DSCP_AF21 }, { "af22", IPTOS_DSCP_AF22 }, { "af23", IPTOS_DSCP_AF23 }, { "af31", IPTOS_DSCP_AF31 }, { "af32", IPTOS_DSCP_AF32 }, { "af33", IPTOS_DSCP_AF33 }, { "af41", IPTOS_DSCP_AF41 }, { "af42", IPTOS_DSCP_AF42 }, { "af43", IPTOS_DSCP_AF43 }, { "critical", IPTOS_PREC_CRITIC_ECP }, { "cs0", IPTOS_DSCP_CS0 }, { "cs1", IPTOS_DSCP_CS1 }, { "cs2", IPTOS_DSCP_CS2 }, { "cs3", IPTOS_DSCP_CS3 }, { "cs4", IPTOS_DSCP_CS4 }, { "cs5", IPTOS_DSCP_CS5 }, { "cs6", IPTOS_DSCP_CS6 }, { "cs7", IPTOS_DSCP_CS7 }, { "ef", IPTOS_DSCP_EF }, { "inetcontrol", IPTOS_PREC_INTERNETCONTROL }, { "lowdelay", IPTOS_LOWDELAY }, { "netcontrol", IPTOS_PREC_NETCONTROL }, { "reliability", IPTOS_RELIABILITY }, { "throughput", IPTOS_THROUGHPUT } }; const struct keywords *p; p = bsearch(s, toswords, sizeof(toswords)/sizeof(toswords[0]), sizeof(toswords[0]), kw_casecmp); if (p) { *val = p->k_val; return (1); } return (0); } int rt_tableid_max(void) { #ifdef __FreeBSD__ int fibs; size_t l = sizeof(fibs); if (sysctlbyname("net.fibs", &fibs, &l, NULL, 0) == -1) fibs = 16; /* XXX RT_MAXFIBS, at least limit it some. */ /* * As the OpenBSD code only compares > and not >= we need to adjust * here given we only accept values of 0..n and want to avoid #ifdefs * in the grammar. 
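 * In other words: with net.fibs == N the kernel accepts rtable values
 * 0..N-1, so returning N - 1 keeps the grammar's strict greater-than
 * range check correct on both platforms.  Standalone sketch of the same
 * sysctl query, mirroring the fallback below:
 *
 *   int fibs;
 *   size_t len = sizeof(fibs);
 *   if (sysctlbyname("net.fibs", &fibs, &len, NULL, 0) == -1)
 *           fibs = 16;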
*/ return (fibs - 1); #else return (RT_TABLEID_MAX); #endif } Index: head/sbin/pfctl/pfctl.c =================================================================== --- head/sbin/pfctl/pfctl.c (revision 338208) +++ head/sbin/pfctl/pfctl.c (revision 338209) @@ -1,2500 +1,2504 @@ /* $OpenBSD: pfctl.c,v 1.278 2008/08/31 20:18:17 jmc Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002,2003 Henning Brauer * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); +#define PFIOC_USE_LATEST + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pfctl_parser.h" #include "pfctl.h" void usage(void); int pfctl_enable(int, int); int pfctl_disable(int, int); int pfctl_clear_stats(int, int); int pfctl_get_skip_ifaces(void); int pfctl_check_skip_ifaces(char *); int pfctl_adjust_skip_ifaces(struct pfctl *); int pfctl_clear_interface_flags(int, int); int pfctl_clear_rules(int, int, char *); int pfctl_clear_nat(int, int, char *); int pfctl_clear_altq(int, int); int pfctl_clear_src_nodes(int, int); int pfctl_clear_states(int, const char *, int); void pfctl_addrprefix(char *, struct pf_addr *); int pfctl_kill_src_nodes(int, const char *, int); int pfctl_net_kill_states(int, const char *, int); int pfctl_label_kill_states(int, const char *, int); int pfctl_id_kill_states(int, const char *, int); void pfctl_init_options(struct pfctl *); int pfctl_load_options(struct pfctl *); int pfctl_load_limit(struct pfctl *, unsigned int, unsigned int); int pfctl_load_timeout(struct pfctl *, unsigned int, unsigned int); int pfctl_load_debug(struct pfctl *, unsigned int); int pfctl_load_logif(struct pfctl *, char *); int pfctl_load_hostid(struct pfctl *, u_int32_t); int pfctl_get_pool(int, struct pf_pool *, u_int32_t, u_int32_t, int, char *); void pfctl_print_rule_counters(struct pf_rule *, int); int pfctl_show_rules(int, char *, int, enum pfctl_show, char *, int); int pfctl_show_nat(int, int, char *); int pfctl_show_src_nodes(int, int); int pfctl_show_states(int, const char *, int); int pfctl_show_status(int, int); int pfctl_show_running(int); int 
pfctl_show_timeouts(int, int); int pfctl_show_limits(int, int); void pfctl_debug(int, u_int32_t, int); int pfctl_test_altqsupport(int, int); int pfctl_show_anchors(int, int, char *); int pfctl_ruleset_trans(struct pfctl *, char *, struct pf_anchor *); int pfctl_load_ruleset(struct pfctl *, char *, struct pf_ruleset *, int, int); int pfctl_load_rule(struct pfctl *, char *, struct pf_rule *, int); const char *pfctl_lookup_option(char *, const char * const *); static struct pf_anchor_global pf_anchors; static struct pf_anchor pf_main_anchor; static struct pfr_buffer skip_b; static const char *clearopt; static char *rulesopt; static const char *showopt; static const char *debugopt; static char *anchoropt; static const char *optiopt = NULL; static const char *pf_device = "/dev/pf"; static char *ifaceopt; static char *tableopt; static const char *tblcmdopt; static int src_node_killers; static char *src_node_kill[2]; static int state_killers; static char *state_kill[2]; int loadopt; int altqsupport; int dev = -1; static int first_title = 1; static int labels = 0; #define INDENT(d, o) do { \ if (o) { \ int i; \ for (i=0; i < d; i++) \ printf(" "); \ } \ } while (0); \ static const struct { const char *name; int index; } pf_limits[] = { { "states", PF_LIMIT_STATES }, { "src-nodes", PF_LIMIT_SRC_NODES }, { "frags", PF_LIMIT_FRAGS }, { "table-entries", PF_LIMIT_TABLE_ENTRIES }, { NULL, 0 } }; struct pf_hint { const char *name; int timeout; }; static const struct pf_hint pf_hint_normal[] = { { "tcp.first", 2 * 60 }, { "tcp.opening", 30 }, { "tcp.established", 24 * 60 * 60 }, { "tcp.closing", 15 * 60 }, { "tcp.finwait", 45 }, { "tcp.closed", 90 }, { "tcp.tsdiff", 30 }, { NULL, 0 } }; static const struct pf_hint pf_hint_satellite[] = { { "tcp.first", 3 * 60 }, { "tcp.opening", 30 + 5 }, { "tcp.established", 24 * 60 * 60 }, { "tcp.closing", 15 * 60 + 5 }, { "tcp.finwait", 45 + 5 }, { "tcp.closed", 90 + 5 }, { "tcp.tsdiff", 60 }, { NULL, 0 } }; static const struct pf_hint pf_hint_conservative[] = { { "tcp.first", 60 * 60 }, { "tcp.opening", 15 * 60 }, { "tcp.established", 5 * 24 * 60 * 60 }, { "tcp.closing", 60 * 60 }, { "tcp.finwait", 10 * 60 }, { "tcp.closed", 3 * 60 }, { "tcp.tsdiff", 60 }, { NULL, 0 } }; static const struct pf_hint pf_hint_aggressive[] = { { "tcp.first", 30 }, { "tcp.opening", 5 }, { "tcp.established", 5 * 60 * 60 }, { "tcp.closing", 60 }, { "tcp.finwait", 30 }, { "tcp.closed", 30 }, { "tcp.tsdiff", 10 }, { NULL, 0 } }; static const struct { const char *name; const struct pf_hint *hint; } pf_hints[] = { { "normal", pf_hint_normal }, { "satellite", pf_hint_satellite }, { "high-latency", pf_hint_satellite }, { "conservative", pf_hint_conservative }, { "aggressive", pf_hint_aggressive }, { NULL, NULL } }; static const char * const clearopt_list[] = { "nat", "queue", "rules", "Sources", "states", "info", "Tables", "osfp", "all", NULL }; static const char * const showopt_list[] = { "nat", "queue", "rules", "Anchors", "Sources", "states", "info", "Interfaces", "labels", "timeouts", "memory", "Tables", "osfp", "Running", "all", NULL }; static const char * const tblcmdopt_list[] = { "kill", "flush", "add", "delete", "load", "replace", "show", "test", "zero", "expire", NULL }; static const char * const debugopt_list[] = { "none", "urgent", "misc", "loud", NULL }; static const char * const optiopt_list[] = { "none", "basic", "profile", NULL }; void usage(void) { extern char *__progname; fprintf(stderr, "usage: %s [-AdeghmNnOPqRrvz] [-a anchor] [-D macro=value] [-F modifier]\n" "\t[-f file] [-i 
interface] [-K host | network]\n" "\t[-k host | network | label | id] [-o level] [-p device]\n" "\t[-s modifier] [-t table -T command [address ...]] [-x level]\n", __progname); exit(1); } int pfctl_enable(int dev, int opts) { if (ioctl(dev, DIOCSTART)) { if (errno == EEXIST) errx(1, "pf already enabled"); else if (errno == ESRCH) errx(1, "pfil registeration failed"); else err(1, "DIOCSTART"); } if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "pf enabled\n"); if (altqsupport && ioctl(dev, DIOCSTARTALTQ)) if (errno != EEXIST) err(1, "DIOCSTARTALTQ"); return (0); } int pfctl_disable(int dev, int opts) { if (ioctl(dev, DIOCSTOP)) { if (errno == ENOENT) errx(1, "pf not enabled"); else err(1, "DIOCSTOP"); } if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "pf disabled\n"); if (altqsupport && ioctl(dev, DIOCSTOPALTQ)) if (errno != ENOENT) err(1, "DIOCSTOPALTQ"); return (0); } int pfctl_clear_stats(int dev, int opts) { if (ioctl(dev, DIOCCLRSTATUS)) err(1, "DIOCCLRSTATUS"); if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "pf: statistics cleared\n"); return (0); } int pfctl_get_skip_ifaces(void) { bzero(&skip_b, sizeof(skip_b)); skip_b.pfrb_type = PFRB_IFACES; for (;;) { pfr_buf_grow(&skip_b, skip_b.pfrb_size); skip_b.pfrb_size = skip_b.pfrb_msize; if (pfi_get_ifaces(NULL, skip_b.pfrb_caddr, &skip_b.pfrb_size)) err(1, "pfi_get_ifaces"); if (skip_b.pfrb_size <= skip_b.pfrb_msize) break; } return (0); } int pfctl_check_skip_ifaces(char *ifname) { struct pfi_kif *p; struct node_host *h = NULL, *n = NULL; PFRB_FOREACH(p, &skip_b) { if (!strcmp(ifname, p->pfik_name) && (p->pfik_flags & PFI_IFLAG_SKIP)) p->pfik_flags &= ~PFI_IFLAG_SKIP; if (!strcmp(ifname, p->pfik_name) && p->pfik_group != NULL) { if ((h = ifa_grouplookup(p->pfik_name, 0)) == NULL) continue; for (n = h; n != NULL; n = n->next) { if (p->pfik_ifp == NULL) continue; if (strncmp(p->pfik_name, ifname, IFNAMSIZ)) continue; p->pfik_flags &= ~PFI_IFLAG_SKIP; } } } return (0); } int pfctl_adjust_skip_ifaces(struct pfctl *pf) { struct pfi_kif *p, *pp; struct node_host *h = NULL, *n = NULL; PFRB_FOREACH(p, &skip_b) { if (p->pfik_group == NULL || !(p->pfik_flags & PFI_IFLAG_SKIP)) continue; pfctl_set_interface_flags(pf, p->pfik_name, PFI_IFLAG_SKIP, 0); if ((h = ifa_grouplookup(p->pfik_name, 0)) == NULL) continue; for (n = h; n != NULL; n = n->next) PFRB_FOREACH(pp, &skip_b) { if (pp->pfik_ifp == NULL) continue; if (strncmp(pp->pfik_name, n->ifname, IFNAMSIZ)) continue; if (!(pp->pfik_flags & PFI_IFLAG_SKIP)) pfctl_set_interface_flags(pf, pp->pfik_name, PFI_IFLAG_SKIP, 1); if (pp->pfik_flags & PFI_IFLAG_SKIP) pp->pfik_flags &= ~PFI_IFLAG_SKIP; } } PFRB_FOREACH(p, &skip_b) { if (p->pfik_ifp == NULL || ! 
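/*
 * This final pass reconciles "set skip on <group>" entries: the loop
 * above moved the SKIP flag from each group entry onto its member
 * interfaces and cleared the local flag for members that were confirmed,
 * so what remains here are real interfaces whose skip flag should be
 * dropped.  The flag itself is toggled through one helper (sketch,
 * "em0" being a hypothetical interface):
 *
 *   pfctl_set_interface_flags(pf, "em0", PFI_IFLAG_SKIP, 1);  set
 *   pfctl_set_interface_flags(pf, "em0", PFI_IFLAG_SKIP, 0);  clear
 */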
(p->pfik_flags & PFI_IFLAG_SKIP)) continue; pfctl_set_interface_flags(pf, p->pfik_name, PFI_IFLAG_SKIP, 0); } return (0); } int pfctl_clear_interface_flags(int dev, int opts) { struct pfioc_iface pi; if ((opts & PF_OPT_NOACTION) == 0) { bzero(&pi, sizeof(pi)); pi.pfiio_flags = PFI_IFLAG_SKIP; if (ioctl(dev, DIOCCLRIFFLAG, &pi)) err(1, "DIOCCLRIFFLAG"); if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "pf: interface flags reset\n"); } return (0); } int pfctl_clear_rules(int dev, int opts, char *anchorname) { struct pfr_buffer t; memset(&t, 0, sizeof(t)); t.pfrb_type = PFRB_TRANS; if (pfctl_add_trans(&t, PF_RULESET_SCRUB, anchorname) || pfctl_add_trans(&t, PF_RULESET_FILTER, anchorname) || pfctl_trans(dev, &t, DIOCXBEGIN, 0) || pfctl_trans(dev, &t, DIOCXCOMMIT, 0)) err(1, "pfctl_clear_rules"); if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "rules cleared\n"); return (0); } int pfctl_clear_nat(int dev, int opts, char *anchorname) { struct pfr_buffer t; memset(&t, 0, sizeof(t)); t.pfrb_type = PFRB_TRANS; if (pfctl_add_trans(&t, PF_RULESET_NAT, anchorname) || pfctl_add_trans(&t, PF_RULESET_BINAT, anchorname) || pfctl_add_trans(&t, PF_RULESET_RDR, anchorname) || pfctl_trans(dev, &t, DIOCXBEGIN, 0) || pfctl_trans(dev, &t, DIOCXCOMMIT, 0)) err(1, "pfctl_clear_nat"); if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "nat cleared\n"); return (0); } int pfctl_clear_altq(int dev, int opts) { struct pfr_buffer t; if (!altqsupport) return (-1); memset(&t, 0, sizeof(t)); t.pfrb_type = PFRB_TRANS; if (pfctl_add_trans(&t, PF_RULESET_ALTQ, "") || pfctl_trans(dev, &t, DIOCXBEGIN, 0) || pfctl_trans(dev, &t, DIOCXCOMMIT, 0)) err(1, "pfctl_clear_altq"); if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "altq cleared\n"); return (0); } int pfctl_clear_src_nodes(int dev, int opts) { if (ioctl(dev, DIOCCLRSRCNODES)) err(1, "DIOCCLRSRCNODES"); if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "source tracking entries cleared\n"); return (0); } int pfctl_clear_states(int dev, const char *iface, int opts) { struct pfioc_state_kill psk; memset(&psk, 0, sizeof(psk)); if (iface != NULL && strlcpy(psk.psk_ifname, iface, sizeof(psk.psk_ifname)) >= sizeof(psk.psk_ifname)) errx(1, "invalid interface: %s", iface); if (ioctl(dev, DIOCCLRSTATES, &psk)) err(1, "DIOCCLRSTATES"); if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "%d states cleared\n", psk.psk_killed); return (0); } void pfctl_addrprefix(char *addr, struct pf_addr *mask) { char *p; const char *errstr; int prefix, ret_ga, q, r; struct addrinfo hints, *res; if ((p = strchr(addr, '/')) == NULL) return; *p++ = '\0'; prefix = strtonum(p, 0, 128, &errstr); if (errstr) errx(1, "prefix is %s: %s", errstr, p); bzero(&hints, sizeof(hints)); /* prefix only with numeric addresses */ hints.ai_flags |= AI_NUMERICHOST; if ((ret_ga = getaddrinfo(addr, NULL, &hints, &res))) { errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); /* NOTREACHED */ } if (res->ai_family == AF_INET && prefix > 32) errx(1, "prefix too long for AF_INET"); else if (res->ai_family == AF_INET6 && prefix > 128) errx(1, "prefix too long for AF_INET6"); q = prefix >> 3; r = prefix & 7; switch (res->ai_family) { case AF_INET: bzero(&mask->v4, sizeof(mask->v4)); mask->v4.s_addr = htonl((u_int32_t) (0xffffffffffULL << (32 - prefix))); break; case AF_INET6: bzero(&mask->v6, sizeof(mask->v6)); if (q > 0) memset((void *)&mask->v6, 0xff, q); if (r > 0) *((u_char *)&mask->v6 + q) = (0xff00 >> r) & 0xff; break; } freeaddrinfo(res); } int pfctl_kill_src_nodes(int dev, const char *iface, int opts) { struct pfioc_src_node_kill psnk; 
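/*
 * pfctl_addrprefix() above derives the netmask from "addr/prefix": for
 * IPv4 it shifts a 64-bit all-ones constant and truncates, for IPv6 it
 * fills q = prefix / 8 whole 0xff bytes plus r = prefix % 8 leading
 * bits of the next byte.  E.g. prefix 20 gives q = 2, r = 4, i.e. the
 * bytes ff ff f0 00 (255.255.240.0 in IPv4 terms).  Minimal IPv4-only
 * sketch, valid for 0 <= prefix <= 32:
 *
 *   mask.s_addr = htonl((u_int32_t)(0xffffffffULL << (32 - prefix)));
 */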
struct addrinfo *res[2], *resp[2]; struct sockaddr last_src, last_dst; int killed, sources, dests; int ret_ga; killed = sources = dests = 0; memset(&psnk, 0, sizeof(psnk)); memset(&psnk.psnk_src.addr.v.a.mask, 0xff, sizeof(psnk.psnk_src.addr.v.a.mask)); memset(&last_src, 0xff, sizeof(last_src)); memset(&last_dst, 0xff, sizeof(last_dst)); pfctl_addrprefix(src_node_kill[0], &psnk.psnk_src.addr.v.a.mask); if ((ret_ga = getaddrinfo(src_node_kill[0], NULL, NULL, &res[0]))) { errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); /* NOTREACHED */ } for (resp[0] = res[0]; resp[0]; resp[0] = resp[0]->ai_next) { if (resp[0]->ai_addr == NULL) continue; /* We get lots of duplicates. Catch the easy ones */ if (memcmp(&last_src, resp[0]->ai_addr, sizeof(last_src)) == 0) continue; last_src = *(struct sockaddr *)resp[0]->ai_addr; psnk.psnk_af = resp[0]->ai_family; sources++; if (psnk.psnk_af == AF_INET) psnk.psnk_src.addr.v.a.addr.v4 = ((struct sockaddr_in *)resp[0]->ai_addr)->sin_addr; else if (psnk.psnk_af == AF_INET6) psnk.psnk_src.addr.v.a.addr.v6 = ((struct sockaddr_in6 *)resp[0]->ai_addr)-> sin6_addr; else errx(1, "Unknown address family %d", psnk.psnk_af); if (src_node_killers > 1) { dests = 0; memset(&psnk.psnk_dst.addr.v.a.mask, 0xff, sizeof(psnk.psnk_dst.addr.v.a.mask)); memset(&last_dst, 0xff, sizeof(last_dst)); pfctl_addrprefix(src_node_kill[1], &psnk.psnk_dst.addr.v.a.mask); if ((ret_ga = getaddrinfo(src_node_kill[1], NULL, NULL, &res[1]))) { errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); /* NOTREACHED */ } for (resp[1] = res[1]; resp[1]; resp[1] = resp[1]->ai_next) { if (resp[1]->ai_addr == NULL) continue; if (psnk.psnk_af != resp[1]->ai_family) continue; if (memcmp(&last_dst, resp[1]->ai_addr, sizeof(last_dst)) == 0) continue; last_dst = *(struct sockaddr *)resp[1]->ai_addr; dests++; if (psnk.psnk_af == AF_INET) psnk.psnk_dst.addr.v.a.addr.v4 = ((struct sockaddr_in *)resp[1]-> ai_addr)->sin_addr; else if (psnk.psnk_af == AF_INET6) psnk.psnk_dst.addr.v.a.addr.v6 = ((struct sockaddr_in6 *)resp[1]-> ai_addr)->sin6_addr; else errx(1, "Unknown address family %d", psnk.psnk_af); if (ioctl(dev, DIOCKILLSRCNODES, &psnk)) err(1, "DIOCKILLSRCNODES"); killed += psnk.psnk_killed; } freeaddrinfo(res[1]); } else { if (ioctl(dev, DIOCKILLSRCNODES, &psnk)) err(1, "DIOCKILLSRCNODES"); killed += psnk.psnk_killed; } } freeaddrinfo(res[0]); if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "killed %d src nodes from %d sources and %d " "destinations\n", killed, sources, dests); return (0); } int pfctl_net_kill_states(int dev, const char *iface, int opts) { struct pfioc_state_kill psk; struct addrinfo *res[2], *resp[2]; struct sockaddr last_src, last_dst; int killed, sources, dests; int ret_ga; killed = sources = dests = 0; memset(&psk, 0, sizeof(psk)); memset(&psk.psk_src.addr.v.a.mask, 0xff, sizeof(psk.psk_src.addr.v.a.mask)); memset(&last_src, 0xff, sizeof(last_src)); memset(&last_dst, 0xff, sizeof(last_dst)); if (iface != NULL && strlcpy(psk.psk_ifname, iface, sizeof(psk.psk_ifname)) >= sizeof(psk.psk_ifname)) errx(1, "invalid interface: %s", iface); pfctl_addrprefix(state_kill[0], &psk.psk_src.addr.v.a.mask); if ((ret_ga = getaddrinfo(state_kill[0], NULL, NULL, &res[0]))) { errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); /* NOTREACHED */ } for (resp[0] = res[0]; resp[0]; resp[0] = resp[0]->ai_next) { if (resp[0]->ai_addr == NULL) continue; /* We get lots of duplicates. 
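 * (getaddrinfo(3) called with NULL hints typically yields one entry per
 * supported socket type for the same numeric address, so the loop keeps
 * the previously handled sockaddr and skips byte-identical neighbors --
 * a simplified form of the test below, where resp0 stands in for
 * resp[0]:
 *
 *   if (memcmp(&last_src, resp0->ai_addr, sizeof(last_src)) == 0)
 *           continue;
 *
 * with that out of the way...)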
Catch the easy ones */ if (memcmp(&last_src, resp[0]->ai_addr, sizeof(last_src)) == 0) continue; last_src = *(struct sockaddr *)resp[0]->ai_addr; psk.psk_af = resp[0]->ai_family; sources++; if (psk.psk_af == AF_INET) psk.psk_src.addr.v.a.addr.v4 = ((struct sockaddr_in *)resp[0]->ai_addr)->sin_addr; else if (psk.psk_af == AF_INET6) psk.psk_src.addr.v.a.addr.v6 = ((struct sockaddr_in6 *)resp[0]->ai_addr)-> sin6_addr; else errx(1, "Unknown address family %d", psk.psk_af); if (state_killers > 1) { dests = 0; memset(&psk.psk_dst.addr.v.a.mask, 0xff, sizeof(psk.psk_dst.addr.v.a.mask)); memset(&last_dst, 0xff, sizeof(last_dst)); pfctl_addrprefix(state_kill[1], &psk.psk_dst.addr.v.a.mask); if ((ret_ga = getaddrinfo(state_kill[1], NULL, NULL, &res[1]))) { errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); /* NOTREACHED */ } for (resp[1] = res[1]; resp[1]; resp[1] = resp[1]->ai_next) { if (resp[1]->ai_addr == NULL) continue; if (psk.psk_af != resp[1]->ai_family) continue; if (memcmp(&last_dst, resp[1]->ai_addr, sizeof(last_dst)) == 0) continue; last_dst = *(struct sockaddr *)resp[1]->ai_addr; dests++; if (psk.psk_af == AF_INET) psk.psk_dst.addr.v.a.addr.v4 = ((struct sockaddr_in *)resp[1]-> ai_addr)->sin_addr; else if (psk.psk_af == AF_INET6) psk.psk_dst.addr.v.a.addr.v6 = ((struct sockaddr_in6 *)resp[1]-> ai_addr)->sin6_addr; else errx(1, "Unknown address family %d", psk.psk_af); if (ioctl(dev, DIOCKILLSTATES, &psk)) err(1, "DIOCKILLSTATES"); killed += psk.psk_killed; } freeaddrinfo(res[1]); } else { if (ioctl(dev, DIOCKILLSTATES, &psk)) err(1, "DIOCKILLSTATES"); killed += psk.psk_killed; } } freeaddrinfo(res[0]); if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "killed %d states from %d sources and %d " "destinations\n", killed, sources, dests); return (0); } int pfctl_label_kill_states(int dev, const char *iface, int opts) { struct pfioc_state_kill psk; if (state_killers != 2 || (strlen(state_kill[1]) == 0)) { warnx("no label specified"); usage(); } memset(&psk, 0, sizeof(psk)); if (iface != NULL && strlcpy(psk.psk_ifname, iface, sizeof(psk.psk_ifname)) >= sizeof(psk.psk_ifname)) errx(1, "invalid interface: %s", iface); if (strlcpy(psk.psk_label, state_kill[1], sizeof(psk.psk_label)) >= sizeof(psk.psk_label)) errx(1, "label too long: %s", state_kill[1]); if (ioctl(dev, DIOCKILLSTATES, &psk)) err(1, "DIOCKILLSTATES"); if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "killed %d states\n", psk.psk_killed); return (0); } int pfctl_id_kill_states(int dev, const char *iface, int opts) { struct pfioc_state_kill psk; if (state_killers != 2 || (strlen(state_kill[1]) == 0)) { warnx("no id specified"); usage(); } memset(&psk, 0, sizeof(psk)); if ((sscanf(state_kill[1], "%jx/%x", &psk.psk_pfcmp.id, &psk.psk_pfcmp.creatorid)) == 2) HTONL(psk.psk_pfcmp.creatorid); else if ((sscanf(state_kill[1], "%jx", &psk.psk_pfcmp.id)) == 1) { psk.psk_pfcmp.creatorid = 0; } else { warnx("wrong id format specified"); usage(); } if (psk.psk_pfcmp.id == 0) { warnx("cannot kill id 0"); usage(); } psk.psk_pfcmp.id = htobe64(psk.psk_pfcmp.id); if (ioctl(dev, DIOCKILLSTATES, &psk)) err(1, "DIOCKILLSTATES"); if ((opts & PF_OPT_QUIET) == 0) fprintf(stderr, "killed %d states\n", psk.psk_killed); return (0); } int pfctl_get_pool(int dev, struct pf_pool *pool, u_int32_t nr, u_int32_t ticket, int r_action, char *anchorname) { struct pfioc_pooladdr pp; struct pf_pooladdr *pa; u_int32_t pnr, mpnr; memset(&pp, 0, sizeof(pp)); memcpy(pp.anchor, anchorname, sizeof(pp.anchor)); pp.r_action = r_action; pp.r_num = nr; pp.ticket = ticket; if 
(ioctl(dev, DIOCGETADDRS, &pp)) { warn("DIOCGETADDRS"); return (-1); } mpnr = pp.nr; TAILQ_INIT(&pool->list); for (pnr = 0; pnr < mpnr; ++pnr) { pp.nr = pnr; if (ioctl(dev, DIOCGETADDR, &pp)) { warn("DIOCGETADDR"); return (-1); } pa = calloc(1, sizeof(struct pf_pooladdr)); if (pa == NULL) err(1, "calloc"); bcopy(&pp.addr, pa, sizeof(struct pf_pooladdr)); TAILQ_INSERT_TAIL(&pool->list, pa, entries); } return (0); } void pfctl_move_pool(struct pf_pool *src, struct pf_pool *dst) { struct pf_pooladdr *pa; while ((pa = TAILQ_FIRST(&src->list)) != NULL) { TAILQ_REMOVE(&src->list, pa, entries); TAILQ_INSERT_TAIL(&dst->list, pa, entries); } } void pfctl_clear_pool(struct pf_pool *pool) { struct pf_pooladdr *pa; while ((pa = TAILQ_FIRST(&pool->list)) != NULL) { TAILQ_REMOVE(&pool->list, pa, entries); free(pa); } } void pfctl_print_rule_counters(struct pf_rule *rule, int opts) { if (opts & PF_OPT_DEBUG) { const char *t[PF_SKIP_COUNT] = { "i", "d", "f", "p", "sa", "sp", "da", "dp" }; int i; printf(" [ Skip steps: "); for (i = 0; i < PF_SKIP_COUNT; ++i) { if (rule->skip[i].nr == rule->nr + 1) continue; printf("%s=", t[i]); if (rule->skip[i].nr == -1) printf("end "); else printf("%u ", rule->skip[i].nr); } printf("]\n"); printf(" [ queue: qname=%s qid=%u pqname=%s pqid=%u ]\n", rule->qname, rule->qid, rule->pqname, rule->pqid); } if (opts & PF_OPT_VERBOSE) { printf(" [ Evaluations: %-8llu Packets: %-8llu " "Bytes: %-10llu States: %-6ju]\n", (unsigned long long)rule->evaluations, (unsigned long long)(rule->packets[0] + rule->packets[1]), (unsigned long long)(rule->bytes[0] + rule->bytes[1]), (uintmax_t)rule->u_states_cur); if (!(opts & PF_OPT_DEBUG)) printf(" [ Inserted: uid %u pid %u " "State Creations: %-6ju]\n", (unsigned)rule->cuid, (unsigned)rule->cpid, (uintmax_t)rule->u_states_tot); } } void pfctl_print_title(char *title) { if (!first_title) printf("\n"); first_title = 0; printf("%s\n", title); } int pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format, char *anchorname, int depth) { struct pfioc_rule pr; u_int32_t nr, mnr, header = 0; int rule_numbers = opts & (PF_OPT_VERBOSE2 | PF_OPT_DEBUG); int numeric = opts & PF_OPT_NUMERIC; int len = strlen(path); int brace; char *p; if (path[0]) snprintf(&path[len], MAXPATHLEN - len, "/%s", anchorname); else snprintf(&path[len], MAXPATHLEN - len, "%s", anchorname); memset(&pr, 0, sizeof(pr)); memcpy(pr.anchor, path, sizeof(pr.anchor)); if (opts & PF_OPT_SHOWALL) { pr.rule.action = PF_PASS; if (ioctl(dev, DIOCGETRULES, &pr)) { warn("DIOCGETRULES"); goto error; } header++; } pr.rule.action = PF_SCRUB; if (ioctl(dev, DIOCGETRULES, &pr)) { warn("DIOCGETRULES"); goto error; } if (opts & PF_OPT_SHOWALL) { if (format == PFCTL_SHOW_RULES && (pr.nr > 0 || header)) pfctl_print_title("FILTER RULES:"); else if (format == PFCTL_SHOW_LABELS && labels) pfctl_print_title("LABEL COUNTERS:"); } mnr = pr.nr; if (opts & PF_OPT_CLRRULECTRS) pr.action = PF_GET_CLR_CNTR; for (nr = 0; nr < mnr; ++nr) { pr.nr = nr; if (ioctl(dev, DIOCGETRULE, &pr)) { warn("DIOCGETRULE"); goto error; } if (pfctl_get_pool(dev, &pr.rule.rpool, nr, pr.ticket, PF_SCRUB, path) != 0) goto error; switch (format) { case PFCTL_SHOW_LABELS: break; case PFCTL_SHOW_RULES: if (pr.rule.label[0] && (opts & PF_OPT_SHOWALL)) labels = 1; print_rule(&pr.rule, pr.anchor_call, rule_numbers, numeric); printf("\n"); pfctl_print_rule_counters(&pr.rule, opts); break; case PFCTL_SHOW_NOTHING: break; } pfctl_clear_pool(&pr.rule.rpool); } pr.rule.action = PF_PASS; if (ioctl(dev, DIOCGETRULES, &pr)) { 
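/*
 * Rule enumeration is a two-step ioctl protocol: DIOCGETRULES latches a
 * ticket and reports the rule count for one action/ruleset, after which
 * DIOCGETRULE is issued once per index under that ticket, exactly as in
 * the PF_SCRUB pass above.  Bare-bones sketch (error handling elided):
 *
 *   struct pfioc_rule pr;
 *   memset(&pr, 0, sizeof(pr));
 *   pr.rule.action = PF_PASS;
 *   ioctl(dev, DIOCGETRULES, &pr);
 *   for (u_int32_t i = 0, n = pr.nr; i < n; i++) {
 *           pr.nr = i;
 *           ioctl(dev, DIOCGETRULE, &pr);
 *   }
 */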
warn("DIOCGETRULES"); goto error; } mnr = pr.nr; for (nr = 0; nr < mnr; ++nr) { pr.nr = nr; if (ioctl(dev, DIOCGETRULE, &pr)) { warn("DIOCGETRULE"); goto error; } if (pfctl_get_pool(dev, &pr.rule.rpool, nr, pr.ticket, PF_PASS, path) != 0) goto error; switch (format) { case PFCTL_SHOW_LABELS: if (pr.rule.label[0]) { printf("%s %llu %llu %llu %llu" " %llu %llu %llu %ju\n", pr.rule.label, (unsigned long long)pr.rule.evaluations, (unsigned long long)(pr.rule.packets[0] + pr.rule.packets[1]), (unsigned long long)(pr.rule.bytes[0] + pr.rule.bytes[1]), (unsigned long long)pr.rule.packets[0], (unsigned long long)pr.rule.bytes[0], (unsigned long long)pr.rule.packets[1], (unsigned long long)pr.rule.bytes[1], (uintmax_t)pr.rule.u_states_tot); } break; case PFCTL_SHOW_RULES: brace = 0; if (pr.rule.label[0] && (opts & PF_OPT_SHOWALL)) labels = 1; INDENT(depth, !(opts & PF_OPT_VERBOSE)); if (pr.anchor_call[0] && ((((p = strrchr(pr.anchor_call, '_')) != NULL) && ((void *)p == (void *)pr.anchor_call || *(--p) == '/')) || (opts & PF_OPT_RECURSE))) { brace++; if ((p = strrchr(pr.anchor_call, '/')) != NULL) p++; else p = &pr.anchor_call[0]; } else p = &pr.anchor_call[0]; print_rule(&pr.rule, p, rule_numbers, numeric); if (brace) printf(" {\n"); else printf("\n"); pfctl_print_rule_counters(&pr.rule, opts); if (brace) { pfctl_show_rules(dev, path, opts, format, p, depth + 1); INDENT(depth, !(opts & PF_OPT_VERBOSE)); printf("}\n"); } break; case PFCTL_SHOW_NOTHING: break; } pfctl_clear_pool(&pr.rule.rpool); } path[len] = '\0'; return (0); error: path[len] = '\0'; return (-1); } int pfctl_show_nat(int dev, int opts, char *anchorname) { struct pfioc_rule pr; u_int32_t mnr, nr; static int nattype[3] = { PF_NAT, PF_RDR, PF_BINAT }; int i, dotitle = opts & PF_OPT_SHOWALL; memset(&pr, 0, sizeof(pr)); memcpy(pr.anchor, anchorname, sizeof(pr.anchor)); for (i = 0; i < 3; i++) { pr.rule.action = nattype[i]; if (ioctl(dev, DIOCGETRULES, &pr)) { warn("DIOCGETRULES"); return (-1); } mnr = pr.nr; for (nr = 0; nr < mnr; ++nr) { pr.nr = nr; if (ioctl(dev, DIOCGETRULE, &pr)) { warn("DIOCGETRULE"); return (-1); } if (pfctl_get_pool(dev, &pr.rule.rpool, nr, pr.ticket, nattype[i], anchorname) != 0) return (-1); if (dotitle) { pfctl_print_title("TRANSLATION RULES:"); dotitle = 0; } print_rule(&pr.rule, pr.anchor_call, opts & PF_OPT_VERBOSE2, opts & PF_OPT_NUMERIC); printf("\n"); pfctl_print_rule_counters(&pr.rule, opts); pfctl_clear_pool(&pr.rule.rpool); } } return (0); } int pfctl_show_src_nodes(int dev, int opts) { struct pfioc_src_nodes psn; struct pf_src_node *p; char *inbuf = NULL, *newinbuf = NULL; unsigned int len = 0; int i; memset(&psn, 0, sizeof(psn)); for (;;) { psn.psn_len = len; if (len) { newinbuf = realloc(inbuf, len); if (newinbuf == NULL) err(1, "realloc"); psn.psn_buf = inbuf = newinbuf; } if (ioctl(dev, DIOCGETSRCNODES, &psn) < 0) { warn("DIOCGETSRCNODES"); free(inbuf); return (-1); } if (psn.psn_len + sizeof(struct pfioc_src_nodes) < len) break; if (len == 0 && psn.psn_len == 0) goto done; if (len == 0 && psn.psn_len != 0) len = psn.psn_len; if (psn.psn_len == 0) goto done; /* no src_nodes */ len *= 2; } p = psn.psn_src_nodes; if (psn.psn_len > 0 && (opts & PF_OPT_SHOWALL)) pfctl_print_title("SOURCE TRACKING NODES:"); for (i = 0; i < psn.psn_len; i += sizeof(*p)) { print_src_node(p, opts); p++; } done: free(inbuf); return (0); } int pfctl_show_states(int dev, const char *iface, int opts) { struct pfioc_states ps; struct pfsync_state *p; char *inbuf = NULL, *newinbuf = NULL; unsigned int len = 0; int i, dotitle = 
(opts & PF_OPT_SHOWALL); memset(&ps, 0, sizeof(ps)); for (;;) { ps.ps_len = len; if (len) { newinbuf = realloc(inbuf, len); if (newinbuf == NULL) err(1, "realloc"); ps.ps_buf = inbuf = newinbuf; } if (ioctl(dev, DIOCGETSTATES, &ps) < 0) { warn("DIOCGETSTATES"); free(inbuf); return (-1); } if (ps.ps_len + sizeof(struct pfioc_states) < len) break; if (len == 0 && ps.ps_len == 0) goto done; if (len == 0 && ps.ps_len != 0) len = ps.ps_len; if (ps.ps_len == 0) goto done; /* no states */ len *= 2; } p = ps.ps_states; for (i = 0; i < ps.ps_len; i += sizeof(*p), p++) { if (iface != NULL && strcmp(p->ifname, iface)) continue; if (dotitle) { pfctl_print_title("STATES:"); dotitle = 0; } print_state(p, opts); } done: free(inbuf); return (0); } int pfctl_show_status(int dev, int opts) { struct pf_status status; if (ioctl(dev, DIOCGETSTATUS, &status)) { warn("DIOCGETSTATUS"); return (-1); } if (opts & PF_OPT_SHOWALL) pfctl_print_title("INFO:"); print_status(&status, opts); return (0); } int pfctl_show_running(int dev) { struct pf_status status; if (ioctl(dev, DIOCGETSTATUS, &status)) { warn("DIOCGETSTATUS"); return (-1); } print_running(&status); return (!status.running); } int pfctl_show_timeouts(int dev, int opts) { struct pfioc_tm pt; int i; if (opts & PF_OPT_SHOWALL) pfctl_print_title("TIMEOUTS:"); memset(&pt, 0, sizeof(pt)); for (i = 0; pf_timeouts[i].name; i++) { pt.timeout = pf_timeouts[i].timeout; if (ioctl(dev, DIOCGETTIMEOUT, &pt)) err(1, "DIOCGETTIMEOUT"); printf("%-20s %10d", pf_timeouts[i].name, pt.seconds); if (pf_timeouts[i].timeout >= PFTM_ADAPTIVE_START && pf_timeouts[i].timeout <= PFTM_ADAPTIVE_END) printf(" states"); else printf("s"); printf("\n"); } return (0); } int pfctl_show_limits(int dev, int opts) { struct pfioc_limit pl; int i; if (opts & PF_OPT_SHOWALL) pfctl_print_title("LIMITS:"); memset(&pl, 0, sizeof(pl)); for (i = 0; pf_limits[i].name; i++) { pl.index = pf_limits[i].index; if (ioctl(dev, DIOCGETLIMIT, &pl)) err(1, "DIOCGETLIMIT"); printf("%-13s ", pf_limits[i].name); if (pl.limit == UINT_MAX) printf("unlimited\n"); else printf("hard limit %8u\n", pl.limit); } return (0); } /* callbacks for rule/nat/rdr/addr */ int pfctl_add_pool(struct pfctl *pf, struct pf_pool *p, sa_family_t af) { struct pf_pooladdr *pa; if ((pf->opts & PF_OPT_NOACTION) == 0) { if (ioctl(pf->dev, DIOCBEGINADDRS, &pf->paddr)) err(1, "DIOCBEGINADDRS"); } pf->paddr.af = af; TAILQ_FOREACH(pa, &p->list, entries) { memcpy(&pf->paddr.addr, pa, sizeof(struct pf_pooladdr)); if ((pf->opts & PF_OPT_NOACTION) == 0) { if (ioctl(pf->dev, DIOCADDADDR, &pf->paddr)) err(1, "DIOCADDADDR"); } } return (0); } int pfctl_add_rule(struct pfctl *pf, struct pf_rule *r, const char *anchor_call) { u_int8_t rs_num; struct pf_rule *rule; struct pf_ruleset *rs; char *p; rs_num = pf_get_ruleset_number(r->action); if (rs_num == PF_RULESET_MAX) errx(1, "Invalid rule type %d", r->action); rs = &pf->anchor->ruleset; if (anchor_call[0] && r->anchor == NULL) { /* * Don't make non-brace anchors part of the main anchor pool. 
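 * A "non-brace" anchor is a call by name with no inline block, in which
 * case the parser leaves r->anchor NULL and only a detached pf_anchor
 * carrying the call's path and name is synthesized here; brace anchors
 * arrive with r->anchor already resolved.  In pf.conf terms
 * (illustrative names):
 *
 *   anchor "ftp-proxy/star"                          non-brace call
 *   anchor "goodguys" { pass in from 10.0.0.0/8 }    brace (inline) anchor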
*/ if ((r->anchor = calloc(1, sizeof(*r->anchor))) == NULL) err(1, "pfctl_add_rule: calloc"); pf_init_ruleset(&r->anchor->ruleset); r->anchor->ruleset.anchor = r->anchor; if (strlcpy(r->anchor->path, anchor_call, sizeof(rule->anchor->path)) >= sizeof(rule->anchor->path)) errx(1, "pfctl_add_rule: strlcpy"); if ((p = strrchr(anchor_call, '/')) != NULL) { if (!strlen(p)) err(1, "pfctl_add_rule: bad anchor name %s", anchor_call); } else p = (char *)anchor_call; if (strlcpy(r->anchor->name, p, sizeof(rule->anchor->name)) >= sizeof(rule->anchor->name)) errx(1, "pfctl_add_rule: strlcpy"); } if ((rule = calloc(1, sizeof(*rule))) == NULL) err(1, "calloc"); bcopy(r, rule, sizeof(*rule)); TAILQ_INIT(&rule->rpool.list); pfctl_move_pool(&r->rpool, &rule->rpool); TAILQ_INSERT_TAIL(rs->rules[rs_num].active.ptr, rule, entries); return (0); } int pfctl_ruleset_trans(struct pfctl *pf, char *path, struct pf_anchor *a) { int osize = pf->trans->pfrb_size; if ((pf->loadopt & PFCTL_FLAG_NAT) != 0) { if (pfctl_add_trans(pf->trans, PF_RULESET_NAT, path) || pfctl_add_trans(pf->trans, PF_RULESET_BINAT, path) || pfctl_add_trans(pf->trans, PF_RULESET_RDR, path)) return (1); } if (a == pf->astack[0] && ((altqsupport && (pf->loadopt & PFCTL_FLAG_ALTQ) != 0))) { if (pfctl_add_trans(pf->trans, PF_RULESET_ALTQ, path)) return (2); } if ((pf->loadopt & PFCTL_FLAG_FILTER) != 0) { if (pfctl_add_trans(pf->trans, PF_RULESET_SCRUB, path) || pfctl_add_trans(pf->trans, PF_RULESET_FILTER, path)) return (3); } if (pf->loadopt & PFCTL_FLAG_TABLE) if (pfctl_add_trans(pf->trans, PF_RULESET_TABLE, path)) return (4); if (pfctl_trans(pf->dev, pf->trans, DIOCXBEGIN, osize)) return (5); return (0); } int pfctl_load_ruleset(struct pfctl *pf, char *path, struct pf_ruleset *rs, int rs_num, int depth) { struct pf_rule *r; int error, len = strlen(path); int brace = 0; pf->anchor = rs->anchor; if (path[0]) snprintf(&path[len], MAXPATHLEN - len, "/%s", pf->anchor->name); else snprintf(&path[len], MAXPATHLEN - len, "%s", pf->anchor->name); if (depth) { if (TAILQ_FIRST(rs->rules[rs_num].active.ptr) != NULL) { brace++; if (pf->opts & PF_OPT_VERBOSE) printf(" {\n"); if ((pf->opts & PF_OPT_NOACTION) == 0 && (error = pfctl_ruleset_trans(pf, path, rs->anchor))) { printf("pfctl_load_rulesets: " "pfctl_ruleset_trans %d\n", error); goto error; } } else if (pf->opts & PF_OPT_VERBOSE) printf("\n"); } if (pf->optimize && rs_num == PF_RULESET_FILTER) pfctl_optimize_ruleset(pf, rs); while ((r = TAILQ_FIRST(rs->rules[rs_num].active.ptr)) != NULL) { TAILQ_REMOVE(rs->rules[rs_num].active.ptr, r, entries); if ((error = pfctl_load_rule(pf, path, r, depth))) goto error; if (r->anchor) { if ((error = pfctl_load_ruleset(pf, path, &r->anchor->ruleset, rs_num, depth + 1))) goto error; } else if (pf->opts & PF_OPT_VERBOSE) printf("\n"); free(r); } if (brace && pf->opts & PF_OPT_VERBOSE) { INDENT(depth - 1, (pf->opts & PF_OPT_VERBOSE)); printf("}\n"); } path[len] = '\0'; return (0); error: path[len] = '\0'; return (error); } int pfctl_load_rule(struct pfctl *pf, char *path, struct pf_rule *r, int depth) { u_int8_t rs_num = pf_get_ruleset_number(r->action); char *name; struct pfioc_rule pr; int len = strlen(path); bzero(&pr, sizeof(pr)); /* set up anchor before adding to path for anchor_call */ if ((pf->opts & PF_OPT_NOACTION) == 0) pr.ticket = pfctl_get_ticket(pf->trans, rs_num, path); if (strlcpy(pr.anchor, path, sizeof(pr.anchor)) >= sizeof(pr.anchor)) errx(1, "pfctl_load_rule: strlcpy"); if (r->anchor) { if (r->anchor->match) { if (path[0]) snprintf(&path[len], MAXPATHLEN 
- len, "/%s", r->anchor->name); else snprintf(&path[len], MAXPATHLEN - len, "%s", r->anchor->name); name = r->anchor->name; } else name = r->anchor->path; } else name = ""; if ((pf->opts & PF_OPT_NOACTION) == 0) { if (pfctl_add_pool(pf, &r->rpool, r->af)) return (1); pr.pool_ticket = pf->paddr.ticket; memcpy(&pr.rule, r, sizeof(pr.rule)); if (r->anchor && strlcpy(pr.anchor_call, name, sizeof(pr.anchor_call)) >= sizeof(pr.anchor_call)) errx(1, "pfctl_load_rule: strlcpy"); if (ioctl(pf->dev, DIOCADDRULE, &pr)) err(1, "DIOCADDRULE"); } if (pf->opts & PF_OPT_VERBOSE) { INDENT(depth, !(pf->opts & PF_OPT_VERBOSE2)); print_rule(r, r->anchor ? r->anchor->name : "", pf->opts & PF_OPT_VERBOSE2, pf->opts & PF_OPT_NUMERIC); } path[len] = '\0'; pfctl_clear_pool(&r->rpool); return (0); } int pfctl_add_altq(struct pfctl *pf, struct pf_altq *a) { if (altqsupport && (loadopt & PFCTL_FLAG_ALTQ) != 0) { memcpy(&pf->paltq->altq, a, sizeof(struct pf_altq)); if ((pf->opts & PF_OPT_NOACTION) == 0) { if (ioctl(pf->dev, DIOCADDALTQ, pf->paltq)) { if (errno == ENXIO) errx(1, "qtype not configured"); else if (errno == ENODEV) errx(1, "%s: driver does not support " "altq", a->ifname); else err(1, "DIOCADDALTQ"); } } pfaltq_store(&pf->paltq->altq); } return (0); } int pfctl_rules(int dev, char *filename, int opts, int optimize, char *anchorname, struct pfr_buffer *trans) { #define ERR(x) do { warn(x); goto _error; } while(0) #define ERRX(x) do { warnx(x); goto _error; } while(0) struct pfr_buffer *t, buf; struct pfioc_altq pa; struct pfctl pf; struct pf_ruleset *rs; struct pfr_table trs; char *path; int osize; RB_INIT(&pf_anchors); memset(&pf_main_anchor, 0, sizeof(pf_main_anchor)); pf_init_ruleset(&pf_main_anchor.ruleset); pf_main_anchor.ruleset.anchor = &pf_main_anchor; if (trans == NULL) { bzero(&buf, sizeof(buf)); buf.pfrb_type = PFRB_TRANS; t = &buf; osize = 0; } else { t = trans; osize = t->pfrb_size; } memset(&pa, 0, sizeof(pa)); + pa.version = PFIOC_ALTQ_VERSION; memset(&pf, 0, sizeof(pf)); memset(&trs, 0, sizeof(trs)); if ((path = calloc(1, MAXPATHLEN)) == NULL) ERRX("pfctl_rules: calloc"); if (strlcpy(trs.pfrt_anchor, anchorname, sizeof(trs.pfrt_anchor)) >= sizeof(trs.pfrt_anchor)) ERRX("pfctl_rules: strlcpy"); pf.dev = dev; pf.opts = opts; pf.optimize = optimize; pf.loadopt = loadopt; /* non-brace anchor, create without resolving the path */ if ((pf.anchor = calloc(1, sizeof(*pf.anchor))) == NULL) ERRX("pfctl_rules: calloc"); rs = &pf.anchor->ruleset; pf_init_ruleset(rs); rs->anchor = pf.anchor; if (strlcpy(pf.anchor->path, anchorname, sizeof(pf.anchor->path)) >= sizeof(pf.anchor->path)) errx(1, "pfctl_add_rule: strlcpy"); if (strlcpy(pf.anchor->name, anchorname, sizeof(pf.anchor->name)) >= sizeof(pf.anchor->name)) errx(1, "pfctl_add_rule: strlcpy"); pf.astack[0] = pf.anchor; pf.asd = 0; if (anchorname[0]) pf.loadopt &= ~PFCTL_FLAG_ALTQ; pf.paltq = &pa; pf.trans = t; pfctl_init_options(&pf); if ((opts & PF_OPT_NOACTION) == 0) { /* * XXX For the time being we need to open transactions for * the main ruleset before parsing, because tables are still * loaded at parse time. 
*/ if (pfctl_ruleset_trans(&pf, anchorname, pf.anchor)) ERRX("pfctl_rules"); if (altqsupport && (pf.loadopt & PFCTL_FLAG_ALTQ)) pa.ticket = pfctl_get_ticket(t, PF_RULESET_ALTQ, anchorname); if (pf.loadopt & PFCTL_FLAG_TABLE) pf.astack[0]->ruleset.tticket = pfctl_get_ticket(t, PF_RULESET_TABLE, anchorname); } if (parse_config(filename, &pf) < 0) { if ((opts & PF_OPT_NOACTION) == 0) ERRX("Syntax error in config file: " "pf rules not loaded"); else goto _error; } if (loadopt & PFCTL_FLAG_OPTION) pfctl_adjust_skip_ifaces(&pf); if ((pf.loadopt & PFCTL_FLAG_FILTER && (pfctl_load_ruleset(&pf, path, rs, PF_RULESET_SCRUB, 0))) || (pf.loadopt & PFCTL_FLAG_NAT && (pfctl_load_ruleset(&pf, path, rs, PF_RULESET_NAT, 0) || pfctl_load_ruleset(&pf, path, rs, PF_RULESET_RDR, 0) || pfctl_load_ruleset(&pf, path, rs, PF_RULESET_BINAT, 0))) || (pf.loadopt & PFCTL_FLAG_FILTER && pfctl_load_ruleset(&pf, path, rs, PF_RULESET_FILTER, 0))) { if ((opts & PF_OPT_NOACTION) == 0) ERRX("Unable to load rules into kernel"); else goto _error; } if ((altqsupport && (pf.loadopt & PFCTL_FLAG_ALTQ) != 0)) if (check_commit_altq(dev, opts) != 0) ERRX("errors in altq config"); /* process "load anchor" directives */ if (!anchorname[0]) if (pfctl_load_anchors(dev, &pf, t) == -1) ERRX("load anchors"); if (trans == NULL && (opts & PF_OPT_NOACTION) == 0) { if (!anchorname[0]) if (pfctl_load_options(&pf)) goto _error; if (pfctl_trans(dev, t, DIOCXCOMMIT, osize)) ERR("DIOCXCOMMIT"); } free(path); return (0); _error: if (trans == NULL) { /* main ruleset */ if ((opts & PF_OPT_NOACTION) == 0) if (pfctl_trans(dev, t, DIOCXROLLBACK, osize)) err(1, "DIOCXROLLBACK"); exit(1); } else { /* sub ruleset */ free(path); return (-1); } #undef ERR #undef ERRX } FILE * pfctl_fopen(const char *name, const char *mode) { struct stat st; FILE *fp; fp = fopen(name, mode); if (fp == NULL) return (NULL); if (fstat(fileno(fp), &st)) { fclose(fp); return (NULL); } if (S_ISDIR(st.st_mode)) { fclose(fp); errno = EISDIR; return (NULL); } return (fp); } void pfctl_init_options(struct pfctl *pf) { pf->timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; pf->timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; pf->timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; pf->timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL; pf->timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL; pf->timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL; pf->timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL; pf->timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL; pf->timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL; pf->timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL; pf->timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL; pf->timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL; pf->timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL; pf->timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL; pf->timeout[PFTM_FRAG] = PFTM_FRAG_VAL; pf->timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; pf->timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; pf->timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; pf->timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; pf->timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; pf->limit[PF_LIMIT_STATES] = PFSTATE_HIWAT; pf->limit[PF_LIMIT_FRAGS] = PFFRAG_FRENT_HIWAT; pf->limit[PF_LIMIT_SRC_NODES] = PFSNODE_HIWAT; pf->limit[PF_LIMIT_TABLE_ENTRIES] = PFR_KENTRY_HIWAT; pf->debug = PF_DEBUG_URGENT; } int pfctl_load_options(struct pfctl *pf) { int i, error = 0; if ((loadopt & PFCTL_FLAG_OPTION) == 0) return (0); /* load limits */ for (i = 0; i < 
PF_LIMIT_MAX; i++) { if ((pf->opts & PF_OPT_MERGE) && !pf->limit_set[i]) continue; if (pfctl_load_limit(pf, i, pf->limit[i])) error = 1; } /* * If we've set the limit, but haven't explicitly set adaptive * timeouts, do it now with a start of 60% and end of 120%. */ if (pf->limit_set[PF_LIMIT_STATES] && !pf->timeout_set[PFTM_ADAPTIVE_START] && !pf->timeout_set[PFTM_ADAPTIVE_END]) { pf->timeout[PFTM_ADAPTIVE_START] = (pf->limit[PF_LIMIT_STATES] / 10) * 6; pf->timeout_set[PFTM_ADAPTIVE_START] = 1; pf->timeout[PFTM_ADAPTIVE_END] = (pf->limit[PF_LIMIT_STATES] / 10) * 12; pf->timeout_set[PFTM_ADAPTIVE_END] = 1; } /* load timeouts */ for (i = 0; i < PFTM_MAX; i++) { if ((pf->opts & PF_OPT_MERGE) && !pf->timeout_set[i]) continue; if (pfctl_load_timeout(pf, i, pf->timeout[i])) error = 1; } /* load debug */ if (!(pf->opts & PF_OPT_MERGE) || pf->debug_set) if (pfctl_load_debug(pf, pf->debug)) error = 1; /* load logif */ if (!(pf->opts & PF_OPT_MERGE) || pf->ifname_set) if (pfctl_load_logif(pf, pf->ifname)) error = 1; /* load hostid */ if (!(pf->opts & PF_OPT_MERGE) || pf->hostid_set) if (pfctl_load_hostid(pf, pf->hostid)) error = 1; return (error); } int pfctl_set_limit(struct pfctl *pf, const char *opt, unsigned int limit) { int i; for (i = 0; pf_limits[i].name; i++) { if (strcasecmp(opt, pf_limits[i].name) == 0) { pf->limit[pf_limits[i].index] = limit; pf->limit_set[pf_limits[i].index] = 1; break; } } if (pf_limits[i].name == NULL) { warnx("Bad pool name."); return (1); } if (pf->opts & PF_OPT_VERBOSE) printf("set limit %s %d\n", opt, limit); return (0); } int pfctl_load_limit(struct pfctl *pf, unsigned int index, unsigned int limit) { struct pfioc_limit pl; memset(&pl, 0, sizeof(pl)); pl.index = index; pl.limit = limit; if (ioctl(pf->dev, DIOCSETLIMIT, &pl)) { if (errno == EBUSY) warnx("Current pool size exceeds requested hard limit"); else warnx("DIOCSETLIMIT"); return (1); } return (0); } int pfctl_set_timeout(struct pfctl *pf, const char *opt, int seconds, int quiet) { int i; if ((loadopt & PFCTL_FLAG_OPTION) == 0) return (0); for (i = 0; pf_timeouts[i].name; i++) { if (strcasecmp(opt, pf_timeouts[i].name) == 0) { pf->timeout[pf_timeouts[i].timeout] = seconds; pf->timeout_set[pf_timeouts[i].timeout] = 1; break; } } if (pf_timeouts[i].name == NULL) { warnx("Bad timeout name."); return (1); } if (pf->opts & PF_OPT_VERBOSE && ! 
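/*
 * The adaptive defaults computed in pfctl_load_options() above scale
 * with the state limit: adaptive.start is 60% and adaptive.end 120% of
 * "set limit states", using integer arithmetic (limit / 10) * 6 and
 * (limit / 10) * 12.  For example, "set limit states 10000" yields
 * adaptive.start 6000 and adaptive.end 12000, unless the adaptive
 * timeouts were set explicitly.
 */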
quiet) printf("set timeout %s %d\n", opt, seconds); return (0); } int pfctl_load_timeout(struct pfctl *pf, unsigned int timeout, unsigned int seconds) { struct pfioc_tm pt; memset(&pt, 0, sizeof(pt)); pt.timeout = timeout; pt.seconds = seconds; if (ioctl(pf->dev, DIOCSETTIMEOUT, &pt)) { warnx("DIOCSETTIMEOUT"); return (1); } return (0); } int pfctl_set_optimization(struct pfctl *pf, const char *opt) { const struct pf_hint *hint; int i, r; if ((loadopt & PFCTL_FLAG_OPTION) == 0) return (0); for (i = 0; pf_hints[i].name; i++) if (strcasecmp(opt, pf_hints[i].name) == 0) break; hint = pf_hints[i].hint; if (hint == NULL) { warnx("invalid state timeouts optimization"); return (1); } for (i = 0; hint[i].name; i++) if ((r = pfctl_set_timeout(pf, hint[i].name, hint[i].timeout, 1))) return (r); if (pf->opts & PF_OPT_VERBOSE) printf("set optimization %s\n", opt); return (0); } int pfctl_set_logif(struct pfctl *pf, char *ifname) { if ((loadopt & PFCTL_FLAG_OPTION) == 0) return (0); if (!strcmp(ifname, "none")) { free(pf->ifname); pf->ifname = NULL; } else { pf->ifname = strdup(ifname); if (!pf->ifname) errx(1, "pfctl_set_logif: strdup"); } pf->ifname_set = 1; if (pf->opts & PF_OPT_VERBOSE) printf("set loginterface %s\n", ifname); return (0); } int pfctl_load_logif(struct pfctl *pf, char *ifname) { struct pfioc_if pi; memset(&pi, 0, sizeof(pi)); if (ifname && strlcpy(pi.ifname, ifname, sizeof(pi.ifname)) >= sizeof(pi.ifname)) { warnx("pfctl_load_logif: strlcpy"); return (1); } if (ioctl(pf->dev, DIOCSETSTATUSIF, &pi)) { warnx("DIOCSETSTATUSIF"); return (1); } return (0); } int pfctl_set_hostid(struct pfctl *pf, u_int32_t hostid) { if ((loadopt & PFCTL_FLAG_OPTION) == 0) return (0); HTONL(hostid); pf->hostid = hostid; pf->hostid_set = 1; if (pf->opts & PF_OPT_VERBOSE) printf("set hostid 0x%08x\n", ntohl(hostid)); return (0); } int pfctl_load_hostid(struct pfctl *pf, u_int32_t hostid) { if (ioctl(dev, DIOCSETHOSTID, &hostid)) { warnx("DIOCSETHOSTID"); return (1); } return (0); } int pfctl_set_debug(struct pfctl *pf, char *d) { u_int32_t level; if ((loadopt & PFCTL_FLAG_OPTION) == 0) return (0); if (!strcmp(d, "none")) pf->debug = PF_DEBUG_NONE; else if (!strcmp(d, "urgent")) pf->debug = PF_DEBUG_URGENT; else if (!strcmp(d, "misc")) pf->debug = PF_DEBUG_MISC; else if (!strcmp(d, "loud")) pf->debug = PF_DEBUG_NOISY; else { warnx("unknown debug level \"%s\"", d); return (-1); } pf->debug_set = 1; level = pf->debug; if ((pf->opts & PF_OPT_NOACTION) == 0) if (ioctl(dev, DIOCSETDEBUG, &level)) err(1, "DIOCSETDEBUG"); if (pf->opts & PF_OPT_VERBOSE) printf("set debug %s\n", d); return (0); } int pfctl_load_debug(struct pfctl *pf, unsigned int level) { if (ioctl(pf->dev, DIOCSETDEBUG, &level)) { warnx("DIOCSETDEBUG"); return (1); } return (0); } int pfctl_set_interface_flags(struct pfctl *pf, char *ifname, int flags, int how) { struct pfioc_iface pi; if ((loadopt & PFCTL_FLAG_OPTION) == 0) return (0); bzero(&pi, sizeof(pi)); pi.pfiio_flags = flags; if (strlcpy(pi.pfiio_name, ifname, sizeof(pi.pfiio_name)) >= sizeof(pi.pfiio_name)) errx(1, "pfctl_set_interface_flags: strlcpy"); if ((pf->opts & PF_OPT_NOACTION) == 0) { if (how == 0) { if (ioctl(pf->dev, DIOCCLRIFFLAG, &pi)) err(1, "DIOCCLRIFFLAG"); } else { if (ioctl(pf->dev, DIOCSETIFFLAG, &pi)) err(1, "DIOCSETIFFLAG"); pfctl_check_skip_ifaces(ifname); } } return (0); } void pfctl_debug(int dev, u_int32_t level, int opts) { if (ioctl(dev, DIOCSETDEBUG, &level)) err(1, "DIOCSETDEBUG"); if ((opts & PF_OPT_QUIET) == 0) { fprintf(stderr, "debug level set to '"); 
switch (level) { case PF_DEBUG_NONE: fprintf(stderr, "none"); break; case PF_DEBUG_URGENT: fprintf(stderr, "urgent"); break; case PF_DEBUG_MISC: fprintf(stderr, "misc"); break; case PF_DEBUG_NOISY: fprintf(stderr, "loud"); break; default: fprintf(stderr, ""); break; } fprintf(stderr, "'\n"); } } int pfctl_test_altqsupport(int dev, int opts) { struct pfioc_altq pa; + pa.version = PFIOC_ALTQ_VERSION; if (ioctl(dev, DIOCGETALTQS, &pa)) { if (errno == ENODEV) { if (opts & PF_OPT_VERBOSE) fprintf(stderr, "No ALTQ support in kernel\n" "ALTQ related functions disabled\n"); return (0); } else err(1, "DIOCGETALTQS"); } return (1); } int pfctl_show_anchors(int dev, int opts, char *anchorname) { struct pfioc_ruleset pr; u_int32_t mnr, nr; memset(&pr, 0, sizeof(pr)); memcpy(pr.path, anchorname, sizeof(pr.path)); if (ioctl(dev, DIOCGETRULESETS, &pr)) { if (errno == EINVAL) fprintf(stderr, "Anchor '%s' not found.\n", anchorname); else err(1, "DIOCGETRULESETS"); return (-1); } mnr = pr.nr; for (nr = 0; nr < mnr; ++nr) { char sub[MAXPATHLEN]; pr.nr = nr; if (ioctl(dev, DIOCGETRULESET, &pr)) err(1, "DIOCGETRULESET"); if (!strcmp(pr.name, PF_RESERVED_ANCHOR)) continue; sub[0] = 0; if (pr.path[0]) { strlcat(sub, pr.path, sizeof(sub)); strlcat(sub, "/", sizeof(sub)); } strlcat(sub, pr.name, sizeof(sub)); if (sub[0] != '_' || (opts & PF_OPT_VERBOSE)) printf(" %s\n", sub); if ((opts & PF_OPT_VERBOSE) && pfctl_show_anchors(dev, opts, sub)) return (-1); } return (0); } const char * pfctl_lookup_option(char *cmd, const char * const *list) { if (cmd != NULL && *cmd) for (; *list; list++) if (!strncmp(cmd, *list, strlen(cmd))) return (*list); return (NULL); } int main(int argc, char *argv[]) { int error = 0; int ch; int mode = O_RDONLY; int opts = 0; int optimize = PF_OPTIMIZE_BASIC; char anchorname[MAXPATHLEN]; char *path; if (argc < 2) usage(); while ((ch = getopt(argc, argv, "a:AdD:eqf:F:ghi:k:K:mnNOo:Pp:rRs:t:T:vx:z")) != -1) { switch (ch) { case 'a': anchoropt = optarg; break; case 'd': opts |= PF_OPT_DISABLE; mode = O_RDWR; break; case 'D': if (pfctl_cmdline_symset(optarg) < 0) warnx("could not parse macro definition %s", optarg); break; case 'e': opts |= PF_OPT_ENABLE; mode = O_RDWR; break; case 'q': opts |= PF_OPT_QUIET; break; case 'F': clearopt = pfctl_lookup_option(optarg, clearopt_list); if (clearopt == NULL) { warnx("Unknown flush modifier '%s'", optarg); usage(); } mode = O_RDWR; break; case 'i': ifaceopt = optarg; break; case 'k': if (state_killers >= 2) { warnx("can only specify -k twice"); usage(); /* NOTREACHED */ } state_kill[state_killers++] = optarg; mode = O_RDWR; break; case 'K': if (src_node_killers >= 2) { warnx("can only specify -K twice"); usage(); /* NOTREACHED */ } src_node_kill[src_node_killers++] = optarg; mode = O_RDWR; break; case 'm': opts |= PF_OPT_MERGE; break; case 'n': opts |= PF_OPT_NOACTION; break; case 'N': loadopt |= PFCTL_FLAG_NAT; break; case 'r': opts |= PF_OPT_USEDNS; break; case 'f': rulesopt = optarg; mode = O_RDWR; break; case 'g': opts |= PF_OPT_DEBUG; break; case 'A': loadopt |= PFCTL_FLAG_ALTQ; break; case 'R': loadopt |= PFCTL_FLAG_FILTER; break; case 'o': optiopt = pfctl_lookup_option(optarg, optiopt_list); if (optiopt == NULL) { warnx("Unknown optimization '%s'", optarg); usage(); } opts |= PF_OPT_OPTIMIZE; break; case 'O': loadopt |= PFCTL_FLAG_OPTION; break; case 'p': pf_device = optarg; break; case 'P': opts |= PF_OPT_NUMERIC; break; case 's': showopt = pfctl_lookup_option(optarg, showopt_list); if (showopt == NULL) { warnx("Unknown show modifier '%s'", 
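/*
 * Modifier words are resolved by first-prefix match:
 * pfctl_lookup_option() (defined just above main()) strncmp()s the
 * user's string against each list entry for the user's length, so
 * "-s ru" selects "rules" and a bare "-s s" already selects "states".
 * Sketch:
 *
 *   pfctl_lookup_option("ru", showopt_list);   returns "rules"
 */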
optarg); usage(); } break; case 't': tableopt = optarg; break; case 'T': tblcmdopt = pfctl_lookup_option(optarg, tblcmdopt_list); if (tblcmdopt == NULL) { warnx("Unknown table command '%s'", optarg); usage(); } break; case 'v': if (opts & PF_OPT_VERBOSE) opts |= PF_OPT_VERBOSE2; opts |= PF_OPT_VERBOSE; break; case 'x': debugopt = pfctl_lookup_option(optarg, debugopt_list); if (debugopt == NULL) { warnx("Unknown debug level '%s'", optarg); usage(); } mode = O_RDWR; break; case 'z': opts |= PF_OPT_CLRRULECTRS; mode = O_RDWR; break; case 'h': /* FALLTHROUGH */ default: usage(); /* NOTREACHED */ } } if (tblcmdopt != NULL) { argc -= optind; argv += optind; ch = *tblcmdopt; if (ch == 'l') { loadopt |= PFCTL_FLAG_TABLE; tblcmdopt = NULL; } else mode = strchr("acdefkrz", ch) ? O_RDWR : O_RDONLY; } else if (argc != optind) { warnx("unknown command line argument: %s ...", argv[optind]); usage(); /* NOTREACHED */ } if (loadopt == 0) loadopt = ~0; if ((path = calloc(1, MAXPATHLEN)) == NULL) errx(1, "pfctl: calloc"); memset(anchorname, 0, sizeof(anchorname)); if (anchoropt != NULL) { int len = strlen(anchoropt); if (anchoropt[len - 1] == '*') { if (len >= 2 && anchoropt[len - 2] == '/') anchoropt[len - 2] = '\0'; else anchoropt[len - 1] = '\0'; opts |= PF_OPT_RECURSE; } if (strlcpy(anchorname, anchoropt, sizeof(anchorname)) >= sizeof(anchorname)) errx(1, "anchor name '%s' too long", anchoropt); loadopt &= PFCTL_FLAG_FILTER|PFCTL_FLAG_NAT|PFCTL_FLAG_TABLE; } if ((opts & PF_OPT_NOACTION) == 0) { dev = open(pf_device, mode); if (dev == -1) err(1, "%s", pf_device); altqsupport = pfctl_test_altqsupport(dev, opts); } else { dev = open(pf_device, O_RDONLY); if (dev >= 0) opts |= PF_OPT_DUMMYACTION; /* turn off options */ opts &= ~ (PF_OPT_DISABLE | PF_OPT_ENABLE); clearopt = showopt = debugopt = NULL; #if !defined(ENABLE_ALTQ) altqsupport = 0; #else altqsupport = 1; #endif } if (opts & PF_OPT_DISABLE) if (pfctl_disable(dev, opts)) error = 1; if (showopt != NULL) { switch (*showopt) { case 'A': pfctl_show_anchors(dev, opts, anchorname); break; case 'r': pfctl_load_fingerprints(dev, opts); pfctl_show_rules(dev, path, opts, PFCTL_SHOW_RULES, anchorname, 0); break; case 'l': pfctl_load_fingerprints(dev, opts); pfctl_show_rules(dev, path, opts, PFCTL_SHOW_LABELS, anchorname, 0); break; case 'n': pfctl_load_fingerprints(dev, opts); pfctl_show_nat(dev, opts, anchorname); break; case 'q': pfctl_show_altq(dev, ifaceopt, opts, opts & PF_OPT_VERBOSE2); break; case 's': pfctl_show_states(dev, ifaceopt, opts); break; case 'S': pfctl_show_src_nodes(dev, opts); break; case 'i': pfctl_show_status(dev, opts); break; case 'R': error = pfctl_show_running(dev); break; case 't': pfctl_show_timeouts(dev, opts); break; case 'm': pfctl_show_limits(dev, opts); break; case 'a': opts |= PF_OPT_SHOWALL; pfctl_load_fingerprints(dev, opts); pfctl_show_nat(dev, opts, anchorname); pfctl_show_rules(dev, path, opts, 0, anchorname, 0); pfctl_show_altq(dev, ifaceopt, opts, 0); pfctl_show_states(dev, ifaceopt, opts); pfctl_show_src_nodes(dev, opts); pfctl_show_status(dev, opts); pfctl_show_rules(dev, path, opts, 1, anchorname, 0); pfctl_show_timeouts(dev, opts); pfctl_show_limits(dev, opts); pfctl_show_tables(anchorname, opts); pfctl_show_fingerprints(opts); break; case 'T': pfctl_show_tables(anchorname, opts); break; case 'o': pfctl_load_fingerprints(dev, opts); pfctl_show_fingerprints(opts); break; case 'I': pfctl_show_ifaces(ifaceopt, opts); break; } } if ((opts & PF_OPT_CLRRULECTRS) && showopt == NULL) pfctl_show_rules(dev, path, opts, 
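One detail of the option parsing above worth noting: a second -v upgrades PF_OPT_VERBOSE to PF_OPT_VERBOSE2, which pfctl_show_altq() (in pfctl_qstats.c below) takes as a request to keep re-sampling queue statistics every STAT_INTERVAL seconds instead of printing them once, e.g.:

	pfctl -v -v -s queue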
PFCTL_SHOW_NOTHING, anchorname, 0); if (clearopt != NULL) { if (anchorname[0] == '_' || strstr(anchorname, "/_") != NULL) errx(1, "anchor names beginning with '_' cannot " "be modified from the command line"); switch (*clearopt) { case 'r': pfctl_clear_rules(dev, opts, anchorname); break; case 'n': pfctl_clear_nat(dev, opts, anchorname); break; case 'q': pfctl_clear_altq(dev, opts); break; case 's': pfctl_clear_states(dev, ifaceopt, opts); break; case 'S': pfctl_clear_src_nodes(dev, opts); break; case 'i': pfctl_clear_stats(dev, opts); break; case 'a': pfctl_clear_rules(dev, opts, anchorname); pfctl_clear_nat(dev, opts, anchorname); pfctl_clear_tables(anchorname, opts); if (!*anchorname) { pfctl_clear_altq(dev, opts); pfctl_clear_states(dev, ifaceopt, opts); pfctl_clear_src_nodes(dev, opts); pfctl_clear_stats(dev, opts); pfctl_clear_fingerprints(dev, opts); pfctl_clear_interface_flags(dev, opts); } break; case 'o': pfctl_clear_fingerprints(dev, opts); break; case 'T': pfctl_clear_tables(anchorname, opts); break; } } if (state_killers) { if (!strcmp(state_kill[0], "label")) pfctl_label_kill_states(dev, ifaceopt, opts); else if (!strcmp(state_kill[0], "id")) pfctl_id_kill_states(dev, ifaceopt, opts); else pfctl_net_kill_states(dev, ifaceopt, opts); } if (src_node_killers) pfctl_kill_src_nodes(dev, ifaceopt, opts); if (tblcmdopt != NULL) { error = pfctl_command_tables(argc, argv, tableopt, tblcmdopt, rulesopt, anchorname, opts); rulesopt = NULL; } if (optiopt != NULL) { switch (*optiopt) { case 'n': optimize = 0; break; case 'b': optimize |= PF_OPTIMIZE_BASIC; break; case 'o': case 'p': optimize |= PF_OPTIMIZE_PROFILE; break; } } if ((rulesopt != NULL) && (loadopt & PFCTL_FLAG_OPTION) && !anchorname[0] && !(opts & PF_OPT_NOACTION)) if (pfctl_get_skip_ifaces()) error = 1; if (rulesopt != NULL && !(opts & (PF_OPT_MERGE|PF_OPT_NOACTION)) && !anchorname[0] && (loadopt & PFCTL_FLAG_OPTION)) if (pfctl_file_fingerprints(dev, opts, PF_OSFP_FILE)) error = 1; if (rulesopt != NULL) { if (anchorname[0] == '_' || strstr(anchorname, "/_") != NULL) errx(1, "anchor names beginning with '_' cannot " "be modified from the command line"); if (pfctl_rules(dev, rulesopt, opts, optimize, anchorname, NULL)) error = 1; else if (!(opts & PF_OPT_NOACTION) && (loadopt & PFCTL_FLAG_TABLE)) warn_namespace_collision(NULL); } if (opts & PF_OPT_ENABLE) if (pfctl_enable(dev, opts)) error = 1; if (debugopt != NULL) { switch (*debugopt) { case 'n': pfctl_debug(dev, PF_DEBUG_NONE, opts); break; case 'u': pfctl_debug(dev, PF_DEBUG_URGENT, opts); break; case 'm': pfctl_debug(dev, PF_DEBUG_MISC, opts); break; case 'l': pfctl_debug(dev, PF_DEBUG_NOISY, opts); break; } } exit(error); } Index: head/sbin/pfctl/pfctl_altq.c =================================================================== --- head/sbin/pfctl/pfctl_altq.c (revision 338208) +++ head/sbin/pfctl/pfctl_altq.c (revision 338209) @@ -1,1519 +1,1541 @@ /* $OpenBSD: pfctl_altq.c,v 1.93 2007/10/15 02:16:35 deraadt Exp $ */ /* * Copyright (c) 2002 * Sony Computer Science Laboratories Inc. * Copyright (c) 2002, 2003 Henning Brauer * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); +#define PFIOC_USE_LATEST + #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include "pfctl_parser.h" #include "pfctl.h" #define is_sc_null(sc) (((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0)) static TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs); static LIST_HEAD(gen_sc, segment) rtsc, lssc; struct pf_altq *qname_to_pfaltq(const char *, const char *); u_int32_t qname_to_qid(const char *); static int eval_pfqueue_cbq(struct pfctl *, struct pf_altq *); static int cbq_compute_idletime(struct pfctl *, struct pf_altq *); static int check_commit_cbq(int, int, struct pf_altq *); static int print_cbq_opts(const struct pf_altq *); static int print_codel_opts(const struct pf_altq *, const struct node_queue_opt *); static int eval_pfqueue_priq(struct pfctl *, struct pf_altq *); static int check_commit_priq(int, int, struct pf_altq *); static int print_priq_opts(const struct pf_altq *); static int eval_pfqueue_hfsc(struct pfctl *, struct pf_altq *); static int check_commit_hfsc(int, int, struct pf_altq *); static int print_hfsc_opts(const struct pf_altq *, const struct node_queue_opt *); static int eval_pfqueue_fairq(struct pfctl *, struct pf_altq *); static int print_fairq_opts(const struct pf_altq *, const struct node_queue_opt *); static int check_commit_fairq(int, int, struct pf_altq *); static void gsc_add_sc(struct gen_sc *, struct service_curve *); static int is_gsc_under_sc(struct gen_sc *, struct service_curve *); static void gsc_destroy(struct gen_sc *); static struct segment *gsc_getentry(struct gen_sc *, double); static int gsc_add_seg(struct gen_sc *, double, double, double, double); static double sc_x2y(struct service_curve *, double); #ifdef __FreeBSD__ -u_int32_t getifspeed(int, char *); +u_int64_t getifspeed(int, char *); #else u_int32_t getifspeed(char *); #endif u_long getifmtu(char *); int eval_queue_opts(struct pf_altq *, struct node_queue_opt *, - u_int32_t); -u_int32_t eval_bwspec(struct node_queue_bw *, u_int32_t); + u_int64_t); +u_int64_t eval_bwspec(struct node_queue_bw *, u_int64_t); void print_hfsc_sc(const char *, u_int, u_int, u_int, const struct node_hfsc_sc *); void print_fairq_sc(const char *, u_int, u_int, u_int, const struct node_fairq_sc *); void pfaltq_store(struct pf_altq *a) { struct pf_altq *altq; if ((altq = malloc(sizeof(*altq))) == NULL) err(1, "malloc"); memcpy(altq, a, sizeof(struct pf_altq)); TAILQ_INSERT_TAIL(&altqs, altq, entries); } struct pf_altq * pfaltq_lookup(const char *ifname) { struct pf_altq *altq; TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 && altq->qname[0] == 0) return (altq); } return (NULL); } struct pf_altq * qname_to_pfaltq(const char *qname, const char *ifname) { struct pf_altq *altq; TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 && strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0) return (altq); } return (NULL); } u_int32_t qname_to_qid(const char *qname) { struct pf_altq *altq; /* * We guarantee that same named queues on different 
interfaces * have the same qid, so we do NOT need to limit matching on * one interface! */ TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0) return (altq->qid); } return (0); } void print_altq(const struct pf_altq *a, unsigned int level, struct node_queue_bw *bw, struct node_queue_opt *qopts) { if (a->qname[0] != 0) { print_queue(a, level, bw, 1, qopts); return; } #ifdef __FreeBSD__ if (a->local_flags & PFALTQ_FLAG_IF_REMOVED) printf("INACTIVE "); #endif printf("altq on %s ", a->ifname); switch (a->scheduler) { case ALTQT_CBQ: if (!print_cbq_opts(a)) printf("cbq "); break; case ALTQT_PRIQ: if (!print_priq_opts(a)) printf("priq "); break; case ALTQT_HFSC: if (!print_hfsc_opts(a, qopts)) printf("hfsc "); break; case ALTQT_FAIRQ: if (!print_fairq_opts(a, qopts)) printf("fairq "); break; case ALTQT_CODEL: if (!print_codel_opts(a, qopts)) printf("codel "); break; } if (bw != NULL && bw->bw_percent > 0) { if (bw->bw_percent < 100) printf("bandwidth %u%% ", bw->bw_percent); } else printf("bandwidth %s ", rate2str((double)a->ifbandwidth)); if (a->qlimit != DEFAULT_QLIMIT) printf("qlimit %u ", a->qlimit); printf("tbrsize %u ", a->tbrsize); } void print_queue(const struct pf_altq *a, unsigned int level, struct node_queue_bw *bw, int print_interface, struct node_queue_opt *qopts) { unsigned int i; #ifdef __FreeBSD__ if (a->local_flags & PFALTQ_FLAG_IF_REMOVED) printf("INACTIVE "); #endif printf("queue "); for (i = 0; i < level; ++i) printf(" "); printf("%s ", a->qname); if (print_interface) printf("on %s ", a->ifname); if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC || a->scheduler == ALTQT_FAIRQ) { if (bw != NULL && bw->bw_percent > 0) { if (bw->bw_percent < 100) printf("bandwidth %u%% ", bw->bw_percent); } else printf("bandwidth %s ", rate2str((double)a->bandwidth)); } if (a->priority != DEFAULT_PRIORITY) printf("priority %u ", a->priority); if (a->qlimit != DEFAULT_QLIMIT) printf("qlimit %u ", a->qlimit); switch (a->scheduler) { case ALTQT_CBQ: print_cbq_opts(a); break; case ALTQT_PRIQ: print_priq_opts(a); break; case ALTQT_HFSC: print_hfsc_opts(a, qopts); break; case ALTQT_FAIRQ: print_fairq_opts(a, qopts); break; } } /* * eval_pfaltq computes the discipline parameters. */ int eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw, struct node_queue_opt *opts) { - u_int rate, size, errors = 0; + u_int64_t rate; + u_int size, errors = 0; if (bw->bw_absolute > 0) pa->ifbandwidth = bw->bw_absolute; else #ifdef __FreeBSD__ if ((rate = getifspeed(pf->dev, pa->ifname)) == 0) { #else if ((rate = getifspeed(pa->ifname)) == 0) { #endif fprintf(stderr, "interface %s does not know its bandwidth, " "please specify an absolute bandwidth\n", pa->ifname); errors++; } else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0) pa->ifbandwidth = rate; + /* + * Limit bandwidth to UINT_MAX for schedulers that aren't 64-bit ready. 
+ */ + if ((pa->scheduler != ALTQT_HFSC) && (pa->ifbandwidth > UINT_MAX)) { + pa->ifbandwidth = UINT_MAX; + warnx("interface %s bandwidth limited to %" PRIu64 " bps " + "because selected scheduler is 32-bit limited\n", pa->ifname, + pa->ifbandwidth); + } errors += eval_queue_opts(pa, opts, pa->ifbandwidth); /* if tbrsize is not specified, use heuristics */ if (pa->tbrsize == 0) { rate = pa->ifbandwidth; if (rate <= 1 * 1000 * 1000) size = 1; else if (rate <= 10 * 1000 * 1000) size = 4; else if (rate <= 200 * 1000 * 1000) size = 8; else size = 24; size = size * getifmtu(pa->ifname); - if (size > 0xffff) - size = 0xffff; pa->tbrsize = size; } return (errors); } /* * check_commit_altq does consistency check for each interface */ int check_commit_altq(int dev, int opts) { struct pf_altq *altq; int error = 0; /* call the discipline check for each interface. */ TAILQ_FOREACH(altq, &altqs, entries) { if (altq->qname[0] == 0) { switch (altq->scheduler) { case ALTQT_CBQ: error = check_commit_cbq(dev, opts, altq); break; case ALTQT_PRIQ: error = check_commit_priq(dev, opts, altq); break; case ALTQT_HFSC: error = check_commit_hfsc(dev, opts, altq); break; case ALTQT_FAIRQ: error = check_commit_fairq(dev, opts, altq); break; default: break; } } } return (error); } /* * eval_pfqueue computes the queue parameters. */ int eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw, struct node_queue_opt *opts) { /* should be merged with expand_queue */ struct pf_altq *if_pa, *parent, *altq; - u_int32_t bwsum; + u_int64_t bwsum; int error = 0; /* find the corresponding interface and copy fields used by queues */ if ((if_pa = pfaltq_lookup(pa->ifname)) == NULL) { fprintf(stderr, "altq not defined on %s\n", pa->ifname); return (1); } pa->scheduler = if_pa->scheduler; pa->ifbandwidth = if_pa->ifbandwidth; if (qname_to_pfaltq(pa->qname, pa->ifname) != NULL) { fprintf(stderr, "queue %s already exists on interface %s\n", pa->qname, pa->ifname); return (1); } pa->qid = qname_to_qid(pa->qname); parent = NULL; if (pa->parent[0] != 0) { parent = qname_to_pfaltq(pa->parent, pa->ifname); if (parent == NULL) { fprintf(stderr, "parent %s not found for %s\n", pa->parent, pa->qname); return (1); } pa->parent_qid = parent->qid; } if (pa->qlimit == 0) pa->qlimit = DEFAULT_QLIMIT; if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC || pa->scheduler == ALTQT_FAIRQ) { pa->bandwidth = eval_bwspec(bw, - parent == NULL ? 0 : parent->bandwidth); + parent == NULL ? pa->ifbandwidth : parent->bandwidth); if (pa->bandwidth > pa->ifbandwidth) { fprintf(stderr, "bandwidth for %s higher than " "interface\n", pa->qname); return (1); } /* check the sum of the child bandwidth is under parent's */ if (parent != NULL) { if (pa->bandwidth > parent->bandwidth) { warnx("bandwidth for %s higher than parent", pa->qname); return (1); } bwsum = 0; TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) == 0 && altq->qname[0] != 0 && strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) == 0) bwsum += altq->bandwidth; } bwsum += pa->bandwidth; if (bwsum > parent->bandwidth) { warnx("the sum of the child bandwidth higher" " than parent \"%s\"", parent->qname); } } } - if (eval_queue_opts(pa, opts, parent == NULL? 0 : parent->bandwidth)) + if (eval_queue_opts(pa, opts, + parent == NULL ? 
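Concretely, UINT_MAX is about 4.29e9, so with the clamp above a 40 Gbps interface running CBQ, PRIQ, or FAIRQ is treated as a ~4.29 Gbps link, while HFSC (the one scheduler converted to 64-bit bandwidths in this change) keeps the full rate. Note also that the old 0xffff cap on the tbrsize heuristic is dropped, so large-MTU links may now get token buckets bigger than 64 KB. The clamp in isolation (a sketch, not the committed code):

	uint64_t bw = 40ULL * 1000 * 1000 * 1000;	/* 40 Gbps */
	if (scheduler != ALTQT_HFSC && bw > UINT_MAX)
		bw = UINT_MAX;				/* ~4.29 Gbps */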
pa->ifbandwidth : parent->bandwidth)) return (1); switch (pa->scheduler) { case ALTQT_CBQ: error = eval_pfqueue_cbq(pf, pa); break; case ALTQT_PRIQ: error = eval_pfqueue_priq(pf, pa); break; case ALTQT_HFSC: error = eval_pfqueue_hfsc(pf, pa); break; case ALTQT_FAIRQ: error = eval_pfqueue_fairq(pf, pa); break; default: break; } return (error); } /* * CBQ support functions */ #define RM_FILTER_GAIN 5 /* log2 of gain, e.g., 5 => 31/32 */ #define RM_NS_PER_SEC (1000000000) static int eval_pfqueue_cbq(struct pfctl *pf, struct pf_altq *pa) { struct cbq_opts *opts; u_int ifmtu; if (pa->priority >= CBQ_MAXPRI) { warnx("priority out of range: max %d", CBQ_MAXPRI - 1); return (-1); } ifmtu = getifmtu(pa->ifname); opts = &pa->pq_u.cbq_opts; if (opts->pktsize == 0) { /* use default */ opts->pktsize = ifmtu; if (opts->pktsize > MCLBYTES) /* do what TCP does */ opts->pktsize &= ~MCLBYTES; } else if (opts->pktsize > ifmtu) opts->pktsize = ifmtu; if (opts->maxpktsize == 0) /* use default */ opts->maxpktsize = ifmtu; else if (opts->maxpktsize > ifmtu) opts->pktsize = ifmtu; if (opts->pktsize > opts->maxpktsize) opts->pktsize = opts->maxpktsize; if (pa->parent[0] == 0) opts->flags |= (CBQCLF_ROOTCLASS | CBQCLF_WRR); cbq_compute_idletime(pf, pa); return (0); } /* * compute ns_per_byte, maxidle, minidle, and offtime */ static int cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa) { struct cbq_opts *opts; double maxidle_s, maxidle, minidle; double offtime, nsPerByte, ifnsPerByte, ptime, cptime; double z, g, f, gton, gtom; u_int minburst, maxburst; opts = &pa->pq_u.cbq_opts; ifnsPerByte = (1.0 / (double)pa->ifbandwidth) * RM_NS_PER_SEC * 8; minburst = opts->minburst; maxburst = opts->maxburst; if (pa->bandwidth == 0) f = 0.0001; /* small enough? */ else f = ((double) pa->bandwidth / (double) pa->ifbandwidth); nsPerByte = ifnsPerByte / f; ptime = (double)opts->pktsize * ifnsPerByte; cptime = ptime * (1.0 - f) / f; if (nsPerByte * (double)opts->maxpktsize > (double)INT_MAX) { /* * this causes integer overflow in kernel! 
* (bandwidth < 6Kbps when max_pkt_size=1500) */ if (pa->bandwidth != 0 && (pf->opts & PF_OPT_QUIET) == 0) { warnx("queue bandwidth must be larger than %s", rate2str(ifnsPerByte * (double)opts->maxpktsize / (double)INT_MAX * (double)pa->ifbandwidth)); fprintf(stderr, "cbq: queue %s is too slow!\n", pa->qname); } nsPerByte = (double)(INT_MAX / opts->maxpktsize); } if (maxburst == 0) { /* use default */ if (cptime > 10.0 * 1000000) maxburst = 4; else maxburst = 16; } if (minburst == 0) /* use default */ minburst = 2; if (minburst > maxburst) minburst = maxburst; z = (double)(1 << RM_FILTER_GAIN); g = (1.0 - 1.0 / z); gton = pow(g, (double)maxburst); gtom = pow(g, (double)(minburst-1)); maxidle = ((1.0 / f - 1.0) * ((1.0 - gton) / gton)); maxidle_s = (1.0 - g); if (maxidle > maxidle_s) maxidle = ptime * maxidle; else maxidle = ptime * maxidle_s; offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom); minidle = -((double)opts->maxpktsize * (double)nsPerByte); /* scale parameters */ maxidle = ((maxidle * 8.0) / nsPerByte) * pow(2.0, (double)RM_FILTER_GAIN); offtime = (offtime * 8.0) / nsPerByte * pow(2.0, (double)RM_FILTER_GAIN); minidle = ((minidle * 8.0) / nsPerByte) * pow(2.0, (double)RM_FILTER_GAIN); maxidle = maxidle / 1000.0; offtime = offtime / 1000.0; minidle = minidle / 1000.0; opts->minburst = minburst; opts->maxburst = maxburst; opts->ns_per_byte = (u_int)nsPerByte; opts->maxidle = (u_int)fabs(maxidle); opts->minidle = (int)minidle; opts->offtime = (u_int)fabs(offtime); return (0); } static int check_commit_cbq(int dev, int opts, struct pf_altq *pa) { struct pf_altq *altq; int root_class, default_class; int error = 0; /* * check if cbq has one root queue and one default queue * for this interface */ root_class = default_class = 0; TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) continue; if (altq->qname[0] == 0) /* this is for interface */ continue; if (altq->pq_u.cbq_opts.flags & CBQCLF_ROOTCLASS) root_class++; if (altq->pq_u.cbq_opts.flags & CBQCLF_DEFCLASS) default_class++; } if (root_class != 1) { warnx("should have one root queue on %s", pa->ifname); error++; } if (default_class != 1) { warnx("should have one default queue on %s", pa->ifname); error++; } return (error); } static int print_cbq_opts(const struct pf_altq *a) { const struct cbq_opts *opts; opts = &a->pq_u.cbq_opts; if (opts->flags) { printf("cbq("); if (opts->flags & CBQCLF_RED) printf(" red"); if (opts->flags & CBQCLF_ECN) printf(" ecn"); if (opts->flags & CBQCLF_RIO) printf(" rio"); if (opts->flags & CBQCLF_CODEL) printf(" codel"); if (opts->flags & CBQCLF_CLEARDSCP) printf(" cleardscp"); if (opts->flags & CBQCLF_FLOWVALVE) printf(" flowvalve"); if (opts->flags & CBQCLF_BORROW) printf(" borrow"); if (opts->flags & CBQCLF_WRR) printf(" wrr"); if (opts->flags & CBQCLF_EFFICIENT) printf(" efficient"); if (opts->flags & CBQCLF_ROOTCLASS) printf(" root"); if (opts->flags & CBQCLF_DEFCLASS) printf(" default"); printf(" ) "); return (1); } else return (0); } /* * PRIQ support functions */ static int eval_pfqueue_priq(struct pfctl *pf, struct pf_altq *pa) { struct pf_altq *altq; if (pa->priority >= PRIQ_MAXPRI) { warnx("priority out of range: max %d", PRIQ_MAXPRI - 1); return (-1); } /* the priority should be unique for the interface */ TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) == 0 && altq->qname[0] != 0 && altq->priority == pa->priority) { warnx("%s and %s have the same priority", altq->qname, pa->qname); return (-1); } } 
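The rate math in cbq_compute_idletime() above reduces to ifnsPerByte = 8e9 / bandwidth-in-bps, scaled by the class's share f of the link; a worked case, assuming a class given 10% of a 1 Gbps interface:

	double f = 100e6 / 1e9;		/* class share: 0.1 */
	double ifns = 8e9 / 1e9;	/* 8 ns per byte at line rate */
	double ns = ifns / f;		/* 80 ns per byte for the class */

The INT_MAX guard fires once ns_per_byte * maxpktsize overflows a 32-bit int, i.e. below roughly 8e9 / (INT_MAX / 1500) ~= 5.6 Kbps for a 1500-byte MTU, matching the "bandwidth < 6Kbps" note in the code.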
return (0); } static int check_commit_priq(int dev, int opts, struct pf_altq *pa) { struct pf_altq *altq; int default_class; int error = 0; /* * check if priq has one default class for this interface */ default_class = 0; TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) continue; if (altq->qname[0] == 0) /* this is for interface */ continue; if (altq->pq_u.priq_opts.flags & PRCF_DEFAULTCLASS) default_class++; } if (default_class != 1) { warnx("should have one default queue on %s", pa->ifname); error++; } return (error); } static int print_priq_opts(const struct pf_altq *a) { const struct priq_opts *opts; opts = &a->pq_u.priq_opts; if (opts->flags) { printf("priq("); if (opts->flags & PRCF_RED) printf(" red"); if (opts->flags & PRCF_ECN) printf(" ecn"); if (opts->flags & PRCF_RIO) printf(" rio"); if (opts->flags & PRCF_CODEL) printf(" codel"); if (opts->flags & PRCF_CLEARDSCP) printf(" cleardscp"); if (opts->flags & PRCF_DEFAULTCLASS) printf(" default"); printf(" ) "); return (1); } else return (0); } /* * HFSC support functions */ static int eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa) { struct pf_altq *altq, *parent; - struct hfsc_opts *opts; + struct hfsc_opts_v1 *opts; struct service_curve sc; opts = &pa->pq_u.hfsc_opts; if (pa->parent[0] == 0) { /* root queue */ opts->lssc_m1 = pa->ifbandwidth; opts->lssc_m2 = pa->ifbandwidth; opts->lssc_d = 0; return (0); } LIST_INIT(&rtsc); LIST_INIT(&lssc); /* if link_share is not specified, use bandwidth */ if (opts->lssc_m2 == 0) opts->lssc_m2 = pa->bandwidth; if ((opts->rtsc_m1 > 0 && opts->rtsc_m2 == 0) || (opts->lssc_m1 > 0 && opts->lssc_m2 == 0) || (opts->ulsc_m1 > 0 && opts->ulsc_m2 == 0)) { warnx("m2 is zero for %s", pa->qname); return (-1); } if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) || (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) || (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) { warnx("m1 must be zero for convex curve: %s", pa->qname); return (-1); } /* * admission control: * for the real-time service curve, the sum of the service curves * should not exceed 80% of the interface bandwidth. 20% is reserved * not to over-commit the actual interface bandwidth. * for the linkshare service curve, the sum of the child service * curve should not exceed the parent service curve. * for the upper-limit service curve, the assigned bandwidth should * be smaller than the interface bandwidth, and the upper-limit should * be larger than the real-time service curve when both are defined. */ parent = qname_to_pfaltq(pa->parent, pa->ifname); if (parent == NULL) errx(1, "parent %s not found for %s", pa->parent, pa->qname); TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) continue; if (altq->qname[0] == 0) /* this is for interface */ continue; /* if the class has a real-time service curve, add it. */ if (opts->rtsc_m2 != 0 && altq->pq_u.hfsc_opts.rtsc_m2 != 0) { sc.m1 = altq->pq_u.hfsc_opts.rtsc_m1; sc.d = altq->pq_u.hfsc_opts.rtsc_d; sc.m2 = altq->pq_u.hfsc_opts.rtsc_m2; gsc_add_sc(&rtsc, &sc); } if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0) continue; /* if the class has a linkshare service curve, add it. */ if (opts->lssc_m2 != 0 && altq->pq_u.hfsc_opts.lssc_m2 != 0) { sc.m1 = altq->pq_u.hfsc_opts.lssc_m1; sc.d = altq->pq_u.hfsc_opts.lssc_d; sc.m2 = altq->pq_u.hfsc_opts.lssc_m2; gsc_add_sc(&lssc, &sc); } } /* check the real-time service curve. 
reserve 20% of interface bw */ if (opts->rtsc_m2 != 0) { /* add this queue to the sum */ sc.m1 = opts->rtsc_m1; sc.d = opts->rtsc_d; sc.m2 = opts->rtsc_m2; gsc_add_sc(&rtsc, &sc); /* compare the sum with 80% of the interface */ sc.m1 = 0; sc.d = 0; sc.m2 = pa->ifbandwidth / 100 * 80; if (!is_gsc_under_sc(&rtsc, &sc)) { warnx("real-time sc exceeds 80%% of the interface " "bandwidth (%s)", rate2str((double)sc.m2)); goto err_ret; } } /* check the linkshare service curve. */ if (opts->lssc_m2 != 0) { /* add this queue to the child sum */ sc.m1 = opts->lssc_m1; sc.d = opts->lssc_d; sc.m2 = opts->lssc_m2; gsc_add_sc(&lssc, &sc); /* compare the sum of the children with parent's sc */ sc.m1 = parent->pq_u.hfsc_opts.lssc_m1; sc.d = parent->pq_u.hfsc_opts.lssc_d; sc.m2 = parent->pq_u.hfsc_opts.lssc_m2; if (!is_gsc_under_sc(&lssc, &sc)) { warnx("linkshare sc exceeds parent's sc"); goto err_ret; } } /* check the upper-limit service curve. */ if (opts->ulsc_m2 != 0) { if (opts->ulsc_m1 > pa->ifbandwidth || opts->ulsc_m2 > pa->ifbandwidth) { warnx("upper-limit larger than interface bandwidth"); goto err_ret; } if (opts->rtsc_m2 != 0 && opts->rtsc_m2 > opts->ulsc_m2) { warnx("upper-limit sc smaller than real-time sc"); goto err_ret; } } gsc_destroy(&rtsc); gsc_destroy(&lssc); return (0); err_ret: gsc_destroy(&rtsc); gsc_destroy(&lssc); return (-1); } /* * FAIRQ support functions */ static int eval_pfqueue_fairq(struct pfctl *pf __unused, struct pf_altq *pa) { struct pf_altq *altq, *parent; struct fairq_opts *opts; struct service_curve sc; opts = &pa->pq_u.fairq_opts; if (pa->parent[0] == 0) { /* root queue */ opts->lssc_m1 = pa->ifbandwidth; opts->lssc_m2 = pa->ifbandwidth; opts->lssc_d = 0; return (0); } LIST_INIT(&lssc); /* if link_share is not specified, use bandwidth */ if (opts->lssc_m2 == 0) opts->lssc_m2 = pa->bandwidth; /* * admission control: * for the real-time service curve, the sum of the service curves * should not exceed 80% of the interface bandwidth. 20% is reserved * not to over-commit the actual interface bandwidth. * for the link-sharing service curve, the sum of the child service * curve should not exceed the parent service curve. * for the upper-limit service curve, the assigned bandwidth should * be smaller than the interface bandwidth, and the upper-limit should * be larger than the real-time service curve when both are defined. */ parent = qname_to_pfaltq(pa->parent, pa->ifname); if (parent == NULL) errx(1, "parent %s not found for %s", pa->parent, pa->qname); TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) continue; if (altq->qname[0] == 0) /* this is for interface */ continue; if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0) continue; /* if the class has a link-sharing service curve, add it. */ if (opts->lssc_m2 != 0 && altq->pq_u.fairq_opts.lssc_m2 != 0) { sc.m1 = altq->pq_u.fairq_opts.lssc_m1; sc.d = altq->pq_u.fairq_opts.lssc_d; sc.m2 = altq->pq_u.fairq_opts.lssc_m2; gsc_add_sc(&lssc, &sc); } } /* check the link-sharing service curve. 
*/ if (opts->lssc_m2 != 0) { sc.m1 = parent->pq_u.fairq_opts.lssc_m1; sc.d = parent->pq_u.fairq_opts.lssc_d; sc.m2 = parent->pq_u.fairq_opts.lssc_m2; if (!is_gsc_under_sc(&lssc, &sc)) { warnx("link-sharing sc exceeds parent's sc"); goto err_ret; } } gsc_destroy(&lssc); return (0); err_ret: gsc_destroy(&lssc); return (-1); } static int check_commit_hfsc(int dev, int opts, struct pf_altq *pa) { struct pf_altq *altq, *def = NULL; int default_class; int error = 0; /* check if hfsc has one default queue for this interface */ default_class = 0; TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) continue; if (altq->qname[0] == 0) /* this is for interface */ continue; if (altq->parent[0] == 0) /* dummy root */ continue; if (altq->pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS) { default_class++; def = altq; } } if (default_class != 1) { warnx("should have one default queue on %s", pa->ifname); return (1); } /* make sure the default queue is a leaf */ TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) continue; if (altq->qname[0] == 0) /* this is for interface */ continue; if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) { warnx("default queue is not a leaf"); error++; } } return (error); } static int check_commit_fairq(int dev __unused, int opts __unused, struct pf_altq *pa) { struct pf_altq *altq, *def = NULL; int default_class; int error = 0; /* check if fairq has one default queue for this interface */ default_class = 0; TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) continue; if (altq->qname[0] == 0) /* this is for interface */ continue; if (altq->pq_u.fairq_opts.flags & FARF_DEFAULTCLASS) { default_class++; def = altq; } } if (default_class != 1) { warnx("should have one default queue on %s", pa->ifname); return (1); } /* make sure the default queue is a leaf */ TAILQ_FOREACH(altq, &altqs, entries) { if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) continue; if (altq->qname[0] == 0) /* this is for interface */ continue; if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) { warnx("default queue is not a leaf"); error++; } } return (error); } static int print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) { - const struct hfsc_opts *opts; + const struct hfsc_opts_v1 *opts; const struct node_hfsc_sc *rtsc, *lssc, *ulsc; opts = &a->pq_u.hfsc_opts; if (qopts == NULL) rtsc = lssc = ulsc = NULL; else { rtsc = &qopts->data.hfsc_opts.realtime; lssc = &qopts->data.hfsc_opts.linkshare; ulsc = &qopts->data.hfsc_opts.upperlimit; } if (opts->flags || opts->rtsc_m2 != 0 || opts->ulsc_m2 != 0 || (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth || opts->lssc_d != 0))) { printf("hfsc("); if (opts->flags & HFCF_RED) printf(" red"); if (opts->flags & HFCF_ECN) printf(" ecn"); if (opts->flags & HFCF_RIO) printf(" rio"); if (opts->flags & HFCF_CODEL) printf(" codel"); if (opts->flags & HFCF_CLEARDSCP) printf(" cleardscp"); if (opts->flags & HFCF_DEFAULTCLASS) printf(" default"); if (opts->rtsc_m2 != 0) print_hfsc_sc("realtime", opts->rtsc_m1, opts->rtsc_d, opts->rtsc_m2, rtsc); if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth || opts->lssc_d != 0)) print_hfsc_sc("linkshare", opts->lssc_m1, opts->lssc_d, opts->lssc_m2, lssc); if (opts->ulsc_m2 != 0) print_hfsc_sc("upperlimit", opts->ulsc_m1, opts->ulsc_d, opts->ulsc_m2, ulsc); printf(" ) "); return (1); } else return (0); } static int print_codel_opts(const struct pf_altq *a, const 
struct node_queue_opt *qopts) { const struct codel_opts *opts; opts = &a->pq_u.codel_opts; if (opts->target || opts->interval || opts->ecn) { printf("codel("); if (opts->target) printf(" target %d", opts->target); if (opts->interval) printf(" interval %d", opts->interval); if (opts->ecn) printf("ecn"); printf(" ) "); return (1); } return (0); } static int print_fairq_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) { const struct fairq_opts *opts; const struct node_fairq_sc *loc_lssc; opts = &a->pq_u.fairq_opts; if (qopts == NULL) loc_lssc = NULL; else loc_lssc = &qopts->data.fairq_opts.linkshare; if (opts->flags || (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth || opts->lssc_d != 0))) { printf("fairq("); if (opts->flags & FARF_RED) printf(" red"); if (opts->flags & FARF_ECN) printf(" ecn"); if (opts->flags & FARF_RIO) printf(" rio"); if (opts->flags & FARF_CODEL) printf(" codel"); if (opts->flags & FARF_CLEARDSCP) printf(" cleardscp"); if (opts->flags & FARF_DEFAULTCLASS) printf(" default"); if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth || opts->lssc_d != 0)) print_fairq_sc("linkshare", opts->lssc_m1, opts->lssc_d, opts->lssc_m2, loc_lssc); printf(" ) "); return (1); } else return (0); } /* * admission control using generalized service curve */ /* add a new service curve to a generalized service curve */ static void gsc_add_sc(struct gen_sc *gsc, struct service_curve *sc) { if (is_sc_null(sc)) return; if (sc->d != 0) gsc_add_seg(gsc, 0.0, 0.0, (double)sc->d, (double)sc->m1); gsc_add_seg(gsc, (double)sc->d, 0.0, INFINITY, (double)sc->m2); } /* * check whether all points of a generalized service curve have * their y-coordinates no larger than a given two-piece linear * service curve. */ static int is_gsc_under_sc(struct gen_sc *gsc, struct service_curve *sc) { struct segment *s, *last, *end; double y; if (is_sc_null(sc)) { if (LIST_EMPTY(gsc)) return (1); LIST_FOREACH(s, gsc, _next) { if (s->m != 0) return (0); } return (1); } /* * gsc has a dummy entry at the end with x = INFINITY. * loop through up to this dummy entry. */ end = gsc_getentry(gsc, INFINITY); if (end == NULL) return (1); last = NULL; for (s = LIST_FIRST(gsc); s != end; s = LIST_NEXT(s, _next)) { if (s->y > sc_x2y(sc, s->x)) return (0); last = s; } /* last now holds the real last segment */ if (last == NULL) return (1); if (last->m > sc->m2) return (0); if (last->x < sc->d && last->m > sc->m1) { y = last->y + (sc->d - last->x) * last->m; if (y > sc_x2y(sc, sc->d)) return (0); } return (1); } static void gsc_destroy(struct gen_sc *gsc) { struct segment *s; while ((s = LIST_FIRST(gsc)) != NULL) { LIST_REMOVE(s, _next); free(s); } } /* * return a segment entry starting at x. * if gsc has no entry starting at x, a new entry is created at x. */ static struct segment * gsc_getentry(struct gen_sc *gsc, double x) { struct segment *new, *prev, *s; prev = NULL; LIST_FOREACH(s, gsc, _next) { if (s->x == x) return (s); /* matching entry found */ else if (s->x < x) prev = s; else break; } /* we have to create a new entry */ if ((new = calloc(1, sizeof(struct segment))) == NULL) return (NULL); new->x = x; if (x == INFINITY || s == NULL) new->d = 0; else if (s->x == INFINITY) new->d = INFINITY; else new->d = s->x - x; if (prev == NULL) { /* insert the new entry at the head of the list */ new->y = 0; new->m = 0; LIST_INSERT_HEAD(gsc, new, _next); } else { /* * the start point intersects with the segment pointed by * prev. 
divide prev into 2 segments */ if (x == INFINITY) { prev->d = INFINITY; if (prev->m == 0) new->y = prev->y; else new->y = INFINITY; } else { prev->d = x - prev->x; new->y = prev->d * prev->m + prev->y; } new->m = prev->m; LIST_INSERT_AFTER(prev, new, _next); } return (new); } /* add a segment to a generalized service curve */ static int gsc_add_seg(struct gen_sc *gsc, double x, double y, double d, double m) { struct segment *start, *end, *s; double x2; if (d == INFINITY) x2 = INFINITY; else x2 = x + d; start = gsc_getentry(gsc, x); end = gsc_getentry(gsc, x2); if (start == NULL || end == NULL) return (-1); for (s = start; s != end; s = LIST_NEXT(s, _next)) { s->m += m; s->y += y + (s->x - x) * m; } end = gsc_getentry(gsc, INFINITY); for (; s != end; s = LIST_NEXT(s, _next)) { s->y += m * d; } return (0); } /* get y-projection of a service curve */ static double sc_x2y(struct service_curve *sc, double x) { double y; if (x <= (double)sc->d) /* y belongs to the 1st segment */ y = x * (double)sc->m1; else /* y belongs to the 2nd segment */ y = (double)sc->d * (double)sc->m1 + (x - (double)sc->d) * (double)sc->m2; return (y); } /* * misc utilities */ #define R2S_BUFS 8 #define RATESTR_MAX 16 char * rate2str(double rate) { char *buf; static char r2sbuf[R2S_BUFS][RATESTR_MAX]; /* ring bufer */ static int idx = 0; int i; static const char unit[] = " KMG"; buf = r2sbuf[idx++]; if (idx == R2S_BUFS) idx = 0; for (i = 0; rate >= 1000 && i <= 3; i++) rate /= 1000; if ((int)(rate * 100) % 100) snprintf(buf, RATESTR_MAX, "%.2f%cb", rate, unit[i]); else snprintf(buf, RATESTR_MAX, "%d%cb", (int)rate, unit[i]); return (buf); } #ifdef __FreeBSD__ /* * XXX * FreeBSD does not have SIOCGIFDATA. * To emulate this, DIOCGIFSPEED ioctl added to pf. */ -u_int32_t +u_int64_t getifspeed(int pfdev, char *ifname) { struct pf_ifspeed io; bzero(&io, sizeof io); if (strlcpy(io.ifname, ifname, IFNAMSIZ) >= sizeof(io.ifname)) errx(1, "getifspeed: strlcpy"); if (ioctl(pfdev, DIOCGIFSPEED, &io) == -1) err(1, "DIOCGIFSPEED"); - return ((u_int32_t)io.baudrate); + return (io.baudrate); } #else u_int32_t getifspeed(char *ifname) { int s; struct ifreq ifr; struct if_data ifrdat; if ((s = socket(get_socket_domain(), SOCK_DGRAM, 0)) < 0) err(1, "socket"); bzero(&ifr, sizeof(ifr)); if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) errx(1, "getifspeed: strlcpy"); ifr.ifr_data = (caddr_t)&ifrdat; if (ioctl(s, SIOCGIFDATA, (caddr_t)&ifr) == -1) err(1, "SIOCGIFDATA"); if (close(s)) err(1, "close"); return ((u_int32_t)ifrdat.ifi_baudrate); } #endif u_long getifmtu(char *ifname) { int s; struct ifreq ifr; if ((s = socket(get_socket_domain(), SOCK_DGRAM, 0)) < 0) err(1, "socket"); bzero(&ifr, sizeof(ifr)); if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) errx(1, "getifmtu: strlcpy"); if (ioctl(s, SIOCGIFMTU, (caddr_t)&ifr) == -1) #ifdef __FreeBSD__ ifr.ifr_mtu = 1500; #else err(1, "SIOCGIFMTU"); #endif if (close(s)) err(1, "close"); if (ifr.ifr_mtu > 0) return (ifr.ifr_mtu); else { warnx("could not get mtu for %s, assuming 1500", ifname); return (1500); } } int eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts, - u_int32_t ref_bw) + u_int64_t ref_bw) { int errors = 0; switch (pa->scheduler) { case ALTQT_CBQ: pa->pq_u.cbq_opts = opts->data.cbq_opts; break; case ALTQT_PRIQ: pa->pq_u.priq_opts = opts->data.priq_opts; break; case ALTQT_HFSC: pa->pq_u.hfsc_opts.flags = opts->data.hfsc_opts.flags; if (opts->data.hfsc_opts.linkshare.used) { pa->pq_u.hfsc_opts.lssc_m1 = 
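sc_x2y() above evaluates the two-piece curve analytically: y = m1 * x while x <= d, and y = m1 * d + m2 * (x - d) past the knee. With m1 = 2, d = 3, m2 = 1 (arbitrary units):

	sc_x2y(5) = 2*3 + 1*(5 - 3) = 8

Similarly, rate2str() above picks a K/M/G suffix by repeated division by 1000, so rate2str(1234567.0) yields "1.23Mb".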
eval_bwspec(&opts->data.hfsc_opts.linkshare.m1, ref_bw); pa->pq_u.hfsc_opts.lssc_m2 = eval_bwspec(&opts->data.hfsc_opts.linkshare.m2, ref_bw); pa->pq_u.hfsc_opts.lssc_d = opts->data.hfsc_opts.linkshare.d; } if (opts->data.hfsc_opts.realtime.used) { pa->pq_u.hfsc_opts.rtsc_m1 = eval_bwspec(&opts->data.hfsc_opts.realtime.m1, ref_bw); pa->pq_u.hfsc_opts.rtsc_m2 = eval_bwspec(&opts->data.hfsc_opts.realtime.m2, ref_bw); pa->pq_u.hfsc_opts.rtsc_d = opts->data.hfsc_opts.realtime.d; } if (opts->data.hfsc_opts.upperlimit.used) { pa->pq_u.hfsc_opts.ulsc_m1 = eval_bwspec(&opts->data.hfsc_opts.upperlimit.m1, ref_bw); pa->pq_u.hfsc_opts.ulsc_m2 = eval_bwspec(&opts->data.hfsc_opts.upperlimit.m2, ref_bw); pa->pq_u.hfsc_opts.ulsc_d = opts->data.hfsc_opts.upperlimit.d; } break; case ALTQT_FAIRQ: pa->pq_u.fairq_opts.flags = opts->data.fairq_opts.flags; pa->pq_u.fairq_opts.nbuckets = opts->data.fairq_opts.nbuckets; pa->pq_u.fairq_opts.hogs_m1 = eval_bwspec(&opts->data.fairq_opts.hogs_bw, ref_bw); if (opts->data.fairq_opts.linkshare.used) { pa->pq_u.fairq_opts.lssc_m1 = eval_bwspec(&opts->data.fairq_opts.linkshare.m1, ref_bw); pa->pq_u.fairq_opts.lssc_m2 = eval_bwspec(&opts->data.fairq_opts.linkshare.m2, ref_bw); pa->pq_u.fairq_opts.lssc_d = opts->data.fairq_opts.linkshare.d; } break; case ALTQT_CODEL: pa->pq_u.codel_opts.target = opts->data.codel_opts.target; pa->pq_u.codel_opts.interval = opts->data.codel_opts.interval; pa->pq_u.codel_opts.ecn = opts->data.codel_opts.ecn; break; default: warnx("eval_queue_opts: unknown scheduler type %u", opts->qtype); errors++; break; } return (errors); } -u_int32_t -eval_bwspec(struct node_queue_bw *bw, u_int32_t ref_bw) +/* + * If absolute bandwidth if set, return the lesser of that value and the + * reference bandwidth. Limiting to the reference bandwidth allows simple + * limiting of configured bandwidth parameters for schedulers that are + * 32-bit limited, as the root/interface bandwidth (top-level reference + * bandwidth) will be properly limited in that case. + * + * Otherwise, if the absolute bandwidth is not set, return given percentage + * of reference bandwidth. 
+ */ +u_int64_t +eval_bwspec(struct node_queue_bw *bw, u_int64_t ref_bw) { if (bw->bw_absolute > 0) - return (bw->bw_absolute); + return (MIN(bw->bw_absolute, ref_bw)); if (bw->bw_percent > 0) return (ref_bw / 100 * bw->bw_percent); return (0); } void print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2, const struct node_hfsc_sc *sc) { printf(" %s", scname); if (d != 0) { printf("("); if (sc != NULL && sc->m1.bw_percent > 0) printf("%u%%", sc->m1.bw_percent); else printf("%s", rate2str((double)m1)); printf(" %u", d); } if (sc != NULL && sc->m2.bw_percent > 0) printf(" %u%%", sc->m2.bw_percent); else printf(" %s", rate2str((double)m2)); if (d != 0) printf(")"); } void print_fairq_sc(const char *scname, u_int m1, u_int d, u_int m2, const struct node_fairq_sc *sc) { printf(" %s", scname); if (d != 0) { printf("("); if (sc != NULL && sc->m1.bw_percent > 0) printf("%u%%", sc->m1.bw_percent); else printf("%s", rate2str((double)m1)); printf(" %u", d); } if (sc != NULL && sc->m2.bw_percent > 0) printf(" %u%%", sc->m2.bw_percent); else printf(" %s", rate2str((double)m2)); if (d != 0) printf(")"); } Index: head/sbin/pfctl/pfctl_parser.h =================================================================== --- head/sbin/pfctl/pfctl_parser.h (revision 338208) +++ head/sbin/pfctl/pfctl_parser.h (revision 338209) @@ -1,326 +1,326 @@ /* $OpenBSD: pfctl_parser.h,v 1.86 2006/10/31 23:46:25 mcbride Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Daniel Hartmeier * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _PFCTL_PARSER_H_ #define _PFCTL_PARSER_H_ #define PF_OSFP_FILE "/etc/pf.os" #define PF_OPT_DISABLE 0x0001 #define PF_OPT_ENABLE 0x0002 #define PF_OPT_VERBOSE 0x0004 #define PF_OPT_NOACTION 0x0008 #define PF_OPT_QUIET 0x0010 #define PF_OPT_CLRRULECTRS 0x0020 #define PF_OPT_USEDNS 0x0040 #define PF_OPT_VERBOSE2 0x0080 #define PF_OPT_DUMMYACTION 0x0100 #define PF_OPT_DEBUG 0x0200 #define PF_OPT_SHOWALL 0x0400 #define PF_OPT_OPTIMIZE 0x0800 #define PF_OPT_NUMERIC 0x1000 #define PF_OPT_MERGE 0x2000 #define PF_OPT_RECURSE 0x4000 #define PF_TH_ALL 0xFF #define PF_NAT_PROXY_PORT_LOW 50001 #define PF_NAT_PROXY_PORT_HIGH 65535 #define PF_OPTIMIZE_BASIC 0x0001 #define PF_OPTIMIZE_PROFILE 0x0002 #define FCNT_NAMES { \ "searches", \ "inserts", \ "removals", \ NULL \ } struct pfr_buffer; /* forward definition */ struct pfctl { int dev; int opts; int optimize; int loadopt; int asd; /* anchor stack depth */ int bn; /* brace number */ int brace; int tdirty; /* kernel dirty */ #define PFCTL_ANCHOR_STACK_DEPTH 64 struct pf_anchor *astack[PFCTL_ANCHOR_STACK_DEPTH]; struct pfioc_pooladdr paddr; struct pfioc_altq *paltq; struct pfioc_queue *pqueue; struct pfr_buffer *trans; struct pf_anchor *anchor, *alast; const char *ruleset; /* 'set foo' options */ u_int32_t timeout[PFTM_MAX]; u_int32_t limit[PF_LIMIT_MAX]; u_int32_t debug; u_int32_t hostid; char *ifname; u_int8_t timeout_set[PFTM_MAX]; u_int8_t limit_set[PF_LIMIT_MAX]; u_int8_t debug_set; u_int8_t hostid_set; u_int8_t ifname_set; }; struct node_if { char ifname[IFNAMSIZ]; u_int8_t not; u_int8_t dynamic; /* antispoof */ u_int ifa_flags; struct node_if *next; struct node_if *tail; }; struct node_host { struct pf_addr_wrap addr; struct pf_addr bcast; struct pf_addr peer; sa_family_t af; u_int8_t not; u_int32_t ifindex; /* link-local IPv6 addrs */ char *ifname; u_int ifa_flags; struct node_host *next; struct node_host *tail; }; struct node_os { char *os; pf_osfp_t fingerprint; struct node_os *next; struct node_os *tail; }; struct node_queue_bw { - u_int32_t bw_absolute; + u_int64_t bw_absolute; u_int16_t bw_percent; }; struct node_hfsc_sc { struct node_queue_bw m1; /* slope of 1st segment; bps */ u_int d; /* x-projection of m1; msec */ struct node_queue_bw m2; /* slope of 2nd segment; bps */ u_int8_t used; }; struct node_hfsc_opts { struct node_hfsc_sc realtime; struct node_hfsc_sc linkshare; struct node_hfsc_sc upperlimit; int flags; }; struct node_fairq_sc { struct node_queue_bw m1; /* slope of 1st segment; bps */ u_int d; /* x-projection of m1; msec */ struct node_queue_bw m2; /* slope of 2nd segment; bps */ u_int8_t used; }; struct node_fairq_opts { struct node_fairq_sc linkshare; struct node_queue_bw hogs_bw; u_int nbuckets; int flags; }; struct node_queue_opt { int qtype; union { struct cbq_opts cbq_opts; struct codel_opts codel_opts; struct priq_opts priq_opts; struct node_hfsc_opts hfsc_opts; struct node_fairq_opts fairq_opts; } data; }; #ifdef __FreeBSD__ /* * XXX * Absolutely this is not correct location to define this. * Should we use an another sperate header file? 
*/ #define SIMPLEQ_HEAD STAILQ_HEAD #define SIMPLEQ_HEAD_INITIALIZER STAILQ_HEAD_INITIALIZER #define SIMPLEQ_ENTRY STAILQ_ENTRY #define SIMPLEQ_FIRST STAILQ_FIRST #define SIMPLEQ_END(head) NULL #define SIMPLEQ_EMPTY STAILQ_EMPTY #define SIMPLEQ_NEXT STAILQ_NEXT /*#define SIMPLEQ_FOREACH STAILQ_FOREACH*/ #define SIMPLEQ_FOREACH(var, head, field) \ for((var) = SIMPLEQ_FIRST(head); \ (var) != SIMPLEQ_END(head); \ (var) = SIMPLEQ_NEXT(var, field)) #define SIMPLEQ_INIT STAILQ_INIT #define SIMPLEQ_INSERT_HEAD STAILQ_INSERT_HEAD #define SIMPLEQ_INSERT_TAIL STAILQ_INSERT_TAIL #define SIMPLEQ_INSERT_AFTER STAILQ_INSERT_AFTER #define SIMPLEQ_REMOVE_HEAD STAILQ_REMOVE_HEAD #endif SIMPLEQ_HEAD(node_tinithead, node_tinit); struct node_tinit { /* table initializer */ SIMPLEQ_ENTRY(node_tinit) entries; struct node_host *host; char *file; }; /* optimizer created tables */ struct pf_opt_tbl { char pt_name[PF_TABLE_NAME_SIZE]; int pt_rulecount; int pt_generated; struct node_tinithead pt_nodes; struct pfr_buffer *pt_buf; }; #define PF_OPT_TABLE_PREFIX "__automatic_" /* optimizer pf_rule container */ struct pf_opt_rule { struct pf_rule por_rule; struct pf_opt_tbl *por_src_tbl; struct pf_opt_tbl *por_dst_tbl; u_int64_t por_profile_count; TAILQ_ENTRY(pf_opt_rule) por_entry; TAILQ_ENTRY(pf_opt_rule) por_skip_entry[PF_SKIP_COUNT]; }; TAILQ_HEAD(pf_opt_queue, pf_opt_rule); int pfctl_rules(int, char *, int, int, char *, struct pfr_buffer *); int pfctl_optimize_ruleset(struct pfctl *, struct pf_ruleset *); int pfctl_add_rule(struct pfctl *, struct pf_rule *, const char *); int pfctl_add_altq(struct pfctl *, struct pf_altq *); int pfctl_add_pool(struct pfctl *, struct pf_pool *, sa_family_t); void pfctl_move_pool(struct pf_pool *, struct pf_pool *); void pfctl_clear_pool(struct pf_pool *); int pfctl_set_timeout(struct pfctl *, const char *, int, int); int pfctl_set_optimization(struct pfctl *, const char *); int pfctl_set_limit(struct pfctl *, const char *, unsigned int); int pfctl_set_logif(struct pfctl *, char *); int pfctl_set_hostid(struct pfctl *, u_int32_t); int pfctl_set_debug(struct pfctl *, char *); int pfctl_set_interface_flags(struct pfctl *, char *, int, int); int parse_config(char *, struct pfctl *); int parse_flags(char *); int pfctl_load_anchors(int, struct pfctl *, struct pfr_buffer *); void print_pool(struct pf_pool *, u_int16_t, u_int16_t, sa_family_t, int); void print_src_node(struct pf_src_node *, int); void print_rule(struct pf_rule *, const char *, int, int); void print_tabledef(const char *, int, int, struct node_tinithead *); void print_status(struct pf_status *, int); void print_running(struct pf_status *); int eval_pfaltq(struct pfctl *, struct pf_altq *, struct node_queue_bw *, struct node_queue_opt *); int eval_pfqueue(struct pfctl *, struct pf_altq *, struct node_queue_bw *, struct node_queue_opt *); void print_altq(const struct pf_altq *, unsigned, struct node_queue_bw *, struct node_queue_opt *); void print_queue(const struct pf_altq *, unsigned, struct node_queue_bw *, int, struct node_queue_opt *); int pfctl_define_table(char *, int, int, const char *, struct pfr_buffer *, u_int32_t); void pfctl_clear_fingerprints(int, int); int pfctl_file_fingerprints(int, int, const char *); pf_osfp_t pfctl_get_fingerprint(const char *); int pfctl_load_fingerprints(int, int); char *pfctl_lookup_fingerprint(pf_osfp_t, char *, size_t); void pfctl_show_fingerprints(int); struct icmptypeent { const char *name; u_int8_t type; }; struct icmpcodeent { const char *name; u_int8_t type; u_int8_t code; }; 
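The widening of bw_absolute in struct node_queue_bw earlier in this header, from u_int32_t to u_int64_t, is the parser-side core of this change: a 32-bit field simply cannot represent rates beyond ~4.29 Gbps. For instance, 10 Gbps forced through a 32-bit field:

	(u_int32_t)10000000000ULL == 1410065408		/* ~1.41 Gbps */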
const struct icmptypeent *geticmptypebynumber(u_int8_t, u_int8_t); const struct icmptypeent *geticmptypebyname(char *, u_int8_t); const struct icmpcodeent *geticmpcodebynumber(u_int8_t, u_int8_t, u_int8_t); const struct icmpcodeent *geticmpcodebyname(u_long, char *, u_int8_t); struct pf_timeout { const char *name; int timeout; }; #define PFCTL_FLAG_FILTER 0x02 #define PFCTL_FLAG_NAT 0x04 #define PFCTL_FLAG_OPTION 0x08 #define PFCTL_FLAG_ALTQ 0x10 #define PFCTL_FLAG_TABLE 0x20 extern const struct pf_timeout pf_timeouts[]; void set_ipmask(struct node_host *, u_int8_t); int check_netmask(struct node_host *, sa_family_t); int unmask(struct pf_addr *, sa_family_t); void ifa_load(void); int get_socket_domain(void); struct node_host *ifa_exists(const char *); struct node_host *ifa_grouplookup(const char *ifa_name, int flags); struct node_host *ifa_lookup(const char *, int); struct node_host *host(const char *); int append_addr(struct pfr_buffer *, char *, int); int append_addr_host(struct pfr_buffer *, struct node_host *, int, int); #endif /* _PFCTL_PARSER_H_ */ Index: head/sbin/pfctl/pfctl_qstats.c =================================================================== --- head/sbin/pfctl/pfctl_qstats.c (revision 338208) +++ head/sbin/pfctl/pfctl_qstats.c (revision 338209) @@ -1,511 +1,515 @@ /* $OpenBSD: pfctl_qstats.c,v 1.30 2004/04/27 21:47:32 kjc Exp $ */ /* * Copyright (c) Henning Brauer * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #include __FBSDID("$FreeBSD$"); +#define PFIOC_USE_LATEST + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pfctl.h" #include "pfctl_parser.h" union class_stats { class_stats_t cbq_stats; struct priq_classstats priq_stats; struct hfsc_classstats hfsc_stats; struct fairq_classstats fairq_stats; struct codel_ifstats codel_stats; }; #define AVGN_MAX 8 #define STAT_INTERVAL 5 struct queue_stats { union class_stats data; int avgn; double avg_bytes; double avg_packets; u_int64_t prev_bytes; u_int64_t prev_packets; }; struct pf_altq_node { struct pf_altq altq; struct pf_altq_node *next; struct pf_altq_node *children; struct queue_stats qstats; }; int pfctl_update_qstats(int, struct pf_altq_node **); void pfctl_insert_altq_node(struct pf_altq_node **, const struct pf_altq, const struct queue_stats); struct pf_altq_node *pfctl_find_altq_node(struct pf_altq_node *, const char *, const char *); void pfctl_print_altq_node(int, const struct pf_altq_node *, unsigned, int); void print_cbqstats(struct queue_stats); void print_codelstats(struct queue_stats); void print_priqstats(struct queue_stats); void print_hfscstats(struct queue_stats); void print_fairqstats(struct queue_stats); void pfctl_free_altq_node(struct pf_altq_node *); void pfctl_print_altq_nodestat(int, const struct pf_altq_node *); void update_avg(struct pf_altq_node *); int pfctl_show_altq(int dev, const char *iface, int opts, int verbose2) { struct pf_altq_node *root = NULL, *node; int nodes, dotitle = (opts & PF_OPT_SHOWALL); #ifdef __FreeBSD__ if (!altqsupport) return (-1); #endif if ((nodes = pfctl_update_qstats(dev, &root)) < 0) return (-1); if (nodes == 0) printf("No queue in use\n"); for (node = root; node != NULL; node = node->next) { if (iface != NULL && strcmp(node->altq.ifname, iface)) continue; if (dotitle) { pfctl_print_title("ALTQ:"); dotitle = 0; } pfctl_print_altq_node(dev, node, 0, opts); } while (verbose2 && nodes > 0) { printf("\n"); fflush(stdout); sleep(STAT_INTERVAL); if ((nodes = pfctl_update_qstats(dev, &root)) == -1) return (-1); for (node = root; node != NULL; node = node->next) { if (iface != NULL && strcmp(node->altq.ifname, iface)) continue; #ifdef __FreeBSD__ if (node->altq.local_flags & PFALTQ_FLAG_IF_REMOVED) continue; #endif pfctl_print_altq_node(dev, node, 0, opts); } } pfctl_free_altq_node(root); return (0); } int pfctl_update_qstats(int dev, struct pf_altq_node **root) { struct pf_altq_node *node; struct pfioc_altq pa; struct pfioc_qstats pq; u_int32_t mnr, nr; struct queue_stats qstats; static u_int32_t last_ticket; memset(&pa, 0, sizeof(pa)); memset(&pq, 0, sizeof(pq)); memset(&qstats, 0, sizeof(qstats)); + pa.version = PFIOC_ALTQ_VERSION; if (ioctl(dev, DIOCGETALTQS, &pa)) { warn("DIOCGETALTQS"); return (-1); } /* if a new set is found, start over */ if (pa.ticket != last_ticket && *root != NULL) { pfctl_free_altq_node(*root); *root = NULL; } last_ticket = pa.ticket; mnr = pa.nr; for (nr = 0; nr < mnr; ++nr) { pa.nr = nr; if (ioctl(dev, DIOCGETALTQ, &pa)) { warn("DIOCGETALTQ"); return (-1); } #ifdef __FreeBSD__ if ((pa.altq.qid > 0 || pa.altq.scheduler == ALTQT_CODEL) && !(pa.altq.local_flags & PFALTQ_FLAG_IF_REMOVED)) { #else if (pa.altq.qid > 0) { #endif pq.nr = nr; pq.ticket = pa.ticket; pq.buf = &qstats.data; pq.nbytes = sizeof(qstats.data); + pq.version = altq_stats_version(pa.altq.scheduler); if (ioctl(dev, DIOCGETQSTATS, &pq)) { warn("DIOCGETQSTATS"); return (-1); } if 
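Both ioctl requests here are now versioned: pa.version selects the pf_altq layout for DIOCGETALTQS/DIOCGETALTQ, and pq.version, looked up per scheduler via altq_stats_version(), tells the kernel which classstats layout to copy out for DIOCGETQSTATS. A plausible shape for that helper, assuming (as the rest of this change suggests) that only HFSC has grown a new stats revision so far:

	u_int32_t
	altq_stats_version(int scheduler)
	{
		/* hypothetical sketch; only HFSC stats are versioned */
		if (scheduler == ALTQT_HFSC)
			return (1);
		return (0);
	}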
((node = pfctl_find_altq_node(*root, pa.altq.qname, pa.altq.ifname)) != NULL) { memcpy(&node->qstats.data, &qstats.data, sizeof(qstats.data)); update_avg(node); } else { pfctl_insert_altq_node(root, pa.altq, qstats); } } #ifdef __FreeBSD__ else if (pa.altq.local_flags & PFALTQ_FLAG_IF_REMOVED) { memset(&qstats.data, 0, sizeof(qstats.data)); if ((node = pfctl_find_altq_node(*root, pa.altq.qname, pa.altq.ifname)) != NULL) { memcpy(&node->qstats.data, &qstats.data, sizeof(qstats.data)); update_avg(node); } else { pfctl_insert_altq_node(root, pa.altq, qstats); } } #endif } return (mnr); } void pfctl_insert_altq_node(struct pf_altq_node **root, const struct pf_altq altq, const struct queue_stats qstats) { struct pf_altq_node *node; node = calloc(1, sizeof(struct pf_altq_node)); if (node == NULL) err(1, "pfctl_insert_altq_node: calloc"); memcpy(&node->altq, &altq, sizeof(struct pf_altq)); memcpy(&node->qstats, &qstats, sizeof(qstats)); node->next = node->children = NULL; if (*root == NULL) *root = node; else if (!altq.parent[0]) { struct pf_altq_node *prev = *root; while (prev->next != NULL) prev = prev->next; prev->next = node; } else { struct pf_altq_node *parent; parent = pfctl_find_altq_node(*root, altq.parent, altq.ifname); if (parent == NULL) errx(1, "parent %s not found", altq.parent); if (parent->children == NULL) parent->children = node; else { struct pf_altq_node *prev = parent->children; while (prev->next != NULL) prev = prev->next; prev->next = node; } } update_avg(node); } struct pf_altq_node * pfctl_find_altq_node(struct pf_altq_node *root, const char *qname, const char *ifname) { struct pf_altq_node *node, *child; for (node = root; node != NULL; node = node->next) { if (!strcmp(node->altq.qname, qname) && !(strcmp(node->altq.ifname, ifname))) return (node); if (node->children != NULL) { child = pfctl_find_altq_node(node->children, qname, ifname); if (child != NULL) return (child); } } return (NULL); } void pfctl_print_altq_node(int dev, const struct pf_altq_node *node, unsigned int level, int opts) { const struct pf_altq_node *child; if (node == NULL) return; print_altq(&node->altq, level, NULL, NULL); if (node->children != NULL) { printf("{"); for (child = node->children; child != NULL; child = child->next) { printf("%s", child->altq.qname); if (child->next != NULL) printf(", "); } printf("}"); } printf("\n"); if (opts & PF_OPT_VERBOSE) pfctl_print_altq_nodestat(dev, node); if (opts & PF_OPT_DEBUG) printf(" [ qid=%u ifname=%s ifbandwidth=%s ]\n", node->altq.qid, node->altq.ifname, rate2str((double)(node->altq.ifbandwidth))); for (child = node->children; child != NULL; child = child->next) pfctl_print_altq_node(dev, child, level + 1, opts); } void pfctl_print_altq_nodestat(int dev, const struct pf_altq_node *a) { if (a->altq.qid == 0 && a->altq.scheduler != ALTQT_CODEL) return; #ifdef __FreeBSD__ if (a->altq.local_flags & PFALTQ_FLAG_IF_REMOVED) return; #endif switch (a->altq.scheduler) { case ALTQT_CBQ: print_cbqstats(a->qstats); break; case ALTQT_PRIQ: print_priqstats(a->qstats); break; case ALTQT_HFSC: print_hfscstats(a->qstats); break; case ALTQT_FAIRQ: print_fairqstats(a->qstats); break; case ALTQT_CODEL: print_codelstats(a->qstats); break; } } void print_cbqstats(struct queue_stats cur) { printf(" [ pkts: %10llu bytes: %10llu " "dropped pkts: %6llu bytes: %6llu ]\n", (unsigned long long)cur.data.cbq_stats.xmit_cnt.packets, (unsigned long long)cur.data.cbq_stats.xmit_cnt.bytes, (unsigned long long)cur.data.cbq_stats.drop_cnt.packets, (unsigned long 
long)cur.data.cbq_stats.drop_cnt.bytes); printf(" [ qlength: %3d/%3d borrows: %6u suspends: %6u ]\n", cur.data.cbq_stats.qcnt, cur.data.cbq_stats.qmax, cur.data.cbq_stats.borrows, cur.data.cbq_stats.delays); if (cur.avgn < 2) return; printf(" [ measured: %7.1f packets/s, %s/s ]\n", cur.avg_packets / STAT_INTERVAL, rate2str((8 * cur.avg_bytes) / STAT_INTERVAL)); } void print_codelstats(struct queue_stats cur) { printf(" [ pkts: %10llu bytes: %10llu " "dropped pkts: %6llu bytes: %6llu ]\n", (unsigned long long)cur.data.codel_stats.cl_xmitcnt.packets, (unsigned long long)cur.data.codel_stats.cl_xmitcnt.bytes, (unsigned long long)cur.data.codel_stats.cl_dropcnt.packets + cur.data.codel_stats.stats.drop_cnt.packets, (unsigned long long)cur.data.codel_stats.cl_dropcnt.bytes + cur.data.codel_stats.stats.drop_cnt.bytes); printf(" [ qlength: %3d/%3d ]\n", cur.data.codel_stats.qlength, cur.data.codel_stats.qlimit); if (cur.avgn < 2) return; printf(" [ measured: %7.1f packets/s, %s/s ]\n", cur.avg_packets / STAT_INTERVAL, rate2str((8 * cur.avg_bytes) / STAT_INTERVAL)); } void print_priqstats(struct queue_stats cur) { printf(" [ pkts: %10llu bytes: %10llu " "dropped pkts: %6llu bytes: %6llu ]\n", (unsigned long long)cur.data.priq_stats.xmitcnt.packets, (unsigned long long)cur.data.priq_stats.xmitcnt.bytes, (unsigned long long)cur.data.priq_stats.dropcnt.packets, (unsigned long long)cur.data.priq_stats.dropcnt.bytes); printf(" [ qlength: %3d/%3d ]\n", cur.data.priq_stats.qlength, cur.data.priq_stats.qlimit); if (cur.avgn < 2) return; printf(" [ measured: %7.1f packets/s, %s/s ]\n", cur.avg_packets / STAT_INTERVAL, rate2str((8 * cur.avg_bytes) / STAT_INTERVAL)); } void print_hfscstats(struct queue_stats cur) { printf(" [ pkts: %10llu bytes: %10llu " "dropped pkts: %6llu bytes: %6llu ]\n", (unsigned long long)cur.data.hfsc_stats.xmit_cnt.packets, (unsigned long long)cur.data.hfsc_stats.xmit_cnt.bytes, (unsigned long long)cur.data.hfsc_stats.drop_cnt.packets, (unsigned long long)cur.data.hfsc_stats.drop_cnt.bytes); printf(" [ qlength: %3d/%3d ]\n", cur.data.hfsc_stats.qlength, cur.data.hfsc_stats.qlimit); if (cur.avgn < 2) return; printf(" [ measured: %7.1f packets/s, %s/s ]\n", cur.avg_packets / STAT_INTERVAL, rate2str((8 * cur.avg_bytes) / STAT_INTERVAL)); } void print_fairqstats(struct queue_stats cur) { printf(" [ pkts: %10llu bytes: %10llu " "dropped pkts: %6llu bytes: %6llu ]\n", (unsigned long long)cur.data.fairq_stats.xmit_cnt.packets, (unsigned long long)cur.data.fairq_stats.xmit_cnt.bytes, (unsigned long long)cur.data.fairq_stats.drop_cnt.packets, (unsigned long long)cur.data.fairq_stats.drop_cnt.bytes); printf(" [ qlength: %3d/%3d ]\n", cur.data.fairq_stats.qlength, cur.data.fairq_stats.qlimit); if (cur.avgn < 2) return; printf(" [ measured: %7.1f packets/s, %s/s ]\n", cur.avg_packets / STAT_INTERVAL, rate2str((8 * cur.avg_bytes) / STAT_INTERVAL)); } void pfctl_free_altq_node(struct pf_altq_node *node) { while (node != NULL) { struct pf_altq_node *prev; if (node->children != NULL) pfctl_free_altq_node(node->children); prev = node; node = node->next; free(prev); } } void update_avg(struct pf_altq_node *a) { struct queue_stats *qs; u_int64_t b, p; int n; if (a->altq.qid == 0 && a->altq.scheduler != ALTQT_CODEL) return; qs = &a->qstats; n = qs->avgn; switch (a->altq.scheduler) { case ALTQT_CBQ: b = qs->data.cbq_stats.xmit_cnt.bytes; p = qs->data.cbq_stats.xmit_cnt.packets; break; case ALTQT_PRIQ: b = qs->data.priq_stats.xmitcnt.bytes; p = qs->data.priq_stats.xmitcnt.packets; break; case 
ALTQT_HFSC: b = qs->data.hfsc_stats.xmit_cnt.bytes; p = qs->data.hfsc_stats.xmit_cnt.packets; break; case ALTQT_FAIRQ: b = qs->data.fairq_stats.xmit_cnt.bytes; p = qs->data.fairq_stats.xmit_cnt.packets; break; case ALTQT_CODEL: b = qs->data.codel_stats.cl_xmitcnt.bytes; p = qs->data.codel_stats.cl_xmitcnt.packets; break; default: b = 0; p = 0; break; } if (n == 0) { qs->prev_bytes = b; qs->prev_packets = p; qs->avgn++; return; } if (b >= qs->prev_bytes) qs->avg_bytes = ((qs->avg_bytes * (n - 1)) + (b - qs->prev_bytes)) / n; if (p >= qs->prev_packets) qs->avg_packets = ((qs->avg_packets * (n - 1)) + (p - qs->prev_packets)) / n; qs->prev_bytes = b; qs->prev_packets = p; if (n < AVGN_MAX) qs->avgn++; } Index: head/sys/net/altq/altq.h =================================================================== --- head/sys/net/altq/altq.h (revision 338208) +++ head/sys/net/altq/altq.h (revision 338209) @@ -1,206 +1,231 @@ /*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_H_ #define _ALTQ_ALTQ_H_ #if 0 /* * allow altq-3 (altqd(8) and /dev/altq) to coexist with the new pf-based altq. * altq3 is mainly for research experiments. pf-based altq is for daily use. */ #define ALTQ3_COMPAT /* for compatibility with altq-3 */ #define ALTQ3_CLFIER_COMPAT /* for compatibility with altq-3 classifier */ #endif #ifdef ALTQ3_COMPAT #include #include #include #include #ifndef IFNAMSIZ #define IFNAMSIZ 16 #endif #endif /* ALTQ3_COMPAT */ /* altq discipline type */ #define ALTQT_NONE 0 /* reserved */ #define ALTQT_CBQ 1 /* cbq */ #define ALTQT_WFQ 2 /* wfq */ #define ALTQT_AFMAP 3 /* afmap */ #define ALTQT_FIFOQ 4 /* fifoq */ #define ALTQT_RED 5 /* red */ #define ALTQT_RIO 6 /* rio */ #define ALTQT_LOCALQ 7 /* local use */ #define ALTQT_HFSC 8 /* hfsc */ #define ALTQT_CDNR 9 /* traffic conditioner */ #define ALTQT_BLUE 10 /* blue */ #define ALTQT_PRIQ 11 /* priority queue */ #define ALTQT_JOBS 12 /* JoBS */ #define ALTQT_FAIRQ 13 /* fairq */ #define ALTQT_CODEL 14 /* CoDel */ #define ALTQT_MAX 15 /* should be max discipline type + 1 */ #ifdef ALTQ3_COMPAT struct altqreq { char ifname[IFNAMSIZ]; /* if name, e.g. 
"en0" */ u_long arg; /* request-specific argument */ }; #endif /* simple token backet meter profile */ struct tb_profile { - u_int rate; /* rate in bit-per-sec */ - u_int depth; /* depth in bytes */ + u_int64_t rate; /* rate in bit-per-sec */ + u_int32_t depth; /* depth in bytes */ }; #ifdef ALTQ3_COMPAT struct tbrreq { char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */ struct tb_profile tb_prof; /* token bucket profile */ }; #ifdef ALTQ3_CLFIER_COMPAT /* * common network flow info structure */ struct flowinfo { u_char fi_len; /* total length */ u_char fi_family; /* address family */ u_int8_t fi_data[46]; /* actually longer; address family specific flow info. */ }; /* * flow info structure for internet protocol family. * (currently this is the only protocol family supported) */ struct flowinfo_in { u_char fi_len; /* sizeof(struct flowinfo_in) */ u_char fi_family; /* AF_INET */ u_int8_t fi_proto; /* IPPROTO_XXX */ u_int8_t fi_tos; /* type-of-service */ struct in_addr fi_dst; /* dest address */ struct in_addr fi_src; /* src address */ u_int16_t fi_dport; /* dest port */ u_int16_t fi_sport; /* src port */ u_int32_t fi_gpi; /* generalized port id for ipsec */ u_int8_t _pad[28]; /* make the size equal to flowinfo_in6 */ }; #ifdef SIN6_LEN struct flowinfo_in6 { u_char fi6_len; /* sizeof(struct flowinfo_in6) */ u_char fi6_family; /* AF_INET6 */ u_int8_t fi6_proto; /* IPPROTO_XXX */ u_int8_t fi6_tclass; /* traffic class */ u_int32_t fi6_flowlabel; /* ipv6 flowlabel */ u_int16_t fi6_dport; /* dest port */ u_int16_t fi6_sport; /* src port */ u_int32_t fi6_gpi; /* generalized port id */ struct in6_addr fi6_dst; /* dest address */ struct in6_addr fi6_src; /* src address */ }; #endif /* INET6 */ /* * flow filters for AF_INET and AF_INET6 */ struct flow_filter { int ff_ruleno; struct flowinfo_in ff_flow; struct { struct in_addr mask_dst; struct in_addr mask_src; u_int8_t mask_tos; u_int8_t _pad[3]; } ff_mask; u_int8_t _pad2[24]; /* make the size equal to flow_filter6 */ }; #ifdef SIN6_LEN struct flow_filter6 { int ff_ruleno; struct flowinfo_in6 ff_flow6; struct { struct in6_addr mask6_dst; struct in6_addr mask6_src; u_int8_t mask6_tclass; u_int8_t _pad[3]; } ff_mask6; }; #endif /* INET6 */ #endif /* ALTQ3_CLFIER_COMPAT */ #endif /* ALTQ3_COMPAT */ /* * generic packet counter */ struct pktcntr { u_int64_t packets; u_int64_t bytes; }; #define PKTCNTR_ADD(cntr, len) \ do { (cntr)->packets++; (cntr)->bytes += len; } while (/*CONSTCOND*/ 0) #ifdef ALTQ3_COMPAT /* * altq related ioctls */ #define ALTQGTYPE _IOWR('q', 0, struct altqreq) /* get queue type */ #if 0 /* * these ioctls are currently discipline-specific but could be shared * in the future. 
*/ #define ALTQATTACH _IOW('q', 1, struct altqreq) /* attach discipline */ #define ALTQDETACH _IOW('q', 2, struct altqreq) /* detach discipline */ #define ALTQENABLE _IOW('q', 3, struct altqreq) /* enable discipline */ #define ALTQDISABLE _IOW('q', 4, struct altqreq) /* disable discipline*/ #define ALTQCLEAR _IOW('q', 5, struct altqreq) /* (re)initialize */ #define ALTQCONFIG _IOWR('q', 6, struct altqreq) /* set config params */ #define ALTQADDCLASS _IOWR('q', 7, struct altqreq) /* add a class */ #define ALTQMODCLASS _IOWR('q', 8, struct altqreq) /* modify a class */ #define ALTQDELCLASS _IOWR('q', 9, struct altqreq) /* delete a class */ #define ALTQADDFILTER _IOWR('q', 10, struct altqreq) /* add a filter */ #define ALTQDELFILTER _IOWR('q', 11, struct altqreq) /* delete a filter */ #define ALTQGETSTATS _IOWR('q', 12, struct altqreq) /* get statistics */ #define ALTQGETCNTR _IOWR('q', 13, struct altqreq) /* get a pkt counter */ #endif /* 0 */ #define ALTQTBRSET _IOW('q', 14, struct tbrreq) /* set tb regulator */ #define ALTQTBRGET _IOWR('q', 15, struct tbrreq) /* get tb regulator */ #endif /* ALTQ3_COMPAT */ #ifdef _KERNEL #include #endif +/* + * Can't put these versions in the scheduler-specific headers and include + * them all here as that will cause build failure due to cross-including + * each other scheduler's private bits into each scheduler's + * implementation. + */ +#define CBQ_STATS_VERSION 0 /* Latest version of class_stats_t */ +#define CODEL_STATS_VERSION 0 /* Latest version of codel_ifstats */ +#define FAIRQ_STATS_VERSION 0 /* Latest version of fairq_classstats */ +#define HFSC_STATS_VERSION 1 /* Latest version of hfsc_classstats */ +#define PRIQ_STATS_VERSION 0 /* Latest version of priq_classstats */ + +/* Return the latest stats version for the given scheduler. */ +static inline int altq_stats_version(int scheduler) +{ + switch (scheduler) { + case ALTQT_CBQ: return (CBQ_STATS_VERSION); + case ALTQT_CODEL: return (CODEL_STATS_VERSION); + case ALTQT_FAIRQ: return (FAIRQ_STATS_VERSION); + case ALTQT_HFSC: return (HFSC_STATS_VERSION); + case ALTQT_PRIQ: return (PRIQ_STATS_VERSION); + default: return (0); + } +} + #endif /* _ALTQ_ALTQ_H_ */ Index: head/sys/net/altq/altq_cbq.c =================================================================== --- head/sys/net/altq/altq_cbq.c (revision 338208) +++ head/sys/net/altq/altq_cbq.c (revision 338209) @@ -1,1169 +1,1169 @@ /*- * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the SMCC Technology * Development Group at Sun Microsystems, Inc. * * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. 
The software is * provided "as is" without express or implied warranty of any kind. * * These notices must be retained in any copies of any part of this software. * * $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #include #endif #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif #ifdef ALTQ3_COMPAT /* * Local Data structures. */ static cbq_state_t *cbq_list = NULL; #endif /* * Forward Declarations. */ static int cbq_class_destroy(cbq_state_t *, struct rm_class *); static struct rm_class *clh_to_clp(cbq_state_t *, u_int32_t); static int cbq_clear_interface(cbq_state_t *); static int cbq_request(struct ifaltq *, int, void *); static int cbq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *cbq_dequeue(struct ifaltq *, int); static void cbqrestart(struct ifaltq *); static void get_class_stats(class_stats_t *, struct rm_class *); static void cbq_purge(cbq_state_t *); #ifdef ALTQ3_COMPAT static int cbq_add_class(struct cbq_add_class *); static int cbq_delete_class(struct cbq_delete_class *); static int cbq_modify_class(struct cbq_modify_class *); static int cbq_class_create(cbq_state_t *, struct cbq_add_class *, struct rm_class *, struct rm_class *); static int cbq_clear_hierarchy(struct cbq_interface *); static int cbq_set_enable(struct cbq_interface *, int); static int cbq_ifattach(struct cbq_interface *); static int cbq_ifdetach(struct cbq_interface *); static int cbq_getstats(struct cbq_getstats *); static int cbq_add_filter(struct cbq_add_filter *); static int cbq_delete_filter(struct cbq_delete_filter *); #endif /* ALTQ3_COMPAT */ /* * int * cbq_class_destroy(cbq_mod_state_t *, struct rm_class *) - This * function destroys a given traffic class. Before destroying * the class, all traffic for that class is released. */ static int cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl) { int i; /* delete the class */ rmc_delete_class(&cbqp->ifnp, cl); /* * free the class handle */ for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == cl) cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; #ifdef ALTQ3_COMPAT if (cl == cbqp->ifnp.ctl_) cbqp->ifnp.ctl_ = NULL; #endif return (0); } /* convert class handle to class pointer */ static struct rm_class * clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle) { int i; struct rm_class *cl; if (chandle == 0) return (NULL); /* * first, try optimistically the slot matching the lower bits of * the handle. if it fails, do the linear table search. 
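 * For example, with CBQ_MAX_CLASSES == 256, handle 0x103 first probes
 * slot 0x103 % 256 == 3 and falls back to scanning the whole table
 * only when that slot holds a different class.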
*/ i = chandle % CBQ_MAX_CLASSES; if ((cl = cbqp->cbq_class_tbl[i]) != NULL && cl->stats_.handle == chandle) return (cl); for (i = 0; i < CBQ_MAX_CLASSES; i++) if ((cl = cbqp->cbq_class_tbl[i]) != NULL && cl->stats_.handle == chandle) return (cl); return (NULL); } static int cbq_clear_interface(cbq_state_t *cbqp) { int again, i; struct rm_class *cl; #ifdef ALTQ3_CLFIER_COMPAT /* free the filters for this interface */ acc_discard_filters(&cbqp->cbq_classifier, NULL, 1); #endif /* clear out the classes now */ do { again = 0; for (i = 0; i < CBQ_MAX_CLASSES; i++) { if ((cl = cbqp->cbq_class_tbl[i]) != NULL) { if (is_a_parent_class(cl)) again++; else { cbq_class_destroy(cbqp, cl); cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; #ifdef ALTQ3_COMPAT if (cl == cbqp->ifnp.ctl_) cbqp->ifnp.ctl_ = NULL; #endif } } } } while (again); return (0); } static int cbq_request(struct ifaltq *ifq, int req, void *arg) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: cbq_purge(cbqp); break; } return (0); } /* copy the stats info in rm_class to class_states_t */ static void get_class_stats(class_stats_t *statsp, struct rm_class *cl) { statsp->xmit_cnt = cl->stats_.xmit_cnt; statsp->drop_cnt = cl->stats_.drop_cnt; statsp->over = cl->stats_.over; statsp->borrows = cl->stats_.borrows; statsp->overactions = cl->stats_.overactions; statsp->delays = cl->stats_.delays; statsp->depth = cl->depth_; statsp->priority = cl->pri_; statsp->maxidle = cl->maxidle_; statsp->minidle = cl->minidle_; statsp->offtime = cl->offtime_; statsp->qmax = qlimit(cl->q_); statsp->ns_per_byte = cl->ns_per_byte_; statsp->wrr_allot = cl->w_allotment_; statsp->qcnt = qlen(cl->q_); statsp->avgidle = cl->avgidle_; statsp->qtype = qtype(cl->q_); #ifdef ALTQ_RED if (q_is_red(cl->q_)) red_getstats(cl->red_, &statsp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->q_)) rio_getstats((rio_t *)cl->red_, &statsp->red[0]); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->q_)) codel_getstats(cl->codel_, &statsp->codel); #endif } int cbq_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); s = splnet(); error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc, cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL); splx(s); return (error); } int cbq_add_altq(struct pf_altq *a) { cbq_state_t *cbqp; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); /* allocate and initialize cbq_state_t */ cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cbqp == NULL) return (ENOMEM); CALLOUT_INIT(&cbqp->cbq_callout); cbqp->cbq_qlen = 0; cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ /* keep the state in pf_altq */ a->altq_disc = cbqp; return (0); } int cbq_remove_altq(struct pf_altq *a) { cbq_state_t *cbqp; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; cbq_clear_interface(cbqp); if (cbqp->ifnp.default_) cbq_class_destroy(cbqp, cbqp->ifnp.default_); if (cbqp->ifnp.root_) cbq_class_destroy(cbqp, cbqp->ifnp.root_); /* deallocate cbq_state_t */ free(cbqp, M_DEVBUF); return (0); } int cbq_add_queue(struct pf_altq *a) { struct rm_class *borrow, *parent; cbq_state_t *cbqp; struct rm_class *cl; struct cbq_opts *opts; int i; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); if (a->qid == 0) return (EINVAL); /* * find a free 
slot in the class table. if the slot matching * the lower bits of qid is free, use this slot. otherwise, * use the first free slot. */ i = a->qid % CBQ_MAX_CLASSES; if (cbqp->cbq_class_tbl[i] != NULL) { for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == NULL) break; if (i == CBQ_MAX_CLASSES) return (EINVAL); } opts = &a->pq_u.cbq_opts; /* check parameters */ if (a->priority >= CBQ_MAXPRI) return (EINVAL); /* Get pointers to parent and borrow classes. */ parent = clh_to_clp(cbqp, a->parent_qid); if (opts->flags & CBQCLF_BORROW) borrow = parent; else borrow = NULL; /* * A class must borrow from it's parent or it can not * borrow at all. Hence, borrow can be null. */ if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) { printf("cbq_add_queue: no parent class!\n"); return (EINVAL); } if ((borrow != parent) && (borrow != NULL)) { printf("cbq_add_class: borrow class != parent\n"); return (EINVAL); } /* * check parameters */ switch (opts->flags & CBQCLF_CLASSMASK) { case CBQCLF_ROOTCLASS: if (parent != NULL) return (EINVAL); if (cbqp->ifnp.root_) return (EINVAL); break; case CBQCLF_DEFCLASS: if (cbqp->ifnp.default_) return (EINVAL); break; case 0: if (a->qid == 0) return (EINVAL); break; default: /* more than two flags bits set */ return (EINVAL); } /* * create a class. if this is a root class, initialize the * interface. */ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte, cbqrestart, a->qlimit, RM_MAXQUEUED, opts->maxidle, opts->minidle, opts->offtime, opts->flags); cl = cbqp->ifnp.root_; } else { cl = rmc_newclass(a->priority, &cbqp->ifnp, opts->ns_per_byte, rmc_delay_action, a->qlimit, parent, borrow, opts->maxidle, opts->minidle, opts->offtime, opts->pktsize, opts->flags); } if (cl == NULL) return (ENOMEM); /* return handle to user space. */ cl->stats_.handle = a->qid; cl->stats_.depth = cl->depth_; /* save the allocated class */ cbqp->cbq_class_tbl[i] = cl; if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) cbqp->ifnp.default_ = cl; return (0); } int cbq_remove_queue(struct pf_altq *a) { struct rm_class *cl; cbq_state_t *cbqp; int i; if ((cbqp = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) return (EINVAL); /* if we are a parent class, then return an error. */ if (is_a_parent_class(cl)) return (EINVAL); /* delete the class */ rmc_delete_class(&cbqp->ifnp, cl); /* * free the class handle */ for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == cl) { cbqp->cbq_class_tbl[i] = NULL; if (cl == cbqp->ifnp.root_) cbqp->ifnp.root_ = NULL; if (cl == cbqp->ifnp.default_) cbqp->ifnp.default_ = NULL; break; } return (0); } int -cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { cbq_state_t *cbqp; struct rm_class *cl; class_stats_t stats; int error = 0; if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(cbqp, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * int * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr) * - Queue data packets. * * cbq_enqueue is set to ifp->if_altqenqueue and called by an upper * layer (e.g. ether_output). cbq_enqueue queues the given packet * to the cbq, then invokes the driver's start routine. 
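 * On FreeBSD the class is normally recovered from the qid carried in the
 * pf(4) mbuf tag (see the pf_find_mtag() lookup below); packets without
 * a usable tag fall back to the default class.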
* * Assumptions: called in splimp * Returns: 0 if the queueing is successful. * ENOBUFS if a packet dropping occurred as a result of * the queueing. */ static int cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; struct rm_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(cbqp, t->qid); #ifdef ALTQ3_COMPAT else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) cl = pktattr->pattr_class; #endif if (cl == NULL) { cl = cbqp->ifnp.default_; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } #ifdef ALTQ3_COMPAT if (pktattr != NULL) cl->pktattr_ = pktattr; /* save proto hdr used by ECN */ else #endif cl->pktattr_ = NULL; len = m_pktlen(m); if (rmc_queue_packet(cl, m) != 0) { /* drop occurred. some mbuf was freed in rmc_queue_packet. */ PKTCNTR_ADD(&cl->stats_.drop_cnt, len); return (ENOBUFS); } /* successfully queued. */ ++cbqp->cbq_qlen; IFQ_INC_LEN(ifq); return (0); } static struct mbuf * cbq_dequeue(struct ifaltq *ifq, int op) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); m = rmc_dequeue_next(&cbqp->ifnp, op); if (m && op == ALTDQ_REMOVE) { --cbqp->cbq_qlen; /* decrement # of packets in cbq */ IFQ_DEC_LEN(ifq); /* Update the class. */ rmc_update_class_util(&cbqp->ifnp); } return (m); } /* * void * cbqrestart(queue_t *) - Restart sending of data. * called from rmc_restart in splimp via timeout after waking up * a suspended class. * Returns: NONE */ static void cbqrestart(struct ifaltq *ifq) { cbq_state_t *cbqp; struct ifnet *ifp; IFQ_LOCK_ASSERT(ifq); if (!ALTQ_IS_ENABLED(ifq)) /* cbq must have been detached */ return; if ((cbqp = (cbq_state_t *)ifq->altq_disc) == NULL) /* should not happen */ return; ifp = ifq->altq_ifp; if (ifp->if_start && cbqp->cbq_qlen > 0 && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { IFQ_UNLOCK(ifq); (*ifp->if_start)(ifp); IFQ_LOCK(ifq); } } static void cbq_purge(cbq_state_t *cbqp) { struct rm_class *cl; int i; for (i = 0; i < CBQ_MAX_CLASSES; i++) if ((cl = cbqp->cbq_class_tbl[i]) != NULL) rmc_dropall(cl); if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_)) cbqp->ifnp.ifq_->ifq_len = 0; } #ifdef ALTQ3_COMPAT static int cbq_add_class(acp) struct cbq_add_class *acp; { char *ifacename; struct rm_class *borrow, *parent; cbq_state_t *cbqp; ifacename = acp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); /* check parameters */ if (acp->cbq_class.priority >= CBQ_MAXPRI || acp->cbq_class.maxq > CBQ_MAXQSIZE) return (EINVAL); /* Get pointers to parent and borrow classes. */ parent = clh_to_clp(cbqp, acp->cbq_class.parent_class_handle); borrow = clh_to_clp(cbqp, acp->cbq_class.borrow_class_handle); /* * A class must borrow from it's parent or it can not * borrow at all. Hence, borrow can be null. 
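 * In practice the only valid non-NULL borrow is the parent itself;
 * anything else is rejected just below.  (This ALTQ3_COMPAT path is
 * normally compiled out: ALTQ3_COMPAT is only defined inside the
 * "#if 0" block at the top of altq.h.)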
*/ if (parent == NULL && (acp->cbq_class.flags & CBQCLF_ROOTCLASS) == 0) { printf("cbq_add_class: no parent class!\n"); return (EINVAL); } if ((borrow != parent) && (borrow != NULL)) { printf("cbq_add_class: borrow class != parent\n"); return (EINVAL); } return cbq_class_create(cbqp, acp, parent, borrow); } static int cbq_delete_class(dcp) struct cbq_delete_class *dcp; { char *ifacename; struct rm_class *cl; cbq_state_t *cbqp; ifacename = dcp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(cbqp, dcp->cbq_class_handle)) == NULL) return (EINVAL); /* if we are a parent class, then return an error. */ if (is_a_parent_class(cl)) return (EINVAL); /* if a filter has a reference to this class delete the filter */ acc_discard_filters(&cbqp->cbq_classifier, cl, 0); return cbq_class_destroy(cbqp, cl); } static int cbq_modify_class(acp) struct cbq_modify_class *acp; { char *ifacename; struct rm_class *cl; cbq_state_t *cbqp; ifacename = acp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); /* Get pointer to this class */ if ((cl = clh_to_clp(cbqp, acp->cbq_class_handle)) == NULL) return (EINVAL); if (rmc_modclass(cl, acp->cbq_class.nano_sec_per_byte, acp->cbq_class.maxq, acp->cbq_class.maxidle, acp->cbq_class.minidle, acp->cbq_class.offtime, acp->cbq_class.pktsize) < 0) return (EINVAL); return (0); } /* * struct rm_class * * cbq_class_create(cbq_mod_state_t *cbqp, struct cbq_add_class *acp, * struct rm_class *parent, struct rm_class *borrow) * * This function create a new traffic class in the CBQ class hierarchy of * given parameters. The class that created is either the root, default, * or a new dynamic class. If CBQ is not initilaized, the root class * will be created. */ static int cbq_class_create(cbqp, acp, parent, borrow) cbq_state_t *cbqp; struct cbq_add_class *acp; struct rm_class *parent, *borrow; { struct rm_class *cl; cbq_class_spec_t *spec = &acp->cbq_class; u_int32_t chandle; int i; /* * allocate class handle */ for (i = 1; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == NULL) break; if (i == CBQ_MAX_CLASSES) return (EINVAL); chandle = i; /* use the slot number as class handle */ /* * create a class. if this is a root class, initialize the * interface. */ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte, cbqrestart, spec->maxq, RM_MAXQUEUED, spec->maxidle, spec->minidle, spec->offtime, spec->flags); cl = cbqp->ifnp.root_; } else { cl = rmc_newclass(spec->priority, &cbqp->ifnp, spec->nano_sec_per_byte, rmc_delay_action, spec->maxq, parent, borrow, spec->maxidle, spec->minidle, spec->offtime, spec->pktsize, spec->flags); } if (cl == NULL) return (ENOMEM); /* return handle to user space. */ acp->cbq_class_handle = chandle; cl->stats_.handle = chandle; cl->stats_.depth = cl->depth_; /* save the allocated class */ cbqp->cbq_class_tbl[i] = cl; if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) cbqp->ifnp.default_ = cl; if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_CTLCLASS) cbqp->ifnp.ctl_ = cl; return (0); } static int cbq_add_filter(afp) struct cbq_add_filter *afp; { char *ifacename; cbq_state_t *cbqp; struct rm_class *cl; ifacename = afp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); /* Get the pointer to class. 
*/ if ((cl = clh_to_clp(cbqp, afp->cbq_class_handle)) == NULL) return (EINVAL); return acc_add_filter(&cbqp->cbq_classifier, &afp->cbq_filter, cl, &afp->cbq_filter_handle); } static int cbq_delete_filter(dfp) struct cbq_delete_filter *dfp; { char *ifacename; cbq_state_t *cbqp; ifacename = dfp->cbq_iface.cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); return acc_delete_filter(&cbqp->cbq_classifier, dfp->cbq_filter_handle); } /* * cbq_clear_hierarchy deletes all classes and their filters on the * given interface. */ static int cbq_clear_hierarchy(ifacep) struct cbq_interface *ifacep; { char *ifacename; cbq_state_t *cbqp; ifacename = ifacep->cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); return cbq_clear_interface(cbqp); } /* * static int * cbq_set_enable(struct cbq_enable *ep) - this function processed the * ioctl request to enable class based queueing. It searches the list * of interfaces for the specified interface and then enables CBQ on * that interface. * * Returns: 0, for no error. * EBADF, for specified inteface not found. */ static int cbq_set_enable(ep, enable) struct cbq_interface *ep; int enable; { int error = 0; cbq_state_t *cbqp; char *ifacename; ifacename = ep->cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); switch (enable) { case ENABLE: if (cbqp->ifnp.root_ == NULL || cbqp->ifnp.default_ == NULL || cbqp->ifnp.ctl_ == NULL) { if (cbqp->ifnp.root_ == NULL) printf("No Root Class for %s\n", ifacename); if (cbqp->ifnp.default_ == NULL) printf("No Default Class for %s\n", ifacename); if (cbqp->ifnp.ctl_ == NULL) printf("No Control Class for %s\n", ifacename); error = EINVAL; } else if ((error = altq_enable(cbqp->ifnp.ifq_)) == 0) { cbqp->cbq_qlen = 0; } break; case DISABLE: error = altq_disable(cbqp->ifnp.ifq_); break; } return (error); } static int cbq_getstats(gsp) struct cbq_getstats *gsp; { char *ifacename; int i, n, nclasses; cbq_state_t *cbqp; struct rm_class *cl; class_stats_t stats, *usp; int error = 0; ifacename = gsp->iface.cbq_ifacename; nclasses = gsp->nclasses; usp = gsp->stats; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); if (nclasses <= 0) return (EINVAL); for (n = 0, i = 0; n < nclasses && i < CBQ_MAX_CLASSES; n++, i++) { while ((cl = cbqp->cbq_class_tbl[i]) == NULL) if (++i >= CBQ_MAX_CLASSES) goto out; get_class_stats(&stats, cl); stats.handle = cl->stats_.handle; if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, sizeof(stats))) != 0) return (error); } out: gsp->nclasses = n; return (error); } static int cbq_ifattach(ifacep) struct cbq_interface *ifacep; { int error = 0; char *ifacename; cbq_state_t *new_cbqp; struct ifnet *ifp; ifacename = ifacep->cbq_ifacename; if ((ifp = ifunit(ifacename)) == NULL) return (ENXIO); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENXIO); /* allocate and initialize cbq_state_t */ new_cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK); if (new_cbqp == NULL) return (ENOMEM); bzero(new_cbqp, sizeof(cbq_state_t)); CALLOUT_INIT(&new_cbqp->cbq_callout); new_cbqp->cbq_qlen = 0; new_cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */ /* * set CBQ to this ifnet structure. */ error = altq_attach(&ifp->if_snd, ALTQT_CBQ, new_cbqp, cbq_enqueue, cbq_dequeue, cbq_request, &new_cbqp->cbq_classifier, acc_classify); if (error) { free(new_cbqp, M_DEVBUF); return (error); } /* prepend to the list of cbq_state_t's. 
*/ new_cbqp->cbq_next = cbq_list; cbq_list = new_cbqp; return (0); } static int cbq_ifdetach(ifacep) struct cbq_interface *ifacep; { char *ifacename; cbq_state_t *cbqp; ifacename = ifacep->cbq_ifacename; if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL) return (EBADF); (void)cbq_set_enable(ifacep, DISABLE); cbq_clear_interface(cbqp); /* remove CBQ from the ifnet structure. */ (void)altq_detach(cbqp->ifnp.ifq_); /* remove from the list of cbq_state_t's. */ if (cbq_list == cbqp) cbq_list = cbqp->cbq_next; else { cbq_state_t *cp; for (cp = cbq_list; cp != NULL; cp = cp->cbq_next) if (cp->cbq_next == cbqp) { cp->cbq_next = cbqp->cbq_next; break; } ASSERT(cp != NULL); } /* deallocate cbq_state_t */ free(cbqp, M_DEVBUF); return (0); } /* * cbq device interface */ altqdev_decl(cbq); int cbqopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { return (0); } int cbqclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct ifnet *ifp; struct cbq_interface iface; int err, error = 0; while (cbq_list) { ifp = cbq_list->ifnp.ifq_->altq_ifp; sprintf(iface.cbq_ifacename, "%s", ifp->if_xname); err = cbq_ifdetach(&iface); if (err != 0 && error == 0) error = err; } return (error); } int cbqioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { int error = 0; /* check cmd for superuser only */ switch (cmd) { case CBQ_GETSTATS: /* currently only command that an ordinary user can call */ break; default: #if (__FreeBSD_version > 700000) error = priv_check(p, PRIV_ALTQ_MANAGE); #elsif (__FreeBSD_version > 400000) error = suser(p); #else error = suser(p->p_ucred, &p->p_acflag); #endif if (error) return (error); break; } switch (cmd) { case CBQ_ENABLE: error = cbq_set_enable((struct cbq_interface *)addr, ENABLE); break; case CBQ_DISABLE: error = cbq_set_enable((struct cbq_interface *)addr, DISABLE); break; case CBQ_ADD_FILTER: error = cbq_add_filter((struct cbq_add_filter *)addr); break; case CBQ_DEL_FILTER: error = cbq_delete_filter((struct cbq_delete_filter *)addr); break; case CBQ_ADD_CLASS: error = cbq_add_class((struct cbq_add_class *)addr); break; case CBQ_DEL_CLASS: error = cbq_delete_class((struct cbq_delete_class *)addr); break; case CBQ_MODIFY_CLASS: error = cbq_modify_class((struct cbq_modify_class *)addr); break; case CBQ_CLEAR_HIERARCHY: error = cbq_clear_hierarchy((struct cbq_interface *)addr); break; case CBQ_IF_ATTACH: error = cbq_ifattach((struct cbq_interface *)addr); break; case CBQ_IF_DETACH: error = cbq_ifdetach((struct cbq_interface *)addr); break; case CBQ_GETSTATS: error = cbq_getstats((struct cbq_getstats *)addr); break; default: error = EINVAL; break; } return error; } #if 0 /* for debug */ static void cbq_class_dump(int); static void cbq_class_dump(i) int i; { struct rm_class *cl; rm_class_stats_t *s; struct _class_queue_ *q; if (cbq_list == NULL) { printf("cbq_class_dump: no cbq_state found\n"); return; } cl = cbq_list->cbq_class_tbl[i]; printf("class %d cl=%p\n", i, cl); if (cl != NULL) { s = &cl->stats_; q = cl->q_; printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n", cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_); printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n", cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_, cl->maxidle_); printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n", 
cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_); printf("handle=%d, depth=%d, packets=%d, bytes=%d\n", s->handle, s->depth, (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes); printf("over=%d\n, borrows=%d, drops=%d, overactions=%d, delays=%d\n", s->over, s->borrows, (int)s->drop_cnt.packets, s->overactions, s->delays); printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n", q->tail_, q->head_, q->qlen_, q->qlim_, q->qthresh_, q->qtype_); } } #endif /* 0 */ #ifdef KLD_MODULE static struct altqsw cbq_sw = {"cbq", cbqopen, cbqclose, cbqioctl}; ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw); MODULE_DEPEND(altq_cbq, altq_red, 1, 1, 1); MODULE_DEPEND(altq_cbq, altq_rio, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_CBQ */ Index: head/sys/net/altq/altq_cbq.h =================================================================== --- head/sys/net/altq/altq_cbq.h (revision 338208) +++ head/sys/net/altq/altq_cbq.h (revision 338209) @@ -1,225 +1,231 @@ /*- * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the SMCC Technology * Development Group at Sun Microsystems, Inc. * * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is * provided "as is" without express or implied warranty of any kind. * * These notices must be retained in any copies of any part of this software. 
* * $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_CBQ_H_ #define _ALTQ_ALTQ_CBQ_H_ #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif #define NULL_CLASS_HANDLE 0 /* class flags should be same as class flags in rm_class.h */ #define CBQCLF_RED 0x0001 /* use RED */ #define CBQCLF_ECN 0x0002 /* use RED/ECN */ #define CBQCLF_RIO 0x0004 /* use RIO */ #define CBQCLF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */ #define CBQCLF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define CBQCLF_BORROW 0x0020 /* borrow from parent */ #define CBQCLF_CODEL 0x0040 /* use CoDel */ /* class flags only for root class */ #define CBQCLF_WRR 0x0100 /* weighted-round robin */ #define CBQCLF_EFFICIENT 0x0200 /* work-conserving */ /* class flags for special classes */ #define CBQCLF_ROOTCLASS 0x1000 /* root class */ #define CBQCLF_DEFCLASS 0x2000 /* default class */ #ifdef ALTQ3_COMPAT #define CBQCLF_CTLCLASS 0x4000 /* control class */ #endif #define CBQCLF_CLASSMASK 0xf000 /* class mask */ #define CBQ_MAXQSIZE 200 #define CBQ_MAXPRI RM_MAXPRIO typedef struct _cbq_class_stats_ { u_int32_t handle; u_int depth; struct pktcntr xmit_cnt; /* packets sent in this class */ struct pktcntr drop_cnt; /* dropped packets */ u_int over; /* # times went over limit */ u_int borrows; /* # times tried to borrow */ u_int overactions; /* # times invoked overlimit action */ u_int delays; /* # times invoked delay actions */ /* other static class parameters useful for debugging */ int priority; int maxidle; int minidle; int offtime; int qmax; int ns_per_byte; int wrr_allot; int qcnt; /* # packets in queue */ int avgidle; /* codel, red and rio related info */ int qtype; struct redstats red[3]; struct codel_stats codel; } class_stats_t; +/* + * CBQ_STATS_VERSION is defined in altq.h to work around issues stemming + * from mixing of public-API and internal bits in each scheduler-specific + * header. + */ + #ifdef ALTQ3_COMPAT /* * Define structures associated with IOCTLS for cbq. */ /* * Define the CBQ interface structure. This must be included in all * IOCTL's such that the CBQ driver may find the appropriate CBQ module * associated with the network interface to be affected. */ struct cbq_interface { char cbq_ifacename[IFNAMSIZ]; }; typedef struct cbq_class_spec { u_int priority; u_int nano_sec_per_byte; u_int maxq; u_int maxidle; int minidle; u_int offtime; u_int32_t parent_class_handle; u_int32_t borrow_class_handle; u_int pktsize; int flags; } cbq_class_spec_t; struct cbq_add_class { struct cbq_interface cbq_iface; cbq_class_spec_t cbq_class; u_int32_t cbq_class_handle; }; struct cbq_delete_class { struct cbq_interface cbq_iface; u_int32_t cbq_class_handle; }; struct cbq_modify_class { struct cbq_interface cbq_iface; cbq_class_spec_t cbq_class; u_int32_t cbq_class_handle; }; struct cbq_add_filter { struct cbq_interface cbq_iface; u_int32_t cbq_class_handle; struct flow_filter cbq_filter; u_long cbq_filter_handle; }; struct cbq_delete_filter { struct cbq_interface cbq_iface; u_long cbq_filter_handle; }; /* number of classes are returned in nclasses field */ struct cbq_getstats { struct cbq_interface iface; int nclasses; class_stats_t *stats; }; /* * Define IOCTLs for CBQ. 
*/ #define CBQ_IF_ATTACH _IOW('Q', 1, struct cbq_interface) #define CBQ_IF_DETACH _IOW('Q', 2, struct cbq_interface) #define CBQ_ENABLE _IOW('Q', 3, struct cbq_interface) #define CBQ_DISABLE _IOW('Q', 4, struct cbq_interface) #define CBQ_CLEAR_HIERARCHY _IOW('Q', 5, struct cbq_interface) #define CBQ_ADD_CLASS _IOWR('Q', 7, struct cbq_add_class) #define CBQ_DEL_CLASS _IOW('Q', 8, struct cbq_delete_class) #define CBQ_MODIFY_CLASS _IOWR('Q', 9, struct cbq_modify_class) #define CBQ_ADD_FILTER _IOWR('Q', 10, struct cbq_add_filter) #define CBQ_DEL_FILTER _IOW('Q', 11, struct cbq_delete_filter) #define CBQ_GETSTATS _IOWR('Q', 12, struct cbq_getstats) #endif /* ALTQ3_COMPAT */ #ifdef _KERNEL /* * Define macros only good for kernel drivers and modules. */ #define CBQ_WATCHDOG (hz / 20) #define CBQ_TIMEOUT 10 #define CBQ_LS_TIMEOUT (20 * hz / 1000) #define CBQ_MAX_CLASSES 256 #ifdef ALTQ3_COMPAT #define CBQ_MAX_FILTERS 256 #define DISABLE 0x00 #define ENABLE 0x01 #endif /* ALTQ3_COMPAT */ /* * Define State structures. */ typedef struct cbqstate { #ifdef ALTQ3_COMPAT struct cbqstate *cbq_next; #endif int cbq_qlen; /* # of packets in cbq */ struct rm_class *cbq_class_tbl[CBQ_MAX_CLASSES]; struct rm_ifdat ifnp; struct callout cbq_callout; /* for timeouts */ #ifdef ALTQ3_CLFIER_COMPAT struct acc_classifier cbq_classifier; #endif } cbq_state_t; #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* !_ALTQ_ALTQ_CBQ_H_ */ Index: head/sys/net/altq/altq_codel.c =================================================================== --- head/sys/net/altq/altq_codel.c (revision 338208) +++ head/sys/net/altq/altq_codel.c (revision 338209) @@ -1,477 +1,477 @@ /* * CoDel - The Controlled-Delay Active Queue Management algorithm * * Copyright (C) 2013 Ermal Luçi * Copyright (C) 2011-2012 Kathleen Nichols * Copyright (C) 2011-2012 Van Jacobson * Copyright (C) 2012 Michael D. Taht * Copyright (C) 2012 Eric Dumazet * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_CODEL /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include static int codel_should_drop(struct codel *, class_queue_t *, struct mbuf *, u_int64_t); static void codel_Newton_step(struct codel_vars *); static u_int64_t codel_control_law(u_int64_t t, u_int64_t, u_int32_t); #define codel_time_after(a, b) ((int64_t)(a) - (int64_t)(b) > 0) #define codel_time_after_eq(a, b) ((int64_t)(a) - (int64_t)(b) >= 0) #define codel_time_before(a, b) ((int64_t)(a) - (int64_t)(b) < 0) #define codel_time_before_eq(a, b) ((int64_t)(a) - (int64_t)(b) <= 0) static int codel_request(struct ifaltq *, int, void *); static int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *codel_dequeue(struct ifaltq *, int); int codel_pfattach(struct pf_altq *a) { struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc, codel_enqueue, codel_dequeue, codel_request, NULL, NULL)); } int codel_add_altq(struct pf_altq *a) { struct codel_if *cif; struct ifnet *ifp; struct codel_opts *opts; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); opts = &a->pq_u.codel_opts; cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (cif == NULL) return (ENOMEM); cif->cif_bandwidth = a->ifbandwidth; cif->cif_ifq = &ifp->if_snd; cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cif->cl_q == NULL) { free(cif, M_DEVBUF); return (ENOMEM); } if (a->qlimit == 0) a->qlimit = 50; /* use default. 
*/ qlimit(cif->cl_q) = a->qlimit; qtype(cif->cl_q) = Q_CODEL; qlen(cif->cl_q) = 0; qsize(cif->cl_q) = 0; if (opts->target == 0) opts->target = 5; if (opts->interval == 0) opts->interval = 100; cif->codel.params.target = machclk_freq * opts->target / 1000; cif->codel.params.interval = machclk_freq * opts->interval / 1000; cif->codel.params.ecn = opts->ecn; cif->codel.stats.maxpacket = 256; cif->cl_stats.qlength = qlen(cif->cl_q); cif->cl_stats.qlimit = qlimit(cif->cl_q); /* keep the state in pf_altq */ a->altq_disc = cif; return (0); } int codel_remove_altq(struct pf_altq *a) { struct codel_if *cif; if ((cif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; if (cif->cl_q) free(cif->cl_q, M_DEVBUF); free(cif, M_DEVBUF); return (0); } int -codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct codel_if *cif; struct codel_ifstats stats; int error = 0; if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL) return (EBADF); if (*nbytes < sizeof(stats)) return (EINVAL); stats = cif->cl_stats; stats.stats = cif->codel.stats; if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } static int codel_request(struct ifaltq *ifq, int req, void *arg) { struct codel_if *cif = (struct codel_if *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: if (!ALTQ_IS_ENABLED(cif->cif_ifq)) break; if (qempty(cif->cl_q)) break; while ((m = _getq(cif->cl_q)) != NULL) { PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); m_freem(m); IFQ_DEC_LEN(cif->cif_ifq); } cif->cif_ifq->ifq_len = 0; break; } return (0); } static int codel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct codel_if *cif = (struct codel_if *) ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); return (ENOBUFS); } if (codel_addq(&cif->codel, cif->cl_q, m)) { PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); return (ENOBUFS); } IFQ_INC_LEN(ifq); return (0); } static struct mbuf * codel_dequeue(struct ifaltq *ifq, int op) { struct codel_if *cif = (struct codel_if *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); if (IFQ_IS_EMPTY(ifq)) return (NULL); if (op == ALTDQ_POLL) return (qhead(cif->cl_q)); m = codel_getq(&cif->codel, cif->cl_q); if (m != NULL) { IFQ_DEC_LEN(ifq); PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m)); return (m); } return (NULL); } struct codel * codel_alloc(int target, int interval, int ecn) { struct codel *c; c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO); if (c != NULL) { c->params.target = machclk_freq * target / 1000; c->params.interval = machclk_freq * interval / 1000; c->params.ecn = ecn; c->stats.maxpacket = 256; } return (c); } void codel_destroy(struct codel *c) { free(c, M_DEVBUF); } #define MTAG_CODEL 1438031249 int codel_addq(struct codel *c, class_queue_t *q, struct mbuf *m) { struct m_tag *mtag; uint64_t *enqueue_time; if (qlen(q) < qlimit(q)) { mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); if (mtag == NULL) mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t), M_NOWAIT); if (mtag == NULL) { m_freem(m); return (-1); } enqueue_time = (uint64_t *)(mtag + 1); *enqueue_time = read_machclk(); m_tag_prepend(m, mtag); _addq(q, m); return (0); } 
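	/*
	 * Queue full: tail-drop the arriving packet.  CoDel only manages
	 * the sojourn time of packets it has admitted, so overlimit drops
	 * are accounted separately in drop_overlimit.
	 */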
	c->drop_overlimit++;
	m_freem(m);
	return (-1);
}

static int
codel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m,
    u_int64_t now)
{
	struct m_tag	*mtag;
	uint64_t	*enqueue_time;

	if (m == NULL) {
		c->vars.first_above_time = 0;
		return (0);
	}

	mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL);
	if (mtag == NULL) {
		/* Only one warning per second. */
		if (ppsratecheck(&c->last_log, &c->last_pps, 1))
			printf("%s: could not find the packet mtag!\n",
			    __func__);
		c->vars.first_above_time = 0;
		return (0);
	}
	enqueue_time = (uint64_t *)(mtag + 1);
	c->vars.ldelay = now - *enqueue_time;
	c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m));

	if (codel_time_before(c->vars.ldelay, c->params.target) ||
	    qsize(q) <= c->stats.maxpacket) {
		/* went below - stay below for at least interval */
		c->vars.first_above_time = 0;
		return (0);
	}
	if (c->vars.first_above_time == 0) {
		/* just went above from below. If we stay above
		 * for at least interval we'll say it's ok to drop
		 */
		c->vars.first_above_time = now + c->params.interval;
		return (0);
	}
	if (codel_time_after(now, c->vars.first_above_time))
		return (1);

	return (0);
}

/*
 * Run a Newton method step:
 *  new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
 *
 * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
 */
static void
codel_Newton_step(struct codel_vars *vars)
{
	uint32_t invsqrt, invsqrt2;
	uint64_t val;

/* sizeof_in_bits(rec_inv_sqrt) */
#define	REC_INV_SQRT_BITS (8 * sizeof(u_int16_t))
/* needed shift to get a Q0.32 number from rec_inv_sqrt */
#define	REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS)

	invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
	invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32;
	val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2);
	val >>= 2; /* avoid overflow in following multiply */
	val = (val * invsqrt) >> (32 - 2 + 1);
	vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
}

static u_int64_t
codel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt)
{

	return (t + (u_int32_t)(((u_int64_t)interval *
	    (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32));
}

struct mbuf *
codel_getq(struct codel *c, class_queue_t *q)
{
	struct mbuf	*m;
	u_int64_t	 now;
	int		 drop;

	if ((m = _getq(q)) == NULL) {
		c->vars.dropping = 0;
		return (m);
	}

	now = read_machclk();
	drop = codel_should_drop(c, q, m, now);
	if (c->vars.dropping) {
		if (!drop) {
			/* sojourn time below target - leave dropping state */
			c->vars.dropping = 0;
		} else if (codel_time_after_eq(now, c->vars.drop_next)) {
			/* It's time for the next drop. Drop the current
			 * packet and dequeue the next. The dequeue might
			 * take us out of dropping state.
			 * If not, schedule the next drop.
			 * A large backlog might result in drop rates so high
			 * that the next drop should happen now,
			 * hence the while loop.
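			 * Each iteration reschedules drop_next via the CoDel
			 * control law, i.e. roughly interval/sqrt(count)
			 * later, so the drop rate keeps rising while the
			 * sojourn time stays above target.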
*/ while (c->vars.dropping && codel_time_after_eq(now, c->vars.drop_next)) { c->vars.count++; /* don't care of possible wrap * since there is no more * divide */ codel_Newton_step(&c->vars); /* TODO ECN */ PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); m_freem(m); m = _getq(q); if (!codel_should_drop(c, q, m, now)) /* leave dropping state */ c->vars.dropping = 0; else /* and schedule the next drop */ c->vars.drop_next = codel_control_law(c->vars.drop_next, c->params.interval, c->vars.rec_inv_sqrt); } } } else if (drop) { /* TODO ECN */ PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); m_freem(m); m = _getq(q); drop = codel_should_drop(c, q, m, now); c->vars.dropping = 1; /* if min went above target close to when we last went below it * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. */ if (codel_time_before(now - c->vars.drop_next, 16 * c->params.interval)) { c->vars.count = (c->vars.count - c->vars.lastcount) | 1; /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. */ codel_Newton_step(&c->vars); } else { c->vars.count = 1; c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; } c->vars.lastcount = c->vars.count; c->vars.drop_next = codel_control_law(now, c->params.interval, c->vars.rec_inv_sqrt); } return (m); } void codel_getstats(struct codel *c, struct codel_stats *s) { *s = c->stats; } #endif /* ALTQ_CODEL */ Index: head/sys/net/altq/altq_codel.h =================================================================== --- head/sys/net/altq/altq_codel.h (revision 338208) +++ head/sys/net/altq/altq_codel.h (revision 338209) @@ -1,129 +1,135 @@ /* * CoDel - The Controlled-Delay Active Queue Management algorithm * * Copyright (C) 2013 Ermal Luçi * Copyright (C) 2011-2012 Kathleen Nichols * Copyright (C) 2011-2012 Van Jacobson * Copyright (C) 2012 Michael D. Taht * Copyright (C) 2012 Eric Dumazet * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_CODEL_H_ #define _ALTQ_ALTQ_CODEL_H_ struct codel_stats { u_int32_t maxpacket; struct pktcntr drop_cnt; u_int marked_packets; }; struct codel_ifstats { u_int qlength; u_int qlimit; struct codel_stats stats; struct pktcntr cl_xmitcnt; /* transmitted packet counter */ struct pktcntr cl_dropcnt; /* dropped packet counter */ }; +/* + * CODEL_STATS_VERSION is defined in altq.h to work around issues stemming + * from mixing of public-API and internal bits in each scheduler-specific + * header. + */ + #ifdef _KERNEL #include /** * struct codel_params - contains codel parameters * target: target queue size (in time units) * interval: width of moving time window * ecn: is Explicit Congestion Notification enabled */ struct codel_params { u_int64_t target; u_int64_t interval; int ecn; }; /** * struct codel_vars - contains codel variables * count: how many drops we've done since the last time we * entered dropping state * lastcount: count at entry to dropping state * dropping: set to true if in dropping state * rec_inv_sqrt: reciprocal value of sqrt(count) >> 1 * first_above_time: when we went (or will go) continuously above * target for interval * drop_next: time to drop next packet, or when we dropped last * ldelay: sojourn time of last dequeued packet */ struct codel_vars { u_int32_t count; u_int32_t lastcount; int dropping; u_int16_t rec_inv_sqrt; u_int64_t first_above_time; u_int64_t drop_next; u_int64_t ldelay; }; struct codel { int last_pps; struct codel_params params; struct codel_vars vars; struct codel_stats stats; struct timeval last_log; u_int32_t drop_overlimit; }; /* * codel interface state */ struct codel_if { struct codel_if *cif_next; /* interface state list */ struct ifaltq *cif_ifq; /* backpointer to ifaltq */ u_int cif_bandwidth; /* link bandwidth in bps */ class_queue_t *cl_q; /* class queue structure */ struct codel codel; /* statistics */ struct codel_ifstats cl_stats; }; struct codel *codel_alloc(int, int, int); void codel_destroy(struct codel *); int codel_addq(struct codel *, class_queue_t *, struct mbuf *); struct mbuf *codel_getq(struct codel *, class_queue_t *); void codel_getstats(struct codel *, struct codel_stats *); #endif /* _KERNEL */ #endif /* _ALTQ_ALTQ_CODEL_H_ */ Index: head/sys/net/altq/altq_fairq.c =================================================================== --- head/sys/net/altq/altq_fairq.c (revision 338208) +++ head/sys/net/altq/altq_fairq.c (revision 338209) @@ -1,909 +1,909 @@ /* * Copyright (c) 2008 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * 3. Neither the name of The DragonFly Project nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific, prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $ * $FreeBSD$ */ /* * Matt: I gutted altq_priq.c and used it as a skeleton on which to build * fairq. The fairq algorithm is completely different than priq, of course, * but because I used priq's skeleton I believe I should include priq's * copyright. * * Copyright (C) 2000-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * FAIRQ - take traffic classified by keep state (hashed into * mbuf->m_pkthdr.altq_state_hash) and bucketize it. Fairly extract * the first packet from each bucket in a round-robin fashion. * * TODO - better overall qlimit support (right now it is per-bucket). * - NOTE: red etc is per bucket, not overall. * - better service curve support.
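* * For illustration: with 64 buckets, a flow whose keep-state hash is 0x1234 always maps to bucket 0x1234 & 63 == 52, so one flow's packets share a bucket and keep their FIFO order.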
* * EXAMPLE: * * altq on em0 fairq bandwidth 650Kb queue { std, bulk } * queue std priority 3 bandwidth 400Kb \ * fairq (buckets 64, default, hogs 1Kb) qlimit 50 * queue bulk priority 2 bandwidth 100Kb \ * fairq (buckets 64, hogs 1Kb) qlimit 50 * * pass out on em0 from any to any keep state queue std * pass out on em0 inet proto tcp ..... port ... keep state queue bulk */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_FAIRQ /* fairq is enabled in the kernel conf */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * function prototypes */ static int fairq_clear_interface(struct fairq_if *); static int fairq_request(struct ifaltq *, int, void *); static void fairq_purge(struct fairq_if *); static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int); static int fairq_class_destroy(struct fairq_class *); static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *fairq_dequeue(struct ifaltq *, int); static int fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t); static struct mbuf *fairq_getq(struct fairq_class *, uint64_t); static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *); static fairq_bucket_t *fairq_selectq(struct fairq_class *, int); static void fairq_purgeq(struct fairq_class *); static void get_class_stats(struct fairq_classstats *, struct fairq_class *); static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t); int fairq_pfattach(struct pf_altq *a) { struct ifnet *ifp; int error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc, fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL); return (error); } int fairq_add_altq(struct pf_altq *a) { struct fairq_if *pif; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); pif = malloc(sizeof(struct fairq_if), M_DEVBUF, M_WAITOK | M_ZERO); pif->pif_bandwidth = a->ifbandwidth; pif->pif_maxpri = -1; pif->pif_ifq = &ifp->if_snd; /* keep the state in pf_altq */ a->altq_disc = pif; return (0); } int fairq_remove_altq(struct pf_altq *a) { struct fairq_if *pif; if ((pif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; fairq_clear_interface(pif); free(pif, M_DEVBUF); return (0); } int fairq_add_queue(struct pf_altq *a) { struct fairq_if *pif; struct fairq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); /* check parameters */ if (a->priority >= FAIRQ_MAXPRI) return (EINVAL); if (a->qid == 0) return (EINVAL); if (pif->pif_classes[a->priority] != NULL) return (EBUSY); if (clh_to_clp(pif, a->qid) != NULL) return (EBUSY); cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth, &a->pq_u.fairq_opts, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int fairq_remove_queue(struct pf_altq *a) { struct fairq_if *pif; struct fairq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); return (fairq_class_destroy(cl)); } int -fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct fairq_if *pif; struct fairq_class *cl; struct fairq_classstats stats; int error = 0; if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL) return (EBADF); 
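/* The new 'version' argument mirrors the versioned stats interface that hfsc_getqstats() implements further down; fairq still has a single stats layout (see the FAIRQ_STATS_VERSION note in altq_fairq.h), so the value is accepted here but not otherwise consulted. */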
if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes. */ static int fairq_clear_interface(struct fairq_if *pif) { struct fairq_class *cl; int pri; /* clear out the classes */ for (pri = 0; pri <= pif->pif_maxpri; pri++) { if ((cl = pif->pif_classes[pri]) != NULL) fairq_class_destroy(cl); } return (0); } static int fairq_request(struct ifaltq *ifq, int req, void *arg) { struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: fairq_purge(pif); break; } return (0); } /* discard all the queued packets on the interface */ static void fairq_purge(struct fairq_if *pif) { struct fairq_class *cl; int pri; for (pri = 0; pri <= pif->pif_maxpri; pri++) { if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head) fairq_purgeq(cl); } if (ALTQ_IS_ENABLED(pif->pif_ifq)) pif->pif_ifq->ifq_len = 0; } static struct fairq_class * fairq_class_create(struct fairq_if *pif, int pri, int qlimit, u_int bandwidth, struct fairq_opts *opts, int qid) { struct fairq_class *cl; int flags = opts->flags; u_int nbuckets = opts->nbuckets; int i; #ifndef ALTQ_RED if (flags & FARF_RED) { #ifdef ALTQ_DEBUG printf("fairq_class_create: RED not configured for FAIRQ!\n"); #endif return (NULL); } #endif #ifndef ALTQ_CODEL if (flags & FARF_CODEL) { #ifdef ALTQ_DEBUG printf("fairq_class_create: CODEL not configured for FAIRQ!\n"); #endif return (NULL); } #endif if (nbuckets == 0) nbuckets = 256; if (nbuckets > FAIRQ_MAX_BUCKETS) nbuckets = FAIRQ_MAX_BUCKETS; /* enforce power-of-2 size */ while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1)) ++nbuckets; if ((cl = pif->pif_classes[pri]) != NULL) { /* modify the class instead of creating a new one */ IFQ_LOCK(cl->cl_pif->pif_ifq); if (cl->cl_head) fairq_purgeq(cl); IFQ_UNLOCK(cl->cl_pif->pif_ifq); #ifdef ALTQ_RIO if (cl->cl_qtype == Q_RIO) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (cl->cl_qtype == Q_RED) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (cl->cl_qtype == Q_CODEL) codel_destroy(cl->cl_codel); #endif } else { cl = malloc(sizeof(struct fairq_class), M_DEVBUF, M_WAITOK | M_ZERO); cl->cl_nbuckets = nbuckets; cl->cl_nbucket_mask = nbuckets - 1; cl->cl_buckets = malloc( sizeof(struct fairq_bucket) * cl->cl_nbuckets, M_DEVBUF, M_WAITOK | M_ZERO); cl->cl_head = NULL; } pif->pif_classes[pri] = cl; if (flags & FARF_DEFAULTCLASS) pif->pif_default = cl; if (qlimit == 0) qlimit = 50; /* use default */ cl->cl_qlimit = qlimit; for (i = 0; i < cl->cl_nbuckets; ++i) { qlimit(&cl->cl_buckets[i].queue) = qlimit; } cl->cl_bandwidth = bandwidth / 8; cl->cl_qtype = Q_DROPTAIL; cl->cl_flags = flags & FARF_USERFLAGS; cl->cl_pri = pri; if (pri > pif->pif_maxpri) pif->pif_maxpri = pri; cl->cl_pif = pif; cl->cl_handle = qid; cl->cl_hogs_m1 = opts->hogs_m1 / 8; cl->cl_lssc_m1 = opts->lssc_m1 / 8; /* NOT YET USED */ #ifdef ALTQ_RED if (flags & (FARF_RED|FARF_RIO)) { int red_flags, red_pkttime; red_flags = 0; if (flags & FARF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & FARF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (pif->pif_bandwidth < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); #ifdef ALTQ_RIO 
if (flags & FARF_RIO) { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red != NULL) cl->cl_qtype = Q_RIO; } else #endif if (flags & FARF_RED) { cl->cl_red = red_alloc(0, 0, cl->cl_qlimit * 10/100, cl->cl_qlimit * 30/100, red_flags, red_pkttime); if (cl->cl_red != NULL) cl->cl_qtype = Q_RED; } } #endif /* ALTQ_RED */ #ifdef ALTQ_CODEL if (flags & FARF_CODEL) { cl->cl_codel = codel_alloc(5, 100, 0); if (cl->cl_codel != NULL) cl->cl_qtype = Q_CODEL; } #endif return (cl); } static int fairq_class_destroy(struct fairq_class *cl) { struct fairq_if *pif; int pri; IFQ_LOCK(cl->cl_pif->pif_ifq); if (cl->cl_head) fairq_purgeq(cl); pif = cl->cl_pif; pif->pif_classes[cl->cl_pri] = NULL; if (pif->pif_poll_cache == cl) pif->pif_poll_cache = NULL; if (pif->pif_maxpri == cl->cl_pri) { for (pri = cl->cl_pri; pri >= 0; pri--) if (pif->pif_classes[pri] != NULL) { pif->pif_maxpri = pri; break; } if (pri < 0) pif->pif_maxpri = -1; } IFQ_UNLOCK(cl->cl_pif->pif_ifq); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (cl->cl_qtype == Q_RIO) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (cl->cl_qtype == Q_RED) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (cl->cl_qtype == Q_CODEL) codel_destroy(cl->cl_codel); #endif } free(cl->cl_buckets, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * fairq_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. */ static int fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; struct fairq_class *cl = NULL; /* Make compiler happy */ struct pf_mtag *t; u_int32_t qid_hash = 0; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } if ((t = pf_find_mtag(m)) != NULL) { cl = clh_to_clp(pif, t->qid); qid_hash = t->qid_hash; } if (cl == NULL) { cl = pif->pif_default; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } cl->cl_flags |= FARF_HAS_PACKETS; cl->cl_pktattr = NULL; len = m_pktlen(m); if (fairq_addq(cl, m, qid_hash) != 0) { /* drop occurred. mbuf was freed in fairq_addq. */ PKTCNTR_ADD(&cl->cl_dropcnt, len); return (ENOBUFS); } IFQ_INC_LEN(ifq); return (0); } /* * fairq_dequeue is a dequeue function to be registered to * (*altq_dequeue) in struct ifaltq. * * note: ALTDQ_POLL returns the next packet without removing the packet * from the queue. ALTDQ_REMOVE is a normal dequeue operation. * ALTDQ_REMOVE must return the same packet if called immediately * after ALTDQ_POLL. 
*/ static struct mbuf * fairq_dequeue(struct ifaltq *ifq, int op) { struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; struct fairq_class *cl; struct fairq_class *best_cl; struct mbuf *best_m; struct mbuf *m = NULL; uint64_t cur_time = read_machclk(); int pri; int hit_limit; IFQ_LOCK_ASSERT(ifq); if (IFQ_IS_EMPTY(ifq)) { return (NULL); } if (pif->pif_poll_cache && op == ALTDQ_REMOVE) { best_cl = pif->pif_poll_cache; m = fairq_getq(best_cl, cur_time); pif->pif_poll_cache = NULL; if (m) { IFQ_DEC_LEN(ifq); PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); return (m); } } else { best_cl = NULL; best_m = NULL; for (pri = pif->pif_maxpri; pri >= 0; pri--) { if ((cl = pif->pif_classes[pri]) == NULL) continue; if ((cl->cl_flags & FARF_HAS_PACKETS) == 0) continue; m = fairq_pollq(cl, cur_time, &hit_limit); if (m == NULL) { cl->cl_flags &= ~FARF_HAS_PACKETS; continue; } /* * Only override the best choice if we are under * the BW limit. */ if (hit_limit == 0 || best_cl == NULL) { best_cl = cl; best_m = m; } /* * Remember the highest priority mbuf in case we * do not find any lower priority mbufs. */ if (hit_limit) continue; break; } if (op == ALTDQ_POLL) { pif->pif_poll_cache = best_cl; m = best_m; } else if (best_cl) { m = fairq_getq(best_cl, cur_time); if (m != NULL) { IFQ_DEC_LEN(ifq); PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); } } return (m); } return (NULL); } static int fairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid) { fairq_bucket_t *b; u_int hindex; uint64_t bw; /* * If the packet doesn't have any keep state, put it on the end of * our queue. XXX this can result in out of order delivery. */ if (bucketid == 0) { if (cl->cl_head) b = cl->cl_head->prev; else b = &cl->cl_buckets[0]; } else { hindex = bucketid & cl->cl_nbucket_mask; b = &cl->cl_buckets[hindex]; } /* * Add the bucket to the end of the circular list of active buckets. * * As a special case we add the bucket to the beginning of the list * instead of the end if it was not previously on the list and if * its traffic is less than the hog level. */
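/* The bw figure compared against cl_hogs_m1 is bytes per second of wall clock: bw_bytes scaled by machclk_freq over bw_delta, both counters decayed by one-eighth on each dequeue in fairq_getq(). */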
if (b->in_use == 0) { b->in_use = 1; if (cl->cl_head == NULL) { cl->cl_head = b; b->next = b; b->prev = b; } else { b->next = cl->cl_head; b->prev = cl->cl_head->prev; b->prev->next = b; b->next->prev = b; if (b->bw_delta && cl->cl_hogs_m1) { bw = b->bw_bytes * machclk_freq / b->bw_delta; if (bw < cl->cl_hogs_m1) cl->cl_head = b; } } } #ifdef ALTQ_RIO if (cl->cl_qtype == Q_RIO) return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr); #endif #ifdef ALTQ_RED if (cl->cl_qtype == Q_RED) return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr); #endif #ifdef ALTQ_CODEL if (cl->cl_qtype == Q_CODEL) return codel_addq(cl->cl_codel, &b->queue, m); #endif if (qlen(&b->queue) >= qlimit(&b->queue)) { m_freem(m); return (-1); } if (cl->cl_flags & FARF_CLEARDSCP) write_dsfield(m, cl->cl_pktattr, 0); _addq(&b->queue, m); return (0); } static struct mbuf * fairq_getq(struct fairq_class *cl, uint64_t cur_time) { fairq_bucket_t *b; struct mbuf *m; b = fairq_selectq(cl, 0); if (b == NULL) m = NULL; #ifdef ALTQ_RIO else if (cl->cl_qtype == Q_RIO) m = rio_getq((rio_t *)cl->cl_red, &b->queue); #endif #ifdef ALTQ_RED else if (cl->cl_qtype == Q_RED) m = red_getq(cl->cl_red, &b->queue); #endif #ifdef ALTQ_CODEL else if (cl->cl_qtype == Q_CODEL) m = codel_getq(cl->cl_codel, &b->queue); #endif else m = _getq(&b->queue); /* * Calculate the BW change */ if (m != NULL) { uint64_t delta; /* * Per-class bandwidth calculation */ delta = (cur_time - cl->cl_last_time); if (delta > machclk_freq * 8) delta = machclk_freq * 8; cl->cl_bw_delta += delta; cl->cl_bw_bytes += m->m_pkthdr.len; cl->cl_last_time = cur_time; cl->cl_bw_delta -= cl->cl_bw_delta >> 3; cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3; /* * Per-bucket bandwidth calculation */ delta = (cur_time - b->last_time); if (delta > machclk_freq * 8) delta = machclk_freq * 8; b->bw_delta += delta; b->bw_bytes += m->m_pkthdr.len; b->last_time = cur_time; b->bw_delta -= b->bw_delta >> 3; b->bw_bytes -= b->bw_bytes >> 3; } return(m); } /* * Figure out what the next packet would be if there were no limits. If * this class hits its bandwidth limit, *hit_limit is set to non-zero, otherwise * it is set to 0. A non-NULL mbuf is returned either way. */ static struct mbuf * fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit) { fairq_bucket_t *b; struct mbuf *m; uint64_t delta; uint64_t bw; *hit_limit = 0; b = fairq_selectq(cl, 1); if (b == NULL) return(NULL); m = qhead(&b->queue); /* * Did this packet exceed the class bandwidth? Calculate the * bandwidth component of the packet. * * - Calculate bytes per second */ delta = cur_time - cl->cl_last_time; if (delta > machclk_freq * 8) delta = machclk_freq * 8; cl->cl_bw_delta += delta; cl->cl_last_time = cur_time; if (cl->cl_bw_delta) { bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta; if (bw > cl->cl_bandwidth) *hit_limit = 1; #ifdef ALTQ_DEBUG printf("BW %6ju relative to %6u %d queue %p\n", (uintmax_t)bw, cl->cl_bandwidth, *hit_limit, b); #endif } return(m); } /* * Locate the next queue we want to pull a packet out of. This code * is also responsible for removing empty buckets from the circular list.
*/ static fairq_bucket_t * fairq_selectq(struct fairq_class *cl, int ispoll) { fairq_bucket_t *b; uint64_t bw; if (ispoll == 0 && cl->cl_polled) { b = cl->cl_polled; cl->cl_polled = NULL; return(b); } while ((b = cl->cl_head) != NULL) { /* * Remove empty queues from consideration */ if (qempty(&b->queue)) { b->in_use = 0; cl->cl_head = b->next; if (cl->cl_head == b) { cl->cl_head = NULL; } else { b->next->prev = b->prev; b->prev->next = b->next; } continue; } /* * Advance the round robin. Queues with bandwidths less * than the hog bandwidth are allowed to burst. */ if (cl->cl_hogs_m1 == 0) { cl->cl_head = b->next; } else if (b->bw_delta) { bw = b->bw_bytes * machclk_freq / b->bw_delta; if (bw >= cl->cl_hogs_m1) { cl->cl_head = b->next; } /* * XXX TODO - */ } /* * Return bucket b. */ break; } if (ispoll) cl->cl_polled = b; return(b); } static void fairq_purgeq(struct fairq_class *cl) { fairq_bucket_t *b; struct mbuf *m; while ((b = fairq_selectq(cl, 0)) != NULL) { while ((m = _getq(&b->queue)) != NULL) { PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); m_freem(m); } ASSERT(qlen(&b->queue) == 0); } } static void get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl) { fairq_bucket_t *b; sp->class_handle = cl->cl_handle; sp->qlimit = cl->cl_qlimit; sp->xmit_cnt = cl->cl_xmitcnt; sp->drop_cnt = cl->cl_dropcnt; sp->qtype = cl->cl_qtype; sp->qlength = 0; if (cl->cl_head) { b = cl->cl_head; do { sp->qlength += qlen(&b->queue); b = b->next; } while (b != cl->cl_head); } #ifdef ALTQ_RED if (cl->cl_qtype == Q_RED) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (cl->cl_qtype == Q_RIO) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_CODEL if (cl->cl_qtype == Q_CODEL) codel_getstats(cl->cl_codel, &sp->codel); #endif } /* convert a class handle to the corresponding class pointer */ static struct fairq_class * clh_to_clp(struct fairq_if *pif, uint32_t chandle) { struct fairq_class *cl; int idx; if (chandle == 0) return (NULL); for (idx = pif->pif_maxpri; idx >= 0; idx--) if ((cl = pif->pif_classes[idx]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } #endif /* ALTQ_FAIRQ */ Index: head/sys/net/altq/altq_fairq.h =================================================================== --- head/sys/net/altq/altq_fairq.h (revision 338208) +++ head/sys/net/altq/altq_fairq.h (revision 338209) @@ -1,145 +1,151 @@ /* * Copyright (c) 2008 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * 3. Neither the name of The DragonFly Project nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific, prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $DragonFly: src/sys/net/altq/altq_fairq.h,v 1.1 2008/04/06 18:58:15 dillon Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_FAIRQ_H_ #define _ALTQ_ALTQ_FAIRQ_H_ #include #include #include #include #include #include #define FAIRQ_MAX_BUCKETS 2048 /* maximum number of sorting buckets */ #define FAIRQ_MAXPRI RM_MAXPRIO #define FAIRQ_BITMAP_WIDTH (sizeof(fairq_bitmap_t)*8) #define FAIRQ_BITMAP_MASK (FAIRQ_BITMAP_WIDTH - 1) /* fairq class flags */ #define FARF_RED 0x0001 /* use RED */ #define FARF_ECN 0x0002 /* use RED/ECN */ #define FARF_RIO 0x0004 /* use RIO */ #define FARF_CODEL 0x0008 /* use CoDel */ #define FARF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define FARF_DEFAULTCLASS 0x1000 /* default class */ #define FARF_HAS_PACKETS 0x2000 /* might have queued packets */ #define FARF_USERFLAGS (FARF_RED|FARF_ECN|FARF_RIO|FARF_CLEARDSCP| \ FARF_DEFAULTCLASS) /* special class handles */ #define FAIRQ_NULLCLASS_HANDLE 0 typedef u_int fairq_bitmap_t; struct fairq_classstats { uint32_t class_handle; u_int qlength; u_int qlimit; struct pktcntr xmit_cnt; /* transmitted packet counter */ struct pktcntr drop_cnt; /* dropped packet counter */ /* codel, red and rio related info */ int qtype; struct redstats red[3]; /* rio has 3 red stats */ struct codel_stats codel; }; +/* + * FAIRQ_STATS_VERSION is defined in altq.h to work around issues stemming + * from mixing of public-API and internal bits in each scheduler-specific + * header. 
+ */ + #ifdef _KERNEL typedef struct fairq_bucket { struct fairq_bucket *next; /* circular list */ struct fairq_bucket *prev; /* circular list */ class_queue_t queue; /* the actual queue */ uint64_t bw_bytes; /* statistics used to calculate bw */ uint64_t bw_delta; /* statistics used to calculate bw */ uint64_t last_time; int in_use; } fairq_bucket_t; struct fairq_class { uint32_t cl_handle; /* class handle */ u_int cl_nbuckets; /* (power of 2) */ u_int cl_nbucket_mask; /* bucket mask */ fairq_bucket_t *cl_buckets; fairq_bucket_t *cl_head; /* head of circular bucket list */ fairq_bucket_t *cl_polled; union { struct red *cl_red; /* RED state */ struct codel *cl_codel; /* CoDel state */ } cl_aqm; #define cl_red cl_aqm.cl_red #define cl_codel cl_aqm.cl_codel u_int cl_hogs_m1; u_int cl_lssc_m1; u_int cl_bandwidth; uint64_t cl_bw_bytes; uint64_t cl_bw_delta; uint64_t cl_last_time; int cl_qtype; /* rollup */ int cl_qlimit; int cl_pri; /* priority */ int cl_flags; /* class flags */ struct fairq_if *cl_pif; /* back pointer to pif */ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ /* round robin index */ /* statistics */ struct pktcntr cl_xmitcnt; /* transmitted packet counter */ struct pktcntr cl_dropcnt; /* dropped packet counter */ }; /* * fairq interface state */ struct fairq_if { struct fairq_if *pif_next; /* interface state list */ struct ifaltq *pif_ifq; /* backpointer to ifaltq */ u_int pif_bandwidth; /* link bandwidth in bps */ int pif_maxpri; /* max priority in use */ struct fairq_class *pif_poll_cache;/* cached poll */ struct fairq_class *pif_default; /* default class */ struct fairq_class *pif_classes[FAIRQ_MAXPRI]; /* classes */ }; #endif /* _KERNEL */ #endif /* _ALTQ_ALTQ_FAIRQ_H_ */ Index: head/sys/net/altq/altq_hfsc.c =================================================================== --- head/sys/net/altq/altq_hfsc.c (revision 338208) +++ head/sys/net/altq/altq_hfsc.c (revision 338209) @@ -1,2238 +1,2329 @@ /*- * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. * * Permission to use, copy, modify, and distribute this software and * its documentation is hereby granted (including for commercial or * for-profit use), provided that both the copyright notice and this * permission notice appear in all copies of the software, derivative * works, or modified versions, and any portions thereof. * * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * Carnegie Mellon encourages (but does not require) users of this * software to return any improvements or extensions that they make, * and to grant Carnegie Mellon the rights to redistribute these * changes without encumbrance. 
* * $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ * $FreeBSD$ */ /* * H-FSC is described in Proceedings of SIGCOMM'97, * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing, * Real-Time and Priority Service" * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng. * * Oleg Cherevko added the upperlimit for link-sharing. * when a class has an upperlimit, the fit-time is computed from the * upperlimit service curve. the link-sharing scheduler does not schedule * a class whose fit-time exceeds the current time. */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */ #include #include #include #include #include #include #include #if 1 /* ALTQ3_COMPAT */ #include #include #include #endif /* ALTQ3_COMPAT */ #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif /* * function prototypes */ static int hfsc_clear_interface(struct hfsc_if *); static int hfsc_request(struct ifaltq *, int, void *); static void hfsc_purge(struct hfsc_if *); static struct hfsc_class *hfsc_class_create(struct hfsc_if *, struct service_curve *, struct service_curve *, struct service_curve *, struct hfsc_class *, int, int, int); static int hfsc_class_destroy(struct hfsc_class *); static struct hfsc_class *hfsc_nextclass(struct hfsc_class *); static int hfsc_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *hfsc_dequeue(struct ifaltq *, int); static int hfsc_addq(struct hfsc_class *, struct mbuf *); static struct mbuf *hfsc_getq(struct hfsc_class *); static struct mbuf *hfsc_pollq(struct hfsc_class *); static void hfsc_purgeq(struct hfsc_class *); static void update_cfmin(struct hfsc_class *); static void set_active(struct hfsc_class *, int); static void set_passive(struct hfsc_class *); static void init_ed(struct hfsc_class *, int); static void update_ed(struct hfsc_class *, int); static void update_d(struct hfsc_class *, int); static void init_vf(struct hfsc_class *, int); static void update_vf(struct hfsc_class *, int, u_int64_t); static void ellist_insert(struct hfsc_class *); static void ellist_remove(struct hfsc_class *); static void ellist_update(struct hfsc_class *); struct hfsc_class *hfsc_get_mindl(struct hfsc_if *, u_int64_t); static void actlist_insert(struct hfsc_class *); static void actlist_remove(struct hfsc_class *); static void actlist_update(struct hfsc_class *); static struct hfsc_class *actlist_firstfit(struct hfsc_class *, u_int64_t); static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t); static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t); -static __inline u_int64_t m2sm(u_int); -static __inline u_int64_t m2ism(u_int); +static __inline u_int64_t m2sm(u_int64_t); +static __inline u_int64_t m2ism(u_int64_t); static __inline u_int64_t d2dx(u_int); -static u_int sm2m(u_int64_t); +static u_int64_t sm2m(u_int64_t); static u_int dx2d(u_int64_t); static void sc2isc(struct service_curve *, struct internal_sc *); static void rtsc_init(struct runtime_sc *, struct internal_sc *, u_int64_t, u_int64_t); static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t); static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t); static void rtsc_min(struct runtime_sc *, struct internal_sc *, u_int64_t, u_int64_t); -static void get_class_stats(struct hfsc_classstats *, +static void get_class_stats_v0(struct hfsc_classstats_v0 *, struct hfsc_class *); +static void get_class_stats_v1(struct hfsc_classstats_v1 *, + struct hfsc_class *); 
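+/* + * get_class_stats_v0() keeps filling the layout that pre-versioning binaries + * expect, while get_class_stats_v1() fills the new layout (evidently grown + * for the widened 64-bit service-curve values, cf. m2sm()/sm2m() below); + * hfsc_getqstats() selects between them through its new 'version' argument. + */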
static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t); #ifdef ALTQ3_COMPAT static struct hfsc_if *hfsc_attach(struct ifaltq *, u_int); static int hfsc_detach(struct hfsc_if *); static int hfsc_class_modify(struct hfsc_class *, struct service_curve *, struct service_curve *, struct service_curve *); static int hfsccmd_if_attach(struct hfsc_attach *); static int hfsccmd_if_detach(struct hfsc_interface *); static int hfsccmd_add_class(struct hfsc_add_class *); static int hfsccmd_delete_class(struct hfsc_delete_class *); static int hfsccmd_modify_class(struct hfsc_modify_class *); static int hfsccmd_add_filter(struct hfsc_add_filter *); static int hfsccmd_delete_filter(struct hfsc_delete_filter *); static int hfsccmd_class_stats(struct hfsc_class_stats *); altqdev_decl(hfsc); #endif /* ALTQ3_COMPAT */ /* * macros */ #define is_a_parent_class(cl) ((cl)->cl_children != NULL) -#define HT_INFINITY 0xffffffffffffffffLL /* infinite time value */ +#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ #ifdef ALTQ3_COMPAT /* hif_list keeps all hfsc_if's allocated. */ static struct hfsc_if *hif_list = NULL; #endif /* ALTQ3_COMPAT */ int hfsc_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); s = splnet(); error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc, hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL); splx(s); return (error); } int hfsc_add_altq(struct pf_altq *a) { struct hfsc_if *hif; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (hif == NULL) return (ENOMEM); TAILQ_INIT(&hif->hif_eligible); hif->hif_ifq = &ifp->if_snd; /* keep the state in pf_altq */ a->altq_disc = hif; return (0); } int hfsc_remove_altq(struct pf_altq *a) { struct hfsc_if *hif; if ((hif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; (void)hfsc_clear_interface(hif); (void)hfsc_class_destroy(hif->hif_rootclass); free(hif, M_DEVBUF); return (0); } int hfsc_add_queue(struct pf_altq *a) { struct hfsc_if *hif; struct hfsc_class *cl, *parent; - struct hfsc_opts *opts; + struct hfsc_opts_v1 *opts; struct service_curve rtsc, lssc, ulsc; if ((hif = a->altq_disc) == NULL) return (EINVAL); opts = &a->pq_u.hfsc_opts; if (a->parent_qid == HFSC_NULLCLASS_HANDLE && hif->hif_rootclass == NULL) parent = NULL; else if ((parent = clh_to_clp(hif, a->parent_qid)) == NULL) return (EINVAL); if (a->qid == 0) return (EINVAL); if (clh_to_clp(hif, a->qid) != NULL) return (EBUSY); rtsc.m1 = opts->rtsc_m1; rtsc.d = opts->rtsc_d; rtsc.m2 = opts->rtsc_m2; lssc.m1 = opts->lssc_m1; lssc.d = opts->lssc_d; lssc.m2 = opts->lssc_m2; ulsc.m1 = opts->ulsc_m1; ulsc.d = opts->ulsc_d; ulsc.m2 = opts->ulsc_m2; cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc, parent, a->qlimit, opts->flags, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int hfsc_remove_queue(struct pf_altq *a) { struct hfsc_if *hif; struct hfsc_class *cl; if ((hif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(hif, a->qid)) == NULL) return (EINVAL); return (hfsc_class_destroy(cl)); } int -hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct hfsc_if *hif; struct hfsc_class *cl; - struct hfsc_classstats stats; + union { + struct hfsc_classstats_v0 v0; + struct hfsc_classstats_v1 v1; + } stats; + size_t 
stats_size; int error = 0; if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL) return (EBADF); if ((cl = clh_to_clp(hif, a->qid)) == NULL) return (EINVAL); - if (*nbytes < sizeof(stats)) + if (version > HFSC_STATS_VERSION) return (EINVAL); - get_class_stats(&stats, cl); + memset(&stats, 0, sizeof(stats)); + switch (version) { + case 0: + get_class_stats_v0(&stats.v0, cl); + stats_size = sizeof(struct hfsc_classstats_v0); + break; + case 1: + get_class_stats_v1(&stats.v1, cl); + stats_size = sizeof(struct hfsc_classstats_v1); + break; + } - if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) + if (*nbytes < stats_size) + return (EINVAL); + + if ((error = copyout((caddr_t)&stats, ubuf, stats_size)) != 0) return (error); - *nbytes = sizeof(stats); + *nbytes = stats_size; return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes except the root class. */ static int hfsc_clear_interface(struct hfsc_if *hif) { struct hfsc_class *cl; #ifdef ALTQ3_COMPAT /* free the filters for this interface */ acc_discard_filters(&hif->hif_classifier, NULL, 1); #endif /* clear out the classes */ while (hif->hif_rootclass != NULL && (cl = hif->hif_rootclass->cl_children) != NULL) { /* * remove the first leaf class found in the hierarchy * then start over */ for (; cl != NULL; cl = hfsc_nextclass(cl)) { if (!is_a_parent_class(cl)) { (void)hfsc_class_destroy(cl); break; } } } return (0); } static int hfsc_request(struct ifaltq *ifq, int req, void *arg) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: hfsc_purge(hif); break; } return (0); } /* discard all the queued packets on the interface */ static void hfsc_purge(struct hfsc_if *hif) { struct hfsc_class *cl; for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) if (!qempty(cl->cl_q)) hfsc_purgeq(cl); if (ALTQ_IS_ENABLED(hif->hif_ifq)) hif->hif_ifq->ifq_len = 0; } struct hfsc_class * hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, struct service_curve *fsc, struct service_curve *usc, struct hfsc_class *parent, int qlimit, int flags, int qid) { struct hfsc_class *cl, *p; int i, s; if (hif->hif_classes >= HFSC_MAX_CLASSES) return (NULL); #ifndef ALTQ_RED if (flags & HFCF_RED) { #ifdef ALTQ_DEBUG printf("hfsc_class_create: RED not configured for HFSC!\n"); #endif return (NULL); } #endif #ifndef ALTQ_CODEL if (flags & HFCF_CODEL) { #ifdef ALTQ_DEBUG printf("hfsc_class_create: CODEL not configured for HFSC!\n"); #endif return (NULL); } #endif cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->cl_q == NULL) goto err_ret; TAILQ_INIT(&cl->cl_actc); if (qlimit == 0) qlimit = 50; /* use default */ qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; qsize(cl->cl_q) = 0; cl->cl_flags = flags; #ifdef ALTQ_RED if (flags & (HFCF_RED|HFCF_RIO)) { int red_flags, red_pkttime; u_int m2; m2 = 0; if (rsc != NULL && rsc->m2 > m2) m2 = rsc->m2; if (fsc != NULL && fsc->m2 > m2) m2 = fsc->m2; if (usc != NULL && usc->m2 > m2) m2 = usc->m2; red_flags = 0; if (flags & HFCF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & HFCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (m2 < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (m2 / 8); if (flags & HFCF_RED) { cl->cl_red = red_alloc(0, 0, 
qlimit(cl->cl_q) * 10/100, qlimit(cl->cl_q) * 30/100, red_flags, red_pkttime); if (cl->cl_red != NULL) qtype(cl->cl_q) = Q_RED; } #ifdef ALTQ_RIO else { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red != NULL) qtype(cl->cl_q) = Q_RIO; } #endif } #endif /* ALTQ_RED */ #ifdef ALTQ_CODEL if (flags & HFCF_CODEL) { cl->cl_codel = codel_alloc(5, 100, 0); if (cl->cl_codel != NULL) qtype(cl->cl_q) = Q_CODEL; } #endif if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) { cl->cl_rsc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_rsc == NULL) goto err_ret; sc2isc(rsc, cl->cl_rsc); rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0); rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0); } if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) { cl->cl_fsc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_fsc == NULL) goto err_ret; sc2isc(fsc, cl->cl_fsc); rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0); } if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) { cl->cl_usc = malloc(sizeof(struct internal_sc), M_DEVBUF, M_NOWAIT); if (cl->cl_usc == NULL) goto err_ret; sc2isc(usc, cl->cl_usc); rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0); } cl->cl_id = hif->hif_classid++; cl->cl_handle = qid; cl->cl_hif = hif; cl->cl_parent = parent; s = splnet(); IFQ_LOCK(hif->hif_ifq); hif->hif_classes++; /* * find a free slot in the class table. if the slot matching * the lower bits of qid is free, use this slot. otherwise, * use the first free slot. */ i = qid % HFSC_MAX_CLASSES; if (hif->hif_class_tbl[i] == NULL) hif->hif_class_tbl[i] = cl; else { for (i = 0; i < HFSC_MAX_CLASSES; i++) if (hif->hif_class_tbl[i] == NULL) { hif->hif_class_tbl[i] = cl; break; } if (i == HFSC_MAX_CLASSES) { IFQ_UNLOCK(hif->hif_ifq); splx(s); goto err_ret; } } if (flags & HFCF_DEFAULTCLASS) hif->hif_defaultclass = cl; if (parent == NULL) { /* this is root class */ hif->hif_rootclass = cl; } else { /* add this class to the children list of the parent */ if ((p = parent->cl_children) == NULL) parent->cl_children = cl; else { while (p->cl_siblings != NULL) p = p->cl_siblings; p->cl_siblings = cl; } } IFQ_UNLOCK(hif->hif_ifq); splx(s); return (cl); err_ret: if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); if (cl->cl_rsc != NULL) free(cl->cl_rsc, M_DEVBUF); if (cl->cl_usc != NULL) free(cl->cl_usc, M_DEVBUF); if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (NULL); } static int hfsc_class_destroy(struct hfsc_class *cl) { int i, s; if (cl == NULL) return (0); if (is_a_parent_class(cl)) return (EBUSY); s = splnet(); IFQ_LOCK(cl->cl_hif->hif_ifq); #ifdef ALTQ3_COMPAT /* delete filters referencing to this class */ acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0); #endif /* ALTQ3_COMPAT */ if (!qempty(cl->cl_q)) hfsc_purgeq(cl); if (cl->cl_parent == NULL) { /* this is root class */ } else { struct hfsc_class *p = cl->cl_parent->cl_children; if (p == cl) cl->cl_parent->cl_children = cl->cl_siblings; else do { if (p->cl_siblings == cl) { p->cl_siblings = cl->cl_siblings; break; } } while ((p = p->cl_siblings) != NULL); ASSERT(p != NULL); } for (i = 0; i < HFSC_MAX_CLASSES; i++) if (cl->cl_hif->hif_class_tbl[i] == cl) { cl->cl_hif->hif_class_tbl[i] = NULL; break; } cl->cl_hif->hif_classes--; 
IFQ_UNLOCK(cl->cl_hif->hif_ifq); splx(s); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } IFQ_LOCK(cl->cl_hif->hif_ifq); if (cl == cl->cl_hif->hif_rootclass) cl->cl_hif->hif_rootclass = NULL; if (cl == cl->cl_hif->hif_defaultclass) cl->cl_hif->hif_defaultclass = NULL; IFQ_UNLOCK(cl->cl_hif->hif_ifq); if (cl->cl_usc != NULL) free(cl->cl_usc, M_DEVBUF); if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); if (cl->cl_rsc != NULL) free(cl->cl_rsc, M_DEVBUF); free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * hfsc_nextclass returns the next class in the tree. * usage: * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl)) * do_something; */ static struct hfsc_class * hfsc_nextclass(struct hfsc_class *cl) { if (cl->cl_children != NULL) cl = cl->cl_children; else if (cl->cl_siblings != NULL) cl = cl->cl_siblings; else { while ((cl = cl->cl_parent) != NULL) if (cl->cl_siblings) { cl = cl->cl_siblings; break; } } return (cl); } /* * hfsc_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. */ static int hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; struct hfsc_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(hif, t->qid); #ifdef ALTQ3_COMPAT else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) cl = pktattr->pattr_class; #endif if (cl == NULL || is_a_parent_class(cl)) { cl = hif->hif_defaultclass; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } #ifdef ALTQ3_COMPAT if (pktattr != NULL) cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */ else #endif cl->cl_pktattr = NULL; len = m_pktlen(m); if (hfsc_addq(cl, m) != 0) { /* drop occurred. mbuf was freed in hfsc_addq. */ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len); return (ENOBUFS); } IFQ_INC_LEN(ifq); cl->cl_hif->hif_packets++; /* successfully queued. */ if (qlen(cl->cl_q) == 1) set_active(cl, m_pktlen(m)); return (0); } /* * hfsc_dequeue is a dequeue function to be registered to * (*altq_dequeue) in struct ifaltq. * * note: ALTDQ_POLL returns the next packet without removing the packet * from the queue. ALTDQ_REMOVE is a normal dequeue operation. * ALTDQ_REMOVE must return the same packet if called immediately * after ALTDQ_POLL. */ static struct mbuf * hfsc_dequeue(struct ifaltq *ifq, int op) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; struct hfsc_class *cl; struct mbuf *m; int len, next_len; int realtime = 0; u_int64_t cur_time; IFQ_LOCK_ASSERT(ifq); if (hif->hif_packets == 0) /* no packet in the tree */ return (NULL); cur_time = read_machclk(); if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) { cl = hif->hif_pollcache; hif->hif_pollcache = NULL; /* check if the class was scheduled by real-time criteria */ if (cl->cl_rsc != NULL) realtime = (cl->cl_e <= cur_time); } else { /* * if there are eligible classes, use real-time criteria. * find the class with the minimum deadline among * the eligible classes. 
*/ if ((cl = hfsc_get_mindl(hif, cur_time)) != NULL) { realtime = 1; } else { #ifdef ALTQ_DEBUG int fits = 0; #endif /* * use link-sharing criteria * get the class with the minimum vt in the hierarchy */ cl = hif->hif_rootclass; while (is_a_parent_class(cl)) { cl = actlist_firstfit(cl, cur_time); if (cl == NULL) { #ifdef ALTQ_DEBUG if (fits > 0) printf("%d fit but none found\n",fits); #endif return (NULL); } /* * update parent's cl_cvtmin. * don't update if the new vt is smaller. */ if (cl->cl_parent->cl_cvtmin < cl->cl_vt) cl->cl_parent->cl_cvtmin = cl->cl_vt; #ifdef ALTQ_DEBUG fits++; #endif } } if (op == ALTDQ_POLL) { hif->hif_pollcache = cl; m = hfsc_pollq(cl); return (m); } } m = hfsc_getq(cl); if (m == NULL) panic("hfsc_dequeue:"); len = m_pktlen(m); cl->cl_hif->hif_packets--; IFQ_DEC_LEN(ifq); PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len); update_vf(cl, len, cur_time); if (realtime) cl->cl_cumul += len; if (!qempty(cl->cl_q)) { if (cl->cl_rsc != NULL) { /* update ed */ next_len = m_pktlen(qhead(cl->cl_q)); if (realtime) update_ed(cl, next_len); else update_d(cl, next_len); } } else { /* the class becomes passive */ set_passive(cl); } return (m); } static int hfsc_addq(struct hfsc_class *cl, struct mbuf *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_addq(cl->cl_codel, cl->cl_q, m); #endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); } if (cl->cl_flags & HFCF_CLEARDSCP) write_dsfield(m, cl->cl_pktattr, 0); _addq(cl->cl_q, m); return (0); } static struct mbuf * hfsc_getq(struct hfsc_class *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_getq((rio_t *)cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_getq(cl->cl_codel, cl->cl_q); #endif return _getq(cl->cl_q); } static struct mbuf * hfsc_pollq(struct hfsc_class *cl) { return qhead(cl->cl_q); } static void hfsc_purgeq(struct hfsc_class *cl) { struct mbuf *m; if (qempty(cl->cl_q)) return; while ((m = _getq(cl->cl_q)) != NULL) { PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m)); m_freem(m); cl->cl_hif->hif_packets--; IFQ_DEC_LEN(cl->cl_hif->hif_ifq); } ASSERT(qlen(cl->cl_q) == 0); update_vf(cl, 0, 0); /* remove cl from the actlist */ set_passive(cl); } static void set_active(struct hfsc_class *cl, int len) { if (cl->cl_rsc != NULL) init_ed(cl, len); if (cl->cl_fsc != NULL) init_vf(cl, len); cl->cl_stats.period++; } static void set_passive(struct hfsc_class *cl) { if (cl->cl_rsc != NULL) ellist_remove(cl); /* * actlist is now handled in update_vf() so that update_vf(cl, 0, 0) * needs to be called explicitly to remove a class from actlist */ } static void init_ed(struct hfsc_class *cl, int next_len) { u_int64_t cur_time; cur_time = read_machclk(); /* update the deadline curve */ rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul); /* * update the eligible curve. * for concave, it is equal to the deadline curve. * for convex, it is a linear curve with slope m2. 
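* (convex here means sm1 <= sm2; zeroing dx and dy below collapses the eligible curve to its second segment, a line of slope m2 through the current point.) */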
*/ cl->cl_eligible = cl->cl_deadline; if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) { cl->cl_eligible.dx = 0; cl->cl_eligible.dy = 0; } /* compute e and d */ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); ellist_insert(cl); } static void update_ed(struct hfsc_class *cl, int next_len) { cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); ellist_update(cl); } static void update_d(struct hfsc_class *cl, int next_len) { cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); } static void init_vf(struct hfsc_class *cl, int len) { struct hfsc_class *max_cl, *p; u_int64_t vt, f, cur_time; int go_active; cur_time = 0; go_active = 1; for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) { if (go_active && cl->cl_nactive++ == 0) go_active = 1; else go_active = 0; if (go_active) { max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead); if (max_cl != NULL) { /* * set vt to the average of the min and max * classes. if the parent's period didn't * change, don't decrease vt of the class. */ vt = max_cl->cl_vt; if (cl->cl_parent->cl_cvtmin != 0) vt = (cl->cl_parent->cl_cvtmin + vt)/2; if (cl->cl_parent->cl_vtperiod != cl->cl_parentperiod || vt > cl->cl_vt) cl->cl_vt = vt; } else { /* * first child for a new parent backlog period. * add parent's cvtmax to vtoff of children * to make a new vt (vtoff + vt) larger than * the vt in the last period for all children. */ vt = cl->cl_parent->cl_cvtmax; for (p = cl->cl_parent->cl_children; p != NULL; p = p->cl_siblings) p->cl_vtoff += vt; cl->cl_vt = 0; cl->cl_parent->cl_cvtmax = 0; cl->cl_parent->cl_cvtmin = 0; } cl->cl_initvt = cl->cl_vt; /* update the virtual curve */ vt = cl->cl_vt + cl->cl_vtoff; rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total); if (cl->cl_virtual.x == vt) { cl->cl_virtual.x -= cl->cl_vtoff; cl->cl_vtoff = 0; } cl->cl_vtadj = 0; cl->cl_vtperiod++; /* increment vt period */ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod; if (cl->cl_parent->cl_nactive == 0) cl->cl_parentperiod++; cl->cl_f = 0; actlist_insert(cl); if (cl->cl_usc != NULL) { /* class has upper limit curve */ if (cur_time == 0) cur_time = read_machclk(); /* update the ulimit curve */ rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time, cl->cl_total); /* compute myf */ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); cl->cl_myfadj = 0; } } if (cl->cl_myf > cl->cl_cfmin) f = cl->cl_myf; else f = cl->cl_cfmin; if (f != cl->cl_f) { cl->cl_f = f; update_cfmin(cl->cl_parent); } } } static void update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time) { u_int64_t f, myf_bound, delta; int go_passive; go_passive = qempty(cl->cl_q); for (; cl->cl_parent != NULL; cl = cl->cl_parent) { cl->cl_total += len; if (cl->cl_fsc == NULL || cl->cl_nactive == 0) continue; if (go_passive && --cl->cl_nactive == 0) go_passive = 1; else go_passive = 0; if (go_passive) { /* no more active child, going passive */ /* update cvtmax of the parent class */ if (cl->cl_vt > cl->cl_parent->cl_cvtmax) cl->cl_parent->cl_cvtmax = cl->cl_vt; /* remove this class from the vt list */ actlist_remove(cl); update_cfmin(cl->cl_parent); continue; } /* * update vt and f */ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) - cl->cl_vtoff + cl->cl_vtadj; /* * if vt of the class is smaller than cvtmin, * the class was skipped in the past due to non-fit. * if so, we need to adjust vtadj. 
*/ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; cl->cl_vt = cl->cl_parent->cl_cvtmin; } /* update the vt list */ actlist_update(cl); if (cl->cl_usc != NULL) { cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, cl->cl_total); /* * if myf lags behind by more than one clock tick * from the current time, adjust myfadj to prevent * a rate-limited class from going greedy. * in a steady state under rate-limiting, myf * fluctuates within one clock tick. */ myf_bound = cur_time - machclk_per_tick; if (cl->cl_myf < myf_bound) { delta = cur_time - cl->cl_myf; cl->cl_myfadj += delta; cl->cl_myf += delta; } } /* cl_f is max(cl_myf, cl_cfmin) */ if (cl->cl_myf > cl->cl_cfmin) f = cl->cl_myf; else f = cl->cl_cfmin; if (f != cl->cl_f) { cl->cl_f = f; update_cfmin(cl->cl_parent); } } } static void update_cfmin(struct hfsc_class *cl) { struct hfsc_class *p; u_int64_t cfmin; if (TAILQ_EMPTY(&cl->cl_actc)) { cl->cl_cfmin = 0; return; } cfmin = HT_INFINITY; TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) { if (p->cl_f == 0) { cl->cl_cfmin = 0; return; } if (p->cl_f < cfmin) cfmin = p->cl_f; } cl->cl_cfmin = cfmin; } /* * TAILQ based ellist and actlist implementation * (ion wanted to make a calendar queue based implementation) */ /* * eligible list holds backlogged classes being sorted by their eligible times. * there is one eligible list per interface. */ static void ellist_insert(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; struct hfsc_class *p; /* check the last entry first */ if ((p = TAILQ_LAST(&hif->hif_eligible, elighead)) == NULL || p->cl_e <= cl->cl_e) { TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); return; } TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { if (cl->cl_e < p->cl_e) { TAILQ_INSERT_BEFORE(p, cl, cl_ellist); return; } } ASSERT(0); /* should not reach here */ } static void ellist_remove(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); } static void ellist_update(struct hfsc_class *cl) { struct hfsc_if *hif = cl->cl_hif; struct hfsc_class *p, *last; /* * the eligible time of a class increases monotonically. * if the next entry has a larger eligible time, nothing to do. */ p = TAILQ_NEXT(cl, cl_ellist); if (p == NULL || cl->cl_e <= p->cl_e) return; /* check the last entry */ last = TAILQ_LAST(&hif->hif_eligible, elighead); ASSERT(last != NULL); if (last->cl_e <= cl->cl_e) { TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist); return; } /* * the new position must be between the next entry * and the last entry */ while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) { if (cl->cl_e < p->cl_e) { TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist); TAILQ_INSERT_BEFORE(p, cl, cl_ellist); return; } } ASSERT(0); /* should not reach here */ } /* find the class with the minimum deadline among the eligible classes */ struct hfsc_class * hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time) { struct hfsc_class *p, *cl = NULL; TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) { if (p->cl_e > cur_time) break; if (cl == NULL || p->cl_d < cl->cl_d) cl = p; } return (cl); } /* * active children list holds backlogged child classes being sorted * by their virtual time. * each intermediate class has one active children list. 
*/ static void actlist_insert(struct hfsc_class *cl) { struct hfsc_class *p; /* check the last entry first */ if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead)) == NULL || p->cl_vt <= cl->cl_vt) { TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); return; } TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) { if (cl->cl_vt < p->cl_vt) { TAILQ_INSERT_BEFORE(p, cl, cl_actlist); return; } } ASSERT(0); /* should not reach here */ } static void actlist_remove(struct hfsc_class *cl) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); } static void actlist_update(struct hfsc_class *cl) { struct hfsc_class *p, *last; /* * the virtual time of a class increases monotonically during its * backlogged period. * if the next entry has a larger virtual time, nothing to do. */ p = TAILQ_NEXT(cl, cl_actlist); if (p == NULL || cl->cl_vt < p->cl_vt) return; /* check the last entry */ last = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead); ASSERT(last != NULL); if (last->cl_vt <= cl->cl_vt) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist); return; } /* * the new position must be between the next entry * and the last entry */ while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) { if (cl->cl_vt < p->cl_vt) { TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist); TAILQ_INSERT_BEFORE(p, cl, cl_actlist); return; } } ASSERT(0); /* should not reach here */ } static struct hfsc_class * actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time) { struct hfsc_class *p; TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) { if (p->cl_f <= cur_time) return (p); } return (NULL); } /* * service curve support functions * * external service curve parameters * m: bits/sec * d: msec * internal service curve parameters - * sm: (bytes/tsc_interval) << SM_SHIFT - * ism: (tsc_count/byte) << ISM_SHIFT - * dx: tsc_count + * sm: (bytes/machclk tick) << SM_SHIFT + * ism: (machclk ticks/byte) << ISM_SHIFT + * dx: machclk ticks * - * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits. - * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU - * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective - * digits in decimal using the following table. + * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits. we + * should be able to handle 100K-100Gbps linkspeed with 256 MHz machclk + * frequency and at least 3 effective digits in decimal. 
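+ * (editor's worked example, approximate, not in the original: at
+ * machclk_freq = 256 MHz, 100Kbps is 12500 bytes/sec, i.e. 4.88e-5
+ * bytes/tick, so sm = 4.88e-5 * 2^24 ~= 819 (3 effective digits);
+ * at 100Gbps the inverse slope is 0.02048 ticks/byte, which the old
+ * ISM_SHIFT of 10 would squash to ism ~= 21 (barely 2 digits), while
+ * ISM_SHIFT = 14 keeps ism ~= 336.)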
* - * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps - * ----------+------------------------------------------------------- - * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6 - * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6 - * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6 - * - * nsec/byte 80000 8000 800 80 8 - * ism(500MHz) 40000 4000 400 40 4 - * ism(200MHz) 16000 1600 160 16 1.6 */ #define SM_SHIFT 24 -#define ISM_SHIFT 10 +#define ISM_SHIFT 14 #define SM_MASK ((1LL << SM_SHIFT) - 1) #define ISM_MASK ((1LL << ISM_SHIFT) - 1) static __inline u_int64_t seg_x2y(u_int64_t x, u_int64_t sm) { u_int64_t y; /* * compute * y = x * sm >> SM_SHIFT * but divide it for the upper and lower bits to avoid overflow */ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT); return (y); } static __inline u_int64_t seg_y2x(u_int64_t y, u_int64_t ism) { u_int64_t x; if (y == 0) x = 0; else if (ism == HT_INFINITY) x = HT_INFINITY; else { x = (y >> ISM_SHIFT) * ism + (((y & ISM_MASK) * ism) >> ISM_SHIFT); } return (x); } static __inline u_int64_t -m2sm(u_int m) +m2sm(u_int64_t m) { u_int64_t sm; - sm = ((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq; + sm = (m << SM_SHIFT) / 8 / machclk_freq; return (sm); } static __inline u_int64_t -m2ism(u_int m) +m2ism(u_int64_t m) { u_int64_t ism; if (m == 0) ism = HT_INFINITY; else ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m; return (ism); } static __inline u_int64_t d2dx(u_int d) { u_int64_t dx; dx = ((u_int64_t)d * machclk_freq) / 1000; return (dx); } -static u_int +static u_int64_t sm2m(u_int64_t sm) { u_int64_t m; m = (sm * 8 * machclk_freq) >> SM_SHIFT; - return ((u_int)m); + return (m); } static u_int dx2d(u_int64_t dx) { u_int64_t d; d = dx * 1000 / machclk_freq; return ((u_int)d); } static void sc2isc(struct service_curve *sc, struct internal_sc *isc) { isc->sm1 = m2sm(sc->m1); isc->ism1 = m2ism(sc->m1); isc->dx = d2dx(sc->d); isc->dy = seg_x2y(isc->dx, isc->sm1); isc->sm2 = m2sm(sc->m2); isc->ism2 = m2ism(sc->m2); } /* * initialize the runtime service curve with the given internal * service curve starting at (x, y). */ static void rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x, u_int64_t y) { rtsc->x = x; rtsc->y = y; rtsc->sm1 = isc->sm1; rtsc->ism1 = isc->ism1; rtsc->dx = isc->dx; rtsc->dy = isc->dy; rtsc->sm2 = isc->sm2; rtsc->ism2 = isc->ism2; } /* * calculate the y-projection of the runtime service curve by the * given x-projection value */ static u_int64_t rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y) { u_int64_t x; if (y < rtsc->y) x = rtsc->x; else if (y <= rtsc->y + rtsc->dy) { /* x belongs to the 1st segment */ if (rtsc->dy == 0) x = rtsc->x + rtsc->dx; else x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1); } else { /* x belongs to the 2nd segment */ x = rtsc->x + rtsc->dx + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2); } return (x); } static u_int64_t rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x) { u_int64_t y; if (x <= rtsc->x) y = rtsc->y; else if (x <= rtsc->x + rtsc->dx) /* y belongs to the 1st segment */ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1); else /* y belongs to the 2nd segment */ y = rtsc->y + rtsc->dy + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2); return (y); } /* * update the runtime service curve by taking the minimum of the current * runtime service curve and the service curve starting at (x, y). 
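* (editor's note: for the convex case (sm1 <= sm2) the code below just
* keeps whichever curve is lower at x; in the concave case the curves
* can cross once, and the crossing offset follows from solving
* seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y), giving
* dx = ((y1 - y) << SM_SHIFT) / (sm1 - sm2), the shift undoing the
* slope scaling.)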
*/ static void rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x, u_int64_t y) { u_int64_t y1, y2, dx, dy; if (isc->sm1 <= isc->sm2) { /* service curve is convex */ y1 = rtsc_x2y(rtsc, x); if (y1 < y) /* the current rtsc is smaller */ return; rtsc->x = x; rtsc->y = y; return; } /* * service curve is concave * compute the two y values of the current rtsc * y1: at x * y2: at (x + dx) */ y1 = rtsc_x2y(rtsc, x); if (y1 <= y) { /* rtsc is below isc, no change to rtsc */ return; } y2 = rtsc_x2y(rtsc, x + isc->dx); if (y2 >= y + isc->dy) { /* rtsc is above isc, replace rtsc by isc */ rtsc->x = x; rtsc->y = y; rtsc->dx = isc->dx; rtsc->dy = isc->dy; return; } /* * the two curves intersect * compute the offsets (dx, dy) using the reverse * function of seg_x2y() * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y) */ dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2); /* * check if (x, y1) belongs to the 1st segment of rtsc. * if so, add the offset. */ if (rtsc->x + rtsc->dx > x) dx += rtsc->x + rtsc->dx - x; dy = seg_x2y(dx, isc->sm1); rtsc->x = x; rtsc->y = y; rtsc->dx = dx; rtsc->dy = dy; return; } static void -get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl) +get_class_stats_v0(struct hfsc_classstats_v0 *sp, struct hfsc_class *cl) +{ + sp->class_id = cl->cl_id; + sp->class_handle = cl->cl_handle; + +#define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX) + + if (cl->cl_rsc != NULL) { + sp->rsc.m1 = SATU32(sm2m(cl->cl_rsc->sm1)); + sp->rsc.d = dx2d(cl->cl_rsc->dx); + sp->rsc.m2 = SATU32(sm2m(cl->cl_rsc->sm2)); + } else { + sp->rsc.m1 = 0; + sp->rsc.d = 0; + sp->rsc.m2 = 0; + } + if (cl->cl_fsc != NULL) { + sp->fsc.m1 = SATU32(sm2m(cl->cl_fsc->sm1)); + sp->fsc.d = dx2d(cl->cl_fsc->dx); + sp->fsc.m2 = SATU32(sm2m(cl->cl_fsc->sm2)); + } else { + sp->fsc.m1 = 0; + sp->fsc.d = 0; + sp->fsc.m2 = 0; + } + if (cl->cl_usc != NULL) { + sp->usc.m1 = SATU32(sm2m(cl->cl_usc->sm1)); + sp->usc.d = dx2d(cl->cl_usc->dx); + sp->usc.m2 = SATU32(sm2m(cl->cl_usc->sm2)); + } else { + sp->usc.m1 = 0; + sp->usc.d = 0; + sp->usc.m2 = 0; + } + +#undef SATU32 + + sp->total = cl->cl_total; + sp->cumul = cl->cl_cumul; + + sp->d = cl->cl_d; + sp->e = cl->cl_e; + sp->vt = cl->cl_vt; + sp->f = cl->cl_f; + + sp->initvt = cl->cl_initvt; + sp->vtperiod = cl->cl_vtperiod; + sp->parentperiod = cl->cl_parentperiod; + sp->nactive = cl->cl_nactive; + sp->vtoff = cl->cl_vtoff; + sp->cvtmax = cl->cl_cvtmax; + sp->myf = cl->cl_myf; + sp->cfmin = cl->cl_cfmin; + sp->cvtmin = cl->cl_cvtmin; + sp->myfadj = cl->cl_myfadj; + sp->vtadj = cl->cl_vtadj; + + sp->cur_time = read_machclk(); + sp->machclk_freq = machclk_freq; + + sp->qlength = qlen(cl->cl_q); + sp->qlimit = qlimit(cl->cl_q); + sp->xmit_cnt = cl->cl_stats.xmit_cnt; + sp->drop_cnt = cl->cl_stats.drop_cnt; + sp->period = cl->cl_stats.period; + + sp->qtype = qtype(cl->cl_q); +#ifdef ALTQ_RED + if (q_is_red(cl->cl_q)) + red_getstats(cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_RIO + if (q_is_rio(cl->cl_q)) + rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_getstats(cl->cl_codel, &sp->codel); +#endif +} + +static void +get_class_stats_v1(struct hfsc_classstats_v1 *sp, struct hfsc_class *cl) { sp->class_id = cl->cl_id; sp->class_handle = cl->cl_handle; if (cl->cl_rsc != NULL) { sp->rsc.m1 = sm2m(cl->cl_rsc->sm1); sp->rsc.d = dx2d(cl->cl_rsc->dx); sp->rsc.m2 = sm2m(cl->cl_rsc->sm2); } else { sp->rsc.m1 = 0; sp->rsc.d = 0; sp->rsc.m2 = 0; } if (cl->cl_fsc != NULL) { sp->fsc.m1 = 
sm2m(cl->cl_fsc->sm1); sp->fsc.d = dx2d(cl->cl_fsc->dx); sp->fsc.m2 = sm2m(cl->cl_fsc->sm2); } else { sp->fsc.m1 = 0; sp->fsc.d = 0; sp->fsc.m2 = 0; } if (cl->cl_usc != NULL) { sp->usc.m1 = sm2m(cl->cl_usc->sm1); sp->usc.d = dx2d(cl->cl_usc->dx); sp->usc.m2 = sm2m(cl->cl_usc->sm2); } else { sp->usc.m1 = 0; sp->usc.d = 0; sp->usc.m2 = 0; } sp->total = cl->cl_total; sp->cumul = cl->cl_cumul; sp->d = cl->cl_d; sp->e = cl->cl_e; sp->vt = cl->cl_vt; sp->f = cl->cl_f; sp->initvt = cl->cl_initvt; sp->vtperiod = cl->cl_vtperiod; sp->parentperiod = cl->cl_parentperiod; sp->nactive = cl->cl_nactive; sp->vtoff = cl->cl_vtoff; sp->cvtmax = cl->cl_cvtmax; sp->myf = cl->cl_myf; sp->cfmin = cl->cl_cfmin; sp->cvtmin = cl->cl_cvtmin; sp->myfadj = cl->cl_myfadj; sp->vtadj = cl->cl_vtadj; sp->cur_time = read_machclk(); sp->machclk_freq = machclk_freq; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); sp->xmit_cnt = cl->cl_stats.xmit_cnt; sp->drop_cnt = cl->cl_stats.drop_cnt; sp->period = cl->cl_stats.period; sp->qtype = qtype(cl->cl_q); #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_getstats(cl->cl_codel, &sp->codel); #endif } /* convert a class handle to the corresponding class pointer */ static struct hfsc_class * clh_to_clp(struct hfsc_if *hif, u_int32_t chandle) { int i; struct hfsc_class *cl; if (chandle == 0) return (NULL); /* * first, try optimistically the slot matching the lower bits of * the handle. if it fails, do the linear table search. */ i = chandle % HFSC_MAX_CLASSES; if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) return (cl); for (i = 0; i < HFSC_MAX_CLASSES; i++) if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } #ifdef ALTQ3_COMPAT static struct hfsc_if * hfsc_attach(ifq, bandwidth) struct ifaltq *ifq; u_int bandwidth; { struct hfsc_if *hif; hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK); if (hif == NULL) return (NULL); bzero(hif, sizeof(struct hfsc_if)); hif->hif_eligible = ellist_alloc(); if (hif->hif_eligible == NULL) { free(hif, M_DEVBUF); return NULL; } hif->hif_ifq = ifq; /* add this state to the hfsc list */ hif->hif_next = hif_list; hif_list = hif; return (hif); } static int hfsc_detach(hif) struct hfsc_if *hif; { (void)hfsc_clear_interface(hif); (void)hfsc_class_destroy(hif->hif_rootclass); /* remove this interface from the hif list */ if (hif_list == hif) hif_list = hif->hif_next; else { struct hfsc_if *h; for (h = hif_list; h != NULL; h = h->hif_next) if (h->hif_next == hif) { h->hif_next = hif->hif_next; break; } ASSERT(h != NULL); } ellist_destroy(hif->hif_eligible); free(hif, M_DEVBUF); return (0); } static int hfsc_class_modify(cl, rsc, fsc, usc) struct hfsc_class *cl; struct service_curve *rsc, *fsc, *usc; { struct internal_sc *rsc_tmp, *fsc_tmp, *usc_tmp; u_int64_t cur_time; int s; rsc_tmp = fsc_tmp = usc_tmp = NULL; if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) && cl->cl_rsc == NULL) { rsc_tmp = malloc(sizeof(struct internal_sc), M_DEVBUF, M_WAITOK); if (rsc_tmp == NULL) return (ENOMEM); } if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) && cl->cl_fsc == NULL) { fsc_tmp = malloc(sizeof(struct internal_sc), M_DEVBUF, M_WAITOK); if (fsc_tmp == NULL) { free(rsc_tmp, M_DEVBUF); return (ENOMEM); } } if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) && cl->cl_usc == NULL) { usc_tmp = malloc(sizeof(struct
internal_sc), M_DEVBUF, M_WAITOK); if (usc_tmp == NULL) { free(rsc_tmp, M_DEVBUF); free(fsc_tmp, M_DEVBUF); return (ENOMEM); } } cur_time = read_machclk(); s = splnet(); IFQ_LOCK(cl->cl_hif->hif_ifq); if (rsc != NULL) { if (rsc->m1 == 0 && rsc->m2 == 0) { if (cl->cl_rsc != NULL) { if (!qempty(cl->cl_q)) hfsc_purgeq(cl); free(cl->cl_rsc, M_DEVBUF); cl->cl_rsc = NULL; } } else { if (cl->cl_rsc == NULL) cl->cl_rsc = rsc_tmp; sc2isc(rsc, cl->cl_rsc); rtsc_init(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul); cl->cl_eligible = cl->cl_deadline; if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) { cl->cl_eligible.dx = 0; cl->cl_eligible.dy = 0; } } } if (fsc != NULL) { if (fsc->m1 == 0 && fsc->m2 == 0) { if (cl->cl_fsc != NULL) { if (!qempty(cl->cl_q)) hfsc_purgeq(cl); free(cl->cl_fsc, M_DEVBUF); cl->cl_fsc = NULL; } } else { if (cl->cl_fsc == NULL) cl->cl_fsc = fsc_tmp; sc2isc(fsc, cl->cl_fsc); rtsc_init(&cl->cl_virtual, cl->cl_fsc, cl->cl_vt, cl->cl_total); } } if (usc != NULL) { if (usc->m1 == 0 && usc->m2 == 0) { if (cl->cl_usc != NULL) { free(cl->cl_usc, M_DEVBUF); cl->cl_usc = NULL; cl->cl_myf = 0; } } else { if (cl->cl_usc == NULL) cl->cl_usc = usc_tmp; sc2isc(usc, cl->cl_usc); rtsc_init(&cl->cl_ulimit, cl->cl_usc, cur_time, cl->cl_total); } } if (!qempty(cl->cl_q)) { if (cl->cl_rsc != NULL) update_ed(cl, m_pktlen(qhead(cl->cl_q))); if (cl->cl_fsc != NULL) update_vf(cl, 0, cur_time); /* is this enough? */ } IFQ_UNLOCK(cl->cl_hif->hif_ifq); splx(s); return (0); } /* * hfsc device interface */ int hfscopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) { printf("hfsc: no cpu clock available!\n"); return (ENXIO); } /* everything will be done when the queueing scheme is attached.
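* (editor's note: the open routine only checks that a usable machine
* clock exists; if machclk_freq is still 0 after init_machclk() there
* is no usable timecounter and the open fails with ENXIO.)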
*/ return 0; } int hfscclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct hfsc_if *hif; int err, error = 0; while ((hif = hif_list) != NULL) { /* destroy all */ if (ALTQ_IS_ENABLED(hif->hif_ifq)) altq_disable(hif->hif_ifq); err = altq_detach(hif->hif_ifq); if (err == 0) err = hfsc_detach(hif); if (err != 0 && error == 0) error = err; } return error; } int hfscioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct hfsc_if *hif; struct hfsc_interface *ifacep; int error = 0; /* check super-user privilege */ switch (cmd) { case HFSC_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) return (error); #elif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) return (error); #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) return (error); #endif break; } switch (cmd) { case HFSC_IF_ATTACH: error = hfsccmd_if_attach((struct hfsc_attach *)addr); break; case HFSC_IF_DETACH: error = hfsccmd_if_detach((struct hfsc_interface *)addr); break; case HFSC_ENABLE: case HFSC_DISABLE: case HFSC_CLEAR_HIERARCHY: ifacep = (struct hfsc_interface *)addr; if ((hif = altq_lookup(ifacep->hfsc_ifname, ALTQT_HFSC)) == NULL) { error = EBADF; break; } switch (cmd) { case HFSC_ENABLE: if (hif->hif_defaultclass == NULL) { #ifdef ALTQ_DEBUG printf("hfsc: no default class\n"); #endif error = EINVAL; break; } error = altq_enable(hif->hif_ifq); break; case HFSC_DISABLE: error = altq_disable(hif->hif_ifq); break; case HFSC_CLEAR_HIERARCHY: hfsc_clear_interface(hif); break; } break; case HFSC_ADD_CLASS: error = hfsccmd_add_class((struct hfsc_add_class *)addr); break; case HFSC_DEL_CLASS: error = hfsccmd_delete_class((struct hfsc_delete_class *)addr); break; case HFSC_MOD_CLASS: error = hfsccmd_modify_class((struct hfsc_modify_class *)addr); break; case HFSC_ADD_FILTER: error = hfsccmd_add_filter((struct hfsc_add_filter *)addr); break; case HFSC_DEL_FILTER: error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr); break; case HFSC_GETSTATS: error = hfsccmd_class_stats((struct hfsc_class_stats *)addr); break; default: error = EINVAL; break; } return error; } static int hfsccmd_if_attach(ap) struct hfsc_attach *ap; { struct hfsc_if *hif; struct ifnet *ifp; int error; if ((ifp = ifunit(ap->iface.hfsc_ifname)) == NULL) return (ENXIO); if ((hif = hfsc_attach(&ifp->if_snd, ap->bandwidth)) == NULL) return (ENOMEM); /* * set HFSC to this ifnet structure.
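* (editor's note: altq_attach() below registers hfsc_enqueue,
* hfsc_dequeue and hfsc_request plus the ALTQ3 classifier on the
* interface's send queue; on failure the partially constructed state
* is torn down again via hfsc_detach().)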
*/ if ((error = altq_attach(&ifp->if_snd, ALTQT_HFSC, hif, hfsc_enqueue, hfsc_dequeue, hfsc_request, &hif->hif_classifier, acc_classify)) != 0) (void)hfsc_detach(hif); return (error); } static int hfsccmd_if_detach(ap) struct hfsc_interface *ap; { struct hfsc_if *hif; int error; if ((hif = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); if (ALTQ_IS_ENABLED(hif->hif_ifq)) altq_disable(hif->hif_ifq); if ((error = altq_detach(hif->hif_ifq))) return (error); return hfsc_detach(hif); } static int hfsccmd_add_class(ap) struct hfsc_add_class *ap; { struct hfsc_if *hif; struct hfsc_class *cl, *parent; int i; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); if (ap->parent_handle == HFSC_NULLCLASS_HANDLE && hif->hif_rootclass == NULL) parent = NULL; else if ((parent = clh_to_clp(hif, ap->parent_handle)) == NULL) return (EINVAL); /* assign a class handle (use a free slot number for now) */ for (i = 1; i < HFSC_MAX_CLASSES; i++) if (hif->hif_class_tbl[i] == NULL) break; if (i == HFSC_MAX_CLASSES) return (EBUSY); if ((cl = hfsc_class_create(hif, &ap->service_curve, NULL, NULL, parent, ap->qlimit, ap->flags, i)) == NULL) return (ENOMEM); /* return a class handle to the user */ ap->class_handle = i; return (0); } static int hfsccmd_delete_class(ap) struct hfsc_delete_class *ap; { struct hfsc_if *hif; struct hfsc_class *cl; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) return (EINVAL); return hfsc_class_destroy(cl); } static int hfsccmd_modify_class(ap) struct hfsc_modify_class *ap; { struct hfsc_if *hif; struct hfsc_class *cl; struct service_curve *rsc = NULL; struct service_curve *fsc = NULL; struct service_curve *usc = NULL; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) return (EINVAL); if (ap->sctype & HFSC_REALTIMESC) rsc = &ap->service_curve; if (ap->sctype & HFSC_LINKSHARINGSC) fsc = &ap->service_curve; if (ap->sctype & HFSC_UPPERLIMITSC) usc = &ap->service_curve; return hfsc_class_modify(cl, rsc, fsc, usc); } static int hfsccmd_add_filter(ap) struct hfsc_add_filter *ap; { struct hfsc_if *hif; struct hfsc_class *cl; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL) return (EINVAL); if (is_a_parent_class(cl)) { #ifdef ALTQ_DEBUG printf("hfsccmd_add_filter: not a leaf class!\n"); #endif return (EINVAL); } return acc_add_filter(&hif->hif_classifier, &ap->filter, cl, &ap->filter_handle); } static int hfsccmd_delete_filter(ap) struct hfsc_delete_filter *ap; { struct hfsc_if *hif; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); return acc_delete_filter(&hif->hif_classifier, ap->filter_handle); } static int hfsccmd_class_stats(ap) struct hfsc_class_stats *ap; { struct hfsc_if *hif; struct hfsc_class *cl; struct hfsc_classstats stats, *usp; int n, nclasses, error; if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL) return (EBADF); ap->cur_time = read_machclk(); ap->machclk_freq = machclk_freq; ap->hif_classes = hif->hif_classes; ap->hif_packets = hif->hif_packets; /* skip the first N classes in the tree */ nclasses = ap->nskip; for (cl = hif->hif_rootclass, n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) ; if (n != nclasses) return (EINVAL); /* then, read the next N classes in the tree */ nclasses = ap->nclasses; usp = 
ap->stats; for (n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) { get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, sizeof(stats))) != 0) return (error); } ap->nclasses = n; return (0); } #ifdef KLD_MODULE static struct altqsw hfsc_sw = {"hfsc", hfscopen, hfscclose, hfscioctl}; ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw); MODULE_DEPEND(altq_hfsc, altq_red, 1, 1, 1); MODULE_DEPEND(altq_hfsc, altq_rio, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_HFSC */ Index: head/sys/net/altq/altq_hfsc.h =================================================================== --- head/sys/net/altq/altq_hfsc.h (revision 338208) +++ head/sys/net/altq/altq_hfsc.h (revision 338209) @@ -1,319 +1,406 @@ /*- * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. * * Permission to use, copy, modify, and distribute this software and * its documentation is hereby granted (including for commercial or * for-profit use), provided that both the copyright notice and this * permission notice appear in all copies of the software, derivative * works, or modified versions, and any portions thereof. * * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * Carnegie Mellon encourages (but does not require) users of this * software to return any improvements or extensions that they make, * and to grant Carnegie Mellon the rights to redistribute these * changes without encumbrance. 
* * $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_HFSC_H_ #define _ALTQ_ALTQ_HFSC_H_ #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif -struct service_curve { +struct service_curve_v0 { u_int m1; /* slope of the first segment in bits/sec */ u_int d; /* the x-projection of the first segment in msec */ u_int m2; /* slope of the second segment in bits/sec */ }; +struct service_curve_v1 { + u_int64_t m1; /* slope of the first segment in bits/sec */ + u_int d; /* the x-projection of the first segment in msec */ + u_int64_t m2; /* slope of the second segment in bits/sec */ +}; + +/* Latest version of struct service_curve_vX */ +#define HFSC_SERVICE_CURVE_VERSION 1 + /* special class handles */ #define HFSC_NULLCLASS_HANDLE 0 #define HFSC_MAX_CLASSES 64 /* hfsc class flags */ #define HFCF_RED 0x0001 /* use RED */ #define HFCF_ECN 0x0002 /* use RED/ECN */ #define HFCF_RIO 0x0004 /* use RIO */ #define HFCF_CODEL 0x0008 /* use CoDel */ #define HFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define HFCF_DEFAULTCLASS 0x1000 /* default class */ /* service curve types */ #define HFSC_REALTIMESC 1 #define HFSC_LINKSHARINGSC 2 #define HFSC_UPPERLIMITSC 4 #define HFSC_DEFAULTSC (HFSC_REALTIMESC|HFSC_LINKSHARINGSC) -struct hfsc_classstats { +struct hfsc_classstats_v0 { u_int class_id; u_int32_t class_handle; - struct service_curve rsc; - struct service_curve fsc; - struct service_curve usc; /* upper limit service curve */ + struct service_curve_v0 rsc; + struct service_curve_v0 fsc; + struct service_curve_v0 usc; /* upper limit service curve */ u_int64_t total; /* total work in bytes */ u_int64_t cumul; /* cumulative work in bytes done by real-time criteria */ u_int64_t d; /* deadline */ u_int64_t e; /* eligible time */ u_int64_t vt; /* virtual time */ u_int64_t f; /* fit time for upper-limit */ /* info helpful for debugging */ u_int64_t initvt; /* init virtual time */ u_int64_t vtoff; /* cl_vt_ipoff */ u_int64_t cvtmax; /* cl_maxvt */ u_int64_t myf; /* cl_myf */ u_int64_t cfmin; /* cl_mincf */ u_int64_t cvtmin; /* cl_mincvt */ u_int64_t myfadj; /* cl_myfadj */ u_int64_t vtadj; /* cl_vtadj */ u_int64_t cur_time; u_int32_t machclk_freq; u_int qlength; u_int qlimit; struct pktcntr xmit_cnt; struct pktcntr drop_cnt; u_int period; u_int vtperiod; /* vt period sequence no */ u_int parentperiod; /* parent's vt period seqno */ int nactive; /* number of active children */ /* codel, red and rio related info */ int qtype; struct redstats red[3]; struct codel_stats codel; }; +struct hfsc_classstats_v1 { + u_int class_id; + u_int32_t class_handle; + struct service_curve_v1 rsc; + struct service_curve_v1 fsc; + struct service_curve_v1 usc; /* upper limit service curve */ + + u_int64_t total; /* total work in bytes */ + u_int64_t cumul; /* cumulative work in bytes + done by real-time criteria */ + u_int64_t d; /* deadline */ + u_int64_t e; /* eligible time */ + u_int64_t vt; /* virtual time */ + u_int64_t f; /* fit time for upper-limit */ + + /* info helpful for debugging */ + u_int64_t initvt; /* init virtual time */ + u_int64_t vtoff; /* cl_vt_ipoff */ + u_int64_t cvtmax; /* cl_maxvt */ + u_int64_t myf; /* cl_myf */ + u_int64_t cfmin; /* cl_mincf */ + u_int64_t cvtmin; /* cl_mincvt */ + u_int64_t myfadj; /* cl_myfadj */ + u_int64_t vtadj; /* cl_vtadj */ + u_int64_t cur_time; + u_int32_t machclk_freq; + + u_int qlength; + u_int qlimit; + struct pktcntr xmit_cnt; + struct pktcntr drop_cnt; + u_int period; + + u_int vtperiod; /* vt period 
sequence no */ + u_int parentperiod; /* parent's vt period seqno */ + int nactive; /* number of active children */ + + /* codel, red and rio related info */ + int qtype; + struct redstats red[3]; + struct codel_stats codel; +}; + +/* + * HFSC_STATS_VERSION is defined in altq.h to work around issues stemming + * from mixing of public-API and internal bits in each scheduler-specific + * header. + */ + #ifdef ALTQ3_COMPAT struct hfsc_interface { char hfsc_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */ }; struct hfsc_attach { struct hfsc_interface iface; u_int bandwidth; /* link bandwidth in bits/sec */ }; struct hfsc_add_class { struct hfsc_interface iface; u_int32_t parent_handle; struct service_curve service_curve; int qlimit; int flags; u_int32_t class_handle; /* return value */ }; struct hfsc_delete_class { struct hfsc_interface iface; u_int32_t class_handle; }; struct hfsc_modify_class { struct hfsc_interface iface; u_int32_t class_handle; struct service_curve service_curve; int sctype; }; struct hfsc_add_filter { struct hfsc_interface iface; u_int32_t class_handle; struct flow_filter filter; u_long filter_handle; /* return value */ }; struct hfsc_delete_filter { struct hfsc_interface iface; u_long filter_handle; }; struct hfsc_class_stats { struct hfsc_interface iface; int nskip; /* skip # of classes */ int nclasses; /* # of class stats (WR) */ u_int64_t cur_time; /* current time */ u_int32_t machclk_freq; /* machine clock frequency */ u_int hif_classes; /* # of classes in the tree */ u_int hif_packets; /* # of packets in the tree */ struct hfsc_classstats *stats; /* pointer to stats array */ }; #define HFSC_IF_ATTACH _IOW('Q', 1, struct hfsc_attach) #define HFSC_IF_DETACH _IOW('Q', 2, struct hfsc_interface) #define HFSC_ENABLE _IOW('Q', 3, struct hfsc_interface) #define HFSC_DISABLE _IOW('Q', 4, struct hfsc_interface) #define HFSC_CLEAR_HIERARCHY _IOW('Q', 5, struct hfsc_interface) #define HFSC_ADD_CLASS _IOWR('Q', 7, struct hfsc_add_class) #define HFSC_DEL_CLASS _IOW('Q', 8, struct hfsc_delete_class) #define HFSC_MOD_CLASS _IOW('Q', 9, struct hfsc_modify_class) #define HFSC_ADD_FILTER _IOWR('Q', 10, struct hfsc_add_filter) #define HFSC_DEL_FILTER _IOW('Q', 11, struct hfsc_delete_filter) #define HFSC_GETSTATS _IOWR('Q', 12, struct hfsc_class_stats) #endif /* ALTQ3_COMPAT */ #ifdef _KERNEL /* * kernel internal service curve representation * coordinates are given by 64 bit unsigned integers. * x-axis: unit is clock count. for the intel x86 architecture, * the raw Pentium TSC (Timestamp Counter) value is used. * virtual time is also calculated in this time scale. * y-axis: unit is byte. * * the service curve parameters are converted to the internal * representation. * the slope values are scaled to avoid overflow. * the inverse slope values as well as the y-projection of the 1st * segment are kept in order to avoid 64-bit divide operations * that are expensive on 32-bit architectures. * * note: Intel Pentium TSC never wraps around in several thousands of years. * x-axis doesn't wrap around for 1089 years with 1GHz clock. * y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth. 
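* (editor's note: as of service_curve_v1 the external slopes m1/m2 are
* u_int64_t, so rates above UINT32_MAX bits/sec, e.g. 100Gbps = 1e11,
* are representable; the v0 stats path saturates such values through
* SATU32() instead.  A userland consumer that wants the latest layout
* can, as sketched here, opt in before including the headers:
*	#define PFIOC_USE_LATEST
*	#include <net/altq/altq.h>
*	#include <net/altq/altq_hfsc.h>
* which resolves the unversioned struct tags to the _v1 versions.)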
*/ /* kernel internal representation of a service curve */ struct internal_sc { u_int64_t sm1; /* scaled slope of the 1st segment */ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */ u_int64_t dx; /* the x-projection of the 1st segment */ u_int64_t dy; /* the y-projection of the 1st segment */ u_int64_t sm2; /* scaled slope of the 2nd segment */ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */ }; /* runtime service curve */ struct runtime_sc { u_int64_t x; /* current starting position on x-axis */ u_int64_t y; /* current starting position on y-axis */ u_int64_t sm1; /* scaled slope of the 1st segment */ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */ u_int64_t dx; /* the x-projection of the 1st segment */ u_int64_t dy; /* the y-projection of the 1st segment */ u_int64_t sm2; /* scaled slope of the 2nd segment */ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */ }; struct hfsc_class { u_int cl_id; /* class id (just for debug) */ u_int32_t cl_handle; /* class handle */ struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */ int cl_flags; /* misc flags */ struct hfsc_class *cl_parent; /* parent class */ struct hfsc_class *cl_siblings; /* sibling classes */ struct hfsc_class *cl_children; /* child classes */ class_queue_t *cl_q; /* class queue structure */ union { struct red *cl_red; /* RED state */ struct codel *cl_codel; /* CoDel state */ } cl_aqm; #define cl_red cl_aqm.cl_red #define cl_codel cl_aqm.cl_codel struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ u_int64_t cl_total; /* total work in bytes */ u_int64_t cl_cumul; /* cumulative work in bytes done by real-time criteria */ u_int64_t cl_d; /* deadline */ u_int64_t cl_e; /* eligible time */ u_int64_t cl_vt; /* virtual time */ u_int64_t cl_f; /* time when this class will fit for link-sharing, max(myf, cfmin) */ u_int64_t cl_myf; /* my fit-time (as calculated from this class's own upperlimit curve) */ u_int64_t cl_myfadj; /* my fit-time adjustment (to cancel history dependence) */ u_int64_t cl_cfmin; /* earliest children's fit-time (used with cl_myf to obtain cl_f) */ u_int64_t cl_cvtmin; /* minimal virtual time among the children fit for link-sharing (monotonic within a period) */ u_int64_t cl_vtadj; /* intra-period cumulative vt adjustment */ u_int64_t cl_vtoff; /* inter-period cumulative vt offset */ u_int64_t cl_cvtmax; /* max child's vt in the last period */ u_int64_t cl_initvt; /* init virtual time (for debugging) */ struct internal_sc *cl_rsc; /* internal real-time service curve */ struct internal_sc *cl_fsc; /* internal fair service curve */ struct internal_sc *cl_usc; /* internal upperlimit service curve */ struct runtime_sc cl_deadline; /* deadline curve */ struct runtime_sc cl_eligible; /* eligible curve */ struct runtime_sc cl_virtual; /* virtual curve */ struct runtime_sc cl_ulimit; /* upperlimit curve */ u_int cl_vtperiod; /* vt period sequence no */ u_int cl_parentperiod; /* parent's vt period seqno */ int cl_nactive; /* number of active children */ TAILQ_HEAD(acthead, hfsc_class) cl_actc; /* active children list */ TAILQ_ENTRY(hfsc_class) cl_actlist; /* active children list entry */ TAILQ_ENTRY(hfsc_class) cl_ellist; /* eligible list entry */ struct { struct pktcntr xmit_cnt; struct pktcntr drop_cnt; u_int period; } cl_stats; }; /* * hfsc interface state */ struct hfsc_if { struct hfsc_if *hif_next; /* interface state list */ struct ifaltq *hif_ifq; /* backpointer to ifaltq */ struct hfsc_class *hif_rootclass; /* root class */ struct hfsc_class
*hif_defaultclass; /* default class */ struct hfsc_class *hif_class_tbl[HFSC_MAX_CLASSES]; struct hfsc_class *hif_pollcache; /* cache for poll operation */ u_int hif_classes; /* # of classes in the tree */ u_int hif_packets; /* # of packets in the tree */ u_int hif_classid; /* class id sequence number */ TAILQ_HEAD(elighead, hfsc_class) hif_eligible; /* eligible list */ #ifdef ALTQ3_CLFIER_COMPAT struct acc_classifier hif_classifier; #endif }; + +/* + * Kernel code always wants the latest version - avoid a bunch of renames in + * the code to the current latest versioned name. + */ +#define service_curve __CONCAT(service_curve_v, HFSC_SERVICE_CURVE_VERSION) + +#else /* _KERNEL */ + +#ifdef PFIOC_USE_LATEST +/* + * Maintaining in-tree consumers of the ioctl interface is easier when that + * code can be written in terms old names that refer to the latest interface + * version as that reduces the required changes in the consumers to those + * that are functionally necessary to accommodate a new interface version. + */ +#define hfsc_classstats __CONCAT(hfsc_classstats_v, HFSC_STATS_VERSION) +#define service_curve __CONCAT(service_curve_v, HFSC_SERVICE_CURVE_VERSION) + +#else +/* + * When building out-of-tree code that is written for the old interface, + * such as may exist in ports for example, resolve the old struct tags to + * the v0 versions. + */ +#define hfsc_classstats __CONCAT(hfsc_classstats_v, 0) +#define service_curve __CONCAT(service_curve_v, 0) + +#endif /* PFIOC_USE_LATEST */ #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _ALTQ_ALTQ_HFSC_H_ */ Index: head/sys/net/altq/altq_priq.c =================================================================== --- head/sys/net/altq/altq_priq.c (revision 338208) +++ head/sys/net/altq/altq_priq.c (revision 338209) @@ -1,1070 +1,1070 @@ /*- * Copyright (C) 2000-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ * $FreeBSD$ */ /* * priority queue */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif #include /* * function prototypes */ #ifdef ALTQ3_COMPAT static struct priq_if *priq_attach(struct ifaltq *, u_int); static int priq_detach(struct priq_if *); #endif static int priq_clear_interface(struct priq_if *); static int priq_request(struct ifaltq *, int, void *); static void priq_purge(struct priq_if *); static struct priq_class *priq_class_create(struct priq_if *, int, int, int, int); static int priq_class_destroy(struct priq_class *); static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *priq_dequeue(struct ifaltq *, int); static int priq_addq(struct priq_class *, struct mbuf *); static struct mbuf *priq_getq(struct priq_class *); static struct mbuf *priq_pollq(struct priq_class *); static void priq_purgeq(struct priq_class *); #ifdef ALTQ3_COMPAT static int priqcmd_if_attach(struct priq_interface *); static int priqcmd_if_detach(struct priq_interface *); static int priqcmd_add_class(struct priq_add_class *); static int priqcmd_delete_class(struct priq_delete_class *); static int priqcmd_modify_class(struct priq_modify_class *); static int priqcmd_add_filter(struct priq_add_filter *); static int priqcmd_delete_filter(struct priq_delete_filter *); static int priqcmd_class_stats(struct priq_class_stats *); #endif /* ALTQ3_COMPAT */ static void get_class_stats(struct priq_classstats *, struct priq_class *); static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t); #ifdef ALTQ3_COMPAT altqdev_decl(priq); /* pif_list keeps all priq_if's allocated. 
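* (editor's note: pif_list is a singly-linked list threaded through
* pif_next and is used only by the ALTQ3 compatibility device
* interface below.)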
*/ static struct priq_if *pif_list = NULL; #endif /* ALTQ3_COMPAT */ int priq_pfattach(struct pf_altq *a) { struct ifnet *ifp; int s, error; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); s = splnet(); error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc, priq_enqueue, priq_dequeue, priq_request, NULL, NULL); splx(s); return (error); } int priq_add_altq(struct pf_altq *a) { struct priq_if *pif; struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (pif == NULL) return (ENOMEM); pif->pif_bandwidth = a->ifbandwidth; pif->pif_maxpri = -1; pif->pif_ifq = &ifp->if_snd; /* keep the state in pf_altq */ a->altq_disc = pif; return (0); } int priq_remove_altq(struct pf_altq *a) { struct priq_if *pif; if ((pif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; (void)priq_clear_interface(pif); free(pif, M_DEVBUF); return (0); } int priq_add_queue(struct pf_altq *a) { struct priq_if *pif; struct priq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); /* check parameters */ if (a->priority >= PRIQ_MAXPRI) return (EINVAL); if (a->qid == 0) return (EINVAL); if (pif->pif_classes[a->priority] != NULL) return (EBUSY); if (clh_to_clp(pif, a->qid) != NULL) return (EBUSY); cl = priq_class_create(pif, a->priority, a->qlimit, a->pq_u.priq_opts.flags, a->qid); if (cl == NULL) return (ENOMEM); return (0); } int priq_remove_queue(struct pf_altq *a) { struct priq_if *pif; struct priq_class *cl; if ((pif = a->altq_disc) == NULL) return (EINVAL); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); return (priq_class_destroy(cl)); } int -priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct priq_if *pif; struct priq_class *cl; struct priq_classstats stats; int error = 0; if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, a->qid)) == NULL) return (EINVAL); if (*nbytes < sizeof(stats)) return (EINVAL); get_class_stats(&stats, cl); if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } /* * bring the interface back to the initial state by discarding * all the filters and classes. 
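* (editor's note: classes are torn down from priority 0 up to
* pif_maxpri; destroying a class also purges any packets still queued
* in it.)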
*/ static int priq_clear_interface(struct priq_if *pif) { struct priq_class *cl; int pri; #ifdef ALTQ3_CLFIER_COMPAT /* free the filters for this interface */ acc_discard_filters(&pif->pif_classifier, NULL, 1); #endif /* clear out the classes */ for (pri = 0; pri <= pif->pif_maxpri; pri++) if ((cl = pif->pif_classes[pri]) != NULL) priq_class_destroy(cl); return (0); } static int priq_request(struct ifaltq *ifq, int req, void *arg) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: priq_purge(pif); break; } return (0); } /* discard all the queued packets on the interface */ static void priq_purge(struct priq_if *pif) { struct priq_class *cl; int pri; for (pri = 0; pri <= pif->pif_maxpri; pri++) { if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q)) priq_purgeq(cl); } if (ALTQ_IS_ENABLED(pif->pif_ifq)) pif->pif_ifq->ifq_len = 0; } static struct priq_class * priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) { struct priq_class *cl; int s; #ifndef ALTQ_RED if (flags & PRCF_RED) { #ifdef ALTQ_DEBUG printf("priq_class_create: RED not configured for PRIQ!\n"); #endif return (NULL); } #endif #ifndef ALTQ_CODEL if (flags & PRCF_CODEL) { #ifdef ALTQ_DEBUG printf("priq_class_create: CODEL not configured for PRIQ!\n"); #endif return (NULL); } #endif if ((cl = pif->pif_classes[pri]) != NULL) { /* modify the class instead of creating a new one */ s = splnet(); IFQ_LOCK(cl->cl_pif->pif_ifq); if (!qempty(cl->cl_q)) priq_purgeq(cl); IFQ_UNLOCK(cl->cl_pif->pif_ifq); splx(s); #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } else { cl = malloc(sizeof(struct priq_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) return (NULL); cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl->cl_q == NULL) goto err_ret; } pif->pif_classes[pri] = cl; if (flags & PRCF_DEFAULTCLASS) pif->pif_default = cl; if (qlimit == 0) qlimit = 50; /* use default */ qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; qsize(cl->cl_q) = 0; cl->cl_flags = flags; cl->cl_pri = pri; if (pri > pif->pif_maxpri) pif->pif_maxpri = pri; cl->cl_pif = pif; cl->cl_handle = qid; #ifdef ALTQ_RED if (flags & (PRCF_RED|PRCF_RIO)) { int red_flags, red_pkttime; red_flags = 0; if (flags & PRCF_ECN) red_flags |= REDF_ECN; #ifdef ALTQ_RIO if (flags & PRCF_CLEARDSCP) red_flags |= RIOF_CLEARDSCP; #endif if (pif->pif_bandwidth < 8) red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ else red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); #ifdef ALTQ_RIO if (flags & PRCF_RIO) { cl->cl_red = (red_t *)rio_alloc(0, NULL, red_flags, red_pkttime); if (cl->cl_red == NULL) goto err_ret; qtype(cl->cl_q) = Q_RIO; } else #endif if (flags & PRCF_RED) { cl->cl_red = red_alloc(0, 0, qlimit(cl->cl_q) * 10/100, qlimit(cl->cl_q) * 30/100, red_flags, red_pkttime); if (cl->cl_red == NULL) goto err_ret; qtype(cl->cl_q) = Q_RED; } } #endif /* ALTQ_RED */ #ifdef ALTQ_CODEL if (flags & PRCF_CODEL) { cl->cl_codel = codel_alloc(5, 100, 0); if (cl->cl_codel != NULL) qtype(cl->cl_q) = Q_CODEL; } #endif return (cl); err_ret: if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef 
ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (NULL); } static int priq_class_destroy(struct priq_class *cl) { struct priq_if *pif; int s, pri; s = splnet(); IFQ_LOCK(cl->cl_pif->pif_ifq); #ifdef ALTQ3_CLFIER_COMPAT /* delete filters referencing to this class */ acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0); #endif if (!qempty(cl->cl_q)) priq_purgeq(cl); pif = cl->cl_pif; pif->pif_classes[cl->cl_pri] = NULL; if (pif->pif_maxpri == cl->cl_pri) { for (pri = cl->cl_pri; pri >= 0; pri--) if (pif->pif_classes[pri] != NULL) { pif->pif_maxpri = pri; break; } if (pri < 0) pif->pif_maxpri = -1; } IFQ_UNLOCK(cl->cl_pif->pif_ifq); splx(s); if (cl->cl_red != NULL) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_destroy((rio_t *)cl->cl_red); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_destroy(cl->cl_codel); #endif } free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); return (0); } /* * priq_enqueue is an enqueue function to be registered to * (*altq_enqueue) in struct ifaltq. */ static int priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; struct priq_class *cl; struct pf_mtag *t; int len; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); return (ENOBUFS); } cl = NULL; if ((t = pf_find_mtag(m)) != NULL) cl = clh_to_clp(pif, t->qid); #ifdef ALTQ3_COMPAT else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL) cl = pktattr->pattr_class; #endif if (cl == NULL) { cl = pif->pif_default; if (cl == NULL) { m_freem(m); return (ENOBUFS); } } #ifdef ALTQ3_COMPAT if (pktattr != NULL) cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */ else #endif cl->cl_pktattr = NULL; len = m_pktlen(m); if (priq_addq(cl, m) != 0) { /* drop occurred. mbuf was freed in priq_addq. */ PKTCNTR_ADD(&cl->cl_dropcnt, len); return (ENOBUFS); } IFQ_INC_LEN(ifq); /* successfully queued. */ return (0); } /* * priq_dequeue is a dequeue function to be registered to * (*altq_dequeue) in struct ifaltq. * * note: ALTDQ_POLL returns the next packet without removing the packet * from the queue. ALTDQ_REMOVE is a normal dequeue operation. * ALTDQ_REMOVE must return the same packet if called immediately * after ALTDQ_POLL. 
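* (editor's note: the contract holds here because poll and remove both
* scan from pif_maxpri downward and stop at the first non-empty class
* queue, so an immediate ALTDQ_REMOVE revisits exactly the head packet
* that ALTDQ_POLL reported.)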
*/ static struct mbuf * priq_dequeue(struct ifaltq *ifq, int op) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; struct priq_class *cl; struct mbuf *m; int pri; IFQ_LOCK_ASSERT(ifq); if (IFQ_IS_EMPTY(ifq)) /* no packet in the queue */ return (NULL); for (pri = pif->pif_maxpri; pri >= 0; pri--) { if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q)) { if (op == ALTDQ_POLL) return (priq_pollq(cl)); m = priq_getq(cl); if (m != NULL) { IFQ_DEC_LEN(ifq); if (qempty(cl->cl_q)) cl->cl_period++; PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(m)); } return (m); } } return (NULL); } static int priq_addq(struct priq_class *cl, struct mbuf *m) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_addq(cl->cl_codel, cl->cl_q, m); #endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); } if (cl->cl_flags & PRCF_CLEARDSCP) write_dsfield(m, cl->cl_pktattr, 0); _addq(cl->cl_q, m); return (0); } static struct mbuf * priq_getq(struct priq_class *cl) { #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) return rio_getq((rio_t *)cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) return codel_getq(cl->cl_codel, cl->cl_q); #endif return _getq(cl->cl_q); } static struct mbuf * priq_pollq(cl) struct priq_class *cl; { return qhead(cl->cl_q); } static void priq_purgeq(struct priq_class *cl) { struct mbuf *m; if (qempty(cl->cl_q)) return; while ((m = _getq(cl->cl_q)) != NULL) { PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); m_freem(m); } ASSERT(qlen(cl->cl_q) == 0); } static void get_class_stats(struct priq_classstats *sp, struct priq_class *cl) { sp->class_handle = cl->cl_handle; sp->qlength = qlen(cl->cl_q); sp->qlimit = qlimit(cl->cl_q); sp->period = cl->cl_period; sp->xmitcnt = cl->cl_xmitcnt; sp->dropcnt = cl->cl_dropcnt; sp->qtype = qtype(cl->cl_q); #ifdef ALTQ_RED if (q_is_red(cl->cl_q)) red_getstats(cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_RIO if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif #ifdef ALTQ_CODEL if (q_is_codel(cl->cl_q)) codel_getstats(cl->cl_codel, &sp->codel); #endif } /* convert a class handle to the corresponding class pointer */ static struct priq_class * clh_to_clp(struct priq_if *pif, u_int32_t chandle) { struct priq_class *cl; int idx; if (chandle == 0) return (NULL); for (idx = pif->pif_maxpri; idx >= 0; idx--) if ((cl = pif->pif_classes[idx]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } #ifdef ALTQ3_COMPAT static struct priq_if * priq_attach(ifq, bandwidth) struct ifaltq *ifq; u_int bandwidth; { struct priq_if *pif; pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_WAITOK); if (pif == NULL) return (NULL); bzero(pif, sizeof(struct priq_if)); pif->pif_bandwidth = bandwidth; pif->pif_maxpri = -1; pif->pif_ifq = ifq; /* add this state to the priq list */ pif->pif_next = pif_list; pif_list = pif; return (pif); } static int priq_detach(pif) struct priq_if *pif; { (void)priq_clear_interface(pif); /* remove this interface from the pif list */ if (pif_list == pif) pif_list = pif->pif_next; else { struct priq_if *p; for (p = pif_list; p != NULL; p = p->pif_next) if (p->pif_next == pif) { p->pif_next = pif->pif_next; break; } ASSERT(p != NULL); } free(pif, M_DEVBUF); return (0); } /* * priq device 
interface */ int priqopen(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { /* everything will be done when the queueing scheme is attached. */ return 0; } int priqclose(dev, flag, fmt, p) dev_t dev; int flag, fmt; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct priq_if *pif; int err, error = 0; while ((pif = pif_list) != NULL) { /* destroy all */ if (ALTQ_IS_ENABLED(pif->pif_ifq)) altq_disable(pif->pif_ifq); err = altq_detach(pif->pif_ifq); if (err == 0) err = priq_detach(pif); if (err != 0 && error == 0) error = err; } return error; } int priqioctl(dev, cmd, addr, flag, p) dev_t dev; ioctlcmd_t cmd; caddr_t addr; int flag; #if (__FreeBSD_version > 500000) struct thread *p; #else struct proc *p; #endif { struct priq_if *pif; struct priq_interface *ifacep; int error = 0; /* check super-user privilege */ switch (cmd) { case PRIQ_GETSTATS: break; default: #if (__FreeBSD_version > 700000) if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) return (error); #elif (__FreeBSD_version > 400000) if ((error = suser(p)) != 0) return (error); #else if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) return (error); #endif break; } switch (cmd) { case PRIQ_IF_ATTACH: error = priqcmd_if_attach((struct priq_interface *)addr); break; case PRIQ_IF_DETACH: error = priqcmd_if_detach((struct priq_interface *)addr); break; case PRIQ_ENABLE: case PRIQ_DISABLE: case PRIQ_CLEAR: ifacep = (struct priq_interface *)addr; if ((pif = altq_lookup(ifacep->ifname, ALTQT_PRIQ)) == NULL) { error = EBADF; break; } switch (cmd) { case PRIQ_ENABLE: if (pif->pif_default == NULL) { #ifdef ALTQ_DEBUG printf("priq: no default class\n"); #endif error = EINVAL; break; } error = altq_enable(pif->pif_ifq); break; case PRIQ_DISABLE: error = altq_disable(pif->pif_ifq); break; case PRIQ_CLEAR: priq_clear_interface(pif); break; } break; case PRIQ_ADD_CLASS: error = priqcmd_add_class((struct priq_add_class *)addr); break; case PRIQ_DEL_CLASS: error = priqcmd_delete_class((struct priq_delete_class *)addr); break; case PRIQ_MOD_CLASS: error = priqcmd_modify_class((struct priq_modify_class *)addr); break; case PRIQ_ADD_FILTER: error = priqcmd_add_filter((struct priq_add_filter *)addr); break; case PRIQ_DEL_FILTER: error = priqcmd_delete_filter((struct priq_delete_filter *)addr); break; case PRIQ_GETSTATS: error = priqcmd_class_stats((struct priq_class_stats *)addr); break; default: error = EINVAL; break; } return error; } static int priqcmd_if_attach(ap) struct priq_interface *ap; { struct priq_if *pif; struct ifnet *ifp; int error; if ((ifp = ifunit(ap->ifname)) == NULL) return (ENXIO); if ((pif = priq_attach(&ifp->if_snd, ap->arg)) == NULL) return (ENOMEM); /* * set PRIQ to this ifnet structure.
*/ if ((error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, pif, priq_enqueue, priq_dequeue, priq_request, &pif->pif_classifier, acc_classify)) != 0) (void)priq_detach(pif); return (error); } static int priqcmd_if_detach(ap) struct priq_interface *ap; { struct priq_if *pif; int error; if ((pif = altq_lookup(ap->ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if (ALTQ_IS_ENABLED(pif->pif_ifq)) altq_disable(pif->pif_ifq); if ((error = altq_detach(pif->pif_ifq))) return (error); return priq_detach(pif); } static int priqcmd_add_class(ap) struct priq_add_class *ap; { struct priq_if *pif; struct priq_class *cl; int qid; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI) return (EINVAL); if (pif->pif_classes[ap->pri] != NULL) return (EBUSY); qid = ap->pri + 1; if ((cl = priq_class_create(pif, ap->pri, ap->qlimit, ap->flags, qid)) == NULL) return (ENOMEM); /* return a class handle to the user */ ap->class_handle = cl->cl_handle; return (0); } static int priqcmd_delete_class(ap) struct priq_delete_class *ap; { struct priq_if *pif; struct priq_class *cl; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) return (EINVAL); return priq_class_destroy(cl); } static int priqcmd_modify_class(ap) struct priq_modify_class *ap; { struct priq_if *pif; struct priq_class *cl; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI) return (EINVAL); if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) return (EINVAL); /* * if priority is changed, move the class to the new priority */ if (pif->pif_classes[ap->pri] != cl) { if (pif->pif_classes[ap->pri] != NULL) return (EEXIST); pif->pif_classes[cl->cl_pri] = NULL; pif->pif_classes[ap->pri] = cl; cl->cl_pri = ap->pri; } /* call priq_class_create to change class parameters */ if ((cl = priq_class_create(pif, ap->pri, ap->qlimit, ap->flags, ap->class_handle)) == NULL) return (ENOMEM); return 0; } static int priqcmd_add_filter(ap) struct priq_add_filter *ap; { struct priq_if *pif; struct priq_class *cl; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL) return (EINVAL); return acc_add_filter(&pif->pif_classifier, &ap->filter, cl, &ap->filter_handle); } static int priqcmd_delete_filter(ap) struct priq_delete_filter *ap; { struct priq_if *pif; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); return acc_delete_filter(&pif->pif_classifier, ap->filter_handle); } static int priqcmd_class_stats(ap) struct priq_class_stats *ap; { struct priq_if *pif; struct priq_class *cl; struct priq_classstats stats, *usp; int pri, error; if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL) return (EBADF); ap->maxpri = pif->pif_maxpri; /* then, read the next N classes in the tree */ usp = ap->stats; for (pri = 0; pri <= pif->pif_maxpri; pri++) { cl = pif->pif_classes[pri]; if (cl != NULL) get_class_stats(&stats, cl); else bzero(&stats, sizeof(stats)); if ((error = copyout((caddr_t)&stats, (caddr_t)usp++, sizeof(stats))) != 0) return (error); } return (0); } #ifdef KLD_MODULE static struct altqsw priq_sw = {"priq", priqopen, priqclose, priqioctl}; ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw); MODULE_DEPEND(altq_priq, altq_red, 1, 1, 1); MODULE_DEPEND(altq_priq, altq_rio, 1, 1, 1); #endif /* KLD_MODULE */ #endif /* ALTQ3_COMPAT */ #endif /* ALTQ_PRIQ */ Index: 
head/sys/net/altq/altq_priq.h =================================================================== --- head/sys/net/altq/altq_priq.h (revision 338208) +++ head/sys/net/altq/altq_priq.h (revision 338209) @@ -1,180 +1,186 @@ /*- * Copyright (C) 2000-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_priq.h,v 1.7 2003/10/03 05:05:15 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_PRIQ_H_ #define _ALTQ_ALTQ_PRIQ_H_ #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif #define PRIQ_MAXPRI 16 /* upper limit of the number of priorities */ #ifdef ALTQ3_COMPAT struct priq_interface { char ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */ u_long arg; /* request-specific argument */ }; struct priq_add_class { struct priq_interface iface; int pri; /* priority (0 is the lowest) */ int qlimit; /* queue size limit */ int flags; /* misc flags (see below) */ u_int32_t class_handle; /* return value */ }; #endif /* ALTQ3_COMPAT */ /* priq class flags */ #define PRCF_RED 0x0001 /* use RED */ #define PRCF_ECN 0x0002 /* use RED/ECN */ #define PRCF_RIO 0x0004 /* use RIO */ #define PRCF_CODEL 0x0008 /* use CoDel */ #define PRCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define PRCF_DEFAULTCLASS 0x1000 /* default class */ /* special class handles */ #define PRIQ_NULLCLASS_HANDLE 0 #ifdef ALTQ3_COMPAT struct priq_delete_class { struct priq_interface iface; u_int32_t class_handle; }; struct priq_modify_class { struct priq_interface iface; u_int32_t class_handle; int pri; int qlimit; int flags; }; struct priq_add_filter { struct priq_interface iface; u_int32_t class_handle; struct flow_filter filter; u_long filter_handle; /* return value */ }; struct priq_delete_filter { struct priq_interface iface; u_long filter_handle; }; #endif /* ALTQ3_COMPAT */ struct priq_classstats { u_int32_t class_handle; u_int qlength; u_int qlimit; u_int period; struct pktcntr xmitcnt; /* transmitted packet counter */ struct pktcntr dropcnt; /* dropped packet counter */ /* codel, red and rio related info */ int qtype; struct redstats red[3]; /* rio has 3 red stats */ struct codel_stats codel; }; +/* + * PRIQ_STATS_VERSION is defined in altq.h to work around issues stemming + * from mixing of public-API and internal bits in each scheduler-specific 
+ * header. + */ + #ifdef ALTQ3_COMPAT struct priq_class_stats { struct priq_interface iface; int maxpri; /* in/out */ struct priq_classstats *stats; /* pointer to stats array */ }; #define PRIQ_IF_ATTACH _IOW('Q', 1, struct priq_interface) #define PRIQ_IF_DETACH _IOW('Q', 2, struct priq_interface) #define PRIQ_ENABLE _IOW('Q', 3, struct priq_interface) #define PRIQ_DISABLE _IOW('Q', 4, struct priq_interface) #define PRIQ_CLEAR _IOW('Q', 5, struct priq_interface) #define PRIQ_ADD_CLASS _IOWR('Q', 7, struct priq_add_class) #define PRIQ_DEL_CLASS _IOW('Q', 8, struct priq_delete_class) #define PRIQ_MOD_CLASS _IOW('Q', 9, struct priq_modify_class) #define PRIQ_ADD_FILTER _IOWR('Q', 10, struct priq_add_filter) #define PRIQ_DEL_FILTER _IOW('Q', 11, struct priq_delete_filter) #define PRIQ_GETSTATS _IOWR('Q', 12, struct priq_class_stats) #endif /* ALTQ3_COMPAT */ #ifdef _KERNEL struct priq_class { u_int32_t cl_handle; /* class handle */ class_queue_t *cl_q; /* class queue structure */ union { struct red *cl_red; /* RED state */ struct codel *cl_codel; /* CoDel state */ } cl_aqm; #define cl_red cl_aqm.cl_red #define cl_codel cl_aqm.cl_codel int cl_pri; /* priority */ int cl_flags; /* class flags */ struct priq_if *cl_pif; /* back pointer to pif */ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ /* statistics */ u_int cl_period; /* backlog period */ struct pktcntr cl_xmitcnt; /* transmitted packet counter */ struct pktcntr cl_dropcnt; /* dropped packet counter */ }; /* * priq interface state */ struct priq_if { struct priq_if *pif_next; /* interface state list */ struct ifaltq *pif_ifq; /* backpointer to ifaltq */ u_int pif_bandwidth; /* link bandwidth in bps */ int pif_maxpri; /* max priority in use */ struct priq_class *pif_default; /* default class */ struct priq_class *pif_classes[PRIQ_MAXPRI]; /* classes */ #ifdef ALTQ3_CLFIER_COMPAT struct acc_classifier pif_classifier; /* classifier */ #endif }; #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _ALTQ_ALTQ_PRIQ_H_ */ Index: head/sys/net/altq/altq_subr.c =================================================================== --- head/sys/net/altq/altq_subr.c (revision 338208) +++ head/sys/net/altq/altq_subr.c (revision 338209) @@ -1,1976 +1,1966 @@ /*- * Copyright (C) 1997-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ * $FreeBSD$ */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #ifdef ALTQ3_COMPAT #include #endif /* machine dependent clock related includes */ #include #include #include #include #if defined(__amd64__) || defined(__i386__) #include /* for pentium tsc */ #include /* for CPUID_TSC */ #include /* for cpu_feature */ #endif /* __amd64 || __i386__ */ /* * internal function prototypes */ static void tbr_timeout(void *); int (*altq_input)(struct mbuf *, int) = NULL; static struct mbuf *tbr_dequeue(struct ifaltq *, int); static int tbr_timer = 0; /* token bucket regulator timer */ #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) static struct callout tbr_callout = CALLOUT_INITIALIZER; #else static struct callout tbr_callout; #endif #ifdef ALTQ3_CLFIER_COMPAT static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); #ifdef INET6 static int extract_ports6(struct mbuf *, struct ip6_hdr *, struct flowinfo_in6 *); #endif static int apply_filter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); static int apply_ppfilter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); #ifdef INET6 static int apply_filter6(u_int32_t, struct flow_filter6 *, struct flowinfo_in6 *); #endif static int apply_tosfilter4(u_int32_t, struct flow_filter *, struct flowinfo_in *); static u_long get_filt_handle(struct acc_classifier *, int); static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); static u_int32_t filt2fibmask(struct flow_filter *); static void ip4f_cache(struct ip *, struct flowinfo_in *); static int ip4f_lookup(struct ip *, struct flowinfo_in *); static int ip4f_init(void); static struct ip4_frag *ip4f_alloc(void); static void ip4f_free(struct ip4_frag *); #endif /* ALTQ3_CLFIER_COMPAT */ /* * alternate queueing support routines */ /* look up the queue state by the interface name and the queueing type. */ void * altq_lookup(name, type) char *name; int type; { struct ifnet *ifp; if ((ifp = ifunit(name)) != NULL) { /* read if_snd unlocked */ if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) return (ifp->if_snd.altq_disc); } return NULL; } int altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify) struct ifaltq *ifq; int type; void *discipline; int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); struct mbuf *(*dequeue)(struct ifaltq *, int); int (*request)(struct ifaltq *, int, void *); void *clfier; void *(*classify)(void *, struct mbuf *, int); { IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } #ifdef ALTQ3_COMPAT /* * pfaltq can override the existing discipline, but altq3 cannot. * check these if clfier is not NULL (which implies altq3). 
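* (in other words: the pf(4) path passes clfier == NULL and may override whatever is attached, while the old altq3 ioctl path passes its acc_classifier and is refused below with EBUSY while the queue is enabled, or EEXIST when a discipline is already attached.)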
*/ if (clfier != NULL) { if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return EBUSY; } if (ALTQ_IS_ATTACHED(ifq)) { IFQ_UNLOCK(ifq); return EEXIST; } } #endif ifq->altq_type = type; ifq->altq_disc = discipline; ifq->altq_enqueue = enqueue; ifq->altq_dequeue = dequeue; ifq->altq_request = request; ifq->altq_clfier = clfier; ifq->altq_classify = classify; ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED); #ifdef ALTQ3_COMPAT #ifdef ALTQ_KLD altq_module_incref(type); #endif #endif IFQ_UNLOCK(ifq); return 0; } int altq_detach(ifq) struct ifaltq *ifq; { IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return EBUSY; } if (!ALTQ_IS_ATTACHED(ifq)) { IFQ_UNLOCK(ifq); return (0); } #ifdef ALTQ3_COMPAT #ifdef ALTQ_KLD altq_module_declref(ifq->altq_type); #endif #endif ifq->altq_type = ALTQT_NONE; ifq->altq_disc = NULL; ifq->altq_enqueue = NULL; ifq->altq_dequeue = NULL; ifq->altq_request = NULL; ifq->altq_clfier = NULL; ifq->altq_classify = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; IFQ_UNLOCK(ifq); return 0; } int altq_enable(ifq) struct ifaltq *ifq; { int s; IFQ_LOCK(ifq); if (!ALTQ_IS_READY(ifq)) { IFQ_UNLOCK(ifq); return ENXIO; } if (ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return 0; } s = splnet(); IFQ_PURGE_NOLOCK(ifq); ASSERT(ifq->ifq_len == 0); ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */ ifq->altq_flags |= ALTQF_ENABLED; if (ifq->altq_clfier != NULL) ifq->altq_flags |= ALTQF_CLASSIFY; splx(s); IFQ_UNLOCK(ifq); return 0; } int altq_disable(ifq) struct ifaltq *ifq; { int s; IFQ_LOCK(ifq); if (!ALTQ_IS_ENABLED(ifq)) { IFQ_UNLOCK(ifq); return 0; } s = splnet(); IFQ_PURGE_NOLOCK(ifq); ASSERT(ifq->ifq_len == 0); ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY); splx(s); IFQ_UNLOCK(ifq); return 0; } #ifdef ALTQ_DEBUG void altq_assert(file, line, failedexpr) const char *file, *failedexpr; int line; { (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", failedexpr, file, line); panic("altq assertion"); /* NOTREACHED */ } #endif /* * internal representation of token bucket parameters - * rate: byte_per_unittime << 32 - * (((bits_per_sec) / 8) << 32) / machclk_freq - * depth: byte << 32 + * rate: (byte_per_unittime << TBR_SHIFT) / machclk_freq + * (((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq + * depth: byte << TBR_SHIFT * */ -#define TBR_SHIFT 32 +#define TBR_SHIFT 29 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) static struct mbuf * tbr_dequeue(ifq, op) struct ifaltq *ifq; int op; { struct tb_regulator *tbr; struct mbuf *m; int64_t interval; u_int64_t now; IFQ_LOCK_ASSERT(ifq); tbr = ifq->altq_tbr; if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { /* if this is a remove after poll, bypass tbr check */ } else { /* update token only when it is negative */ if (tbr->tbr_token <= 0) { now = read_machclk(); interval = now - tbr->tbr_last; if (interval >= tbr->tbr_filluptime) tbr->tbr_token = tbr->tbr_depth; else { tbr->tbr_token += interval * tbr->tbr_rate; if (tbr->tbr_token > tbr->tbr_depth) tbr->tbr_token = tbr->tbr_depth; } tbr->tbr_last = now; } /* if token is still negative, don't allow dequeue */ if (tbr->tbr_token <= 0) return (NULL); } if (ALTQ_IS_ENABLED(ifq)) m = (*ifq->altq_dequeue)(ifq, op); else { if (op == ALTDQ_POLL) _IF_POLL(ifq, m); else _IF_DEQUEUE(ifq, m); } if (m != NULL && op == ALTDQ_REMOVE) tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); tbr->tbr_lastop = op; return (m); } /* * set a token bucket regulator. 
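* (illustrative arithmetic, hypothetical numbers: with TBR_SHIFT = 29 and machclk_freq = 1000000000 (1 GHz), a 100 Mbit/s profile yields tbr_rate = ((100000000 / 8) << 29) / 1000000000, about 6.7 million token units per machclk tick, and a 1500-byte packet debits TBR_SCALE(1500) = 1500 << 29 units at dequeue; a smaller shift trades fractional precision for headroom in the signed 64-bit token fields.)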
* if the specified rate is zero, the token bucket regulator is deleted. */ int tbr_set(ifq, profile) struct ifaltq *ifq; struct tb_profile *profile; { struct tb_regulator *tbr, *otbr; if (tbr_dequeue_ptr == NULL) tbr_dequeue_ptr = tbr_dequeue; if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) { printf("tbr_set: no cpu clock available!\n"); return (ENXIO); } IFQ_LOCK(ifq); if (profile->rate == 0) { /* delete this tbr */ if ((tbr = ifq->altq_tbr) == NULL) { IFQ_UNLOCK(ifq); return (ENOENT); } ifq->altq_tbr = NULL; free(tbr, M_DEVBUF); IFQ_UNLOCK(ifq); return (0); } tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); if (tbr == NULL) { IFQ_UNLOCK(ifq); return (ENOMEM); } tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; tbr->tbr_depth = TBR_SCALE(profile->depth); if (tbr->tbr_rate > 0) tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; else - tbr->tbr_filluptime = 0xffffffffffffffffLL; + tbr->tbr_filluptime = LLONG_MAX; + /* + * The longest time between tbr_dequeue() calls will be about 1 + * system tick, as the callout that drives it is scheduled once per + * tick. The refill-time detection logic in tbr_dequeue() can only + * properly detect the passage of up to LLONG_MAX machclk ticks. + * Therefore, in order for this logic to function properly in the + * extreme case, the maximum value of tbr_filluptime should be + * LLONG_MAX less one system tick's worth of machclk ticks less + * some additional slop factor (here one more system tick's worth + * of machclk ticks). + */ + if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick)) + tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick; tbr->tbr_token = tbr->tbr_depth; tbr->tbr_last = read_machclk(); tbr->tbr_lastop = ALTDQ_REMOVE; otbr = ifq->altq_tbr; ifq->altq_tbr = tbr; /* set the new tbr */ if (otbr != NULL) free(otbr, M_DEVBUF); else { if (tbr_timer == 0) { CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); tbr_timer = 1; } } IFQ_UNLOCK(ifq); return (0); } /* * tbr_timeout goes through the interface list, and kicks the drivers * if necessary. * * MPSAFE */ static void tbr_timeout(arg) void *arg; { VNET_ITERATOR_DECL(vnet_iter); struct ifnet *ifp; int active, s; active = 0; s = splnet(); IFNET_RLOCK_NOSLEEP(); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp; ifp = CK_STAILQ_NEXT(ifp, if_link)) { /* read from if_snd unlocked */ if (!TBR_IS_ENABLED(&ifp->if_snd)) continue; active++; if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL) (*ifp->if_start)(ifp); } CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); IFNET_RUNLOCK_NOSLEEP(); splx(s); if (active > 0) CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); else tbr_timer = 0; /* don't need tbr_timer anymore */ } /* - * get token bucket regulator profile - */ -int -tbr_get(ifq, profile) - struct ifaltq *ifq; - struct tb_profile *profile; -{ - struct tb_regulator *tbr; - - IFQ_LOCK(ifq); - if ((tbr = ifq->altq_tbr) == NULL) { - profile->rate = 0; - profile->depth = 0; - } else { - profile->rate = - (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq); - profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth); - } - IFQ_UNLOCK(ifq); - return (0); -} - -/* * attach a discipline to the interface. if one already exists, it is * overridden. * Locking is done in the discipline specific attach functions. Basically * they call back to altq_attach which takes care of the attach and locking. 
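* (sketch of the pf(4) call flow, not a complete caller: a pf_altq with scheduler == ALTQT_PRIQ and an empty qname is created via altq_add() -> priq_add_altq(), and altq_pfattach() -> priq_pfattach() then hands the scheduler's enqueue/dequeue hooks to altq_attach() for the interface.)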
*/ int altq_pfattach(struct pf_altq *a) { int error = 0; switch (a->scheduler) { case ALTQT_NONE: break; #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_pfattach(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_pfattach(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_pfattach(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_pfattach(a); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: error = codel_pfattach(a); break; #endif default: error = ENXIO; } return (error); } /* * detach a discipline from the interface. * it is possible that the discipline was already overridden by another * discipline. */ int altq_pfdetach(struct pf_altq *a) { struct ifnet *ifp; int s, error = 0; if ((ifp = ifunit(a->ifname)) == NULL) return (EINVAL); /* if this discipline is no longer referenced, just return */ /* read unlocked from if_snd */ if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc) return (0); s = splnet(); /* read unlocked from if_snd, _disable and _detach take care */ if (ALTQ_IS_ENABLED(&ifp->if_snd)) error = altq_disable(&ifp->if_snd); if (error == 0) error = altq_detach(&ifp->if_snd); splx(s); return (error); } /* * add a discipline or a queue * Locking is done in the discipline specific functions with regards to * malloc with WAITOK, also it is not yet clear which lock to use. */ int altq_add(struct pf_altq *a) { int error = 0; if (a->qname[0] != 0) return (altq_add_queue(a)); if (machclk_freq == 0) init_machclk(); if (machclk_freq == 0) panic("altq_add: no cpu clock"); switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_add_altq(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_add_altq(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_add_altq(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_add_altq(a); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: error = codel_add_altq(a); break; #endif default: error = ENXIO; } return (error); } /* * remove a discipline or a queue * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_remove(struct pf_altq *a) { int error = 0; if (a->qname[0] != 0) return (altq_remove_queue(a)); switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_remove_altq(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_remove_altq(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_remove_altq(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_remove_altq(a); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: error = codel_remove_altq(a); break; #endif default: error = ENXIO; } return (error); } /* * add a queue to the discipline * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_add_queue(struct pf_altq *a) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_add_queue(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_add_queue(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_add_queue(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_add_queue(a); break; #endif default: error = ENXIO; } return (error); } /* * remove a queue from the discipline * It is yet unclear what lock to use to protect this operation, the * discipline specific functions will determine and grab it */ int altq_remove_queue(struct pf_altq *a) { int error = 0; switch 
(a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: error = cbq_remove_queue(a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: error = priq_remove_queue(a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: error = hfsc_remove_queue(a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: error = fairq_remove_queue(a); break; #endif default: error = ENXIO; } return (error); } /* * get queue statistics * Locking is done in the discipline specific functions with regards to * copyout operations, also it is not yet clear which lock to use. */ int -altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: - error = cbq_getqstats(a, ubuf, nbytes); + error = cbq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: - error = priq_getqstats(a, ubuf, nbytes); + error = priq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: - error = hfsc_getqstats(a, ubuf, nbytes); + error = hfsc_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: - error = fairq_getqstats(a, ubuf, nbytes); + error = fairq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: - error = codel_getqstats(a, ubuf, nbytes); + error = codel_getqstats(a, ubuf, nbytes, version); break; #endif default: error = ENXIO; } return (error); } /* * read and write diffserv field in IPv4 or IPv6 header */ u_int8_t read_dsfield(m, pktattr) struct mbuf *m; struct altq_pktattr *pktattr; { struct mbuf *m0; u_int8_t ds_field = 0; if (pktattr == NULL || (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) return ((u_int8_t)0); /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if ((pktattr->pattr_hdr >= m0->m_data) && (pktattr->pattr_hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, pattr_hdr is stale */ pktattr->pattr_af = AF_UNSPEC; #ifdef ALTQ_DEBUG printf("read_dsfield: can't locate header!\n"); #endif return ((u_int8_t)0); } if (pktattr->pattr_af == AF_INET) { struct ip *ip = (struct ip *)pktattr->pattr_hdr; if (ip->ip_v != 4) return ((u_int8_t)0); /* version mismatch! */ ds_field = ip->ip_tos; } #ifdef INET6 else if (pktattr->pattr_af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return ((u_int8_t)0); /* version mismatch! */ ds_field = (flowlabel >> 20) & 0xff; } #endif return (ds_field); } void write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield) { struct mbuf *m0; if (pktattr == NULL || (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) return; /* verify that pattr_hdr is within the mbuf data */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if ((pktattr->pattr_hdr >= m0->m_data) && (pktattr->pattr_hdr < m0->m_data + m0->m_len)) break; if (m0 == NULL) { /* ick, pattr_hdr is stale */ pktattr->pattr_af = AF_UNSPEC; #ifdef ALTQ_DEBUG printf("write_dsfield: can't locate header!\n"); #endif return; } if (pktattr->pattr_af == AF_INET) { struct ip *ip = (struct ip *)pktattr->pattr_hdr; u_int8_t old; int32_t sum; if (ip->ip_v != 4) return; /* version mismatch! 
*/ old = ip->ip_tos; dsfield |= old & 3; /* leave CU bits */ if (old == dsfield) return; ip->ip_tos = dsfield; /* * update checksum (from RFC1624) * HC' = ~(~HC + ~m + m') */ sum = ~ntohs(ip->ip_sum) & 0xffff; sum += 0xff00 + (~old & 0xff) + dsfield; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); /* add carry */ ip->ip_sum = htons(~sum & 0xffff); } #ifdef INET6 else if (pktattr->pattr_af == AF_INET6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; u_int32_t flowlabel; flowlabel = ntohl(ip6->ip6_flow); if ((flowlabel >> 28) != 6) return; /* version mismatch! */ flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20); ip6->ip6_flow = htonl(flowlabel); } #endif return; } /* * high resolution clock support taking advantage of a machine dependent * high resolution time counter (e.g., timestamp counter of intel pentium). * we assume * - 64-bit-long monotonically-increasing counter * - frequency range is 100M-4GHz (CPU speed) */ /* if pcc is not available or disabled, emulate 256MHz using microtime() */ #define MACHCLK_SHIFT 8 int machclk_usepcc; u_int32_t machclk_freq; u_int32_t machclk_per_tick; #if defined(__i386__) && defined(__NetBSD__) extern u_int64_t cpu_tsc_freq; #endif #if (__FreeBSD_version >= 700035) /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg, const struct cf_level *level, int status) { /* If there was an error during the transition, don't do anything. */ if (status != 0) return; #if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__)) /* If TSC is P-state invariant, don't do anything. */ if (tsc_is_invariant) return; #endif /* Total setting for this level gives the new frequency in MHz. */ init_machclk(); } EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_LAST); #endif /* __FreeBSD_version >= 700035 */ static void init_machclk_setup(void) { #if (__FreeBSD_version >= 600000) callout_init(&tbr_callout, 0); #endif machclk_usepcc = 1; #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC) machclk_usepcc = 0; #endif #if defined(__FreeBSD__) && defined(SMP) machclk_usepcc = 0; #endif #if defined(__NetBSD__) && defined(MULTIPROCESSOR) machclk_usepcc = 0; #endif #if defined(__amd64__) || defined(__i386__) /* check if TSC is available */ if ((cpu_feature & CPUID_TSC) == 0 || atomic_load_acq_64(&tsc_freq) == 0) machclk_usepcc = 0; #endif } void init_machclk(void) { static int called; /* Call one-time initialization function. */ if (!called) { init_machclk_setup(); called = 1; } if (machclk_usepcc == 0) { /* emulate 256MHz using microtime() */ machclk_freq = 1000000 << MACHCLK_SHIFT; machclk_per_tick = machclk_freq / hz; #ifdef ALTQ_DEBUG printf("altq: emulate %uHz cpu clock\n", machclk_freq); #endif return; } /* * if the clock frequency (of Pentium TSC or Alpha PCC) is * accessible, just use it. */ #if defined(__amd64__) || defined(__i386__) machclk_freq = atomic_load_acq_64(&tsc_freq); #endif /* * if we don't know the clock frequency, measure it. 
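* (measurement sketch: bracket a roughly one-second tsleep() with microtime()/read_machclk() pairs, then machclk_freq ~= (end - start) * 1000000 / elapsed usec; tsleep() wakeup jitter limits the precision, which is acceptable because ALTQ only needs a stable, roughly calibrated clock.)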
*/ if (machclk_freq == 0) { static int wait; struct timeval tv_start, tv_end; u_int64_t start, end, diff; int timo; microtime(&tv_start); start = read_machclk(); timo = hz; /* 1 sec */ (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); microtime(&tv_end); end = read_machclk(); diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 + tv_end.tv_usec - tv_start.tv_usec; if (diff != 0) machclk_freq = (u_int)((end - start) * 1000000 / diff); } machclk_per_tick = machclk_freq / hz; #ifdef ALTQ_DEBUG printf("altq: CPU clock: %uHz\n", machclk_freq); #endif } #if defined(__OpenBSD__) && defined(__i386__) static __inline u_int64_t rdtsc(void) { u_int64_t rv; __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv)); return (rv); } #endif /* __OpenBSD__ && __i386__ */ u_int64_t read_machclk(void) { u_int64_t val; if (machclk_usepcc) { #if defined(__amd64__) || defined(__i386__) val = rdtsc(); #else panic("read_machclk"); #endif } else { struct timeval tv, boottime; microtime(&tv); getboottime(&boottime); val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000 + tv.tv_usec) << MACHCLK_SHIFT); } return (val); } #ifdef ALTQ3_CLFIER_COMPAT #ifndef IPPROTO_ESP #define IPPROTO_ESP 50 /* encapsulating security payload */ #endif #ifndef IPPROTO_AH #define IPPROTO_AH 51 /* authentication header */ #endif /* * extract flow information from a given packet. * filt_mask shows flowinfo fields required. * we assume the ip header is in one mbuf, and addresses and ports are * in network byte order. */ int altq_extractflow(m, af, flow, filt_bmask) struct mbuf *m; int af; struct flowinfo *flow; u_int32_t filt_bmask; { switch (af) { case PF_INET: { struct flowinfo_in *fin; struct ip *ip; ip = mtod(m, struct ip *); if (ip->ip_v != 4) break; fin = (struct flowinfo_in *)flow; fin->fi_len = sizeof(struct flowinfo_in); fin->fi_family = AF_INET; fin->fi_proto = ip->ip_p; fin->fi_tos = ip->ip_tos; fin->fi_src.s_addr = ip->ip_src.s_addr; fin->fi_dst.s_addr = ip->ip_dst.s_addr; if (filt_bmask & FIMB4_PORTS) /* if port info is required, extract port numbers */ extract_ports4(m, ip, fin); else { fin->fi_sport = 0; fin->fi_dport = 0; fin->fi_gpi = 0; } return (1); } #ifdef INET6 case PF_INET6: { struct flowinfo_in6 *fin6; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); /* should we check the ip version? */ fin6 = (struct flowinfo_in6 *)flow; fin6->fi6_len = sizeof(struct flowinfo_in6); fin6->fi6_family = AF_INET6; fin6->fi6_proto = ip6->ip6_nxt; fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); fin6->fi6_src = ip6->ip6_src; fin6->fi6_dst = ip6->ip6_dst; if ((filt_bmask & FIMB6_PORTS) || ((filt_bmask & FIMB6_PROTO) && ip6->ip6_nxt > IPPROTO_IPV6)) /* * if port info is required, or proto is required * but there are option headers, extract port * and protocol numbers. */ extract_ports6(m, ip6, fin6); else { fin6->fi6_sport = 0; fin6->fi6_dport = 0; fin6->fi6_gpi = 0; } return (1); } #endif /* INET6 */ default: break; } /* failed */ flow->fi_len = sizeof(struct flowinfo); flow->fi_family = AF_UNSPEC; return (0); } /* * helper routine to extract port numbers */ /* structure for ipsec and ipv6 option header template */ struct _opt6 { u_int8_t opt6_nxt; /* next header */ u_int8_t opt6_hlen; /* header extension length */ u_int16_t _pad; u_int32_t ah_spi; /* security parameter index for authentication header */ }; /* * extract port numbers from a ipv4 packet. 
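* (worked example: for a fragmented UDP datagram only the first fragment carries the UDP header, so extract_ports4() reads the ports there and, while IP_MF is set, caches (src, dst, proto, ip_id) -> (sport, dport) via ip4f_cache(); later fragments, recognized by a nonzero IP_OFFMASK offset, recover the ports through ip4f_lookup().)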
*/ static int extract_ports4(m, ip, fin) struct mbuf *m; struct ip *ip; struct flowinfo_in *fin; { struct mbuf *m0; u_short ip_off; u_int8_t proto; int off; fin->fi_sport = 0; fin->fi_dport = 0; fin->fi_gpi = 0; ip_off = ntohs(ip->ip_off); /* if it is a fragment, try cached fragment info */ if (ip_off & IP_OFFMASK) { ip4f_lookup(ip, fin); return (1); } /* locate the mbuf containing the protocol header */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)ip >= m0->m_data) && ((caddr_t)ip < m0->m_data + m0->m_len)) break; if (m0 == NULL) { #ifdef ALTQ_DEBUG printf("extract_ports4: can't locate header! ip=%p\n", ip); #endif return (0); } off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); proto = ip->ip_p; #ifdef ALTQ_IPSEC again: #endif while (off >= m0->m_len) { off -= m0->m_len; m0 = m0->m_next; if (m0 == NULL) return (0); /* bogus ip_hl! */ } if (m0->m_len < off + 4) return (0); switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: { struct udphdr *udp; udp = (struct udphdr *)(mtod(m0, caddr_t) + off); fin->fi_sport = udp->uh_sport; fin->fi_dport = udp->uh_dport; fin->fi_proto = proto; } break; #ifdef ALTQ_IPSEC case IPPROTO_ESP: if (fin->fi_gpi == 0){ u_int32_t *gpi; gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); fin->fi_gpi = *gpi; } fin->fi_proto = proto; break; case IPPROTO_AH: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); proto = opt6->opt6_nxt; off += 8 + (opt6->opt6_hlen * 4); if (fin->fi_gpi == 0 && m0->m_len >= off + 8) fin->fi_gpi = opt6->ah_spi; } /* goto the next header */ goto again; #endif /* ALTQ_IPSEC */ default: fin->fi_proto = proto; return (0); } /* if this is a first fragment, cache it. */ if (ip_off & IP_MF) ip4f_cache(ip, fin); return (1); } #ifdef INET6 static int extract_ports6(m, ip6, fin6) struct mbuf *m; struct ip6_hdr *ip6; struct flowinfo_in6 *fin6; { struct mbuf *m0; int off; u_int8_t proto; fin6->fi6_gpi = 0; fin6->fi6_sport = 0; fin6->fi6_dport = 0; /* locate the mbuf containing the protocol header */ for (m0 = m; m0 != NULL; m0 = m0->m_next) if (((caddr_t)ip6 >= m0->m_data) && ((caddr_t)ip6 < m0->m_data + m0->m_len)) break; if (m0 == NULL) { #ifdef ALTQ_DEBUG printf("extract_ports6: can't locate header! 
ip6=%p\n", ip6); #endif return (0); } off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); proto = ip6->ip6_nxt; do { while (off >= m0->m_len) { off -= m0->m_len; m0 = m0->m_next; if (m0 == NULL) return (0); } if (m0->m_len < off + 4) return (0); switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: { struct udphdr *udp; udp = (struct udphdr *)(mtod(m0, caddr_t) + off); fin6->fi6_sport = udp->uh_sport; fin6->fi6_dport = udp->uh_dport; fin6->fi6_proto = proto; } return (1); case IPPROTO_ESP: if (fin6->fi6_gpi == 0) { u_int32_t *gpi; gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); fin6->fi6_gpi = *gpi; } fin6->fi6_proto = proto; return (1); case IPPROTO_AH: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8) fin6->fi6_gpi = opt6->ah_spi; proto = opt6->opt6_nxt; off += 8 + (opt6->opt6_hlen * 4); /* goto the next header */ break; } case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct _opt6 *opt6; opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); proto = opt6->opt6_nxt; off += (opt6->opt6_hlen + 1) * 8; /* goto the next header */ break; } case IPPROTO_FRAGMENT: /* ipv6 fragmentations are not supported yet */ default: fin6->fi6_proto = proto; return (0); } } while (1); /*NOTREACHED*/ } #endif /* INET6 */ /* * altq common classifier */ int acc_add_filter(classifier, filter, class, phandle) struct acc_classifier *classifier; struct flow_filter *filter; void *class; u_long *phandle; { struct acc_filter *afp, *prev, *tmp; int i, s; #ifdef INET6 if (filter->ff_flow.fi_family != AF_INET && filter->ff_flow.fi_family != AF_INET6) return (EINVAL); #else if (filter->ff_flow.fi_family != AF_INET) return (EINVAL); #endif afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK); if (afp == NULL) return (ENOMEM); bzero(afp, sizeof(struct acc_filter)); afp->f_filter = *filter; afp->f_class = class; i = ACC_WILDCARD_INDEX; if (filter->ff_flow.fi_family == AF_INET) { struct flow_filter *filter4 = &afp->f_filter; /* * if address is 0, it's a wildcard. if address mask * isn't set, use full mask. */ if (filter4->ff_flow.fi_dst.s_addr == 0) filter4->ff_mask.mask_dst.s_addr = 0; else if (filter4->ff_mask.mask_dst.s_addr == 0) filter4->ff_mask.mask_dst.s_addr = 0xffffffff; if (filter4->ff_flow.fi_src.s_addr == 0) filter4->ff_mask.mask_src.s_addr = 0; else if (filter4->ff_mask.mask_src.s_addr == 0) filter4->ff_mask.mask_src.s_addr = 0xffffffff; /* clear extra bits in addresses */ filter4->ff_flow.fi_dst.s_addr &= filter4->ff_mask.mask_dst.s_addr; filter4->ff_flow.fi_src.s_addr &= filter4->ff_mask.mask_src.s_addr; /* * if dst address is a wildcard, use hash-entry * ACC_WILDCARD_INDEX. 
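* (example: a filter on dst 10.1.2.3/32 hashes into one specific bucket and is consulted only for packets whose dst hashes there, while a filter on dst 10.0.0.0/8 has mask_dst != 0xffffffff and therefore lands in ACC_WILDCARD_INDEX, where it is checked for every packet on the wildcard pass.)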
*/ if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff) i = ACC_WILDCARD_INDEX; else i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr); } #ifdef INET6 else if (filter->ff_flow.fi_family == AF_INET6) { struct flow_filter6 *filter6 = (struct flow_filter6 *)&afp->f_filter; #ifndef IN6MASK0 /* taken from kame ipv6 */ #define IN6MASK0 {{{ 0, 0, 0, 0 }}} #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}} const struct in6_addr in6mask0 = IN6MASK0; const struct in6_addr in6mask128 = IN6MASK128; #endif if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst)) filter6->ff_mask6.mask6_dst = in6mask0; else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst)) filter6->ff_mask6.mask6_dst = in6mask128; if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src)) filter6->ff_mask6.mask6_src = in6mask0; else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src)) filter6->ff_mask6.mask6_src = in6mask128; /* clear extra bits in addresses */ for (i = 0; i < 16; i++) filter6->ff_flow6.fi6_dst.s6_addr[i] &= filter6->ff_mask6.mask6_dst.s6_addr[i]; for (i = 0; i < 16; i++) filter6->ff_flow6.fi6_src.s6_addr[i] &= filter6->ff_mask6.mask6_src.s6_addr[i]; if (filter6->ff_flow6.fi6_flowlabel == 0) i = ACC_WILDCARD_INDEX; else i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel); } #endif /* INET6 */ afp->f_handle = get_filt_handle(classifier, i); /* update filter bitmask */ afp->f_fbmask = filt2fibmask(filter); classifier->acc_fbmask |= afp->f_fbmask; /* * add this filter to the filter list. * filters are ordered from the highest rule number. */ s = splnet(); prev = NULL; LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) prev = tmp; else break; } if (prev == NULL) LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); else LIST_INSERT_AFTER(prev, afp, f_chain); splx(s); *phandle = afp->f_handle; return (0); } int acc_delete_filter(classifier, handle) struct acc_classifier *classifier; u_long handle; { struct acc_filter *afp; int s; if ((afp = filth_to_filtp(classifier, handle)) == NULL) return (EINVAL); s = splnet(); LIST_REMOVE(afp, f_chain); splx(s); free(afp, M_DEVBUF); /* todo: update filt_bmask */ return (0); } /* * delete filters referencing to the specified class. * if the all flag is not 0, delete all the filters. 
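* (hypothetical caller, for illustration only: a scheduler's class-destroy path would use acc_discard_filters(&pif->pif_classifier, cl, 0), and its clear-interface path the same call with all != 0.)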
*/ int acc_discard_filters(classifier, class, all) struct acc_classifier *classifier; void *class; int all; { struct acc_filter *afp; int i, s; s = splnet(); for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { do { LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (all || afp->f_class == class) { LIST_REMOVE(afp, f_chain); free(afp, M_DEVBUF); /* start again from the head */ break; } } while (afp != NULL); } splx(s); if (all) classifier->acc_fbmask = 0; return (0); } void * acc_classify(clfier, m, af) void *clfier; struct mbuf *m; int af; { struct acc_classifier *classifier; struct flowinfo flow; struct acc_filter *afp; int i; classifier = (struct acc_classifier *)clfier; altq_extractflow(m, af, &flow, classifier->acc_fbmask); if (flow.fi_family == AF_INET) { struct flowinfo_in *fp = (struct flowinfo_in *)&flow; if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { /* only tos is used */ LIST_FOREACH(afp, &classifier->acc_filters[ACC_WILDCARD_INDEX], f_chain) if (apply_tosfilter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); } else if ((classifier->acc_fbmask & (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) == 0) { /* only proto and ports are used */ LIST_FOREACH(afp, &classifier->acc_filters[ACC_WILDCARD_INDEX], f_chain) if (apply_ppfilter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); } else { /* get the filter hash entry from its dest address */ i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); do { /* * go through this loop twice. first for dst * hash, second for wildcards. */ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (apply_filter4(afp->f_fbmask, &afp->f_filter, fp)) /* filter matched */ return (afp->f_class); /* * check again for filters with a dst addr * wildcard. * (daddr == 0 || dmask != 0xffffffff). */ if (i != ACC_WILDCARD_INDEX) i = ACC_WILDCARD_INDEX; else break; } while (1); } } #ifdef INET6 else if (flow.fi_family == AF_INET6) { struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; /* get the filter hash entry from its flow ID */ if (fp6->fi6_flowlabel != 0) i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); else /* flowlable can be zero */ i = ACC_WILDCARD_INDEX; /* go through this loop twice. first for flow hash, second for wildcards. */ do { LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (apply_filter6(afp->f_fbmask, (struct flow_filter6 *)&afp->f_filter, fp6)) /* filter matched */ return (afp->f_class); /* * check again for filters with a wildcard. 
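* (pass 1 scanned the bucket keyed by the packet's flow label; pass 2 scans ACC_WILDCARD_INDEX, which also holds every filter whose own flow label is zero.)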
*/ if (i != ACC_WILDCARD_INDEX) i = ACC_WILDCARD_INDEX; else break; } while (1); } #endif /* INET6 */ /* no filter matched */ return (NULL); } static int apply_filter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) return (0); if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) return (0); if ((fbmask & FIMB4_DADDR) && filt->ff_flow.fi_dst.s_addr != (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) return (0); if ((fbmask & FIMB4_SADDR) && filt->ff_flow.fi_src.s_addr != (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) return (0); if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) return (0); if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != (pkt->fi_tos & filt->ff_mask.mask_tos)) return (0); if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) return (0); /* match */ return (1); } /* * filter matching function optimized for a common case that checks * only protocol and port numbers */ static int apply_ppfilter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) return (0); if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) return (0); if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) return (0); /* match */ return (1); } /* * filter matching function only for tos field. */ static int apply_tosfilter4(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter *filt; struct flowinfo_in *pkt; { if (filt->ff_flow.fi_family != AF_INET) return (0); if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != (pkt->fi_tos & filt->ff_mask.mask_tos)) return (0); /* match */ return (1); } #ifdef INET6 static int apply_filter6(fbmask, filt, pkt) u_int32_t fbmask; struct flow_filter6 *filt; struct flowinfo_in6 *pkt; { int i; if (filt->ff_flow6.fi6_family != AF_INET6) return (0); if ((fbmask & FIMB6_FLABEL) && filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) return (0); if ((fbmask & FIMB6_PROTO) && filt->ff_flow6.fi6_proto != pkt->fi6_proto) return (0); if ((fbmask & FIMB6_SPORT) && filt->ff_flow6.fi6_sport != pkt->fi6_sport) return (0); if ((fbmask & FIMB6_DPORT) && filt->ff_flow6.fi6_dport != pkt->fi6_dport) return (0); if (fbmask & FIMB6_SADDR) { for (i = 0; i < 4; i++) if (filt->ff_flow6.fi6_src.s6_addr32[i] != (pkt->fi6_src.s6_addr32[i] & filt->ff_mask6.mask6_src.s6_addr32[i])) return (0); } if (fbmask & FIMB6_DADDR) { for (i = 0; i < 4; i++) if (filt->ff_flow6.fi6_dst.s6_addr32[i] != (pkt->fi6_dst.s6_addr32[i] & filt->ff_mask6.mask6_dst.s6_addr32[i])) return (0); } if ((fbmask & FIMB6_TCLASS) && filt->ff_flow6.fi6_tclass != (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) return (0); if ((fbmask & FIMB6_GPI) && filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) return (0); /* match */ return (1); } #endif /* INET6 */ /* * filter handle: * bit 20-28: index to the filter hash table * bit 0-19: unique id in the hash bucket. 
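* example: a filter stored in hash bucket 3 with per-bucket id 0x2a gets handle (3 << 20) | 0x2a == 0x30002a, and ACC_GET_HINDEX(0x30002a) == 3 recovers the bucket for filth_to_filtp().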
*/ static u_long get_filt_handle(classifier, i) struct acc_classifier *classifier; int i; { static u_long handle_number = 1; u_long handle; struct acc_filter *afp; while (1) { handle = handle_number++ & 0x000fffff; if (LIST_EMPTY(&classifier->acc_filters[i])) break; LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if ((afp->f_handle & 0x000fffff) == handle) break; if (afp == NULL) break; /* this handle is already used, try again */ } return ((i << 20) | handle); } /* convert filter handle to filter pointer */ static struct acc_filter * filth_to_filtp(classifier, handle) struct acc_classifier *classifier; u_long handle; { struct acc_filter *afp; int i; i = ACC_GET_HINDEX(handle); LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) if (afp->f_handle == handle) return (afp); return (NULL); } /* create flowinfo bitmask */ static u_int32_t filt2fibmask(filt) struct flow_filter *filt; { u_int32_t mask = 0; #ifdef INET6 struct flow_filter6 *filt6; #endif switch (filt->ff_flow.fi_family) { case AF_INET: if (filt->ff_flow.fi_proto != 0) mask |= FIMB4_PROTO; if (filt->ff_flow.fi_tos != 0) mask |= FIMB4_TOS; if (filt->ff_flow.fi_dst.s_addr != 0) mask |= FIMB4_DADDR; if (filt->ff_flow.fi_src.s_addr != 0) mask |= FIMB4_SADDR; if (filt->ff_flow.fi_sport != 0) mask |= FIMB4_SPORT; if (filt->ff_flow.fi_dport != 0) mask |= FIMB4_DPORT; if (filt->ff_flow.fi_gpi != 0) mask |= FIMB4_GPI; break; #ifdef INET6 case AF_INET6: filt6 = (struct flow_filter6 *)filt; if (filt6->ff_flow6.fi6_proto != 0) mask |= FIMB6_PROTO; if (filt6->ff_flow6.fi6_tclass != 0) mask |= FIMB6_TCLASS; if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) mask |= FIMB6_DADDR; if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) mask |= FIMB6_SADDR; if (filt6->ff_flow6.fi6_sport != 0) mask |= FIMB6_SPORT; if (filt6->ff_flow6.fi6_dport != 0) mask |= FIMB6_DPORT; if (filt6->ff_flow6.fi6_gpi != 0) mask |= FIMB6_GPI; if (filt6->ff_flow6.fi6_flowlabel != 0) mask |= FIMB6_FLABEL; break; #endif /* INET6 */ } return (mask); } /* * helper functions to handle IPv4 fragments. * currently only in-sequence fragments are handled. * - fragment info is cached in a LRU list. * - when a first fragment is found, cache its flow info. * - when a non-first fragment is found, lookup the cache. */ struct ip4_frag { TAILQ_ENTRY(ip4_frag) ip4f_chain; char ip4f_valid; u_short ip4f_id; struct flowinfo_in ip4f_info; }; static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ static void ip4f_cache(ip, fin) struct ip *ip; struct flowinfo_in *fin; { struct ip4_frag *fp; if (TAILQ_EMPTY(&ip4f_list)) { /* first time call, allocate fragment cache entries. */ if (ip4f_init() < 0) /* allocation failed! 
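* in that case simply skip caching; note the cache is a fixed LRU of IP4F_TABSIZE entries recycled at the tail by ip4f_alloc(), so once more than IP4F_TABSIZE fragmented flows are in flight the oldest entry is evicted and lookups for its later fragments simply miss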
*/ return; } fp = ip4f_alloc(); fp->ip4f_id = ip->ip_id; fp->ip4f_info.fi_proto = ip->ip_p; fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr; fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr; /* save port numbers */ fp->ip4f_info.fi_sport = fin->fi_sport; fp->ip4f_info.fi_dport = fin->fi_dport; fp->ip4f_info.fi_gpi = fin->fi_gpi; } static int ip4f_lookup(ip, fin) struct ip *ip; struct flowinfo_in *fin; { struct ip4_frag *fp; for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid; fp = TAILQ_NEXT(fp, ip4f_chain)) if (ip->ip_id == fp->ip4f_id && ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr && ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr && ip->ip_p == fp->ip4f_info.fi_proto) { /* found the matching entry */ fin->fi_sport = fp->ip4f_info.fi_sport; fin->fi_dport = fp->ip4f_info.fi_dport; fin->fi_gpi = fp->ip4f_info.fi_gpi; if ((ntohs(ip->ip_off) & IP_MF) == 0) /* this is the last fragment, release the entry. */ ip4f_free(fp); return (1); } /* no matching entry found */ return (0); } static int ip4f_init(void) { struct ip4_frag *fp; int i; TAILQ_INIT(&ip4f_list); for (i=0; i<IP4F_TABSIZE; i++) { fp = malloc(sizeof(struct ip4_frag), M_DEVBUF, M_NOWAIT); if (fp == NULL) { printf("ip4f_init: can't alloc %dth entry!\n", i); if (i == 0) return (-1); return (0); } fp->ip4f_valid = 0; TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); } return (0); } static struct ip4_frag * ip4f_alloc(void) { struct ip4_frag *fp; /* reclaim an entry at the tail, put it at the head */ fp = TAILQ_LAST(&ip4f_list, ip4f_list); TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); fp->ip4f_valid = 1; TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain); return (fp); } static void ip4f_free(fp) struct ip4_frag *fp; { TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); fp->ip4f_valid = 0; TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); } #endif /* ALTQ3_CLFIER_COMPAT */ Index: head/sys/net/altq/altq_var.h =================================================================== --- head/sys/net/altq/altq_var.h (revision 338208) +++ head/sys/net/altq/altq_var.h (revision 338209) @@ -1,243 +1,242 @@ /*- * Copyright (C) 1998-2003 * Sony Computer Science Laboratories Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $ * $FreeBSD$ */ #ifndef _ALTQ_ALTQ_VAR_H_ #define _ALTQ_ALTQ_VAR_H_ #ifdef _KERNEL #include #include #include #ifdef ALTQ3_CLFIER_COMPAT /* * filter structure for altq common classifier */ struct acc_filter { LIST_ENTRY(acc_filter) f_chain; void *f_class; /* pointer to the class */ u_long f_handle; /* filter id */ u_int32_t f_fbmask; /* filter bitmask */ struct flow_filter f_filter; /* filter value */ }; /* * XXX ACC_FILTER_TABLESIZE can't be larger than 2048 unless we fix * the handle assignment. */ #define ACC_FILTER_TABLESIZE (256+1) #define ACC_FILTER_MASK (ACC_FILTER_TABLESIZE - 2) #define ACC_WILDCARD_INDEX (ACC_FILTER_TABLESIZE - 1) #ifdef __GNUC__ #define ACC_GET_HASH_INDEX(addr) \ ({int x = (addr) + ((addr) >> 16); (x + (x >> 8)) & ACC_FILTER_MASK;}) #else #define ACC_GET_HASH_INDEX(addr) \ (((addr) + ((addr) >> 8) + ((addr) >> 16) + ((addr) >> 24)) \ & ACC_FILTER_MASK) #endif #define ACC_GET_HINDEX(handle) ((handle) >> 20) #if (__FreeBSD_version > 500000) #define ACC_LOCK_INIT(ac) mtx_init(&(ac)->acc_mtx, "classifier", MTX_DEF) #define ACC_LOCK_DESTROY(ac) mtx_destroy(&(ac)->acc_mtx) #define ACC_LOCK(ac) mtx_lock(&(ac)->acc_mtx) #define ACC_UNLOCK(ac) mtx_unlock(&(ac)->acc_mtx) #else #define ACC_LOCK_INIT(ac) #define ACC_LOCK_DESTROY(ac) #define ACC_LOCK(ac) #define ACC_UNLOCK(ac) #endif struct acc_classifier { u_int32_t acc_fbmask; LIST_HEAD(filt, acc_filter) acc_filters[ACC_FILTER_TABLESIZE]; #if (__FreeBSD_version > 500000) struct mtx acc_mtx; #endif }; /* * flowinfo mask bits used by classifier */ /* for ipv4 */ #define FIMB4_PROTO 0x0001 #define FIMB4_TOS 0x0002 #define FIMB4_DADDR 0x0004 #define FIMB4_SADDR 0x0008 #define FIMB4_DPORT 0x0010 #define FIMB4_SPORT 0x0020 #define FIMB4_GPI 0x0040 #define FIMB4_ALL 0x007f /* for ipv6 */ #define FIMB6_PROTO 0x0100 #define FIMB6_TCLASS 0x0200 #define FIMB6_DADDR 0x0400 #define FIMB6_SADDR 0x0800 #define FIMB6_DPORT 0x1000 #define FIMB6_SPORT 0x2000 #define FIMB6_GPI 0x4000 #define FIMB6_FLABEL 0x8000 #define FIMB6_ALL 0xff00 #define FIMB_ALL (FIMB4_ALL|FIMB6_ALL) #define FIMB4_PORTS (FIMB4_DPORT|FIMB4_SPORT|FIMB4_GPI) #define FIMB6_PORTS (FIMB6_DPORT|FIMB6_SPORT|FIMB6_GPI) #endif /* ALTQ3_CLFIER_COMPAT */ /* * machine dependent clock * a 64bit high resolution time counter. */ extern int machclk_usepcc; extern u_int32_t machclk_freq; extern u_int32_t machclk_per_tick; extern void init_machclk(void); extern u_int64_t read_machclk(void); /* * debug support */ #ifdef ALTQ_DEBUG #ifdef __STDC__ #define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, #e)) #else /* PCC */ #define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, "e")) #endif #else #define ASSERT(e) ((void)0) #endif /* * misc stuff for compatibility */ /* ioctl cmd type */ typedef u_long ioctlcmd_t; /* * queue macros: * the interface of TAILQ_LAST macro changed after the introduction * of softupdate. redefine it here to make it work with pre-2.2.7. 
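* (ALTQ itself relies on this in ip4f_alloc(), which uses TAILQ_LAST to reclaim the least-recently-used fragment-cache entry from the list tail.)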
*/ #undef TAILQ_LAST #define TAILQ_LAST(head, headname) \ (*(((struct headname *)((head)->tqh_last))->tqh_last)) #ifndef TAILQ_EMPTY #define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) #endif #ifndef TAILQ_FOREACH #define TAILQ_FOREACH(var, head, field) \ for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field)) #endif /* macro for timeout/untimeout */ /* use callout */ #include #if (__FreeBSD_version > 500000) #define CALLOUT_INIT(c) callout_init((c), 0) #else #define CALLOUT_INIT(c) callout_init((c)) #endif #define CALLOUT_RESET(c,t,f,a) callout_reset((c),(t),(f),(a)) #define CALLOUT_STOP(c) callout_stop((c)) #if !defined(CALLOUT_INITIALIZER) && (__FreeBSD_version < 600000) #define CALLOUT_INITIALIZER { { { NULL } }, 0, NULL, NULL, 0 } #endif #define m_pktlen(m) ((m)->m_pkthdr.len) struct ifnet; struct mbuf; struct pf_altq; #ifdef ALTQ3_CLFIER_COMPAT struct flowinfo; #endif void *altq_lookup(char *, int); #ifdef ALTQ3_CLFIER_COMPAT int altq_extractflow(struct mbuf *, int, struct flowinfo *, u_int32_t); int acc_add_filter(struct acc_classifier *, struct flow_filter *, void *, u_long *); int acc_delete_filter(struct acc_classifier *, u_long); int acc_discard_filters(struct acc_classifier *, void *, int); void *acc_classify(void *, struct mbuf *, int); #endif u_int8_t read_dsfield(struct mbuf *, struct altq_pktattr *); void write_dsfield(struct mbuf *, struct altq_pktattr *, u_int8_t); void altq_assert(const char *, int, const char *); int tbr_set(struct ifaltq *, struct tb_profile *); -int tbr_get(struct ifaltq *, struct tb_profile *); int altq_pfattach(struct pf_altq *); int altq_pfdetach(struct pf_altq *); int altq_add(struct pf_altq *); int altq_remove(struct pf_altq *); int altq_add_queue(struct pf_altq *); int altq_remove_queue(struct pf_altq *); -int altq_getqstats(struct pf_altq *, void *, int *); +int altq_getqstats(struct pf_altq *, void *, int *, int); int cbq_pfattach(struct pf_altq *); int cbq_add_altq(struct pf_altq *); int cbq_remove_altq(struct pf_altq *); int cbq_add_queue(struct pf_altq *); int cbq_remove_queue(struct pf_altq *); -int cbq_getqstats(struct pf_altq *, void *, int *); +int cbq_getqstats(struct pf_altq *, void *, int *, int); int codel_pfattach(struct pf_altq *); int codel_add_altq(struct pf_altq *); int codel_remove_altq(struct pf_altq *); -int codel_getqstats(struct pf_altq *, void *, int *); +int codel_getqstats(struct pf_altq *, void *, int *, int); int priq_pfattach(struct pf_altq *); int priq_add_altq(struct pf_altq *); int priq_remove_altq(struct pf_altq *); int priq_add_queue(struct pf_altq *); int priq_remove_queue(struct pf_altq *); -int priq_getqstats(struct pf_altq *, void *, int *); +int priq_getqstats(struct pf_altq *, void *, int *, int); int hfsc_pfattach(struct pf_altq *); int hfsc_add_altq(struct pf_altq *); int hfsc_remove_altq(struct pf_altq *); int hfsc_add_queue(struct pf_altq *); int hfsc_remove_queue(struct pf_altq *); -int hfsc_getqstats(struct pf_altq *, void *, int *); +int hfsc_getqstats(struct pf_altq *, void *, int *, int); int fairq_pfattach(struct pf_altq *); int fairq_add_altq(struct pf_altq *); int fairq_remove_altq(struct pf_altq *); int fairq_add_queue(struct pf_altq *); int fairq_remove_queue(struct pf_altq *); -int fairq_getqstats(struct pf_altq *, void *, int *); +int fairq_getqstats(struct pf_altq *, void *, int *, int); #endif /* _KERNEL */ #endif /* _ALTQ_ALTQ_VAR_H_ */ Index: head/sys/net/pfvar.h =================================================================== --- head/sys/net/pfvar.h (revision 338208) +++ 
head/sys/net/pfvar.h (revision 338209) @@ -1,1768 +1,1860 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Daniel Hartmeier * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $ * $FreeBSD$ */ #ifndef _NET_PFVAR_H_ #define _NET_PFVAR_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct pf_addr { union { struct in_addr v4; struct in6_addr v6; u_int8_t addr8[16]; u_int16_t addr16[8]; u_int32_t addr32[4]; } pfa; /* 128-bit address */ #define v4 pfa.v4 #define v6 pfa.v6 #define addr8 pfa.addr8 #define addr16 pfa.addr16 #define addr32 pfa.addr32 }; #define PFI_AFLAG_NETWORK 0x01 #define PFI_AFLAG_BROADCAST 0x02 #define PFI_AFLAG_PEER 0x04 #define PFI_AFLAG_MODEMASK 0x07 #define PFI_AFLAG_NOALIAS 0x08 struct pf_addr_wrap { union { struct { struct pf_addr addr; struct pf_addr mask; } a; char ifname[IFNAMSIZ]; char tblname[PF_TABLE_NAME_SIZE]; } v; union { struct pfi_dynaddr *dyn; struct pfr_ktable *tbl; int dyncnt; int tblcnt; } p; u_int8_t type; /* PF_ADDR_* */ u_int8_t iflags; /* PFI_AFLAG_* */ }; #ifdef _KERNEL struct pfi_dynaddr { TAILQ_ENTRY(pfi_dynaddr) entry; struct pf_addr pfid_addr4; struct pf_addr pfid_mask4; struct pf_addr pfid_addr6; struct pf_addr pfid_mask6; struct pfr_ktable *pfid_kt; struct pfi_kif *pfid_kif; int pfid_net; /* mask or 128 */ int pfid_acnt4; /* address count IPv4 */ int pfid_acnt6; /* address count IPv6 */ sa_family_t pfid_af; /* rule af */ u_int8_t pfid_iflags; /* PFI_AFLAG_* */ }; /* * Address manipulation macros */ #define HTONL(x) (x) = htonl((__uint32_t)(x)) #define HTONS(x) (x) = htons((__uint16_t)(x)) #define NTOHL(x) (x) = ntohl((__uint32_t)(x)) #define NTOHS(x) (x) = ntohs((__uint16_t)(x)) #define PF_NAME "pf" #define PF_HASHROW_ASSERT(h) mtx_assert(&(h)->lock, MA_OWNED) #define PF_HASHROW_LOCK(h) mtx_lock(&(h)->lock) #define PF_HASHROW_UNLOCK(h) mtx_unlock(&(h)->lock) #define PF_STATE_LOCK(s) \ do { \ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \ PF_HASHROW_LOCK(_ih); \ } while (0) #define PF_STATE_UNLOCK(s) \ do { \ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH((s))]; \ PF_HASHROW_UNLOCK(_ih); \ } while (0) #ifdef INVARIANTS #define 
PF_STATE_LOCK_ASSERT(s) \ do { \ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(s)]; \ PF_HASHROW_ASSERT(_ih); \ } while (0) #else /* !INVARIANTS */ #define PF_STATE_LOCK_ASSERT(s) do {} while (0) #endif /* INVARIANTS */ extern struct mtx pf_unlnkdrules_mtx; #define PF_UNLNKDRULES_LOCK() mtx_lock(&pf_unlnkdrules_mtx) #define PF_UNLNKDRULES_UNLOCK() mtx_unlock(&pf_unlnkdrules_mtx) extern struct rmlock pf_rules_lock; #define PF_RULES_RLOCK_TRACKER struct rm_priotracker _pf_rules_tracker #define PF_RULES_RLOCK() rm_rlock(&pf_rules_lock, &_pf_rules_tracker) #define PF_RULES_RUNLOCK() rm_runlock(&pf_rules_lock, &_pf_rules_tracker) #define PF_RULES_WLOCK() rm_wlock(&pf_rules_lock) #define PF_RULES_WUNLOCK() rm_wunlock(&pf_rules_lock) #define PF_RULES_ASSERT() rm_assert(&pf_rules_lock, RA_LOCKED) #define PF_RULES_RASSERT() rm_assert(&pf_rules_lock, RA_RLOCKED) #define PF_RULES_WASSERT() rm_assert(&pf_rules_lock, RA_WLOCKED) extern struct sx pf_end_lock; #define PF_MODVER 1 #define PFLOG_MODVER 1 #define PFSYNC_MODVER 1 #define PFLOG_MINVER 1 #define PFLOG_PREFVER PFLOG_MODVER #define PFLOG_MAXVER 1 #define PFSYNC_MINVER 1 #define PFSYNC_PREFVER PFSYNC_MODVER #define PFSYNC_MAXVER 1 #ifdef INET #ifndef INET6 #define PF_INET_ONLY #endif /* ! INET6 */ #endif /* INET */ #ifdef INET6 #ifndef INET #define PF_INET6_ONLY #endif /* ! INET */ #endif /* INET6 */ #ifdef INET #ifdef INET6 #define PF_INET_INET6 #endif /* INET6 */ #endif /* INET */ #else #define PF_INET_INET6 #endif /* _KERNEL */ /* Both IPv4 and IPv6 */ #ifdef PF_INET_INET6 #define PF_AEQ(a, b, c) \ ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \ (c == AF_INET6 && (a)->addr32[3] == (b)->addr32[3] && \ (a)->addr32[2] == (b)->addr32[2] && \ (a)->addr32[1] == (b)->addr32[1] && \ (a)->addr32[0] == (b)->addr32[0])) \ #define PF_ANEQ(a, b, c) \ ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \ (c == AF_INET6 && ((a)->addr32[0] != (b)->addr32[0] || \ (a)->addr32[1] != (b)->addr32[1] || \ (a)->addr32[2] != (b)->addr32[2] || \ (a)->addr32[3] != (b)->addr32[3]))) \ #define PF_AZERO(a, c) \ ((c == AF_INET && !(a)->addr32[0]) || \ (c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \ !(a)->addr32[2] && !(a)->addr32[3] )) \ #define PF_MATCHA(n, a, m, b, f) \ pf_match_addr(n, a, m, b, f) #define PF_ACPY(a, b, f) \ pf_addrcpy(a, b, f) #define PF_AINC(a, f) \ pf_addr_inc(a, f) #define PF_POOLMASK(a, b, c, d, f) \ pf_poolmask(a, b, c, d, f) #else /* Just IPv6 */ #ifdef PF_INET6_ONLY #define PF_AEQ(a, b, c) \ ((a)->addr32[3] == (b)->addr32[3] && \ (a)->addr32[2] == (b)->addr32[2] && \ (a)->addr32[1] == (b)->addr32[1] && \ (a)->addr32[0] == (b)->addr32[0]) \ #define PF_ANEQ(a, b, c) \ ((a)->addr32[3] != (b)->addr32[3] || \ (a)->addr32[2] != (b)->addr32[2] || \ (a)->addr32[1] != (b)->addr32[1] || \ (a)->addr32[0] != (b)->addr32[0]) \ #define PF_AZERO(a, c) \ (!(a)->addr32[0] && \ !(a)->addr32[1] && \ !(a)->addr32[2] && \ !(a)->addr32[3] ) \ #define PF_MATCHA(n, a, m, b, f) \ pf_match_addr(n, a, m, b, f) #define PF_ACPY(a, b, f) \ pf_addrcpy(a, b, f) #define PF_AINC(a, f) \ pf_addr_inc(a, f) #define PF_POOLMASK(a, b, c, d, f) \ pf_poolmask(a, b, c, d, f) #else /* Just IPv4 */ #ifdef PF_INET_ONLY #define PF_AEQ(a, b, c) \ ((a)->addr32[0] == (b)->addr32[0]) #define PF_ANEQ(a, b, c) \ ((a)->addr32[0] != (b)->addr32[0]) #define PF_AZERO(a, c) \ (!(a)->addr32[0]) #define PF_MATCHA(n, a, m, b, f) \ pf_match_addr(n, a, m, b, f) #define PF_ACPY(a, b, f) \ (a)->v4.s_addr = (b)->v4.s_addr #define PF_AINC(a, f) \ do { \ (a)->addr32[0] = 
htonl(ntohl((a)->addr32[0]) + 1); \ } while (0) #define PF_POOLMASK(a, b, c, d, f) \ do { \ (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \ (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \ } while (0) #endif /* PF_INET_ONLY */ #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ /* * XXX callers not FIB-aware in our version of pf yet. * OpenBSD fixed it later it seems, 2010/05/07 13:33:16 claudio. */ #define PF_MISMATCHAW(aw, x, af, neg, ifp, rtid) \ ( \ (((aw)->type == PF_ADDR_NOROUTE && \ pf_routable((x), (af), NULL, (rtid))) || \ (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ pf_routable((x), (af), (ifp), (rtid))) || \ ((aw)->type == PF_ADDR_TABLE && \ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ ((aw)->type == PF_ADDR_DYNIFTL && \ !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ ((aw)->type == PF_ADDR_RANGE && \ !pf_match_addr_range(&(aw)->v.a.addr, \ &(aw)->v.a.mask, (x), (af))) || \ ((aw)->type == PF_ADDR_ADDRMASK && \ !PF_AZERO(&(aw)->v.a.mask, (af)) && \ !PF_MATCHA(0, &(aw)->v.a.addr, \ &(aw)->v.a.mask, (x), (af))))) != \ (neg) \ ) struct pf_rule_uid { uid_t uid[2]; u_int8_t op; }; struct pf_rule_gid { uid_t gid[2]; u_int8_t op; }; struct pf_rule_addr { struct pf_addr_wrap addr; u_int16_t port[2]; u_int8_t neg; u_int8_t port_op; }; struct pf_pooladdr { struct pf_addr_wrap addr; TAILQ_ENTRY(pf_pooladdr) entries; char ifname[IFNAMSIZ]; struct pfi_kif *kif; }; TAILQ_HEAD(pf_palist, pf_pooladdr); struct pf_poolhashkey { union { u_int8_t key8[16]; u_int16_t key16[8]; u_int32_t key32[4]; } pfk; /* 128-bit hash key */ #define key8 pfk.key8 #define key16 pfk.key16 #define key32 pfk.key32 }; struct pf_pool { struct pf_palist list; struct pf_pooladdr *cur; struct pf_poolhashkey key; struct pf_addr counter; int tblidx; u_int16_t proxy_port[2]; u_int8_t opts; }; /* A packed Operating System description for fingerprinting */ typedef u_int32_t pf_osfp_t; #define PF_OSFP_ANY ((pf_osfp_t)0) #define PF_OSFP_UNKNOWN ((pf_osfp_t)-1) #define PF_OSFP_NOMATCH ((pf_osfp_t)-2) struct pf_osfp_entry { SLIST_ENTRY(pf_osfp_entry) fp_entry; pf_osfp_t fp_os; int fp_enflags; #define PF_OSFP_EXPANDED 0x001 /* expanded entry */ #define PF_OSFP_GENERIC 0x002 /* generic signature */ #define PF_OSFP_NODETAIL 0x004 /* no p0f details */ #define PF_OSFP_LEN 32 char fp_class_nm[PF_OSFP_LEN]; char fp_version_nm[PF_OSFP_LEN]; char fp_subtype_nm[PF_OSFP_LEN]; }; #define PF_OSFP_ENTRY_EQ(a, b) \ ((a)->fp_os == (b)->fp_os && \ memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && \ memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \ memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0) /* handle pf_osfp_t packing */ #define _FP_RESERVED_BIT 1 /* For the special negative #defines */ #define _FP_UNUSED_BITS 1 #define _FP_CLASS_BITS 10 /* OS Class (Windows, Linux) */ #define _FP_VERSION_BITS 10 /* OS version (95, 98, NT, 2.4.54, 3.2) */ #define _FP_SUBTYPE_BITS 10 /* patch level (NT SP4, SP3, ECN patch) */ #define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \ (class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \ ((1 << _FP_CLASS_BITS) - 1); \ (version) = ((osfp) >> _FP_SUBTYPE_BITS) & \ ((1 << _FP_VERSION_BITS) - 1);\ (subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \ } while(0) #define PF_OSFP_PACK(osfp, class, version, subtype) do { \ (osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \ + _FP_SUBTYPE_BITS); \ (osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \ _FP_SUBTYPE_BITS; \ (osfp) |= (subtype) & ((1 << 
_FP_SUBTYPE_BITS) - 1); \ } while(0) /* the fingerprint of an OSes TCP SYN packet */ typedef u_int64_t pf_tcpopts_t; struct pf_os_fingerprint { SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */ pf_tcpopts_t fp_tcpopts; /* packed TCP options */ u_int16_t fp_wsize; /* TCP window size */ u_int16_t fp_psize; /* ip->ip_len */ u_int16_t fp_mss; /* TCP MSS */ u_int16_t fp_flags; #define PF_OSFP_WSIZE_MOD 0x0001 /* Window modulus */ #define PF_OSFP_WSIZE_DC 0x0002 /* Window don't care */ #define PF_OSFP_WSIZE_MSS 0x0004 /* Window multiple of MSS */ #define PF_OSFP_WSIZE_MTU 0x0008 /* Window multiple of MTU */ #define PF_OSFP_PSIZE_MOD 0x0010 /* packet size modulus */ #define PF_OSFP_PSIZE_DC 0x0020 /* packet size don't care */ #define PF_OSFP_WSCALE 0x0040 /* TCP window scaling */ #define PF_OSFP_WSCALE_MOD 0x0080 /* TCP window scale modulus */ #define PF_OSFP_WSCALE_DC 0x0100 /* TCP window scale dont-care */ #define PF_OSFP_MSS 0x0200 /* TCP MSS */ #define PF_OSFP_MSS_MOD 0x0400 /* TCP MSS modulus */ #define PF_OSFP_MSS_DC 0x0800 /* TCP MSS dont-care */ #define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */ #define PF_OSFP_TS0 0x2000 /* Zero timestamp */ #define PF_OSFP_INET6 0x4000 /* IPv6 */ u_int8_t fp_optcnt; /* TCP option count */ u_int8_t fp_wscale; /* TCP window scaling */ u_int8_t fp_ttl; /* IPv4 TTL */ #define PF_OSFP_MAXTTL_OFFSET 40 /* TCP options packing */ #define PF_OSFP_TCPOPT_NOP 0x0 /* TCP NOP option */ #define PF_OSFP_TCPOPT_WSCALE 0x1 /* TCP window scaling option */ #define PF_OSFP_TCPOPT_MSS 0x2 /* TCP max segment size opt */ #define PF_OSFP_TCPOPT_SACK 0x3 /* TCP SACK OK option */ #define PF_OSFP_TCPOPT_TS 0x4 /* TCP timestamp option */ #define PF_OSFP_TCPOPT_BITS 3 /* bits used by each option */ #define PF_OSFP_MAX_OPTS \ (sizeof(((struct pf_os_fingerprint *)0)->fp_tcpopts) * 8) \ / PF_OSFP_TCPOPT_BITS SLIST_ENTRY(pf_os_fingerprint) fp_next; }; struct pf_osfp_ioctl { struct pf_osfp_entry fp_os; pf_tcpopts_t fp_tcpopts; /* packed TCP options */ u_int16_t fp_wsize; /* TCP window size */ u_int16_t fp_psize; /* ip->ip_len */ u_int16_t fp_mss; /* TCP MSS */ u_int16_t fp_flags; u_int8_t fp_optcnt; /* TCP option count */ u_int8_t fp_wscale; /* TCP window scaling */ u_int8_t fp_ttl; /* IPv4 TTL */ int fp_getnum; /* DIOCOSFPGET number */ }; union pf_rule_ptr { struct pf_rule *ptr; u_int32_t nr; }; #define PF_ANCHOR_NAME_SIZE 64 struct pf_rule { struct pf_rule_addr src; struct pf_rule_addr dst; #define PF_SKIP_IFP 0 #define PF_SKIP_DIR 1 #define PF_SKIP_AF 2 #define PF_SKIP_PROTO 3 #define PF_SKIP_SRC_ADDR 4 #define PF_SKIP_SRC_PORT 5 #define PF_SKIP_DST_ADDR 6 #define PF_SKIP_DST_PORT 7 #define PF_SKIP_COUNT 8 union pf_rule_ptr skip[PF_SKIP_COUNT]; #define PF_RULE_LABEL_SIZE 64 char label[PF_RULE_LABEL_SIZE]; char ifname[IFNAMSIZ]; char qname[PF_QNAME_SIZE]; char pqname[PF_QNAME_SIZE]; #define PF_TAG_NAME_SIZE 64 char tagname[PF_TAG_NAME_SIZE]; char match_tagname[PF_TAG_NAME_SIZE]; char overload_tblname[PF_TABLE_NAME_SIZE]; TAILQ_ENTRY(pf_rule) entries; struct pf_pool rpool; u_int64_t evaluations; u_int64_t packets[2]; u_int64_t bytes[2]; struct pfi_kif *kif; struct pf_anchor *anchor; struct pfr_ktable *overload_tbl; pf_osfp_t os_fingerprint; int rtableid; u_int32_t timeout[PFTM_MAX]; u_int32_t max_states; u_int32_t max_src_nodes; u_int32_t max_src_states; u_int32_t max_src_conn; struct { u_int32_t limit; u_int32_t seconds; } max_src_conn_rate; u_int32_t qid; u_int32_t pqid; u_int32_t rt_listid; u_int32_t nr; u_int32_t prob; uid_t cuid; pid_t cpid; 
counter_u64_t states_cur; counter_u64_t states_tot; counter_u64_t src_nodes; u_int16_t return_icmp; u_int16_t return_icmp6; u_int16_t max_mss; u_int16_t tag; u_int16_t match_tag; u_int16_t scrub_flags; struct pf_rule_uid uid; struct pf_rule_gid gid; u_int32_t rule_flag; u_int8_t action; u_int8_t direction; u_int8_t log; u_int8_t logif; u_int8_t quick; u_int8_t ifnot; u_int8_t match_tag_not; u_int8_t natpass; #define PF_STATE_NORMAL 0x1 #define PF_STATE_MODULATE 0x2 #define PF_STATE_SYNPROXY 0x3 u_int8_t keep_state; sa_family_t af; u_int8_t proto; u_int8_t type; u_int8_t code; u_int8_t flags; u_int8_t flagset; u_int8_t min_ttl; u_int8_t allow_opts; u_int8_t rt; u_int8_t return_ttl; u_int8_t tos; u_int8_t set_tos; u_int8_t anchor_relative; u_int8_t anchor_wildcard; #define PF_FLUSH 0x01 #define PF_FLUSH_GLOBAL 0x02 u_int8_t flush; #define PF_PRIO_ZERO 0xff /* match "prio 0" packets */ #define PF_PRIO_MAX 7 u_int8_t prio; u_int8_t set_prio[2]; struct { struct pf_addr addr; u_int16_t port; } divert; uint64_t u_states_cur; uint64_t u_states_tot; uint64_t u_src_nodes; }; /* rule flags */ #define PFRULE_DROP 0x0000 #define PFRULE_RETURNRST 0x0001 #define PFRULE_FRAGMENT 0x0002 #define PFRULE_RETURNICMP 0x0004 #define PFRULE_RETURN 0x0008 #define PFRULE_NOSYNC 0x0010 #define PFRULE_SRCTRACK 0x0020 /* track source states */ #define PFRULE_RULESRCTRACK 0x0040 /* per rule */ #define PFRULE_REFS 0x0080 /* rule has references */ /* scrub flags */ #define PFRULE_NODF 0x0100 #define PFRULE_RANDOMID 0x0800 #define PFRULE_REASSEMBLE_TCP 0x1000 #define PFRULE_SET_TOS 0x2000 /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ #define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */ #define PFSTATE_HIWAT 100000 /* default state table size */ #define PFSTATE_ADAPT_START 60000 /* default adaptive timeout start */ #define PFSTATE_ADAPT_END 120000 /* default adaptive timeout end */ struct pf_threshold { u_int32_t limit; #define PF_THRESHOLD_MULT 1000 #define PF_THRESHOLD_MAX 0xffffffff / PF_THRESHOLD_MULT u_int32_t seconds; u_int32_t count; u_int32_t last; }; struct pf_src_node { LIST_ENTRY(pf_src_node) entry; struct pf_addr addr; struct pf_addr raddr; union pf_rule_ptr rule; struct pfi_kif *kif; u_int64_t bytes[2]; u_int64_t packets[2]; u_int32_t states; u_int32_t conn; struct pf_threshold conn_rate; u_int32_t creation; u_int32_t expire; sa_family_t af; u_int8_t ruletype; }; #define PFSNODE_HIWAT 10000 /* default source node table size */ struct pf_state_scrub { struct timeval pfss_last; /* time received last packet */ u_int32_t pfss_tsecr; /* last echoed timestamp */ u_int32_t pfss_tsval; /* largest timestamp */ u_int32_t pfss_tsval0; /* original timestamp */ u_int16_t pfss_flags; #define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ #define PFSS_PAWS 0x0010 /* stricter PAWS checks */ #define PFSS_PAWS_IDLED 0x0020 /* was idle too long. 
no PAWS */ #define PFSS_DATA_TS 0x0040 /* timestamp on data packets */ #define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */ u_int8_t pfss_ttl; /* stashed TTL */ u_int8_t pad; u_int32_t pfss_ts_mod; /* timestamp modulation */ }; struct pf_state_host { struct pf_addr addr; u_int16_t port; u_int16_t pad; }; struct pf_state_peer { struct pf_state_scrub *scrub; /* state is scrubbed */ u_int32_t seqlo; /* Max sequence number sent */ u_int32_t seqhi; /* Max the other end ACKd + win */ u_int32_t seqdiff; /* Sequence number modulator */ u_int16_t max_win; /* largest window (pre scaling) */ u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ u_int8_t pad[1]; }; /* Keep synced with struct pf_state_key. */ struct pf_state_key_cmp { struct pf_addr addr[2]; u_int16_t port[2]; sa_family_t af; u_int8_t proto; u_int8_t pad[2]; }; struct pf_state_key { struct pf_addr addr[2]; u_int16_t port[2]; sa_family_t af; u_int8_t proto; u_int8_t pad[2]; LIST_ENTRY(pf_state_key) entry; TAILQ_HEAD(, pf_state) states[2]; }; /* Keep synced with struct pf_state. */ struct pf_state_cmp { u_int64_t id; u_int32_t creatorid; u_int8_t direction; u_int8_t pad[3]; }; struct pf_state { u_int64_t id; u_int32_t creatorid; u_int8_t direction; u_int8_t pad[3]; u_int refs; TAILQ_ENTRY(pf_state) sync_list; TAILQ_ENTRY(pf_state) key_list[2]; LIST_ENTRY(pf_state) entry; struct pf_state_peer src; struct pf_state_peer dst; union pf_rule_ptr rule; union pf_rule_ptr anchor; union pf_rule_ptr nat_rule; struct pf_addr rt_addr; struct pf_state_key *key[2]; /* addresses stack and wire */ struct pfi_kif *kif; struct pfi_kif *rt_kif; struct pf_src_node *src_node; struct pf_src_node *nat_src_node; u_int64_t packets[2]; u_int64_t bytes[2]; u_int32_t creation; u_int32_t expire; u_int32_t pfsync_time; u_int16_t tag; u_int8_t log; u_int8_t state_flags; #define PFSTATE_ALLOWOPTS 0x01 #define PFSTATE_SLOPPY 0x02 /* was PFSTATE_PFLOW 0x04 */ #define PFSTATE_NOSYNC 0x08 #define PFSTATE_ACK 0x10 #define PFSTATE_SETPRIO 0x0200 #define PFSTATE_SETMASK (PFSTATE_SETPRIO) u_int8_t timeout; u_int8_t sync_state; /* PFSYNC_S_x */ /* XXX */ u_int8_t sync_updates; u_int8_t _tail[3]; }; /* * Unified state structures for pulling states out of the kernel * used by pfsync(4) and the pf(4) ioctl. 
*/ struct pfsync_state_scrub { u_int16_t pfss_flags; u_int8_t pfss_ttl; /* stashed TTL */ #define PFSYNC_SCRUB_FLAG_VALID 0x01 u_int8_t scrub_flag; u_int32_t pfss_ts_mod; /* timestamp modulation */ } __packed; struct pfsync_state_peer { struct pfsync_state_scrub scrub; /* state is scrubbed */ u_int32_t seqlo; /* Max sequence number sent */ u_int32_t seqhi; /* Max the other end ACKd + win */ u_int32_t seqdiff; /* Sequence number modulator */ u_int16_t max_win; /* largest window (pre scaling) */ u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ u_int8_t pad[6]; } __packed; struct pfsync_state_key { struct pf_addr addr[2]; u_int16_t port[2]; }; struct pfsync_state { u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; struct pfsync_state_peer src; struct pfsync_state_peer dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; u_int32_t nat_rule; u_int32_t creation; u_int32_t expire; u_int32_t packets[2][2]; u_int32_t bytes[2][2]; u_int32_t creatorid; sa_family_t af; u_int8_t proto; u_int8_t direction; u_int8_t __spare[2]; u_int8_t log; u_int8_t state_flags; u_int8_t timeout; u_int8_t sync_flags; u_int8_t updates; } __packed; #ifdef _KERNEL /* pfsync */ typedef int pfsync_state_import_t(struct pfsync_state *, u_int8_t); typedef void pfsync_insert_state_t(struct pf_state *); typedef void pfsync_update_state_t(struct pf_state *); typedef void pfsync_delete_state_t(struct pf_state *); typedef void pfsync_clear_states_t(u_int32_t, const char *); typedef int pfsync_defer_t(struct pf_state *, struct mbuf *); extern pfsync_state_import_t *pfsync_state_import_ptr; extern pfsync_insert_state_t *pfsync_insert_state_ptr; extern pfsync_update_state_t *pfsync_update_state_ptr; extern pfsync_delete_state_t *pfsync_delete_state_ptr; extern pfsync_clear_states_t *pfsync_clear_states_ptr; extern pfsync_defer_t *pfsync_defer_ptr; void pfsync_state_export(struct pfsync_state *, struct pf_state *); /* pflog */ struct pf_ruleset; struct pf_pdesc; typedef int pflog_packet_t(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, struct pf_pdesc *, int); extern pflog_packet_t *pflog_packet_ptr; #endif /* _KERNEL */ #define PFSYNC_FLAG_SRCNODE 0x04 #define PFSYNC_FLAG_NATSRCNODE 0x08 /* for copies to/from network byte order */ /* ioctl interface also uses network byte order */ #define pf_state_peer_hton(s,d) do { \ (d)->seqlo = htonl((s)->seqlo); \ (d)->seqhi = htonl((s)->seqhi); \ (d)->seqdiff = htonl((s)->seqdiff); \ (d)->max_win = htons((s)->max_win); \ (d)->mss = htons((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ if ((s)->scrub) { \ (d)->scrub.pfss_flags = \ htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ } \ } while (0) #define pf_state_peer_ntoh(s,d) do { \ (d)->seqlo = ntohl((s)->seqlo); \ (d)->seqhi = ntohl((s)->seqhi); \ (d)->seqdiff = ntohl((s)->seqdiff); \ (d)->max_win = ntohs((s)->max_win); \ (d)->mss = ntohs((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ (d)->scrub != NULL) { \ (d)->scrub->pfss_flags = \ ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ } \ } while (0) 
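/*
 * Illustration only (not part of this revision): the pf_state_peer_hton()
 * and pf_state_peer_ntoh() macros above are the glue between the kernel's
 * struct pf_state_peer and the byte-order-neutral struct pfsync_state_peer
 * shared by pfsync(4) and the ioctl interface.  A hypothetical helper
 * sketching the export direction; in the tree this work happens inside
 * pfsync_state_export(), declared above.
 */
static void
state_peers_export(struct pfsync_state *sp, struct pf_state *st)
{
	/* Convert both peers to network byte order for the wire/ioctl copy. */
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);
}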
#define pf_state_counter_hton(s,d) do { \ d[0] = htonl((s>>32)&0xffffffff); \ d[1] = htonl(s&0xffffffff); \ } while (0) #define pf_state_counter_from_pfsync(s) \ (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1])) #define pf_state_counter_ntoh(s,d) do { \ d = ntohl(s[0]); \ d = d<<32; \ d += ntohl(s[1]); \ } while (0) TAILQ_HEAD(pf_rulequeue, pf_rule); struct pf_anchor; struct pf_ruleset { struct { struct pf_rulequeue queues[2]; struct { struct pf_rulequeue *ptr; struct pf_rule **ptr_array; u_int32_t rcount; u_int32_t ticket; int open; } active, inactive; } rules[PF_RULESET_MAX]; struct pf_anchor *anchor; u_int32_t tticket; int tables; int topen; }; RB_HEAD(pf_anchor_global, pf_anchor); RB_HEAD(pf_anchor_node, pf_anchor); struct pf_anchor { RB_ENTRY(pf_anchor) entry_global; RB_ENTRY(pf_anchor) entry_node; struct pf_anchor *parent; struct pf_anchor_node children; char name[PF_ANCHOR_NAME_SIZE]; char path[MAXPATHLEN]; struct pf_ruleset ruleset; int refcnt; /* anchor rules */ int match; /* XXX: used for pfctl black magic */ }; RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); #define PF_RESERVED_ANCHOR "_pf" #define PFR_TFLAG_PERSIST 0x00000001 #define PFR_TFLAG_CONST 0x00000002 #define PFR_TFLAG_ACTIVE 0x00000004 #define PFR_TFLAG_INACTIVE 0x00000008 #define PFR_TFLAG_REFERENCED 0x00000010 #define PFR_TFLAG_REFDANCHOR 0x00000020 #define PFR_TFLAG_COUNTERS 0x00000040 /* Adjust masks below when adding flags. */ #define PFR_TFLAG_USRMASK (PFR_TFLAG_PERSIST | \ PFR_TFLAG_CONST | \ PFR_TFLAG_COUNTERS) #define PFR_TFLAG_SETMASK (PFR_TFLAG_ACTIVE | \ PFR_TFLAG_INACTIVE | \ PFR_TFLAG_REFERENCED | \ PFR_TFLAG_REFDANCHOR) #define PFR_TFLAG_ALLMASK (PFR_TFLAG_PERSIST | \ PFR_TFLAG_CONST | \ PFR_TFLAG_ACTIVE | \ PFR_TFLAG_INACTIVE | \ PFR_TFLAG_REFERENCED | \ PFR_TFLAG_REFDANCHOR | \ PFR_TFLAG_COUNTERS) struct pf_anchor_stackframe; struct pfr_table { char pfrt_anchor[MAXPATHLEN]; char pfrt_name[PF_TABLE_NAME_SIZE]; u_int32_t pfrt_flags; u_int8_t pfrt_fback; }; enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED, PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE, PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX }; struct pfr_addr { union { struct in_addr _pfra_ip4addr; struct in6_addr _pfra_ip6addr; } pfra_u; u_int8_t pfra_af; u_int8_t pfra_net; u_int8_t pfra_not; u_int8_t pfra_fback; }; #define pfra_ip4addr pfra_u._pfra_ip4addr #define pfra_ip6addr pfra_u._pfra_ip6addr enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX }; enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX }; #define PFR_OP_XPASS PFR_OP_ADDR_MAX struct pfr_astats { struct pfr_addr pfras_a; u_int64_t pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; long pfras_tzero; }; enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX }; struct pfr_tstats { struct pfr_table pfrts_t; u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; u_int64_t pfrts_match; u_int64_t pfrts_nomatch; long pfrts_tzero; int pfrts_cnt; int pfrts_refcnt[PFR_REFCNT_MAX]; }; #define pfrts_name pfrts_t.pfrt_name #define pfrts_flags pfrts_t.pfrt_flags #ifndef _SOCKADDR_UNION_DEFINED #define _SOCKADDR_UNION_DEFINED union sockaddr_union { struct sockaddr sa; struct sockaddr_in sin; struct sockaddr_in6 sin6; }; #endif /* _SOCKADDR_UNION_DEFINED */ struct pfr_kcounters { u_int64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; u_int64_t 
pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; }; SLIST_HEAD(pfr_kentryworkq, pfr_kentry); struct pfr_kentry { struct radix_node pfrke_node[2]; union sockaddr_union pfrke_sa; SLIST_ENTRY(pfr_kentry) pfrke_workq; struct pfr_kcounters *pfrke_counters; long pfrke_tzero; u_int8_t pfrke_af; u_int8_t pfrke_net; u_int8_t pfrke_not; u_int8_t pfrke_mark; }; SLIST_HEAD(pfr_ktableworkq, pfr_ktable); RB_HEAD(pfr_ktablehead, pfr_ktable); struct pfr_ktable { struct pfr_tstats pfrkt_ts; RB_ENTRY(pfr_ktable) pfrkt_tree; SLIST_ENTRY(pfr_ktable) pfrkt_workq; struct radix_node_head *pfrkt_ip4; struct radix_node_head *pfrkt_ip6; struct pfr_ktable *pfrkt_shadow; struct pfr_ktable *pfrkt_root; struct pf_ruleset *pfrkt_rs; long pfrkt_larg; int pfrkt_nflags; }; #define pfrkt_t pfrkt_ts.pfrts_t #define pfrkt_name pfrkt_t.pfrt_name #define pfrkt_anchor pfrkt_t.pfrt_anchor #define pfrkt_ruleset pfrkt_t.pfrt_ruleset #define pfrkt_flags pfrkt_t.pfrt_flags #define pfrkt_cnt pfrkt_ts.pfrts_cnt #define pfrkt_refcnt pfrkt_ts.pfrts_refcnt #define pfrkt_packets pfrkt_ts.pfrts_packets #define pfrkt_bytes pfrkt_ts.pfrts_bytes #define pfrkt_match pfrkt_ts.pfrts_match #define pfrkt_nomatch pfrkt_ts.pfrts_nomatch #define pfrkt_tzero pfrkt_ts.pfrts_tzero /* keep synced with pfi_kif, used in RB_FIND */ struct pfi_kif_cmp { char pfik_name[IFNAMSIZ]; }; struct pfi_kif { char pfik_name[IFNAMSIZ]; union { RB_ENTRY(pfi_kif) _pfik_tree; LIST_ENTRY(pfi_kif) _pfik_list; } _pfik_glue; #define pfik_tree _pfik_glue._pfik_tree #define pfik_list _pfik_glue._pfik_list u_int64_t pfik_packets[2][2][2]; u_int64_t pfik_bytes[2][2][2]; u_int32_t pfik_tzero; u_int pfik_flags; struct ifnet *pfik_ifp; struct ifg_group *pfik_group; u_int pfik_rulerefs; TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; }; #define PFI_IFLAG_REFS 0x0001 /* has state references */ #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ struct pf_pdesc { struct { int done; uid_t uid; gid_t gid; } lookup; u_int64_t tot_len; /* Make Mickey money */ union { struct tcphdr *tcp; struct udphdr *udp; struct icmp *icmp; #ifdef INET6 struct icmp6_hdr *icmp6; #endif /* INET6 */ void *any; } hdr; struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ struct pf_addr *src; /* src address */ struct pf_addr *dst; /* dst address */ u_int16_t *sport; u_int16_t *dport; struct pf_mtag *pf_mtag; u_int32_t p_len; /* total length of payload */ u_int16_t *ip_sum; u_int16_t *proto_sum; u_int16_t flags; /* Let SCRUB trigger behavior in * state code. 
Easier than tags */ #define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ #define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */ sa_family_t af; u_int8_t proto; u_int8_t tos; u_int8_t dir; /* direction */ u_int8_t sidx; /* key index for source */ u_int8_t didx; /* key index for destination */ }; /* flags for RDR options */ #define PF_DPORT_RANGE 0x01 /* Dest port uses range */ #define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */ /* UDP state enumeration */ #define PFUDPS_NO_TRAFFIC 0 #define PFUDPS_SINGLE 1 #define PFUDPS_MULTIPLE 2 #define PFUDPS_NSTATES 3 /* number of state levels */ #define PFUDPS_NAMES { \ "NO_TRAFFIC", \ "SINGLE", \ "MULTIPLE", \ NULL \ } /* Other protocol state enumeration */ #define PFOTHERS_NO_TRAFFIC 0 #define PFOTHERS_SINGLE 1 #define PFOTHERS_MULTIPLE 2 #define PFOTHERS_NSTATES 3 /* number of state levels */ #define PFOTHERS_NAMES { \ "NO_TRAFFIC", \ "SINGLE", \ "MULTIPLE", \ NULL \ } #define ACTION_SET(a, x) \ do { \ if ((a) != NULL) \ *(a) = (x); \ } while (0) #define REASON_SET(a, x) \ do { \ if ((a) != NULL) \ *(a) = (x); \ if (x < PFRES_MAX) \ counter_u64_add(V_pf_status.counters[x], 1); \ } while (0) struct pf_kstatus { counter_u64_t counters[PFRES_MAX]; /* reason for passing/dropping */ counter_u64_t lcounters[LCNT_MAX]; /* limit counters */ counter_u64_t fcounters[FCNT_MAX]; /* state operation counters */ counter_u64_t scounters[SCNT_MAX]; /* src_node operation counters */ uint32_t states; uint32_t src_nodes; uint32_t running; uint32_t since; uint32_t debug; uint32_t hostid; char ifname[IFNAMSIZ]; uint8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; }; struct pf_divert { union { struct in_addr ipv4; struct in6_addr ipv6; } addr; u_int16_t port; }; #define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ #define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ /* * ioctl parameter structures */ struct pfioc_pooladdr { u_int32_t action; u_int32_t ticket; u_int32_t nr; u_int32_t r_num; u_int8_t r_action; u_int8_t r_last; u_int8_t af; char anchor[MAXPATHLEN]; struct pf_pooladdr addr; }; struct pfioc_rule { u_int32_t action; u_int32_t ticket; u_int32_t pool_ticket; u_int32_t nr; char anchor[MAXPATHLEN]; char anchor_call[MAXPATHLEN]; struct pf_rule rule; }; struct pfioc_natlook { struct pf_addr saddr; struct pf_addr daddr; struct pf_addr rsaddr; struct pf_addr rdaddr; u_int16_t sport; u_int16_t dport; u_int16_t rsport; u_int16_t rdport; sa_family_t af; u_int8_t proto; u_int8_t direction; }; struct pfioc_state { struct pfsync_state state; }; struct pfioc_src_node_kill { sa_family_t psnk_af; struct pf_rule_addr psnk_src; struct pf_rule_addr psnk_dst; u_int psnk_killed; }; struct pfioc_state_kill { struct pf_state_cmp psk_pfcmp; sa_family_t psk_af; int psk_proto; struct pf_rule_addr psk_src; struct pf_rule_addr psk_dst; char psk_ifname[IFNAMSIZ]; char psk_label[PF_RULE_LABEL_SIZE]; u_int psk_killed; }; struct pfioc_states { int ps_len; union { caddr_t psu_buf; struct pfsync_state *psu_states; } ps_u; #define ps_buf ps_u.psu_buf #define ps_states ps_u.psu_states }; struct pfioc_src_nodes { int psn_len; union { caddr_t psu_buf; struct pf_src_node *psu_src_nodes; } psn_u; #define psn_buf psn_u.psu_buf #define psn_src_nodes psn_u.psu_src_nodes }; struct pfioc_if { char ifname[IFNAMSIZ]; }; struct pfioc_tm { int timeout; int seconds; }; struct pfioc_limit { int index; unsigned limit; }; -struct pfioc_altq { +struct pfioc_altq_v0 { u_int32_t action; u_int32_t ticket; u_int32_t nr; - struct pf_altq altq; + struct pf_altq_v0 altq; }; 
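/*
 * Illustration only (not part of this revision): out-of-tree consumers
 * built against the old interface keep working because struct
 * pfioc_altq_v0 above preserves the historical layout byte for byte.
 * A minimal userland sketch of a legacy-style queue walk written
 * explicitly against the v0 type and the V0 ioctl commands defined
 * further below.  Assumes 'dev' is an open /dev/pf descriptor and that
 * <sys/types.h>, <sys/ioctl.h>, <string.h>, <err.h> and this header
 * are included; most error handling is omitted.
 */
static void
walk_altqs_v0(int dev)
{
	struct pfioc_altq_v0 pa;
	u_int32_t i, nqueues;

	memset(&pa, 0, sizeof(pa));
	if (ioctl(dev, DIOCGETALTQSV0, &pa) == -1)
		err(1, "DIOCGETALTQSV0");
	nqueues = pa.nr;	/* queue count; pa.ticket stays valid below */
	for (i = 0; i < nqueues; i++) {
		pa.nr = i;
		if (ioctl(dev, DIOCGETALTQV0, &pa) == -1)
			err(1, "DIOCGETALTQV0");
		/* pa.altq is a struct pf_altq_v0 with 32-bit bandwidths */
	}
}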
-struct pfioc_qstats { +struct pfioc_altq_v1 { + u_int32_t action; u_int32_t ticket; u_int32_t nr; + /* + * Placed here so code that only uses the above parameters can be + * written entirely in terms of the v0 or v1 type. + */ + u_int32_t version; + struct pf_altq_v1 altq; +}; + +/* + * Latest version of struct pfioc_altq_vX. This must move in lock-step with + * the latest version of struct pf_altq_vX as it has that struct as a + * member. + */ +#define PFIOC_ALTQ_VERSION PF_ALTQ_VERSION + +struct pfioc_qstats_v0 { + u_int32_t ticket; + u_int32_t nr; void *buf; int nbytes; u_int8_t scheduler; }; +struct pfioc_qstats_v1 { + u_int32_t ticket; + u_int32_t nr; + void *buf; + int nbytes; + u_int8_t scheduler; + /* + * Placed here so code that only uses the above parameters can be + * written entirely in terms of the v0 or v1 type. + */ + u_int32_t version; /* Requested version of stats struct */ +}; + +/* Latest version of struct pfioc_qstats_vX */ +#define PFIOC_QSTATS_VERSION 1 + struct pfioc_ruleset { u_int32_t nr; char path[MAXPATHLEN]; char name[PF_ANCHOR_NAME_SIZE]; }; #define PF_RULESET_ALTQ (PF_RULESET_MAX) #define PF_RULESET_TABLE (PF_RULESET_MAX+1) struct pfioc_trans { int size; /* number of elements */ int esize; /* size of each element in bytes */ struct pfioc_trans_e { int rs_num; char anchor[MAXPATHLEN]; u_int32_t ticket; } *array; }; #define PFR_FLAG_ATOMIC 0x00000001 /* unused */ #define PFR_FLAG_DUMMY 0x00000002 #define PFR_FLAG_FEEDBACK 0x00000004 #define PFR_FLAG_CLSTATS 0x00000008 #define PFR_FLAG_ADDRSTOO 0x00000010 #define PFR_FLAG_REPLACE 0x00000020 #define PFR_FLAG_ALLRSETS 0x00000040 #define PFR_FLAG_ALLMASK 0x0000007F #ifdef _KERNEL #define PFR_FLAG_USERIOCTL 0x10000000 #endif struct pfioc_table { struct pfr_table pfrio_table; void *pfrio_buffer; int pfrio_esize; int pfrio_size; int pfrio_size2; int pfrio_nadd; int pfrio_ndel; int pfrio_nchange; int pfrio_flags; u_int32_t pfrio_ticket; }; #define pfrio_exists pfrio_nadd #define pfrio_nzero pfrio_nadd #define pfrio_nmatch pfrio_nadd #define pfrio_naddr pfrio_size2 #define pfrio_setflag pfrio_size2 #define pfrio_clrflag pfrio_nadd struct pfioc_iface { char pfiio_name[IFNAMSIZ]; void *pfiio_buffer; int pfiio_esize; int pfiio_size; int pfiio_nzero; int pfiio_flags; }; /* * ioctl operations */ #define DIOCSTART _IO ('D', 1) #define DIOCSTOP _IO ('D', 2) #define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule) #define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule) #define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule) /* XXX cut 8 - 17 */ #define DIOCCLRSTATES _IOWR('D', 18, struct pfioc_state_kill) #define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state) #define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if) #define DIOCGETSTATUS _IOWR('D', 21, struct pf_status) #define DIOCCLRSTATUS _IO ('D', 22) #define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook) #define DIOCSETDEBUG _IOWR('D', 24, u_int32_t) #define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states) #define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule) /* XXX cut 26 - 28 */ #define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm) #define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm) #define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state) #define DIOCCLRRULECTRS _IO ('D', 38) #define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit) #define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit) #define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill) #define DIOCSTARTALTQ _IO ('D', 42) #define DIOCSTOPALTQ _IO ('D', 43) -#define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq) 
-#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq) -#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq) -#define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq) -#define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats) +#define DIOCADDALTQV0 _IOWR('D', 45, struct pfioc_altq_v0) +#define DIOCADDALTQV1 _IOWR('D', 45, struct pfioc_altq_v1) +#define DIOCGETALTQSV0 _IOWR('D', 47, struct pfioc_altq_v0) +#define DIOCGETALTQSV1 _IOWR('D', 47, struct pfioc_altq_v1) +#define DIOCGETALTQV0 _IOWR('D', 48, struct pfioc_altq_v0) +#define DIOCGETALTQV1 _IOWR('D', 48, struct pfioc_altq_v1) +#define DIOCCHANGEALTQV0 _IOWR('D', 49, struct pfioc_altq_v0) +#define DIOCCHANGEALTQV1 _IOWR('D', 49, struct pfioc_altq_v1) +#define DIOCGETQSTATSV0 _IOWR('D', 50, struct pfioc_qstats_v0) +#define DIOCGETQSTATSV1 _IOWR('D', 50, struct pfioc_qstats_v1) #define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr) #define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr) #define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr) #define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr) #define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr) /* XXX cut 55 - 57 */ #define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset) #define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset) #define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table) #define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table) #define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table) #define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table) #define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table) #define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) #define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table) #define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table) #define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table) #define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table) #define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table) #define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table) #define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) #define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table) #define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table) #define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) #define DIOCOSFPFLUSH _IO('D', 78) #define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) #define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) #define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans) #define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans) #define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans) #define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes) #define DIOCCLRSRCNODES _IO('D', 85) #define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) #define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) #define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) #define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) #define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) -struct pf_ifspeed { +struct pf_ifspeed_v0 { char ifname[IFNAMSIZ]; u_int32_t baudrate; }; -#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed) + +struct pf_ifspeed_v1 { + char ifname[IFNAMSIZ]; + u_int32_t baudrate32; + /* layout identical to struct pf_ifspeed_v0 up to this point */ + u_int64_t baudrate; +}; + +/* Latest version of struct pf_ifspeed_vX */ +#define PF_IFSPEED_VERSION 1 + +#define DIOCGIFSPEEDV0 _IOWR('D', 92, struct pf_ifspeed_v0) +#define DIOCGIFSPEEDV1 _IOWR('D', 92, struct pf_ifspeed_v1) + +/* + * Compatibility and convenience macros + */ +#ifndef _KERNEL +#ifdef PFIOC_USE_LATEST +/* + * Maintaining in-tree 
consumers of the ioctl interface is easier when that + * code can be written in terms of old names that refer to the latest interface + * version as that reduces the required changes in the consumers to those + * that are functionally necessary to accommodate a new interface version. + */ +#define pfioc_altq __CONCAT(pfioc_altq_v, PFIOC_ALTQ_VERSION) +#define pfioc_qstats __CONCAT(pfioc_qstats_v, PFIOC_QSTATS_VERSION) +#define pf_ifspeed __CONCAT(pf_ifspeed_v, PF_IFSPEED_VERSION) + +#define DIOCADDALTQ __CONCAT(DIOCADDALTQV, PFIOC_ALTQ_VERSION) +#define DIOCGETALTQS __CONCAT(DIOCGETALTQSV, PFIOC_ALTQ_VERSION) +#define DIOCGETALTQ __CONCAT(DIOCGETALTQV, PFIOC_ALTQ_VERSION) +#define DIOCCHANGEALTQ __CONCAT(DIOCCHANGEALTQV, PFIOC_ALTQ_VERSION) +#define DIOCGETQSTATS __CONCAT(DIOCGETQSTATSV, PFIOC_QSTATS_VERSION) +#define DIOCGIFSPEED __CONCAT(DIOCGIFSPEEDV, PF_IFSPEED_VERSION) +#else +/* + * When building out-of-tree code that is written for the old interface, + * such as may exist in ports for example, resolve the old struct tags and + * ioctl command names to the v0 versions. + */ +#define pfioc_altq __CONCAT(pfioc_altq_v, 0) +#define pfioc_qstats __CONCAT(pfioc_qstats_v, 0) +#define pf_ifspeed __CONCAT(pf_ifspeed_v, 0) + +#define DIOCADDALTQ __CONCAT(DIOCADDALTQV, 0) +#define DIOCGETALTQS __CONCAT(DIOCGETALTQSV, 0) +#define DIOCGETALTQ __CONCAT(DIOCGETALTQV, 0) +#define DIOCCHANGEALTQ __CONCAT(DIOCCHANGEALTQV, 0) +#define DIOCGETQSTATS __CONCAT(DIOCGETQSTATSV, 0) +#define DIOCGIFSPEED __CONCAT(DIOCGIFSPEEDV, 0) +#endif /* PFIOC_USE_LATEST */ +#endif /* _KERNEL */ #ifdef _KERNEL LIST_HEAD(pf_src_node_list, pf_src_node); struct pf_srchash { struct pf_src_node_list nodes; struct mtx lock; }; struct pf_keyhash { LIST_HEAD(, pf_state_key) keys; struct mtx lock; }; struct pf_idhash { LIST_HEAD(, pf_state) states; struct mtx lock; }; extern u_long pf_hashmask; extern u_long pf_srchashmask; #define PF_HASHSIZ (131072) #define PF_SRCHASHSIZ (PF_HASHSIZ/4) VNET_DECLARE(struct pf_keyhash *, pf_keyhash); VNET_DECLARE(struct pf_idhash *, pf_idhash); #define V_pf_keyhash VNET(pf_keyhash) #define V_pf_idhash VNET(pf_idhash) VNET_DECLARE(struct pf_srchash *, pf_srchash); #define V_pf_srchash VNET(pf_srchash) #define PF_IDHASH(s) (be64toh((s)->id) % (pf_hashmask + 1)) VNET_DECLARE(void *, pf_swi_cookie); #define V_pf_swi_cookie VNET(pf_swi_cookie) VNET_DECLARE(uint64_t, pf_stateid[MAXCPU]); #define V_pf_stateid VNET(pf_stateid) TAILQ_HEAD(pf_altqqueue, pf_altq); VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]); #define V_pf_altqs VNET(pf_altqs) VNET_DECLARE(struct pf_palist, pf_pabuf); #define V_pf_pabuf VNET(pf_pabuf) VNET_DECLARE(u_int32_t, ticket_altqs_active); #define V_ticket_altqs_active VNET(ticket_altqs_active) VNET_DECLARE(u_int32_t, ticket_altqs_inactive); #define V_ticket_altqs_inactive VNET(ticket_altqs_inactive) VNET_DECLARE(int, altqs_inactive_open); #define V_altqs_inactive_open VNET(altqs_inactive_open) VNET_DECLARE(u_int32_t, ticket_pabuf); #define V_ticket_pabuf VNET(ticket_pabuf) VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active); #define V_pf_altqs_active VNET(pf_altqs_active) VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive); #define V_pf_altqs_inactive VNET(pf_altqs_inactive) VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules); #define V_pf_unlinked_rules VNET(pf_unlinked_rules) void pf_initialize(void); void pf_mtag_initialize(void); void pf_mtag_cleanup(void); void pf_cleanup(void); struct pf_mtag *pf_get_mtag(struct mbuf *); extern void pf_calc_skip_steps(struct pf_rulequeue *);
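/*
 * Illustration only (not part of this revision): a userland-side sketch
 * of how the convenience macros earlier in this header resolve.  A
 * consumer opts in to the newest interface by defining PFIOC_USE_LATEST
 * before including <net/pfvar.h>; the unversioned names then track the
 * latest revision, so 'struct pfioc_altq' below is struct pfioc_altq_v1
 * and DIOCGETALTQS expands to DIOCGETALTQSV1.  Assumes 'dev' is an open
 * /dev/pf descriptor; error handling is abbreviated.
 */
static int
get_altqs_latest(int dev)
{
	struct pfioc_altq pa;	/* pfioc_altq_v1 under PFIOC_USE_LATEST */

	memset(&pa, 0, sizeof(pa));
	pa.version = PFIOC_ALTQ_VERSION;	/* request the v1 pf_altq layout */
	if (ioctl(dev, DIOCGETALTQS, &pa) == -1)	/* DIOCGETALTQSV1 */
		return (-1);
	return ((int)pa.nr);	/* number of queues in the active set */
}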
#ifdef ALTQ extern void pf_altq_ifnet_event(struct ifnet *, int); #endif VNET_DECLARE(uma_zone_t, pf_state_z); #define V_pf_state_z VNET(pf_state_z) VNET_DECLARE(uma_zone_t, pf_state_key_z); #define V_pf_state_key_z VNET(pf_state_key_z) VNET_DECLARE(uma_zone_t, pf_state_scrub_z); #define V_pf_state_scrub_z VNET(pf_state_scrub_z) extern void pf_purge_thread(void *); extern void pf_unload_vnet_purge(void); extern void pf_intr(void *); extern void pf_purge_expired_src_nodes(void); extern int pf_unlink_state(struct pf_state *, u_int); #define PF_ENTER_LOCKED 0x00000001 #define PF_RETURN_LOCKED 0x00000002 extern int pf_state_insert(struct pfi_kif *, struct pf_state_key *, struct pf_state_key *, struct pf_state *); extern void pf_free_state(struct pf_state *); static __inline void pf_ref_state(struct pf_state *s) { refcount_acquire(&s->refs); } static __inline int pf_release_state(struct pf_state *s) { if (refcount_release(&s->refs)) { pf_free_state(s); return (1); } else return (0); } extern struct pf_state *pf_find_state_byid(uint64_t, uint32_t); extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *, u_int, int *); extern struct pf_src_node *pf_find_src_node(struct pf_addr *, struct pf_rule *, sa_family_t, int); extern void pf_unlink_src_node(struct pf_src_node *); extern u_int pf_free_src_nodes(struct pf_src_node_list *); extern void pf_print_state(struct pf_state *); extern void pf_print_flags(u_int8_t); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t, u_int16_t, u_int16_t, u_int8_t); VNET_DECLARE(struct ifnet *, sync_ifp); #define V_sync_ifp VNET(sync_ifp); VNET_DECLARE(struct pf_rule, pf_default_rule); #define V_pf_default_rule VNET(pf_default_rule) extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, u_int8_t); void pf_free_rule(struct pf_rule *); #ifdef INET int pf_test(int, int, struct ifnet *, struct mbuf **, struct inpcb *); int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *, struct pf_pdesc *); #endif /* INET */ #ifdef INET6 int pf_test6(int, int, struct ifnet *, struct mbuf **, struct inpcb *); int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *, struct pf_pdesc *); void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, u_int8_t); void pf_addr_inc(struct pf_addr *, sa_family_t); int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *); #endif /* INET6 */ u_int32_t pf_new_isn(struct pf_state *); void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, sa_family_t); void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t, u_int8_t); void pf_change_tcp_a(struct mbuf *, void *, u_int16_t *, u_int32_t); void pf_send_deferred_syn(struct pf_state *); int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); int pf_match_addr_range(struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); void pf_normalize_init(void); void pf_normalize_cleanup(void); int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *, struct pf_pdesc *); void pf_normalize_tcp_cleanup(struct pf_state *); int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *); int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, u_short *, struct 
tcphdr *, struct pf_state *, struct pf_state_peer *, struct pf_state_peer *, int *); u_int32_t pf_state_expires(const struct pf_state *); void pf_purge_expired_fragments(void); void pf_purge_fragments(uint32_t); int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *, int); int pf_socket_lookup(int, struct pf_pdesc *, struct mbuf *); struct pf_state_key *pf_alloc_state_key(int); void pfr_initialize(void); void pfr_cleanup(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, u_int64_t, int, int, int); int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t); void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); struct pfr_ktable * pfr_attach_table(struct pf_ruleset *, char *); void pfr_detach_table(struct pfr_ktable *); int pfr_clr_tables(struct pfr_table *, int *, int); int pfr_add_tables(struct pfr_table *, int, int *, int); int pfr_del_tables(struct pfr_table *, int, int *, int); int pfr_table_count(struct pfr_table *, int); int pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int); int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int); int pfr_clr_tstats(struct pfr_table *, int, int *, int); int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int); int pfr_clr_addrs(struct pfr_table *, int *, int); int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long); int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int *, int *, int *, int, u_int32_t); int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int); int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int); int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int); int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int); int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); MALLOC_DECLARE(PFI_MTYPE); VNET_DECLARE(struct pfi_kif *, pfi_all); #define V_pfi_all VNET(pfi_all) void pfi_initialize(void); void pfi_initialize_vnet(void); void pfi_cleanup(void); void pfi_cleanup_vnet(void); void pfi_kif_ref(struct pfi_kif *); void pfi_kif_unref(struct pfi_kif *); struct pfi_kif *pfi_kif_find(const char *); struct pfi_kif *pfi_kif_attach(struct pfi_kif *, const char *); int pfi_kif_match(struct pfi_kif *, struct pfi_kif *); void pfi_kif_purge(void); int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, sa_family_t); int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); void pfi_dynaddr_remove(struct pfi_dynaddr *); void pfi_dynaddr_copyout(struct pf_addr_wrap *); void pfi_update_status(const char *, struct pf_status *); void pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_set_flags(const char *, int); int pfi_clear_flags(const char *, int); int pf_match_tag(struct mbuf *, struct pf_rule *, int *, int); int pf_tag_packet(struct mbuf *, struct pf_pdesc *, int); int pf_addr_cmp(struct pf_addr *, struct pf_addr *, sa_family_t); void pf_qid2qname(u_int32_t, char *); VNET_DECLARE(struct pf_kstatus, pf_status); #define V_pf_status VNET(pf_status) struct pf_limit { 
uma_zone_t zone; u_int limit; }; VNET_DECLARE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); #define V_pf_limits VNET(pf_limits) #endif /* _KERNEL */ #ifdef _KERNEL VNET_DECLARE(struct pf_anchor_global, pf_anchors); #define V_pf_anchors VNET(pf_anchors) VNET_DECLARE(struct pf_anchor, pf_main_anchor); #define V_pf_main_anchor VNET(pf_main_anchor) #define pf_main_ruleset V_pf_main_anchor.ruleset #endif /* these ruleset functions can be linked into userland programs (pfctl) */ int pf_get_ruleset_number(u_int8_t); void pf_init_ruleset(struct pf_ruleset *); int pf_anchor_setup(struct pf_rule *, const struct pf_ruleset *, const char *); int pf_anchor_copyout(const struct pf_ruleset *, const struct pf_rule *, struct pfioc_rule *); void pf_anchor_remove(struct pf_rule *); void pf_remove_if_empty_ruleset(struct pf_ruleset *); struct pf_ruleset *pf_find_ruleset(const char *); struct pf_ruleset *pf_find_or_create_ruleset(const char *); void pf_rs_initialize(void); /* The fingerprint functions can be linked into userland programs (tcpdump) */ int pf_osfp_add(struct pf_osfp_ioctl *); #ifdef _KERNEL struct pf_osfp_enlist * pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int, const struct tcphdr *); #endif /* _KERNEL */ void pf_osfp_flush(void); int pf_osfp_get(struct pf_osfp_ioctl *); int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); #ifdef _KERNEL void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); void pf_step_into_anchor(struct pf_anchor_stackframe *, int *, struct pf_ruleset **, int, struct pf_rule **, struct pf_rule **, int *); int pf_step_out_of_anchor(struct pf_anchor_stackframe *, int *, struct pf_ruleset **, int, struct pf_rule **, struct pf_rule **, int *); int pf_map_addr(u_int8_t, struct pf_rule *, struct pf_addr *, struct pf_addr *, struct pf_addr *, struct pf_src_node **); struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, int, int, struct pfi_kif *, struct pf_src_node **, struct pf_state_key **, struct pf_state_key **, struct pf_addr *, struct pf_addr *, uint16_t, uint16_t, struct pf_anchor_stackframe *); struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t); struct pf_state_key *pf_state_key_clone(struct pf_state_key *); #endif /* _KERNEL */ #endif /* _NET_PFVAR_H_ */ Index: head/sys/netpfil/pf/pf_altq.h =================================================================== --- head/sys/netpfil/pf/pf_altq.h (revision 338208) +++ head/sys/netpfil/pf/pf_altq.h (revision 338209) @@ -1,123 +1,258 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Daniel Hartmeier * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $ * $FreeBSD$ */ #ifndef _NET_PF_ALTQ_H_ #define _NET_PF_ALTQ_H_ struct cbq_opts { u_int minburst; u_int maxburst; u_int pktsize; u_int maxpktsize; u_int ns_per_byte; u_int maxidle; int minidle; u_int offtime; int flags; }; struct codel_opts { u_int target; u_int interval; int ecn; }; struct priq_opts { int flags; }; -struct hfsc_opts { +struct hfsc_opts_v0 { /* real-time service curve */ u_int rtsc_m1; /* slope of the 1st segment in bps */ u_int rtsc_d; /* the x-projection of m1 in msec */ u_int rtsc_m2; /* slope of the 2nd segment in bps */ /* link-sharing service curve */ u_int lssc_m1; u_int lssc_d; u_int lssc_m2; /* upper-limit service curve */ u_int ulsc_m1; u_int ulsc_d; u_int ulsc_m2; int flags; }; +struct hfsc_opts_v1 { + /* real-time service curve */ + u_int64_t rtsc_m1; /* slope of the 1st segment in bps */ + u_int rtsc_d; /* the x-projection of m1 in msec */ + u_int64_t rtsc_m2; /* slope of the 2nd segment in bps */ + /* link-sharing service curve */ + u_int64_t lssc_m1; + u_int lssc_d; + u_int64_t lssc_m2; + /* upper-limit service curve */ + u_int64_t ulsc_m1; + u_int ulsc_d; + u_int64_t ulsc_m2; + int flags; +}; + /* + * struct hfsc_opts doesn't have a version indicator macro or + * backwards-compat and convenience macros because both in the kernel and + * the pfctl parser, there are struct hfsc_opts instances named 'hfsc_opts'. + * It is believed that only in-tree code uses struct hfsc_opts, so + * backwards-compat macros are not necessary. The few in-tree uses can just + * be updated to the latest versioned struct tag. + */ + +/* * XXX this needs some work */ struct fairq_opts { u_int nbuckets; u_int hogs_m1; int flags; /* link sharing service curve */ u_int lssc_m1; u_int lssc_d; u_int lssc_m2; }; -struct pf_altq { +/* + * struct pf_altq_v0, struct pf_altq_v1, etc. are the ioctl argument + * structures corresponding to struct pfioc_altq_v0, struct pfioc_altq_v1, + * etc. + * + */ +struct pf_altq_v0 { char ifname[IFNAMSIZ]; - void *altq_disc; /* discipline-specific state */ - TAILQ_ENTRY(pf_altq) entries; + /* + * This member is a holdover from when the kernel state structure + * was reused as the ioctl argument structure, and remains to + * preserve the size and layout of this struct for backwards compat. 
+ */ + void *unused1; + TAILQ_ENTRY(pf_altq_v0) entries; /* scheduler spec */ uint8_t scheduler; /* scheduler type */ uint16_t tbrsize; /* tokenbucket regulator size */ uint32_t ifbandwidth; /* interface bandwidth */ /* queue spec */ char qname[PF_QNAME_SIZE]; /* queue name */ char parent[PF_QNAME_SIZE]; /* parent name */ uint32_t parent_qid; /* parent queue id */ uint32_t bandwidth; /* queue bandwidth */ uint8_t priority; /* priority */ uint8_t local_flags; /* dynamic interface */ #define PFALTQ_FLAG_IF_REMOVED 0x01 uint16_t qlimit; /* queue size limit */ uint16_t flags; /* misc flags */ union { struct cbq_opts cbq_opts; struct codel_opts codel_opts; struct priq_opts priq_opts; - struct hfsc_opts hfsc_opts; + struct hfsc_opts_v0 hfsc_opts; struct fairq_opts fairq_opts; } pq_u; uint32_t qid; /* return value */ }; + +struct pf_altq_v1 { + char ifname[IFNAMSIZ]; + + TAILQ_ENTRY(pf_altq_v1) entries; + + /* scheduler spec */ + uint8_t scheduler; /* scheduler type */ + uint32_t tbrsize; /* tokenbucket regulator size */ + uint64_t ifbandwidth; /* interface bandwidth */ + + /* queue spec */ + char qname[PF_QNAME_SIZE]; /* queue name */ + char parent[PF_QNAME_SIZE]; /* parent name */ + uint32_t parent_qid; /* parent queue id */ + uint64_t bandwidth; /* queue bandwidth */ + uint8_t priority; /* priority */ + uint8_t local_flags; /* dynamic interface, see _v0 */ + + uint16_t qlimit; /* queue size limit */ + uint16_t flags; /* misc flags */ + union { + struct cbq_opts cbq_opts; + struct codel_opts codel_opts; + struct priq_opts priq_opts; + struct hfsc_opts_v1 hfsc_opts; + struct fairq_opts fairq_opts; + } pq_u; + + uint32_t qid; /* return value */ +}; + +/* Latest version of struct pf_altq_vX */ +#define PF_ALTQ_VERSION 1 + +#ifdef _KERNEL +struct pf_kaltq { + char ifname[IFNAMSIZ]; + + void *altq_disc; /* discipline-specific state */ + TAILQ_ENTRY(pf_kaltq) entries; + + /* scheduler spec */ + uint8_t scheduler; /* scheduler type */ + uint32_t tbrsize; /* tokenbucket regulator size */ + uint64_t ifbandwidth; /* interface bandwidth */ + + /* queue spec */ + char qname[PF_QNAME_SIZE]; /* queue name */ + char parent[PF_QNAME_SIZE]; /* parent name */ + uint32_t parent_qid; /* parent queue id */ + uint64_t bandwidth; /* queue bandwidth */ + uint8_t priority; /* priority */ + uint8_t local_flags; /* dynamic interface, see _v0 */ + + uint16_t qlimit; /* queue size limit */ + uint16_t flags; /* misc flags */ + union { + struct cbq_opts cbq_opts; + struct codel_opts codel_opts; + struct priq_opts priq_opts; + struct hfsc_opts_v1 hfsc_opts; + struct fairq_opts fairq_opts; + } pq_u; + + uint32_t qid; /* return value */ +}; +#endif /* _KERNEL */ + +/* + * Compatibility and convenience macros + */ +#ifdef _KERNEL +/* + * Avoid a patch with 100+ lines of name substitution. + */ +#define pf_altq pf_kaltq + +#else /* _KERNEL */ + +#ifdef PFIOC_USE_LATEST +/* + * Maintaining in-tree consumers of the ioctl interface is easier when that + * code can be written in terms of old names that refer to the latest interface + * version as that reduces the required changes in the consumers to those + * that are functionally necessary to accommodate a new interface version. + */ +#define pf_altq __CONCAT(pf_altq_v, PF_ALTQ_VERSION) + +#else /* PFIOC_USE_LATEST */ +/* + * When building out-of-tree code that is written for the old interface, + * such as may exist in ports for example, resolve the old pf_altq struct + * tag to the v0 version.
+ */ +#define pf_altq __CONCAT(pf_altq_v, 0) + +#endif /* PFIOC_USE_LATEST */ +#endif /* _KERNEL */ #endif /* _NET_PF_ALTQ_H_ */ Index: head/sys/netpfil/pf/pf_ioctl.c =================================================================== --- head/sys/netpfil/pf/pf_ioctl.c (revision 338208) +++ head/sys/netpfil/pf/pf_ioctl.c (revision 338209) @@ -1,4054 +1,4301 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002,2003 Henning Brauer * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
 * * $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_bpf.h" #include "opt_pf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #ifdef ALTQ #include #endif static struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t); static void pf_mv_pool(struct pf_palist *, struct pf_palist *); static void pf_empty_pool(struct pf_palist *); static int pfioctl(struct cdev *, u_long, caddr_t, int, struct thread *); #ifdef ALTQ static int pf_begin_altq(u_int32_t *); static int pf_rollback_altq(u_int32_t); static int pf_commit_altq(u_int32_t); static int pf_enable_altq(struct pf_altq *); static int pf_disable_altq(struct pf_altq *); static u_int32_t pf_qname2qid(char *); static void pf_qid_unref(u_int32_t); #endif /* ALTQ */ static int pf_begin_rules(u_int32_t *, int, const char *); static int pf_rollback_rules(u_int32_t, int, char *); static int pf_setup_pfsync_matching(struct pf_ruleset *); static void pf_hash_rule(MD5_CTX *, struct pf_rule *); static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); static int pf_commit_rules(u_int32_t, int, char *); static int pf_addr_setup(struct pf_ruleset *, struct pf_addr_wrap *, sa_family_t); static void pf_addr_copyout(struct pf_addr_wrap *); +#ifdef ALTQ +static int pf_export_kaltq(struct pf_altq *, + struct pfioc_altq_v1 *, size_t); +static int pf_import_kaltq(struct pfioc_altq_v1 *, + struct pf_altq *, size_t); +#endif /* ALTQ */ VNET_DEFINE(struct pf_rule, pf_default_rule); #ifdef ALTQ VNET_DEFINE_STATIC(int, pf_altq_running); #define V_pf_altq_running VNET(pf_altq_running) #endif #define TAGID_MAX 50000 struct pf_tagname { TAILQ_ENTRY(pf_tagname) entries; char name[PF_TAG_NAME_SIZE]; uint16_t tag; int ref; }; TAILQ_HEAD(pf_tags, pf_tagname); #define V_pf_tags VNET(pf_tags) VNET_DEFINE(struct pf_tags, pf_tags); #define V_pf_qids VNET(pf_qids) VNET_DEFINE(struct pf_tags, pf_qids); static MALLOC_DEFINE(M_PFTAG, "pf_tag", "pf(4) tag names"); static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db"); static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif static u_int16_t tagname2tag(struct pf_tags *, char *); static u_int16_t pf_tagname2tag(char *); static void tag_unref(struct pf_tags *, u_int16_t); #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x struct cdev *pf_dev; /* * XXX - These are new and need to be checked when moving to a new version */ static void pf_clear_states(void); static int pf_clear_tables(void); static void pf_clear_srcnodes(struct pf_src_node *); static void pf_kill_srcnodes(struct pfioc_src_node_kill *); static void pf_tbladdr_copyout(struct pf_addr_wrap *); /* * Wrapper functions for pfil(9) hooks */ #ifdef INET static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, struct inpcb *inp); static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, struct inpcb *inp); #endif #ifdef INET6 static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, struct inpcb *inp); static int
pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, struct inpcb *inp); #endif static int hook_pf(void); static int dehook_pf(void); static int shutdown_pf(void); static int pf_load(void); static void pf_unload(void); static struct cdevsw pf_cdevsw = { .d_ioctl = pfioctl, .d_name = PF_NAME, .d_version = D_VERSION, }; volatile VNET_DEFINE_STATIC(int, pf_pfil_hooked); #define V_pf_pfil_hooked VNET(pf_pfil_hooked) /* * We need a flag that is neither hooked nor running to know when * the VNET is "valid". We primarily need this to control (global) * external events, e.g., eventhandlers. */ VNET_DEFINE(int, pf_vnet_active); #define V_pf_vnet_active VNET(pf_vnet_active) int pf_end_threads; struct proc *pf_purge_proc; struct rmlock pf_rules_lock; struct sx pf_ioctl_lock; struct sx pf_end_lock; /* pfsync */ pfsync_state_import_t *pfsync_state_import_ptr = NULL; pfsync_insert_state_t *pfsync_insert_state_ptr = NULL; pfsync_update_state_t *pfsync_update_state_ptr = NULL; pfsync_delete_state_t *pfsync_delete_state_ptr = NULL; pfsync_clear_states_t *pfsync_clear_states_ptr = NULL; pfsync_defer_t *pfsync_defer_ptr = NULL; /* pflog */ pflog_packet_t *pflog_packet_ptr = NULL; extern u_long pf_ioctl_maxcount; static void pfattach_vnet(void) { u_int32_t *my_timeout = V_pf_default_rule.timeout; pf_initialize(); pfr_initialize(); pfi_initialize_vnet(); pf_normalize_init(); V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; RB_INIT(&V_pf_anchors); pf_init_ruleset(&pf_main_ruleset); /* default rule should never be garbage collected */ V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next; #ifdef PF_DEFAULT_TO_DROP V_pf_default_rule.action = PF_DROP; #else V_pf_default_rule.action = PF_PASS; #endif V_pf_default_rule.nr = -1; V_pf_default_rule.rtableid = -1; V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK); V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK); V_pf_default_rule.src_nodes = counter_u64_alloc(M_WAITOK); /* initialize default timeouts */ my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL; my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL; my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL; my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL; my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL; my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL; my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL; my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL; my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL; my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL; my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL; my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL; my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; bzero(&V_pf_status, sizeof(V_pf_status)); V_pf_status.debug = PF_DEBUG_URGENT; V_pf_pfil_hooked = 0; /* XXX do our best to avoid a conflict */ V_pf_status.hostid = arc4random(); for (int i = 0; i < PFRES_MAX; i++) V_pf_status.counters[i] = counter_u64_alloc(M_WAITOK); for (int i = 0; i < LCNT_MAX; i++) V_pf_status.lcounters[i] = counter_u64_alloc(M_WAITOK); for (int i = 0;
i < FCNT_MAX; i++) V_pf_status.fcounters[i] = counter_u64_alloc(M_WAITOK); for (int i = 0; i < SCNT_MAX; i++) V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK); if (swi_add(NULL, "pf send", pf_intr, curvnet, SWI_NET, INTR_MPSAFE, &V_pf_swi_cookie) != 0) /* XXXGL: leaked all above. */ return; } static struct pf_pool * pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, u_int32_t rule_number, u_int8_t r_last, u_int8_t active, u_int8_t check_ticket) { struct pf_ruleset *ruleset; struct pf_rule *rule; int rs_num; ruleset = pf_find_ruleset(anchor); if (ruleset == NULL) return (NULL); rs_num = pf_get_ruleset_number(rule_action); if (rs_num >= PF_RULESET_MAX) return (NULL); if (active) { if (check_ticket && ticket != ruleset->rules[rs_num].active.ticket) return (NULL); if (r_last) rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, pf_rulequeue); else rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); } else { if (check_ticket && ticket != ruleset->rules[rs_num].inactive.ticket) return (NULL); if (r_last) rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_rulequeue); else rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr); } if (!r_last) { while ((rule != NULL) && (rule->nr != rule_number)) rule = TAILQ_NEXT(rule, entries); } if (rule == NULL) return (NULL); return (&rule->rpool); } static void pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) { struct pf_pooladdr *mv_pool_pa; while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) { TAILQ_REMOVE(poola, mv_pool_pa, entries); TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries); } } static void pf_empty_pool(struct pf_palist *poola) { struct pf_pooladdr *pa; while ((pa = TAILQ_FIRST(poola)) != NULL) { switch (pa->addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(pa->addr.p.dyn); break; case PF_ADDR_TABLE: /* XXX: this could be unfinished pooladdr on pabuf */ if (pa->addr.p.tbl != NULL) pfr_detach_table(pa->addr.p.tbl); break; } if (pa->kif) pfi_kif_unref(pa->kif); TAILQ_REMOVE(poola, pa, entries); free(pa, M_PFRULE); } } static void pf_unlink_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) { PF_RULES_WASSERT(); TAILQ_REMOVE(rulequeue, rule, entries); PF_UNLNKDRULES_LOCK(); rule->rule_flag |= PFRULE_REFS; TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries); PF_UNLNKDRULES_UNLOCK(); } void pf_free_rule(struct pf_rule *rule) { PF_RULES_WASSERT(); if (rule->tag) tag_unref(&V_pf_tags, rule->tag); if (rule->match_tag) tag_unref(&V_pf_tags, rule->match_tag); #ifdef ALTQ if (rule->pqid != rule->qid) pf_qid_unref(rule->pqid); pf_qid_unref(rule->qid); #endif switch (rule->src.addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(rule->src.addr.p.dyn); break; case PF_ADDR_TABLE: pfr_detach_table(rule->src.addr.p.tbl); break; } switch (rule->dst.addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(rule->dst.addr.p.dyn); break; case PF_ADDR_TABLE: pfr_detach_table(rule->dst.addr.p.tbl); break; } if (rule->overload_tbl) pfr_detach_table(rule->overload_tbl); if (rule->kif) pfi_kif_unref(rule->kif); pf_anchor_remove(rule); pf_empty_pool(&rule->rpool.list); counter_u64_free(rule->states_cur); counter_u64_free(rule->states_tot); counter_u64_free(rule->src_nodes); free(rule, M_PFRULE); } static u_int16_t tagname2tag(struct pf_tags *head, char *tagname) { struct pf_tagname *tag, *p = NULL; u_int16_t new_tagid = 1; PF_RULES_WASSERT(); TAILQ_FOREACH(tag, head, entries) if (strcmp(tagname, tag->name) == 0) { tag->ref++; return (tag->tag); } /* * to avoid fragmentation, we do a linear search from the beginning * and 
take the first free slot we find. if there is none or the list * is empty, append a new entry at the end. */ /* new entry */ if (!TAILQ_EMPTY(head)) for (p = TAILQ_FIRST(head); p != NULL && p->tag == new_tagid; p = TAILQ_NEXT(p, entries)) new_tagid = p->tag + 1; if (new_tagid > TAGID_MAX) return (0); /* allocate and fill new struct pf_tagname */ tag = malloc(sizeof(*tag), M_PFTAG, M_NOWAIT|M_ZERO); if (tag == NULL) return (0); strlcpy(tag->name, tagname, sizeof(tag->name)); tag->tag = new_tagid; tag->ref++; if (p != NULL) /* insert new entry before p */ TAILQ_INSERT_BEFORE(p, tag, entries); else /* either list empty or no free slot in between */ TAILQ_INSERT_TAIL(head, tag, entries); return (tag->tag); } static void tag_unref(struct pf_tags *head, u_int16_t tag) { struct pf_tagname *p, *next; PF_RULES_WASSERT(); for (p = TAILQ_FIRST(head); p != NULL; p = next) { next = TAILQ_NEXT(p, entries); if (tag == p->tag) { if (--p->ref == 0) { TAILQ_REMOVE(head, p, entries); free(p, M_PFTAG); } break; } } } static u_int16_t pf_tagname2tag(char *tagname) { return (tagname2tag(&V_pf_tags, tagname)); } #ifdef ALTQ static u_int32_t pf_qname2qid(char *qname) { return ((u_int32_t)tagname2tag(&V_pf_qids, qname)); } static void pf_qid_unref(u_int32_t qid) { tag_unref(&V_pf_qids, (u_int16_t)qid); } static int pf_begin_altq(u_int32_t *ticket) { struct pf_altq *altq; int error = 0; PF_RULES_WASSERT(); /* Purge the old altq list */ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } if (error) return (error); *ticket = ++V_ticket_altqs_inactive; V_altqs_inactive_open = 1; return (0); } static int pf_rollback_altq(u_int32_t ticket) { struct pf_altq *altq; int error = 0; PF_RULES_WASSERT(); if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) return (0); /* Purge the old altq list */ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } V_altqs_inactive_open = 0; return (error); } static int pf_commit_altq(u_int32_t ticket) { struct pf_altqqueue *old_altqs; struct pf_altq *altq; int err, error = 0; PF_RULES_WASSERT(); if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) return (EBUSY); /* swap altqs, keep the old. 
*/ old_altqs = V_pf_altqs_active; V_pf_altqs_active = V_pf_altqs_inactive; V_pf_altqs_inactive = old_altqs; V_ticket_altqs_active = V_ticket_altqs_inactive; /* Attach new disciplines */ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* attach the discipline */ error = altq_pfattach(altq); if (error == 0 && V_pf_altq_running) error = pf_enable_altq(altq); if (error != 0) return (error); } } /* Purge the old altq list */ while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ if (V_pf_altq_running) error = pf_disable_altq(altq); err = altq_pfdetach(altq); if (err != 0 && error == 0) error = err; err = altq_remove(altq); if (err != 0 && error == 0) error = err; } else pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } V_altqs_inactive_open = 0; return (error); } static int pf_enable_altq(struct pf_altq *altq) { struct ifnet *ifp; struct tb_profile tb; int error = 0; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); if (ifp->if_snd.altq_type != ALTQT_NONE) error = altq_enable(&ifp->if_snd); /* set tokenbucket regulator */ if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { tb.rate = altq->ifbandwidth; tb.depth = altq->tbrsize; error = tbr_set(&ifp->if_snd, &tb); } return (error); } static int pf_disable_altq(struct pf_altq *altq) { struct ifnet *ifp; struct tb_profile tb; int error; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); /* * when the discipline is no longer referenced, it was overridden * by a new one. if so, just return. */ if (altq->altq_disc != ifp->if_snd.altq_disc) return (0); error = altq_disable(&ifp->if_snd); if (error == 0) { /* clear tokenbucket regulator */ tb.rate = 0; error = tbr_set(&ifp->if_snd, &tb); } return (error); } void pf_altq_ifnet_event(struct ifnet *ifp, int remove) { struct ifnet *ifp1; struct pf_altq *a1, *a2, *a3; u_int32_t ticket; int error = 0; /* Interrupt userland queue modifications */ if (V_altqs_inactive_open) pf_rollback_altq(V_ticket_altqs_inactive); /* Start new altq ruleset */ if (pf_begin_altq(&ticket)) return; /* Copy the current active set */ TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); if (a2 == NULL) { error = ENOMEM; break; } bcopy(a1, a2, sizeof(struct pf_altq)); if (a2->qname[0] != 0) { if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { error = EBUSY; free(a2, M_PFALTQ); break; } a2->altq_disc = NULL; TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) { if (strncmp(a3->ifname, a2->ifname, IFNAMSIZ) == 0 && a3->qname[0] == 0) { a2->altq_disc = a3->altq_disc; break; } } } /* Deactivate the interface in question */ a2->local_flags &= ~PFALTQ_FLAG_IF_REMOVED; if ((ifp1 = ifunit(a2->ifname)) == NULL || (remove && ifp1 == ifp)) { a2->local_flags |= PFALTQ_FLAG_IF_REMOVED; } else { error = altq_add(a2); if (ticket != V_ticket_altqs_inactive) error = EBUSY; if (error) { free(a2, M_PFALTQ); break; } } TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries); } if (error != 0) pf_rollback_altq(ticket); else pf_commit_altq(ticket); } #endif /* ALTQ */ static int pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; PF_RULES_WASSERT(); if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_or_create_ruleset(anchor); if (rs == NULL) return (EINVAL); 
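The begin/rollback/commit trio above is a ticket-guarded two-list transaction: edits accumulate on the inactive list under a freshly issued ticket, commit swaps the active and inactive list pointers, and the displaced list is purged; pf_begin_rules() and pf_commit_rules() below apply the same pattern to rulesets. A condensed model of that control flow (an illustrative sketch only; locking, the ALTQ discipline attach/detach work, and the add path are elided):

#include <sys/queue.h>
#include <errno.h>
#include <stdlib.h>

struct item {
	TAILQ_ENTRY(item) entries;
};
TAILQ_HEAD(itemq, item);

static struct itemq queues[2] = {
	TAILQ_HEAD_INITIALIZER(queues[0]),
	TAILQ_HEAD_INITIALIZER(queues[1]),
};
static struct itemq *active = &queues[0];
static struct itemq *inactive = &queues[1];
static unsigned int ticket_active, ticket_inactive;
static int inactive_open;

static void
purge(struct itemq *q)
{
	struct item *it;

	while ((it = TAILQ_FIRST(q)) != NULL) {
		TAILQ_REMOVE(q, it, entries);
		free(it);
	}
}

static unsigned int
begin(void)
{
	purge(inactive);		/* drop any abandoned transaction */
	inactive_open = 1;
	return (++ticket_inactive);	/* caller presents this to commit */
}

static int
commit(unsigned int ticket)
{
	struct itemq *tmp;

	if (!inactive_open || ticket != ticket_inactive)
		return (EBUSY);		/* a stale ticket loses the race */
	tmp = active;			/* swap the lists, keep the old */
	active = inactive;
	inactive = tmp;
	ticket_active = ticket_inactive;
	purge(inactive);		/* retire the previous active set */
	inactive_open = 0;
	return (0);
}

The ticket is what lets concurrent editors fail cleanly with EBUSY instead of silently interleaving half-built queue sets.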
while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); rs->rules[rs_num].inactive.rcount--; } *ticket = ++rs->rules[rs_num].inactive.ticket; rs->rules[rs_num].inactive.open = 1; return (0); } static int pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; PF_RULES_WASSERT(); if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_ruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || rs->rules[rs_num].inactive.ticket != ticket) return (0); while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); rs->rules[rs_num].inactive.rcount--; } rs->rules[rs_num].inactive.open = 0; return (0); } #define PF_MD5_UPD(st, elm) \ MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm)) #define PF_MD5_UPD_STR(st, elm) \ MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm)) #define PF_MD5_UPD_HTONL(st, elm, stor) do { \ (stor) = htonl((st)->elm); \ MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\ } while (0) #define PF_MD5_UPD_HTONS(st, elm, stor) do { \ (stor) = htons((st)->elm); \ MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\ } while (0) static void pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) { PF_MD5_UPD(pfr, addr.type); switch (pfr->addr.type) { case PF_ADDR_DYNIFTL: PF_MD5_UPD(pfr, addr.v.ifname); PF_MD5_UPD(pfr, addr.iflags); break; case PF_ADDR_TABLE: PF_MD5_UPD(pfr, addr.v.tblname); break; case PF_ADDR_ADDRMASK: /* XXX ignore af? */ PF_MD5_UPD(pfr, addr.v.a.addr.addr32); PF_MD5_UPD(pfr, addr.v.a.mask.addr32); break; } PF_MD5_UPD(pfr, port[0]); PF_MD5_UPD(pfr, port[1]); PF_MD5_UPD(pfr, neg); PF_MD5_UPD(pfr, port_op); } static void pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) { u_int16_t x; u_int32_t y; pf_hash_rule_addr(ctx, &rule->src); pf_hash_rule_addr(ctx, &rule->dst); PF_MD5_UPD_STR(rule, label); PF_MD5_UPD_STR(rule, ifname); PF_MD5_UPD_STR(rule, match_tagname); PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */ PF_MD5_UPD_HTONL(rule, os_fingerprint, y); PF_MD5_UPD_HTONL(rule, prob, y); PF_MD5_UPD_HTONL(rule, uid.uid[0], y); PF_MD5_UPD_HTONL(rule, uid.uid[1], y); PF_MD5_UPD(rule, uid.op); PF_MD5_UPD_HTONL(rule, gid.gid[0], y); PF_MD5_UPD_HTONL(rule, gid.gid[1], y); PF_MD5_UPD(rule, gid.op); PF_MD5_UPD_HTONL(rule, rule_flag, y); PF_MD5_UPD(rule, action); PF_MD5_UPD(rule, direction); PF_MD5_UPD(rule, af); PF_MD5_UPD(rule, quick); PF_MD5_UPD(rule, ifnot); PF_MD5_UPD(rule, match_tag_not); PF_MD5_UPD(rule, natpass); PF_MD5_UPD(rule, keep_state); PF_MD5_UPD(rule, proto); PF_MD5_UPD(rule, type); PF_MD5_UPD(rule, code); PF_MD5_UPD(rule, flags); PF_MD5_UPD(rule, flagset); PF_MD5_UPD(rule, allow_opts); PF_MD5_UPD(rule, rt); PF_MD5_UPD(rule, tos); } static int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule, **old_array; struct pf_rulequeue *old_rules; int error; u_int32_t old_rcount; PF_RULES_WASSERT(); if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_ruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || ticket != rs->rules[rs_num].inactive.ticket) return (EBUSY); /* Calculate checksum for the main ruleset */ if (rs == &pf_main_ruleset) { error = pf_setup_pfsync_matching(rs); if (error != 0) return (error); } /* Swap rules, keep the old. 
*/ old_rules = rs->rules[rs_num].active.ptr; old_rcount = rs->rules[rs_num].active.rcount; old_array = rs->rules[rs_num].active.ptr_array; rs->rules[rs_num].active.ptr = rs->rules[rs_num].inactive.ptr; rs->rules[rs_num].active.ptr_array = rs->rules[rs_num].inactive.ptr_array; rs->rules[rs_num].active.rcount = rs->rules[rs_num].inactive.rcount; rs->rules[rs_num].inactive.ptr = old_rules; rs->rules[rs_num].inactive.ptr_array = old_array; rs->rules[rs_num].inactive.rcount = old_rcount; rs->rules[rs_num].active.ticket = rs->rules[rs_num].inactive.ticket; pf_calc_skip_steps(rs->rules[rs_num].active.ptr); /* Purge the old rule list. */ while ((rule = TAILQ_FIRST(old_rules)) != NULL) pf_unlink_rule(old_rules, rule); if (rs->rules[rs_num].inactive.ptr_array) free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); rs->rules[rs_num].inactive.ptr_array = NULL; rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_ruleset(rs); return (0); } static int pf_setup_pfsync_matching(struct pf_ruleset *rs) { MD5_CTX ctx; struct pf_rule *rule; int rs_cnt; u_int8_t digest[PF_MD5_DIGEST_LENGTH]; MD5Init(&ctx); for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) { /* XXX PF_RULESET_SCRUB as well? */ if (rs_cnt == PF_RULESET_SCRUB) continue; if (rs->rules[rs_cnt].inactive.ptr_array) free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP); rs->rules[rs_cnt].inactive.ptr_array = NULL; if (rs->rules[rs_cnt].inactive.rcount) { rs->rules[rs_cnt].inactive.ptr_array = malloc(sizeof(caddr_t) * rs->rules[rs_cnt].inactive.rcount, M_TEMP, M_NOWAIT); if (!rs->rules[rs_cnt].inactive.ptr_array) return (ENOMEM); } TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, entries) { pf_hash_rule(&ctx, rule); (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule; } } MD5Final(digest, &ctx); memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum)); return (0); } static int pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr, sa_family_t af) { int error = 0; switch (addr->type) { case PF_ADDR_TABLE: addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname); if (addr->p.tbl == NULL) error = ENOMEM; break; case PF_ADDR_DYNIFTL: error = pfi_dynaddr_setup(addr, af); break; } return (error); } static void pf_addr_copyout(struct pf_addr_wrap *addr) { switch (addr->type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_copyout(addr); break; case PF_ADDR_TABLE: pf_tbladdr_copyout(addr); break; } } +#ifdef ALTQ +/* + * Handle export of struct pf_kaltq to user binaries that may be using any + * version of struct pf_altq. 
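One detail of the export path below is easy to miss: only the v0 parameter structure predates the embedded version field, so its size alone identifies a v0 caller, while every later layout self-identifies through pa->version. A sketch of that decision (ioc_altq_version() is a hypothetical helper; the real functions open-code the test using the size that IOCPARM_LEN(cmd) recovers from the ioctl command word):

#include <sys/ioccom.h>		/* IOCPARM_LEN() */

/* assumes the pf ioctl structures are already in scope */
static u_int32_t
ioc_altq_version(u_long cmd, const struct pfioc_altq_v1 *pa)
{
	if (IOCPARM_LEN(cmd) == sizeof(struct pfioc_altq_v0))
		return (0);	/* v0 layout carries no version field */
	return (pa->version);	/* v1 and later state it explicitly */
}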
+ */ +static int +pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size) +{ + u_int32_t version; + + if (ioc_size == sizeof(struct pfioc_altq_v0)) + version = 0; + else + version = pa->version; + + if (version > PFIOC_ALTQ_VERSION) + return (EINVAL); + +#define ASSIGN(x) exported_q->x = q->x +#define COPY(x) \ + bcopy(&q->x, &exported_q->x, min(sizeof(q->x), sizeof(exported_q->x))) +#define SATU16(x) (u_int32_t)uqmin((x), USHRT_MAX) +#define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX) + + switch (version) { + case 0: { + struct pf_altq_v0 *exported_q = + &((struct pfioc_altq_v0 *)pa)->altq; + + COPY(ifname); + + ASSIGN(scheduler); + exported_q->tbrsize = SATU16(q->tbrsize); + exported_q->ifbandwidth = SATU32(q->ifbandwidth); + + COPY(qname); + COPY(parent); + ASSIGN(parent_qid); + exported_q->bandwidth = SATU32(q->bandwidth); + ASSIGN(priority); + ASSIGN(local_flags); + + ASSIGN(qlimit); + ASSIGN(flags); + + if (q->scheduler == ALTQT_HFSC) { +#define ASSIGN_OPT(x) exported_q->pq_u.hfsc_opts.x = q->pq_u.hfsc_opts.x +#define ASSIGN_OPT_SATU32(x) exported_q->pq_u.hfsc_opts.x = \ + SATU32(q->pq_u.hfsc_opts.x) + + ASSIGN_OPT_SATU32(rtsc_m1); + ASSIGN_OPT(rtsc_d); + ASSIGN_OPT_SATU32(rtsc_m2); + + ASSIGN_OPT_SATU32(lssc_m1); + ASSIGN_OPT(lssc_d); + ASSIGN_OPT_SATU32(lssc_m2); + + ASSIGN_OPT_SATU32(ulsc_m1); + ASSIGN_OPT(ulsc_d); + ASSIGN_OPT_SATU32(ulsc_m2); + + ASSIGN_OPT(flags); + +#undef ASSIGN_OPT +#undef ASSIGN_OPT_SATU32 + } else + COPY(pq_u); + + ASSIGN(qid); + break; + } + case 1: { + struct pf_altq_v1 *exported_q = + &((struct pfioc_altq_v1 *)pa)->altq; + + COPY(ifname); + + ASSIGN(scheduler); + ASSIGN(tbrsize); + ASSIGN(ifbandwidth); + + COPY(qname); + COPY(parent); + ASSIGN(parent_qid); + ASSIGN(bandwidth); + ASSIGN(priority); + ASSIGN(local_flags); + + ASSIGN(qlimit); + ASSIGN(flags); + COPY(pq_u); + + ASSIGN(qid); + break; + } + default: + panic("%s: unhandled struct pfioc_altq version", __func__); + break; + } + +#undef ASSIGN +#undef COPY +#undef SATU16 +#undef SATU32 + + return (0); +} + +/* + * Handle import to struct pf_kaltq of struct pf_altq from user binaries + * that may be using any version of it. + */ +static int +pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size) +{ + u_int32_t version; + + if (ioc_size == sizeof(struct pfioc_altq_v0)) + version = 0; + else + version = pa->version; + + if (version > PFIOC_ALTQ_VERSION) + return (EINVAL); + +#define ASSIGN(x) q->x = imported_q->x +#define COPY(x) \ + bcopy(&imported_q->x, &q->x, min(sizeof(imported_q->x), sizeof(q->x))) + + switch (version) { + case 0: { + struct pf_altq_v0 *imported_q = + &((struct pfioc_altq_v0 *)pa)->altq; + + COPY(ifname); + + ASSIGN(scheduler); + ASSIGN(tbrsize); /* 16-bit -> 32-bit */ + ASSIGN(ifbandwidth); /* 32-bit -> 64-bit */ + + COPY(qname); + COPY(parent); + ASSIGN(parent_qid); + ASSIGN(bandwidth); /* 32-bit -> 64-bit */ + ASSIGN(priority); + ASSIGN(local_flags); + + ASSIGN(qlimit); + ASSIGN(flags); + + if (imported_q->scheduler == ALTQT_HFSC) { +#define ASSIGN_OPT(x) q->pq_u.hfsc_opts.x = imported_q->pq_u.hfsc_opts.x + + /* + * The m1 and m2 parameters are being copied from + * 32-bit to 64-bit.
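Also worth spelling out: when a wide kernel value is exported to an older consumer, SATU16()/SATU32() saturate rather than truncate, clamping through uqmin() so an out-of-range value degrades to the type's maximum instead of wrapping. Illustrative arithmetic (uqmin_sketch() merely stands in for the kernel's uqmin()):

#include <limits.h>
#include <stdint.h>

static uint64_t
uqmin_sketch(uint64_t a, uint64_t b)
{
	return (a < b ? a : b);
}

/*
 * A 5 Gb/s ifbandwidth does not fit the v0 consumer's 32-bit field:
 *
 *	(uint32_t)uqmin_sketch(5000000000ULL, UINT_MAX) == 4294967295
 *
 * whereas a bare (uint32_t) cast would wrap to 705032704 and report
 * roughly 0.7 Gb/s; this is why the narrowed fields bypass ASSIGN()
 * and go through the saturating macros instead.
 */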
+ */ + ASSIGN_OPT(rtsc_m1); + ASSIGN_OPT(rtsc_d); + ASSIGN_OPT(rtsc_m2); + + ASSIGN_OPT(lssc_m1); + ASSIGN_OPT(lssc_d); + ASSIGN_OPT(lssc_m2); + + ASSIGN_OPT(ulsc_m1); + ASSIGN_OPT(ulsc_d); + ASSIGN_OPT(ulsc_m2); + + ASSIGN_OPT(flags); + +#undef ASSIGN_OPT + } else + COPY(pq_u); + + ASSIGN(qid); + break; + } + case 1: { + struct pf_altq_v1 *imported_q = + &((struct pfioc_altq_v1 *)pa)->altq; + + COPY(ifname); + + ASSIGN(scheduler); + ASSIGN(tbrsize); + ASSIGN(ifbandwidth); + + COPY(qname); + COPY(parent); + ASSIGN(parent_qid); + ASSIGN(bandwidth); + ASSIGN(priority); + ASSIGN(local_flags); + + ASSIGN(qlimit); + ASSIGN(flags); + COPY(pq_u); + + ASSIGN(qid); + break; + } + default: + panic("%s: unhandled struct pfioc_altq version", __func__); + break; + } + +#undef ASSIGN +#undef COPY + + return (0); +} +#endif /* ALTQ */ + +static int pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { int error = 0; PF_RULES_RLOCK_TRACKER; /* XXX keep in sync with switch() below */ if (securelevel_gt(td->td_ucred, 2)) switch (cmd) { case DIOCGETRULES: case DIOCGETRULE: case DIOCGETADDRS: case DIOCGETADDR: case DIOCGETSTATE: case DIOCSETSTATUSIF: case DIOCGETSTATUS: case DIOCCLRSTATUS: case DIOCNATLOOK: case DIOCSETDEBUG: case DIOCGETSTATES: case DIOCGETTIMEOUT: case DIOCCLRRULECTRS: case DIOCGETLIMIT: - case DIOCGETALTQS: - case DIOCGETALTQ: - case DIOCGETQSTATS: + case DIOCGETALTQSV0: + case DIOCGETALTQSV1: + case DIOCGETALTQV0: + case DIOCGETALTQV1: + case DIOCGETQSTATSV0: + case DIOCGETQSTATSV1: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCRGETTABLES: case DIOCRGETTSTATS: case DIOCRCLRTSTATS: case DIOCRCLRADDRS: case DIOCRADDADDRS: case DIOCRDELADDRS: case DIOCRSETADDRS: case DIOCRGETADDRS: case DIOCRGETASTATS: case DIOCRCLRASTATS: case DIOCRTSTADDRS: case DIOCOSFPGET: case DIOCGETSRCNODES: case DIOCCLRSRCNODES: case DIOCIGETIFACES: - case DIOCGIFSPEED: + case DIOCGIFSPEEDV0: + case DIOCGIFSPEEDV1: case DIOCSETIFFLAG: case DIOCCLRIFFLAG: break; case DIOCRCLRTABLES: case DIOCRADDTABLES: case DIOCRDELTABLES: case DIOCRSETTFLAGS: if (((struct pfioc_table *)addr)->pfrio_flags & PFR_FLAG_DUMMY) break; /* dummy operation ok */ return (EPERM); default: return (EPERM); } if (!(flags & FWRITE)) switch (cmd) { case DIOCGETRULES: case DIOCGETADDRS: case DIOCGETADDR: case DIOCGETSTATE: case DIOCGETSTATUS: case DIOCGETSTATES: case DIOCGETTIMEOUT: case DIOCGETLIMIT: - case DIOCGETALTQS: - case DIOCGETALTQ: - case DIOCGETQSTATS: + case DIOCGETALTQSV0: + case DIOCGETALTQSV1: + case DIOCGETALTQV0: + case DIOCGETALTQV1: + case DIOCGETQSTATSV0: + case DIOCGETQSTATSV1: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCNATLOOK: case DIOCRGETTABLES: case DIOCRGETTSTATS: case DIOCRGETADDRS: case DIOCRGETASTATS: case DIOCRTSTADDRS: case DIOCOSFPGET: case DIOCGETSRCNODES: case DIOCIGETIFACES: - case DIOCGIFSPEED: + case DIOCGIFSPEEDV1: + case DIOCGIFSPEEDV0: break; case DIOCRCLRTABLES: case DIOCRADDTABLES: case DIOCRDELTABLES: case DIOCRCLRTSTATS: case DIOCRCLRADDRS: case DIOCRADDADDRS: case DIOCRDELADDRS: case DIOCRSETADDRS: case DIOCRSETTFLAGS: if (((struct pfioc_table *)addr)->pfrio_flags & PFR_FLAG_DUMMY) { flags |= FWRITE; /* need write lock for dummy */ break; /* dummy operation ok */ } return (EACCES); case DIOCGETRULE: if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR) return (EACCES); break; default: return (EACCES); } CURVNET_SET(TD_TO_VNET(td)); switch (cmd) { case DIOCSTART: sx_xlock(&pf_ioctl_lock); if (V_pf_status.running) error = EEXIST; else { int cpu; error = 
hook_pf(); if (error) { DPFPRINTF(PF_DEBUG_MISC, ("pf: pfil registration failed\n")); break; } V_pf_status.running = 1; V_pf_status.since = time_second; CPU_FOREACH(cpu) V_pf_stateid[cpu] = time_second; DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); } break; case DIOCSTOP: sx_xlock(&pf_ioctl_lock); if (!V_pf_status.running) error = ENOENT; else { V_pf_status.running = 0; error = dehook_pf(); if (error) { V_pf_status.running = 1; DPFPRINTF(PF_DEBUG_MISC, ("pf: pfil unregistration failed\n")); } V_pf_status.since = time_second; DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); } break; case DIOCADDRULE: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *rule, *tail; struct pf_pooladdr *pa; struct pfi_kif *kif = NULL; int rs_num; if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; break; } #ifndef INET if (pr->rule.af == AF_INET) { error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pr->rule.af == AF_INET6) { error = EAFNOSUPPORT; break; } #endif /* INET6 */ rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK); bcopy(&pr->rule, rule, sizeof(struct pf_rule)); if (rule->ifname[0]) kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); rule->states_cur = counter_u64_alloc(M_WAITOK); rule->states_tot = counter_u64_alloc(M_WAITOK); rule->src_nodes = counter_u64_alloc(M_WAITOK); rule->cuid = td->td_ucred->cr_ruid; rule->cpid = td->td_proc ? td->td_proc->p_pid : 0; TAILQ_INIT(&rule->rpool.list); #define ERROUT(x) { error = (x); goto DIOCADDRULE_error; } PF_RULES_WLOCK(); pr->anchor[sizeof(pr->anchor) - 1] = 0; ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) ERROUT(EINVAL); rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) ERROUT(EINVAL); if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) { DPFPRINTF(PF_DEBUG_MISC, ("ticket: %d != [%d]%d\n", pr->ticket, rs_num, ruleset->rules[rs_num].inactive.ticket)); ERROUT(EBUSY); } if (pr->pool_ticket != V_ticket_pabuf) { DPFPRINTF(PF_DEBUG_MISC, ("pool_ticket: %d != %d\n", pr->pool_ticket, V_ticket_pabuf)); ERROUT(EBUSY); } tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_rulequeue); if (tail) rule->nr = tail->nr + 1; else rule->nr = 0; if (rule->ifname[0]) { rule->kif = pfi_kif_attach(kif, rule->ifname); pfi_kif_ref(rule->kif); } else rule->kif = NULL; if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) error = EBUSY; #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { if ((rule->qid = pf_qname2qid(rule->qname)) == 0) error = EBUSY; else if (rule->pqname[0] != 0) { if ((rule->pqid = pf_qname2qid(rule->pqname)) == 0) error = EBUSY; } else rule->pqid = rule->qid; } #endif if (rule->tagname[0]) if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) error = EBUSY; if (rule->match_tagname[0]) if ((rule->match_tag = pf_tagname2tag(rule->match_tagname)) == 0) error = EBUSY; if (rule->rt && !rule->direction) error = EINVAL; if (!rule->log) rule->logif = 0; if (rule->logif >= PFLOGIFS_MAX) error = EINVAL; if (pf_addr_setup(ruleset, &rule->src.addr, rule->af)) error = ENOMEM; if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af)) error = ENOMEM; if (pf_anchor_setup(rule, ruleset, pr->anchor_call)) error = EINVAL; if (rule->scrub_flags & PFSTATE_SETPRIO && (rule->set_prio[0] > PF_PRIO_MAX || rule->set_prio[1] > PF_PRIO_MAX)) error = EINVAL; TAILQ_FOREACH(pa, &V_pf_pabuf, entries) if (pa->addr.type == PF_ADDR_TABLE) { pa->addr.p.tbl = pfr_attach_table(ruleset, pa->addr.v.tblname); if (pa->addr.p.tbl == NULL) error = ENOMEM; } rule->overload_tbl = NULL; 
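Since DIOCADDRULE funnels both rule tags and ALTQ queue names through the same tagname2tag() allocator shown earlier, its "first free slot" strategy can be made concrete: the list is kept sorted by tag, the walk follows the dense prefix 1, 2, 3, ... and stops at the first gap, so ids released by tag_unref() are recycled before the id space grows. A model of the search (illustrative only; lowest_free_id() is a hypothetical stand-in operating on a sorted array rather than the TAILQ):

/* Model of tagname2tag()'s id selection over a sorted id list. */
static u_int16_t
lowest_free_id(const u_int16_t *sorted_ids, int n, u_int16_t id_max)
{
	u_int16_t cand = 1;
	int i;

	/* walk the dense prefix 1, 2, 3, ...; stop at the first gap */
	for (i = 0; i < n && sorted_ids[i] == cand; i++)
		cand = sorted_ids[i] + 1;
	return (cand > id_max ? 0 : cand);	/* 0 == namespace full */
}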
if (rule->overload_tblname[0]) { if ((rule->overload_tbl = pfr_attach_table(ruleset, rule->overload_tblname)) == NULL) error = EINVAL; else rule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } pf_mv_pool(&V_pf_pabuf, &rule->rpool.list); if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || (rule->action == PF_BINAT)) && rule->anchor == NULL) || (rule->rt > PF_NOPFROUTE)) && (TAILQ_FIRST(&rule->rpool.list) == NULL)) error = EINVAL; if (error) { pf_free_rule(rule); PF_RULES_WUNLOCK(); break; } rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); rule->evaluations = rule->packets[0] = rule->packets[1] = rule->bytes[0] = rule->bytes[1] = 0; TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; PF_RULES_WUNLOCK(); break; #undef ERROUT DIOCADDRULE_error: PF_RULES_WUNLOCK(); counter_u64_free(rule->states_cur); counter_u64_free(rule->states_tot); counter_u64_free(rule->src_nodes); free(rule, M_PFRULE); if (kif) free(kif, PFI_MTYPE); break; } case DIOCGETRULES: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *tail; int rs_num; PF_RULES_WLOCK(); pr->anchor[sizeof(pr->anchor) - 1] = 0; ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { PF_RULES_WUNLOCK(); error = EINVAL; break; } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); error = EINVAL; break; } tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, pf_rulequeue); if (tail) pr->nr = tail->nr + 1; else pr->nr = 0; pr->ticket = ruleset->rules[rs_num].active.ticket; PF_RULES_WUNLOCK(); break; } case DIOCGETRULE: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *rule; int rs_num, i; PF_RULES_WLOCK(); pr->anchor[sizeof(pr->anchor) - 1] = 0; ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { PF_RULES_WUNLOCK(); error = EINVAL; break; } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); error = EINVAL; break; } if (pr->ticket != ruleset->rules[rs_num].active.ticket) { PF_RULES_WUNLOCK(); error = EBUSY; break; } rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); while ((rule != NULL) && (rule->nr != pr->nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { PF_RULES_WUNLOCK(); error = EBUSY; break; } bcopy(rule, &pr->rule, sizeof(struct pf_rule)); pr->rule.u_states_cur = counter_u64_fetch(rule->states_cur); pr->rule.u_states_tot = counter_u64_fetch(rule->states_tot); pr->rule.u_src_nodes = counter_u64_fetch(rule->src_nodes); if (pf_anchor_copyout(ruleset, rule, pr)) { PF_RULES_WUNLOCK(); error = EBUSY; break; } pf_addr_copyout(&pr->rule.src.addr); pf_addr_copyout(&pr->rule.dst.addr); for (i = 0; i < PF_SKIP_COUNT; ++i) if (rule->skip[i].ptr == NULL) pr->rule.skip[i].nr = -1; else pr->rule.skip[i].nr = rule->skip[i].ptr->nr; if (pr->action == PF_GET_CLR_CNTR) { rule->evaluations = 0; rule->packets[0] = rule->packets[1] = 0; rule->bytes[0] = rule->bytes[1] = 0; counter_u64_zero(rule->states_tot); } PF_RULES_WUNLOCK(); break; } case DIOCCHANGERULE: { struct pfioc_rule *pcr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *oldrule = NULL, *newrule = NULL; struct pfi_kif *kif = NULL; struct pf_pooladdr *pa; u_int32_t nr = 0; int rs_num; if (pcr->action < PF_CHANGE_ADD_HEAD || pcr->action > PF_CHANGE_GET_TICKET) { error = EINVAL; break; } if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; break; } if (pcr->action != PF_CHANGE_REMOVE) 
{ #ifndef INET if (pcr->rule.af == AF_INET) { error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pcr->rule.af == AF_INET6) { error = EAFNOSUPPORT; break; } #endif /* INET6 */ newrule = malloc(sizeof(*newrule), M_PFRULE, M_WAITOK); bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); if (newrule->ifname[0]) kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); newrule->states_cur = counter_u64_alloc(M_WAITOK); newrule->states_tot = counter_u64_alloc(M_WAITOK); newrule->src_nodes = counter_u64_alloc(M_WAITOK); newrule->cuid = td->td_ucred->cr_ruid; newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0; TAILQ_INIT(&newrule->rpool.list); } #define ERROUT(x) { error = (x); goto DIOCCHANGERULE_error; } PF_RULES_WLOCK(); if (!(pcr->action == PF_CHANGE_REMOVE || pcr->action == PF_CHANGE_GET_TICKET) && pcr->pool_ticket != V_ticket_pabuf) ERROUT(EBUSY); ruleset = pf_find_ruleset(pcr->anchor); if (ruleset == NULL) ERROUT(EINVAL); rs_num = pf_get_ruleset_number(pcr->rule.action); if (rs_num >= PF_RULESET_MAX) ERROUT(EINVAL); if (pcr->action == PF_CHANGE_GET_TICKET) { pcr->ticket = ++ruleset->rules[rs_num].active.ticket; ERROUT(0); } else if (pcr->ticket != ruleset->rules[rs_num].active.ticket) ERROUT(EINVAL); if (pcr->action != PF_CHANGE_REMOVE) { if (newrule->ifname[0]) { newrule->kif = pfi_kif_attach(kif, newrule->ifname); pfi_kif_ref(newrule->kif); } else newrule->kif = NULL; if (newrule->rtableid > 0 && newrule->rtableid >= rt_numfibs) error = EBUSY; #ifdef ALTQ /* set queue IDs */ if (newrule->qname[0] != 0) { if ((newrule->qid = pf_qname2qid(newrule->qname)) == 0) error = EBUSY; else if (newrule->pqname[0] != 0) { if ((newrule->pqid = pf_qname2qid(newrule->pqname)) == 0) error = EBUSY; } else newrule->pqid = newrule->qid; } #endif /* ALTQ */ if (newrule->tagname[0]) if ((newrule->tag = pf_tagname2tag(newrule->tagname)) == 0) error = EBUSY; if (newrule->match_tagname[0]) if ((newrule->match_tag = pf_tagname2tag( newrule->match_tagname)) == 0) error = EBUSY; if (newrule->rt && !newrule->direction) error = EINVAL; if (!newrule->log) newrule->logif = 0; if (newrule->logif >= PFLOGIFS_MAX) error = EINVAL; if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af)) error = ENOMEM; if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af)) error = ENOMEM; if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) error = EINVAL; TAILQ_FOREACH(pa, &V_pf_pabuf, entries) if (pa->addr.type == PF_ADDR_TABLE) { pa->addr.p.tbl = pfr_attach_table(ruleset, pa->addr.v.tblname); if (pa->addr.p.tbl == NULL) error = ENOMEM; } newrule->overload_tbl = NULL; if (newrule->overload_tblname[0]) { if ((newrule->overload_tbl = pfr_attach_table( ruleset, newrule->overload_tblname)) == NULL) error = EINVAL; else newrule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } pf_mv_pool(&V_pf_pabuf, &newrule->rpool.list); if (((((newrule->action == PF_NAT) || (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || (newrule->rt > PF_NOPFROUTE)) && !newrule->anchor)) && (TAILQ_FIRST(&newrule->rpool.list) == NULL)) error = EINVAL; if (error) { pf_free_rule(newrule); PF_RULES_WUNLOCK(); break; } newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); newrule->evaluations = 0; newrule->packets[0] = newrule->packets[1] = 0; newrule->bytes[0] = newrule->bytes[1] = 0; } pf_empty_pool(&V_pf_pabuf); if (pcr->action == PF_CHANGE_ADD_HEAD) oldrule = TAILQ_FIRST( ruleset->rules[rs_num].active.ptr); else if (pcr->action == PF_CHANGE_ADD_TAIL) oldrule = TAILQ_LAST( ruleset->rules[rs_num].active.ptr, pf_rulequeue); else { oldrule 
= TAILQ_FIRST( ruleset->rules[rs_num].active.ptr); while ((oldrule != NULL) && (oldrule->nr != pcr->nr)) oldrule = TAILQ_NEXT(oldrule, entries); if (oldrule == NULL) { if (newrule != NULL) pf_free_rule(newrule); PF_RULES_WUNLOCK(); error = EINVAL; break; } } if (pcr->action == PF_CHANGE_REMOVE) { pf_unlink_rule(ruleset->rules[rs_num].active.ptr, oldrule); ruleset->rules[rs_num].active.rcount--; } else { if (oldrule == NULL) TAILQ_INSERT_TAIL( ruleset->rules[rs_num].active.ptr, newrule, entries); else if (pcr->action == PF_CHANGE_ADD_HEAD || pcr->action == PF_CHANGE_ADD_BEFORE) TAILQ_INSERT_BEFORE(oldrule, newrule, entries); else TAILQ_INSERT_AFTER( ruleset->rules[rs_num].active.ptr, oldrule, newrule, entries); ruleset->rules[rs_num].active.rcount++; } nr = 0; TAILQ_FOREACH(oldrule, ruleset->rules[rs_num].active.ptr, entries) oldrule->nr = nr++; ruleset->rules[rs_num].active.ticket++; pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); pf_remove_if_empty_ruleset(ruleset); PF_RULES_WUNLOCK(); break; #undef ERROUT DIOCCHANGERULE_error: PF_RULES_WUNLOCK(); if (newrule != NULL) { counter_u64_free(newrule->states_cur); counter_u64_free(newrule->states_tot); counter_u64_free(newrule->src_nodes); free(newrule, M_PFRULE); } if (kif != NULL) free(kif, PFI_MTYPE); break; } case DIOCCLRSTATES: { struct pf_state *s; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; u_int i, killed = 0; for (i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; relock_DIOCCLRSTATES: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, s->kif->pfik_name)) { /* * Don't send out individual * delete messages. */ s->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(s, PF_ENTER_LOCKED); killed++; goto relock_DIOCCLRSTATES; } PF_HASHROW_UNLOCK(ih); } psk->psk_killed = killed; if (pfsync_clear_states_ptr != NULL) pfsync_clear_states_ptr(V_pf_status.hostid, psk->psk_ifname); break; } case DIOCKILLSTATES: { struct pf_state *s; struct pf_state_key *sk; struct pf_addr *srcaddr, *dstaddr; u_int16_t srcport, dstport; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; u_int i, killed = 0; if (psk->psk_pfcmp.id) { if (psk->psk_pfcmp.creatorid == 0) psk->psk_pfcmp.creatorid = V_pf_status.hostid; if ((s = pf_find_state_byid(psk->psk_pfcmp.id, psk->psk_pfcmp.creatorid))) { pf_unlink_state(s, PF_ENTER_LOCKED); psk->psk_killed = 1; } break; } for (i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; relock_DIOCKILLSTATES: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { sk = s->key[PF_SK_WIRE]; if (s->direction == PF_OUT) { srcaddr = &sk->addr[1]; dstaddr = &sk->addr[0]; srcport = sk->port[1]; dstport = sk->port[0]; } else { srcaddr = &sk->addr[0]; dstaddr = &sk->addr[1]; srcport = sk->port[0]; dstport = sk->port[1]; } if ((!psk->psk_af || sk->af == psk->psk_af) && (!psk->psk_proto || psk->psk_proto == sk->proto) && PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, srcaddr, sk->af) && PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, dstaddr, sk->af) && (psk->psk_src.port_op == 0 || pf_match_port(psk->psk_src.port_op, psk->psk_src.port[0], psk->psk_src.port[1], srcport)) && (psk->psk_dst.port_op == 0 || pf_match_port(psk->psk_dst.port_op, psk->psk_dst.port[0], psk->psk_dst.port[1], dstport)) && (!psk->psk_label[0] || (s->rule.ptr->label[0] && !strcmp(psk->psk_label, s->rule.ptr->label))) && (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, 
s->kif->pfik_name))) { pf_unlink_state(s, PF_ENTER_LOCKED); killed++; goto relock_DIOCKILLSTATES; } } PF_HASHROW_UNLOCK(ih); } psk->psk_killed = killed; break; } case DIOCADDSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pfsync_state *sp = &ps->state; if (sp->timeout >= PFTM_MAX) { error = EINVAL; break; } if (pfsync_state_import_ptr != NULL) { PF_RULES_RLOCK(); error = pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL); PF_RULES_RUNLOCK(); } else error = EOPNOTSUPP; break; } case DIOCGETSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pf_state *s; s = pf_find_state_byid(ps->state.id, ps->state.creatorid); if (s == NULL) { error = ENOENT; break; } pfsync_state_export(&ps->state, s); PF_STATE_UNLOCK(s); break; } case DIOCGETSTATES: { struct pfioc_states *ps = (struct pfioc_states *)addr; struct pf_state *s; struct pfsync_state *pstore, *p; int i, nr; if (ps->ps_len == 0) { nr = uma_zone_get_cur(V_pf_state_z); ps->ps_len = sizeof(struct pfsync_state) * nr; break; } p = pstore = malloc(ps->ps_len, M_TEMP, M_WAITOK); nr = 0; for (i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { if (s->timeout == PFTM_UNLINKED) continue; if ((nr+1) * sizeof(*p) > ps->ps_len) { PF_HASHROW_UNLOCK(ih); goto DIOCGETSTATES_full; } pfsync_state_export(p, s); p++; nr++; } PF_HASHROW_UNLOCK(ih); } DIOCGETSTATES_full: error = copyout(pstore, ps->ps_states, sizeof(struct pfsync_state) * nr); if (error) { free(pstore, M_TEMP); break; } ps->ps_len = sizeof(struct pfsync_state) * nr; free(pstore, M_TEMP); break; } case DIOCGETSTATUS: { struct pf_status *s = (struct pf_status *)addr; PF_RULES_RLOCK(); s->running = V_pf_status.running; s->since = V_pf_status.since; s->debug = V_pf_status.debug; s->hostid = V_pf_status.hostid; s->states = V_pf_status.states; s->src_nodes = V_pf_status.src_nodes; for (int i = 0; i < PFRES_MAX; i++) s->counters[i] = counter_u64_fetch(V_pf_status.counters[i]); for (int i = 0; i < LCNT_MAX; i++) s->lcounters[i] = counter_u64_fetch(V_pf_status.lcounters[i]); for (int i = 0; i < FCNT_MAX; i++) s->fcounters[i] = counter_u64_fetch(V_pf_status.fcounters[i]); for (int i = 0; i < SCNT_MAX; i++) s->scounters[i] = counter_u64_fetch(V_pf_status.scounters[i]); bcopy(V_pf_status.ifname, s->ifname, IFNAMSIZ); bcopy(V_pf_status.pf_chksum, s->pf_chksum, PF_MD5_DIGEST_LENGTH); pfi_update_status(s->ifname, s); PF_RULES_RUNLOCK(); break; } case DIOCSETSTATUSIF: { struct pfioc_if *pi = (struct pfioc_if *)addr; if (pi->ifname[0] == 0) { bzero(V_pf_status.ifname, IFNAMSIZ); break; } PF_RULES_WLOCK(); strlcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ); PF_RULES_WUNLOCK(); break; } case DIOCCLRSTATUS: { PF_RULES_WLOCK(); for (int i = 0; i < PFRES_MAX; i++) counter_u64_zero(V_pf_status.counters[i]); for (int i = 0; i < FCNT_MAX; i++) counter_u64_zero(V_pf_status.fcounters[i]); for (int i = 0; i < SCNT_MAX; i++) counter_u64_zero(V_pf_status.scounters[i]); for (int i = 0; i < LCNT_MAX; i++) counter_u64_zero(V_pf_status.lcounters[i]); V_pf_status.since = time_second; if (*V_pf_status.ifname) pfi_update_status(V_pf_status.ifname, NULL); PF_RULES_WUNLOCK(); break; } case DIOCNATLOOK: { struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; struct pf_state_key *sk; struct pf_state *state; struct pf_state_key_cmp key; int m = 0, direction = pnl->direction; int sidx, didx; /* NATLOOK src and dst are reversed, so reverse sidx/didx */ sidx = (direction == PF_IN) ? 1 : 0; didx = (direction == PF_IN) ? 
0 : 1; if (!pnl->proto || PF_AZERO(&pnl->saddr, pnl->af) || PF_AZERO(&pnl->daddr, pnl->af) || ((pnl->proto == IPPROTO_TCP || pnl->proto == IPPROTO_UDP) && (!pnl->dport || !pnl->sport))) error = EINVAL; else { bzero(&key, sizeof(key)); key.af = pnl->af; key.proto = pnl->proto; PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af); key.port[sidx] = pnl->sport; PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af); key.port[didx] = pnl->dport; state = pf_find_state_all(&key, direction, &m); if (m > 1) error = E2BIG; /* more than one state */ else if (state != NULL) { /* XXXGL: not locked read */ sk = state->key[sidx]; PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); pnl->rsport = sk->port[sidx]; PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af); pnl->rdport = sk->port[didx]; } else error = ENOENT; } break; } case DIOCSETTIMEOUT: { struct pfioc_tm *pt = (struct pfioc_tm *)addr; int old; if (pt->timeout < 0 || pt->timeout >= PFTM_MAX || pt->seconds < 0) { error = EINVAL; break; } PF_RULES_WLOCK(); old = V_pf_default_rule.timeout[pt->timeout]; if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) pt->seconds = 1; V_pf_default_rule.timeout[pt->timeout] = pt->seconds; if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) wakeup(pf_purge_thread); pt->seconds = old; PF_RULES_WUNLOCK(); break; } case DIOCGETTIMEOUT: { struct pfioc_tm *pt = (struct pfioc_tm *)addr; if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) { error = EINVAL; break; } PF_RULES_RLOCK(); pt->seconds = V_pf_default_rule.timeout[pt->timeout]; PF_RULES_RUNLOCK(); break; } case DIOCGETLIMIT: { struct pfioc_limit *pl = (struct pfioc_limit *)addr; if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) { error = EINVAL; break; } PF_RULES_RLOCK(); pl->limit = V_pf_limits[pl->index].limit; PF_RULES_RUNLOCK(); break; } case DIOCSETLIMIT: { struct pfioc_limit *pl = (struct pfioc_limit *)addr; int old_limit; PF_RULES_WLOCK(); if (pl->index < 0 || pl->index >= PF_LIMIT_MAX || V_pf_limits[pl->index].zone == NULL) { PF_RULES_WUNLOCK(); error = EINVAL; break; } uma_zone_set_max(V_pf_limits[pl->index].zone, pl->limit); old_limit = V_pf_limits[pl->index].limit; V_pf_limits[pl->index].limit = pl->limit; pl->limit = old_limit; PF_RULES_WUNLOCK(); break; } case DIOCSETDEBUG: { u_int32_t *level = (u_int32_t *)addr; PF_RULES_WLOCK(); V_pf_status.debug = *level; PF_RULES_WUNLOCK(); break; } case DIOCCLRRULECTRS: { /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */ struct pf_ruleset *ruleset = &pf_main_ruleset; struct pf_rule *rule; PF_RULES_WLOCK(); TAILQ_FOREACH(rule, ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { rule->evaluations = 0; rule->packets[0] = rule->packets[1] = 0; rule->bytes[0] = rule->bytes[1] = 0; } PF_RULES_WUNLOCK(); break; } - case DIOCGIFSPEED: { - struct pf_ifspeed *psp = (struct pf_ifspeed *)addr; - struct pf_ifspeed ps; + case DIOCGIFSPEEDV0: + case DIOCGIFSPEEDV1: { + struct pf_ifspeed_v1 *psp = (struct pf_ifspeed_v1 *)addr; + struct pf_ifspeed_v1 ps; struct ifnet *ifp; if (psp->ifname[0] != 0) { /* Can we completely trust user-land? 
*/ strlcpy(ps.ifname, psp->ifname, IFNAMSIZ); ifp = ifunit(ps.ifname); - if (ifp != NULL) - psp->baudrate = ifp->if_baudrate; - else + if (ifp != NULL) { + psp->baudrate32 = + (u_int32_t)uqmin(ifp->if_baudrate, UINT_MAX); + if (cmd == DIOCGIFSPEEDV1) + psp->baudrate = ifp->if_baudrate; + } else error = EINVAL; } else error = EINVAL; break; } #ifdef ALTQ case DIOCSTARTALTQ: { struct pf_altq *altq; PF_RULES_WLOCK(); /* enable all altq interfaces on active list */ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { error = pf_enable_altq(altq); if (error != 0) break; } } if (error == 0) V_pf_altq_running = 1; PF_RULES_WUNLOCK(); DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); break; } case DIOCSTOPALTQ: { struct pf_altq *altq; PF_RULES_WLOCK(); /* disable all altq interfaces on active list */ TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (altq->qname[0] == 0 && (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { error = pf_disable_altq(altq); if (error != 0) break; } } if (error == 0) V_pf_altq_running = 0; PF_RULES_WUNLOCK(); DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); break; } - case DIOCADDALTQ: { - struct pfioc_altq *pa = (struct pfioc_altq *)addr; + case DIOCADDALTQV0: + case DIOCADDALTQV1: { + struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr; struct pf_altq *altq, *a; struct ifnet *ifp; - altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK); - bcopy(&pa->altq, altq, sizeof(struct pf_altq)); + altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK | M_ZERO); + error = pf_import_kaltq(pa, altq, IOCPARM_LEN(cmd)); + if (error) + break; altq->local_flags = 0; PF_RULES_WLOCK(); if (pa->ticket != V_ticket_altqs_inactive) { PF_RULES_WUNLOCK(); free(altq, M_PFALTQ); error = EBUSY; break; } /* * if this is for a queue, find the discipline and * copy the necessary fields */ if (altq->qname[0] != 0) { if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { PF_RULES_WUNLOCK(); error = EBUSY; free(altq, M_PFALTQ); break; } altq->altq_disc = NULL; TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) { if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0 && a->qname[0] == 0) { altq->altq_disc = a->altq_disc; break; } } } if ((ifp = ifunit(altq->ifname)) == NULL) altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; else error = altq_add(altq); if (error) { PF_RULES_WUNLOCK(); free(altq, M_PFALTQ); break; } TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); - bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + /* version error check done on import above */ + pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd)); PF_RULES_WUNLOCK(); break; } - case DIOCGETALTQS: { - struct pfioc_altq *pa = (struct pfioc_altq *)addr; + case DIOCGETALTQSV0: + case DIOCGETALTQSV1: { + struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr; struct pf_altq *altq; PF_RULES_RLOCK(); pa->nr = 0; TAILQ_FOREACH(altq, V_pf_altqs_active, entries) pa->nr++; pa->ticket = V_ticket_altqs_active; PF_RULES_RUNLOCK(); break; } - case DIOCGETALTQ: { - struct pfioc_altq *pa = (struct pfioc_altq *)addr; + case DIOCGETALTQV0: + case DIOCGETALTQV1: { + struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr; struct pf_altq *altq; u_int32_t nr; PF_RULES_RLOCK(); if (pa->ticket != V_ticket_altqs_active) { PF_RULES_RUNLOCK(); error = EBUSY; break; } nr = 0; altq = TAILQ_FIRST(V_pf_altqs_active); while ((altq != NULL) && (nr < pa->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; } if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; break; } - bcopy(altq, &pa->altq, sizeof(struct pf_altq)); 
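For the DIOCGIFSPEED split above, a userland view may help: a V1 caller receives both the saturated 32-bit figure and the full 64-bit if_baudrate, so links faster than about 4.3 Gb/s no longer report a wrapped value. A minimal sketch (illustrative; error handling elided and the include set assumed):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/pfvar.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
print_link_speed(int pffd, const char *ifname)
{
	struct pf_ifspeed_v1 io;

	memset(&io, 0, sizeof(io));
	strlcpy(io.ifname, ifname, IFNAMSIZ);
	if (ioctl(pffd, DIOCGIFSPEEDV1, &io) == 0)
		printf("%s: %ju bps (baudrate32: %u)\n", ifname,
		    (uintmax_t)io.baudrate, (unsigned int)io.baudrate32);
}

The descriptor would come from opening /dev/pf read-only, which the securelevel and FWRITE tables earlier in pfioctl() explicitly permit for both speed ioctls.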
+ pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd)); PF_RULES_RUNLOCK(); break; } - case DIOCCHANGEALTQ: + case DIOCCHANGEALTQV0: + case DIOCCHANGEALTQV1: /* CHANGEALTQ not supported yet! */ error = ENODEV; break; - case DIOCGETQSTATS: { - struct pfioc_qstats *pq = (struct pfioc_qstats *)addr; + case DIOCGETQSTATSV0: + case DIOCGETQSTATSV1: { + struct pfioc_qstats_v1 *pq = (struct pfioc_qstats_v1 *)addr; struct pf_altq *altq; u_int32_t nr; int nbytes; + u_int32_t version; PF_RULES_RLOCK(); if (pq->ticket != V_ticket_altqs_active) { PF_RULES_RUNLOCK(); error = EBUSY; break; } nbytes = pq->nbytes; nr = 0; altq = TAILQ_FIRST(V_pf_altqs_active); while ((altq != NULL) && (nr < pq->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; } if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; break; } if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) { PF_RULES_RUNLOCK(); error = ENXIO; break; } PF_RULES_RUNLOCK(); - error = altq_getqstats(altq, pq->buf, &nbytes); + if (cmd == DIOCGETQSTATSV0) + version = 0; /* DIOCGETQSTATSV0 means stats struct v0 */ + else + version = pq->version; + error = altq_getqstats(altq, pq->buf, &nbytes, version); if (error == 0) { pq->scheduler = altq->scheduler; pq->nbytes = nbytes; } break; } #endif /* ALTQ */ case DIOCBEGINADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; PF_RULES_WLOCK(); pf_empty_pool(&V_pf_pabuf); pp->ticket = ++V_ticket_pabuf; PF_RULES_WUNLOCK(); break; } case DIOCADDADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; struct pf_pooladdr *pa; struct pfi_kif *kif = NULL; #ifndef INET if (pp->af == AF_INET) { error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pp->af == AF_INET6) { error = EAFNOSUPPORT; break; } #endif /* INET6 */ if (pp->addr.addr.type != PF_ADDR_ADDRMASK && pp->addr.addr.type != PF_ADDR_DYNIFTL && pp->addr.addr.type != PF_ADDR_TABLE) { error = EINVAL; break; } pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK); bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); if (pa->ifname[0]) kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); PF_RULES_WLOCK(); if (pp->ticket != V_ticket_pabuf) { PF_RULES_WUNLOCK(); if (pa->ifname[0]) free(kif, PFI_MTYPE); free(pa, M_PFRULE); error = EBUSY; break; } if (pa->ifname[0]) { pa->kif = pfi_kif_attach(kif, pa->ifname); pfi_kif_ref(pa->kif); } else pa->kif = NULL; if (pa->addr.type == PF_ADDR_DYNIFTL && ((error = pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) { if (pa->ifname[0]) pfi_kif_unref(pa->kif); PF_RULES_WUNLOCK(); free(pa, M_PFRULE); break; } TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries); PF_RULES_WUNLOCK(); break; } case DIOCGETADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; struct pf_pool *pool; struct pf_pooladdr *pa; PF_RULES_RLOCK(); pp->nr = 0; pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 0); if (pool == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; break; } TAILQ_FOREACH(pa, &pool->list, entries) pp->nr++; PF_RULES_RUNLOCK(); break; } case DIOCGETADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; struct pf_pool *pool; struct pf_pooladdr *pa; u_int32_t nr = 0; PF_RULES_RLOCK(); pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 1); if (pool == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; break; } pa = TAILQ_FIRST(&pool->list); while ((pa != NULL) && (nr < pp->nr)) { pa = TAILQ_NEXT(pa, entries); nr++; } if (pa == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; break; } bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); pf_addr_copyout(&pp->addr.addr); PF_RULES_RUNLOCK(); break; } case 
DIOCCHANGEADDR: { struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr; struct pf_pool *pool; struct pf_pooladdr *oldpa = NULL, *newpa = NULL; struct pf_ruleset *ruleset; struct pfi_kif *kif = NULL; if (pca->action < PF_CHANGE_ADD_HEAD || pca->action > PF_CHANGE_REMOVE) { error = EINVAL; break; } if (pca->addr.addr.type != PF_ADDR_ADDRMASK && pca->addr.addr.type != PF_ADDR_DYNIFTL && pca->addr.addr.type != PF_ADDR_TABLE) { error = EINVAL; break; } if (pca->action != PF_CHANGE_REMOVE) { #ifndef INET if (pca->af == AF_INET) { error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pca->af == AF_INET6) { error = EAFNOSUPPORT; break; } #endif /* INET6 */ newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK); bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); if (newpa->ifname[0]) kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); newpa->kif = NULL; } #define ERROUT(x) { error = (x); goto DIOCCHANGEADDR_error; } PF_RULES_WLOCK(); ruleset = pf_find_ruleset(pca->anchor); if (ruleset == NULL) ERROUT(EBUSY); pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action, pca->r_num, pca->r_last, 1, 1); if (pool == NULL) ERROUT(EBUSY); if (pca->action != PF_CHANGE_REMOVE) { if (newpa->ifname[0]) { newpa->kif = pfi_kif_attach(kif, newpa->ifname); pfi_kif_ref(newpa->kif); kif = NULL; } switch (newpa->addr.type) { case PF_ADDR_DYNIFTL: error = pfi_dynaddr_setup(&newpa->addr, pca->af); break; case PF_ADDR_TABLE: newpa->addr.p.tbl = pfr_attach_table(ruleset, newpa->addr.v.tblname); if (newpa->addr.p.tbl == NULL) error = ENOMEM; break; } if (error) goto DIOCCHANGEADDR_error; } switch (pca->action) { case PF_CHANGE_ADD_HEAD: oldpa = TAILQ_FIRST(&pool->list); break; case PF_CHANGE_ADD_TAIL: oldpa = TAILQ_LAST(&pool->list, pf_palist); break; default: oldpa = TAILQ_FIRST(&pool->list); for (int i = 0; oldpa && i < pca->nr; i++) oldpa = TAILQ_NEXT(oldpa, entries); if (oldpa == NULL) ERROUT(EINVAL); } if (pca->action == PF_CHANGE_REMOVE) { TAILQ_REMOVE(&pool->list, oldpa, entries); switch (oldpa->addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(oldpa->addr.p.dyn); break; case PF_ADDR_TABLE: pfr_detach_table(oldpa->addr.p.tbl); break; } if (oldpa->kif) pfi_kif_unref(oldpa->kif); free(oldpa, M_PFRULE); } else { if (oldpa == NULL) TAILQ_INSERT_TAIL(&pool->list, newpa, entries); else if (pca->action == PF_CHANGE_ADD_HEAD || pca->action == PF_CHANGE_ADD_BEFORE) TAILQ_INSERT_BEFORE(oldpa, newpa, entries); else TAILQ_INSERT_AFTER(&pool->list, oldpa, newpa, entries); } pool->cur = TAILQ_FIRST(&pool->list); PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); PF_RULES_WUNLOCK(); break; #undef ERROUT DIOCCHANGEADDR_error: if (newpa != NULL) { if (newpa->kif) pfi_kif_unref(newpa->kif); free(newpa, M_PFRULE); } PF_RULES_WUNLOCK(); if (kif != NULL) free(kif, PFI_MTYPE); break; } case DIOCGETRULESETS: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; struct pf_ruleset *ruleset; struct pf_anchor *anchor; PF_RULES_RLOCK(); pr->path[sizeof(pr->path) - 1] = 0; if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { PF_RULES_RUNLOCK(); error = ENOENT; break; } pr->nr = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) if (anchor->parent == NULL) pr->nr++; } else { RB_FOREACH(anchor, pf_anchor_node, &ruleset->anchor->children) pr->nr++; } PF_RULES_RUNLOCK(); break; } case DIOCGETRULESET: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; struct pf_ruleset *ruleset; struct pf_anchor *anchor; u_int32_t nr = 0; 
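/*
 * DIOCGETRULESETS (above) reports the number of child anchors under
 * pr->path, and DIOCGETRULESET maps the pr->nr'th child back to its name.
 * A minimal userland walk over the main ruleset's anchors, as a sketch
 * only (pffd is assumed to be an open /dev/pf descriptor; an empty
 * pr.path selects the main ruleset):
 *
 *	struct pfioc_ruleset pr;
 *	bzero(&pr, sizeof(pr));
 *	if (ioctl(pffd, DIOCGETRULESETS, &pr) == 0) {
 *		u_int32_t i, n = pr.nr;
 *		for (i = 0; i < n; i++) {
 *			pr.nr = i;
 *			if (ioctl(pffd, DIOCGETRULESET, &pr) == 0)
 *				printf("anchor: %s\n", pr.name);
 *		}
 *	}
 */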
PF_RULES_RLOCK(); pr->path[sizeof(pr->path) - 1] = 0; if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { PF_RULES_RUNLOCK(); error = ENOENT; break; } pr->name[0] = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_anchor_global, &V_pf_anchors) if (anchor->parent == NULL && nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); break; } } else { RB_FOREACH(anchor, pf_anchor_node, &ruleset->anchor->children) if (nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); break; } } if (!pr->name[0]) error = EBUSY; PF_RULES_RUNLOCK(); break; } case DIOCRCLRTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != 0) { error = ENODEV; break; } PF_RULES_WLOCK(); error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); break; } case DIOCRADDTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) { error = ENOMEM; break; } totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_add_tables(pfrts, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfrts, M_TEMP); break; } case DIOCRDELTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) { error = ENOMEM; break; } totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_del_tables(pfrts, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfrts, M_TEMP); break; } case DIOCRGETTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen, n; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } PF_RULES_RLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); io->pfrio_size = min(io->pfrio_size, n); totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_TEMP, M_NOWAIT); if (pfrts == NULL) { error = ENOMEM; PF_RULES_RUNLOCK(); break; } error = pfr_get_tables(&io->pfrio_table, pfrts, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfrts, io->pfrio_buffer, totlen); free(pfrts, M_TEMP); break; } case DIOCRGETTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_tstats *pfrtstats; size_t totlen, n; if (io->pfrio_esize != sizeof(struct pfr_tstats)) { error = ENODEV; break; } PF_RULES_WLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); io->pfrio_size = min(io->pfrio_size, n); totlen = io->pfrio_size * sizeof(struct pfr_tstats); pfrtstats = mallocarray(io->pfrio_size, sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT); if 
(pfrtstats == NULL) { error = ENOMEM; PF_RULES_WUNLOCK(); break; } error = pfr_get_tstats(&io->pfrio_table, pfrtstats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); if (error == 0) error = copyout(pfrtstats, io->pfrio_buffer, totlen); free(pfrtstats, M_TEMP); break; } case DIOCRCLRTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen, n; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } PF_RULES_WLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); io->pfrio_size = min(io->pfrio_size, n); totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_TEMP, M_NOWAIT); if (pfrts == NULL) { error = ENOMEM; PF_RULES_WUNLOCK(); break; } error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_TEMP); PF_RULES_WUNLOCK(); break; } error = pfr_clr_tstats(pfrts, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfrts, M_TEMP); break; } case DIOCRSETTFLAGS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen, n; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } PF_RULES_WLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); io->pfrio_size = min(io->pfrio_size, n); totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_TEMP, M_NOWAIT); if (pfrts == NULL) { error = ENOMEM; PF_RULES_WUNLOCK(); break; } error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_TEMP); PF_RULES_WUNLOCK(); break; } error = pfr_set_tflags(pfrts, io->pfrio_size, io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfrts, M_TEMP); break; } case DIOCRCLRADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != 0) { error = ENODEV; break; } PF_RULES_WLOCK(); error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); break; } case DIOCRADDADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_NOWAIT); if (! pfras) { error = ENOMEM; break; } error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_add_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRDELADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_NOWAIT); if (! 
pfras) { error = ENOMEM; break; } error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_del_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRSETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen, count; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size2 < 0) { error = EINVAL; break; } count = max(io->pfrio_size, io->pfrio_size2); if (count > pf_ioctl_maxcount || WOULD_OVERFLOW(count, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = count * sizeof(struct pfr_addr); pfras = mallocarray(count, sizeof(struct pfr_addr), M_TEMP, M_NOWAIT); if (! pfras) { error = ENOMEM; break; } error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_set_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | PFR_FLAG_USERIOCTL, 0); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRGETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_NOWAIT); if (! pfras) { error = ENOMEM; break; } PF_RULES_RLOCK(); error = pfr_get_addrs(&io->pfrio_table, pfras, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRGETASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_astats *pfrastats; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_astats)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_astats))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_astats); pfrastats = mallocarray(io->pfrio_size, sizeof(struct pfr_astats), M_TEMP, M_NOWAIT); if (! pfrastats) { error = ENOMEM; break; } PF_RULES_RLOCK(); error = pfr_get_astats(&io->pfrio_table, pfrastats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfrastats, io->pfrio_buffer, totlen); free(pfrastats, M_TEMP); break; } case DIOCRCLRASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_NOWAIT); if (! 
pfras) { error = ENOMEM; break; } error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_clr_astats(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRTSTADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_NOWAIT); if (! pfras) { error = ENOMEM; break; } error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_RLOCK(); error = pfr_tst_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRINADEFINE: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_NOWAIT); if (! pfras) { error = ENOMEM; break; } error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_ina_define(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfras, M_TEMP); break; } case DIOCOSFPADD: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; PF_RULES_WLOCK(); error = pf_osfp_add(io); PF_RULES_WUNLOCK(); break; } case DIOCOSFPGET: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; PF_RULES_RLOCK(); error = pf_osfp_get(io); PF_RULES_RUNLOCK(); break; } case DIOCXBEGIN: { struct pfioc_trans *io = (struct pfioc_trans *)addr; struct pfioc_trans_e *ioes, *ioe; size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; break; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; break; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), M_TEMP, M_NOWAIT); if (! 
ioes) { error = ENOMEM; break; } error = copyin(io->array, ioes, totlen); if (error) { free(ioes, M_TEMP); break; } PF_RULES_WLOCK(); for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EINVAL; goto fail; } if ((error = pf_begin_altq(&ioe->ticket))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: { struct pfr_table table; bzero(&table, sizeof(table)); strlcpy(table.pfrt_anchor, ioe->anchor, sizeof(table.pfrt_anchor)); if ((error = pfr_ina_begin(&table, &ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; } break; } default: if ((error = pf_begin_rules(&ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; } break; } } PF_RULES_WUNLOCK(); error = copyout(ioes, io->array, totlen); free(ioes, M_TEMP); break; } case DIOCXROLLBACK: { struct pfioc_trans *io = (struct pfioc_trans *)addr; struct pfioc_trans_e *ioe, *ioes; size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; break; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; break; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), M_TEMP, M_NOWAIT); if (! ioes) { error = ENOMEM; break; } error = copyin(io->array, ioes, totlen); if (error) { free(ioes, M_TEMP); break; } PF_RULES_WLOCK(); for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EINVAL; goto fail; } if ((error = pf_rollback_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; #endif /* ALTQ */ case PF_RULESET_TABLE: { struct pfr_table table; bzero(&table, sizeof(table)); strlcpy(table.pfrt_anchor, ioe->anchor, sizeof(table.pfrt_anchor)); if ((error = pfr_ina_rollback(&table, ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; } default: if ((error = pf_rollback_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; } } PF_RULES_WUNLOCK(); free(ioes, M_TEMP); break; } case DIOCXCOMMIT: { struct pfioc_trans *io = (struct pfioc_trans *)addr; struct pfioc_trans_e *ioe, *ioes; struct pf_ruleset *rs; size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; break; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; break; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), M_TEMP, M_NOWAIT); if (ioes == NULL) { error = ENOMEM; break; } error = copyin(io->array, ioes, totlen); if (error) { free(ioes, M_TEMP); break; } PF_RULES_WLOCK(); /* First make sure everything will succeed.
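Both passes run under the same PF_RULES_WLOCK(), so a ticket validated in this pass cannot be invalidated before the commit pass below.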
*/ for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EINVAL; goto fail; } if (!V_altqs_inactive_open || ioe->ticket != V_ticket_altqs_inactive) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EBUSY; goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: rs = pf_find_ruleset(ioe->anchor); if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EBUSY; goto fail; } break; default: if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EINVAL; goto fail; } rs = pf_find_ruleset(ioe->anchor); if (rs == NULL || !rs->rules[ioe->rs_num].inactive.open || rs->rules[ioe->rs_num].inactive.ticket != ioe->ticket) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EBUSY; goto fail; } break; } } /* Now do the commit - no errors should happen here. */ for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: if ((error = pf_commit_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; #endif /* ALTQ */ case PF_RULESET_TABLE: { struct pfr_table table; bzero(&table, sizeof(table)); strlcpy(table.pfrt_anchor, ioe->anchor, sizeof(table.pfrt_anchor)); if ((error = pfr_ina_commit(&table, ioe->ticket, NULL, NULL, 0))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; } default: if ((error = pf_commit_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; } } PF_RULES_WUNLOCK(); free(ioes, M_TEMP); break; } case DIOCGETSRCNODES: { struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; struct pf_srchash *sh; struct pf_src_node *n, *p, *pstore; uint32_t i, nr = 0; if (psn->psn_len == 0) { for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH(n, &sh->nodes, entry) nr++; PF_HASHROW_UNLOCK(sh); } psn->psn_len = sizeof(struct pf_src_node) * nr; break; } p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK); for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH(n, &sh->nodes, entry) { int secs = time_uptime, diff; if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) break; bcopy(n, p, sizeof(struct pf_src_node)); if (n->rule.ptr != NULL) p->rule.nr = n->rule.ptr->nr; p->creation = secs - p->creation; if (p->expire > secs) p->expire -= secs; else p->expire = 0; /* Adjust the connection rate estimate. 
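The count covers the conn_rate.seconds window that began at conn_rate.last; scale it down linearly by the time already elapsed and zero it once the window has expired entirely.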
*/ diff = secs - n->conn_rate.last; if (diff >= n->conn_rate.seconds) p->conn_rate.count = 0; else p->conn_rate.count -= n->conn_rate.count * diff / n->conn_rate.seconds; p++; nr++; } PF_HASHROW_UNLOCK(sh); } error = copyout(pstore, psn->psn_src_nodes, sizeof(struct pf_src_node) * nr); if (error) { free(pstore, M_TEMP); break; } psn->psn_len = sizeof(struct pf_src_node) * nr; free(pstore, M_TEMP); break; } case DIOCCLRSRCNODES: { pf_clear_srcnodes(NULL); pf_purge_expired_src_nodes(); break; } case DIOCKILLSRCNODES: pf_kill_srcnodes((struct pfioc_src_node_kill *)addr); break; case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; PF_RULES_WLOCK(); if (*hostid == 0) V_pf_status.hostid = arc4random(); else V_pf_status.hostid = *hostid; PF_RULES_WUNLOCK(); break; } case DIOCOSFPFLUSH: PF_RULES_WLOCK(); pf_osfp_flush(); PF_RULES_WUNLOCK(); break; case DIOCIGETIFACES: { struct pfioc_iface *io = (struct pfioc_iface *)addr; struct pfi_kif *ifstore; size_t bufsiz; if (io->pfiio_esize != sizeof(struct pfi_kif)) { error = ENODEV; break; } if (io->pfiio_size < 0 || io->pfiio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfiio_size, sizeof(struct pfi_kif))) { error = EINVAL; break; } bufsiz = io->pfiio_size * sizeof(struct pfi_kif); ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif), M_TEMP, M_NOWAIT); if (ifstore == NULL) { error = ENOMEM; break; } PF_RULES_RLOCK(); pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size); PF_RULES_RUNLOCK(); error = copyout(ifstore, io->pfiio_buffer, bufsiz); free(ifstore, M_TEMP); break; } case DIOCSETIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; PF_RULES_WLOCK(); error = pfi_set_flags(io->pfiio_name, io->pfiio_flags); PF_RULES_WUNLOCK(); break; } case DIOCCLRIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; PF_RULES_WLOCK(); error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags); PF_RULES_WUNLOCK(); break; } default: error = ENODEV; break; } fail: if (sx_xlocked(&pf_ioctl_lock)) sx_xunlock(&pf_ioctl_lock); CURVNET_RESTORE(); return (error); } void pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) { bzero(sp, sizeof(struct pfsync_state)); /* copy from state key */ sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; sp->proto = st->key[PF_SK_WIRE]->proto; sp->af = st->key[PF_SK_WIRE]->af; /* copy from state */ strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); sp->creation = htonl(time_uptime - st->creation); sp->expire = pf_state_expires(st); if (sp->expire <= time_uptime) sp->expire = htonl(0); else sp->expire = htonl(sp->expire - time_uptime); sp->direction = st->direction; sp->log = st->log; sp->timeout = st->timeout; sp->state_flags = st->state_flags; if (st->src_node) sp->sync_flags |= PFSYNC_FLAG_SRCNODE; if (st->nat_src_node) sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; sp->id = st->id; sp->creatorid = st->creatorid; pf_state_peer_hton(&st->src, &sp->src); pf_state_peer_hton(&st->dst, &sp->dst); if (st->rule.ptr == NULL) sp->rule = htonl(-1); else sp->rule = htonl(st->rule.ptr->nr); if (st->anchor.ptr == 
NULL) sp->anchor = htonl(-1); else sp->anchor = htonl(st->anchor.ptr->nr); if (st->nat_rule.ptr == NULL) sp->nat_rule = htonl(-1); else sp->nat_rule = htonl(st->nat_rule.ptr->nr); pf_state_counter_hton(st->packets[0], sp->packets[0]); pf_state_counter_hton(st->packets[1], sp->packets[1]); pf_state_counter_hton(st->bytes[0], sp->bytes[0]); pf_state_counter_hton(st->bytes[1], sp->bytes[1]); } static void pf_tbladdr_copyout(struct pf_addr_wrap *aw) { struct pfr_ktable *kt; KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type)); kt = aw->p.tbl; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; aw->p.tbl = NULL; aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? kt->pfrkt_cnt : -1; } /* * XXX - Check for version mismatch!!! */ static void pf_clear_states(void) { struct pf_state *s; u_int i; for (i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; relock: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { s->timeout = PFTM_PURGE; /* Don't send out individual delete messages. */ s->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(s, PF_ENTER_LOCKED); goto relock; } PF_HASHROW_UNLOCK(ih); } } static int pf_clear_tables(void) { struct pfioc_table io; int error; bzero(&io, sizeof(io)); error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel, io.pfrio_flags); return (error); } static void pf_clear_srcnodes(struct pf_src_node *n) { struct pf_state *s; int i; for (i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { if (n == NULL || n == s->src_node) s->src_node = NULL; if (n == NULL || n == s->nat_src_node) s->nat_src_node = NULL; } PF_HASHROW_UNLOCK(ih); } if (n == NULL) { struct pf_srchash *sh; for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH(n, &sh->nodes, entry) { n->expire = 1; n->states = 0; } PF_HASHROW_UNLOCK(sh); } } else { /* XXX: hash slot should already be locked here. */ n->expire = 1; n->states = 0; } } static void pf_kill_srcnodes(struct pfioc_src_node_kill *psnk) { struct pf_src_node_list kill; LIST_INIT(&kill); for (int i = 0; i <= pf_srchashmask; i++) { struct pf_srchash *sh = &V_pf_srchash[i]; struct pf_src_node *sn, *tmp; PF_HASHROW_LOCK(sh); LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp) if (PF_MATCHA(psnk->psnk_src.neg, &psnk->psnk_src.addr.v.a.addr, &psnk->psnk_src.addr.v.a.mask, &sn->addr, sn->af) && PF_MATCHA(psnk->psnk_dst.neg, &psnk->psnk_dst.addr.v.a.addr, &psnk->psnk_dst.addr.v.a.mask, &sn->raddr, sn->af)) { pf_unlink_src_node(sn); LIST_INSERT_HEAD(&kill, sn, entry); sn->expire = 1; } PF_HASHROW_UNLOCK(sh); } for (int i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_state *s; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { if (s->src_node && s->src_node->expire == 1) s->src_node = NULL; if (s->nat_src_node && s->nat_src_node->expire == 1) s->nat_src_node = NULL; } PF_HASHROW_UNLOCK(ih); } psnk->psnk_killed = pf_free_src_nodes(&kill); } /* * XXX - Check for version mismatch!!! */ /* * Duplicate pfctl -Fa operation to get rid of as much as we can. */ static int shutdown_pf(void) { int error = 0; u_int32_t t[5]; char nn = '\0'; do { if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n")); break; } if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n")); break; /* XXX: rollback?
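The SCRUB ticket taken above is simply abandoned on this error path.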
*/ } if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n")); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n")); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n")); break; /* XXX: rollback? */ } /* XXX: these should always succeed here */ pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn); pf_commit_rules(t[1], PF_RULESET_FILTER, &nn); pf_commit_rules(t[2], PF_RULESET_NAT, &nn); pf_commit_rules(t[3], PF_RULESET_BINAT, &nn); pf_commit_rules(t[4], PF_RULESET_RDR, &nn); if ((error = pf_clear_tables()) != 0) break; #ifdef ALTQ if ((error = pf_begin_altq(&t[0])) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n")); break; } pf_commit_altq(t[0]); #endif pf_clear_states(); pf_clear_srcnodes(NULL); /* status does not use malloced mem so no need to cleanup */ /* fingerprints and interfaces have their own cleanup code */ /* Free counters last as we updated them during shutdown. */ counter_u64_free(V_pf_default_rule.states_cur); counter_u64_free(V_pf_default_rule.states_tot); counter_u64_free(V_pf_default_rule.src_nodes); for (int i = 0; i < PFRES_MAX; i++) counter_u64_free(V_pf_status.counters[i]); for (int i = 0; i < LCNT_MAX; i++) counter_u64_free(V_pf_status.lcounters[i]); for (int i = 0; i < FCNT_MAX; i++) counter_u64_free(V_pf_status.fcounters[i]); for (int i = 0; i < SCNT_MAX; i++) counter_u64_free(V_pf_status.scounters[i]); } while (0); return (error); } #ifdef INET static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, struct inpcb *inp) { int chk; chk = pf_test(PF_IN, flags, ifp, m, inp); if (chk && *m) { m_freem(*m); *m = NULL; } if (chk != PF_PASS) return (EACCES); return (0); } static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, struct inpcb *inp) { int chk; chk = pf_test(PF_OUT, flags, ifp, m, inp); if (chk && *m) { m_freem(*m); *m = NULL; } if (chk != PF_PASS) return (EACCES); return (0); } #endif #ifdef INET6 static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, struct inpcb *inp) { int chk; /* * In case of loopback traffic IPv6 uses the real interface in * order to support scoped addresses. In order to support stateful * filtering we have changed this to lo0, as is the case in IPv4. */ CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_IN, flags, (*m)->m_flags & M_LOOP ?
V_loif : ifp, m, inp); CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); *m = NULL; } if (chk != PF_PASS) return (EACCES); return (0); } static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, int flags, struct inpcb *inp) { int chk; CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_OUT, flags, ifp, m, inp); CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); *m = NULL; } if (chk != PF_PASS) return (EACCES); return (0); } #endif /* INET6 */ static int hook_pf(void) { #ifdef INET struct pfil_head *pfh_inet; #endif #ifdef INET6 struct pfil_head *pfh_inet6; #endif if (V_pf_pfil_hooked) return (0); #ifdef INET pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (pfh_inet == NULL) return (ESRCH); /* XXX */ pfil_add_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); pfil_add_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); #endif #ifdef INET6 pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); if (pfh_inet6 == NULL) { #ifdef INET pfil_remove_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); pfil_remove_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); #endif return (ESRCH); /* XXX */ } pfil_add_hook_flags(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); pfil_add_hook_flags(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); #endif V_pf_pfil_hooked = 1; return (0); } static int dehook_pf(void) { #ifdef INET struct pfil_head *pfh_inet; #endif #ifdef INET6 struct pfil_head *pfh_inet6; #endif if (V_pf_pfil_hooked == 0) return (0); #ifdef INET pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (pfh_inet == NULL) return (ESRCH); /* XXX */ pfil_remove_hook_flags(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); pfil_remove_hook_flags(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); #endif #ifdef INET6 pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); if (pfh_inet6 == NULL) return (ESRCH); /* XXX */ pfil_remove_hook_flags(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); pfil_remove_hook_flags(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); #endif V_pf_pfil_hooked = 0; return (0); } static void pf_load_vnet(void) { TAILQ_INIT(&V_pf_tags); TAILQ_INIT(&V_pf_qids); pfattach_vnet(); V_pf_vnet_active = 1; } static int pf_load(void) { int error; rm_init(&pf_rules_lock, "pf rulesets"); sx_init(&pf_ioctl_lock, "pf ioctl"); sx_init(&pf_end_lock, "pf end thread"); pf_mtag_initialize(); pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME); if (pf_dev == NULL) return (ENOMEM); pf_end_threads = 0; error = kproc_create(pf_purge_thread, NULL, &pf_purge_proc, 0, 0, "pf purge"); if (error != 0) return (error); pfi_initialize(); return (0); } static void pf_unload_vnet(void) { int error; V_pf_vnet_active = 0; V_pf_status.running = 0; error = dehook_pf(); if (error) { /* * Should not happen! * XXX Due to error code ESRCH, kldunload will show * a message like 'No such process'. 
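* Returning here leaves the vnet teardown incomplete, but continuing while the pfil hooks might still be invoked would be worse.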
*/ printf("%s : pfil unregisteration fail\n", __FUNCTION__); return; } PF_RULES_WLOCK(); shutdown_pf(); PF_RULES_WUNLOCK(); swi_remove(V_pf_swi_cookie); pf_unload_vnet_purge(); pf_normalize_cleanup(); PF_RULES_WLOCK(); pfi_cleanup_vnet(); PF_RULES_WUNLOCK(); pfr_cleanup(); pf_osfp_flush(); pf_cleanup(); if (IS_DEFAULT_VNET(curvnet)) pf_mtag_cleanup(); } static void pf_unload(void) { sx_xlock(&pf_end_lock); pf_end_threads = 1; while (pf_end_threads < 2) { wakeup_one(pf_purge_thread); sx_sleep(pf_purge_proc, &pf_end_lock, 0, "pftmo", 0); } sx_xunlock(&pf_end_lock); if (pf_dev != NULL) destroy_dev(pf_dev); pfi_cleanup(); rm_destroy(&pf_rules_lock); sx_destroy(&pf_ioctl_lock); sx_destroy(&pf_end_lock); } static void vnet_pf_init(void *unused __unused) { pf_load_vnet(); } VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, vnet_pf_init, NULL); static void vnet_pf_uninit(const void *unused __unused) { pf_unload_vnet(); } SYSUNINIT(pf_unload, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND, pf_unload, NULL); VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, vnet_pf_uninit, NULL); static int pf_modevent(module_t mod, int type, void *data) { int error = 0; switch(type) { case MOD_LOAD: error = pf_load(); break; case MOD_UNLOAD: /* Handled in SYSUNINIT(pf_unload) to ensure it's done after * the vnet_pf_uninit()s */ break; default: error = EINVAL; break; } return (error); } static moduledata_t pf_mod = { "pf", pf_modevent, 0 }; DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND); MODULE_VERSION(pf, PF_MODVER); Index: head/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c =================================================================== --- head/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c (revision 338208) +++ head/usr.sbin/bsnmpd/modules/snmp_pf/pf_snmp.c (revision 338209) @@ -1,1800 +1,1804 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Philip Paeps * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ +#define PFIOC_USE_LATEST + #include #include #include #include #include #include #include #include #include #include #include #include #define SNMPTREE_TYPES #include "pf_oid.h" #include "pf_tree.h" struct lmodule *module; static int dev = -1; static int started; static uint64_t pf_tick; static struct pf_status pfs; enum { IN, OUT }; enum { IPV4, IPV6 }; enum { PASS, BLOCK }; #define PFI_IFTYPE_GROUP 0 #define PFI_IFTYPE_INSTANCE 1 #define PFI_IFTYPE_DETACHED 2 struct pfi_entry { struct pfi_kif pfi; u_int index; TAILQ_ENTRY(pfi_entry) link; }; TAILQ_HEAD(pfi_table, pfi_entry); static struct pfi_table pfi_table; static time_t pfi_table_age; static int pfi_table_count; #define PFI_TABLE_MAXAGE 5 struct pft_entry { struct pfr_tstats pft; u_int index; TAILQ_ENTRY(pft_entry) link; }; TAILQ_HEAD(pft_table, pft_entry); static struct pft_table pft_table; static time_t pft_table_age; static int pft_table_count; #define PFT_TABLE_MAXAGE 5 struct pfa_entry { struct pfr_astats pfas; u_int index; TAILQ_ENTRY(pfa_entry) link; }; TAILQ_HEAD(pfa_table, pfa_entry); static struct pfa_table pfa_table; static time_t pfa_table_age; static int pfa_table_count; #define PFA_TABLE_MAXAGE 5 struct pfq_entry { struct pf_altq altq; u_int index; TAILQ_ENTRY(pfq_entry) link; }; TAILQ_HEAD(pfq_table, pfq_entry); static struct pfq_table pfq_table; static time_t pfq_table_age; static int pfq_table_count; static int altq_enabled = 0; #define PFQ_TABLE_MAXAGE 5 struct pfl_entry { char name[MAXPATHLEN + PF_RULE_LABEL_SIZE]; u_int64_t evals; u_int64_t bytes[2]; u_int64_t pkts[2]; u_int index; TAILQ_ENTRY(pfl_entry) link; }; TAILQ_HEAD(pfl_table, pfl_entry); static struct pfl_table pfl_table; static time_t pfl_table_age; static int pfl_table_count; #define PFL_TABLE_MAXAGE 5 /* Forward declarations */ static int pfi_refresh(void); static int pfq_refresh(void); static int pfs_refresh(void); static int pft_refresh(void); static int pfa_refresh(void); static int pfl_refresh(void); static struct pfi_entry * pfi_table_find(u_int idx); static struct pfq_entry * pfq_table_find(u_int idx); static struct pft_entry * pft_table_find(u_int idx); static struct pfa_entry * pfa_table_find(u_int idx); static struct pfl_entry * pfl_table_find(u_int idx); static int altq_is_enabled(int pfdevice); int pf_status(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; time_t runtime; unsigned char str[128]; if (op == SNMP_OP_SET) return (SNMP_ERR_NOT_WRITEABLE); if (op == SNMP_OP_GET) { if (pfs_refresh() == -1) return (SNMP_ERR_GENERR); switch (which) { case LEAF_pfStatusRunning: val->v.uint32 = pfs.running; break; case LEAF_pfStatusRuntime: runtime = (pfs.since > 0) ? 
time(NULL) - pfs.since : 0; val->v.uint32 = runtime * 100; break; case LEAF_pfStatusDebug: val->v.uint32 = pfs.debug; break; case LEAF_pfStatusHostId: sprintf(str, "0x%08x", ntohl(pfs.hostid)); return (string_get(val, str, strlen(str))); default: return (SNMP_ERR_NOSUCHNAME); } return (SNMP_ERR_NOERROR); } abort(); } int pf_counter(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; if (op == SNMP_OP_SET) return (SNMP_ERR_NOT_WRITEABLE); if (op == SNMP_OP_GET) { if (pfs_refresh() == -1) return (SNMP_ERR_GENERR); switch (which) { case LEAF_pfCounterMatch: val->v.counter64 = pfs.counters[PFRES_MATCH]; break; case LEAF_pfCounterBadOffset: val->v.counter64 = pfs.counters[PFRES_BADOFF]; break; case LEAF_pfCounterFragment: val->v.counter64 = pfs.counters[PFRES_FRAG]; break; case LEAF_pfCounterShort: val->v.counter64 = pfs.counters[PFRES_SHORT]; break; case LEAF_pfCounterNormalize: val->v.counter64 = pfs.counters[PFRES_NORM]; break; case LEAF_pfCounterMemDrop: val->v.counter64 = pfs.counters[PFRES_MEMORY]; break; default: return (SNMP_ERR_NOSUCHNAME); } return (SNMP_ERR_NOERROR); } abort(); } int pf_statetable(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; if (op == SNMP_OP_SET) return (SNMP_ERR_NOT_WRITEABLE); if (op == SNMP_OP_GET) { if (pfs_refresh() == -1) return (SNMP_ERR_GENERR); switch (which) { case LEAF_pfStateTableCount: val->v.uint32 = pfs.states; break; case LEAF_pfStateTableSearches: val->v.counter64 = pfs.fcounters[FCNT_STATE_SEARCH]; break; case LEAF_pfStateTableInserts: val->v.counter64 = pfs.fcounters[FCNT_STATE_INSERT]; break; case LEAF_pfStateTableRemovals: val->v.counter64 = pfs.fcounters[FCNT_STATE_REMOVALS]; break; default: return (SNMP_ERR_NOSUCHNAME); } return (SNMP_ERR_NOERROR); } abort(); } int pf_srcnodes(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; if (op == SNMP_OP_SET) return (SNMP_ERR_NOT_WRITEABLE); if (op == SNMP_OP_GET) { if (pfs_refresh() == -1) return (SNMP_ERR_GENERR); switch (which) { case LEAF_pfSrcNodesCount: val->v.uint32 = pfs.src_nodes; break; case LEAF_pfSrcNodesSearches: val->v.counter64 = pfs.scounters[SCNT_SRC_NODE_SEARCH]; break; case LEAF_pfSrcNodesInserts: val->v.counter64 = pfs.scounters[SCNT_SRC_NODE_INSERT]; break; case LEAF_pfSrcNodesRemovals: val->v.counter64 = pfs.scounters[SCNT_SRC_NODE_REMOVALS]; break; default: return (SNMP_ERR_NOSUCHNAME); } return (SNMP_ERR_NOERROR); } abort(); } int pf_limits(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; struct pfioc_limit pl; if (op == SNMP_OP_SET) return (SNMP_ERR_NOT_WRITEABLE); if (op == SNMP_OP_GET) { bzero(&pl, sizeof(struct pfioc_limit)); switch (which) { case LEAF_pfLimitsStates: pl.index = PF_LIMIT_STATES; break; case LEAF_pfLimitsSrcNodes: pl.index = PF_LIMIT_SRC_NODES; break; case LEAF_pfLimitsFrags: pl.index = PF_LIMIT_FRAGS; break; default: return (SNMP_ERR_NOSUCHNAME); } if (ioctl(dev, DIOCGETLIMIT, &pl)) { syslog(LOG_ERR, "pf_limits(): ioctl(): %s", strerror(errno)); return (SNMP_ERR_GENERR); } val->v.uint32 = pl.limit; return (SNMP_ERR_NOERROR); } abort(); } int pf_timeouts(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused 
vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; struct pfioc_tm pt; if (op == SNMP_OP_SET) return (SNMP_ERR_NOT_WRITEABLE); if (op == SNMP_OP_GET) { bzero(&pt, sizeof(struct pfioc_tm)); switch (which) { case LEAF_pfTimeoutsTcpFirst: pt.timeout = PFTM_TCP_FIRST_PACKET; break; case LEAF_pfTimeoutsTcpOpening: pt.timeout = PFTM_TCP_OPENING; break; case LEAF_pfTimeoutsTcpEstablished: pt.timeout = PFTM_TCP_ESTABLISHED; break; case LEAF_pfTimeoutsTcpClosing: pt.timeout = PFTM_TCP_CLOSING; break; case LEAF_pfTimeoutsTcpFinWait: pt.timeout = PFTM_TCP_FIN_WAIT; break; case LEAF_pfTimeoutsTcpClosed: pt.timeout = PFTM_TCP_CLOSED; break; case LEAF_pfTimeoutsUdpFirst: pt.timeout = PFTM_UDP_FIRST_PACKET; break; case LEAF_pfTimeoutsUdpSingle: pt.timeout = PFTM_UDP_SINGLE; break; case LEAF_pfTimeoutsUdpMultiple: pt.timeout = PFTM_UDP_MULTIPLE; break; case LEAF_pfTimeoutsIcmpFirst: pt.timeout = PFTM_ICMP_FIRST_PACKET; break; case LEAF_pfTimeoutsIcmpError: pt.timeout = PFTM_ICMP_ERROR_REPLY; break; case LEAF_pfTimeoutsOtherFirst: pt.timeout = PFTM_OTHER_FIRST_PACKET; break; case LEAF_pfTimeoutsOtherSingle: pt.timeout = PFTM_OTHER_SINGLE; break; case LEAF_pfTimeoutsOtherMultiple: pt.timeout = PFTM_OTHER_MULTIPLE; break; case LEAF_pfTimeoutsFragment: pt.timeout = PFTM_FRAG; break; case LEAF_pfTimeoutsInterval: pt.timeout = PFTM_INTERVAL; break; case LEAF_pfTimeoutsAdaptiveStart: pt.timeout = PFTM_ADAPTIVE_START; break; case LEAF_pfTimeoutsAdaptiveEnd: pt.timeout = PFTM_ADAPTIVE_END; break; case LEAF_pfTimeoutsSrcNode: pt.timeout = PFTM_SRC_NODE; break; default: return (SNMP_ERR_NOSUCHNAME); } if (ioctl(dev, DIOCGETTIMEOUT, &pt)) { syslog(LOG_ERR, "pf_timeouts(): ioctl(): %s", strerror(errno)); return (SNMP_ERR_GENERR); } val->v.integer = pt.seconds; return (SNMP_ERR_NOERROR); } abort(); } int pf_logif(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; unsigned char str[IFNAMSIZ]; if (op == SNMP_OP_SET) return (SNMP_ERR_NOT_WRITEABLE); if (op == SNMP_OP_GET) { if (pfs_refresh() == -1) return (SNMP_ERR_GENERR); switch (which) { case LEAF_pfLogInterfaceName: strlcpy(str, pfs.ifname, sizeof str); return (string_get(val, str, strlen(str))); case LEAF_pfLogInterfaceIp4BytesIn: val->v.counter64 = pfs.bcounters[IPV4][IN]; break; case LEAF_pfLogInterfaceIp4BytesOut: val->v.counter64 = pfs.bcounters[IPV4][OUT]; break; case LEAF_pfLogInterfaceIp4PktsInPass: val->v.counter64 = pfs.pcounters[IPV4][IN][PF_PASS]; break; case LEAF_pfLogInterfaceIp4PktsInDrop: val->v.counter64 = pfs.pcounters[IPV4][IN][PF_DROP]; break; case LEAF_pfLogInterfaceIp4PktsOutPass: val->v.counter64 = pfs.pcounters[IPV4][OUT][PF_PASS]; break; case LEAF_pfLogInterfaceIp4PktsOutDrop: val->v.counter64 = pfs.pcounters[IPV4][OUT][PF_DROP]; break; case LEAF_pfLogInterfaceIp6BytesIn: val->v.counter64 = pfs.bcounters[IPV6][IN]; break; case LEAF_pfLogInterfaceIp6BytesOut: val->v.counter64 = pfs.bcounters[IPV6][OUT]; break; case LEAF_pfLogInterfaceIp6PktsInPass: val->v.counter64 = pfs.pcounters[IPV6][IN][PF_PASS]; break; case LEAF_pfLogInterfaceIp6PktsInDrop: val->v.counter64 = pfs.pcounters[IPV6][IN][PF_DROP]; break; case LEAF_pfLogInterfaceIp6PktsOutPass: val->v.counter64 = pfs.pcounters[IPV6][OUT][PF_PASS]; break; case LEAF_pfLogInterfaceIp6PktsOutDrop: val->v.counter64 = pfs.pcounters[IPV6][OUT][PF_DROP]; break; default: return (SNMP_ERR_NOSUCHNAME); } return (SNMP_ERR_NOERROR); } abort(); } int pf_interfaces(struct snmp_context 
__unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; if (op == SNMP_OP_SET) return (SNMP_ERR_NOT_WRITEABLE); if (op == SNMP_OP_GET) { if ((time(NULL) - pfi_table_age) > PFI_TABLE_MAXAGE) if (pfi_refresh() == -1) return (SNMP_ERR_GENERR); switch (which) { case LEAF_pfInterfacesIfNumber: val->v.uint32 = pfi_table_count; break; default: return (SNMP_ERR_NOSUCHNAME); } return (SNMP_ERR_NOERROR); } abort(); } int pf_iftable(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; struct pfi_entry *e = NULL; if ((time(NULL) - pfi_table_age) > PFI_TABLE_MAXAGE) pfi_refresh(); switch (op) { case SNMP_OP_SET: return (SNMP_ERR_NOT_WRITEABLE); case SNMP_OP_GETNEXT: if ((e = NEXT_OBJECT_INT(&pfi_table, &val->var, sub)) == NULL) return (SNMP_ERR_NOSUCHNAME); val->var.len = sub + 1; val->var.subs[sub] = e->index; break; case SNMP_OP_GET: if (val->var.len - sub != 1) return (SNMP_ERR_NOSUCHNAME); if ((e = pfi_table_find(val->var.subs[sub])) == NULL) return (SNMP_ERR_NOSUCHNAME); break; case SNMP_OP_COMMIT: case SNMP_OP_ROLLBACK: default: abort(); } switch (which) { case LEAF_pfInterfacesIfDescr: return (string_get(val, e->pfi.pfik_name, -1)); case LEAF_pfInterfacesIfType: val->v.integer = PFI_IFTYPE_INSTANCE; break; case LEAF_pfInterfacesIfTZero: val->v.uint32 = (time(NULL) - e->pfi.pfik_tzero) * 100; break; case LEAF_pfInterfacesIfRefsRule: val->v.uint32 = e->pfi.pfik_rulerefs; break; case LEAF_pfInterfacesIf4BytesInPass: val->v.counter64 = e->pfi.pfik_bytes[IPV4][IN][PASS]; break; case LEAF_pfInterfacesIf4BytesInBlock: val->v.counter64 = e->pfi.pfik_bytes[IPV4][IN][BLOCK]; break; case LEAF_pfInterfacesIf4BytesOutPass: val->v.counter64 = e->pfi.pfik_bytes[IPV4][OUT][PASS]; break; case LEAF_pfInterfacesIf4BytesOutBlock: val->v.counter64 = e->pfi.pfik_bytes[IPV4][OUT][BLOCK]; break; case LEAF_pfInterfacesIf4PktsInPass: val->v.counter64 = e->pfi.pfik_packets[IPV4][IN][PASS]; break; case LEAF_pfInterfacesIf4PktsInBlock: val->v.counter64 = e->pfi.pfik_packets[IPV4][IN][BLOCK]; break; case LEAF_pfInterfacesIf4PktsOutPass: val->v.counter64 = e->pfi.pfik_packets[IPV4][OUT][PASS]; break; case LEAF_pfInterfacesIf4PktsOutBlock: val->v.counter64 = e->pfi.pfik_packets[IPV4][OUT][BLOCK]; break; case LEAF_pfInterfacesIf6BytesInPass: val->v.counter64 = e->pfi.pfik_bytes[IPV6][IN][PASS]; break; case LEAF_pfInterfacesIf6BytesInBlock: val->v.counter64 = e->pfi.pfik_bytes[IPV6][IN][BLOCK]; break; case LEAF_pfInterfacesIf6BytesOutPass: val->v.counter64 = e->pfi.pfik_bytes[IPV6][OUT][PASS]; break; case LEAF_pfInterfacesIf6BytesOutBlock: val->v.counter64 = e->pfi.pfik_bytes[IPV6][OUT][BLOCK]; break; case LEAF_pfInterfacesIf6PktsInPass: val->v.counter64 = e->pfi.pfik_packets[IPV6][IN][PASS]; break; case LEAF_pfInterfacesIf6PktsInBlock: val->v.counter64 = e->pfi.pfik_packets[IPV6][IN][BLOCK]; break; case LEAF_pfInterfacesIf6PktsOutPass: val->v.counter64 = e->pfi.pfik_packets[IPV6][OUT][PASS]; break; case LEAF_pfInterfacesIf6PktsOutBlock: val->v.counter64 = e->pfi.pfik_packets[IPV6][OUT][BLOCK]; break; default: return (SNMP_ERR_NOSUCHNAME); } return (SNMP_ERR_NOERROR); } int pf_tables(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; if (op == SNMP_OP_SET) return (SNMP_ERR_NOT_WRITEABLE); if (op == SNMP_OP_GET) { if ((time(NULL) - pft_table_age) 
> PFT_TABLE_MAXAGE) if (pft_refresh() == -1) return (SNMP_ERR_GENERR); switch (which) { case LEAF_pfTablesTblNumber: val->v.uint32 = pft_table_count; break; default: return (SNMP_ERR_NOSUCHNAME); } return (SNMP_ERR_NOERROR); } abort(); } int pf_tbltable(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; struct pft_entry *e = NULL; if ((time(NULL) - pft_table_age) > PFT_TABLE_MAXAGE) pft_refresh(); switch (op) { case SNMP_OP_SET: return (SNMP_ERR_NOT_WRITEABLE); case SNMP_OP_GETNEXT: if ((e = NEXT_OBJECT_INT(&pft_table, &val->var, sub)) == NULL) return (SNMP_ERR_NOSUCHNAME); val->var.len = sub + 1; val->var.subs[sub] = e->index; break; case SNMP_OP_GET: if (val->var.len - sub != 1) return (SNMP_ERR_NOSUCHNAME); if ((e = pft_table_find(val->var.subs[sub])) == NULL) return (SNMP_ERR_NOSUCHNAME); break; case SNMP_OP_COMMIT: case SNMP_OP_ROLLBACK: default: abort(); } switch (which) { case LEAF_pfTablesTblDescr: return (string_get(val, e->pft.pfrts_name, -1)); case LEAF_pfTablesTblCount: val->v.integer = e->pft.pfrts_cnt; break; case LEAF_pfTablesTblTZero: val->v.uint32 = (time(NULL) - e->pft.pfrts_tzero) * 100; break; case LEAF_pfTablesTblRefsAnchor: val->v.integer = e->pft.pfrts_refcnt[PFR_REFCNT_ANCHOR]; break; case LEAF_pfTablesTblRefsRule: val->v.integer = e->pft.pfrts_refcnt[PFR_REFCNT_RULE]; break; case LEAF_pfTablesTblEvalMatch: val->v.counter64 = e->pft.pfrts_match; break; case LEAF_pfTablesTblEvalNoMatch: val->v.counter64 = e->pft.pfrts_nomatch; break; case LEAF_pfTablesTblBytesInPass: val->v.counter64 = e->pft.pfrts_bytes[PFR_DIR_IN][PFR_OP_PASS]; break; case LEAF_pfTablesTblBytesInBlock: val->v.counter64 = e->pft.pfrts_bytes[PFR_DIR_IN][PFR_OP_BLOCK]; break; case LEAF_pfTablesTblBytesInXPass: val->v.counter64 = e->pft.pfrts_bytes[PFR_DIR_IN][PFR_OP_XPASS]; break; case LEAF_pfTablesTblBytesOutPass: val->v.counter64 = e->pft.pfrts_bytes[PFR_DIR_OUT][PFR_OP_PASS]; break; case LEAF_pfTablesTblBytesOutBlock: val->v.counter64 = e->pft.pfrts_bytes[PFR_DIR_OUT][PFR_OP_BLOCK]; break; case LEAF_pfTablesTblBytesOutXPass: val->v.counter64 = e->pft.pfrts_bytes[PFR_DIR_OUT][PFR_OP_XPASS]; break; case LEAF_pfTablesTblPktsInPass: val->v.counter64 = e->pft.pfrts_packets[PFR_DIR_IN][PFR_OP_PASS]; break; case LEAF_pfTablesTblPktsInBlock: val->v.counter64 = e->pft.pfrts_packets[PFR_DIR_IN][PFR_OP_BLOCK]; break; case LEAF_pfTablesTblPktsInXPass: val->v.counter64 = e->pft.pfrts_packets[PFR_DIR_IN][PFR_OP_XPASS]; break; case LEAF_pfTablesTblPktsOutPass: val->v.counter64 = e->pft.pfrts_packets[PFR_DIR_OUT][PFR_OP_PASS]; break; case LEAF_pfTablesTblPktsOutBlock: val->v.counter64 = e->pft.pfrts_packets[PFR_DIR_OUT][PFR_OP_BLOCK]; break; case LEAF_pfTablesTblPktsOutXPass: val->v.counter64 = e->pft.pfrts_packets[PFR_DIR_OUT][PFR_OP_XPASS]; break; default: return (SNMP_ERR_NOSUCHNAME); } return (SNMP_ERR_NOERROR); } int pf_tbladdr(struct snmp_context __unused *ctx, struct snmp_value __unused *val, u_int __unused sub, u_int __unused vindex, enum snmp_op __unused op) { asn_subid_t which = val->var.subs[sub - 1]; struct pfa_entry *e = NULL; if ((time(NULL) - pfa_table_age) > PFA_TABLE_MAXAGE) pfa_refresh(); switch (op) { case SNMP_OP_SET: return (SNMP_ERR_NOT_WRITEABLE); case SNMP_OP_GETNEXT: if ((e = NEXT_OBJECT_INT(&pfa_table, &val->var, sub)) == NULL) return (SNMP_ERR_NOSUCHNAME); val->var.len = sub + 1; val->var.subs[sub] = e->index; break; case SNMP_OP_GET: if (val->var.len - sub != 1) return 
(SNMP_ERR_NOSUCHNAME); if ((e = pfa_table_find(val->var.subs[sub])) == NULL) return (SNMP_ERR_NOSUCHNAME); break; case SNMP_OP_COMMIT: case SNMP_OP_ROLLBACK: default: abort(); } switch (which) { case LEAF_pfTablesAddrNetType: if (e->pfas.pfras_a.pfra_af == AF_INET) val->v.integer = pfTablesAddrNetType_ipv4; else if (e->pfas.pfras_a.pfra_af == AF_INET6) val->v.integer = pfTablesAddrNetType_ipv6; else return (SNMP_ERR_GENERR); break; case LEAF_pfTablesAddrNet: if (e->pfas.pfras_a.pfra_af == AF_INET) { return (string_get(val, (u_char *)&e->pfas.pfras_a.pfra_ip4addr, 4)); } else if (e->pfas.pfras_a.pfra_af == AF_INET6) return (string_get(val, (u_char *)&e->pfas.pfras_a.pfra_ip6addr, 16)); else return (SNMP_ERR_GENERR); break; case LEAF_pfTablesAddrPrefix: val->v.integer = (int32_t) e->pfas.pfras_a.pfra_net; break; case LEAF_pfTablesAddrTZero: val->v.uint32 = (time(NULL) - e->pfas.pfras_tzero) * 100; break; case LEAF_pfTablesAddrBytesInPass: val->v.counter64 = e->pfas.pfras_bytes[PFR_DIR_IN][PFR_OP_PASS]; break; case LEAF_pfTablesAddrBytesInBlock: val->v.counter64 = e->pfas.pfras_bytes[PFR_DIR_IN][PFR_OP_BLOCK]; break; case LEAF_pfTablesAddrBytesOutPass: val->v.counter64 = e->pfas.pfras_bytes[PFR_DIR_OUT][PFR_OP_PASS]; break; case LEAF_pfTablesAddrBytesOutBlock: val->v.counter64 = e->pfas.pfras_bytes[PFR_DIR_OUT][PFR_OP_BLOCK]; break; case LEAF_pfTablesAddrPktsInPass: val->v.counter64 = e->pfas.pfras_packets[PFR_DIR_IN][PFR_OP_PASS]; break; case LEAF_pfTablesAddrPktsInBlock: val->v.counter64 = e->pfas.pfras_packets[PFR_DIR_IN][PFR_OP_BLOCK]; break; case LEAF_pfTablesAddrPktsOutPass: val->v.counter64 = e->pfas.pfras_packets[PFR_DIR_OUT][PFR_OP_PASS]; break; case LEAF_pfTablesAddrPktsOutBlock: val->v.counter64 = e->pfas.pfras_packets[PFR_DIR_OUT][PFR_OP_BLOCK]; break; default: return (SNMP_ERR_NOSUCHNAME); } return (SNMP_ERR_NOERROR); } int pf_altq(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; if (!altq_enabled) return (SNMP_ERR_NOSUCHNAME); if (op == SNMP_OP_SET) return (SNMP_ERR_NOT_WRITEABLE); if (op == SNMP_OP_GET) { if ((time(NULL) - pfq_table_age) > PFQ_TABLE_MAXAGE) if (pfq_refresh() == -1) return (SNMP_ERR_GENERR); switch (which) { case LEAF_pfAltqQueueNumber: val->v.uint32 = pfq_table_count; break; default: return (SNMP_ERR_NOSUCHNAME); } return (SNMP_ERR_NOERROR); } abort(); return (SNMP_ERR_GENERR); } int pf_altqq(struct snmp_context __unused *ctx, struct snmp_value *val, u_int sub, u_int __unused vindex, enum snmp_op op) { asn_subid_t which = val->var.subs[sub - 1]; struct pfq_entry *e = NULL; if (!altq_enabled) return (SNMP_ERR_NOSUCHNAME); if ((time(NULL) - pfq_table_age) > PFQ_TABLE_MAXAGE) pfq_refresh(); switch (op) { case SNMP_OP_SET: return (SNMP_ERR_NOT_WRITEABLE); case SNMP_OP_GETNEXT: if ((e = NEXT_OBJECT_INT(&pfq_table, &val->var, sub)) == NULL) return (SNMP_ERR_NOSUCHNAME); val->var.len = sub + 1; val->var.subs[sub] = e->index; break; case SNMP_OP_GET: if (val->var.len - sub != 1) return (SNMP_ERR_NOSUCHNAME); if ((e = pfq_table_find(val->var.subs[sub])) == NULL) return (SNMP_ERR_NOSUCHNAME); break; case SNMP_OP_COMMIT: case SNMP_OP_ROLLBACK: default: abort(); } switch (which) { case LEAF_pfAltqQueueDescr: return (string_get(val, e->altq.qname, -1)); case LEAF_pfAltqQueueParent: return (string_get(val, e->altq.parent, -1)); case LEAF_pfAltqQueueScheduler: val->v.integer = e->altq.scheduler; break; case LEAF_pfAltqQueueBandwidth: - val->v.uint32 = e->altq.bandwidth; 
+		val->v.uint32 = (e->altq.bandwidth > UINT_MAX) ?
+		    UINT_MAX : (u_int32_t)e->altq.bandwidth;
		break;
	case LEAF_pfAltqQueuePriority:
		val->v.integer = e->altq.priority;
		break;
	case LEAF_pfAltqQueueLimit:
		val->v.integer = e->altq.qlimit;
		break;

	default:
		return (SNMP_ERR_NOSUCHNAME);
	}

	return (SNMP_ERR_NOERROR);
}

int
pf_labels(struct snmp_context __unused *ctx, struct snmp_value *val,
	u_int sub, u_int __unused vindex, enum snmp_op op)
{
	asn_subid_t	which = val->var.subs[sub - 1];

	if (op == SNMP_OP_SET)
		return (SNMP_ERR_NOT_WRITEABLE);

	if (op == SNMP_OP_GET) {
		if ((time(NULL) - pfl_table_age) > PFL_TABLE_MAXAGE)
			if (pfl_refresh() == -1)
				return (SNMP_ERR_GENERR);

		switch (which) {
		case LEAF_pfLabelsLblNumber:
			val->v.uint32 = pfl_table_count;
			break;

		default:
			return (SNMP_ERR_NOSUCHNAME);
		}

		return (SNMP_ERR_NOERROR);
	}

	abort();
	return (SNMP_ERR_GENERR);
}

int
pf_lbltable(struct snmp_context __unused *ctx, struct snmp_value *val,
	u_int sub, u_int __unused vindex, enum snmp_op op)
{
	asn_subid_t	which = val->var.subs[sub - 1];
	struct pfl_entry *e = NULL;

	if ((time(NULL) - pfl_table_age) > PFL_TABLE_MAXAGE)
		pfl_refresh();

	switch (op) {
	case SNMP_OP_SET:
		return (SNMP_ERR_NOT_WRITEABLE);
	case SNMP_OP_GETNEXT:
		if ((e = NEXT_OBJECT_INT(&pfl_table, &val->var, sub)) == NULL)
			return (SNMP_ERR_NOSUCHNAME);
		val->var.len = sub + 1;
		val->var.subs[sub] = e->index;
		break;
	case SNMP_OP_GET:
		if (val->var.len - sub != 1)
			return (SNMP_ERR_NOSUCHNAME);
		if ((e = pfl_table_find(val->var.subs[sub])) == NULL)
			return (SNMP_ERR_NOSUCHNAME);
		break;

	case SNMP_OP_COMMIT:
	case SNMP_OP_ROLLBACK:
	default:
		abort();
	}

	switch (which) {
	case LEAF_pfLabelsLblName:
		return (string_get(val, e->name, -1));
	case LEAF_pfLabelsLblEvals:
		val->v.counter64 = e->evals;
		break;
	case LEAF_pfLabelsLblBytesIn:
		val->v.counter64 = e->bytes[IN];
		break;
	case LEAF_pfLabelsLblBytesOut:
		val->v.counter64 = e->bytes[OUT];
		break;
	case LEAF_pfLabelsLblPktsIn:
		val->v.counter64 = e->pkts[IN];
		break;
	case LEAF_pfLabelsLblPktsOut:
		val->v.counter64 = e->pkts[OUT];
		break;
	default:
		return (SNMP_ERR_NOSUCHNAME);
	}

	return (SNMP_ERR_NOERROR);
}

static struct pfi_entry *
pfi_table_find(u_int idx)
{
	struct pfi_entry *e;

	TAILQ_FOREACH(e, &pfi_table, link)
		if (e->index == idx)
			return (e);
	return (NULL);
}

static struct pfq_entry *
pfq_table_find(u_int idx)
{
	struct pfq_entry *e;

	TAILQ_FOREACH(e, &pfq_table, link)
		if (e->index == idx)
			return (e);
	return (NULL);
}

static struct pft_entry *
pft_table_find(u_int idx)
{
	struct pft_entry *e;

	TAILQ_FOREACH(e, &pft_table, link)
		if (e->index == idx)
			return (e);
	return (NULL);
}

static struct pfa_entry *
pfa_table_find(u_int idx)
{
	struct pfa_entry *e;

	TAILQ_FOREACH(e, &pfa_table, link)
		if (e->index == idx)
			return (e);
	return (NULL);
}

static struct pfl_entry *
pfl_table_find(u_int idx)
{
	struct pfl_entry *e;

	TAILQ_FOREACH(e, &pfl_table, link)
		if (e->index == idx)
			return (e);
	return (NULL);
}

static int
pfi_refresh(void)
{
	struct pfioc_iface io;
	struct pfi_kif *p = NULL;
	struct pfi_entry *e;
	int i, numifs = 1;

	if (started && this_tick <= pf_tick)
		return (0);

	while (!TAILQ_EMPTY(&pfi_table)) {
		e = TAILQ_FIRST(&pfi_table);
		TAILQ_REMOVE(&pfi_table, e, link);
		free(e);
	}

	bzero(&io, sizeof(io));
	io.pfiio_esize = sizeof(struct pfi_kif);

	for (;;) {
		p = reallocf(p, numifs * sizeof(struct pfi_kif));
		if (p == NULL) {
			syslog(LOG_ERR,
			    "pfi_refresh(): reallocf() numifs=%d: %s",
			    numifs, strerror(errno));
			goto err2;
		}
		io.pfiio_size = numifs;
		io.pfiio_buffer = p;

		if (ioctl(dev, DIOCIGETIFACES, &io)) {
			syslog(LOG_ERR, "pfi_refresh(): ioctl(): %s",
			    strerror(errno));
			goto err2;
		}

		if (numifs >= io.pfiio_size)
			break;

		numifs = io.pfiio_size;
	}

	for (i = 0; i < numifs; i++) {
		e = malloc(sizeof(struct pfi_entry));
		if (e == NULL)
			goto err1;
		e->index = i + 1;
		memcpy(&e->pfi, p + i, sizeof(struct pfi_kif));
		TAILQ_INSERT_TAIL(&pfi_table, e, link);
	}

	pfi_table_age = time(NULL);
	pfi_table_count = numifs;
	pf_tick = this_tick;

	free(p);
	return (0);

err1:
	while (!TAILQ_EMPTY(&pfi_table)) {
		e = TAILQ_FIRST(&pfi_table);
		TAILQ_REMOVE(&pfi_table, e, link);
		free(e);
	}
err2:
	free(p);
	return (-1);
}

static int
pfq_refresh(void)
{
	struct pfioc_altq pa;
	struct pfq_entry *e;
	int i, numqs, ticket;

	if (started && this_tick <= pf_tick)
		return (0);

	while (!TAILQ_EMPTY(&pfq_table)) {
		e = TAILQ_FIRST(&pfq_table);
		TAILQ_REMOVE(&pfq_table, e, link);
		free(e);
	}

	bzero(&pa, sizeof(pa));
-
+	pa.version = PFIOC_ALTQ_VERSION;
	if (ioctl(dev, DIOCGETALTQS, &pa)) {
		syslog(LOG_ERR, "pfq_refresh: ioctl(DIOCGETALTQS): %s",
		    strerror(errno));
		return (-1);
	}

	numqs = pa.nr;
	ticket = pa.ticket;

	for (i = 0; i < numqs; i++) {
		e = malloc(sizeof(struct pfq_entry));
		if (e == NULL) {
			syslog(LOG_ERR, "pfq_refresh(): malloc(): %s",
			    strerror(errno));
			goto err;
		}
		pa.ticket = ticket;
		pa.nr = i;

		if (ioctl(dev, DIOCGETALTQ, &pa)) {
			syslog(LOG_ERR, "pfq_refresh(): "
			    "ioctl(DIOCGETALTQ): %s",
			    strerror(errno));
			goto err;
		}

		if (pa.altq.qid > 0) {
			memcpy(&e->altq, &pa.altq, sizeof(struct pf_altq));
			e->index = pa.altq.qid;
			pfq_table_count = i;
			INSERT_OBJECT_INT_LINK_INDEX(e, &pfq_table, link,
			    index);
		}
	}

	pfq_table_age = time(NULL);
	pf_tick = this_tick;
	return (0);

err:
	free(e);
	while (!TAILQ_EMPTY(&pfq_table)) {
		e = TAILQ_FIRST(&pfq_table);
		TAILQ_REMOVE(&pfq_table, e, link);
		free(e);
	}
	return (-1);
}

static int
pfs_refresh(void)
{
	if (started && this_tick <= pf_tick)
		return (0);

	bzero(&pfs, sizeof(struct pf_status));

	if (ioctl(dev, DIOCGETSTATUS, &pfs)) {
		syslog(LOG_ERR, "pfs_refresh(): ioctl(): %s",
		    strerror(errno));
		return (-1);
	}

	pf_tick = this_tick;
	return (0);
}

static int
pft_refresh(void)
{
	struct pfioc_table io;
	struct pfr_tstats *t = NULL;
	struct pft_entry *e;
	int i, numtbls = 1;

	if (started && this_tick <= pf_tick)
		return (0);

	while (!TAILQ_EMPTY(&pft_table)) {
		e = TAILQ_FIRST(&pft_table);
		TAILQ_REMOVE(&pft_table, e, link);
		free(e);
	}

	bzero(&io, sizeof(io));
	io.pfrio_esize = sizeof(struct pfr_tstats);

	for (;;) {
		t = reallocf(t, numtbls * sizeof(struct pfr_tstats));
		if (t == NULL) {
			syslog(LOG_ERR,
			    "pft_refresh(): reallocf() numtbls=%d: %s",
			    numtbls, strerror(errno));
			goto err2;
		}
		io.pfrio_size = numtbls;
		io.pfrio_buffer = t;

		if (ioctl(dev, DIOCRGETTSTATS, &io)) {
			syslog(LOG_ERR, "pft_refresh(): ioctl(): %s",
			    strerror(errno));
			goto err2;
		}

		if (numtbls >= io.pfrio_size)
			break;

		numtbls = io.pfrio_size;
	}

	for (i = 0; i < numtbls; i++) {
		e = malloc(sizeof(struct pft_entry));
		if (e == NULL)
			goto err1;
		e->index = i + 1;
		memcpy(&e->pft, t + i, sizeof(struct pfr_tstats));
		TAILQ_INSERT_TAIL(&pft_table, e, link);
	}

	pft_table_age = time(NULL);
	pft_table_count = numtbls;
	pf_tick = this_tick;

	free(t);
	return (0);

err1:
	while (!TAILQ_EMPTY(&pft_table)) {
		e = TAILQ_FIRST(&pft_table);
		TAILQ_REMOVE(&pft_table, e, link);
		free(e);
	}
err2:
	free(t);
	return (-1);
}

static int
pfa_table_addrs(u_int sidx, struct pfr_table *pt)
{
	struct pfioc_table io;
	struct pfr_astats *t = NULL;
	struct pfa_entry *e;
	int i, numaddrs = 1;

	if (pt == NULL)
		return (-1);

	memset(&io, 0, sizeof(io));
	strlcpy(io.pfrio_table.pfrt_name, pt->pfrt_name,
	    sizeof(io.pfrio_table.pfrt_name));

	for (;;) {
		t = reallocf(t, numaddrs * sizeof(struct
		    pfr_astats));
		if (t == NULL) {
			syslog(LOG_ERR, "pfa_table_addrs(): reallocf(): %s",
			    strerror(errno));
			numaddrs = -1;
			goto error;
		}
		memset(t, 0, sizeof(*t));
		io.pfrio_size = numaddrs;
		io.pfrio_buffer = t;
		io.pfrio_esize = sizeof(struct pfr_astats);

		if (ioctl(dev, DIOCRGETASTATS, &io)) {
			syslog(LOG_ERR, "pfa_table_addrs(): ioctl() on %s: %s",
			    pt->pfrt_name, strerror(errno));
			numaddrs = -1;
			break;
		}

		if (numaddrs >= io.pfrio_size)
			break;

		numaddrs = io.pfrio_size;
	}

	for (i = 0; i < numaddrs; i++) {
		if ((t + i)->pfras_a.pfra_af != AF_INET &&
		    (t + i)->pfras_a.pfra_af != AF_INET6) {
			numaddrs = i;
			break;
		}

		e = (struct pfa_entry *)malloc(sizeof(struct pfa_entry));
		if (e == NULL) {
			syslog(LOG_ERR, "pfa_table_addrs(): malloc(): %s",
			    strerror(errno));
			numaddrs = -1;
			break;
		}
		e->index = sidx + i;
		memcpy(&e->pfas, t + i, sizeof(struct pfr_astats));
		TAILQ_INSERT_TAIL(&pfa_table, e, link);
	}

	free(t);

error:
	return (numaddrs);
}

static int
pfa_refresh(void)
{
	struct pfioc_table io;
	struct pfr_table *pt = NULL, *it = NULL;
	struct pfa_entry *e;
	int i, numtbls = 1, cidx, naddrs;

	if (started && this_tick <= pf_tick)
		return (0);

	while (!TAILQ_EMPTY(&pfa_table)) {
		e = TAILQ_FIRST(&pfa_table);
		TAILQ_REMOVE(&pfa_table, e, link);
		free(e);
	}

	memset(&io, 0, sizeof(io));
	io.pfrio_esize = sizeof(struct pfr_table);

	for (;;) {
		pt = reallocf(pt, numtbls * sizeof(struct pfr_table));
		if (pt == NULL) {
			syslog(LOG_ERR, "pfa_refresh(): reallocf() %s",
			    strerror(errno));
			return (-1);
		}
		memset(pt, 0, sizeof(*pt));
		io.pfrio_size = numtbls;
		io.pfrio_buffer = pt;

		if (ioctl(dev, DIOCRGETTABLES, &io)) {
			syslog(LOG_ERR, "pfa_refresh(): ioctl(): %s",
			    strerror(errno));
			goto err2;
		}

		if (numtbls >= io.pfrio_size)
			break;

		numtbls = io.pfrio_size;
	}

	cidx = 1;

	for (it = pt, i = 0; i < numtbls; it++, i++) {
		/*
		 * Skip the table if not active - ioctl(DIOCRGETASTATS) will
		 * return ESRCH for this entry anyway.
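		 * Only addresses from active tables end up in pfa_table;
		 * cidx carries the running SNMP row index across tables, so
		 * indices stay contiguous even when tables are skipped.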
		 */
		if (!(it->pfrt_flags & PFR_TFLAG_ACTIVE))
			continue;

		if ((naddrs = pfa_table_addrs(cidx, it)) < 0)
			goto err1;

		cidx += naddrs;
	}

	pfa_table_age = time(NULL);
	pfa_table_count = cidx;
	pf_tick = this_tick;

	free(pt);
	return (0);

err1:
	while (!TAILQ_EMPTY(&pfa_table)) {
		e = TAILQ_FIRST(&pfa_table);
		TAILQ_REMOVE(&pfa_table, e, link);
		free(e);
	}
err2:
	free(pt);
	return (-1);
}

static int
pfl_scan_ruleset(const char *path)
{
	struct pfioc_rule pr;
	struct pfl_entry *e;
	u_int32_t nr, i;

	bzero(&pr, sizeof(pr));
	strlcpy(pr.anchor, path, sizeof(pr.anchor));
	pr.rule.action = PF_PASS;

	if (ioctl(dev, DIOCGETRULES, &pr)) {
		syslog(LOG_ERR, "pfl_scan_ruleset: ioctl(DIOCGETRULES): %s",
		    strerror(errno));
		goto err;
	}

	for (nr = pr.nr, i = 0; i < nr; i++) {
		pr.nr = i;
		if (ioctl(dev, DIOCGETRULE, &pr)) {
			syslog(LOG_ERR, "pfl_scan_ruleset: ioctl(DIOCGETRULE):"
			    " %s", strerror(errno));
			goto err;
		}

		if (pr.rule.label[0]) {
			e = (struct pfl_entry *)malloc(sizeof(*e));
			if (e == NULL)
				goto err;

			strlcpy(e->name, path, sizeof(e->name));
			if (path[0])
				strlcat(e->name, "/", sizeof(e->name));
			strlcat(e->name, pr.rule.label, sizeof(e->name));

			e->evals = pr.rule.evaluations;
			e->bytes[IN] = pr.rule.bytes[IN];
			e->bytes[OUT] = pr.rule.bytes[OUT];
			e->pkts[IN] = pr.rule.packets[IN];
			e->pkts[OUT] = pr.rule.packets[OUT];
			e->index = ++pfl_table_count;

			TAILQ_INSERT_TAIL(&pfl_table, e, link);
		}
	}

	return (0);

err:
	return (-1);
}

static int
pfl_walk_rulesets(const char *path)
{
	struct pfioc_ruleset prs;
	char newpath[MAXPATHLEN];
	u_int32_t nr, i;

	if (pfl_scan_ruleset(path))
		goto err;

	bzero(&prs, sizeof(prs));
	strlcpy(prs.path, path, sizeof(prs.path));
	if (ioctl(dev, DIOCGETRULESETS, &prs)) {
		syslog(LOG_ERR, "pfl_walk_rulesets: ioctl(DIOCGETRULESETS):"
		    " %s", strerror(errno));
		goto err;
	}

	for (nr = prs.nr, i = 0; i < nr; i++) {
		prs.nr = i;
		if (ioctl(dev, DIOCGETRULESET, &prs)) {
			syslog(LOG_ERR, "pfl_walk_rulesets:"
			    " ioctl(DIOCGETRULESET): %s", strerror(errno));
			goto err;
		}

		if (strcmp(prs.name, PF_RESERVED_ANCHOR) == 0)
			continue;

		strlcpy(newpath, path, sizeof(newpath));
		if (path[0])
			strlcat(newpath, "/", sizeof(newpath));

		strlcat(newpath, prs.name, sizeof(newpath));
		if (pfl_walk_rulesets(newpath))
			goto err;
	}

	return (0);

err:
	return (-1);
}

static int
pfl_refresh(void)
{
	struct pfl_entry *e;

	if (started && this_tick <= pf_tick)
		return (0);

	while (!TAILQ_EMPTY(&pfl_table)) {
		e = TAILQ_FIRST(&pfl_table);
		TAILQ_REMOVE(&pfl_table, e, link);
		free(e);
	}
	pfl_table_count = 0;

	if (pfl_walk_rulesets(""))
		goto err;

	pfl_table_age = time(NULL);
	pf_tick = this_tick;

	return (0);

err:
	while (!TAILQ_EMPTY(&pfl_table)) {
		e = TAILQ_FIRST(&pfl_table);
		TAILQ_REMOVE(&pfl_table, e, link);
		free(e);
	}
	pfl_table_count = 0;

	return (-1);
}

/*
 * check whether altq support is enabled in kernel
 */
static int
altq_is_enabled(int pfdev)
{
	struct pfioc_altq pa;

	errno = 0;
+	pa.version = PFIOC_ALTQ_VERSION;
	if (ioctl(pfdev, DIOCGETALTQS, &pa)) {
		if (errno == ENODEV) {
			syslog(LOG_INFO, "No ALTQ support in kernel\n"
			    "ALTQ related functions disabled\n");
			return (0);
		} else
			syslog(LOG_ERR, "DIOCGETALTQS returned an error: %s",
			    strerror(errno));
		return (-1);
	}
	return (1);
}

/*
 * Implement the bsnmpd module interface
 */
static int
pf_init(struct lmodule *mod, int __unused argc, char __unused *argv[])
{
	module = mod;

	if ((dev = open("/dev/pf", O_RDONLY)) == -1) {
		syslog(LOG_ERR, "pf_init(): open(): %s\n",
		    strerror(errno));
		return (-1);
	}

	if ((altq_enabled = altq_is_enabled(dev)) == -1) {
		syslog(LOG_ERR, "pf_init(): altq test failed");
		return (-1);
	}

	/* Prepare internal state */
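	/*
	 * All five stat caches (interfaces, queues, tables, table
	 * addresses and rule labels) start empty and are primed once
	 * here; pfq_refresh() is skipped when the kernel lacks ALTQ.
	 */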
	TAILQ_INIT(&pfi_table);
	TAILQ_INIT(&pfq_table);
	TAILQ_INIT(&pft_table);
	TAILQ_INIT(&pfa_table);
	TAILQ_INIT(&pfl_table);

	pfi_refresh();
	if (altq_enabled) {
		pfq_refresh();
	}

	pfs_refresh();
	pft_refresh();
	pfa_refresh();
	pfl_refresh();

	started = 1;

	return (0);
}

static int
pf_fini(void)
{
	struct pfi_entry *i1, *i2;
	struct pfq_entry *q1, *q2;
	struct pft_entry *t1, *t2;
	struct pfa_entry *a1, *a2;
	struct pfl_entry *l1, *l2;

	/* Empty the list of interfaces */
	i1 = TAILQ_FIRST(&pfi_table);
	while (i1 != NULL) {
		i2 = TAILQ_NEXT(i1, link);
		free(i1);
		i1 = i2;
	}

	/* List of queues */
	q1 = TAILQ_FIRST(&pfq_table);
	while (q1 != NULL) {
		q2 = TAILQ_NEXT(q1, link);
		free(q1);
		q1 = q2;
	}

	/* List of tables */
	t1 = TAILQ_FIRST(&pft_table);
	while (t1 != NULL) {
		t2 = TAILQ_NEXT(t1, link);
		free(t1);
		t1 = t2;
	}

	/* List of table addresses */
	a1 = TAILQ_FIRST(&pfa_table);
	while (a1 != NULL) {
		a2 = TAILQ_NEXT(a1, link);
		free(a1);
		a1 = a2;
	}

	/* And the list of labeled filter rules */
	l1 = TAILQ_FIRST(&pfl_table);
	while (l1 != NULL) {
		l2 = TAILQ_NEXT(l1, link);
		free(l1);
		l1 = l2;
	}

	close(dev);
	return (0);
}

static void
pf_dump(void)
{
	pfi_refresh();
	if (altq_enabled) {
		pfq_refresh();
	}
	pft_refresh();
	pfa_refresh();
	pfl_refresh();

	syslog(LOG_ERR, "Dump: pfi_table_age = %jd",
	    (intmax_t)pfi_table_age);
	syslog(LOG_ERR, "Dump: pfi_table_count = %d",
	    pfi_table_count);

	syslog(LOG_ERR, "Dump: pfq_table_age = %jd",
	    (intmax_t)pfq_table_age);
	syslog(LOG_ERR, "Dump: pfq_table_count = %d",
	    pfq_table_count);

	syslog(LOG_ERR, "Dump: pft_table_age = %jd",
	    (intmax_t)pft_table_age);
	syslog(LOG_ERR, "Dump: pft_table_count = %d",
	    pft_table_count);

	syslog(LOG_ERR, "Dump: pfa_table_age = %jd",
	    (intmax_t)pfa_table_age);
	syslog(LOG_ERR, "Dump: pfa_table_count = %d",
	    pfa_table_count);

	syslog(LOG_ERR, "Dump: pfl_table_age = %jd",
	    (intmax_t)pfl_table_age);
	syslog(LOG_ERR, "Dump: pfl_table_count = %d",
	    pfl_table_count);
}

const struct snmp_module config = {
	.comment = "This module implements a MIB for the pf packet filter.",
	.init = pf_init,
	.fini = pf_fini,
	.tree = pf_ctree,
	.dump = pf_dump,
	.tree_size = pf_CTREE_SIZE,
};