Index: vendor-sys/pf/dist/sys/contrib/pf/net/if_pflog.c =================================================================== --- vendor-sys/pf/dist/sys/contrib/pf/net/if_pflog.c (revision 171163) +++ vendor-sys/pf/dist/sys/contrib/pf/net/if_pflog.c (revision 171164) @@ -1,227 +1,275 @@ -/* $OpenBSD: if_pflog.c,v 1.12 2004/05/19 17:50:51 dhartmei Exp $ */ +/* $OpenBSD: if_pflog.c,v 1.22 2006/12/15 09:31:20 otto Exp $ */ /* * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and * Niels Provos (provos@physnet.uni-hamburg.de). * * This code was written by John Ioannidis for BSD/OS in Athens, Greece, * in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, * by Angelos D. Keromytis. * * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis * and Niels Provos. * * Copyright (C) 1995, 1996, 1997, 1998 by John Ioannidis, Angelos D. Keromytis * and Niels Provos. * Copyright (c) 2001, Angelos D. Keromytis, Niels Provos. * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or * modification of this software. * You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to * all. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. 
*/ #include "bpfilter.h" #include "pflog.h" #include #include #include +#include #include #include #include #include #include #include #ifdef INET #include #include #include #include #endif #ifdef INET6 #ifndef INET #include #endif #include #endif /* INET6 */ #include #include #define PFLOGMTU (32768 + MHLEN + MLEN) #ifdef PFLOGDEBUG #define DPRINTF(x) do { if (pflogdebug) printf x ; } while (0) #else #define DPRINTF(x) #endif -struct pflog_softc pflogif[NPFLOG]; - void pflogattach(int); int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); int pflogioctl(struct ifnet *, u_long, caddr_t); -void pflogrtrequest(int, struct rtentry *, struct sockaddr *); void pflogstart(struct ifnet *); +int pflog_clone_create(struct if_clone *, int); +int pflog_clone_destroy(struct ifnet *); +LIST_HEAD(, pflog_softc) pflogif_list; +struct if_clone pflog_cloner = + IF_CLONE_INITIALIZER("pflog", pflog_clone_create, pflog_clone_destroy); + +struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */ + extern int ifqmaxlen; void pflogattach(int npflog) { + int i; + LIST_INIT(&pflogif_list); + for (i = 0; i < PFLOGIFS_MAX; i++) + pflogifs[i] = NULL; + (void) pflog_clone_create(&pflog_cloner, 0); + if_clone_attach(&pflog_cloner); +} + +int +pflog_clone_create(struct if_clone *ifc, int unit) +{ struct ifnet *ifp; - int i; + struct pflog_softc *pflogif; + int s; - bzero(pflogif, sizeof(pflogif)); + if (unit >= PFLOGIFS_MAX) + return (EINVAL); - for (i = 0; i < NPFLOG; i++) { - ifp = &pflogif[i].sc_if; - snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", i); - ifp->if_softc = &pflogif[i]; - ifp->if_mtu = PFLOGMTU; - ifp->if_ioctl = pflogioctl; - ifp->if_output = pflogoutput; - ifp->if_start = pflogstart; - ifp->if_type = IFT_PFLOG; - ifp->if_snd.ifq_maxlen = ifqmaxlen; - ifp->if_hdrlen = PFLOG_HDRLEN; - if_attach(ifp); - if_alloc_sadl(ifp); + if ((pflogif = malloc(sizeof(*pflogif), M_DEVBUF, M_NOWAIT)) == NULL) + return (ENOMEM); + bzero(pflogif, sizeof(*pflogif)); + pflogif->sc_unit = unit; + ifp = &pflogif->sc_if; + snprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", unit); + ifp->if_softc = pflogif; + ifp->if_mtu = PFLOGMTU; + ifp->if_ioctl = pflogioctl; + ifp->if_output = pflogoutput; + ifp->if_start = pflogstart; + ifp->if_type = IFT_PFLOG; + ifp->if_snd.ifq_maxlen = ifqmaxlen; + ifp->if_hdrlen = PFLOG_HDRLEN; + if_attach(ifp); + if_alloc_sadl(ifp); + #if NBPFILTER > 0 - bpfattach(&pflogif[i].sc_if.if_bpf, ifp, DLT_PFLOG, - PFLOG_HDRLEN); + bpfattach(&pflogif->sc_if.if_bpf, ifp, DLT_PFLOG, PFLOG_HDRLEN); #endif - } + + s = splnet(); + LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list); + pflogifs[unit] = ifp; + splx(s); + + return (0); } +int +pflog_clone_destroy(struct ifnet *ifp) +{ + struct pflog_softc *pflogif = ifp->if_softc; + int s; + + s = splnet(); + pflogifs[pflogif->sc_unit] = NULL; + LIST_REMOVE(pflogif, sc_list); + splx(s); + +#if NBPFILTER > 0 + bpfdetach(ifp); +#endif + if_detach(ifp); + free(pflogif, M_DEVBUF); + return (0); +} + /* * Start output on the pflog interface. 
*/ void pflogstart(struct ifnet *ifp) { struct mbuf *m; int s; for (;;) { - s = splimp(); + s = splnet(); IF_DROP(&ifp->if_snd); IF_DEQUEUE(&ifp->if_snd, m); splx(s); if (m == NULL) return; else m_freem(m); } } int pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct rtentry *rt) { m_freem(m); return (0); } /* ARGSUSED */ -void -pflogrtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa) -{ - if (rt) - rt->rt_rmx.rmx_mtu = PFLOGMTU; -} - -/* ARGSUSED */ int pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { switch (cmd) { case SIOCSIFADDR: case SIOCAIFADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) ifp->if_flags |= IFF_RUNNING; else ifp->if_flags &= ~IFF_RUNNING; break; default: return (EINVAL); } return (0); } int pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, u_int8_t reason, struct pf_rule *rm, struct pf_rule *am, - struct pf_ruleset *ruleset) + struct pf_ruleset *ruleset, struct pf_pdesc *pd) { #if NBPFILTER > 0 struct ifnet *ifn; struct pfloghdr hdr; - struct mbuf m1; - if (kif == NULL || m == NULL || rm == NULL) + if (kif == NULL || m == NULL || rm == NULL || pd == NULL) return (-1); + if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf) + return (0); + bzero(&hdr, sizeof(hdr)); hdr.length = PFLOG_REAL_HDRLEN; hdr.af = af; hdr.action = rm->action; hdr.reason = reason; memcpy(hdr.ifname, kif->pfik_name, sizeof(hdr.ifname)); if (am == NULL) { hdr.rulenr = htonl(rm->nr); hdr.subrulenr = -1; } else { hdr.rulenr = htonl(am->nr); hdr.subrulenr = htonl(rm->nr); if (ruleset != NULL && ruleset->anchor != NULL) strlcpy(hdr.ruleset, ruleset->anchor->name, sizeof(hdr.ruleset)); } + if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done) + pd->lookup.done = pf_socket_lookup(dir, pd); + if (pd->lookup.done > 0) { + hdr.uid = pd->lookup.uid; + hdr.pid = pd->lookup.pid; + } else { + hdr.uid = UID_MAX; + hdr.pid = NO_PID; + } + hdr.rule_uid = rm->cuid; + hdr.rule_pid = rm->cpid; hdr.dir = dir; #ifdef INET if (af == AF_INET && dir == PF_OUT) { struct ip *ip; ip = mtod(m, struct ip *); ip->ip_sum = 0; ip->ip_sum = in_cksum(m, ip->ip_hl << 2); } #endif /* INET */ - m1.m_next = m; - m1.m_len = PFLOG_HDRLEN; - m1.m_data = (char *) &hdr; - - ifn = &(pflogif[0].sc_if); - - if (ifn->if_bpf) - bpf_mtap(ifn->if_bpf, &m1); + ifn->if_opackets++; + ifn->if_obytes += m->m_pkthdr.len; + bpf_mtap_hdr(ifn->if_bpf, (char *)&hdr, PFLOG_HDRLEN, m, + BPF_DIRECTION_OUT); #endif return (0); } Index: vendor-sys/pf/dist/sys/contrib/pf/net/if_pflog.h =================================================================== --- vendor-sys/pf/dist/sys/contrib/pf/net/if_pflog.h (revision 171163) +++ vendor-sys/pf/dist/sys/contrib/pf/net/if_pflog.h (revision 171164) @@ -1,72 +1,80 @@ -/* $OpenBSD: if_pflog.h,v 1.11 2004/05/19 17:50:51 dhartmei Exp $ */ +/* $OpenBSD: if_pflog.h,v 1.14 2006/10/25 11:27:01 henning Exp $ */ /* * Copyright 2001 Niels Provos * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _NET_IF_PFLOG_H_ #define _NET_IF_PFLOG_H_ +#define PFLOGIFS_MAX 16 + struct pflog_softc { - struct ifnet sc_if; /* the interface */ + struct ifnet sc_if; /* the interface */ + int sc_unit; + LIST_ENTRY(pflog_softc) sc_list; }; #define PFLOG_RULESET_NAME_SIZE 16 struct pfloghdr { u_int8_t length; sa_family_t af; u_int8_t action; u_int8_t reason; char ifname[IFNAMSIZ]; char ruleset[PFLOG_RULESET_NAME_SIZE]; u_int32_t rulenr; u_int32_t subrulenr; + uid_t uid; + pid_t pid; + uid_t rule_uid; + pid_t rule_pid; u_int8_t dir; u_int8_t pad[3]; }; #define PFLOG_HDRLEN sizeof(struct pfloghdr) /* minus pad, also used as a signature */ #define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad) /* XXX remove later when old format logs are no longer needed */ struct old_pfloghdr { u_int32_t af; char ifname[IFNAMSIZ]; short rnr; u_short reason; u_short action; u_short dir; }; #define OLD_PFLOG_HDRLEN sizeof(struct old_pfloghdr) #ifdef _KERNEL #if NPFLOG > 0 -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) pflog_packet(i,a,b,c,d,e,f,g) +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) pflog_packet(i,a,b,c,d,e,f,g,h) #else -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) ((void)0) +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0) #endif /* NPFLOG > 0 */ #endif /* _KERNEL */ #endif /* _NET_IF_PFLOG_H_ */ Index: vendor-sys/pf/dist/sys/contrib/pf/net/if_pfsync.c =================================================================== --- vendor-sys/pf/dist/sys/contrib/pf/net/if_pfsync.c (revision 171163) +++ vendor-sys/pf/dist/sys/contrib/pf/net/if_pfsync.c (revision 171164) @@ -1,1411 +1,1729 @@ -/* $OpenBSD: if_pfsync.c,v 1.46 2005/02/20 15:58:38 mcbride Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
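/*
 * Illustrative sketch, not part of the imported diff: pflog_packet()
 * above stores PFLOG_REAL_HDRLEN in hdr.length, and the header defines
 * it as offsetof(struct pfloghdr, pad), i.e. the header minus its
 * trailing pad[3]; per the comment it doubles as a signature for the
 * new header format. A standalone userland check (af is declared here
 * as u_int8_t, matching OpenBSD's one-byte sa_family_t; on other libcs
 * sa_family_t may be wider):
 */
#include <stddef.h>
#include <stdio.h>
#include <sys/types.h>
#include <net/if.h>		/* IFNAMSIZ */

#define PFLOG_RULESET_NAME_SIZE	16

struct pfloghdr_demo {		/* mirrors struct pfloghdr above */
	u_int8_t	length;
	u_int8_t	af;	/* sa_family_t (one byte on OpenBSD) */
	u_int8_t	action;
	u_int8_t	reason;
	char		ifname[IFNAMSIZ];
	char		ruleset[PFLOG_RULESET_NAME_SIZE];
	u_int32_t	rulenr;
	u_int32_t	subrulenr;
	uid_t		uid;
	pid_t		pid;
	uid_t		rule_uid;
	pid_t		rule_pid;
	u_int8_t	dir;
	u_int8_t	pad[3];
};

int
main(void)
{
	/* the logged header's signature length stops where pad starts */
	printf("PFLOG_HDRLEN      = %zu\n", sizeof(struct pfloghdr_demo));
	printf("PFLOG_REAL_HDRLEN = %zu\n",
	    offsetof(struct pfloghdr_demo, pad));
	return (0);
}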
* IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ -#include "bpfilter.h" -#include "pfsync.h" #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include #include #include #ifdef INET -#include #include #include #include #include #endif #ifdef INET6 -#ifndef INET -#include -#endif #include #endif /* INET6 */ #include "carp.h" #if NCARP > 0 -extern int carp_suppress_preempt; +#include #endif #include #include +#include "bpfilter.h" +#include "pfsync.h" + #define PFSYNC_MINMTU \ (sizeof(struct pfsync_header) + sizeof(struct pf_state)) #ifdef PFSYNCDEBUG #define DPRINTF(x) do { if (pfsyncdebug) printf x ; } while (0) int pfsyncdebug; #else #define DPRINTF(x) #endif -struct pfsync_softc pfsyncif; -struct pfsyncstats pfsyncstats; +struct pfsync_softc *pfsyncif = NULL; +struct pfsyncstats pfsyncstats; void pfsyncattach(int); +int pfsync_clone_create(struct if_clone *, int); +int pfsync_clone_destroy(struct ifnet *); void pfsync_setmtu(struct pfsync_softc *, int); -int pfsync_insert_net_state(struct pfsync_state *); +int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, + struct pf_state_peer *); +int pfsync_insert_net_state(struct pfsync_state *, u_int8_t); +void pfsync_update_net_tdb(struct pfsync_tdb *); int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); int pfsyncioctl(struct ifnet *, u_long, caddr_t); void pfsyncstart(struct ifnet *); struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); int pfsync_sendout(struct pfsync_softc *); +int pfsync_tdb_sendout(struct pfsync_softc *); +int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); void pfsync_timeout(void *); +void pfsync_tdb_timeout(void *); void pfsync_send_bus(struct pfsync_softc *, u_int8_t); void pfsync_bulk_update(void *); void pfsync_bulkfail(void *); int pfsync_sync_ok; extern int ifqmaxlen; +struct if_clone pfsync_cloner = + IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); + void pfsyncattach(int npfsync) { + if_clone_attach(&pfsync_cloner); +} +int +pfsync_clone_create(struct if_clone *ifc, int unit) +{ struct ifnet *ifp; + if (unit != 0) + return (EINVAL); + pfsync_sync_ok = 1; - bzero(&pfsyncif, sizeof(pfsyncif)); - pfsyncif.sc_mbuf = NULL; - pfsyncif.sc_mbuf_net = NULL; - pfsyncif.sc_statep.s = NULL; - pfsyncif.sc_statep_net.s = NULL; - pfsyncif.sc_maxupdates = 128; - pfsyncif.sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; - pfsyncif.sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; - pfsyncif.sc_ureq_received = 0; - pfsyncif.sc_ureq_sent = 0; - ifp = &pfsyncif.sc_if; - strlcpy(ifp->if_xname, "pfsync0", sizeof ifp->if_xname); - ifp->if_softc = &pfsyncif; + if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT)) == NULL) + return (ENOMEM); + bzero(pfsyncif, sizeof(*pfsyncif)); + pfsyncif->sc_mbuf = NULL; + pfsyncif->sc_mbuf_net = NULL; + pfsyncif->sc_mbuf_tdb = NULL; + pfsyncif->sc_statep.s = NULL; + pfsyncif->sc_statep_net.s 
= NULL; + pfsyncif->sc_statep_tdb.t = NULL; + pfsyncif->sc_maxupdates = 128; + pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; + pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; + pfsyncif->sc_ureq_received = 0; + pfsyncif->sc_ureq_sent = 0; + pfsyncif->sc_bulk_send_next = NULL; + pfsyncif->sc_bulk_terminator = NULL; + ifp = &pfsyncif->sc_if; + snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); + ifp->if_softc = pfsyncif; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_start = pfsyncstart; ifp->if_type = IFT_PFSYNC; ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = PFSYNC_HDRLEN; - pfsync_setmtu(&pfsyncif, MCLBYTES); - timeout_set(&pfsyncif.sc_tmo, pfsync_timeout, &pfsyncif); - timeout_set(&pfsyncif.sc_bulk_tmo, pfsync_bulk_update, &pfsyncif); - timeout_set(&pfsyncif.sc_bulkfail_tmo, pfsync_bulkfail, &pfsyncif); + pfsync_setmtu(pfsyncif, ETHERMTU); + timeout_set(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif); + timeout_set(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif); + timeout_set(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif); + timeout_set(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif); if_attach(ifp); if_alloc_sadl(ifp); +#if NCARP > 0 + if_addgroup(ifp, "carp"); +#endif + #if NBPFILTER > 0 - bpfattach(&pfsyncif.sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); + bpfattach(&pfsyncif->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); #endif + + return (0); } +int +pfsync_clone_destroy(struct ifnet *ifp) +{ +#if NBPFILTER > 0 + bpfdetach(ifp); +#endif + if_detach(ifp); + free(pfsyncif, M_DEVBUF); + pfsyncif = NULL; + return (0); +} + /* * Start output on the pfsync interface. */ void pfsyncstart(struct ifnet *ifp) { struct mbuf *m; int s; for (;;) { - s = splimp(); + s = splnet(); IF_DROP(&ifp->if_snd); IF_DEQUEUE(&ifp->if_snd, m); splx(s); if (m == NULL) return; else m_freem(m); } } int -pfsync_insert_net_state(struct pfsync_state *sp) +pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, + struct pf_state_peer *d) { + if (s->scrub.scrub_flag && d->scrub == NULL) { + d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); + if (d->scrub == NULL) + return (ENOMEM); + bzero(d->scrub, sizeof(*d->scrub)); + } + + return (0); +} + +int +pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag) +{ struct pf_state *st = NULL; struct pf_rule *r = NULL; struct pfi_kif *kif; if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { printf("pfsync_insert_net_state: invalid creator id:" " %08x\n", ntohl(sp->creatorid)); return (EINVAL); } - kif = pfi_lookup_create(sp->ifname); + kif = pfi_kif_get(sp->ifname); if (kif == NULL) { if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync_insert_net_state: " "unknown interface: %s\n", sp->ifname); /* skip this state */ return (0); } /* - * Just use the default rule until we have infrastructure to find the - * best matching rule. + * If the ruleset checksums match, it's safe to associate the state + * with the rule of that number. 
*/ - r = &pf_default_rule; + if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag) + r = pf_main_ruleset.rules[ + PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; + else + r = &pf_default_rule; if (!r->max_states || r->states < r->max_states) st = pool_get(&pf_state_pl, PR_NOWAIT); if (st == NULL) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); return (ENOMEM); } bzero(st, sizeof(*st)); + /* allocate memory for scrub info */ + if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || + pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) { + pfi_kif_unref(kif, PFI_KIF_REF_NONE); + if (st->src.scrub) + pool_put(&pf_state_scrub_pl, st->src.scrub); + pool_put(&pf_state_pl, st); + return (ENOMEM); + } + st->rule.ptr = r; /* XXX get pointers to nat_rule and anchor */ /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ r->states++; /* fill in the rest of the state entry */ pf_state_host_ntoh(&sp->lan, &st->lan); pf_state_host_ntoh(&sp->gwy, &st->gwy); pf_state_host_ntoh(&sp->ext, &st->ext); pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); st->creation = time_second - ntohl(sp->creation); st->expire = ntohl(sp->expire) + time_second; st->af = sp->af; st->proto = sp->proto; st->direction = sp->direction; st->log = sp->log; st->timeout = sp->timeout; st->allow_opts = sp->allow_opts; bcopy(sp->id, &st->id, sizeof(st->id)); st->creatorid = sp->creatorid; st->sync_flags = PFSTATE_FROMSYNC; - if (pf_insert_state(kif, st)) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ r->states--; + if (st->dst.scrub) + pool_put(&pf_state_scrub_pl, st->dst.scrub); + if (st->src.scrub) + pool_put(&pf_state_scrub_pl, st->src.scrub); pool_put(&pf_state_pl, st); return (EINVAL); } return (0); } void pfsync_input(struct mbuf *m, ...) { struct ip *ip = mtod(m, struct ip *); struct pfsync_header *ph; - struct pfsync_softc *sc = &pfsyncif; - struct pf_state *st, key; + struct pfsync_softc *sc = pfsyncif; + struct pf_state *st; + struct pf_state_cmp key; struct pfsync_state *sp; struct pfsync_state_upd *up; struct pfsync_state_del *dp; struct pfsync_state_clr *cp; struct pfsync_state_upd_req *rup; struct pfsync_state_bus *bus; + struct pfsync_tdb *pt; struct in_addr src; struct mbuf *mp; int iplen, action, error, i, s, count, offp, sfail, stale = 0; + u_int8_t chksum_flag = 0; pfsyncstats.pfsyncs_ipackets++; /* verify that we have a sync interface configured */ - if (!sc->sc_sync_ifp || !pf_status.running) + if (!sc || !sc->sc_sync_ifp || !pf_status.running) goto done; /* verify that the packet came in on the right interface */ if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { pfsyncstats.pfsyncs_badif++; goto done; } /* verify that the IP TTL is 255. 
*/ if (ip->ip_ttl != PFSYNC_DFLTTL) { pfsyncstats.pfsyncs_badttl++; goto done; } iplen = ip->ip_hl << 2; if (m->m_pkthdr.len < iplen + sizeof(*ph)) { pfsyncstats.pfsyncs_hdrops++; goto done; } if (iplen + sizeof(*ph) > m->m_len) { if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { pfsyncstats.pfsyncs_hdrops++; goto done; } ip = mtod(m, struct ip *); } ph = (struct pfsync_header *)((char *)ip + iplen); /* verify the version */ if (ph->version != PFSYNC_VERSION) { pfsyncstats.pfsyncs_badver++; goto done; } action = ph->action; count = ph->count; /* make sure it's a valid action code */ if (action >= PFSYNC_ACT_MAX) { pfsyncstats.pfsyncs_badact++; goto done; } /* Cheaper to grab this now than having to mess with mbufs later */ src = ip->ip_src; + if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) + chksum_flag++; + switch (action) { case PFSYNC_ACT_CLR: { struct pf_state *nexts; struct pfi_kif *kif; u_int32_t creatorid; if ((mp = m_pulldown(m, iplen + sizeof(*ph), sizeof(*cp), &offp)) == NULL) { pfsyncstats.pfsyncs_badlen++; return; } cp = (struct pfsync_state_clr *)(mp->m_data + offp); creatorid = cp->creatorid; s = splsoftnet(); if (cp->ifname[0] == '\0') { for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) { - nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); + nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); if (st->creatorid == creatorid) { - st->timeout = PFTM_PURGE; - pf_purge_expired_state(st); + st->sync_flags |= PFSTATE_FROMSYNC; + pf_unlink_state(st); } } } else { - kif = pfi_lookup_if(cp->ifname); - if (kif == NULL) { - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_input: PFSYNC_ACT_CLR " - "bad interface: %s\n", cp->ifname); + if ((kif = pfi_kif_get(cp->ifname)) == NULL) { splx(s); - goto done; + return; } for (st = RB_MIN(pf_state_tree_lan_ext, &kif->pfik_lan_ext); st; st = nexts) { nexts = RB_NEXT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, st); if (st->creatorid == creatorid) { - st->timeout = PFTM_PURGE; - pf_purge_expired_state(st); + st->sync_flags |= PFSTATE_FROMSYNC; + pf_unlink_state(st); } } } splx(s); break; } case PFSYNC_ACT_INS: if ((mp = m_pulldown(m, iplen + sizeof(*ph), count * sizeof(*sp), &offp)) == NULL) { pfsyncstats.pfsyncs_badlen++; return; } s = splsoftnet(); for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); i < count; i++, sp++) { /* check for invalid values */ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST || sp->direction > PF_OUT || (sp->af != AF_INET && sp->af != AF_INET6)) { if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync_insert: PFSYNC_ACT_INS: " "invalid value\n"); pfsyncstats.pfsyncs_badstate++; continue; } - if ((error = pfsync_insert_net_state(sp))) { + if ((error = pfsync_insert_net_state(sp, + chksum_flag))) { if (error == ENOMEM) { splx(s); goto done; } continue; } } splx(s); break; case PFSYNC_ACT_UPD: if ((mp = m_pulldown(m, iplen + sizeof(*ph), count * sizeof(*sp), &offp)) == NULL) { pfsyncstats.pfsyncs_badlen++; return; } s = splsoftnet(); for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); i < count; i++, sp++) { int flags = PFSYNC_FLAG_STALE; /* check for invalid values */ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST) { if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync_insert: PFSYNC_ACT_UPD: " "invalid value\n"); pfsyncstats.pfsyncs_badstate++; continue; } bcopy(sp->id, &key.id, sizeof(key.id)); key.creatorid = sp->creatorid; st = pf_find_state_byid(&key); if 
(st == NULL) { /* insert the update */ - if (pfsync_insert_net_state(sp)) + if (pfsync_insert_net_state(sp, chksum_flag)) pfsyncstats.pfsyncs_badstate++; continue; } sfail = 0; if (st->proto == IPPROTO_TCP) { /* * The state should never go backwards except * for syn-proxy states. Neither should the * sequence window slide backwards. */ if (st->src.state > sp->src.state && (st->src.state < PF_TCPS_PROXY_SRC || sp->src.state >= PF_TCPS_PROXY_SRC)) sfail = 1; else if (SEQ_GT(st->src.seqlo, ntohl(sp->src.seqlo))) sfail = 3; else if (st->dst.state > sp->dst.state) { /* There might still be useful * information about the src state here, * so import that part of the update, * then "fail" so we send the updated * state back to the peer who is missing * what we know. */ pf_state_peer_ntoh(&sp->src, &st->src); /* XXX do anything with timeouts? */ sfail = 7; flags = 0; } else if (st->dst.state >= TCPS_SYN_SENT && SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) sfail = 4; } else { /* * Non-TCP protocol state machines always go * forwards */ if (st->src.state > sp->src.state) sfail = 5; - else if ( st->dst.state > sp->dst.state) + else if (st->dst.state > sp->dst.state) sfail = 6; } if (sfail) { if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: %s stale update " "(%d) id: %016llx " "creatorid: %08x\n", (sfail < 7 ? "ignoring" : "partial"), sfail, betoh64(st->id), ntohl(st->creatorid)); pfsyncstats.pfsyncs_badstate++; if (!(sp->sync_flags & PFSTATE_STALE)) { /* we have a better state, send it */ if (sc->sc_mbuf != NULL && !stale) pfsync_sendout(sc); stale++; if (!st->sync_flags) pfsync_pack_state( PFSYNC_ACT_UPD, st, flags); } continue; } + pfsync_alloc_scrub_memory(&sp->dst, &st->dst); pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); st->expire = ntohl(sp->expire) + time_second; st->timeout = sp->timeout; } if (stale && sc->sc_mbuf != NULL) pfsync_sendout(sc); splx(s); break; /* * It's not strictly necessary for us to support the "uncompressed" * delete action, but it's relatively simple and maintains consistency. */ case PFSYNC_ACT_DEL: if ((mp = m_pulldown(m, iplen + sizeof(*ph), count * sizeof(*sp), &offp)) == NULL) { pfsyncstats.pfsyncs_badlen++; return; } s = splsoftnet(); for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); i < count; i++, sp++) { bcopy(sp->id, &key.id, sizeof(key.id)); key.creatorid = sp->creatorid; st = pf_find_state_byid(&key); if (st == NULL) { pfsyncstats.pfsyncs_badstate++; continue; } - st->timeout = PFTM_PURGE; st->sync_flags |= PFSTATE_FROMSYNC; - pf_purge_expired_state(st); + pf_unlink_state(st); } splx(s); break; case PFSYNC_ACT_UPD_C: { int update_requested = 0; if ((mp = m_pulldown(m, iplen + sizeof(*ph), count * sizeof(*up), &offp)) == NULL) { pfsyncstats.pfsyncs_badlen++; return; } s = splsoftnet(); for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); i < count; i++, up++) { /* check for invalid values */ if (up->timeout >= PFTM_MAX || up->src.state > PF_TCPS_PROXY_DST || up->dst.state > PF_TCPS_PROXY_DST) { if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync_insert: " "PFSYNC_ACT_UPD_C: " "invalid value\n"); pfsyncstats.pfsyncs_badstate++; continue; } bcopy(up->id, &key.id, sizeof(key.id)); key.creatorid = up->creatorid; st = pf_find_state_byid(&key); if (st == NULL) { /* We don't have this state. Ask for it.
*/ error = pfsync_request_update(up, &src); if (error == ENOMEM) { splx(s); goto done; } update_requested = 1; pfsyncstats.pfsyncs_badstate++; continue; } sfail = 0; if (st->proto == IPPROTO_TCP) { /* * The state should never go backwards except * for syn-proxy states. Neither should the * sequence window slide backwards. */ if (st->src.state > up->src.state && (st->src.state < PF_TCPS_PROXY_SRC || up->src.state >= PF_TCPS_PROXY_SRC)) sfail = 1; else if (st->dst.state > up->dst.state) sfail = 2; else if (SEQ_GT(st->src.seqlo, ntohl(up->src.seqlo))) sfail = 3; else if (st->dst.state >= TCPS_SYN_SENT && SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) sfail = 4; } else { /* * Non-TCP protocol state machines always go * forwards */ if (st->src.state > up->src.state) sfail = 5; else if (st->dst.state > up->dst.state) sfail = 6; } if (sfail) { if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: ignoring stale update " "(%d) id: %016llx " "creatorid: %08x\n", sfail, betoh64(st->id), ntohl(st->creatorid)); pfsyncstats.pfsyncs_badstate++; /* we have a better state, send it out */ if ((!stale || update_requested) && sc->sc_mbuf != NULL) { pfsync_sendout(sc); update_requested = 0; } stale++; if (!st->sync_flags) pfsync_pack_state(PFSYNC_ACT_UPD, st, PFSYNC_FLAG_STALE); continue; } + pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->src, &st->src); pf_state_peer_ntoh(&up->dst, &st->dst); st->expire = ntohl(up->expire) + time_second; st->timeout = up->timeout; } if ((update_requested || stale) && sc->sc_mbuf) pfsync_sendout(sc); splx(s); break; } case PFSYNC_ACT_DEL_C: if ((mp = m_pulldown(m, iplen + sizeof(*ph), count * sizeof(*dp), &offp)) == NULL) { pfsyncstats.pfsyncs_badlen++; return; } s = splsoftnet(); for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); i < count; i++, dp++) { bcopy(dp->id, &key.id, sizeof(key.id)); key.creatorid = dp->creatorid; st = pf_find_state_byid(&key); if (st == NULL) { pfsyncstats.pfsyncs_badstate++; continue; } - st->timeout = PFTM_PURGE; st->sync_flags |= PFSTATE_FROMSYNC; - pf_purge_expired_state(st); + pf_unlink_state(st); } splx(s); break; case PFSYNC_ACT_INS_F: case PFSYNC_ACT_DEL_F: /* not implemented */ break; case PFSYNC_ACT_UREQ: if ((mp = m_pulldown(m, iplen + sizeof(*ph), count * sizeof(*rup), &offp)) == NULL) { pfsyncstats.pfsyncs_badlen++; return; } s = splsoftnet(); if (sc->sc_mbuf != NULL) pfsync_sendout(sc); for (i = 0, rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); i < count; i++, rup++) { bcopy(rup->id, &key.id, sizeof(key.id)); key.creatorid = rup->creatorid; if (key.id == 0 && key.creatorid == 0) { sc->sc_ureq_received = time_uptime; + if (sc->sc_bulk_send_next == NULL) + sc->sc_bulk_send_next = + TAILQ_FIRST(&state_list); + sc->sc_bulk_terminator = sc->sc_bulk_send_next; if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received " "bulk update request\n"); pfsync_send_bus(sc, PFSYNC_BUS_START); timeout_add(&sc->sc_bulk_tmo, 1 * hz); } else { st = pf_find_state_byid(&key); if (st == NULL) { pfsyncstats.pfsyncs_badstate++; continue; } if (!st->sync_flags) pfsync_pack_state(PFSYNC_ACT_UPD, st, 0); } } if (sc->sc_mbuf != NULL) pfsync_sendout(sc); splx(s); break; case PFSYNC_ACT_BUS: /* If we're not waiting for a bulk update, who cares.
*/ if (sc->sc_ureq_sent == 0) break; if ((mp = m_pulldown(m, iplen + sizeof(*ph), sizeof(*bus), &offp)) == NULL) { pfsyncstats.pfsyncs_badlen++; return; } bus = (struct pfsync_state_bus *)(mp->m_data + offp); switch (bus->status) { case PFSYNC_BUS_START: timeout_add(&sc->sc_bulkfail_tmo, pf_pool_limits[PF_LIMIT_STATES].limit / (PFSYNC_BULKPACKETS * sc->sc_maxcount)); if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received bulk " "update start\n"); break; case PFSYNC_BUS_END: if (time_uptime - ntohl(bus->endtime) >= sc->sc_ureq_sent) { /* that's it, we're happy */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; timeout_del(&sc->sc_bulkfail_tmo); #if NCARP > 0 if (!pfsync_sync_ok) - carp_suppress_preempt--; + carp_group_demote_adj(&sc->sc_if, -1); #endif pfsync_sync_ok = 1; if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received valid " "bulk update end\n"); } else { if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received invalid " "bulk update end: bad timestamp\n"); } break; } break; + case PFSYNC_ACT_TDB_UPD: + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + count * sizeof(*pt), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + s = splsoftnet(); + for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp); + i < count; i++, pt++) + pfsync_update_net_tdb(pt); + splx(s); + break; } done: if (m) m_freem(m); } int pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct rtentry *rt) { m_freem(m); return (0); } /* ARGSUSED */ int pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct proc *p = curproc; struct pfsync_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct ip_moptions *imo = &sc->sc_imo; struct pfsyncreq pfsyncr; struct ifnet *sifp; int s, error; switch (cmd) { case SIOCSIFADDR: case SIOCAIFADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) ifp->if_flags |= IFF_RUNNING; else ifp->if_flags &= ~IFF_RUNNING; break; case SIOCSIFMTU: if (ifr->ifr_mtu < PFSYNC_MINMTU) return (EINVAL); if (ifr->ifr_mtu > MCLBYTES) ifr->ifr_mtu = MCLBYTES; s = splnet(); if (ifr->ifr_mtu < ifp->if_mtu) pfsync_sendout(sc); pfsync_setmtu(sc, ifr->ifr_mtu); splx(s); break; case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); if (sc->sc_sync_ifp) strlcpy(pfsyncr.pfsyncr_syncdev, sc->sc_sync_ifp->if_xname, IFNAMSIZ); pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) return (error); break; case SIOCSETPFSYNC: if ((error = suser(p, p->p_acflag)) != 0) return (error); if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) return (error); if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; else sc->sc_sync_peer.s_addr = pfsyncr.pfsyncr_syncpeer.s_addr; if (pfsyncr.pfsyncr_maxupdates > 255) return (EINVAL); sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; if (pfsyncr.pfsyncr_syncdev[0] == 0) { sc->sc_sync_ifp = NULL; if (sc->sc_mbuf_net != NULL) { /* Don't keep stale pfsync packets around. 
*/ s = splnet(); m_freem(sc->sc_mbuf_net); sc->sc_mbuf_net = NULL; sc->sc_statep_net.s = NULL; splx(s); } if (imo->imo_num_memberships > 0) { in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); imo->imo_multicast_ifp = NULL; } break; } if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) return (EINVAL); s = splnet(); if (sifp->if_mtu < sc->sc_if.if_mtu || (sc->sc_sync_ifp != NULL && sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || sifp->if_mtu < MCLBYTES - sizeof(struct ip)) pfsync_sendout(sc); sc->sc_sync_ifp = sifp; pfsync_setmtu(sc, sc->sc_if.if_mtu); if (imo->imo_num_memberships > 0) { in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); imo->imo_multicast_ifp = NULL; } if (sc->sc_sync_ifp && sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { struct in_addr addr; if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { sc->sc_sync_ifp = NULL; splx(s); return (EADDRNOTAVAIL); } addr.s_addr = INADDR_PFSYNC_GROUP; if ((imo->imo_membership[0] = in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) { sc->sc_sync_ifp = NULL; splx(s); return (ENOBUFS); } imo->imo_num_memberships++; imo->imo_multicast_ifp = sc->sc_sync_ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; } if (sc->sc_sync_ifp || sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { /* Request a full state table update. */ sc->sc_ureq_sent = time_uptime; #if NCARP > 0 if (pfsync_sync_ok) - carp_suppress_preempt++; + carp_group_demote_adj(&sc->sc_if, 1); #endif pfsync_sync_ok = 0; if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: requesting bulk update\n"); timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); error = pfsync_request_update(NULL, NULL); if (error == ENOMEM) { splx(s); return (ENOMEM); } pfsync_sendout(sc); } splx(s); break; default: return (ENOTTY); } return (0); } void pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) { int mtu; if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) mtu = sc->sc_sync_ifp->if_mtu; else mtu = mtu_req; sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / sizeof(struct pfsync_state); if (sc->sc_maxcount > 254) sc->sc_maxcount = 254; sc->sc_if.if_mtu = sizeof(struct pfsync_header) + sc->sc_maxcount * sizeof(struct pfsync_state); } struct mbuf * pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) { struct pfsync_header *h; struct mbuf *m; int len; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) { sc->sc_if.if_oerrors++; return (NULL); } switch (action) { case PFSYNC_ACT_CLR: len = sizeof(struct pfsync_header) + sizeof(struct pfsync_state_clr); break; case PFSYNC_ACT_UPD_C: len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) + sizeof(struct pfsync_header); break; case PFSYNC_ACT_DEL_C: len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + sizeof(struct pfsync_header); break; case PFSYNC_ACT_UREQ: len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + sizeof(struct pfsync_header); break; case PFSYNC_ACT_BUS: len = sizeof(struct pfsync_header) + sizeof(struct pfsync_state_bus); break; + case PFSYNC_ACT_TDB_UPD: + len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + + sizeof(struct pfsync_header); + break; default: len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + sizeof(struct pfsync_header); break; } if (len > MHLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); sc->sc_if.if_oerrors++; return (NULL); } m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1); } else MH_ALIGN(m, len); m->m_pkthdr.rcvif = NULL; m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); h = mtod(m, struct pfsync_header *); 
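/*
 * Illustrative sketch, not part of the imported diff: the header
 * initialized just below is stamped with the sender's ruleset MD5
 * digest (the new bcopy of pf_status.pf_chksum). On receive,
 * pfsync_input() above bcmp()s that digest against the local one and
 * only sets chksum_flag (and thus trusts the rule numbers carried in
 * the states) when the rulesets match. The comparison reduces to this
 * (standalone, userland types; MD5 digests are 16 bytes, which is what
 * PF_MD5_DIGEST_LENGTH denotes):
 */
#include <stdint.h>
#include <string.h>

#define PF_MD5_DIGEST_LENGTH	16

static int
ruleset_chksum_matches(const uint8_t peer[PF_MD5_DIGEST_LENGTH],
    const uint8_t local[PF_MD5_DIGEST_LENGTH])
{
	/* identical rulesets => rule numbers are comparable across hosts */
	return (memcmp(peer, local, PF_MD5_DIGEST_LENGTH) == 0);
}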
h->version = PFSYNC_VERSION; h->af = 0; h->count = 0; h->action = action; + if (action != PFSYNC_ACT_TDB_UPD) + bcopy(&pf_status.pf_chksum, &h->pf_chksum, + PF_MD5_DIGEST_LENGTH); *sp = (void *)((char *)h + PFSYNC_HDRLEN); - timeout_add(&sc->sc_tmo, hz); + if (action == PFSYNC_ACT_TDB_UPD) + timeout_add(&sc->sc_tdb_tmo, hz); + else + timeout_add(&sc->sc_tmo, hz); return (m); } int pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) { - struct ifnet *ifp = &pfsyncif.sc_if; - struct pfsync_softc *sc = ifp->if_softc; + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; struct pfsync_header *h, *h_net; struct pfsync_state *sp = NULL; struct pfsync_state_upd *up = NULL; struct pfsync_state_del *dp = NULL; struct pf_rule *r; u_long secs; int s, ret = 0; u_int8_t i = 255, newaction = 0; + if (sc == NULL) + return (0); + ifp = &sc->sc_if; + /* * If a packet falls in the forest and there's nobody around to * hear, does it make a sound? */ if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { /* Don't leave any stale pfsync packets hanging around. */ if (sc->sc_mbuf != NULL) { m_freem(sc->sc_mbuf); sc->sc_mbuf = NULL; sc->sc_statep.s = NULL; } return (0); } if (action >= PFSYNC_ACT_MAX) return (EINVAL); s = splnet(); if (sc->sc_mbuf == NULL) { if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, (void *)&sc->sc_statep.s)) == NULL) { splx(s); return (ENOMEM); } h = mtod(sc->sc_mbuf, struct pfsync_header *); } else { h = mtod(sc->sc_mbuf, struct pfsync_header *); if (h->action != action) { pfsync_sendout(sc); if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, (void *)&sc->sc_statep.s)) == NULL) { splx(s); return (ENOMEM); } h = mtod(sc->sc_mbuf, struct pfsync_header *); } else { /* * If it's an update, look in the packet to see if * we already have an update for the state. 
*/ if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { struct pfsync_state *usp = (void *)((char *)h + PFSYNC_HDRLEN); for (i = 0; i < h->count; i++) { if (!memcmp(usp->id, &st->id, PFSYNC_ID_LEN) && usp->creatorid == st->creatorid) { sp = usp; sp->updates++; break; } usp++; } } } } secs = time_second; st->pfsync_time = time_uptime; - TAILQ_REMOVE(&state_updates, st, u.s.entry_updates); - TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates); if (sp == NULL) { /* not a "duplicate" update */ i = 255; sp = sc->sc_statep.s++; sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(struct pfsync_state); h->count++; bzero(sp, sizeof(*sp)); bcopy(&st->id, sp->id, sizeof(sp->id)); sp->creatorid = st->creatorid; strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname)); pf_state_host_hton(&st->lan, &sp->lan); pf_state_host_hton(&st->gwy, &sp->gwy); pf_state_host_hton(&st->ext, &sp->ext); bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); sp->creation = htonl(secs - st->creation); - sp->packets[0] = htonl(st->packets[0]); - sp->packets[1] = htonl(st->packets[1]); - sp->bytes[0] = htonl(st->bytes[0]); - sp->bytes[1] = htonl(st->bytes[1]); + pf_state_counter_hton(st->packets[0], sp->packets[0]); + pf_state_counter_hton(st->packets[1], sp->packets[1]); + pf_state_counter_hton(st->bytes[0], sp->bytes[0]); + pf_state_counter_hton(st->bytes[1], sp->bytes[1]); if ((r = st->rule.ptr) == NULL) sp->rule = htonl(-1); else sp->rule = htonl(r->nr); if ((r = st->anchor.ptr) == NULL) sp->anchor = htonl(-1); else sp->anchor = htonl(r->nr); sp->af = st->af; sp->proto = st->proto; sp->direction = st->direction; sp->log = st->log; sp->allow_opts = st->allow_opts; sp->timeout = st->timeout; if (flags & PFSYNC_FLAG_STALE) sp->sync_flags |= PFSTATE_STALE; } pf_state_peer_hton(&st->src, &sp->src); pf_state_peer_hton(&st->dst, &sp->dst); if (st->expire <= secs) sp->expire = htonl(0); else sp->expire = htonl(st->expire - secs); /* do we need to build "compressed" actions for network transfer? 
*/ if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { switch (action) { case PFSYNC_ACT_UPD: newaction = PFSYNC_ACT_UPD_C; break; case PFSYNC_ACT_DEL: newaction = PFSYNC_ACT_DEL_C; break; default: /* by default we just send the uncompressed states */ break; } } if (newaction) { if (sc->sc_mbuf_net == NULL) { if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, (void *)&sc->sc_statep_net.s)) == NULL) { splx(s); return (ENOMEM); } } h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); switch (newaction) { case PFSYNC_ACT_UPD_C: if (i != 255) { up = (void *)((char *)h_net + PFSYNC_HDRLEN + (i * sizeof(*up))); up->updates++; } else { h_net->count++; sc->sc_mbuf_net->m_pkthdr.len = sc->sc_mbuf_net->m_len += sizeof(*up); up = sc->sc_statep_net.u++; bzero(up, sizeof(*up)); bcopy(&st->id, up->id, sizeof(up->id)); up->creatorid = st->creatorid; } up->timeout = st->timeout; up->expire = sp->expire; up->src = sp->src; up->dst = sp->dst; break; case PFSYNC_ACT_DEL_C: sc->sc_mbuf_net->m_pkthdr.len = sc->sc_mbuf_net->m_len += sizeof(*dp); dp = sc->sc_statep_net.d++; h_net->count++; bzero(dp, sizeof(*dp)); bcopy(&st->id, dp->id, sizeof(dp->id)); dp->creatorid = st->creatorid; break; } } if (h->count == sc->sc_maxcount || (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) ret = pfsync_sendout(sc); splx(s); return (ret); } /* This must be called in splnet() */ int pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) { - struct ifnet *ifp = &pfsyncif.sc_if; + struct ifnet *ifp = NULL; struct pfsync_header *h; - struct pfsync_softc *sc = ifp->if_softc; + struct pfsync_softc *sc = pfsyncif; struct pfsync_state_upd_req *rup; int ret = 0; + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; if (sc->sc_mbuf == NULL) { if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, (void *)&sc->sc_statep.s)) == NULL) return (ENOMEM); h = mtod(sc->sc_mbuf, struct pfsync_header *); } else { h = mtod(sc->sc_mbuf, struct pfsync_header *); if (h->action != PFSYNC_ACT_UREQ) { pfsync_sendout(sc); if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, (void *)&sc->sc_statep.s)) == NULL) return (ENOMEM); h = mtod(sc->sc_mbuf, struct pfsync_header *); } } if (src != NULL) sc->sc_sendaddr = *src; sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); h->count++; rup = sc->sc_statep.r++; bzero(rup, sizeof(*rup)); if (up != NULL) { bcopy(up->id, rup->id, sizeof(rup->id)); rup->creatorid = up->creatorid; } if (h->count == sc->sc_maxcount) ret = pfsync_sendout(sc); return (ret); } int pfsync_clear_states(u_int32_t creatorid, char *ifname) { - struct ifnet *ifp = &pfsyncif.sc_if; - struct pfsync_softc *sc = ifp->if_softc; + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; struct pfsync_state_clr *cp; int s, ret; + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; s = splnet(); if (sc->sc_mbuf != NULL) pfsync_sendout(sc); if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, (void *)&sc->sc_statep.c)) == NULL) { splx(s); return (ENOMEM); } sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); cp = sc->sc_statep.c; cp->creatorid = creatorid; if (ifname != NULL) strlcpy(cp->ifname, ifname, IFNAMSIZ); ret = (pfsync_sendout(sc)); splx(s); return (ret); } void pfsync_timeout(void *v) { struct pfsync_softc *sc = v; int s; s = splnet(); pfsync_sendout(sc); splx(s); } +void +pfsync_tdb_timeout(void *v) +{ + struct pfsync_softc *sc = v; + int s; + + s = splnet(); + pfsync_tdb_sendout(sc); + splx(s); +} + /* This must be called in splnet() */ void pfsync_send_bus(struct 
pfsync_softc *sc, u_int8_t status) { struct pfsync_state_bus *bus; if (sc->sc_mbuf != NULL) pfsync_sendout(sc); if (pfsync_sync_ok && (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, (void *)&sc->sc_statep.b)) != NULL) { sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); bus = sc->sc_statep.b; bus->creatorid = pf_status.hostid; bus->status = status; bus->endtime = htonl(time_uptime - sc->sc_ureq_received); pfsync_sendout(sc); } } void pfsync_bulk_update(void *v) { struct pfsync_softc *sc = v; int s, i = 0; struct pf_state *state; s = splnet(); if (sc->sc_mbuf != NULL) pfsync_sendout(sc); /* * Grab at most PFSYNC_BULKPACKETS worth of states which have not * been sent since the latest request was made. */ - while ((state = TAILQ_FIRST(&state_updates)) != NULL && - ++i < (sc->sc_maxcount * PFSYNC_BULKPACKETS)) { - if (state->pfsync_time > sc->sc_ureq_received) { - /* we're done */ - pfsync_send_bus(sc, PFSYNC_BUS_END); - sc->sc_ureq_received = 0; - timeout_del(&sc->sc_bulk_tmo); - if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync: bulk update complete\n"); - break; - } else { - /* send an update and move to end of list */ - if (!state->sync_flags) + state = sc->sc_bulk_send_next; + if (state) + do { + /* send state update if syncable and not already sent */ + if (!state->sync_flags + && state->timeout < PFTM_MAX + && state->pfsync_time <= sc->sc_ureq_received) { pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); - state->pfsync_time = time_uptime; - TAILQ_REMOVE(&state_updates, state, u.s.entry_updates); - TAILQ_INSERT_TAIL(&state_updates, state, - u.s.entry_updates); + i++; + } - /* look again for more in a bit */ - timeout_add(&sc->sc_bulk_tmo, 1); - } + /* figure next state to send */ + state = TAILQ_NEXT(state, u.s.entry_list); + + /* wrap to start of list if we hit the end */ + if (!state) + state = TAILQ_FIRST(&state_list); + } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && + state != sc->sc_bulk_terminator); + + if (!state || state == sc->sc_bulk_terminator) { + /* we're done */ + pfsync_send_bus(sc, PFSYNC_BUS_END); + sc->sc_ureq_received = 0; + sc->sc_bulk_send_next = NULL; + sc->sc_bulk_terminator = NULL; + timeout_del(&sc->sc_bulk_tmo); + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: bulk update complete\n"); + } else { + /* look again for more in a bit */ + timeout_add(&sc->sc_bulk_tmo, 1); + sc->sc_bulk_send_next = state; } if (sc->sc_mbuf != NULL) pfsync_sendout(sc); splx(s); } void pfsync_bulkfail(void *v) { struct pfsync_softc *sc = v; int s, error; if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { /* Try again in a bit */ timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); s = splnet(); error = pfsync_request_update(NULL, NULL); if (error == ENOMEM) { if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: cannot allocate mbufs for " "bulk update\n"); } else pfsync_sendout(sc); splx(s); } else { /* Pretend like the transfer was ok */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; #if NCARP > 0 if (!pfsync_sync_ok) - carp_suppress_preempt--; + carp_group_demote_adj(&sc->sc_if, -1); #endif pfsync_sync_ok = 1; if (pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: failed to receive " "bulk update status\n"); timeout_del(&sc->sc_bulkfail_tmo); } } /* This must be called in splnet() */ int -pfsync_sendout(sc) - struct pfsync_softc *sc; +pfsync_sendout(struct pfsync_softc *sc) { +#if NBPFILTER > 0 struct ifnet *ifp = &sc->sc_if; +#endif struct mbuf *m; timeout_del(&sc->sc_tmo); if (sc->sc_mbuf == NULL) return (0); m = sc->sc_mbuf; sc->sc_mbuf = NULL; sc->sc_statep.s = 
NULL; #if NBPFILTER > 0 if (ifp->if_bpf) - bpf_mtap(ifp->if_bpf, m); + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); #endif if (sc->sc_mbuf_net) { m_freem(m); m = sc->sc_mbuf_net; sc->sc_mbuf_net = NULL; sc->sc_statep_net.s = NULL; } - if (sc->sc_sync_ifp || sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { - struct ip *ip; - struct sockaddr sa; + return pfsync_sendout_mbuf(sc, m); +} +int +pfsync_tdb_sendout(struct pfsync_softc *sc) +{ +#if NBPFILTER > 0 + struct ifnet *ifp = &sc->sc_if; +#endif + struct mbuf *m; + + timeout_del(&sc->sc_tdb_tmo); + + if (sc->sc_mbuf_tdb == NULL) + return (0); + m = sc->sc_mbuf_tdb; + sc->sc_mbuf_tdb = NULL; + sc->sc_statep_tdb.t = NULL; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); +#endif + + return pfsync_sendout_mbuf(sc, m); +} + +int +pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) +{ + struct sockaddr sa; + struct ip *ip; + + if (sc->sc_sync_ifp || + sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); if (m == NULL) { pfsyncstats.pfsyncs_onomem++; return (0); } ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = IPTOS_LOWDELAY; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_id = htons(ip_randomid()); ip->ip_off = htons(IP_DF); ip->ip_ttl = PFSYNC_DFLTTL; ip->ip_p = IPPROTO_PFSYNC; ip->ip_sum = 0; bzero(&sa, sizeof(sa)); ip->ip_src.s_addr = INADDR_ANY; if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) m->m_flags |= M_MCAST; ip->ip_dst = sc->sc_sendaddr; sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; pfsyncstats.pfsyncs_opackets++; if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) pfsyncstats.pfsyncs_oerrors++; } else m_freem(m); return (0); +} + +/* Update an in-kernel tdb. Silently fail if no tdb is found. */ +void +pfsync_update_net_tdb(struct pfsync_tdb *pt) +{ + struct tdb *tdb; + int s; + + /* check for invalid values */ + if (ntohl(pt->spi) <= SPI_RESERVED_MAX || + (pt->dst.sa.sa_family != AF_INET && + pt->dst.sa.sa_family != AF_INET6)) + goto bad; + + s = spltdb(); + tdb = gettdb(pt->spi, &pt->dst, pt->sproto); + if (tdb) { + pt->rpl = ntohl(pt->rpl); + pt->cur_bytes = betoh64(pt->cur_bytes); + + /* Neither replay nor byte counter should ever decrease. */ + if (pt->rpl < tdb->tdb_rpl || + pt->cur_bytes < tdb->tdb_cur_bytes) { + splx(s); + goto bad; + } + + tdb->tdb_rpl = pt->rpl; + tdb->tdb_cur_bytes = pt->cur_bytes; + } + splx(s); + return; + + bad: + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " + "invalid value\n"); + pfsyncstats.pfsyncs_badstate++; + return; +} + +/* One of our local tdbs has been updated, need to sync rpl with others */ +int +pfsync_update_tdb(struct tdb *tdb, int output) +{ + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; + struct pfsync_header *h; + struct pfsync_tdb *pt = NULL; + int s, i, ret = 0; + + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; + if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && + sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { + /* Don't leave any stale pfsync packets hanging around.
*/ + if (sc->sc_mbuf_tdb != NULL) { + m_freem(sc->sc_mbuf_tdb); + sc->sc_mbuf_tdb = NULL; + sc->sc_statep_tdb.t = NULL; + } + return (0); + } + + s = splnet(); + if (sc->sc_mbuf_tdb == NULL) { + if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD, + (void *)&sc->sc_statep_tdb.t)) == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + } else { + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + if (h->action != PFSYNC_ACT_TDB_UPD) { + /* + * XXX will never happen as long as there's + * only one "TDB action". + */ + pfsync_tdb_sendout(sc); + sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, + PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t); + if (sc->sc_mbuf_tdb == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + } else if (sc->sc_maxupdates) { + /* + * If it's an update, look in the packet to see if + * we already have an update for the state. + */ + struct pfsync_tdb *u = + (void *)((char *)h + PFSYNC_HDRLEN); + + for (i = 0; !pt && i < h->count; i++) { + if (tdb->tdb_spi == u->spi && + tdb->tdb_sproto == u->sproto && + !bcmp(&tdb->tdb_dst, &u->dst, + SA_LEN(&u->dst.sa))) { + pt = u; + pt->updates++; + } + u++; + } + } + } + + if (pt == NULL) { + /* not a "duplicate" update */ + pt = sc->sc_statep_tdb.t++; + sc->sc_mbuf_tdb->m_pkthdr.len = + sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb); + h->count++; + bzero(pt, sizeof(*pt)); + + pt->spi = tdb->tdb_spi; + memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst); + pt->sproto = tdb->tdb_sproto; + } + + /* + * When a failover happens, the master's rpl is probably above + * what we see here (we may be up to a second late), so + * increase it a bit for outbound tdbs to manage most such + * situations. + * + * For now, just add an offset that is likely to be larger + * than the number of packets we can see in one second. The RFC + * just says the next packet must have a higher seq value. + * + * XXX What is a good algorithm for this? We could use + * a rate-determined increase, but to know it, we would have + * to extend struct tdb. + * XXX pt->rpl can wrap over MAXINT, but if so the real tdb + * will soon be replaced anyway. For now, just don't handle + * this edge case. + */ +#define RPL_INCR 16384 + pt->rpl = htonl(tdb->tdb_rpl + (output ? RPL_INCR : 0)); + pt->cur_bytes = htobe64(tdb->tdb_cur_bytes); + + if (h->count == sc->sc_maxcount || + (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates))) + ret = pfsync_tdb_sendout(sc); + + splx(s); + return (ret); } Index: vendor-sys/pf/dist/sys/contrib/pf/net/if_pfsync.h =================================================================== --- vendor-sys/pf/dist/sys/contrib/pf/net/if_pfsync.h (revision 171163) +++ vendor-sys/pf/dist/sys/contrib/pf/net/if_pfsync.h (revision 171164) @@ -1,287 +1,336 @@ -/* $OpenBSD: if_pfsync.h,v 1.19 2005/01/20 17:47:38 mcbride Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.30 2006/10/31 14:49:01 henning Exp $ */ /* * Copyright (c) 2001 Michael Shalayeff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
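/*
 * Illustrative sketch, not part of the imported diff: the RPL_INCR
 * fudge factor in pfsync_update_tdb() above, paired with the "never
 * decrease" check in pfsync_update_net_tdb(), means the advertised
 * replay counter for an outbound TDB is padded so a peer taking over
 * after a failover starts above anything the master may have sent in
 * the meantime. Standalone, with the byte-order conversion elided and
 * a hypothetical counter value:
 */
#include <stdint.h>
#include <stdio.h>

#define RPL_INCR	16384	/* same fudge factor as above */

/* what the master advertises for an outbound tdb */
static uint32_t
advertise_rpl(uint32_t local_rpl, int output)
{
	return (local_rpl + (output ? RPL_INCR : 0));
}

int
main(void)
{
	uint32_t master_rpl = 123456;	/* hypothetical local counter */
	uint32_t peer_rpl = advertise_rpl(master_rpl, 1);

	/* pfsync_update_net_tdb() rejects anything that goes backwards */
	printf("peer takes over at %u (master was at %u)\n",
	    peer_rpl, master_rpl);
	return (0);
}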
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _NET_IF_PFSYNC_H_ #define _NET_IF_PFSYNC_H_ #define PFSYNC_ID_LEN sizeof(u_int64_t) struct pfsync_state_scrub { u_int16_t pfss_flags; u_int8_t pfss_ttl; /* stashed TTL */ +#define PFSYNC_SCRUB_FLAG_VALID 0x01 u_int8_t scrub_flag; u_int32_t pfss_ts_mod; /* timestamp modulation */ } __packed; struct pfsync_state_host { struct pf_addr addr; u_int16_t port; u_int16_t pad[3]; } __packed; struct pfsync_state_peer { struct pfsync_state_scrub scrub; /* state is scrubbed */ u_int32_t seqlo; /* Max sequence number sent */ u_int32_t seqhi; /* Max the other end ACKd + win */ u_int32_t seqdiff; /* Sequence number modulator */ u_int16_t max_win; /* largest window (pre scaling) */ u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ - u_int8_t scrub_flag; - u_int8_t pad[5]; + u_int8_t pad[6]; } __packed; struct pfsync_state { u_int32_t id[2]; char ifname[IFNAMSIZ]; struct pfsync_state_host lan; struct pfsync_state_host gwy; struct pfsync_state_host ext; struct pfsync_state_peer src; struct pfsync_state_peer dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; u_int32_t nat_rule; u_int32_t creation; u_int32_t expire; - u_int32_t packets[2]; - u_int32_t bytes[2]; + u_int32_t packets[2][2]; + u_int32_t bytes[2][2]; u_int32_t creatorid; sa_family_t af; u_int8_t proto; u_int8_t direction; u_int8_t log; u_int8_t allow_opts; u_int8_t timeout; u_int8_t sync_flags; u_int8_t updates; } __packed; #define PFSYNC_FLAG_COMPRESS 0x01 #define PFSYNC_FLAG_STALE 0x02 +struct pfsync_tdb { + u_int32_t spi; + union sockaddr_union dst; + u_int32_t rpl; + u_int64_t cur_bytes; + u_int8_t sproto; + u_int8_t updates; + u_int8_t pad[2]; +} __packed; + struct pfsync_state_upd { u_int32_t id[2]; struct pfsync_state_peer src; struct pfsync_state_peer dst; u_int32_t creatorid; u_int32_t expire; u_int8_t timeout; u_int8_t updates; u_int8_t pad[6]; } __packed; struct pfsync_state_del { u_int32_t id[2]; u_int32_t creatorid; struct { u_int8_t state; } src; struct { u_int8_t state; } dst; u_int8_t pad[2]; } __packed; struct pfsync_state_upd_req { u_int32_t id[2]; u_int32_t creatorid; u_int32_t pad; } __packed; struct pfsync_state_clr { char ifname[IFNAMSIZ]; u_int32_t creatorid; u_int32_t pad; } __packed; struct pfsync_state_bus { u_int32_t creatorid; u_int32_t endtime; u_int8_t status; #define PFSYNC_BUS_START 1 #define PFSYNC_BUS_END 2 u_int8_t pad[7]; } __packed; #ifdef _KERNEL union sc_statep { struct pfsync_state *s; struct pfsync_state_upd *u; struct pfsync_state_del *d; struct pfsync_state_clr *c; struct pfsync_state_bus *b; struct pfsync_state_upd_req *r; }; +union sc_tdb_statep { + struct pfsync_tdb *t; +}; + extern int pfsync_sync_ok; struct pfsync_softc { struct ifnet sc_if; struct ifnet 
*sc_sync_ifp; struct ip_moptions sc_imo; struct timeout sc_tmo; + struct timeout sc_tdb_tmo; struct timeout sc_bulk_tmo; struct timeout sc_bulkfail_tmo; struct in_addr sc_sync_peer; struct in_addr sc_sendaddr; struct mbuf *sc_mbuf; /* current cumulative mbuf */ struct mbuf *sc_mbuf_net; /* current cumulative mbuf */ + struct mbuf *sc_mbuf_tdb; /* ditto for TDB updates */ union sc_statep sc_statep; union sc_statep sc_statep_net; + union sc_tdb_statep sc_statep_tdb; u_int32_t sc_ureq_received; u_int32_t sc_ureq_sent; + struct pf_state *sc_bulk_send_next; + struct pf_state *sc_bulk_terminator; int sc_bulk_tries; int sc_maxcount; /* number of states in mtu */ int sc_maxupdates; /* number of updates/state */ }; + +extern struct pfsync_softc *pfsyncif; #endif struct pfsync_header { u_int8_t version; -#define PFSYNC_VERSION 2 +#define PFSYNC_VERSION 3 u_int8_t af; u_int8_t action; #define PFSYNC_ACT_CLR 0 /* clear all states */ #define PFSYNC_ACT_INS 1 /* insert state */ #define PFSYNC_ACT_UPD 2 /* update state */ #define PFSYNC_ACT_DEL 3 /* delete state */ #define PFSYNC_ACT_UPD_C 4 /* "compressed" state update */ #define PFSYNC_ACT_DEL_C 5 /* "compressed" state delete */ #define PFSYNC_ACT_INS_F 6 /* insert fragment */ #define PFSYNC_ACT_DEL_F 7 /* delete fragments */ #define PFSYNC_ACT_UREQ 8 /* request "uncompressed" state */ #define PFSYNC_ACT_BUS 9 /* Bulk Update Status */ -#define PFSYNC_ACT_MAX 10 +#define PFSYNC_ACT_TDB_UPD 10 /* TDB replay counter update */ +#define PFSYNC_ACT_MAX 11 u_int8_t count; + u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; } __packed; #define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */ #define PFSYNC_MAX_BULKTRIES 12 #define PFSYNC_HDRLEN sizeof(struct pfsync_header) #define PFSYNC_ACTIONS \ "CLR ST", "INS ST", "UPD ST", "DEL ST", \ "UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", \ - "UPD REQ", "BLK UPD STAT" + "UPD REQ", "BLK UPD STAT", "TDB UPD" #define PFSYNC_DFLTTL 255 struct pfsyncstats { u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */ u_int64_t pfsyncs_ipackets6; /* total input packets, IPv6 */ u_int64_t pfsyncs_badif; /* not the right interface */ u_int64_t pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */ u_int64_t pfsyncs_hdrops; /* packets shorter than hdr */ u_int64_t pfsyncs_badver; /* bad (incl unsupp) version */ u_int64_t pfsyncs_badact; /* bad action */ u_int64_t pfsyncs_badlen; /* data length does not match */ u_int64_t pfsyncs_badauth; /* bad authentication */ u_int64_t pfsyncs_stale; /* stale state */ u_int64_t pfsyncs_badval; /* bad values */ u_int64_t pfsyncs_badstate; /* insert/lookup failed */ u_int64_t pfsyncs_opackets; /* total output packets, IPv4 */ u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */ u_int64_t pfsyncs_onomem; /* no memory for an mbuf */ u_int64_t pfsyncs_oerrors; /* ip output error */ }; /* * Configuration structure for SIOCSETPFSYNC SIOCGETPFSYNC */ struct pfsyncreq { char pfsyncr_syncdev[IFNAMSIZ]; struct in_addr pfsyncr_syncpeer; int pfsyncr_maxupdates; int pfsyncr_authlevel; }; #define pf_state_peer_hton(s,d) do { \ (d)->seqlo = htonl((s)->seqlo); \ (d)->seqhi = htonl((s)->seqhi); \ (d)->seqdiff = htonl((s)->seqdiff); \ (d)->max_win = htons((s)->max_win); \ (d)->mss = htons((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ + if ((s)->scrub) { \ + (d)->scrub.pfss_flags = \ + htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ + (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ + (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ + (d)->scrub.scrub_flag =
PFSYNC_SCRUB_FLAG_VALID; \ + } \ } while (0) #define pf_state_peer_ntoh(s,d) do { \ (d)->seqlo = ntohl((s)->seqlo); \ (d)->seqhi = ntohl((s)->seqhi); \ (d)->seqdiff = ntohl((s)->seqdiff); \ (d)->max_win = ntohs((s)->max_win); \ (d)->mss = ntohs((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ + if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ + (d)->scrub != NULL) { \ + (d)->scrub->pfss_flags = \ + ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ + (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ + (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ + } \ } while (0) #define pf_state_host_hton(s,d) do { \ bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \ (d)->port = (s)->port; \ } while (0) #define pf_state_host_ntoh(s,d) do { \ bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \ (d)->port = (s)->port; \ } while (0) +#define pf_state_counter_hton(s,d) do { \ + d[0] = htonl((s>>32)&0xffffffff); \ + d[1] = htonl(s&0xffffffff); \ +} while (0) + +#define pf_state_counter_ntoh(s,d) do { \ + d = ntohl(s[0]); \ + d = d<<32; \ + d += ntohl(s[1]); \ +} while (0) + #ifdef _KERNEL void pfsync_input(struct mbuf *, ...); int pfsync_clear_states(u_int32_t, char *); int pfsync_pack_state(u_int8_t, struct pf_state *, int); #define pfsync_insert_state(st) do { \ if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || \ (st->proto == IPPROTO_PFSYNC)) \ st->sync_flags |= PFSTATE_NOSYNC; \ else if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_INS, (st), 1); \ + pfsync_pack_state(PFSYNC_ACT_INS, (st), \ + PFSYNC_FLAG_COMPRESS); \ st->sync_flags &= ~PFSTATE_FROMSYNC; \ } while (0) #define pfsync_update_state(st) do { \ if (!st->sync_flags) \ pfsync_pack_state(PFSYNC_ACT_UPD, (st), \ PFSYNC_FLAG_COMPRESS); \ st->sync_flags &= ~PFSTATE_FROMSYNC; \ } while (0) #define pfsync_delete_state(st) do { \ if (!st->sync_flags) \ pfsync_pack_state(PFSYNC_ACT_DEL, (st), \ PFSYNC_FLAG_COMPRESS); \ - st->sync_flags &= ~PFSTATE_FROMSYNC; \ } while (0) +int pfsync_update_tdb(struct tdb *, int); #endif #endif /* _NET_IF_PFSYNC_H_ */ Index: vendor-sys/pf/dist/sys/contrib/pf/net/pf.c =================================================================== --- vendor-sys/pf/dist/sys/contrib/pf/net/pf.c (revision 171163) +++ vendor-sys/pf/dist/sys/contrib/pf/net/pf.c (revision 171164) @@ -1,6383 +1,6665 @@ -/* $OpenBSD: pf.c,v 1.483 2005/03/15 17:38:43 dhartmei Exp $ */ +/* $OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002,2003 Henning Brauer * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. * */ #include "bpfilter.h" #include "pflog.h" #include "pfsync.h" #include #include #include #include #include #include #include #include #include +#include +#include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if NPFSYNC > 0 #include #endif /* NPFSYNC > 0 */ #ifdef INET6 #include #include #include #include #endif /* INET6 */ #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x /* * Global variables */ -struct pf_anchor_global pf_anchors; -struct pf_ruleset pf_main_ruleset; struct pf_altqqueue pf_altqs[2]; struct pf_palist pf_pabuf; struct pf_altqqueue *pf_altqs_active; struct pf_altqqueue *pf_altqs_inactive; struct pf_status pf_status; u_int32_t ticket_altqs_active; u_int32_t ticket_altqs_inactive; int altqs_inactive_open; u_int32_t ticket_pabuf; -struct timeout pf_expire_to; /* expire timeout */ - struct pf_anchor_stackframe { struct pf_ruleset *rs; struct pf_rule *r; struct pf_anchor_node *parent; struct pf_anchor *child; } pf_anchor_stack[64]; struct pool pf_src_tree_pl, pf_rule_pl; struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); void pf_init_threshold(struct pf_threshold *, u_int32_t, u_int32_t); void pf_add_threshold(struct pf_threshold *); int pf_check_threshold(struct pf_threshold *); void pf_change_ap(struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); +int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, + struct tcphdr *, struct pf_state_peer *); #ifdef INET6 void pf_change_a6(struct pf_addr *, u_int16_t *, struct pf_addr *, u_int8_t); #endif /* INET6 */ void pf_change_icmp(struct pf_addr *, u_int16_t *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t *, u_int16_t *, u_int16_t *, u_int16_t *, u_int8_t, sa_family_t); void pf_send_tcp(const struct pf_rule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, - struct ether_header *, struct ifnet *); + u_int16_t, struct ether_header *, struct ifnet *); void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_rule *); struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, int, int, struct pfi_kif *, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t, int); struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, int, int, struct pfi_kif *, struct pf_src_node **, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t *); int pf_test_tcp(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, 
struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **, struct ifqueue *); int pf_test_udp(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **, struct ifqueue *); int pf_test_icmp(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **, struct ifqueue *); int pf_test_other(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **, struct ifqueue *); int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **); int pf_test_state_tcp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); int pf_test_state_udp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *); int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); int pf_test_state_other(struct pf_state **, int, struct pfi_kif *, struct pf_pdesc *); -struct pf_tag *pf_get_tag(struct mbuf *); int pf_match_tag(struct mbuf *, struct pf_rule *, - struct pf_tag **, int *); + struct pf_mtag *, int *); +int pf_step_out_of_anchor(int *, struct pf_ruleset **, + int, struct pf_rule **, struct pf_rule **, + int *); void pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); int pf_map_addr(u_int8_t, struct pf_rule *, struct pf_addr *, struct pf_addr *, struct pf_addr *, struct pf_src_node **); int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, struct pf_addr *, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, struct pf_src_node **); void pf_route(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *); + struct ifnet *, struct pf_state *, + struct pf_pdesc *); void pf_route6(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *); -int pf_socket_lookup(uid_t *, gid_t *, - int, struct pf_pdesc *); + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +int pf_socket_lookup(int, struct pf_pdesc *); u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, sa_family_t); u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, u_int16_t); void pf_set_rt_ifp(struct pf_state *, struct pf_addr *); int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); -static int pf_add_mbuf_tag(struct mbuf *, u_int); struct pf_state *pf_find_state_recurse(struct pfi_kif *, - struct pf_state *, u_int8_t); + struct pf_state_cmp *, u_int8_t); int pf_src_connlimit(struct pf_state **); int pf_check_congestion(struct ifqueue *); +extern struct pool pfr_ktable_pl; +extern struct pool pfr_kentry_pl; + struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { { &pf_state_pl, PFSTATE_HIWAT }, { &pf_src_tree_pl, PFSNODE_HIWAT }, - { &pf_frent_pl, PFFRAG_FRENT_HIWAT } + { &pf_frent_pl, PFFRAG_FRENT_HIWAT }, + { &pfr_ktable_pl, PFR_KTABLE_HIWAT }, + { &pfr_kentry_pl, PFR_KENTRY_HIWAT } }; #define STATE_LOOKUP() \ do { \ if (direction == PF_IN) \ *state = pf_find_state_recurse( \ kif, &key, PF_EXT_GWY); \ else \ *state = pf_find_state_recurse( \ kif, &key, PF_LAN_EXT); \ if (*state == NULL || (*state)->timeout == PFTM_PURGE) 
\ return (PF_DROP); \ if (direction == PF_OUT && \ (((*state)->rule.ptr->rt == PF_ROUTETO && \ (*state)->rule.ptr->direction == PF_OUT) || \ ((*state)->rule.ptr->rt == PF_REPLYTO && \ (*state)->rule.ptr->direction == PF_IN)) && \ (*state)->rt_kif != NULL && \ (*state)->rt_kif != kif) \ return (PF_PASS); \ } while (0) #define STATE_TRANSLATE(s) \ (s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \ ((s)->af == AF_INET6 && \ ((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \ (s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \ (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \ (s)->lan.port != (s)->gwy.port -#define BOUND_IFACE(r, k) (((r)->rule_flag & PFRULE_IFBOUND) ? (k) : \ - ((r)->rule_flag & PFRULE_GRBOUND) ? (k)->pfik_parent : \ - (k)->pfik_parent->pfik_parent) +#define BOUND_IFACE(r, k) \ + ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all #define STATE_INC_COUNTERS(s) \ do { \ s->rule.ptr->states++; \ if (s->anchor.ptr != NULL) \ s->anchor.ptr->states++; \ if (s->nat_rule.ptr != NULL) \ s->nat_rule.ptr->states++; \ } while (0) #define STATE_DEC_COUNTERS(s) \ do { \ if (s->nat_rule.ptr != NULL) \ s->nat_rule.ptr->states--; \ if (s->anchor.ptr != NULL) \ s->anchor.ptr->states--; \ s->rule.ptr->states--; \ } while (0) static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); static __inline int pf_state_compare_lan_ext(struct pf_state *, struct pf_state *); static __inline int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *); static __inline int pf_state_compare_id(struct pf_state *, struct pf_state *); -static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); struct pf_src_tree tree_src_tracking; struct pf_state_tree_id tree_id; -struct pf_state_queue state_updates; +struct pf_state_queue state_list; RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); RB_GENERATE(pf_state_tree_lan_ext, pf_state, u.s.entry_lan_ext, pf_state_compare_lan_ext); RB_GENERATE(pf_state_tree_ext_gwy, pf_state, u.s.entry_ext_gwy, pf_state_compare_ext_gwy); RB_GENERATE(pf_state_tree_id, pf_state, u.s.entry_id, pf_state_compare_id); -RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); -RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); static __inline int pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) { int diff; if (a->rule.ptr > b->rule.ptr) return (1); if (a->rule.ptr < b->rule.ptr) return (-1); if ((diff = a->af - b->af) != 0) return (diff); switch (a->af) { #ifdef INET case AF_INET: if (a->addr.addr32[0] > b->addr.addr32[0]) return (1); if (a->addr.addr32[0] < b->addr.addr32[0]) return (-1); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (a->addr.addr32[3] > b->addr.addr32[3]) return (1); if (a->addr.addr32[3] < b->addr.addr32[3]) return (-1); if (a->addr.addr32[2] > b->addr.addr32[2]) return (1); if (a->addr.addr32[2] < b->addr.addr32[2]) return (-1); if (a->addr.addr32[1] > b->addr.addr32[1]) return (1); if (a->addr.addr32[1] < b->addr.addr32[1]) return (-1); if (a->addr.addr32[0] > b->addr.addr32[0]) return (1); if (a->addr.addr32[0] < b->addr.addr32[0]) return (-1); break; #endif /* INET6 */ } return (0); } static __inline int pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b) { int diff; if ((diff = a->proto - b->proto) != 0) return (diff); if ((diff = a->af - b->af) != 0) return (diff); switch (a->af) { #ifdef INET case AF_INET: if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) return (1); if (a->lan.addr.addr32[0] < 
b->lan.addr.addr32[0]) return (-1); if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) return (1); if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) return (-1); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3]) return (1); if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3]) return (-1); if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) return (1); if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) return (-1); if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2]) return (1); if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2]) return (-1); if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) return (1); if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) return (-1); if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1]) return (1); if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1]) return (-1); if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) return (1); if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) return (-1); if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) return (1); if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) return (-1); if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) return (1); if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) return (-1); break; #endif /* INET6 */ } if ((diff = a->lan.port - b->lan.port) != 0) return (diff); if ((diff = a->ext.port - b->ext.port) != 0) return (diff); return (0); } static __inline int pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b) { int diff; if ((diff = a->proto - b->proto) != 0) return (diff); if ((diff = a->af - b->af) != 0) return (diff); switch (a->af) { #ifdef INET case AF_INET: if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) return (1); if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) return (-1); if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) return (1); if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) return (-1); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) return (1); if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) return (-1); if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3]) return (1); if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3]) return (-1); if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) return (1); if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) return (-1); if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2]) return (1); if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2]) return (-1); if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) return (1); if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) return (-1); if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1]) return (1); if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1]) return (-1); if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) return (1); if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) return (-1); if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) return (1); if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) return (-1); break; #endif /* INET6 */ } if ((diff = a->ext.port - b->ext.port) != 0) return (diff); if ((diff = a->gwy.port - b->gwy.port) != 0) return (diff); return (0); } static __inline int pf_state_compare_id(struct pf_state *a, struct pf_state *b) { if (a->id > b->id) return (1); if (a->id < b->id) return (-1); if (a->creatorid > b->creatorid) return (1); if (a->creatorid < b->creatorid) return (-1); return (0); } -static __inline int -pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b) -{ - int c = strcmp(a->path, b->path); - - return (c ? (c < 0 ? 
-1 : 1) : 0); -} - #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: dst->addr32[0] = src->addr32[0]; break; #endif /* INET */ case AF_INET6: dst->addr32[0] = src->addr32[0]; dst->addr32[1] = src->addr32[1]; dst->addr32[2] = src->addr32[2]; dst->addr32[3] = src->addr32[3]; break; } } #endif /* INET6 */ struct pf_state * -pf_find_state_byid(struct pf_state *key) +pf_find_state_byid(struct pf_state_cmp *key) { pf_status.fcounters[FCNT_STATE_SEARCH]++; - return (RB_FIND(pf_state_tree_id, &tree_id, key)); + return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); } struct pf_state * -pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree) +pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree) { struct pf_state *s; pf_status.fcounters[FCNT_STATE_SEARCH]++; switch (tree) { case PF_LAN_EXT: - for (; kif != NULL; kif = kif->pfik_parent) { - s = RB_FIND(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, key); - if (s != NULL) - return (s); - } + if ((s = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext, + (struct pf_state *)key)) != NULL) + return (s); + if ((s = RB_FIND(pf_state_tree_lan_ext, &pfi_all->pfik_lan_ext, + (struct pf_state *)key)) != NULL) + return (s); return (NULL); case PF_EXT_GWY: - for (; kif != NULL; kif = kif->pfik_parent) { - s = RB_FIND(pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy, key); - if (s != NULL) - return (s); - } + if ((s = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, + (struct pf_state *)key)) != NULL) + return (s); + if ((s = RB_FIND(pf_state_tree_ext_gwy, &pfi_all->pfik_ext_gwy, + (struct pf_state *)key)) != NULL) + return (s); return (NULL); default: panic("pf_find_state_recurse"); } } struct pf_state * -pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) +pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more) { struct pf_state *s, *ss = NULL; struct pfi_kif *kif; pf_status.fcounters[FCNT_STATE_SEARCH]++; switch (tree) { case PF_LAN_EXT: TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { s = RB_FIND(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, key); + &kif->pfik_lan_ext, (struct pf_state *)key); if (s == NULL) continue; if (more == NULL) return (s); ss = s; (*more)++; } return (ss); case PF_EXT_GWY: TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { s = RB_FIND(pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy, key); + &kif->pfik_ext_gwy, (struct pf_state *)key); if (s == NULL) continue; if (more == NULL) return (s); ss = s; (*more)++; } return (ss); default: panic("pf_find_state_all"); } } void pf_init_threshold(struct pf_threshold *threshold, u_int32_t limit, u_int32_t seconds) { threshold->limit = limit * PF_THRESHOLD_MULT; threshold->seconds = seconds; threshold->count = 0; threshold->last = time_second; } void pf_add_threshold(struct pf_threshold *threshold) { u_int32_t t = time_second, diff = t - threshold->last; if (diff >= threshold->seconds) threshold->count = 0; else threshold->count -= threshold->count * diff / threshold->seconds; threshold->count += PF_THRESHOLD_MULT; threshold->last = t; } int pf_check_threshold(struct pf_threshold *threshold) { return (threshold->count > threshold->limit); } int pf_src_connlimit(struct pf_state **state) { struct pf_state *s; int bad = 0; (*state)->src_node->conn++; + (*state)->src.tcp_est = 1; pf_add_threshold(&(*state)->src_node->conn_rate); if ((*state)->rule.ptr->max_src_conn && (*state)->rule.ptr->max_src_conn < (*state)->src_node->conn) { 
pf_status.lcounters[LCNT_SRCCONN]++; bad++; } if ((*state)->rule.ptr->max_src_conn_rate.limit && pf_check_threshold(&(*state)->src_node->conn_rate)) { pf_status.lcounters[LCNT_SRCCONNRATE]++; bad++; } if (!bad) return (0); if ((*state)->rule.ptr->overload_tbl) { struct pfr_addr p; u_int32_t killed = 0; pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf_src_connlimit: blocking address "); pf_print_host(&(*state)->src_node->addr, 0, (*state)->af); } bzero(&p, sizeof(p)); p.pfra_af = (*state)->af; switch ((*state)->af) { #ifdef INET case AF_INET: p.pfra_net = 32; p.pfra_ip4addr = (*state)->src_node->addr.v4; break; #endif /* INET */ #ifdef INET6 case AF_INET6: p.pfra_net = 128; p.pfra_ip6addr = (*state)->src_node->addr.v6; break; #endif /* INET6 */ } pfr_insert_kentry((*state)->rule.ptr->overload_tbl, &p, time_second); /* kill existing states if that's required. */ if ((*state)->rule.ptr->flush) { pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; RB_FOREACH(s, pf_state_tree_id, &tree_id) { /* * Kill states from this source. (Only those * from the same rule if PF_FLUSH_GLOBAL is not * set) */ if (s->af == (*state)->af && (((*state)->direction == PF_OUT && PF_AEQ(&(*state)->src_node->addr, &s->lan.addr, s->af)) || ((*state)->direction == PF_IN && PF_AEQ(&(*state)->src_node->addr, &s->ext.addr, s->af))) && ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL || (*state)->rule.ptr == s->rule.ptr)) { s->timeout = PFTM_PURGE; s->src.state = s->dst.state = TCPS_CLOSED; killed++; } } if (pf_status.debug >= PF_DEBUG_MISC) printf(", %u states killed", killed); } if (pf_status.debug >= PF_DEBUG_MISC) printf("\n"); } /* kill this state */ (*state)->timeout = PFTM_PURGE; (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; return (1); } int pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, struct pf_addr *src, sa_family_t af) { struct pf_src_node k; if (*sn == NULL) { k.af = af; PF_ACPY(&k.addr, src, af); if (rule->rule_flag & PFRULE_RULESRCTRACK || rule->rpool.opts & PF_POOL_STICKYADDR) k.rule.ptr = rule; else k.rule.ptr = NULL; pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); } if (*sn == NULL) { if (!rule->max_src_nodes || rule->src_nodes < rule->max_src_nodes) (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT); else pf_status.lcounters[LCNT_SRCNODES]++; if ((*sn) == NULL) return (-1); bzero(*sn, sizeof(struct pf_src_node)); pf_init_threshold(&(*sn)->conn_rate, rule->max_src_conn_rate.limit, rule->max_src_conn_rate.seconds); (*sn)->af = af; if (rule->rule_flag & PFRULE_RULESRCTRACK || rule->rpool.opts & PF_POOL_STICKYADDR) (*sn)->rule.ptr = rule; else (*sn)->rule.ptr = NULL; PF_ACPY(&(*sn)->addr, src, af); if (RB_INSERT(pf_src_tree, &tree_src_tracking, *sn) != NULL) { if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf: src_tree insert failed: "); pf_print_host(&(*sn)->addr, 0, af); printf("\n"); } pool_put(&pf_src_tree_pl, *sn); return (-1); } (*sn)->creation = time_second; (*sn)->ruletype = rule->action; if ((*sn)->rule.ptr != NULL) (*sn)->rule.ptr->src_nodes++; pf_status.scounters[SCNT_SRC_NODE_INSERT]++; pf_status.src_nodes++; } else { if (rule->max_src_states && (*sn)->states >= rule->max_src_states) { pf_status.lcounters[LCNT_SRCSTATES]++; return (-1); } } return (0); } int pf_insert_state(struct pfi_kif *kif, struct pf_state *state) { /* Thou MUST NOT insert multiple duplicate keys */ state->u.s.kif = kif; if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) { if (pf_status.debug >= 
PF_DEBUG_MISC) { printf("pf: state insert failed: tree_lan_ext"); printf(" lan: "); pf_print_host(&state->lan.addr, state->lan.port, state->af); printf(" gwy: "); pf_print_host(&state->gwy.addr, state->gwy.port, state->af); printf(" ext: "); pf_print_host(&state->ext.addr, state->ext.port, state->af); if (state->sync_flags & PFSTATE_FROMSYNC) printf(" (from sync)"); printf("\n"); } return (-1); } if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) { if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf: state insert failed: tree_ext_gwy"); printf(" lan: "); pf_print_host(&state->lan.addr, state->lan.port, state->af); printf(" gwy: "); pf_print_host(&state->gwy.addr, state->gwy.port, state->af); printf(" ext: "); pf_print_host(&state->ext.addr, state->ext.port, state->af); if (state->sync_flags & PFSTATE_FROMSYNC) printf(" (from sync)"); printf("\n"); } RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state); return (-1); } if (state->id == 0 && state->creatorid == 0) { state->id = htobe64(pf_status.stateid++); state->creatorid = pf_status.hostid; } if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) { if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf: state insert failed: " "id: %016llx creatorid: %08x", betoh64(state->id), ntohl(state->creatorid)); if (state->sync_flags & PFSTATE_FROMSYNC) printf(" (from sync)"); printf("\n"); } RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state); RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state); return (-1); } - TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates); - + TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list); pf_status.fcounters[FCNT_STATE_INSERT]++; pf_status.states++; - pfi_attach_state(kif); + pfi_kif_ref(kif, PFI_KIF_REF_STATE); #if NPFSYNC pfsync_insert_state(state); #endif return (0); } void -pf_purge_timeout(void *arg) +pf_purge_thread(void *v) { - struct timeout *to = arg; - int s; + int nloops = 0, s; - s = splsoftnet(); - pf_purge_expired_states(); - pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(); - splx(s); + for (;;) { + tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); - timeout_add(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz); + s = splsoftnet(); + + /* process a fraction of the state table every second */ + pf_purge_expired_states(1 + (pf_status.states + / pf_default_rule.timeout[PFTM_INTERVAL])); + + /* purge other expired types every PFTM_INTERVAL seconds */ + if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(0); + nloops = 0; + } + + splx(s); + } } u_int32_t pf_state_expires(const struct pf_state *state) { u_int32_t timeout; u_int32_t start; u_int32_t end; u_int32_t states; /* handle all PFTM_* > PFTM_MAX here */ if (state->timeout == PFTM_PURGE) return (time_second); if (state->timeout == PFTM_UNTIL_PACKET) return (0); + KASSERT(state->timeout != PFTM_UNLINKED); KASSERT(state->timeout < PFTM_MAX); timeout = state->rule.ptr->timeout[state->timeout]; if (!timeout) timeout = pf_default_rule.timeout[state->timeout]; start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; if (start) { end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; states = state->rule.ptr->states; } else { start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; states = pf_status.states; } if (end && states > start && start < end) { if (states < end) return (state->expire + timeout * (end - states) / (end - start)); else return (time_second); } return (state->expire + timeout); } 
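The new pf_purge_thread() above replaces the old pf_purge_timeout() callback: rather than sweeping the entire state table in one shot, it wakes once per second and checks only 1 + pf_status.states / PFTM_INTERVAL entries, so a full pass over the table completes in roughly one PFTM_INTERVAL regardless of table size. A minimal standalone sketch of that batch-size computation follows; the 10-second interval is an assumed illustrative default, not something this diff sets.

#include <stdio.h>

/*
 * Sketch only: how pf_purge_thread() sizes its per-wakeup batch.
 * INTERVAL stands in for pf_default_rule.timeout[PFTM_INTERVAL].
 */
#define INTERVAL 10	/* seconds; assumed default for illustration */

static unsigned int
purge_batch(unsigned int states, unsigned int interval)
{
	/* the "+ 1" guarantees forward progress on a near-empty table */
	return (1 + states / interval);
}

int
main(void)
{
	/*
	 * 100000 states with a 10s interval gives 10001 checks per
	 * wakeup, so every state is revisited about once per 10s.
	 */
	printf("%u\n", purge_batch(100000, INTERVAL));
	return (0);
}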
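pf_state_expires() above also carries pf's adaptive-timeout logic: once the relevant state count passes the ADAPTIVE_START threshold, the remaining timeout is scaled down linearly, reaching zero at ADAPTIVE_END. A hedged standalone sketch of just that scaling step; the function name and the 6000/12000 thresholds here are illustrative assumptions, not values set by this diff.

/*
 * Sketch of the scaling used by pf_state_expires(); not the kernel
 * code itself.  With start = 6000, end = 12000 and 9000 states, a
 * configured timeout is cut in half.
 */
static unsigned int
adaptive_timeout(unsigned int timeout, unsigned int states,
    unsigned int start, unsigned int end)
{
	if (end && states > start && start < end) {
		if (states >= end)
			return (0);	/* at or past the end: expire now */
		return (timeout * (end - states) / (end - start));
	}
	return (timeout);	/* adaptive scaling not in effect */
}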
void -pf_purge_expired_src_nodes(void) +pf_purge_expired_src_nodes(int waslocked) { struct pf_src_node *cur, *next; + int locked = waslocked; for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); if (cur->states <= 0 && cur->expire <= time_second) { + if (! locked) { + rw_enter_write(&pf_consistency_lock); + next = RB_NEXT(pf_src_tree, + &tree_src_tracking, cur); + locked = 1; + } if (cur->rule.ptr != NULL) { cur->rule.ptr->src_nodes--; if (cur->rule.ptr->states <= 0 && cur->rule.ptr->max_src_nodes <= 0) pf_rm_rule(NULL, cur->rule.ptr); } RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; pf_status.src_nodes--; pool_put(&pf_src_tree_pl, cur); } } + + if (locked && !waslocked) + rw_exit_write(&pf_consistency_lock); } void pf_src_tree_remove_state(struct pf_state *s) { u_int32_t timeout; if (s->src_node != NULL) { if (s->proto == IPPROTO_TCP) { - if (s->src.state == PF_TCPS_PROXY_DST || - s->timeout >= PFTM_TCP_ESTABLISHED) + if (s->src.tcp_est) --s->src_node->conn; } if (--s->src_node->states <= 0) { timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) timeout = pf_default_rule.timeout[PFTM_SRC_NODE]; s->src_node->expire = time_second + timeout; } } if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { if (--s->nat_src_node->states <= 0) { timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) timeout = pf_default_rule.timeout[PFTM_SRC_NODE]; s->nat_src_node->expire = time_second + timeout; } } s->src_node = s->nat_src_node = NULL; } +/* callers should be at splsoftnet */ void -pf_purge_expired_state(struct pf_state *cur) +pf_unlink_state(struct pf_state *cur) { - if (cur->src.state == PF_TCPS_PROXY_DST) + if (cur->src.state == PF_TCPS_PROXY_DST) { pf_send_tcp(cur->rule.ptr, cur->af, &cur->ext.addr, &cur->lan.addr, cur->ext.port, cur->lan.port, cur->src.seqhi, cur->src.seqlo + 1, - TH_RST|TH_ACK, 0, 0, 0, 1, NULL, NULL); + TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); + } RB_REMOVE(pf_state_tree_ext_gwy, &cur->u.s.kif->pfik_ext_gwy, cur); RB_REMOVE(pf_state_tree_lan_ext, &cur->u.s.kif->pfik_lan_ext, cur); RB_REMOVE(pf_state_tree_id, &tree_id, cur); #if NPFSYNC - pfsync_delete_state(cur); + if (cur->creatorid == pf_status.hostid) + pfsync_delete_state(cur); #endif + cur->timeout = PFTM_UNLINKED; pf_src_tree_remove_state(cur); +} + +/* callers should be at splsoftnet and hold the + * write_lock on pf_consistency_lock */ +void +pf_free_state(struct pf_state *cur) +{ +#if NPFSYNC + if (pfsyncif != NULL && + (pfsyncif->sc_bulk_send_next == cur || + pfsyncif->sc_bulk_terminator == cur)) + return; +#endif + KASSERT(cur->timeout == PFTM_UNLINKED); if (--cur->rule.ptr->states <= 0 && cur->rule.ptr->src_nodes <= 0) pf_rm_rule(NULL, cur->rule.ptr); if (cur->nat_rule.ptr != NULL) if (--cur->nat_rule.ptr->states <= 0 && cur->nat_rule.ptr->src_nodes <= 0) pf_rm_rule(NULL, cur->nat_rule.ptr); if (cur->anchor.ptr != NULL) if (--cur->anchor.ptr->states <= 0) pf_rm_rule(NULL, cur->anchor.ptr); pf_normalize_tcp_cleanup(cur); - pfi_detach_state(cur->u.s.kif); - TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates); + pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE); + TAILQ_REMOVE(&state_list, cur, u.s.entry_list); if (cur->tag) pf_tag_unref(cur->tag); pool_put(&pf_state_pl, cur); pf_status.fcounters[FCNT_STATE_REMOVALS]++; pf_status.states--; } void -pf_purge_expired_states(void) +pf_purge_expired_states(u_int32_t maxcheck) { - struct pf_state *cur, *next; + static 
struct pf_state *cur = NULL; + struct pf_state *next; + int locked = 0; - for (cur = RB_MIN(pf_state_tree_id, &tree_id); - cur; cur = next) { - next = RB_NEXT(pf_state_tree_id, &tree_id, cur); - if (pf_state_expires(cur) <= time_second) - pf_purge_expired_state(cur); + while (maxcheck--) { + /* wrap to start of list when we hit the end */ + if (cur == NULL) { + cur = TAILQ_FIRST(&state_list); + if (cur == NULL) + break; /* list empty */ + } + + /* get next state, as cur may get deleted */ + next = TAILQ_NEXT(cur, u.s.entry_list); + + if (cur->timeout == PFTM_UNLINKED) { + /* free unlinked state */ + if (! locked) { + rw_enter_write(&pf_consistency_lock); + locked = 1; + } + pf_free_state(cur); + } else if (pf_state_expires(cur) <= time_second) { + /* unlink and free expired state */ + pf_unlink_state(cur); + if (! locked) { + rw_enter_write(&pf_consistency_lock); + locked = 1; + } + pf_free_state(cur); + } + cur = next; } + + if (locked) + rw_exit_write(&pf_consistency_lock); } int pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) { if (aw->type != PF_ADDR_TABLE) return (0); if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) return (1); return (0); } void pf_tbladdr_remove(struct pf_addr_wrap *aw) { if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) return; pfr_detach_table(aw->p.tbl); aw->p.tbl = NULL; } void pf_tbladdr_copyout(struct pf_addr_wrap *aw) { struct pfr_ktable *kt = aw->p.tbl; if (aw->type != PF_ADDR_TABLE || kt == NULL) return; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; aw->p.tbl = NULL; aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? kt->pfrkt_cnt : -1; } void pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: { u_int32_t a = ntohl(addr->addr32[0]); printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, (a>>8)&255, a&255); if (p) { p = ntohs(p); printf(":%u", p); } break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { u_int16_t b; u_int8_t i, curstart = 255, curend = 0, maxstart = 0, maxend = 0; for (i = 0; i < 8; i++) { if (!addr->addr16[i]) { if (curstart == 255) curstart = i; else curend = i; } else { if (curstart) { if ((curend - curstart) > (maxend - maxstart)) { maxstart = curstart; maxend = curend; curstart = 255; } } } } for (i = 0; i < 8; i++) { if (i >= maxstart && i <= maxend) { if (maxend != 7) { if (i == maxstart) printf(":"); } else { if (i == maxend) printf(":"); } } else { b = ntohs(addr->addr16[i]); printf("%x", b); if (i < 7) printf(":"); } } if (p) { p = ntohs(p); printf("[%u]", p); } break; } #endif /* INET6 */ } } void pf_print_state(struct pf_state *s) { switch (s->proto) { case IPPROTO_TCP: printf("TCP "); break; case IPPROTO_UDP: printf("UDP "); break; case IPPROTO_ICMP: printf("ICMP "); break; case IPPROTO_ICMPV6: printf("ICMPV6 "); break; default: printf("%u ", s->proto); break; } pf_print_host(&s->lan.addr, s->lan.port, s->af); printf(" "); pf_print_host(&s->gwy.addr, s->gwy.port, s->af); printf(" "); pf_print_host(&s->ext.addr, s->ext.port, s->af); printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo, s->src.seqhi, s->src.max_win, s->src.seqdiff); if (s->src.wscale && s->dst.wscale) printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK); printf("]"); printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo, s->dst.seqhi, s->dst.max_win, s->dst.seqdiff); if (s->src.wscale && s->dst.wscale) printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK); printf("]"); printf(" %u:%u", s->src.state, s->dst.state); } void 
pf_print_flags(u_int8_t f) { if (f) printf(" "); if (f & TH_FIN) printf("F"); if (f & TH_SYN) printf("S"); if (f & TH_RST) printf("R"); if (f & TH_PUSH) printf("P"); if (f & TH_ACK) printf("A"); if (f & TH_URG) printf("U"); if (f & TH_ECE) printf("E"); if (f & TH_CWR) printf("W"); } #define PF_SET_SKIP_STEPS(i) \ do { \ while (head[i] != cur) { \ head[i]->skip[i].ptr = cur; \ head[i] = TAILQ_NEXT(head[i], entries); \ } \ } while (0) void pf_calc_skip_steps(struct pf_rulequeue *rules) { struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; int i; cur = TAILQ_FIRST(rules); prev = cur; for (i = 0; i < PF_SKIP_COUNT; ++i) head[i] = cur; while (cur != NULL) { if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) PF_SET_SKIP_STEPS(PF_SKIP_IFP); if (cur->direction != prev->direction) PF_SET_SKIP_STEPS(PF_SKIP_DIR); if (cur->af != prev->af) PF_SET_SKIP_STEPS(PF_SKIP_AF); if (cur->proto != prev->proto) PF_SET_SKIP_STEPS(PF_SKIP_PROTO); if (cur->src.neg != prev->src.neg || pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); if (cur->src.port[0] != prev->src.port[0] || cur->src.port[1] != prev->src.port[1] || cur->src.port_op != prev->src.port_op) PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); if (cur->dst.neg != prev->dst.neg || pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); if (cur->dst.port[0] != prev->dst.port[0] || cur->dst.port[1] != prev->dst.port[1] || cur->dst.port_op != prev->dst.port_op) PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); prev = cur; cur = TAILQ_NEXT(cur, entries); } for (i = 0; i < PF_SKIP_COUNT; ++i) PF_SET_SKIP_STEPS(i); } int pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) { if (aw1->type != aw2->type) return (1); switch (aw1->type) { case PF_ADDR_ADDRMASK: if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) return (1); if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) return (1); return (0); case PF_ADDR_DYNIFTL: return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); case PF_ADDR_NOROUTE: + case PF_ADDR_URPFFAILED: return (0); case PF_ADDR_TABLE: return (aw1->p.tbl != aw2->p.tbl); + case PF_ADDR_RTLABEL: + return (aw1->v.rtlabel != aw2->v.rtlabel); default: printf("invalid address type: %d\n", aw1->type); return (1); } } u_int16_t pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) { u_int32_t l; if (udp && !cksum) return (0x0000); l = cksum + old - new; l = (l >> 16) + (l & 65535); l = l & 65535; if (udp && !l) return (0xFFFF); return (l); } void pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) { struct pf_addr ao; u_int16_t po = *p; PF_ACPY(&ao, a, af); PF_ACPY(a, an, af); *p = pn; switch (af) { #ifdef INET case AF_INET: *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, ao.addr16[0], an->addr16[0], 0), ao.addr16[1], an->addr16[1], 0); *p = pn; *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u), po, pn, u); break; #endif /* INET */ #ifdef INET6 case AF_INET6: *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u), ao.addr16[2], an->addr16[2], u), ao.addr16[3], an->addr16[3], u), ao.addr16[4], an->addr16[4], u), ao.addr16[5], an->addr16[5], u), ao.addr16[6], an->addr16[6], u), ao.addr16[7], an->addr16[7], u), po, pn, u); break; #endif /* INET6 */ } } /* Changes a u_int32_t. 
Uses a void * so there are no align restrictions */ void pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) { u_int32_t ao; memcpy(&ao, a, sizeof(ao)); memcpy(a, &an, sizeof(u_int32_t)); *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), ao % 65536, an % 65536, u); } #ifdef INET6 void pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) { struct pf_addr ao; PF_ACPY(&ao, a, AF_INET6); PF_ACPY(a, an, AF_INET6); *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*c, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u), ao.addr16[2], an->addr16[2], u), ao.addr16[3], an->addr16[3], u), ao.addr16[4], an->addr16[4], u), ao.addr16[5], an->addr16[5], u), ao.addr16[6], an->addr16[6], u), ao.addr16[7], an->addr16[7], u); } #endif /* INET6 */ void pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) { struct pf_addr oia, ooa; PF_ACPY(&oia, ia, af); PF_ACPY(&ooa, oa, af); /* Change inner protocol port, fix inner protocol checksum. */ if (ip != NULL) { u_int16_t oip = *ip; u_int32_t opc; if (pc != NULL) opc = *pc; *ip = np; if (pc != NULL) *pc = pf_cksum_fixup(*pc, oip, *ip, u); *ic = pf_cksum_fixup(*ic, oip, *ip, 0); if (pc != NULL) *ic = pf_cksum_fixup(*ic, opc, *pc, 0); } /* Change inner ip address, fix inner ip and icmp checksums. */ PF_ACPY(ia, na, af); switch (af) { #ifdef INET case AF_INET: { u_int32_t oh2c = *h2c; *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, oia.addr16[0], ia->addr16[0], 0), oia.addr16[1], ia->addr16[1], 0); *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, oia.addr16[0], ia->addr16[0], 0), oia.addr16[1], ia->addr16[1], 0); *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); break; } #endif /* INET */ #ifdef INET6 case AF_INET6: *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*ic, oia.addr16[0], ia->addr16[0], u), oia.addr16[1], ia->addr16[1], u), oia.addr16[2], ia->addr16[2], u), oia.addr16[3], ia->addr16[3], u), oia.addr16[4], ia->addr16[4], u), oia.addr16[5], ia->addr16[5], u), oia.addr16[6], ia->addr16[6], u), oia.addr16[7], ia->addr16[7], u); break; #endif /* INET6 */ } /* Change outer ip address, fix outer ip or icmpv6 checksum. 
*/ PF_ACPY(oa, na, af); switch (af) { #ifdef INET case AF_INET: *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, ooa.addr16[0], oa->addr16[0], 0), ooa.addr16[1], oa->addr16[1], 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*ic, ooa.addr16[0], oa->addr16[0], u), ooa.addr16[1], oa->addr16[1], u), ooa.addr16[2], oa->addr16[2], u), ooa.addr16[3], oa->addr16[3], u), ooa.addr16[4], oa->addr16[4], u), ooa.addr16[5], oa->addr16[5], u), ooa.addr16[6], oa->addr16[6], u), ooa.addr16[7], oa->addr16[7], u); break; #endif /* INET6 */ } } + +/* + * Need to modulate the sequence numbers in the TCP SACK option + * (credits to Krzysztof Pfaff for report and patch) + */ +int +pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *dst) +{ + int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; + u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; + int copyback = 0, i, olen; + struct sackblk sack; + +#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) + if (hlen < TCPOLEN_SACKLEN || + !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) + return 0; + + while (hlen >= TCPOLEN_SACKLEN) { + olen = opt[1]; + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_SACK: + if (olen > hlen) + olen = hlen; + if (olen >= TCPOLEN_SACKLEN) { + for (i = 2; i + TCPOLEN_SACK <= olen; + i += TCPOLEN_SACK) { + memcpy(&sack, &opt[i], sizeof(sack)); + pf_change_a(&sack.start, &th->th_sum, + htonl(ntohl(sack.start) - + dst->seqdiff), 0); + pf_change_a(&sack.end, &th->th_sum, + htonl(ntohl(sack.end) - + dst->seqdiff), 0); + memcpy(&opt[i], &sack, sizeof(sack)); + } + copyback = 1; + } + /* FALLTHROUGH */ + default: + if (olen < 2) + olen = 2; + hlen -= olen; + opt += olen; + } + } + + if (copyback) + m_copyback(m, off + sizeof(*th), thoptlen, opts); + return (copyback); +} + void pf_send_tcp(const struct pf_rule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, - struct ether_header *eh, struct ifnet *ifp) + u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp) { struct mbuf *m; int len, tlen; #ifdef INET struct ip *h; #endif /* INET */ #ifdef INET6 struct ip6_hdr *h6; #endif /* INET6 */ struct tcphdr *th; - char *opt; + char *opt; + struct pf_mtag *pf_mtag; /* maximum segment size tcp option */ tlen = sizeof(struct tcphdr); if (mss) tlen += 4; switch (af) { #ifdef INET case AF_INET: len = sizeof(struct ip) + tlen; break; #endif /* INET */ #ifdef INET6 case AF_INET6: len = sizeof(struct ip6_hdr) + tlen; break; #endif /* INET6 */ } /* create outgoing mbuf */ m = m_gethdr(M_DONTWAIT, MT_HEADER); if (m == NULL) return; - if (tag) { - struct m_tag *mtag; - - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) { - m_freem(m); - return; - } - m_tag_prepend(m, mtag); + if ((pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + return; } + if (tag) + pf_mtag->flags |= PF_TAG_GENERATED; + + pf_mtag->tag = rtag; + + if (r != NULL && r->rtableid >= 0) + pf_mtag->rtableid = r->rtableid; + #ifdef ALTQ if (r != NULL && r->qid) { - struct m_tag *mtag; - struct altq_tag *atag; - - mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); - if (mtag != NULL) { - atag = (struct altq_tag *)(mtag + 1); - atag->qid = 
r->qid; - /* add hints for ecn */ - atag->af = af; - atag->hdr = mtod(m, struct ip *); - m_tag_prepend(m, mtag); - } + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->af = af; + pf_mtag->hdr = mtod(m, struct ip *); } #endif /* ALTQ */ m->m_data += max_linkhdr; m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = NULL; bzero(m->m_data, len); switch (af) { #ifdef INET case AF_INET: h = mtod(m, struct ip *); /* IP header fields included in the TCP checksum */ h->ip_p = IPPROTO_TCP; h->ip_len = htons(tlen); h->ip_src.s_addr = saddr->v4.s_addr; h->ip_dst.s_addr = daddr->v4.s_addr; th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); break; #endif /* INET */ #ifdef INET6 case AF_INET6: h6 = mtod(m, struct ip6_hdr *); /* IP header fields included in the TCP checksum */ h6->ip6_nxt = IPPROTO_TCP; h6->ip6_plen = htons(tlen); memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); break; #endif /* INET6 */ } /* TCP header */ th->th_sport = sport; th->th_dport = dport; th->th_seq = htonl(seq); th->th_ack = htonl(ack); th->th_off = tlen >> 2; th->th_flags = flags; th->th_win = htons(win); if (mss) { opt = (char *)(th + 1); opt[0] = TCPOPT_MAXSEG; opt[1] = 4; HTONS(mss); bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); } switch (af) { #ifdef INET case AF_INET: /* TCP checksum */ th->th_sum = in_cksum(m, len); /* Finish the IP header */ h->ip_v = 4; h->ip_hl = sizeof(*h) >> 2; h->ip_tos = IPTOS_LOWDELAY; h->ip_len = htons(len); h->ip_off = htons(ip_mtudisc ? IP_DF : 0); h->ip_ttl = ttl ? ttl : ip_defttl; h->ip_sum = 0; if (eh == NULL) { ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL, (void *)NULL); } else { struct route ro; struct rtentry rt; struct ether_header *e = (void *)ro.ro_dst.sa_data; if (ifp == NULL) { m_freem(m); return; } rt.rt_ifp = ifp; ro.ro_rt = &rt; ro.ro_dst.sa_len = sizeof(ro.ro_dst); ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT; bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN); bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN); e->ether_type = eh->ether_type; ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER, (void *)NULL, (void *)NULL); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: /* TCP checksum */ th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), tlen); h6->ip6_vfc |= IPV6_VERSION; h6->ip6_hlim = IPV6_DEFHLIM; ip6_output(m, NULL, NULL, 0, NULL, NULL); break; #endif /* INET6 */ } } void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) { - struct m_tag *mtag; + struct pf_mtag *pf_mtag; struct mbuf *m0; - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) - return; m0 = m_copy(m, 0, M_COPYALL); - if (m0 == NULL) { - m_tag_free(mtag); + + if ((pf_mtag = pf_get_mtag(m0)) == NULL) return; - } - m_tag_prepend(m0, mtag); + pf_mtag->flags |= PF_TAG_GENERATED; + if (r->rtableid >= 0) + pf_mtag->rtableid = r->rtableid; + #ifdef ALTQ if (r->qid) { - struct altq_tag *atag; - - mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); - if (mtag != NULL) { - atag = (struct altq_tag *)(mtag + 1); - atag->qid = r->qid; - /* add hints for ecn */ - atag->af = af; - atag->hdr = mtod(m0, struct ip *); - m_tag_prepend(m0, mtag); - } + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->af = af; + pf_mtag->hdr = mtod(m0, struct ip *); } #endif /* ALTQ */ switch (af) { #ifdef INET case AF_INET: - icmp_error(m0, type, code, 0, (void *)NULL); + 
icmp_error(m0, type, code, 0, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: icmp6_error(m0, type, code, 0); break; #endif /* INET6 */ } } /* * Return 1 if the addresses a and b match (with mask m), otherwise return 0. * If n is 0, they match if they are equal. If n is != 0, they match if they * are different. */ int pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, struct pf_addr *b, sa_family_t af) { int match = 0; switch (af) { #ifdef INET case AF_INET: if ((a->addr32[0] & m->addr32[0]) == (b->addr32[0] & m->addr32[0])) match++; break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (((a->addr32[0] & m->addr32[0]) == (b->addr32[0] & m->addr32[0])) && ((a->addr32[1] & m->addr32[1]) == (b->addr32[1] & m->addr32[1])) && ((a->addr32[2] & m->addr32[2]) == (b->addr32[2] & m->addr32[2])) && ((a->addr32[3] & m->addr32[3]) == (b->addr32[3] & m->addr32[3]))) match++; break; #endif /* INET6 */ } if (match) { if (n) return (0); else return (1); } else { if (n) return (1); else return (0); } } int pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) { switch (op) { case PF_OP_IRG: return ((p > a1) && (p < a2)); case PF_OP_XRG: return ((p < a1) || (p > a2)); case PF_OP_RRG: return ((p >= a1) && (p <= a2)); case PF_OP_EQ: return (p == a1); case PF_OP_NE: return (p != a1); case PF_OP_LT: return (p < a1); case PF_OP_LE: return (p <= a1); case PF_OP_GT: return (p > a1); case PF_OP_GE: return (p >= a1); } return (0); /* never reached */ } int pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) { NTOHS(a1); NTOHS(a2); NTOHS(p); return (pf_match(op, a1, a2, p)); } int pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) { if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) return (0); return (pf_match(op, a1, a2, u)); } int pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) { if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) return (0); return (pf_match(op, a1, a2, g)); } -struct pf_tag * -pf_get_tag(struct mbuf *m) +struct pf_mtag * +pf_find_mtag(struct mbuf *m) { struct m_tag *mtag; - if ((mtag = m_tag_find(m, PACKET_TAG_PF_TAG, NULL)) != NULL) - return ((struct pf_tag *)(mtag + 1)); - else + if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) return (NULL); + + return ((struct pf_mtag *)(mtag + 1)); } -int -pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_tag **pftag, int *tag) +struct pf_mtag * +pf_get_mtag(struct mbuf *m) { - if (*tag == -1) { /* find mbuf tag */ - *pftag = pf_get_tag(m); - if (*pftag != NULL) - *tag = (*pftag)->tag; - else - *tag = 0; + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) == NULL) { + mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag), + M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof(struct pf_mtag)); + m_tag_prepend(m, mtag); } + return ((struct pf_mtag *)(mtag + 1)); +} + +int +pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag, + int *tag) +{ + if (*tag == -1) + *tag = pf_mtag->tag; + return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } int -pf_tag_packet(struct mbuf *m, struct pf_tag *pftag, int tag) +pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid) { - struct m_tag *mtag; - - if (tag <= 0) + if (tag <= 0 && rtableid < 0) return (0); - if (pftag == NULL) { - mtag = m_tag_get(PACKET_TAG_PF_TAG, sizeof(*pftag), M_NOWAIT); - if (mtag == NULL) + if (pf_mtag == NULL) + if ((pf_mtag = pf_get_mtag(m)) == NULL) return (1); - ((struct pf_tag *)(mtag + 
1))->tag = tag; - m_tag_prepend(m, mtag); - } else - pftag->tag = tag; + if (tag > 0) + pf_mtag->tag = tag; + if (rtableid >= 0) + pf_mtag->rtableid = rtableid; return (0); } static void pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, - struct pf_rule **r, struct pf_rule **a) + struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; + (*r)->anchor->match = 0; + if (match) + *match = 0; if (*depth >= sizeof(pf_anchor_stack) / sizeof(pf_anchor_stack[0])) { printf("pf_step_into_anchor: stack overflow\n"); *r = TAILQ_NEXT(*r, entries); return; } else if (*depth == 0 && a != NULL) *a = *r; f = pf_anchor_stack + (*depth)++; f->rs = *rs; f->r = *r; if ((*r)->anchor_wildcard) { f->parent = &(*r)->anchor->children; if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == NULL) { *r = NULL; return; } *rs = &f->child->ruleset; } else { f->parent = NULL; f->child = NULL; *rs = &(*r)->anchor->ruleset; } *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); } -static void +int pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, - struct pf_rule **r, struct pf_rule **a) + struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; + int quick = 0; do { if (*depth <= 0) break; f = pf_anchor_stack + *depth - 1; if (f->parent != NULL && f->child != NULL) { + if (f->child->match || + (match != NULL && *match)) { + f->r->anchor->match = 1; + *match = 0; + } f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); if (f->child != NULL) { *rs = &f->child->ruleset; *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); if (*r == NULL) continue; else break; } } (*depth)--; if (*depth == 0 && a != NULL) *a = NULL; *rs = f->rs; + if (f->r->anchor->match || (match != NULL && *match)) + quick = f->r->quick; *r = TAILQ_NEXT(f->r, entries); } while (*r == NULL); + + return (quick); } #ifdef INET6 void pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); break; #endif /* INET */ case AF_INET6: naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); break; } } void pf_addr_inc(struct pf_addr *addr, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); break; #endif /* INET */ case AF_INET6: if (addr->addr32[3] == 0xffffffff) { addr->addr32[3] = 0; if (addr->addr32[2] == 0xffffffff) { addr->addr32[2] = 0; if (addr->addr32[1] == 0xffffffff) { addr->addr32[1] = 0; addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); } else addr->addr32[1] = htonl(ntohl(addr->addr32[1]) + 1); } else addr->addr32[2] = htonl(ntohl(addr->addr32[2]) + 1); } else addr->addr32[3] = htonl(ntohl(addr->addr32[3]) + 1); break; } } #endif /* INET6 */ #define mix(a,b,c) \ do { \ a -= b; a -= c; a ^= (c >> 13); \ b -= c; b -= a; b ^= (a << 8); \ c -= a; c -= b; c ^= (b >> 13); \ a -= b; a -= c; a ^= (c >> 12); \ b -= c; b -= a; b ^= (a << 16); \ c -= a; c -= b; c ^= (b >> 5); \ a -= b; a -= c; a ^= (c >> 3); \ b -= c; b -= 
a; b ^= (a << 10); \ c -= a; c -= b; c ^= (b >> 15); \ } while (0) /* * hash function based on bridge_hash in if_bridge.c */ void pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, struct pf_poolhashkey *key, sa_family_t af) { u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; switch (af) { #ifdef INET case AF_INET: a += inaddr->addr32[0]; b += key->key32[1]; mix(a, b, c); hash->addr32[0] = c + key->key32[2]; break; #endif /* INET */ #ifdef INET6 case AF_INET6: a += inaddr->addr32[0]; b += inaddr->addr32[2]; mix(a, b, c); hash->addr32[0] = c; a += inaddr->addr32[1]; b += inaddr->addr32[3]; c += key->key32[1]; mix(a, b, c); hash->addr32[1] = c; a += inaddr->addr32[2]; b += inaddr->addr32[1]; c += key->key32[2]; mix(a, b, c); hash->addr32[2] = c; a += inaddr->addr32[3]; b += inaddr->addr32[0]; c += key->key32[3]; mix(a, b, c); hash->addr32[3] = c; break; #endif /* INET6 */ } } int pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) { unsigned char hash[16]; struct pf_pool *rpool = &r->rpool; struct pf_addr *raddr = &rpool->cur->addr.v.a.addr; struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; struct pf_pooladdr *acur = rpool->cur; struct pf_src_node k; if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { k.af = af; PF_ACPY(&k.addr, saddr, af); if (r->rule_flag & PFRULE_RULESRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) k.rule.ptr = r; else k.rule.ptr = NULL; pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { PF_ACPY(naddr, &(*sn)->raddr, af); if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf_map_addr: src tracking maps "); pf_print_host(&k.addr, 0, af); printf(" to "); pf_print_host(naddr, 0, af); printf("\n"); } return (0); } } if (rpool->cur->addr.type == PF_ADDR_NOROUTE) return (1); if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { switch (af) { #ifdef INET case AF_INET: if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) return (1); raddr = &rpool->cur->addr.p.dyn->pfid_addr4; rmask = &rpool->cur->addr.p.dyn->pfid_mask4; break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) return (1); raddr = &rpool->cur->addr.p.dyn->pfid_addr6; rmask = &rpool->cur->addr.p.dyn->pfid_mask6; break; #endif /* INET6 */ } } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) return (1); /* unsupported */ } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; } switch (rpool->opts & PF_POOL_TYPEMASK) { case PF_POOL_NONE: PF_ACPY(naddr, raddr, af); break; case PF_POOL_BITMASK: PF_POOLMASK(naddr, raddr, rmask, saddr, af); break; case PF_POOL_RANDOM: if (init_addr != NULL && PF_AZERO(init_addr, af)) { switch (af) { #ifdef INET case AF_INET: rpool->counter.addr32[0] = htonl(arc4random()); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (rmask->addr32[3] != 0xffffffff) rpool->counter.addr32[3] = htonl(arc4random()); else break; if (rmask->addr32[2] != 0xffffffff) rpool->counter.addr32[2] = htonl(arc4random()); else break; if (rmask->addr32[1] != 0xffffffff) rpool->counter.addr32[1] = htonl(arc4random()); else break; if (rmask->addr32[0] != 0xffffffff) rpool->counter.addr32[0] = htonl(arc4random()); break; #endif /* 
INET6 */ } PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); PF_ACPY(init_addr, naddr, af); } else { PF_AINC(&rpool->counter, af); PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); } break; case PF_POOL_SRCHASH: pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); break; case PF_POOL_ROUNDROBIN: if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, &raddr, &rmask, af)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, &rpool->tblidx, &rpool->counter, &raddr, &rmask, af)) goto get_addr; } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) goto get_addr; try_next: if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) rpool->cur = TAILQ_FIRST(&rpool->list); if (rpool->cur->addr.type == PF_ADDR_TABLE) { rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, &raddr, &rmask, af)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; return (1); } } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, &rpool->tblidx, &rpool->counter, &raddr, &rmask, af)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; return (1); } } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; PF_ACPY(&rpool->counter, raddr, af); } get_addr: PF_ACPY(naddr, &rpool->counter, af); if (init_addr != NULL && PF_AZERO(init_addr, af)) PF_ACPY(init_addr, naddr, af); PF_AINC(&rpool->counter, af); break; } if (*sn != NULL) PF_ACPY(&(*sn)->raddr, naddr, af); if (pf_status.debug >= PF_DEBUG_MISC && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { printf("pf_map_addr: selected address "); pf_print_host(naddr, 0, af); printf("\n"); } return (0); } int pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, struct pf_src_node **sn) { - struct pf_state key; + struct pf_state_cmp key; struct pf_addr init_addr; u_int16_t cut; bzero(&init_addr, sizeof(init_addr)); if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) return (1); if (proto == IPPROTO_ICMP) { low = 1; high = 65535; } do { key.af = af; key.proto = proto; PF_ACPY(&key.ext.addr, daddr, key.af); PF_ACPY(&key.gwy.addr, naddr, key.af); key.ext.port = dport; /* * port search; start random, step; * similar 2 portloop in in_pcbbind */ if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || proto == IPPROTO_ICMP)) { key.gwy.port = dport; if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) return (0); } else if (low == 0 && high == 0) { key.gwy.port = *nport; if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) return (0); } else if (low == high) { key.gwy.port = htons(low); if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) { *nport = htons(low); return (0); } } else { u_int16_t tmp; if (low > high) { tmp = low; low = high; high = tmp; } /* low < high */ cut = htonl(arc4random()) % (1 + high - low) + low; /* low <= cut <= high */ for (tmp = cut; tmp <= high; ++(tmp)) { key.gwy.port = htons(tmp); if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) { *nport = htons(tmp); return (0); } } for (tmp = cut - 1; tmp >= low; --(tmp)) { key.gwy.port = htons(tmp); if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == 
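The source-port search in pf_get_sport() around this point picks a random cut in [low, high], probes upward from cut, then downward from cut - 1 (the downward loop continues just below). A simplified standalone sketch of that probing order, with a hypothetical port_in_use() standing in for the pf_find_state_all() collision check:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>    /* arc4random() on BSD; substitute as needed */

/* Hypothetical stand-in for the pf_find_state_all() collision check. */
static int
port_in_use(uint16_t port)
{
    return (port % 7 == 0);        /* arbitrary demo predicate */
}

/* Probe order of pf_get_sport(): random cut, scan up, then scan down. */
static int
pick_port(uint16_t low, uint16_t high, uint16_t *chosen)
{
    uint32_t cut, tmp;

    cut = low + arc4random() % (1 + high - low);
    for (tmp = cut; tmp <= high; tmp++)
        if (!port_in_use((uint16_t)tmp)) {
            *chosen = (uint16_t)tmp;
            return (0);
        }
    for (tmp = cut - 1; tmp >= low && tmp <= high; tmp--)
        if (!port_in_use((uint16_t)tmp)) {
            *chosen = (uint16_t)tmp;
            return (0);
        }
    return (-1);                   /* none available */
}

int
main(void)
{
    uint16_t p;

    if (pick_port(50000, 50010, &p) == 0)
        printf("chose %u\n", p);
    return (0);
}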
NULL) { *nport = htons(tmp); return (0); } } } switch (r->rpool.opts & PF_POOL_TYPEMASK) { case PF_POOL_RANDOM: case PF_POOL_ROUNDROBIN: if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) return (1); break; case PF_POOL_NONE: case PF_POOL_SRCHASH: case PF_POOL_BITMASK: default: return (1); } } while (! PF_AEQ(&init_addr, naddr, af) ); return (1); /* none available */ } struct pf_rule * pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, struct pf_addr *daddr, u_int16_t dport, int rs_num) { struct pf_rule *r, *rm = NULL; struct pf_ruleset *ruleset = NULL; - struct pf_tag *pftag = NULL; int tag = -1; + int rtableid = -1; int asd = 0; r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); while (r && rm == NULL) { struct pf_rule_addr *src = NULL, *dst = NULL; struct pf_addr_wrap *xdst = NULL; if (r->action == PF_BINAT && direction == PF_IN) { src = &r->dst; if (r->rpool.cur != NULL) xdst = &r->rpool.cur->addr; } else { src = &r->src; dst = &r->dst; } r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != pd->af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->neg)) + else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, + src->neg, kif)) r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : PF_SKIP_DST_ADDR].ptr; else if (src->port_op && !pf_match_port(src->port_op, src->port[0], src->port[1], sport)) r = r->skip[src == &r->src ? 
PF_SKIP_SRC_PORT : PF_SKIP_DST_PORT].ptr; else if (dst != NULL && - PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg)) + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0)) + else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, + 0, NULL)) r = TAILQ_NEXT(r, entries); else if (dst != NULL && dst->port_op && !pf_match_port(dst->port_op, dst->port[0], dst->port[1], dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, pd->hdr.tcp), r->os_fingerprint))) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { rm = r; } else - pf_step_into_anchor(&asd, &ruleset, rs_num, &r, NULL); + pf_step_into_anchor(&asd, &ruleset, rs_num, + &r, NULL, NULL); } if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, NULL); + pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, + NULL, NULL); } - if (pf_tag_packet(m, pftag, tag)) + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) return (NULL); if (rm != NULL && (rm->action == PF_NONAT || rm->action == PF_NORDR || rm->action == PF_NOBINAT)) return (NULL); return (rm); } struct pf_rule * pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, struct pfi_kif *kif, struct pf_src_node **sn, struct pf_addr *saddr, u_int16_t sport, struct pf_addr *daddr, u_int16_t dport, struct pf_addr *naddr, u_int16_t *nport) { struct pf_rule *r = NULL; if (direction == PF_OUT) { r = pf_match_translation(pd, m, off, direction, kif, saddr, sport, daddr, dport, PF_RULESET_BINAT); if (r == NULL) r = pf_match_translation(pd, m, off, direction, kif, saddr, sport, daddr, dport, PF_RULESET_NAT); } else { r = pf_match_translation(pd, m, off, direction, kif, saddr, sport, daddr, dport, PF_RULESET_RDR); if (r == NULL) r = pf_match_translation(pd, m, off, direction, kif, saddr, sport, daddr, dport, PF_RULESET_BINAT); } if (r != NULL) { switch (r->action) { case PF_NONAT: case PF_NOBINAT: case PF_NORDR: return (NULL); case PF_NAT: if (pf_get_sport(pd->af, pd->proto, r, saddr, daddr, dport, naddr, nport, r->rpool.proxy_port[0], r->rpool.proxy_port[1], sn)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: NAT proxy port allocation " "(%u-%u) failed\n", r->rpool.proxy_port[0], r->rpool.proxy_port[1])); return (NULL); } break; case PF_BINAT: switch (direction) { case PF_OUT: if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ switch (pd->af) { #ifdef INET case AF_INET: if (r->rpool.cur->addr.p.dyn-> pfid_acnt4 < 1) return (NULL); PF_POOLMASK(naddr, &r->rpool.cur->addr.p.dyn-> pfid_addr4, &r->rpool.cur->addr.p.dyn-> pfid_mask4, saddr, AF_INET); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (r->rpool.cur->addr.p.dyn-> pfid_acnt6 < 1) return (NULL); PF_POOLMASK(naddr, &r->rpool.cur->addr.p.dyn-> pfid_addr6, &r->rpool.cur->addr.p.dyn-> pfid_mask6, saddr, AF_INET6); break; #endif /* INET6 */ } } else PF_POOLMASK(naddr, &r->rpool.cur->addr.v.a.addr, &r->rpool.cur->addr.v.a.mask, saddr, pd->af); break; case PF_IN: if (r->src.addr.type == PF_ADDR_DYNIFTL) { switch (pd->af) { #ifdef INET case AF_INET: if (r->src.addr.p.dyn-> pfid_acnt4 < 1) return (NULL); PF_POOLMASK(naddr, &r->src.addr.p.dyn-> pfid_addr4, &r->src.addr.p.dyn-> 
pfid_mask4, daddr, AF_INET); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (r->src.addr.p.dyn-> pfid_acnt6 < 1) return (NULL); PF_POOLMASK(naddr, &r->src.addr.p.dyn-> pfid_addr6, &r->src.addr.p.dyn-> pfid_mask6, daddr, AF_INET6); break; #endif /* INET6 */ } } else PF_POOLMASK(naddr, &r->src.addr.v.a.addr, &r->src.addr.v.a.mask, daddr, pd->af); break; } break; case PF_RDR: { if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) return (NULL); if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask, daddr, pd->af); if (r->rpool.proxy_port[1]) { u_int32_t tmp_nport; tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) % (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] + 1)) + r->rpool.proxy_port[0]; /* wrap around if necessary */ if (tmp_nport > 65535) tmp_nport -= 65535; *nport = htons((u_int16_t)tmp_nport); } else if (r->rpool.proxy_port[0]) *nport = htons(r->rpool.proxy_port[0]); break; } default: return (NULL); } } return (r); } int -pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) +pf_socket_lookup(int direction, struct pf_pdesc *pd) { struct pf_addr *saddr, *daddr; u_int16_t sport, dport; struct inpcbtable *tb; struct inpcb *inp; - *uid = UID_MAX; - *gid = GID_MAX; + if (pd == NULL) + return (-1); + pd->lookup.uid = UID_MAX; + pd->lookup.gid = GID_MAX; + pd->lookup.pid = NO_PID; switch (pd->proto) { case IPPROTO_TCP: + if (pd->hdr.tcp == NULL) + return (-1); sport = pd->hdr.tcp->th_sport; dport = pd->hdr.tcp->th_dport; tb = &tcbtable; break; case IPPROTO_UDP: + if (pd->hdr.udp == NULL) + return (-1); sport = pd->hdr.udp->uh_sport; dport = pd->hdr.udp->uh_dport; tb = &udbtable; break; default: - return (0); + return (-1); } if (direction == PF_IN) { saddr = pd->src; daddr = pd->dst; } else { u_int16_t p; p = sport; sport = dport; dport = p; saddr = pd->dst; daddr = pd->src; } switch (pd->af) { #ifdef INET case AF_INET: inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport); if (inp == NULL) { inp = in_pcblookup_listen(tb, daddr->v4, dport, 0); if (inp == NULL) - return (0); + return (-1); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, dport); if (inp == NULL) { inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0); if (inp == NULL) - return (0); + return (-1); } break; #endif /* INET6 */ default: - return (0); + return (-1); } - *uid = inp->inp_socket->so_euid; - *gid = inp->inp_socket->so_egid; + pd->lookup.uid = inp->inp_socket->so_euid; + pd->lookup.gid = inp->inp_socket->so_egid; + pd->lookup.pid = inp->inp_socket->so_cpid; return (1); } u_int8_t pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; u_int8_t wscale = 0; hlen = th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) return (0); if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) return (0); opt = hdr + sizeof(struct tcphdr); hlen -= sizeof(struct tcphdr); while (hlen >= 3) { switch (*opt) { case TCPOPT_EOL: case TCPOPT_NOP: ++opt; --hlen; break; case TCPOPT_WINDOW: wscale = opt[2]; if (wscale > TCP_MAX_WINSHIFT) wscale = TCP_MAX_WINSHIFT; wscale |= PF_WSCALE_FLAG; /* FALLTHROUGH */ default: optlen = opt[1]; if (optlen < 2) optlen = 2; hlen -= optlen; opt += optlen; break; } } return (wscale); } u_int16_t pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; u_int16_t mss = tcp_mssdflt; hlen = 
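The PF_RDR case above maps the packet's destination port onto the redirect range by its offset from the rule's first destination port, modulo the range size, wrapping at 65535. A worked example, assuming a hypothetical rule that redirects ports 1000:1009 to 9000:9004:

#include <stdint.h>
#include <stdio.h>

/* PF_RDR port mapping from the hunk above (host byte order, simplified;
 * the no-proxy-port case is elided). */
static uint16_t
rdr_map_port(uint16_t dport, uint16_t dst_lo, uint16_t px_lo, uint16_t px_hi)
{
    uint32_t nport;

    if (px_hi) {    /* a range of proxy ports was given */
        nport = ((dport - dst_lo) % (px_hi - px_lo + 1)) + px_lo;
        if (nport > 65535)    /* wrap around if necessary */
            nport -= 65535;
    } else
        nport = px_lo;        /* single proxy port */
    return ((uint16_t)nport);
}

int
main(void)
{
    /* rdr ... port 1000:1009 -> ... port 9000:9004 (hypothetical rule) */
    printf("%u\n", rdr_map_port(1003, 1000, 9000, 9004));    /* 9003 */
    printf("%u\n", rdr_map_port(1007, 1000, 9000, 9004));    /* 9002 */
    return (0);
}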
th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) return (0); if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) return (0); opt = hdr + sizeof(struct tcphdr); hlen -= sizeof(struct tcphdr); while (hlen >= TCPOLEN_MAXSEG) { switch (*opt) { case TCPOPT_EOL: case TCPOPT_NOP: ++opt; --hlen; break; case TCPOPT_MAXSEG: bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); NTOHS(mss); /* FALLTHROUGH */ default: optlen = opt[1]; if (optlen < 2) optlen = 2; hlen -= optlen; opt += optlen; break; } } return (mss); } u_int16_t pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) { #ifdef INET struct sockaddr_in *dst; struct route ro; #endif /* INET */ #ifdef INET6 struct sockaddr_in6 *dst6; struct route_in6 ro6; #endif /* INET6 */ struct rtentry *rt = NULL; int hlen; u_int16_t mss = tcp_mssdflt; switch (af) { #ifdef INET case AF_INET: hlen = sizeof(struct ip); bzero(&ro, sizeof(ro)); dst = (struct sockaddr_in *)&ro.ro_dst; dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; rtalloc_noclone(&ro, NO_CLONING); rt = ro.ro_rt; break; #endif /* INET */ #ifdef INET6 case AF_INET6: hlen = sizeof(struct ip6_hdr); bzero(&ro6, sizeof(ro6)); dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; rtalloc_noclone((struct route *)&ro6, NO_CLONING); rt = ro6.ro_rt; break; #endif /* INET6 */ } if (rt && rt->rt_ifp) { mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); mss = max(tcp_mssdflt, mss); RTFREE(rt); } mss = min(mss, offer); mss = max(mss, 64); /* sanity - at least max opt space */ return (mss); } void pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) { struct pf_rule *r = s->rule.ptr; s->rt_kif = NULL; if (!r->rt || r->rt == PF_FASTROUTE) return; switch (s->af) { #ifdef INET case AF_INET: pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, &s->nat_src_node); s->rt_kif = r->rpool.cur->kif; break; #endif /* INET */ #ifdef INET6 case AF_INET6: pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, &s->nat_src_node); s->rt_kif = r->rpool.cur->kif; break; #endif /* INET6 */ } } int pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq) { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; struct tcphdr *th = pd->hdr.tcp; u_int16_t bport, nport = 0; sa_family_t af = pd->af; - int lookup = -1; - uid_t uid; - gid_t gid; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; u_short reason; int rewrite = 0; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; u_int16_t mss = tcp_mssdflt; int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); return (PF_DROP); } r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); if (direction == PF_OUT) { bport = nport = th->th_sport; /* check outgoing packet for BINAT/NAT */ if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, saddr, th->th_sport, daddr, th->th_dport, &pd->naddr, &nport)) != NULL) { PF_ACPY(&pd->baddr, saddr, af); pf_change_ap(saddr, &th->th_sport, pd->ip_sum, &th->th_sum, &pd->naddr, nport, 0, af); rewrite++; if (nr->natpass) r = NULL; pd->nat_rule = nr; } } else { bport = nport = th->th_dport; /* check incoming packet for BINAT/RDR */ if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, saddr, 
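pf_calc_mss() above derives the MSS from the route's interface MTU minus IP and TCP header overhead, raised to tcp_mssdflt, capped at the peer's offer, and floored at 64 bytes. The same arithmetic in isolation (IPv4 header sizes; the 512-byte default is the sketch's assumption for tcp_mssdflt):

#include <stdint.h>
#include <stdio.h>

#define TCP_MSSDFLT    512    /* assumed default for tcp_mssdflt */

/* MSS derivation from pf_calc_mss(), IPv4 case, for a given link MTU. */
static uint16_t
calc_mss(uint16_t mtu, uint16_t offer)
{
    uint16_t mss = mtu - 20 /* struct ip */ - 20 /* struct tcphdr */;

    if (mss < TCP_MSSDFLT)
        mss = TCP_MSSDFLT;
    if (mss > offer)
        mss = offer;           /* never raise above the peer's offer */
    if (mss < 64)
        mss = 64;              /* sanity - at least max opt space */
    return (mss);
}

int
main(void)
{
    printf("%u\n", calc_mss(1500, 1460));    /* 1460: Ethernet */
    printf("%u\n", calc_mss(1492, 1460));    /* 1452: PPPoE */
    return (0);
}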
th->th_sport, daddr, th->th_dport, &pd->naddr, &nport)) != NULL) { PF_ACPY(&pd->baddr, daddr, af); pf_change_ap(daddr, &th->th_dport, pd->ip_sum, &th->th_sum, &pd->naddr, nport, 0, af); rewrite++; if (nr->natpass) r = NULL; pd->nat_rule = nr; } } while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != IPPROTO_TCP) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if ((r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); - else if (r->uid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], - uid)) + pd->lookup.uid)) r = TAILQ_NEXT(r, entries); - else if (r->gid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], - gid)) + pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint)) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; ruleset = *rsm; REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { if (rewrite) m_copyback(m, off, sizeof(*th), th); - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, + a, ruleset, pd); } if ((r->action == PF_DROP) && ((r->rule_flag & PFRULE_RETURNRST) || (r->rule_flag & PFRULE_RETURNICMP) || (r->rule_flag & PFRULE_RETURN))) { /* undo NAT changes, if they have taken place */ if (nr != NULL) { if (direction == PF_OUT) { pf_change_ap(saddr, &th->th_sport, pd->ip_sum, &th->th_sum, &pd->baddr, bport, 0, af); rewrite++; } else { pf_change_ap(daddr, &th->th_dport, pd->ip_sum, &th->th_sum, &pd->baddr, bport, 0, af); rewrite++; } } if (((r->rule_flag & PFRULE_RETURNRST) || (r->rule_flag & PFRULE_RETURN)) && !(th->th_flags & TH_RST)) { u_int32_t ack = ntohl(th->th_seq) + pd->p_len; if (th->th_flags & TH_SYN) ack++; if (th->th_flags & TH_FIN) ack++; pf_send_tcp(r, af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl, 1, pd->eh, kif->pfik_ifp); + r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); } else if ((af == AF_INET) && r->return_icmp) pf_send_icmp(m, r->return_icmp >> 8, r->return_icmp & 255, af, r); else if ((af == AF_INET6) && r->return_icmp6) pf_send_icmp(m, r->return_icmp6 >> 8, r->return_icmp6 & 255, af, r); } if (r->action == PF_DROP) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } if (r->keep_state || nr != NULL || (pd->flags & PFDESC_TCP_NORM)) { /* create new state */ u_int16_t len; struct pf_state *s = NULL; struct pf_src_node *sn = NULL; len = pd->tot_len - off - (th->th_off << 2); /* check maximums */ if (r->max_states && (r->states >= r->max_states)) { pf_status.lcounters[LCNT_STATES]++; REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; pf_status.src_nodes--; pool_put(&pf_src_tree_pl, sn); } if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } return (PF_DROP); } bzero(s, sizeof(*s)); s->rule.ptr = r; s->nat_rule.ptr = nr; s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = IPPROTO_TCP; s->direction = direction; s->af = af; if (direction == PF_OUT) { PF_ACPY(&s->gwy.addr, saddr, af); s->gwy.port = th->th_sport; /* sport */ PF_ACPY(&s->ext.addr, daddr, af); s->ext.port = th->th_dport; if (nr != NULL) { PF_ACPY(&s->lan.addr, &pd->baddr, af); s->lan.port = bport; } else { PF_ACPY(&s->lan.addr, &s->gwy.addr, af); s->lan.port = s->gwy.port; } } else { PF_ACPY(&s->lan.addr, daddr, af); s->lan.port = th->th_dport; PF_ACPY(&s->ext.addr, saddr, af); s->ext.port = th->th_sport; if (nr != NULL) { 
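On the PFRULE_RETURNRST path above, the forged RST must ACK everything the dropped segment consumed in sequence space: the payload length, plus one if SYN is set and one if FIN is set. That accounting, reduced to a helper:

#include <stdint.h>
#include <stdio.h>

#define TH_FIN    0x01
#define TH_SYN    0x02

/* ACK value for a forged RST, as computed before pf_send_tcp() above. */
static uint32_t
rst_ack(uint32_t seq, uint16_t paylen, uint8_t flags)
{
    uint32_t ack = seq + paylen;

    if (flags & TH_SYN)    /* a SYN occupies one sequence number */
        ack++;
    if (flags & TH_FIN)    /* so does a FIN */
        ack++;
    return (ack);
}

int
main(void)
{
    /* A bare SYN: ack the SYN itself. */
    printf("%u\n", rst_ack(1000, 0, TH_SYN));    /* 1001 */
    /* 100 bytes of data with FIN set. */
    printf("%u\n", rst_ack(1000, 100, TH_FIN));  /* 1101 */
    return (0);
}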
PF_ACPY(&s->gwy.addr, &pd->baddr, af); s->gwy.port = bport; } else { PF_ACPY(&s->gwy.addr, &s->lan.addr, af); s->gwy.port = s->lan.port; } } s->src.seqlo = ntohl(th->th_seq); s->src.seqhi = s->src.seqlo + len + 1; if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_MODULATE) { /* Generate sequence number modulator */ - while ((s->src.seqdiff = htonl(arc4random())) == 0) + while ((s->src.seqdiff = + tcp_rndiss_next() - s->src.seqlo) == 0) ; pf_change_a(&th->th_seq, &th->th_sum, htonl(s->src.seqlo + s->src.seqdiff), 0); rewrite = 1; } else s->src.seqdiff = 0; if (th->th_flags & TH_SYN) { s->src.seqhi++; s->src.wscale = pf_get_wscale(m, off, th->th_off, af); } s->src.max_win = MAX(ntohs(th->th_win), 1); if (s->src.wscale & PF_WSCALE_MASK) { /* Remove scale factor from initial window */ int win = s->src.max_win; win += 1 << (s->src.wscale & PF_WSCALE_MASK); s->src.max_win = (win - 1) >> (s->src.wscale & PF_WSCALE_MASK); } if (th->th_flags & TH_FIN) s->src.seqhi++; s->dst.seqhi = 1; s->dst.max_win = 1; s->src.state = TCPS_SYN_SENT; s->dst.state = TCPS_CLOSED; s->creation = time_second; s->expire = time_second; s->timeout = PFTM_TCP_FIRST_PACKET; pf_set_rt_ifp(s, saddr); if (sn != NULL) { s->src_node = sn; s->src_node->states++; } if (nsn != NULL) { PF_ACPY(&nsn->raddr, &pd->naddr, af); s->nat_src_node = nsn; s->nat_src_node->states++; } if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, off, pd, th, &s->src, &s->dst)) { REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, &s->src, &s->dst, &rewrite)) { /* This really shouldn't happen!!! 
*/ DPFPRINTF(PF_DEBUG_URGENT, ("pf_normalize_tcp_stateful failed on first pkt")); pf_normalize_tcp_cleanup(s); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { pf_normalize_tcp_cleanup(s); REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; if (tag > 0) { pf_tag_ref(tag); s->tag = tag; } if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { s->src.state = PF_TCPS_PROXY_SRC; if (nr != NULL) { if (direction == PF_OUT) { pf_change_ap(saddr, &th->th_sport, pd->ip_sum, &th->th_sum, &pd->baddr, bport, 0, af); } else { pf_change_ap(daddr, &th->th_dport, pd->ip_sum, &th->th_sum, &pd->baddr, bport, 0, af); } } s->src.seqhi = htonl(arc4random()); /* Find mss option */ mss = pf_get_mss(m, off, th->th_off, af); mss = pf_calc_mss(saddr, af, mss); mss = pf_calc_mss(daddr, af, mss); s->src.mss = mss; pf_send_tcp(r, af, daddr, saddr, th->th_dport, th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, NULL, NULL); + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); REASON_SET(&reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } } /* copy back packet headers if we performed NAT operations */ if (rewrite) m_copyback(m, off, sizeof(*th), th); return (PF_PASS); } int pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq) { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; struct udphdr *uh = pd->hdr.udp; u_int16_t bport, nport = 0; sa_family_t af = pd->af; - int lookup = -1; - uid_t uid; - gid_t gid; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; u_short reason; int rewrite = 0; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); return (PF_DROP); } r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); if (direction == PF_OUT) { bport = nport = uh->uh_sport; /* check outgoing packet for BINAT/NAT */ if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr, &nport)) != NULL) { PF_ACPY(&pd->baddr, saddr, af); pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum, &uh->uh_sum, &pd->naddr, nport, 1, af); rewrite++; if (nr->natpass) r = NULL; pd->nat_rule = nr; } } else { bport = nport = uh->uh_dport; /* check incoming packet for BINAT/RDR */ if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr, &nport)) != NULL) { PF_ACPY(&pd->baddr, daddr, af); pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum, &uh->uh_sum, &pd->naddr, nport, 1, af); rewrite++; if (nr->natpass) r = NULL; pd->nat_rule = nr; } } while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != IPPROTO_UDP) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) + else if 
(PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], uh->uh_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], uh->uh_dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); - else if (r->uid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], - uid)) + pd->lookup.uid)) r = TAILQ_NEXT(r, entries); - else if (r->gid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], - gid)) + pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; ruleset = *rsm; REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { if (rewrite) m_copyback(m, off, sizeof(*uh), uh); - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, + a, ruleset, pd); } if ((r->action == PF_DROP) && ((r->rule_flag & PFRULE_RETURNICMP) || (r->rule_flag & PFRULE_RETURN))) { /* undo NAT changes, if they have taken place */ if (nr != NULL) { if (direction == PF_OUT) { pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum, &uh->uh_sum, &pd->baddr, bport, 1, af); rewrite++; } else { pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum, &uh->uh_sum, &pd->baddr, bport, 1, af); rewrite++; } } if ((af == AF_INET) && r->return_icmp) pf_send_icmp(m, r->return_icmp >> 8, r->return_icmp & 255, af, r); else if ((af == AF_INET6) && r->return_icmp6) pf_send_icmp(m, r->return_icmp6 >> 8, r->return_icmp6 & 255, af, r); } if (r->action == PF_DROP) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } if (r->keep_state || nr != NULL) { /* create new state */ struct pf_state *s = NULL; struct pf_src_node *sn = NULL; /* check maximums */ if (r->max_states && (r->states >= r->max_states)) { pf_status.lcounters[LCNT_STATES]++; REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; pf_status.src_nodes--; pool_put(&pf_src_tree_pl, sn); } if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } return (PF_DROP); } bzero(s, sizeof(*s)); s->rule.ptr = r; s->nat_rule.ptr = nr; s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = IPPROTO_UDP; s->direction = direction; s->af = af; if (direction == PF_OUT) { PF_ACPY(&s->gwy.addr, saddr, af); s->gwy.port = uh->uh_sport; PF_ACPY(&s->ext.addr, daddr, af); s->ext.port = uh->uh_dport; if (nr != NULL) { PF_ACPY(&s->lan.addr, &pd->baddr, af); s->lan.port = bport; } else { PF_ACPY(&s->lan.addr, &s->gwy.addr, af); s->lan.port = s->gwy.port; } } else { PF_ACPY(&s->lan.addr, daddr, af); s->lan.port = uh->uh_dport; PF_ACPY(&s->ext.addr, saddr, af); s->ext.port = uh->uh_sport; if (nr != NULL) { PF_ACPY(&s->gwy.addr, &pd->baddr, af); s->gwy.port = bport; } else { PF_ACPY(&s->gwy.addr, &s->lan.addr, af); s->gwy.port = s->lan.port; } } s->src.state = PFUDPS_SINGLE; s->dst.state = PFUDPS_NO_TRAFFIC; s->creation = time_second; s->expire = time_second; s->timeout = PFTM_UDP_FIRST_PACKET; pf_set_rt_ifp(s, saddr); if (sn != NULL) { s->src_node = sn; s->src_node->states++; } if (nsn != NULL) { PF_ACPY(&nsn->raddr, &pd->naddr, af); s->nat_src_node = nsn; s->nat_src_node->states++; } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { REASON_SET(&reason, PFRES_STATEINS); 
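Each state-creation block here fills the lan/gwy/ext triple the same way: outbound, gwy is the already-translated source and lan the pre-translation original; inbound, lan is the rewritten destination and gwy the original. A compressed sketch with simplified, hypothetical types:

#include <stdint.h>
#include <stdio.h>

enum { PF_IN, PF_OUT };

struct host { uint32_t addr; uint16_t port; };
struct skey { struct host lan, gwy, ext; };

/*
 * translated is nonzero when a NAT/RDR rule rewrote the packet
 * (nr != NULL above); orig then holds the pre-translation pair.
 */
static void
fill_key(struct skey *k, int dir, struct host src, struct host dst,
    struct host orig, int translated)
{
    if (dir == PF_OUT) {
        k->gwy = src;                /* post-NAT source */
        k->ext = dst;
        k->lan = translated ? orig : src;
    } else {
        k->lan = dst;                /* post-RDR destination */
        k->ext = src;
        k->gwy = translated ? orig : dst;
    }
}

int
main(void)
{
    struct host src = { 0x0a000001, 1234 }, dst = { 0xc6336401, 80 };
    struct host orig = { 0xc0a80105, 1234 };
    struct skey k;

    fill_key(&k, PF_OUT, src, dst, orig, 1);
    printf("lan port %u gwy port %u\n", k.lan.port, k.gwy.port);
    return (0);
}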
pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; if (tag > 0) { pf_tag_ref(tag); s->tag = tag; } } /* copy back packet headers if we performed NAT operations */ if (rewrite) m_copyback(m, off, sizeof(*uh), uh); return (PF_PASS); } int pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq) { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; u_short reason; u_int16_t icmpid, bport, nport = 0; sa_family_t af = pd->af; u_int8_t icmptype, icmpcode; int state_icmp = 0; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; #ifdef INET6 int rewrite = 0; #endif /* INET6 */ int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); return (PF_DROP); } switch (pd->proto) { #ifdef INET case IPPROTO_ICMP: icmptype = pd->hdr.icmp->icmp_type; icmpcode = pd->hdr.icmp->icmp_code; icmpid = pd->hdr.icmp->icmp_id; if (icmptype == ICMP_UNREACH || icmptype == ICMP_SOURCEQUENCH || icmptype == ICMP_REDIRECT || icmptype == ICMP_TIMXCEED || icmptype == ICMP_PARAMPROB) state_icmp++; break; #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: icmptype = pd->hdr.icmp6->icmp6_type; icmpcode = pd->hdr.icmp6->icmp6_code; icmpid = pd->hdr.icmp6->icmp6_id; if (icmptype == ICMP6_DST_UNREACH || icmptype == ICMP6_PACKET_TOO_BIG || icmptype == ICMP6_TIME_EXCEEDED || icmptype == ICMP6_PARAM_PROB) state_icmp++; break; #endif /* INET6 */ } r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); if (direction == PF_OUT) { bport = nport = icmpid; /* check outgoing packet for BINAT/NAT */ if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) != NULL) { PF_ACPY(&pd->baddr, saddr, af); switch (af) { #ifdef INET case AF_INET: pf_change_a(&saddr->v4.s_addr, pd->ip_sum, pd->naddr.v4.s_addr, 0); pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( pd->hdr.icmp->icmp_cksum, icmpid, nport, 0); pd->hdr.icmp->icmp_id = nport; m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 case AF_INET6: pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, &pd->naddr, 0); rewrite++; break; #endif /* INET6 */ } if (nr->natpass) r = NULL; pd->nat_rule = nr; } } else { bport = nport = icmpid; /* check incoming packet for BINAT/RDR */ if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) != NULL) { PF_ACPY(&pd->baddr, daddr, af); switch (af) { #ifdef INET case AF_INET: pf_change_a(&daddr->v4.s_addr, pd->ip_sum, pd->naddr.v4.s_addr, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, &pd->naddr, 0); rewrite++; break; #endif /* INET6 */ } if (nr->natpass) r = NULL; pd->nat_rule = nr; } } while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg)) + 
else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->type && r->type != icmptype + 1) r = TAILQ_NEXT(r, entries); else if (r->code && r->code != icmpcode + 1) r = TAILQ_NEXT(r, entries); - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; ruleset = *rsm; REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { #ifdef INET6 if (rewrite) m_copyback(m, off, sizeof(struct icmp6_hdr), pd->hdr.icmp6); #endif /* INET6 */ - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
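A recurring change in these hunks: PFLOG_PACKET() now also fires when a pass-through translation rule (natpass) carries log, and that rule is the one logged when the filter rule itself is silent. The selection predicate, extracted:

#include <stdio.h>

struct rule { int log; int natpass; };

/* Which rule, if any, gets logged, per the PFLOG_PACKET() calls above. */
static const struct rule *
log_rule(const struct rule *r, const struct rule *nr)
{
    if (r->log)
        return (r);                    /* filter rule logs */
    if (nr != NULL && nr->natpass && nr->log)
        return (nr);                   /* natpass translation rule logs */
    return (NULL);                     /* nothing to log */
}

int
main(void)
{
    struct rule pass = { 0, 0 }, natlog = { 1, 1 };

    printf("%s\n", log_rule(&pass, &natlog) ? "log" : "silent");  /* log */
    printf("%s\n", log_rule(&pass, NULL) ? "log" : "silent");     /* silent */
    return (0);
}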
r : nr, + a, ruleset, pd); } if (r->action != PF_PASS) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } if (!state_icmp && (r->keep_state || nr != NULL)) { /* create new state */ struct pf_state *s = NULL; struct pf_src_node *sn = NULL; /* check maximums */ if (r->max_states && (r->states >= r->max_states)) { pf_status.lcounters[LCNT_STATES]++; REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; pf_status.src_nodes--; pool_put(&pf_src_tree_pl, sn); } if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } return (PF_DROP); } bzero(s, sizeof(*s)); s->rule.ptr = r; s->nat_rule.ptr = nr; s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = pd->proto; s->direction = direction; s->af = af; if (direction == PF_OUT) { PF_ACPY(&s->gwy.addr, saddr, af); s->gwy.port = nport; PF_ACPY(&s->ext.addr, daddr, af); s->ext.port = 0; if (nr != NULL) { PF_ACPY(&s->lan.addr, &pd->baddr, af); s->lan.port = bport; } else { PF_ACPY(&s->lan.addr, &s->gwy.addr, af); s->lan.port = s->gwy.port; } } else { PF_ACPY(&s->lan.addr, daddr, af); s->lan.port = nport; PF_ACPY(&s->ext.addr, saddr, af); s->ext.port = 0; if (nr != NULL) { PF_ACPY(&s->gwy.addr, &pd->baddr, af); s->gwy.port = bport; } else { PF_ACPY(&s->gwy.addr, &s->lan.addr, af); s->gwy.port = s->lan.port; } } s->creation = time_second; s->expire = time_second; s->timeout = PFTM_ICMP_FIRST_PACKET; pf_set_rt_ifp(s, saddr); if (sn != NULL) { s->src_node = sn; s->src_node->states++; } if (nsn != NULL) { PF_ACPY(&nsn->raddr, &pd->naddr, af); s->nat_src_node = nsn; s->nat_src_node->states++; } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; if (tag > 0) { pf_tag_ref(tag); s->tag = tag; } } #ifdef INET6 /* copy back packet headers if we performed IPv6 NAT operations */ if (rewrite) m_copyback(m, off, sizeof(struct icmp6_hdr), pd->hdr.icmp6); #endif /* INET6 */ return (PF_PASS); } int pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq) { struct pf_rule *nr = NULL; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; struct pf_addr *saddr = 
pd->src, *daddr = pd->dst; sa_family_t af = pd->af; u_short reason; - struct pf_tag *pftag = NULL; - int tag = -1; + int tag = -1, rtableid = -1; int asd = 0; + int match = 0; if (pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); return (PF_DROP); } r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); if (direction == PF_OUT) { /* check outgoing packet for BINAT/NAT */ if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { PF_ACPY(&pd->baddr, saddr, af); switch (af) { #ifdef INET case AF_INET: pf_change_a(&saddr->v4.s_addr, pd->ip_sum, pd->naddr.v4.s_addr, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: PF_ACPY(saddr, &pd->naddr, af); break; #endif /* INET6 */ } if (nr->natpass) r = NULL; pd->nat_rule = nr; } } else { /* check incoming packet for BINAT/RDR */ if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { PF_ACPY(&pd->baddr, daddr, af); switch (af) { #ifdef INET case AF_INET: pf_change_a(&daddr->v4.s_addr, pd->ip_sum, pd->naddr.v4.s_addr, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: PF_ACPY(daddr, &pd->naddr, af); break; #endif /* INET6 */ } if (nr->natpass) r = NULL; pd->nat_rule = nr; } } while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; ruleset = *rsm; REASON_SET(&reason, PFRES_MATCH); - if (r->log) - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + if (r->log || (nr != NULL && nr->natpass && nr->log)) + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, + a, ruleset, pd); if ((r->action == PF_DROP) && ((r->rule_flag & PFRULE_RETURNICMP) || (r->rule_flag & PFRULE_RETURN))) { struct pf_addr *a = NULL; if (nr != NULL) { if (direction == PF_OUT) a = saddr; else a = daddr; } if (a != NULL) { switch (af) { #ifdef INET case AF_INET: pf_change_a(&a->v4.s_addr, pd->ip_sum, pd->baddr.v4.s_addr, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: PF_ACPY(a, &pd->baddr, af); break; #endif /* INET6 */ } } if ((af == AF_INET) && r->return_icmp) pf_send_icmp(m, r->return_icmp >> 8, r->return_icmp & 255, af, r); else if ((af == AF_INET6) && r->return_icmp6) pf_send_icmp(m, r->return_icmp6 >> 8, r->return_icmp6 & 255, af, r); } if (r->action != PF_PASS) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } if (r->keep_state || nr != NULL) { /* create new state */ struct pf_state *s = NULL; struct pf_src_node *sn = NULL; /* check maximums */ if (r->max_states && (r->states >= r->max_states)) { pf_status.lcounters[LCNT_STATES]++; REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; } - /* src node for flter rule */ + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, saddr, af) != 0) { REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; pf_status.src_nodes--; pool_put(&pf_src_tree_pl, sn); } if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } return (PF_DROP); } bzero(s, sizeof(*s)); s->rule.ptr = r; s->nat_rule.ptr = nr; s->anchor.ptr = a; STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = pd->proto; s->direction = direction; s->af = af; if (direction == PF_OUT) { PF_ACPY(&s->gwy.addr, saddr, af); PF_ACPY(&s->ext.addr, daddr, af); if (nr != NULL) PF_ACPY(&s->lan.addr, &pd->baddr, af); else PF_ACPY(&s->lan.addr, &s->gwy.addr, af); } else { PF_ACPY(&s->lan.addr, daddr, af); PF_ACPY(&s->ext.addr, saddr, af); if (nr != NULL) PF_ACPY(&s->gwy.addr, &pd->baddr, af); else PF_ACPY(&s->gwy.addr, &s->lan.addr, af); } s->src.state = PFOTHERS_SINGLE; s->dst.state = PFOTHERS_NO_TRAFFIC; s->creation = time_second; s->expire = time_second; s->timeout = PFTM_OTHER_FIRST_PACKET; pf_set_rt_ifp(s, saddr); if (sn != NULL) { s->src_node = sn; s->src_node->states++; } if (nsn != NULL) { PF_ACPY(&nsn->raddr, &pd->naddr, af); s->nat_src_node = nsn; s->nat_src_node->states++; } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; if (tag > 0) { pf_tag_ref(tag); s->tag = tag; } } return (PF_PASS); } int pf_test_fragment(struct 
pf_rule **rm, int direction, struct pfi_kif *kif, struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) { struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; sa_family_t af = pd->af; u_short reason; - struct pf_tag *pftag = NULL; int tag = -1; int asd = 0; + int match = 0; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->src.port_op || r->dst.port_op || r->flagset || r->type || r->code || r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, &pftag, &tag)) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL) - pf_step_out_of_anchor(&asd, &ruleset, - PF_RULESET_FILTER, &r, &a); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; ruleset = *rsm; REASON_SET(&reason, PFRES_MATCH); if (r->log) - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset, + pd); if (r->action != PF_PASS) return (PF_DROP); - if (pf_tag_packet(m, pftag, tag)) { + if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } return (PF_PASS); } int pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { - struct pf_state key; + struct pf_state_cmp key; struct tcphdr *th = pd->hdr.tcp; u_int16_t win = ntohs(th->th_win); u_int32_t ack, end, seq, orig_seq; u_int8_t sws, dws; int ackskew; int copyback = 0; struct pf_state_peer *src, *dst; key.af = pd->af; key.proto = IPPROTO_TCP; if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd->src, key.af); PF_ACPY(&key.gwy.addr, pd->dst, key.af); key.ext.port = th->th_sport; key.gwy.port = th->th_dport; } else { PF_ACPY(&key.lan.addr, pd->src, key.af); PF_ACPY(&key.ext.addr, pd->dst, key.af); key.lan.port = th->th_sport; key.ext.port = th->th_dport; } STATE_LOOKUP(); if (direction == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { src = &(*state)->dst; dst = &(*state)->src; } if ((*state)->src.state == PF_TCPS_PROXY_SRC) { if (direction != (*state)->direction) { REASON_SET(reason, PFRES_SYNPROXY); return 
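In the synproxy code around this point, pf completes one handshake with the client using an ISN it chose itself, then a second handshake with the server; the two sequence spaces are spliced by the per-direction seqdiff offsets assigned a little further below. The offset arithmetic with made-up numbers (unsigned wraparound intended):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint32_t cli_seqlo  = 1000;      /* client ISN, src.seqlo */
    uint32_t pf_cli_isn = 55555;     /* ISN pf sent to the client, src.seqhi */
    uint32_t srv_seqlo  = 77777;     /* server ISN, dst.seqlo */
    uint32_t pf_srv_isn = 424242;    /* ISN pf sent to the server, dst.seqhi */

    /* as set below: src.seqdiff = dst.seqhi - src.seqlo, and vice versa */
    uint32_t c2s_diff = pf_srv_isn - cli_seqlo;
    uint32_t s2c_diff = pf_cli_isn - srv_seqlo;

    /* a client segment at seq 1001 appears to the server at: */
    printf("%u\n", 1001 + c2s_diff);     /* 424243 */
    /* a server segment at seq 77778 appears to the client at: */
    printf("%u\n", 77778 + s2c_diff);    /* 55556 */
    return (0);
}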
(PF_SYNPROXY_DROP); } if (th->th_flags & TH_SYN) { if (ntohl(th->th_seq) != (*state)->src.seqlo) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, (*state)->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, - NULL, NULL); + 0, NULL, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (!(th->th_flags & TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } else if ((*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } else (*state)->src.state = PF_TCPS_PROXY_DST; } if ((*state)->src.state == PF_TCPS_PROXY_DST) { struct pf_state_host *src, *dst; if (direction == PF_OUT) { src = &(*state)->gwy; dst = &(*state)->ext; } else { src = &(*state)->ext; dst = &(*state)->lan; } if (direction == (*state)->direction) { if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } (*state)->src.max_win = MAX(ntohs(th->th_win), 1); if ((*state)->dst.seqhi == 1) (*state)->dst.seqhi = htonl(arc4random()); pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, &dst->addr, src->port, dst->port, (*state)->dst.seqhi, 0, TH_SYN, 0, - (*state)->src.mss, 0, 0, NULL, NULL); + (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((th->th_flags & (TH_SYN|TH_ACK)) != (TH_SYN|TH_ACK)) || (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } else { (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); (*state)->dst.seqlo = ntohl(th->th_seq); pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, TH_ACK, (*state)->src.max_win, 0, 0, 0, - NULL, NULL); + (*state)->tag, NULL, NULL); pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, &dst->addr, src->port, dst->port, (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, TH_ACK, (*state)->dst.max_win, 0, 0, 1, - NULL, NULL); + 0, NULL, NULL); (*state)->src.seqdiff = (*state)->dst.seqhi - (*state)->src.seqlo; (*state)->dst.seqdiff = (*state)->src.seqhi - (*state)->dst.seqlo; (*state)->src.seqhi = (*state)->src.seqlo + - (*state)->src.max_win; - (*state)->dst.seqhi = (*state)->dst.seqlo + (*state)->dst.max_win; + (*state)->dst.seqhi = (*state)->dst.seqlo + + (*state)->src.max_win; (*state)->src.wscale = (*state)->dst.wscale = 0; (*state)->src.state = (*state)->dst.state = TCPS_ESTABLISHED; REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } } if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { sws = src->wscale & PF_WSCALE_MASK; dws = dst->wscale & PF_WSCALE_MASK; } else sws = dws = 0; /* * Sequence tracking algorithm from Guido van Rooij's paper: * http://www.madison-gurkha.com/publications/tcp_filtering/ * tcp_filtering.ps */ orig_seq = seq = ntohl(th->th_seq); if (src->seqlo == 0) { /* First packet from this end. 
Set its state */ if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) && src->scrub == NULL) { if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { REASON_SET(reason, PFRES_MEMORY); return (PF_DROP); } } /* Deferred generation of sequence number modulator */ if (dst->seqdiff && !src->seqdiff) { - while ((src->seqdiff = htonl(arc4random())) == 0) + while ((src->seqdiff = tcp_rndiss_next() - seq) == 0) ; ack = ntohl(th->th_ack) - dst->seqdiff; pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + src->seqdiff), 0); pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); copyback = 1; } else { ack = ntohl(th->th_ack); } end = seq + pd->p_len; if (th->th_flags & TH_SYN) { end++; if (dst->wscale & PF_WSCALE_FLAG) { src->wscale = pf_get_wscale(m, off, th->th_off, pd->af); if (src->wscale & PF_WSCALE_FLAG) { /* Remove scale factor from initial * window */ sws = src->wscale & PF_WSCALE_MASK; win = ((u_int32_t)win + (1 << sws) - 1) >> sws; dws = dst->wscale & PF_WSCALE_MASK; } else { /* fixup other window */ dst->max_win <<= dst->wscale & PF_WSCALE_MASK; /* in case of a retrans SYN|ACK */ dst->wscale = 0; } } } if (th->th_flags & TH_FIN) end++; src->seqlo = seq; if (src->state < TCPS_SYN_SENT) src->state = TCPS_SYN_SENT; /* * May need to slide the window (seqhi may have been set by * the crappy stack check or if we picked up the connection * after establishment) */ if (src->seqhi == 1 || SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) src->seqhi = end + MAX(1, dst->max_win << dws); if (win > src->max_win) src->max_win = win; } else { ack = ntohl(th->th_ack) - dst->seqdiff; if (src->seqdiff) { /* Modulate sequence numbers */ pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + src->seqdiff), 0); pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); copyback = 1; } end = seq + pd->p_len; if (th->th_flags & TH_SYN) end++; if (th->th_flags & TH_FIN) end++; } if ((th->th_flags & TH_ACK) == 0) { /* Let it pass through the ack skew check */ ack = dst->seqlo; } else if ((ack == 0 && (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || /* broken tcp stacks do not set ack */ (dst->state < TCPS_SYN_SENT)) { /* * Many stacks (ours included) will set the ACK number in an * FIN|ACK if the SYN times out -- no sequence to ACK. */ ack = dst->seqlo; } if (seq == end) { /* Ease sequencing restrictions on no data packets */ seq = src->seqlo; end = seq; } ackskew = dst->seqlo - ack; + + /* + * Need to demodulate the sequence numbers in any TCP SACK options + * (Selective ACK). We could optionally validate the SACK values + * against the current ACK window, either forwards or backwards, but + * I'm not confident that SACK has been implemented properly + * everywhere. It wouldn't surprise me if several stacks accidently + * SACK too far backwards of previously ACKed data. There really aren't + * any security implications of bad SACKing unless the target stack + * doesn't validate the option length correctly. Someone trying to + * spoof into a TCP connection won't bother blindly sending SACK + * options anyway. 
+ */ + if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { + if (pf_modulate_sack(m, off, pd, th, dst)) + copyback = 1; + } + + #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ if (SEQ_GEQ(src->seqhi, end) && /* Last octet inside other's window space */ SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && /* Retrans: not more than one window back */ (ackskew >= -MAXACKWINDOW) && /* Acking not more than one reassembled fragment backwards */ (ackskew <= (MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || - (pd->flags & PFDESC_IP_REAS) == 0)) { - /* Require an exact sequence match on resets when possible */ + (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) { + /* Require an exact/+1 sequence match on resets when possible */ if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, *state, src, dst, &copyback)) return (PF_DROP); } /* update max window */ if (src->max_win < win) src->max_win = win; /* synchronize sequencing */ if (SEQ_GT(end, src->seqlo)) src->seqlo = end; /* slide the window of what the other end can send */ if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) dst->seqhi = ack + MAX((win << sws), 1); /* update states */ if (th->th_flags & TH_SYN) if (src->state < TCPS_SYN_SENT) src->state = TCPS_SYN_SENT; if (th->th_flags & TH_FIN) if (src->state < TCPS_CLOSING) src->state = TCPS_CLOSING; if (th->th_flags & TH_ACK) { if (dst->state == TCPS_SYN_SENT) { dst->state = TCPS_ESTABLISHED; if (src->state == TCPS_ESTABLISHED && (*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } } else if (dst->state == TCPS_CLOSING) dst->state = TCPS_FIN_WAIT_2; } if (th->th_flags & TH_RST) src->state = dst->state = TCPS_TIME_WAIT; /* update expire time */ (*state)->expire = time_second; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; - else if (src->state >= TCPS_FIN_WAIT_2 || - dst->state >= TCPS_FIN_WAIT_2) + else if (src->state >= TCPS_CLOSING && + dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_FIN_WAIT; else if (src->state < TCPS_ESTABLISHED || dst->state < TCPS_ESTABLISHED) (*state)->timeout = PFTM_TCP_OPENING; else if (src->state >= TCPS_CLOSING || dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_CLOSING; else (*state)->timeout = PFTM_TCP_ESTABLISHED; /* Fall through to PASS packet */ } else if ((dst->state < TCPS_SYN_SENT || dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) && SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && /* Within a window forward of the originating packet */ SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { /* Within a window backward of the originating packet */ /* * This currently handles three situations: * 1) Stupid stacks will shotgun SYNs before their peer * replies. * 2) When PF catches an already established stream (the * firewall rebooted, the state table was flushed, routes * changed...) * 3) Packets get funky immediately after the connection * closes (this should catch Solaris spurious ACK|FINs * that web servers like to spew after a close) * * This must be a little more careful than the above code * since packet floods will also be caught here. We don't * update the TTL here to mitigate the damage of a packet * flood and so the same code can handle awkward establishment * and a loosened connection close. 
* In the establishment case, a correct peer response will * validate the connection, go through the normal state code * and keep updating the state TTL. */ if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf: loose state match: "); pf_print_state(*state); pf_print_flags(th->th_flags); - printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n", - seq, ack, pd->p_len, ackskew, - (*state)->packets[0], (*state)->packets[1]); + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len, + ackskew, (*state)->packets[0], + (*state)->packets[1]); } if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, *state, src, dst, &copyback)) return (PF_DROP); } /* update max window */ if (src->max_win < win) src->max_win = win; /* synchronize sequencing */ if (SEQ_GT(end, src->seqlo)) src->seqlo = end; /* slide the window of what the other end can send */ if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) dst->seqhi = ack + MAX((win << sws), 1); /* * Cannot set dst->seqhi here since this could be a shotgunned * SYN and not an already established connection. */ if (th->th_flags & TH_FIN) if (src->state < TCPS_CLOSING) src->state = TCPS_CLOSING; if (th->th_flags & TH_RST) src->state = dst->state = TCPS_TIME_WAIT; /* Fall through to PASS packet */ } else { if ((*state)->dst.state == TCPS_SYN_SENT && (*state)->src.state == TCPS_SYN_SENT) { /* Send RST for state mismatches during handshake */ if (!(th->th_flags & TH_RST)) pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, - (*state)->rule.ptr->return_ttl, 1, + (*state)->rule.ptr->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); src->seqlo = 0; src->seqhi = 1; src->max_win = 1; } else if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); - printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d " - "dir=%s,%s\n", seq, ack, pd->p_len, ackskew, + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu dir=%s,%s\n", + seq, orig_seq, ack, pd->p_len, ackskew, (*state)->packets[0], (*state)->packets[1], direction == PF_IN ? "in" : "out", direction == (*state)->direction ? "fwd" : "rev"); printf("pf: State failure on: %c %c %c %c | %c %c\n", SEQ_GEQ(src->seqhi, end) ? ' ' : '1', SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? ' ': '2', (ackskew >= -MAXACKWINDOW) ? ' ' : '3', (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } /* Any packets which have gotten here are to be passed */ /* translate source/destination address, if necessary */ if (STATE_TRANSLATE(*state)) { if (direction == PF_OUT) pf_change_ap(pd->src, &th->th_sport, pd->ip_sum, &th->th_sum, &(*state)->gwy.addr, (*state)->gwy.port, 0, pd->af); else pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum, &th->th_sum, &(*state)->lan.addr, (*state)->lan.port, 0, pd->af); m_copyback(m, off, sizeof(*th), th); } else if (copyback) { /* Copyback sequence modulation or stateful scrub changes */ m_copyback(m, off, sizeof(*th), th); } return (PF_PASS); } int pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state key; + struct pf_state_cmp key; struct udphdr *uh = pd->hdr.udp; key.af = pd->af; key.proto = IPPROTO_UDP; if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd->src, key.af); PF_ACPY(&key.gwy.addr, pd->dst, key.af); key.ext.port = uh->uh_sport; key.gwy.port = uh->uh_dport; } else { PF_ACPY(&key.lan.addr, pd->src, key.af); PF_ACPY(&key.ext.addr, pd->dst, key.af); key.lan.port = uh->uh_sport; key.ext.port = uh->uh_dport; } STATE_LOOKUP(); if (direction == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { src = &(*state)->dst; dst = &(*state)->src; } /* update states */ if (src->state < PFUDPS_SINGLE) src->state = PFUDPS_SINGLE; if (dst->state == PFUDPS_SINGLE) dst->state = PFUDPS_MULTIPLE; /* update expire time */ (*state)->expire = time_second; if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) (*state)->timeout = PFTM_UDP_MULTIPLE; else (*state)->timeout = PFTM_UDP_SINGLE; /* translate source/destination address, if necessary */ if (STATE_TRANSLATE(*state)) { if (direction == PF_OUT) pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum, &uh->uh_sum, &(*state)->gwy.addr, (*state)->gwy.port, 1, pd->af); else pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, &uh->uh_sum, &(*state)->lan.addr, (*state)->lan.port, 1, pd->af); m_copyback(m, off, sizeof(*uh), uh); } return (PF_PASS); } int pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { struct pf_addr *saddr = pd->src, *daddr = pd->dst; u_int16_t icmpid, *icmpsum; u_int8_t icmptype; int state_icmp = 0; + struct pf_state_cmp key; switch (pd->proto) { #ifdef INET case IPPROTO_ICMP: icmptype = pd->hdr.icmp->icmp_type; icmpid = pd->hdr.icmp->icmp_id; icmpsum = &pd->hdr.icmp->icmp_cksum; if (icmptype == ICMP_UNREACH || icmptype == ICMP_SOURCEQUENCH || icmptype == ICMP_REDIRECT || icmptype == ICMP_TIMXCEED || icmptype == ICMP_PARAMPROB) state_icmp++; break; #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: icmptype = pd->hdr.icmp6->icmp6_type; icmpid = pd->hdr.icmp6->icmp6_id; icmpsum = &pd->hdr.icmp6->icmp6_cksum; if (icmptype == ICMP6_DST_UNREACH || icmptype == ICMP6_PACKET_TOO_BIG || icmptype == ICMP6_TIME_EXCEEDED || icmptype == ICMP6_PARAM_PROB) state_icmp++; break; #endif /* INET6 */ } if (!state_icmp) { /* * ICMP query/reply message not related to a TCP/UDP packet. * Search for an ICMP state. 
*/ - struct pf_state key; - key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd->src, key.af); PF_ACPY(&key.gwy.addr, pd->dst, key.af); key.ext.port = 0; key.gwy.port = icmpid; } else { PF_ACPY(&key.lan.addr, pd->src, key.af); PF_ACPY(&key.ext.addr, pd->dst, key.af); key.lan.port = icmpid; key.ext.port = 0; } STATE_LOOKUP(); (*state)->expire = time_second; (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ if (STATE_TRANSLATE(*state)) { if (direction == PF_OUT) { switch (pd->af) { #ifdef INET case AF_INET: pf_change_a(&saddr->v4.s_addr, pd->ip_sum, (*state)->gwy.addr.v4.s_addr, 0); pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( pd->hdr.icmp->icmp_cksum, icmpid, (*state)->gwy.port, 0); pd->hdr.icmp->icmp_id = (*state)->gwy.port; m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 case AF_INET6: pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, &(*state)->gwy.addr, 0); m_copyback(m, off, sizeof(struct icmp6_hdr), pd->hdr.icmp6); break; #endif /* INET6 */ } } else { switch (pd->af) { #ifdef INET case AF_INET: pf_change_a(&daddr->v4.s_addr, pd->ip_sum, (*state)->lan.addr.v4.s_addr, 0); pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( pd->hdr.icmp->icmp_cksum, icmpid, (*state)->lan.port, 0); pd->hdr.icmp->icmp_id = (*state)->lan.port; m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 case AF_INET6: pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, &(*state)->lan.addr, 0); m_copyback(m, off, sizeof(struct icmp6_hdr), pd->hdr.icmp6); break; #endif /* INET6 */ } } } return (PF_PASS); } else { /* * ICMP error message in response to a TCP/UDP packet. * Extract the inner TCP/UDP header and search for that state. */ struct pf_pdesc pd2; #ifdef INET struct ip h2; #endif /* INET */ #ifdef INET6 struct ip6_hdr h2_6; int terminal = 0; #endif /* INET6 */ int ipoff2; int off2; pd2.af = pd->af; switch (pd->af) { #ifdef INET case AF_INET: /* offset of h2 in mbuf chain */ ipoff2 = off + ICMP_MINLEN; if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip)\n")); return (PF_DROP); } /* * ICMP error messages don't refer to non-first * fragments */ if (h2.ip_off & htons(IP_OFFMASK)) { REASON_SET(reason, PFRES_FRAG); return (PF_DROP); } /* offset of protocol header that follows h2 */ off2 = ipoff2 + (h2.ip_hl << 2); pd2.proto = h2.ip_p; pd2.src = (struct pf_addr *)&h2.ip_src; pd2.dst = (struct pf_addr *)&h2.ip_dst; pd2.ip_sum = &h2.ip_sum; break; #endif /* INET */ #ifdef INET6 case AF_INET6: ipoff2 = off + sizeof(struct icmp6_hdr); if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip6)\n")); return (PF_DROP); } pd2.proto = h2_6.ip6_nxt; pd2.src = (struct pf_addr *)&h2_6.ip6_src; pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; pd2.ip_sum = NULL; off2 = ipoff2 + sizeof(h2_6); do { switch (pd2.proto) { case IPPROTO_FRAGMENT: /* * ICMPv6 error messages for * non-first fragments */ REASON_SET(reason, PFRES_FRAG); return (PF_DROP); case IPPROTO_AH: case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct ip6_ext opt6; if (!pf_pull_hdr(m, off2, &opt6, sizeof(opt6), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMPv6 short opt\n")); return (PF_DROP); } if (pd2.proto == IPPROTO_AH) off2 += (opt6.ip6e_len + 2) * 4; else off2 += (opt6.ip6e_len + 1) * 8; 
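
[Editorial note: the do/while loop above walks the IPv6 extension-header chain inside the ICMPv6 error payload until it reaches a transport header. The advance differs by header type: AH encodes its length in 32-bit units minus two, every other extension header in 64-bit units minus one. A minimal standalone sketch of that arithmetic follows; ext_hdr_advance is a hypothetical helper for illustration only, not part of this diff.]

    #include <sys/types.h>
    #include <stddef.h>
    #include <netinet/in.h>

    /*
     * Bytes to skip over one IPv6 extension header, given its protocol
     * number and its ip6e_len field.  Mirrors the "off2 += ..." advance
     * in the loop above.
     */
    static size_t
    ext_hdr_advance(int proto, u_int8_t ip6e_len)
    {
            if (proto == IPPROTO_AH)
                    /* RFC 4302: length in 32-bit units, minus 2 */
                    return (((size_t)ip6e_len + 2) * 4);
            /* RFC 2460: length in 64-bit units, first 8 bytes implied */
            return (((size_t)ip6e_len + 1) * 8);
    }
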
pd2.proto = opt6.ip6e_nxt; /* goto the next header */ break; } default: terminal++; break; } } while (!terminal); break; #endif /* INET6 */ } switch (pd2.proto) { case IPPROTO_TCP: { struct tcphdr th; u_int32_t seq; - struct pf_state key; struct pf_state_peer *src, *dst; u_int8_t dws; int copyback = 0; /* * Only the first 8 bytes of the TCP header can be * expected. Don't access any TCP header fields after * th_seq, an ackskew test is not possible. */ if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(tcp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_TCP; if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd2.dst, key.af); PF_ACPY(&key.gwy.addr, pd2.src, key.af); key.ext.port = th.th_dport; key.gwy.port = th.th_sport; } else { PF_ACPY(&key.lan.addr, pd2.dst, key.af); PF_ACPY(&key.ext.addr, pd2.src, key.af); key.lan.port = th.th_dport; key.ext.port = th.th_sport; } STATE_LOOKUP(); if (direction == (*state)->direction) { src = &(*state)->dst; dst = &(*state)->src; } else { src = &(*state)->src; dst = &(*state)->dst; } if (src->wscale && dst->wscale && !(th.th_flags & TH_SYN)) dws = dst->wscale & PF_WSCALE_MASK; else dws = 0; /* Demodulate sequence number */ seq = ntohl(th.th_seq) - src->seqdiff; if (src->seqdiff) { pf_change_a(&th.th_seq, icmpsum, htonl(seq), 0); copyback = 1; } if (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) { if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); pf_print_host(pd->src, 0, pd->af); printf(" -> "); pf_print_host(pd->dst, 0, pd->af); printf(" state: "); pf_print_state(*state); printf(" seq=%u\n", seq); } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } if (STATE_TRANSLATE(*state)) { if (direction == PF_IN) { pf_change_icmp(pd2.src, &th.th_sport, daddr, &(*state)->lan.addr, (*state)->lan.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); } else { pf_change_icmp(pd2.dst, &th.th_dport, saddr, &(*state)->gwy.addr, (*state)->gwy.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); } copyback = 1; } if (copyback) { switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), &h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), &h2_6); break; #endif /* INET6 */ } m_copyback(m, off2, 8, &th); } return (PF_PASS); break; } case IPPROTO_UDP: { struct udphdr uh; - struct pf_state key; if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(udp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_UDP; if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd2.dst, key.af); PF_ACPY(&key.gwy.addr, pd2.src, key.af); key.ext.port = uh.uh_dport; key.gwy.port = uh.uh_sport; } else { PF_ACPY(&key.lan.addr, pd2.dst, key.af); PF_ACPY(&key.ext.addr, pd2.src, key.af); key.lan.port = uh.uh_dport; key.ext.port = uh.uh_sport; } STATE_LOOKUP(); if (STATE_TRANSLATE(*state)) { if (direction == PF_IN) { pf_change_icmp(pd2.src, &uh.uh_sport, daddr, &(*state)->lan.addr, (*state)->lan.port, &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); } else { pf_change_icmp(pd2.dst, &uh.uh_dport, saddr, &(*state)->gwy.addr, (*state)->gwy.port, &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); } switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, 
ICMP_MINLEN, pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), &h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), &h2_6); break; #endif /* INET6 */ } m_copyback(m, off2, sizeof(uh), &uh); } return (PF_PASS); break; } #ifdef INET case IPPROTO_ICMP: { struct icmp iih; - struct pf_state key; if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short i" "(icmp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_ICMP; if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd2.dst, key.af); PF_ACPY(&key.gwy.addr, pd2.src, key.af); key.ext.port = 0; key.gwy.port = iih.icmp_id; } else { PF_ACPY(&key.lan.addr, pd2.dst, key.af); PF_ACPY(&key.ext.addr, pd2.src, key.af); key.lan.port = iih.icmp_id; key.ext.port = 0; } STATE_LOOKUP(); if (STATE_TRANSLATE(*state)) { if (direction == PF_IN) { pf_change_icmp(pd2.src, &iih.icmp_id, daddr, &(*state)->lan.addr, (*state)->lan.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); } else { pf_change_icmp(pd2.dst, &iih.icmp_id, saddr, &(*state)->gwy.addr, (*state)->gwy.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); } m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), &h2); m_copyback(m, off2, ICMP_MINLEN, &iih); } return (PF_PASS); break; } #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: { struct icmp6_hdr iih; - struct pf_state key; if (!pf_pull_hdr(m, off2, &iih, sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(icmp6)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_ICMPV6; if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd2.dst, key.af); PF_ACPY(&key.gwy.addr, pd2.src, key.af); key.ext.port = 0; key.gwy.port = iih.icmp6_id; } else { PF_ACPY(&key.lan.addr, pd2.dst, key.af); PF_ACPY(&key.ext.addr, pd2.src, key.af); key.lan.port = iih.icmp6_id; key.ext.port = 0; } STATE_LOOKUP(); if (STATE_TRANSLATE(*state)) { if (direction == PF_IN) { pf_change_icmp(pd2.src, &iih.icmp6_id, daddr, &(*state)->lan.addr, (*state)->lan.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); } else { pf_change_icmp(pd2.dst, &iih.icmp6_id, saddr, &(*state)->gwy.addr, (*state)->gwy.port, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); } m_copyback(m, off, sizeof(struct icmp6_hdr), pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), &h2_6); m_copyback(m, off2, sizeof(struct icmp6_hdr), &iih); } return (PF_PASS); break; } #endif /* INET6 */ default: { - struct pf_state key; - key.af = pd2.af; key.proto = pd2.proto; if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd2.dst, key.af); PF_ACPY(&key.gwy.addr, pd2.src, key.af); key.ext.port = 0; key.gwy.port = 0; } else { PF_ACPY(&key.lan.addr, pd2.dst, key.af); PF_ACPY(&key.ext.addr, pd2.src, key.af); key.lan.port = 0; key.ext.port = 0; } STATE_LOOKUP(); if (STATE_TRANSLATE(*state)) { if (direction == PF_IN) { pf_change_icmp(pd2.src, NULL, daddr, &(*state)->lan.addr, 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); } else { pf_change_icmp(pd2.dst, NULL, saddr, &(*state)->gwy.addr, 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); } switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), &h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), &h2_6); break; 
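
[Editorial note: each protocol case in this ICMP-error path rebuilds the same lookup-key shape the main TCP/UDP/ICMP handlers use, with the inner packet's addresses swapped because an ICMP error travels opposite to the flow it reports on. A simplified, self-contained sketch of the direction-dependent fill follows; flow_key and flow_key_fill are reduced IPv4-only stand-ins for illustration, not the kernel's struct pf_state_cmp.]

    #include <sys/types.h>
    #include <netinet/in.h>

    /* Reduced IPv4-only stand-in for the state lookup key. */
    struct flow_key {
            struct in_addr  lan, gwy, ext;  /* local, translated, remote */
            u_int16_t       lan_port, gwy_port, ext_port;
            u_int8_t        proto;
    };

    /*
     * Inbound packets are keyed on (ext, gwy): remote source against the
     * translated/visible local address.  Outbound packets are keyed on
     * (lan, ext).  The ICMP-error cases above pass the *inner* header's
     * addresses with source and destination swapped.
     */
    static void
    flow_key_fill(struct flow_key *k, int in, u_int8_t proto,
        struct in_addr src, u_int16_t sport,
        struct in_addr dst, u_int16_t dport)
    {
            k->proto = proto;
            if (in) {
                    k->ext = src; k->ext_port = sport;
                    k->gwy = dst; k->gwy_port = dport;
            } else {
                    k->lan = src; k->lan_port = sport;
                    k->ext = dst; k->ext_port = dport;
            }
    }
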
#endif /* INET6 */ } } return (PF_PASS); break; } } } } int pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state key; + struct pf_state_cmp key; key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd->src, key.af); PF_ACPY(&key.gwy.addr, pd->dst, key.af); key.ext.port = 0; key.gwy.port = 0; } else { PF_ACPY(&key.lan.addr, pd->src, key.af); PF_ACPY(&key.ext.addr, pd->dst, key.af); key.lan.port = 0; key.ext.port = 0; } STATE_LOOKUP(); if (direction == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { src = &(*state)->dst; dst = &(*state)->src; } /* update states */ if (src->state < PFOTHERS_SINGLE) src->state = PFOTHERS_SINGLE; if (dst->state == PFOTHERS_SINGLE) dst->state = PFOTHERS_MULTIPLE; /* update expire time */ (*state)->expire = time_second; if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) (*state)->timeout = PFTM_OTHER_MULTIPLE; else (*state)->timeout = PFTM_OTHER_SINGLE; /* translate source/destination address, if necessary */ if (STATE_TRANSLATE(*state)) { if (direction == PF_OUT) switch (pd->af) { #ifdef INET case AF_INET: pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, (*state)->gwy.addr.v4.s_addr, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af); break; #endif /* INET6 */ } else switch (pd->af) { #ifdef INET case AF_INET: pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, (*state)->lan.addr.v4.s_addr, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af); break; #endif /* INET6 */ } } return (PF_PASS); } /* * ipoff and off are measured from the start of the mbuf chain. * h must be at "ipoff" on the mbuf chain. 
*/ void * pf_pull_hdr(struct mbuf *m, int off, void *p, int len, u_short *actionp, u_short *reasonp, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: { struct ip *h = mtod(m, struct ip *); u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; if (fragoff) { if (fragoff >= len) ACTION_SET(actionp, PF_PASS); else { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_FRAG); } return (NULL); } if (m->m_pkthdr.len < off + len || ntohs(h->ip_len) < off + len) { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_SHORT); return (NULL); } break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { struct ip6_hdr *h = mtod(m, struct ip6_hdr *); if (m->m_pkthdr.len < off + len || (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < (unsigned)(off + len)) { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_SHORT); return (NULL); } break; } #endif /* INET6 */ } m_copydata(m, off, len, p); return (p); } int -pf_routable(struct pf_addr *addr, sa_family_t af) +pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) { struct sockaddr_in *dst; + int ret = 1; + int check_mpath; + extern int ipmultipath; #ifdef INET6 + extern int ip6_multipath; struct sockaddr_in6 *dst6; struct route_in6 ro; #else struct route ro; #endif + struct radix_node *rn; + struct rtentry *rt; + struct ifnet *ifp; + check_mpath = 0; bzero(&ro, sizeof(ro)); switch (af) { case AF_INET: dst = satosin(&ro.ro_dst); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; + if (ipmultipath) + check_mpath = 1; break; #ifdef INET6 case AF_INET6: dst6 = (struct sockaddr_in6 *)&ro.ro_dst; dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; + if (ip6_multipath) + check_mpath = 1; break; #endif /* INET6 */ default: return (0); } + /* Skip checks for ipsec interfaces */ + if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) + goto out; + rtalloc_noclone((struct route *)&ro, NO_CLONING); if (ro.ro_rt != NULL) { - RTFREE(ro.ro_rt); - return (1); - } + /* No interface given, this is a no-route check */ + if (kif == NULL) + goto out; - return (0); + if (kif->pfik_ifp == NULL) { + ret = 0; + goto out; + } + + /* Perform uRPF check if passed input interface */ + ret = 0; + rn = (struct radix_node *)ro.ro_rt; + do { + rt = (struct rtentry *)rn; + if (rt->rt_ifp->if_type == IFT_CARP) + ifp = rt->rt_ifp->if_carpdev; + else + ifp = rt->rt_ifp; + + if (kif->pfik_ifp == ifp) + ret = 1; + rn = rn_mpath_next(rn); + } while (check_mpath == 1 && rn != NULL && ret == 0); + } else + ret = 0; +out: + if (ro.ro_rt != NULL) + RTFREE(ro.ro_rt); + return (ret); } int pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) { struct sockaddr_in *dst; #ifdef INET6 struct sockaddr_in6 *dst6; struct route_in6 ro; #else struct route ro; #endif int ret = 0; bzero(&ro, sizeof(ro)); switch (af) { case AF_INET: dst = satosin(&ro.ro_dst); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; break; #ifdef INET6 case AF_INET6: dst6 = (struct sockaddr_in6 *)&ro.ro_dst; dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; break; #endif /* INET6 */ default: return (0); } rtalloc_noclone((struct route *)&ro, NO_CLONING); if (ro.ro_rt != NULL) { if (ro.ro_rt->rt_labelid == aw->v.rtlabel) ret = 1; RTFREE(ro.ro_rt); } return (ret); } #ifdef INET void pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, - struct pf_state *s) + struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0, 
*m1; - struct m_tag *mtag; struct route iproute; - struct route *ro; + struct route *ro = NULL; struct sockaddr_in *dst; struct ip *ip; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; +#ifdef IPSEC + struct m_tag *mtag; +#endif /* IPSEC */ if (m == NULL || *m == NULL || r == NULL || (dir != PF_IN && dir != PF_OUT) || oifp == NULL) panic("pf_route: invalid parameters"); - if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) { - if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) == - NULL) { - m0 = *m; - *m = NULL; - goto bad; - } - *(char *)(mtag + 1) = 1; - m_tag_prepend(*m, mtag); - } else { - if (*(char *)(mtag + 1) > 3) { - m0 = *m; - *m = NULL; - goto bad; - } - (*(char *)(mtag + 1))++; + if (pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad; } if (r->rt == PF_DUPTO) { if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) return; } else { if ((r->rt == PF_REPLYTO) == (r->direction == dir)) return; m0 = *m; } if (m0->m_len < sizeof(struct ip)) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_route: m0->m_len < sizeof(struct ip)\n")); goto bad; } ip = mtod(m0, struct ip *); ro = &iproute; bzero((caddr_t)ro, sizeof(*ro)); dst = satosin(&ro->ro_dst); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; if (r->rt == PF_FASTROUTE) { rtalloc(ro); if (ro->ro_rt == 0) { ipstat.ips_noroute++; goto bad; } ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = satosin(ro->ro_rt->rt_gateway); } else { if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n")); goto bad; } if (s == NULL) { pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET)) dst->sin_addr.s_addr = naddr.v4.s_addr; ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET)) dst->sin_addr.s_addr = s->rt_addr.v4.s_addr; ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; } } if (ifp == NULL) goto bad; if (oifp != ifp) { if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; if (m0->m_len < sizeof(struct ip)) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_route: m0->m_len < sizeof(struct ip)\n")); goto bad; } ip = mtod(m0, struct ip *); } /* Copied from ip_output. */ #ifdef IPSEC /* * If deferred crypto processing is needed, check that the * interface supports it. */ if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL)) != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) { /* Notify IPsec to do its own crypto. */ ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); goto bad; } #endif /* IPSEC */ /* Catch routing changes wrt. hardware checksumming for TCP or UDP. 
*/ - if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) { + if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) { if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */ } - } else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) { + } else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) { if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */ } } if (ntohs(ip->ip_len) <= ifp->if_mtu) { if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && ifp->if_bridge == NULL) { - m0->m_pkthdr.csum |= M_IPV4_CSUM_OUT; + m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; ipstat.ips_outhwcsum++; } else { ip->ip_sum = 0; ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); } /* Update relevant hardware checksum stats for TCP/UDP */ - if (m0->m_pkthdr.csum & M_TCPV4_CSUM_OUT) + if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) tcpstat.tcps_outhwcsum++; - else if (m0->m_pkthdr.csum & M_UDPV4_CSUM_OUT) + else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) udpstat.udps_outhwcsum++; error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL); goto done; } /* * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. */ if (ip->ip_off & htons(IP_DF)) { ipstat.ips_cantfrag++; if (r->rt != PF_DUPTO) { icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, - ifp); + ifp->if_mtu); goto done; } else goto bad; } m1 = m0; error = ip_fragment(m0, ifp, ifp->if_mtu); if (error) { m0 = NULL; goto bad; } for (m0 = m1; m0; m0 = m1) { m1 = m0->m_nextpkt; m0->m_nextpkt = 0; if (error == 0) error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL); else m_freem(m0); } if (error == 0) ipstat.ips_fragmented++; done: if (r->rt != PF_DUPTO) *m = NULL; if (ro == &iproute && ro->ro_rt) RTFREE(ro->ro_rt); return; bad: m_freem(m0); goto done; } #endif /* INET */ #ifdef INET6 void pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, - struct pf_state *s) + struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0; - struct m_tag *mtag; struct route_in6 ip6route; struct route_in6 *ro; struct sockaddr_in6 *dst; struct ip6_hdr *ip6; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; if (m == NULL || *m == NULL || r == NULL || (dir != PF_IN && dir != PF_OUT) || oifp == NULL) panic("pf_route6: invalid parameters"); - if ((mtag = m_tag_find(*m, PACKET_TAG_PF_ROUTED, NULL)) == NULL) { - if ((mtag = m_tag_get(PACKET_TAG_PF_ROUTED, 1, M_NOWAIT)) == - NULL) { - m0 = *m; - *m = NULL; - goto bad; - } - *(char *)(mtag + 1) = 1; - m_tag_prepend(*m, mtag); - } else { - if (*(char *)(mtag + 1) > 3) { - m0 = *m; - *m = NULL; - goto bad; - } - (*(char *)(mtag + 1))++; + if (pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad; } if (r->rt == PF_DUPTO) { if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) return; } else { if ((r->rt == PF_REPLYTO) == (r->direction == dir)) return; m0 = *m; } if (m0->m_len < sizeof(struct ip6_hdr)) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); goto bad; } ip6 = mtod(m0, struct ip6_hdr *); ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); dst = (struct sockaddr_in6 *)&ro->ro_dst; dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(*dst); dst->sin6_addr = ip6->ip6_dst; - /* Cheat. */ + /* Cheat. 
XXX why only in the v6 case??? */ if (r->rt == PF_FASTROUTE) { - mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT); - if (mtag == NULL) - goto bad; - m_tag_prepend(m0, mtag); + pd->pf_mtag->flags |= PF_TAG_GENERATED; ip6_output(m0, NULL, NULL, 0, NULL, NULL); return; } if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n")); goto bad; } if (s == NULL) { pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst->sin6_addr, &naddr, AF_INET6); ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst->sin6_addr, &s->rt_addr, AF_INET6); ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; } if (ifp == NULL) goto bad; if (oifp != ifp) { if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; if (m0->m_len < sizeof(struct ip6_hdr)) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); goto bad; } ip6 = mtod(m0, struct ip6_hdr *); } /* * If the packet is too large for the outgoing interface, * send back an icmp6 error. */ - if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr)) + if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { error = nd6_output(ifp, ifp, m0, dst, NULL); } else { in6_ifstat_inc(ifp, ifs6_in_toobig); if (r->rt != PF_DUPTO) icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); else goto bad; } done: if (r->rt != PF_DUPTO) *m = NULL; return; bad: m_freem(m0); goto done; } #endif /* INET6 */ /* * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag * off is the offset where the protocol header starts * len is the total length of protocol header plus payload * returns 0 when the checksum is valid, otherwise returns 1. 
*/ int pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af) { u_int16_t flag_ok, flag_bad; u_int16_t sum; switch (p) { case IPPROTO_TCP: flag_ok = M_TCP_CSUM_IN_OK; flag_bad = M_TCP_CSUM_IN_BAD; break; case IPPROTO_UDP: flag_ok = M_UDP_CSUM_IN_OK; flag_bad = M_UDP_CSUM_IN_BAD; break; case IPPROTO_ICMP: #ifdef INET6 case IPPROTO_ICMPV6: #endif /* INET6 */ flag_ok = flag_bad = 0; break; default: return (1); } - if (m->m_pkthdr.csum & flag_ok) + if (m->m_pkthdr.csum_flags & flag_ok) return (0); - if (m->m_pkthdr.csum & flag_bad) + if (m->m_pkthdr.csum_flags & flag_bad) return (1); if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) return (1); if (m->m_pkthdr.len < off + len) return (1); switch (af) { #ifdef INET case AF_INET: if (p == IPPROTO_ICMP) { if (m->m_len < off) return (1); m->m_data += off; m->m_len -= off; sum = in_cksum(m, len); m->m_data -= off; m->m_len += off; } else { if (m->m_len < sizeof(struct ip)) return (1); sum = in4_cksum(m, p, off, len); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (m->m_len < sizeof(struct ip6_hdr)) return (1); sum = in6_cksum(m, p, off, len); break; #endif /* INET6 */ default: return (1); } if (sum) { - m->m_pkthdr.csum |= flag_bad; + m->m_pkthdr.csum_flags |= flag_bad; switch (p) { case IPPROTO_TCP: tcpstat.tcps_rcvbadsum++; break; case IPPROTO_UDP: udpstat.udps_badsum++; break; case IPPROTO_ICMP: icmpstat.icps_checksum++; break; #ifdef INET6 case IPPROTO_ICMPV6: icmp6stat.icp6s_checksum++; break; #endif /* INET6 */ } return (1); } - m->m_pkthdr.csum |= flag_ok; + m->m_pkthdr.csum_flags |= flag_ok; return (0); } -static int -pf_add_mbuf_tag(struct mbuf *m, u_int tag) -{ - struct m_tag *mtag; - - if (m_tag_find(m, tag, NULL) != NULL) - return (0); - mtag = m_tag_get(tag, 0, M_NOWAIT); - if (mtag == NULL) - return (1); - m_tag_prepend(m, mtag); - return (0); -} - #ifdef INET int pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct ether_header *eh) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0; struct ip *h; struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, dirndx, pqid = 0; - if (!pf_status.running || - (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) + if (!pf_status.running) return (PF_PASS); + memset(&pd, 0, sizeof(pd)); + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test: pf_get_mtag returned NULL\n")); + return (PF_DROP); + } + if (pd.pf_mtag->flags & PF_TAG_GENERATED) + return (PF_PASS); + if (ifp->if_type == IFT_CARP && ifp->if_carpdev) ifp = ifp->if_carpdev; - kif = pfi_index2kif[ifp->if_index]; + kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) return (PF_PASS); #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("non-M_PKTHDR is passed to pf_test"); #endif /* DIAGNOSTIC */ - memset(&pd, 0, sizeof(pd)); if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; goto done; } /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } m = *m0; h = mtod(m, struct ip *); off = h->ip_hl << 2; if (off < (int)sizeof(*h)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; goto done; } pd.src = (struct pf_addr 
*)&h->ip_src; pd.dst = (struct pf_addr *)&h->ip_dst; PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET); pd.ip_sum = &h->ip_sum; pd.proto = h->ip_p; pd.af = AF_INET; pd.tos = h->ip_tos; pd.tot_len = ntohs(h->ip_len); pd.eh = eh; /* handle fragments that didn't get reassembled by normalization */ if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { action = pf_test_fragment(&r, dir, kif, m, h, &pd, &a, &ruleset); goto done; } switch (h->ip_p) { case IPPROTO_TCP: { struct tcphdr th; pd.hdr.tcp = &th; if (!pf_pull_hdr(m, off, &th, sizeof(th), &action, &reason, AF_INET)) { log = action != PF_PASS; goto done; } if (dir == PF_IN && pf_check_proto_cksum(m, off, ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) { + REASON_SET(&reason, PFRES_PROTCKSUM); action = PF_DROP; goto done; } pd.p_len = pd.tot_len - off - (th.th_off << 2); if ((th.th_flags & TH_ACK) && pd.p_len == 0) pqid = 1; action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); if (action == PF_DROP) goto done; action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_tcp(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); break; } case IPPROTO_UDP: { struct udphdr uh; pd.hdr.udp = &uh; if (!pf_pull_hdr(m, off, &uh, sizeof(uh), &action, &reason, AF_INET)) { log = action != PF_PASS; goto done; } if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_udp(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); break; } case IPPROTO_ICMP: { struct icmp ih; pd.hdr.icmp = &ih; if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, &action, &reason, AF_INET)) { log = action != PF_PASS; goto done; } if (dir == PF_IN && pf_check_proto_cksum(m, off, ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_icmp(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); break; } default: action = pf_test_state_other(&s, dir, kif, &pd); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_other(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ipintrq); break; } done: if (action == PF_PASS && h->ip_hl > 5 && !((s && s->allow_opts) || r->allow_opts)) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = 1; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping packet with ip options\n")); } - if (s && s->tag) - pf_tag_packet(m, pf_get_tag(m), s->tag); + if ((s && s->tag) || r->rtableid) + pf_tag_packet(m, pd.pf_mtag, s ? 
s->tag : 0, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { - struct m_tag *mtag; - struct altq_tag *atag; - - mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT); - if (mtag != NULL) { - atag = (struct altq_tag *)(mtag + 1); - if (pqid || pd.tos == IPTOS_LOWDELAY) - atag->qid = r->pqid; - else - atag->qid = r->qid; - /* add hints for ecn */ - atag->af = AF_INET; - atag->hdr = h; - m_tag_prepend(m, mtag); - } + if (pqid || (pd.tos & IPTOS_LOWDELAY)) + pd.pf_mtag->qid = r->pqid; + else + pd.pf_mtag->qid = r->qid; + /* add hints for ecn */ + pd.pf_mtag->af = AF_INET; + pd.pf_mtag->hdr = h; } #endif /* ALTQ */ /* * connections redirected to loopback should not match sockets * bound specifically to loopback due to security implications, * see tcp_input() and in_pcblookup_listen(). */ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && - (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET && - pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - } + (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) + pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; - if (log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, a, ruleset); + if (log) { + struct pf_rule *lr; + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset, + &pd); + } + kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; if (action == PF_PASS || r->action == PF_DROP) { - r->packets++; - r->bytes += pd.tot_len; + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; if (a != NULL) { - a->packets++; - a->bytes += pd.tot_len; + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { - dirndx = (dir == s->direction) ? 0 : 1; - s->packets[dirndx]++; - s->bytes[dirndx] += pd.tot_len; if (s->nat_rule.ptr != NULL) { - s->nat_rule.ptr->packets++; - s->nat_rule.ptr->bytes += pd.tot_len; + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; } if (s->src_node != NULL) { - s->src_node->packets++; - s->src_node->bytes += pd.tot_len; + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; } if (s->nat_src_node != NULL) { - s->nat_src_node->packets++; - s->nat_src_node->bytes += pd.tot_len; + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; } + dirndx = (dir == s->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; if (nr != NULL) { struct pf_addr *x; /* * XXX: we need to make sure that the addresses * passed to pfr_update_stats() are the same than * the addresses used during matching (pfr_match) */ if (r == &pf_default_rule) { tr = nr; x = (s == NULL || s->direction == dir) ? &pd.baddr : &pd.naddr; } else x = (s == NULL || s->direction == dir) ? &pd.naddr : &pd.baddr; if (x == &pd.baddr || s == NULL) { /* we need to change the address */ if (dir == PF_OUT) pd.src = x; else pd.dst = x; } } if (tr->src.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->src.addr.p.tbl, (s == NULL || s->direction == dir) ? 
pd.src : pd.dst, pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL || s->direction == dir) ? pd.dst : pd.src, pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->dst.neg); } if (action == PF_SYNPROXY_DROP) { m_freem(*m0); *m0 = NULL; action = PF_PASS; } else if (r->rt) /* pf_route can free the mbuf causing *m0 to become NULL */ - pf_route(m0, r, dir, ifp, s); + pf_route(m0, r, dir, ifp, s, &pd); return (action); } #endif /* INET */ #ifdef INET6 int pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct ether_header *eh) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; - struct mbuf *m = *m0; + struct mbuf *m = *m0, *n = NULL; struct ip6_hdr *h; struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, terminal = 0, dirndx; - if (!pf_status.running || - (m_tag_find(m, PACKET_TAG_PF_GENERATED, NULL) != NULL)) + if (!pf_status.running) return (PF_PASS); + memset(&pd, 0, sizeof(pd)); + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test6: pf_get_mtag returned NULL\n")); + return (PF_DROP); + } + if (pd.pf_mtag->flags & PF_TAG_GENERATED) + return (PF_PASS); + if (ifp->if_type == IFT_CARP && ifp->if_carpdev) ifp = ifp->if_carpdev; - kif = pfi_index2kif[ifp->if_index]; + kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) return (PF_PASS); #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("non-M_PKTHDR is passed to pf_test6"); #endif /* DIAGNOSTIC */ - memset(&pd, 0, sizeof(pd)); if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; goto done; } /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } m = *m0; h = mtod(m, struct ip6_hdr *); +#if 1 + /* + * we do not support jumbogram yet. if we keep going, zero ip6_plen + * will do something bad, so drop the packet for now. + */ + if (htons(h->ip6_plen) == 0) { + action = PF_DROP; + REASON_SET(&reason, PFRES_NORM); /*XXX*/ + goto done; + } +#endif + pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; PF_ACPY(&pd.baddr, dir == PF_OUT ? 
pd.src : pd.dst, AF_INET6); pd.ip_sum = NULL; pd.af = AF_INET6; pd.tos = 0; pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); pd.eh = eh; off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); pd.proto = h->ip6_nxt; do { switch (pd.proto) { case IPPROTO_FRAGMENT: action = pf_test_fragment(&r, dir, kif, m, h, &pd, &a, &ruleset); if (action == PF_DROP) REASON_SET(&reason, PFRES_FRAG); goto done; + case IPPROTO_ROUTING: { + struct ip6_rthdr rthdr; + struct ip6_rthdr0 rthdr0; + struct in6_addr finaldst; + struct ip6_hdr *ip6; + + if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, + &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr\n")); + action = PF_DROP; + log = 1; + goto done; + } + if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { + if (!pf_pull_hdr(m, off, &rthdr0, + sizeof(rthdr0), NULL, &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr0\n")); + action = PF_DROP; + log = 1; + goto done; + } + if (rthdr0.ip6r0_segleft != 0) { + if (!pf_pull_hdr(m, off + + sizeof(rthdr0) + + rthdr0.ip6r0_len * 8 - + sizeof(finaldst), &finaldst, + sizeof(finaldst), NULL, + &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr0\n")); + action = PF_DROP; + log = 1; + goto done; + } + + n = m_copym(m, 0, M_COPYALL, M_DONTWAIT); + if (!n) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: mbuf shortage\n")); + action = PF_DROP; + log = 1; + goto done; + } + n = m_pullup(n, sizeof(struct ip6_hdr)); + if (!n) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: mbuf shortage\n")); + action = PF_DROP; + log = 1; + goto done; + } + ip6 = mtod(n, struct ip6_hdr *); + ip6->ip6_dst = finaldst; + } + } + /* FALLTHROUGH */ + } case IPPROTO_AH: case IPPROTO_HOPOPTS: - case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct ip6_ext opt6; if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), NULL, &reason, pd.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 short opt\n")); action = PF_DROP; log = 1; goto done; } if (pd.proto == IPPROTO_AH) off += (opt6.ip6e_len + 2) * 4; else off += (opt6.ip6e_len + 1) * 8; pd.proto = opt6.ip6e_nxt; /* goto the next header */ break; } default: terminal++; break; } } while (!terminal); + /* if there's no routing header, use unmodified mbuf for checksumming */ + if (!n) + n = m; + switch (pd.proto) { case IPPROTO_TCP: { struct tcphdr th; pd.hdr.tcp = &th; if (!pf_pull_hdr(m, off, &th, sizeof(th), &action, &reason, AF_INET6)) { log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, + if (dir == PF_IN && pf_check_proto_cksum(n, off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), IPPROTO_TCP, AF_INET6)) { action = PF_DROP; REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } pd.p_len = pd.tot_len - off - (th.th_off << 2); action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); if (action == PF_DROP) goto done; action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_tcp(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); break; } case IPPROTO_UDP: { struct udphdr uh; pd.hdr.udp = &uh; if (!pf_pull_hdr(m, off, &uh, sizeof(uh), &action, &reason, AF_INET6)) { log = action != PF_PASS; goto done; } - if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, + if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(n, off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), IPPROTO_UDP, AF_INET6)) { action = PF_DROP; 
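
[Editorial note: the IPPROTO_ROUTING hunk above copies the mbuf and substitutes the final destination before the checksum is verified: with a type 0 routing header and segments left, the transport pseudo-header checksum is computed against the last address in the header, not the intermediate hop currently in ip6_dst. A standalone sketch of the offset arithmetic used to reach that last address; rthdr0_finaldst_off is a hypothetical helper, and it assumes at least one address slot is present.]

    #include <sys/types.h>
    #include <stddef.h>

    /*
     * Offset of the final destination within a type 0 routing header,
     * relative to the start of that header.  The header occupies
     * 8 + ip6r0_len * 8 bytes (ip6r0_len counts 8-byte units beyond the
     * first 8), and its final 16 bytes hold the last address -- the
     * packet's real destination.
     */
    static size_t
    rthdr0_finaldst_off(u_int8_t ip6r0_len)
    {
            size_t hdrlen = 8 + (size_t)ip6r0_len * 8;

            return (hdrlen - 16);   /* caller must ensure ip6r0_len >= 2 */
    }
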
REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_udp(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); break; } case IPPROTO_ICMPV6: { struct icmp6_hdr ih; pd.hdr.icmp6 = &ih; if (!pf_pull_hdr(m, off, &ih, sizeof(ih), &action, &reason, AF_INET6)) { log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, + if (dir == PF_IN && pf_check_proto_cksum(n, off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), IPPROTO_ICMPV6, AF_INET6)) { action = PF_DROP; REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_icmp(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); break; } default: action = pf_test_state_other(&s, dir, kif, &pd); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); #endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_other(&r, &s, dir, kif, m, off, h, &pd, &a, &ruleset, &ip6intrq); break; } done: - /* XXX handle IPv6 options, if not allowed. not implemented. */ + if (n != m) { + m_freem(n); + n = NULL; + } - if (s && s->tag) - pf_tag_packet(m, pf_get_tag(m), s->tag); + /* XXX handle IPv6 options, if not allowed. not implemented. */ + if ((s && s->tag) || r->rtableid) + pf_tag_packet(m, pd.pf_mtag, s ? 
+	    s->tag : 0, r->rtableid);
+
 #ifdef ALTQ
 	if (action == PF_PASS && r->qid) {
-		struct m_tag	*mtag;
-		struct altq_tag	*atag;
-
-		mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), M_NOWAIT);
-		if (mtag != NULL) {
-			atag = (struct altq_tag *)(mtag + 1);
-			if (pd.tos == IPTOS_LOWDELAY)
-				atag->qid = r->pqid;
-			else
-				atag->qid = r->qid;
-			/* add hints for ecn */
-			atag->af = AF_INET6;
-			atag->hdr = h;
-			m_tag_prepend(m, mtag);
-		}
+		if (pd.tos & IPTOS_LOWDELAY)
+			pd.pf_mtag->qid = r->pqid;
+		else
+			pd.pf_mtag->qid = r->qid;
+		/* add hints for ecn */
+		pd.pf_mtag->af = AF_INET6;
+		pd.pf_mtag->hdr = h;
 	}
 #endif /* ALTQ */
 
 	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
 	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
 	    (s->nat_rule.ptr->action == PF_RDR ||
 	    s->nat_rule.ptr->action == PF_BINAT) &&
-	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6) &&
-	    pf_add_mbuf_tag(m, PACKET_TAG_PF_TRANSLATE_LOCALHOST)) {
-		action = PF_DROP;
-		REASON_SET(&reason, PFRES_MEMORY);
-	}
+	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
+		pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
 
-	if (log)
-		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, r, a, ruleset);
+	if (log) {
+		struct pf_rule *lr;
+		if (s != NULL && s->nat_rule.ptr != NULL &&
+		    s->nat_rule.ptr->log & PF_LOG_ALL)
+			lr = s->nat_rule.ptr;
+		else
+			lr = r;
+		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
+		    &pd);
+	}
+
 	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
 	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
 
 	if (action == PF_PASS || r->action == PF_DROP) {
-		r->packets++;
-		r->bytes += pd.tot_len;
+		dirndx = (dir == PF_OUT);
+		r->packets[dirndx]++;
+		r->bytes[dirndx] += pd.tot_len;
 		if (a != NULL) {
-			a->packets++;
-			a->bytes += pd.tot_len;
+			a->packets[dirndx]++;
+			a->bytes[dirndx] += pd.tot_len;
 		}
 		if (s != NULL) {
-			dirndx = (dir == s->direction) ? 0 : 1;
-			s->packets[dirndx]++;
-			s->bytes[dirndx] += pd.tot_len;
 			if (s->nat_rule.ptr != NULL) {
-				s->nat_rule.ptr->packets++;
-				s->nat_rule.ptr->bytes += pd.tot_len;
+				s->nat_rule.ptr->packets[dirndx]++;
+				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->src_node != NULL) {
-				s->src_node->packets++;
-				s->src_node->bytes += pd.tot_len;
+				s->src_node->packets[dirndx]++;
+				s->src_node->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->nat_src_node != NULL) {
-				s->nat_src_node->packets++;
-				s->nat_src_node->bytes += pd.tot_len;
+				s->nat_src_node->packets[dirndx]++;
+				s->nat_src_node->bytes[dirndx] += pd.tot_len;
 			}
+			dirndx = (dir == s->direction) ? 0 : 1;
+			s->packets[dirndx]++;
+			s->bytes[dirndx] += pd.tot_len;
 		}
 		tr = r;
 		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
 		if (nr != NULL) {
 			struct pf_addr *x;
 			/*
 			 * XXX: we need to make sure that the addresses
 			 * passed to pfr_update_stats() are the same as
 			 * the addresses used during matching (pfr_match)
 			 */
 			if (r == &pf_default_rule) {
 				tr = nr;
 				x = (s == NULL || s->direction == dir) ?
 				    &pd.baddr : &pd.naddr;
 			} else {
 				x = (s == NULL || s->direction == dir) ?
 				    &pd.naddr : &pd.baddr;
 			}
 			if (x == &pd.baddr || s == NULL) {
 				if (dir == PF_OUT)
 					pd.src = x;
 				else
 					pd.dst = x;
 			}
 		}
 		if (tr->src.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->src.addr.p.tbl,
 			    (s == NULL || s->direction == dir) ? pd.src :
 			    pd.dst, pd.af, pd.tot_len, dir == PF_OUT,
 			    r->action == PF_PASS, tr->src.neg);
 		if (tr->dst.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->dst.addr.p.tbl,
 			    (s == NULL || s->direction == dir) ?
pd.dst : pd.src, pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->dst.neg); } if (action == PF_SYNPROXY_DROP) { m_freem(*m0); *m0 = NULL; action = PF_PASS; } else if (r->rt) /* pf_route6 can free the mbuf causing *m0 to become NULL */ - pf_route6(m0, r, dir, ifp, s); + pf_route6(m0, r, dir, ifp, s, &pd); return (action); } #endif /* INET6 */ int pf_check_congestion(struct ifqueue *ifq) { if (ifq->ifq_congestion) return (1); else return (0); } Index: vendor-sys/pf/dist/sys/contrib/pf/net/pf_if.c =================================================================== --- vendor-sys/pf/dist/sys/contrib/pf/net/pf_if.c (revision 171163) +++ vendor-sys/pf/dist/sys/contrib/pf/net/pf_if.c (revision 171164) @@ -1,847 +1,754 @@ -/* $OpenBSD: pf_if.c,v 1.23 2004/12/22 17:17:55 dhartmei Exp $ */ +/* $OpenBSD: pf_if.c,v 1.46 2006/12/13 09:01:59 itojun Exp $ */ /* + * Copyright 2005 Henning Brauer + * Copyright 2005 Ryan McBride * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2003 Cedric Berger * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ -#define ACCEPT_FLAGS(oklist) \ - do { \ - if ((flags & ~(oklist)) & \ - PFI_FLAG_ALLMASK) \ - return (EINVAL); \ - } while (0) - -#define senderr(e) do { rv = (e); goto _bad; } while (0) - -struct pfi_kif **pfi_index2kif; -struct pfi_kif *pfi_self; -int pfi_indexlim; -struct pfi_ifhead pfi_ifs; +struct pfi_kif *pfi_all = NULL; struct pfi_statehead pfi_statehead; -int pfi_ifcnt; struct pool pfi_addr_pl; +struct pfi_ifhead pfi_ifs; long pfi_update = 1; struct pfr_addr *pfi_buffer; int pfi_buffer_cnt; int pfi_buffer_max; -void pfi_dynaddr_update(void *); -void pfi_kifaddr_update(void *); +void pfi_kif_update(struct pfi_kif *); +void pfi_dynaddr_update(struct pfi_dynaddr *dyn); void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int, int); +void pfi_kifaddr_update(void *); void pfi_instance_add(struct ifnet *, int, int); void pfi_address_add(struct sockaddr *, int, int); int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); -struct pfi_kif *pfi_if_create(const char *, struct pfi_kif *, int); -void pfi_copy_group(char *, const char *, int); -void pfi_newgroup(const char *, int); -int pfi_skip_if(const char *, struct pfi_kif *, int); +int pfi_skip_if(const char *, struct pfi_kif *); int pfi_unmask(void *); -void pfi_dohooks(struct pfi_kif *); RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); #define PFI_BUFFER_MAX 0x10000 #define PFI_MTYPE M_IFADDR void pfi_initialize(void) { - if (pfi_self != NULL) /* already initialized */ + if (pfi_all != NULL) /* already initialized */ return; TAILQ_INIT(&pfi_statehead); pool_init(&pfi_addr_pl, sizeof(struct pfi_dynaddr), 0, 0, 0, "pfiaddrpl", &pool_allocator_nointr); pfi_buffer_max = 64; pfi_buffer = malloc(pfi_buffer_max * sizeof(*pfi_buffer), PFI_MTYPE, M_WAITOK); - pfi_self = pfi_if_create("self", NULL, PFI_IFLAG_GROUP); + + if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL) + panic("pfi_kif_get for pfi_all failed"); } +struct pfi_kif * +pfi_kif_get(const char *kif_name) +{ + struct pfi_kif *kif; + struct pfi_kif_cmp s; + + bzero(&s, sizeof(s)); + strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name)); + if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL) + return (kif); + + /* create new one */ + if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_DONTWAIT)) == NULL) + return (NULL); + + bzero(kif, sizeof(*kif)); + strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name)); + kif->pfik_tzero = time_second; + TAILQ_INIT(&kif->pfik_dynaddrs); + + RB_INSERT(pfi_ifhead, &pfi_ifs, kif); + return (kif); +} + void -pfi_attach_clone(struct if_clone *ifc) +pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what) { - pfi_initialize(); - pfi_newgroup(ifc->ifc_name, PFI_IFLAG_CLONABLE); + switch (what) { + case PFI_KIF_REF_RULE: + kif->pfik_rules++; + break; + case PFI_KIF_REF_STATE: + if (!kif->pfik_states++) + TAILQ_INSERT_TAIL(&pfi_statehead, kif, pfik_w_states); + break; + default: + panic("pfi_kif_ref with unknown type"); + } } void +pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what) +{ + if (kif == NULL) + return; + + switch (what) { + case PFI_KIF_REF_NONE: + break; + case PFI_KIF_REF_RULE: + if (kif->pfik_rules <= 0) { + printf("pfi_kif_unref: rules refcount <= 0\n"); + return; + } + kif->pfik_rules--; + break; + case PFI_KIF_REF_STATE: + if (kif->pfik_states <= 0) { 
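/*
 * pfi_kif_ref()/pfi_kif_unref() replace the old pfi_attach_rule/
 * pfi_detach_rule and pfi_attach_state/pfi_detach_state pairs with one
 * counted-reference API: a kif stays allocated while an ifnet, an
 * interface group, a rule, or a state still points at it, and the final
 * unref (checked a few lines below) reclaims it.  A self-contained sketch
 * of that lifetime rule, using hypothetical names rather than the kernel
 * types:
 */
#include <stdio.h>
#include <stdlib.h>

struct kobj {
	int	attached;	/* stands in for pfik_ifp/pfik_group */
	int	rules;		/* stands in for pfik_rules */
	int	states;		/* stands in for pfik_states */
};

/* e.g. kobj_unref(k, &k->rules) when a rule drops its reference */
static void
kobj_unref(struct kobj *k, int *counter)
{
	if (*counter <= 0) {	/* mirrors the printf() sanity checks */
		fprintf(stderr, "unref: refcount <= 0\n");
		return;
	}
	(*counter)--;
	/* destroy only once nothing references the object any more */
	if (!k->attached && k->rules == 0 && k->states == 0)
		free(k);
}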
+ printf("pfi_kif_unref: state refcount <= 0\n"); + return; + } + if (!--kif->pfik_states) + TAILQ_REMOVE(&pfi_statehead, kif, pfik_w_states); + break; + default: + panic("pfi_kif_unref with unknown type"); + } + + if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all) + return; + + if (kif->pfik_rules || kif->pfik_states) + return; + + RB_REMOVE(pfi_ifhead, &pfi_ifs, kif); + free(kif, PFI_MTYPE); +} + +int +pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) +{ + struct ifg_list *p; + + if (rule_kif == NULL || rule_kif == packet_kif) + return (1); + + if (rule_kif->pfik_group != NULL) + TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next) + if (p->ifgl_group == rule_kif->pfik_group) + return (1); + + return (0); +} + +void pfi_attach_ifnet(struct ifnet *ifp) { - struct pfi_kif *p, *q, key; - int s; + struct pfi_kif *kif; + int s; pfi_initialize(); s = splsoftnet(); pfi_update++; - if (ifp->if_index >= pfi_indexlim) { - /* - * grow pfi_index2kif, similar to ifindex2ifnet code in if.c - */ - size_t m, n, oldlim; - struct pfi_kif **mp, **np; + if ((kif = pfi_kif_get(ifp->if_xname)) == NULL) + panic("pfi_kif_get failed"); - oldlim = pfi_indexlim; - if (pfi_indexlim == 0) - pfi_indexlim = 64; - while (ifp->if_index >= pfi_indexlim) - pfi_indexlim <<= 1; + kif->pfik_ifp = ifp; + ifp->if_pf_kif = (caddr_t)kif; - m = oldlim * sizeof(struct pfi_kif *); - mp = pfi_index2kif; - n = pfi_indexlim * sizeof(struct pfi_kif *); - np = malloc(n, PFI_MTYPE, M_DONTWAIT); - if (np == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate translation table"); - bzero(np, n); - if (mp != NULL) - bcopy(mp, np, m); - pfi_index2kif = np; - if (mp != NULL) - free(mp, PFI_MTYPE); - } + if ((kif->pfik_ah_cookie = hook_establish(ifp->if_addrhooks, 1, + pfi_kifaddr_update, kif)) == NULL) + panic("pfi_attach_ifnet: cannot allocate '%s' address hook", + ifp->if_xname); - strlcpy(key.pfik_name, ifp->if_xname, sizeof(key.pfik_name)); - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - if (p == NULL) { - /* add group */ - pfi_copy_group(key.pfik_name, ifp->if_xname, - sizeof(key.pfik_name)); - q = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - if (q == NULL) - q = pfi_if_create(key.pfik_name, pfi_self, PFI_IFLAG_GROUP); - if (q == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate '%s' group", key.pfik_name); + pfi_kif_update(kif); - /* add interface */ - p = pfi_if_create(ifp->if_xname, q, PFI_IFLAG_INSTANCE); - if (p == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate '%s' interface", ifp->if_xname); - } else - q = p->pfik_parent; - p->pfik_ifp = ifp; - p->pfik_flags |= PFI_IFLAG_ATTACHED; - p->pfik_ah_cookie = - hook_establish(ifp->if_addrhooks, 1, pfi_kifaddr_update, p); - pfi_index2kif[ifp->if_index] = p; - pfi_dohooks(p); splx(s); } void pfi_detach_ifnet(struct ifnet *ifp) { - struct pfi_kif *p, *q, key; - int s; + int s; + struct pfi_kif *kif; - strlcpy(key.pfik_name, ifp->if_xname, sizeof(key.pfik_name)); + if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL) + return; s = splsoftnet(); pfi_update++; - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - if (p == NULL) { - printf("pfi_detach_ifnet: cannot find %s", ifp->if_xname); - splx(s); - return; - } - hook_disestablish(p->pfik_ifp->if_addrhooks, p->pfik_ah_cookie); - q = p->pfik_parent; - p->pfik_ifp = NULL; - p->pfik_flags &= ~PFI_IFLAG_ATTACHED; - pfi_index2kif[ifp->if_index] = NULL; - pfi_dohooks(p); - pfi_maybe_destroy(p); + hook_disestablish(ifp->if_addrhooks, kif->pfik_ah_cookie); + pfi_kif_update(kif); + + kif->pfik_ifp = NULL; 
+ ifp->if_pf_kif = NULL; + pfi_kif_unref(kif, PFI_KIF_REF_NONE); splx(s); } -struct pfi_kif * -pfi_lookup_create(const char *name) +void +pfi_attach_ifgroup(struct ifg_group *ifg) { - struct pfi_kif *p, *q, key; + struct pfi_kif *kif; int s; + pfi_initialize(); s = splsoftnet(); - p = pfi_lookup_if(name); - if (p == NULL) { - pfi_copy_group(key.pfik_name, name, sizeof(key.pfik_name)); - q = pfi_lookup_if(key.pfik_name); - if (q == NULL) { - pfi_newgroup(key.pfik_name, PFI_IFLAG_DYNAMIC); - q = pfi_lookup_if(key.pfik_name); - } - p = pfi_lookup_if(name); - if (p == NULL && q != NULL) - p = pfi_if_create(name, q, PFI_IFLAG_INSTANCE); - } - splx(s); - return (p); -} + pfi_update++; + if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL) + panic("pfi_kif_get failed"); -struct pfi_kif * -pfi_attach_rule(const char *name) -{ - struct pfi_kif *p; + kif->pfik_group = ifg; + ifg->ifg_pf_kif = (caddr_t)kif; - p = pfi_lookup_create(name); - if (p != NULL) - p->pfik_rules++; - return (p); + splx(s); } void -pfi_detach_rule(struct pfi_kif *p) +pfi_detach_ifgroup(struct ifg_group *ifg) { - if (p == NULL) + int s; + struct pfi_kif *kif; + + if ((kif = (struct pfi_kif *)ifg->ifg_pf_kif) == NULL) return; - if (p->pfik_rules > 0) - p->pfik_rules--; - else - printf("pfi_detach_rule: reference count at 0\n"); - pfi_maybe_destroy(p); + + s = splsoftnet(); + pfi_update++; + + kif->pfik_group = NULL; + ifg->ifg_pf_kif = NULL; + pfi_kif_unref(kif, PFI_KIF_REF_NONE); + splx(s); } void -pfi_attach_state(struct pfi_kif *p) +pfi_group_change(const char *group) { - if (!p->pfik_states++) - TAILQ_INSERT_TAIL(&pfi_statehead, p, pfik_w_states); + struct pfi_kif *kif; + int s; + + s = splsoftnet(); + pfi_update++; + if ((kif = pfi_kif_get(group)) == NULL) + panic("pfi_kif_get failed"); + + pfi_kif_update(kif); + + splx(s); } -void -pfi_detach_state(struct pfi_kif *p) +int +pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) { - if (p == NULL) - return; - if (p->pfik_states <= 0) { - printf("pfi_detach_state: reference count <= 0\n"); - return; + switch (af) { +#ifdef INET + case AF_INET: + switch (dyn->pfid_acnt4) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr4, + &dyn->pfid_mask4, a, AF_INET)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + switch (dyn->pfid_acnt6) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr6, + &dyn->pfid_mask6, a, AF_INET6)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); + } + break; +#endif /* INET6 */ + default: + return (0); } - if (!--p->pfik_states) - TAILQ_REMOVE(&pfi_statehead, p, pfik_w_states); - pfi_maybe_destroy(p); } int pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) { struct pfi_dynaddr *dyn; char tblname[PF_TABLE_NAME_SIZE]; struct pf_ruleset *ruleset = NULL; int s, rv = 0; if (aw->type != PF_ADDR_DYNIFTL) return (0); - dyn = pool_get(&pfi_addr_pl, PR_NOWAIT); - if (dyn == NULL) + if ((dyn = pool_get(&pfi_addr_pl, PR_NOWAIT)) == NULL) return (1); bzero(dyn, sizeof(*dyn)); s = splsoftnet(); - dyn->pfid_kif = pfi_attach_rule(aw->v.ifname); - if (dyn->pfid_kif == NULL) - senderr(1); + if (!strcmp(aw->v.ifname, "self")) + dyn->pfid_kif = pfi_kif_get(IFG_ALL); + else + dyn->pfid_kif = pfi_kif_get(aw->v.ifname); + if (dyn->pfid_kif == NULL) { + rv = 1; + goto _bad; + } + pfi_kif_ref(dyn->pfid_kif, PFI_KIF_REF_RULE); dyn->pfid_net = pfi_unmask(&aw->v.a.mask); if (af == AF_INET && dyn->pfid_net == 32) 
dyn->pfid_net = 128; strlcpy(tblname, aw->v.ifname, sizeof(tblname)); if (aw->iflags & PFI_AFLAG_NETWORK) strlcat(tblname, ":network", sizeof(tblname)); if (aw->iflags & PFI_AFLAG_BROADCAST) strlcat(tblname, ":broadcast", sizeof(tblname)); if (aw->iflags & PFI_AFLAG_PEER) strlcat(tblname, ":peer", sizeof(tblname)); if (aw->iflags & PFI_AFLAG_NOALIAS) strlcat(tblname, ":0", sizeof(tblname)); if (dyn->pfid_net != 128) snprintf(tblname + strlen(tblname), sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); - ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR); - if (ruleset == NULL) - senderr(1); + if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) { + rv = 1; + goto _bad; + } - dyn->pfid_kt = pfr_attach_table(ruleset, tblname); - if (dyn->pfid_kt == NULL) - senderr(1); + if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { + rv = 1; + goto _bad; + } dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE; dyn->pfid_iflags = aw->iflags; dyn->pfid_af = af; - dyn->pfid_hook_cookie = hook_establish(dyn->pfid_kif->pfik_ah_head, 1, - pfi_dynaddr_update, dyn); - if (dyn->pfid_hook_cookie == NULL) - senderr(1); + TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); aw->p.dyn = dyn; - pfi_dynaddr_update(aw->p.dyn); + pfi_kif_update(dyn->pfid_kif); splx(s); return (0); _bad: if (dyn->pfid_kt != NULL) pfr_detach_table(dyn->pfid_kt); if (ruleset != NULL) pf_remove_if_empty_ruleset(ruleset); if (dyn->pfid_kif != NULL) - pfi_detach_rule(dyn->pfid_kif); + pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE); pool_put(&pfi_addr_pl, dyn); splx(s); return (rv); } void -pfi_dynaddr_update(void *p) +pfi_kif_update(struct pfi_kif *kif) { - struct pfi_dynaddr *dyn = (struct pfi_dynaddr *)p; + struct ifg_list *ifgl; + struct pfi_dynaddr *p; + + /* update all dynaddr */ + TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry) + pfi_dynaddr_update(p); + + /* again for all groups kif is member of */ + if (kif->pfik_ifp != NULL) + TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) + pfi_kif_update((struct pfi_kif *) + ifgl->ifgl_group->ifg_pf_kif); +} + +void +pfi_dynaddr_update(struct pfi_dynaddr *dyn) +{ struct pfi_kif *kif; struct pfr_ktable *kt; if (dyn == NULL || dyn->pfid_kif == NULL || dyn->pfid_kt == NULL) panic("pfi_dynaddr_update"); kif = dyn->pfid_kif; kt = dyn->pfid_kt; + if (kt->pfrkt_larg != pfi_update) { /* this table needs to be brought up-to-date */ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); kt->pfrkt_larg = pfi_update; } pfr_dynaddr_update(kt, dyn); } void pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) { int e, size2 = 0; - struct pfi_kif *p; - struct pfr_table t; + struct ifg_member *ifgm; - if ((kif->pfik_flags & PFI_IFLAG_INSTANCE) && kif->pfik_ifp == NULL) { - pfr_clr_addrs(&kt->pfrkt_t, NULL, 0); - return; - } pfi_buffer_cnt = 0; - if ((kif->pfik_flags & PFI_IFLAG_INSTANCE)) + + if (kif->pfik_ifp != NULL) pfi_instance_add(kif->pfik_ifp, net, flags); - else if (strcmp(kif->pfik_name, "self")) { - TAILQ_FOREACH(p, &kif->pfik_grouphead, pfik_instances) - pfi_instance_add(p->pfik_ifp, net, flags); - } else { - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) - if (p->pfik_flags & PFI_IFLAG_INSTANCE) - pfi_instance_add(p->pfik_ifp, net, flags); - } - t = kt->pfrkt_t; - t.pfrt_flags = 0; - if ((e = pfr_set_addrs(&t, pfi_buffer, pfi_buffer_cnt, &size2, - NULL, NULL, NULL, 0))) + else if (kif->pfik_group != NULL) + TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) + pfi_instance_add(ifgm->ifgm_ifp, net, flags); + + if ((e 
+	    = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2,
+	    NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK)))
 		printf("pfi_table_update: cannot set %d new addresses "
 		    "into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e);
 }
 
 void
 pfi_instance_add(struct ifnet *ifp, int net, int flags)
 {
 	struct ifaddr	*ia;
 	int		 got4 = 0, got6 = 0;
 	int		 net2, af;
 
 	if (ifp == NULL)
 		return;
 	TAILQ_FOREACH(ia, &ifp->if_addrlist, ifa_list) {
 		if (ia->ifa_addr == NULL)
 			continue;
 		af = ia->ifa_addr->sa_family;
 		if (af != AF_INET && af != AF_INET6)
 			continue;
 		if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6)
 			continue;
 		if ((flags & PFI_AFLAG_BROADCAST) &&
 		    !(ifp->if_flags & IFF_BROADCAST))
 			continue;
 		if ((flags & PFI_AFLAG_PEER) &&
 		    !(ifp->if_flags & IFF_POINTOPOINT))
 			continue;
 		if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 &&
 		    IN6_IS_ADDR_LINKLOCAL(
 		    &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr))
 			continue;
 		if (flags & PFI_AFLAG_NOALIAS) {
 			if (af == AF_INET && got4)
 				continue;
 			if (af == AF_INET6 && got6)
 				continue;
 		}
 		if (af == AF_INET)
 			got4 = 1;
 		else if (af == AF_INET6)
 			got6 = 1;
 		net2 = net;
 		if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) {
-			if (af == AF_INET) {
+			if (af == AF_INET)
 				net2 = pfi_unmask(&((struct sockaddr_in *)
 				    ia->ifa_netmask)->sin_addr);
-			} else if (af == AF_INET6) {
+			else if (af == AF_INET6)
 				net2 = pfi_unmask(&((struct sockaddr_in6 *)
 				    ia->ifa_netmask)->sin6_addr);
-			}
 		}
 		if (af == AF_INET && net2 > 32)
 			net2 = 32;
 		if (flags & PFI_AFLAG_BROADCAST)
 			pfi_address_add(ia->ifa_broadaddr, af, net2);
 		else if (flags & PFI_AFLAG_PEER)
 			pfi_address_add(ia->ifa_dstaddr, af, net2);
 		else
 			pfi_address_add(ia->ifa_addr, af, net2);
 	}
 }
 
 void
 pfi_address_add(struct sockaddr *sa, int af, int net)
 {
 	struct pfr_addr	*p;
 	int		 i;
 
 	if (pfi_buffer_cnt >= pfi_buffer_max) {
 		int		 new_max = pfi_buffer_max * 2;
 
 		if (new_max > PFI_BUFFER_MAX) {
 			printf("pfi_address_add: address buffer full (%d/%d)\n",
 			    pfi_buffer_cnt, PFI_BUFFER_MAX);
 			return;
 		}
 		p = malloc(new_max * sizeof(*pfi_buffer), PFI_MTYPE,
 		    M_DONTWAIT);
 		if (p == NULL) {
 			printf("pfi_address_add: no memory to grow buffer "
 			    "(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX);
 			return;
 		}
 		memcpy(p, pfi_buffer, pfi_buffer_cnt * sizeof(*pfi_buffer));
 		/* no need to zero buffer */
 		free(pfi_buffer, PFI_MTYPE);
 		pfi_buffer = p;
 		pfi_buffer_max = new_max;
 	}
 	if (af == AF_INET && net > 32)
 		net = 128;
 	p = pfi_buffer + pfi_buffer_cnt++;
 	bzero(p, sizeof(*p));
 	p->pfra_af = af;
 	p->pfra_net = net;
 	if (af == AF_INET)
 		p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr;
-	if (af == AF_INET6) {
+	else if (af == AF_INET6) {
 		p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr;
-		if (IN6_IS_ADDR_LINKLOCAL(&p->pfra_ip6addr))
+		if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr))
 			p->pfra_ip6addr.s6_addr16[1] = 0;
 	}
 	/* mask network address bits */
 	if (net < 128)
 		((caddr_t)p)[p->pfra_net/8] &= ~(0xFF >> (p->pfra_net%8));
 	for (i = (p->pfra_net+7)/8; i < sizeof(p->pfra_u); i++)
 		((caddr_t)p)[i] = 0;
 }
 
 void
 pfi_dynaddr_remove(struct pf_addr_wrap *aw)
 {
 	int	s;
 
 	if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL ||
 	    aw->p.dyn->pfid_kif == NULL || aw->p.dyn->pfid_kt == NULL)
 		return;
 
 	s = splsoftnet();
-	hook_disestablish(aw->p.dyn->pfid_kif->pfik_ah_head,
-	    aw->p.dyn->pfid_hook_cookie);
-	pfi_detach_rule(aw->p.dyn->pfid_kif);
+	TAILQ_REMOVE(&aw->p.dyn->pfid_kif->pfik_dynaddrs, aw->p.dyn, entry);
+	pfi_kif_unref(aw->p.dyn->pfid_kif, PFI_KIF_REF_RULE);
 	aw->p.dyn->pfid_kif = NULL;
 	pfr_detach_table(aw->p.dyn->pfid_kt);
 	aw->p.dyn->pfid_kt = NULL;
 	pool_put(&pfi_addr_pl, aw->p.dyn);
 	aw->p.dyn = NULL;
 	splx(s);
 }
 
 void
pfi_dynaddr_copyout(struct pf_addr_wrap *aw) { if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL || aw->p.dyn->pfid_kif == NULL) return; aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6; } void pfi_kifaddr_update(void *v) { - int s; + int s; + struct pfi_kif *kif = (struct pfi_kif *)v; s = splsoftnet(); pfi_update++; - pfi_dohooks(v); + pfi_kif_update(kif); splx(s); } int pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) { return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); } -struct pfi_kif * -pfi_if_create(const char *name, struct pfi_kif *q, int flags) -{ - struct pfi_kif *p; - - p = malloc(sizeof(*p), PFI_MTYPE, M_DONTWAIT); - if (p == NULL) - return (NULL); - bzero(p, sizeof(*p)); - p->pfik_ah_head = malloc(sizeof(*p->pfik_ah_head), PFI_MTYPE, - M_DONTWAIT); - if (p->pfik_ah_head == NULL) { - free(p, PFI_MTYPE); - return (NULL); - } - bzero(p->pfik_ah_head, sizeof(*p->pfik_ah_head)); - TAILQ_INIT(p->pfik_ah_head); - TAILQ_INIT(&p->pfik_grouphead); - strlcpy(p->pfik_name, name, sizeof(p->pfik_name)); - RB_INIT(&p->pfik_lan_ext); - RB_INIT(&p->pfik_ext_gwy); - p->pfik_flags = flags; - p->pfik_parent = q; - p->pfik_tzero = time_second; - - RB_INSERT(pfi_ifhead, &pfi_ifs, p); - if (q != NULL) { - q->pfik_addcnt++; - TAILQ_INSERT_TAIL(&q->pfik_grouphead, p, pfik_instances); - } - pfi_ifcnt++; - return (p); -} - -int -pfi_maybe_destroy(struct pfi_kif *p) -{ - int i, j, k, s; - struct pfi_kif *q = p->pfik_parent; - - if ((p->pfik_flags & (PFI_IFLAG_ATTACHED | PFI_IFLAG_GROUP)) || - p->pfik_rules > 0 || p->pfik_states > 0) - return (0); - - s = splsoftnet(); - if (q != NULL) { - for (i = 0; i < 2; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) { - q->pfik_bytes[i][j][k] += - p->pfik_bytes[i][j][k]; - q->pfik_packets[i][j][k] += - p->pfik_packets[i][j][k]; - } - q->pfik_delcnt++; - TAILQ_REMOVE(&q->pfik_grouphead, p, pfik_instances); - } - pfi_ifcnt--; - RB_REMOVE(pfi_ifhead, &pfi_ifs, p); - splx(s); - - free(p->pfik_ah_head, PFI_MTYPE); - free(p, PFI_MTYPE); - return (1); -} - void -pfi_copy_group(char *p, const char *q, int m) -{ - while (m > 1 && *q && !(*q >= '0' && *q <= '9')) { - *p++ = *q++; - m--; - } - if (m > 0) - *p++ = '\0'; -} - -void -pfi_newgroup(const char *name, int flags) -{ - struct pfi_kif *p; - - p = pfi_lookup_if(name); - if (p == NULL) - p = pfi_if_create(name, pfi_self, PFI_IFLAG_GROUP); - if (p == NULL) { - printf("pfi_newgroup: cannot allocate '%s' group", name); - return; - } - p->pfik_flags |= flags; -} - -void pfi_fill_oldstatus(struct pf_status *pfs) { - struct pfi_kif *p, key; - int i, j, k, s; + struct pfi_kif *p; + struct pfi_kif_cmp key; + int i, j, k, s; strlcpy(key.pfik_name, pfs->ifname, sizeof(key.pfik_name)); s = splsoftnet(); - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); + p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key); if (p == NULL) { splx(s); return; } bzero(pfs->pcounters, sizeof(pfs->pcounters)); bzero(pfs->bcounters, sizeof(pfs->bcounters)); for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) { pfs->pcounters[i][j][k] = p->pfik_packets[i][j][k]; pfs->bcounters[i][j] += p->pfik_bytes[i][j][k]; } splx(s); } int -pfi_clr_istats(const char *name, int *nzero, int flags) +pfi_clr_istats(const char *name) { struct pfi_kif *p; - int n = 0, s; - long tzero = time_second; + int s; - ACCEPT_FLAGS(PFI_FLAG_GROUP|PFI_FLAG_INSTANCE); s = splsoftnet(); RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, flags)) + if (pfi_skip_if(name, p)) continue; bzero(p->pfik_packets, 
sizeof(p->pfik_packets)); bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); - p->pfik_tzero = tzero; - n++; + p->pfik_tzero = time_second; } splx(s); - if (nzero != NULL) - *nzero = n; - return (0); -} -int -pfi_set_flags(const char *name, int flags) -{ - struct pfi_kif *p; - int s; - - if (flags & ~PFI_IFLAG_SETABLE_MASK) - return (EINVAL); - - s = splsoftnet(); - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, PFI_FLAG_GROUP|PFI_FLAG_INSTANCE)) - continue; - p->pfik_flags |= flags; - } - splx(s); return (0); } int -pfi_clear_flags(const char *name, int flags) +pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) { - struct pfi_kif *p; - int s; - - if (flags & ~PFI_IFLAG_SETABLE_MASK) - return (EINVAL); - - s = splsoftnet(); - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, PFI_FLAG_GROUP|PFI_FLAG_INSTANCE)) - continue; - p->pfik_flags &= ~flags; - } - splx(s); - return (0); -} - -int -pfi_get_ifaces(const char *name, struct pfi_if *buf, int *size, int flags) -{ - struct pfi_kif *p; + struct pfi_kif *p, *nextp; int s, n = 0; - ACCEPT_FLAGS(PFI_FLAG_GROUP|PFI_FLAG_INSTANCE); s = splsoftnet(); - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, flags)) + for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) { + nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); + if (pfi_skip_if(name, p)) continue; if (*size > n++) { if (!p->pfik_tzero) p->pfik_tzero = time_second; + pfi_kif_ref(p, PFI_KIF_REF_RULE); if (copyout(p, buf++, sizeof(*buf))) { + pfi_kif_unref(p, PFI_KIF_REF_RULE); splx(s); return (EFAULT); } + nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); + pfi_kif_unref(p, PFI_KIF_REF_RULE); } } splx(s); *size = n; return (0); } -struct pfi_kif * -pfi_lookup_if(const char *name) -{ - struct pfi_kif *p, key; - - strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - return (p); -} - int -pfi_skip_if(const char *filter, struct pfi_kif *p, int f) +pfi_skip_if(const char *filter, struct pfi_kif *p) { int n; - if ((p->pfik_flags & PFI_IFLAG_GROUP) && !(f & PFI_FLAG_GROUP)) - return (1); - if ((p->pfik_flags & PFI_IFLAG_INSTANCE) && !(f & PFI_FLAG_INSTANCE)) - return (1); if (filter == NULL || !*filter) return (0); if (!strcmp(p->pfik_name, filter)) return (0); /* exact match */ n = strlen(filter); if (n < 1 || n >= IFNAMSIZ) return (1); /* sanity check */ if (filter[n-1] >= '0' && filter[n-1] <= '9') return (1); /* only do exact match in that case */ if (strncmp(p->pfik_name, filter, n)) return (1); /* prefix doesn't match */ return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9'); } +int +pfi_set_flags(const char *name, int flags) +{ + struct pfi_kif *p; + int s; + + s = splsoftnet(); + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, p)) + continue; + p->pfik_flags |= flags; + } + splx(s); + return (0); +} + +int +pfi_clear_flags(const char *name, int flags) +{ + struct pfi_kif *p; + int s; + + s = splsoftnet(); + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, p)) + continue; + p->pfik_flags &= ~flags; + } + splx(s); + return (0); +} + /* from pf_print_state.c */ int pfi_unmask(void *addr) { struct pf_addr *m = addr; int i = 31, j = 0, b = 0; u_int32_t tmp; while (j < 4 && m->addr32[j] == 0xffffffff) { b += 32; j++; } if (j < 4) { tmp = ntohl(m->addr32[j]); for (i = 31; tmp & (1 << i); --i) b++; } return (b); } -void -pfi_dohooks(struct pfi_kif *p) -{ - for (; p != NULL; p = p->pfik_parent) - dohooks(p->pfik_ah_head, 0); -} - -int -pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr 
*a, sa_family_t af) -{ - switch (af) { -#ifdef INET - case AF_INET: - switch (dyn->pfid_acnt4) { - case 0: - return (0); - case 1: - return (PF_MATCHA(0, &dyn->pfid_addr4, - &dyn->pfid_mask4, a, AF_INET)); - default: - return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); - } - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - switch (dyn->pfid_acnt6) { - case 0: - return (0); - case 1: - return (PF_MATCHA(0, &dyn->pfid_addr6, - &dyn->pfid_mask6, a, AF_INET6)); - default: - return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); - } - break; -#endif /* INET6 */ - default: - return (0); - } -} Index: vendor-sys/pf/dist/sys/contrib/pf/net/pf_ioctl.c =================================================================== --- vendor-sys/pf/dist/sys/contrib/pf/net/pf_ioctl.c (revision 171163) +++ vendor-sys/pf/dist/sys/contrib/pf/net/pf_ioctl.c (revision 171164) @@ -1,2811 +1,2879 @@ -/* $OpenBSD: pf_ioctl.c,v 1.139 2005/03/03 07:13:39 dhartmei Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.175 2007/02/26 22:47:43 deraadt Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002,2003 Henning Brauer * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
* */ #include "pfsync.h" #include #include #include #include #include #include #include #include #include #include #include +#include #include +#include +#include +#include #include #include #include #include #include #include #include #include #include #include +#include #include #if NPFSYNC > 0 #include #endif /* NPFSYNC > 0 */ +#if NPFLOG > 0 +#include +#endif /* NPFLOG > 0 */ + #ifdef INET6 #include #include #endif /* INET6 */ #ifdef ALTQ #include #endif void pfattach(int); +void pf_thread_create(void *); int pfopen(dev_t, int, int, struct proc *); int pfclose(dev_t, int, int, struct proc *); struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t); -int pf_get_ruleset_number(u_int8_t); -void pf_init_ruleset(struct pf_ruleset *); -int pf_anchor_setup(struct pf_rule *, - const struct pf_ruleset *, const char *); -int pf_anchor_copyout(const struct pf_ruleset *, - const struct pf_rule *, struct pfioc_rule *); -void pf_anchor_remove(struct pf_rule *); void pf_mv_pool(struct pf_palist *, struct pf_palist *); void pf_empty_pool(struct pf_palist *); int pfioctl(dev_t, u_long, caddr_t, int, struct proc *); #ifdef ALTQ int pf_begin_altq(u_int32_t *); int pf_rollback_altq(u_int32_t); int pf_commit_altq(u_int32_t); int pf_enable_altq(struct pf_altq *); int pf_disable_altq(struct pf_altq *); #endif /* ALTQ */ int pf_begin_rules(u_int32_t *, int, const char *); int pf_rollback_rules(u_int32_t, int, char *); +int pf_setup_pfsync_matching(struct pf_ruleset *); +void pf_hash_rule(MD5_CTX *, struct pf_rule *); +void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); int pf_commit_rules(u_int32_t, int, char *); -extern struct timeout pf_expire_to; - struct pf_rule pf_default_rule; +struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER; #ifdef ALTQ static int pf_altq_running; #endif #define TAGID_MAX 50000 TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids); #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif -static u_int16_t tagname2tag(struct pf_tags *, char *); -static void tag2tagname(struct pf_tags *, u_int16_t, char *); -static void tag_unref(struct pf_tags *, u_int16_t); +u_int16_t tagname2tag(struct pf_tags *, char *); +void tag2tagname(struct pf_tags *, u_int16_t, char *); +void tag_unref(struct pf_tags *, u_int16_t); int pf_rtlabel_add(struct pf_addr_wrap *); void pf_rtlabel_remove(struct pf_addr_wrap *); void pf_rtlabel_copyout(struct pf_addr_wrap *); #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x void pfattach(int num) { u_int32_t *timeout = pf_default_rule.timeout; pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0, 0, 0, "pfrulepl", &pool_allocator_nointr); pool_init(&pf_src_tree_pl, sizeof(struct pf_src_node), 0, 0, 0, "pfsrctrpl", NULL); pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl", NULL); pool_init(&pf_altq_pl, sizeof(struct pf_altq), 0, 0, 0, "pfaltqpl", &pool_allocator_nointr); pool_init(&pf_pooladdr_pl, sizeof(struct pf_pooladdr), 0, 0, 0, "pfpooladdrpl", &pool_allocator_nointr); pfr_initialize(); pfi_initialize(); pf_osfp_initialize(); pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp, pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0); + if (ctob(physmem) <= 100*1024*1024) + pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = + PFR_KENTRY_HIWAT_SMALL; + RB_INIT(&tree_src_tracking); RB_INIT(&pf_anchors); pf_init_ruleset(&pf_main_ruleset); TAILQ_INIT(&pf_altqs[0]); TAILQ_INIT(&pf_altqs[1]); 
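/*
 * pfattach() above now scales a default with machine size: when
 * ctob(physmem) is at or below 100MB, the table-entry pool limit drops to
 * PFR_KENTRY_HIWAT_SMALL.  A sketch of that auto-tuning decision; the
 * numeric caps are illustrative stand-ins for the pfvar.h constants, and
 * table_entry_limit() is a hypothetical helper:
 */
#include <stdint.h>

#define KENTRY_HIWAT		200000	/* stand-in for PFR_KENTRY_HIWAT */
#define KENTRY_HIWAT_SMALL	100000	/* stand-in for the _SMALL cap */

static unsigned int
table_entry_limit(uint64_t membytes)
{
	/* small machines get the conservative table-entry ceiling */
	if (membytes <= 100ULL * 1024 * 1024)
		return (KENTRY_HIWAT_SMALL);
	return (KENTRY_HIWAT);
}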
TAILQ_INIT(&pf_pabuf); pf_altqs_active = &pf_altqs[0]; pf_altqs_inactive = &pf_altqs[1]; - TAILQ_INIT(&state_updates); + TAILQ_INIT(&state_list); /* default rule should never be garbage collected */ pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; pf_default_rule.action = PF_PASS; pf_default_rule.nr = -1; + pf_default_rule.rtableid = -1; /* initialize default timeouts */ timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL; timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL; timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL; timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL; timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL; timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL; timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL; timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL; timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL; timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL; timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL; timeout[PFTM_FRAG] = PFTM_FRAG_VAL; timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; + timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; + timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; - timeout_set(&pf_expire_to, pf_purge_timeout, &pf_expire_to); - timeout_add(&pf_expire_to, timeout[PFTM_INTERVAL] * hz); - pf_normalize_init(); bzero(&pf_status, sizeof(pf_status)); pf_status.debug = PF_DEBUG_URGENT; /* XXX do our best to avoid a conflict */ pf_status.hostid = arc4random(); + + /* require process context to purge states, so perform in a thread */ + kthread_create_deferred(pf_thread_create, NULL); } +void +pf_thread_create(void *v) +{ + if (kthread_create(pf_purge_thread, NULL, NULL, "pfpurge")) + panic("pfpurge thread"); +} + int pfopen(dev_t dev, int flags, int fmt, struct proc *p) { if (minor(dev) >= 1) return (ENXIO); return (0); } int pfclose(dev_t dev, int flags, int fmt, struct proc *p) { if (minor(dev) >= 1) return (ENXIO); return (0); } struct pf_pool * pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, u_int32_t rule_number, u_int8_t r_last, u_int8_t active, u_int8_t check_ticket) { struct pf_ruleset *ruleset; struct pf_rule *rule; int rs_num; ruleset = pf_find_ruleset(anchor); if (ruleset == NULL) return (NULL); rs_num = pf_get_ruleset_number(rule_action); if (rs_num >= PF_RULESET_MAX) return (NULL); if (active) { if (check_ticket && ticket != ruleset->rules[rs_num].active.ticket) return (NULL); if (r_last) rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, pf_rulequeue); else rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); } else { if (check_ticket && ticket != ruleset->rules[rs_num].inactive.ticket) return (NULL); if (r_last) rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_rulequeue); else rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr); } if (!r_last) { while ((rule != NULL) && (rule->nr != rule_number)) rule = TAILQ_NEXT(rule, entries); } if (rule == NULL) return (NULL); return (&rule->rpool); } -int -pf_get_ruleset_number(u_int8_t action) -{ - switch (action) { - case PF_SCRUB: - case PF_NOSCRUB: - return (PF_RULESET_SCRUB); - break; - case PF_PASS: - case PF_DROP: - return (PF_RULESET_FILTER); - break; - case PF_NAT: - case PF_NONAT: - return (PF_RULESET_NAT); - break; - case PF_BINAT: - case PF_NOBINAT: - 
return (PF_RULESET_BINAT); - break; - case PF_RDR: - case PF_NORDR: - return (PF_RULESET_RDR); - break; - default: - return (PF_RULESET_MAX); - break; - } -} - void -pf_init_ruleset(struct pf_ruleset *ruleset) -{ - int i; - - memset(ruleset, 0, sizeof(struct pf_ruleset)); - for (i = 0; i < PF_RULESET_MAX; i++) { - TAILQ_INIT(&ruleset->rules[i].queues[0]); - TAILQ_INIT(&ruleset->rules[i].queues[1]); - ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; - ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; - } -} - -struct pf_anchor * -pf_find_anchor(const char *path) -{ - static struct pf_anchor key; - - memset(&key, 0, sizeof(key)); - strlcpy(key.path, path, sizeof(key.path)); - return (RB_FIND(pf_anchor_global, &pf_anchors, &key)); -} - -struct pf_ruleset * -pf_find_ruleset(const char *path) -{ - struct pf_anchor *anchor; - - while (*path == '/') - path++; - if (!*path) - return (&pf_main_ruleset); - anchor = pf_find_anchor(path); - if (anchor == NULL) - return (NULL); - else - return (&anchor->ruleset); -} - -struct pf_ruleset * -pf_find_or_create_ruleset(const char *path) -{ - static char p[MAXPATHLEN]; - char *q, *r; - struct pf_ruleset *ruleset; - struct pf_anchor *anchor, *dup, *parent = NULL; - - while (*path == '/') - path++; - ruleset = pf_find_ruleset(path); - if (ruleset != NULL) - return (ruleset); - strlcpy(p, path, sizeof(p)); - while (parent == NULL && (q = strrchr(p, '/')) != NULL) { - *q = 0; - if ((ruleset = pf_find_ruleset(p)) != NULL) { - parent = ruleset->anchor; - break; - } - } - if (q == NULL) - q = p; - else - q++; - strlcpy(p, path, sizeof(p)); - if (!*q) - return (NULL); - while ((r = strchr(q, '/')) != NULL || *q) { - if (r != NULL) - *r = 0; - if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE || - (parent != NULL && strlen(parent->path) >= - MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) - return (NULL); - anchor = (struct pf_anchor *)malloc(sizeof(*anchor), M_TEMP, - M_NOWAIT); - if (anchor == NULL) - return (NULL); - memset(anchor, 0, sizeof(*anchor)); - RB_INIT(&anchor->children); - strlcpy(anchor->name, q, sizeof(anchor->name)); - if (parent != NULL) { - strlcpy(anchor->path, parent->path, - sizeof(anchor->path)); - strlcat(anchor->path, "/", sizeof(anchor->path)); - } - strlcat(anchor->path, anchor->name, sizeof(anchor->path)); - if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != - NULL) { - printf("pf_find_or_create_ruleset: RB_INSERT1 " - "'%s' '%s' collides with '%s' '%s'\n", - anchor->path, anchor->name, dup->path, dup->name); - free(anchor, M_TEMP); - return (NULL); - } - if (parent != NULL) { - anchor->parent = parent; - if ((dup = RB_INSERT(pf_anchor_node, &parent->children, - anchor)) != NULL) { - printf("pf_find_or_create_ruleset: " - "RB_INSERT2 '%s' '%s' collides with " - "'%s' '%s'\n", anchor->path, anchor->name, - dup->path, dup->name); - RB_REMOVE(pf_anchor_global, &pf_anchors, - anchor); - free(anchor, M_TEMP); - return (NULL); - } - } - pf_init_ruleset(&anchor->ruleset); - anchor->ruleset.anchor = anchor; - parent = anchor; - if (r != NULL) - q = r + 1; - else - *q = 0; - } - return (&anchor->ruleset); -} - -void -pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) -{ - struct pf_anchor *parent; - int i; - - while (ruleset != NULL) { - if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || - !RB_EMPTY(&ruleset->anchor->children) || - ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || - ruleset->topen) - return; - for (i = 0; i < PF_RULESET_MAX; ++i) - if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || - 
!TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || - ruleset->rules[i].inactive.open) - return; - RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); - if ((parent = ruleset->anchor->parent) != NULL) - RB_REMOVE(pf_anchor_node, &parent->children, - ruleset->anchor); - free(ruleset->anchor, M_TEMP); - if (parent == NULL) - return; - ruleset = &parent->ruleset; - } -} - -int -pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, - const char *name) -{ - static char *p, path[MAXPATHLEN]; - struct pf_ruleset *ruleset; - - r->anchor = NULL; - r->anchor_relative = 0; - r->anchor_wildcard = 0; - if (!name[0]) - return (0); - if (name[0] == '/') - strlcpy(path, name + 1, sizeof(path)); - else { - /* relative path */ - r->anchor_relative = 1; - if (s->anchor == NULL || !s->anchor->path[0]) - path[0] = 0; - else - strlcpy(path, s->anchor->path, sizeof(path)); - while (name[0] == '.' && name[1] == '.' && name[2] == '/') { - if (!path[0]) { - printf("pf_anchor_setup: .. beyond root\n"); - return (1); - } - if ((p = strrchr(path, '/')) != NULL) - *p = 0; - else - path[0] = 0; - r->anchor_relative++; - name += 3; - } - if (path[0]) - strlcat(path, "/", sizeof(path)); - strlcat(path, name, sizeof(path)); - } - if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) { - r->anchor_wildcard = 1; - *p = 0; - } - ruleset = pf_find_or_create_ruleset(path); - if (ruleset == NULL || ruleset->anchor == NULL) { - printf("pf_anchor_setup: ruleset\n"); - return (1); - } - r->anchor = ruleset->anchor; - r->anchor->refcnt++; - return (0); -} - -int -pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, - struct pfioc_rule *pr) -{ - pr->anchor_call[0] = 0; - if (r->anchor == NULL) - return (0); - if (!r->anchor_relative) { - strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call)); - strlcat(pr->anchor_call, r->anchor->path, - sizeof(pr->anchor_call)); - } else { - char a[MAXPATHLEN], b[MAXPATHLEN], *p; - int i; - - if (rs->anchor == NULL) - a[0] = 0; - else - strlcpy(a, rs->anchor->path, sizeof(a)); - strlcpy(b, r->anchor->path, sizeof(b)); - for (i = 1; i < r->anchor_relative; ++i) { - if ((p = strrchr(a, '/')) == NULL) - p = a; - *p = 0; - strlcat(pr->anchor_call, "../", - sizeof(pr->anchor_call)); - } - if (strncmp(a, b, strlen(a))) { - printf("pf_anchor_copyout: '%s' '%s'\n", a, b); - return (1); - } - if (strlen(b) > strlen(a)) - strlcat(pr->anchor_call, b + (a[0] ? strlen(a) + 1 : 0), - sizeof(pr->anchor_call)); - } - if (r->anchor_wildcard) - strlcat(pr->anchor_call, pr->anchor_call[0] ? 
"/*" : "*", - sizeof(pr->anchor_call)); - return (0); -} - -void -pf_anchor_remove(struct pf_rule *r) -{ - if (r->anchor == NULL) - return; - if (r->anchor->refcnt <= 0) { - printf("pf_anchor_remove: broken refcount"); - r->anchor = NULL; - return; - } - if (!--r->anchor->refcnt) - pf_remove_if_empty_ruleset(&r->anchor->ruleset); - r->anchor = NULL; -} - -void pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) { struct pf_pooladdr *mv_pool_pa; while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) { TAILQ_REMOVE(poola, mv_pool_pa, entries); TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries); } } void pf_empty_pool(struct pf_palist *poola) { struct pf_pooladdr *empty_pool_pa; while ((empty_pool_pa = TAILQ_FIRST(poola)) != NULL) { pfi_dynaddr_remove(&empty_pool_pa->addr); pf_tbladdr_remove(&empty_pool_pa->addr); - pfi_detach_rule(empty_pool_pa->kif); + pfi_kif_unref(empty_pool_pa->kif, PFI_KIF_REF_RULE); TAILQ_REMOVE(poola, empty_pool_pa, entries); pool_put(&pf_pooladdr_pl, empty_pool_pa); } } void pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) { if (rulequeue != NULL) { if (rule->states <= 0) { /* * XXX - we need to remove the table *before* detaching * the rule to make sure the table code does not delete * the anchor under our feet. */ pf_tbladdr_remove(&rule->src.addr); pf_tbladdr_remove(&rule->dst.addr); if (rule->overload_tbl) pfr_detach_table(rule->overload_tbl); } TAILQ_REMOVE(rulequeue, rule, entries); rule->entries.tqe_prev = NULL; rule->nr = -1; } if (rule->states > 0 || rule->src_nodes > 0 || rule->entries.tqe_prev != NULL) return; pf_tag_unref(rule->tag); pf_tag_unref(rule->match_tag); #ifdef ALTQ if (rule->pqid != rule->qid) pf_qid_unref(rule->pqid); pf_qid_unref(rule->qid); #endif pf_rtlabel_remove(&rule->src.addr); pf_rtlabel_remove(&rule->dst.addr); pfi_dynaddr_remove(&rule->src.addr); pfi_dynaddr_remove(&rule->dst.addr); if (rulequeue == NULL) { pf_tbladdr_remove(&rule->src.addr); pf_tbladdr_remove(&rule->dst.addr); if (rule->overload_tbl) pfr_detach_table(rule->overload_tbl); } - pfi_detach_rule(rule->kif); + pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE); pf_anchor_remove(rule); pf_empty_pool(&rule->rpool.list); pool_put(&pf_rule_pl, rule); } -static u_int16_t +u_int16_t tagname2tag(struct pf_tags *head, char *tagname) { struct pf_tagname *tag, *p = NULL; u_int16_t new_tagid = 1; TAILQ_FOREACH(tag, head, entries) if (strcmp(tagname, tag->name) == 0) { tag->ref++; return (tag->tag); } /* * to avoid fragmentation, we do a linear search from the beginning * and take the first free slot we find. if there is none or the list * is empty, append a new entry at the end. 
*/ /* new entry */ if (!TAILQ_EMPTY(head)) for (p = TAILQ_FIRST(head); p != NULL && p->tag == new_tagid; p = TAILQ_NEXT(p, entries)) new_tagid = p->tag + 1; if (new_tagid > TAGID_MAX) return (0); /* allocate and fill new struct pf_tagname */ tag = (struct pf_tagname *)malloc(sizeof(struct pf_tagname), M_TEMP, M_NOWAIT); if (tag == NULL) return (0); bzero(tag, sizeof(struct pf_tagname)); strlcpy(tag->name, tagname, sizeof(tag->name)); tag->tag = new_tagid; tag->ref++; if (p != NULL) /* insert new entry before p */ TAILQ_INSERT_BEFORE(p, tag, entries); else /* either list empty or no free slot in between */ TAILQ_INSERT_TAIL(head, tag, entries); return (tag->tag); } -static void +void tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p) { struct pf_tagname *tag; TAILQ_FOREACH(tag, head, entries) if (tag->tag == tagid) { strlcpy(p, tag->name, PF_TAG_NAME_SIZE); return; } } -static void +void tag_unref(struct pf_tags *head, u_int16_t tag) { struct pf_tagname *p, *next; if (tag == 0) return; for (p = TAILQ_FIRST(head); p != NULL; p = next) { next = TAILQ_NEXT(p, entries); if (tag == p->tag) { if (--p->ref == 0) { TAILQ_REMOVE(head, p, entries); free(p, M_TEMP); } break; } } } u_int16_t pf_tagname2tag(char *tagname) { return (tagname2tag(&pf_tags, tagname)); } void pf_tag2tagname(u_int16_t tagid, char *p) { - return (tag2tagname(&pf_tags, tagid, p)); + tag2tagname(&pf_tags, tagid, p); } void pf_tag_ref(u_int16_t tag) { struct pf_tagname *t; TAILQ_FOREACH(t, &pf_tags, entries) if (t->tag == tag) break; if (t != NULL) t->ref++; } void pf_tag_unref(u_int16_t tag) { - return (tag_unref(&pf_tags, tag)); + tag_unref(&pf_tags, tag); } int pf_rtlabel_add(struct pf_addr_wrap *a) { if (a->type == PF_ADDR_RTLABEL && (a->v.rtlabel = rtlabel_name2id(a->v.rtlabelname)) == 0) return (-1); return (0); } void pf_rtlabel_remove(struct pf_addr_wrap *a) { if (a->type == PF_ADDR_RTLABEL) rtlabel_unref(a->v.rtlabel); } void pf_rtlabel_copyout(struct pf_addr_wrap *a) { const char *name; if (a->type == PF_ADDR_RTLABEL && a->v.rtlabel) { if ((name = rtlabel_id2name(a->v.rtlabel)) == NULL) strlcpy(a->v.rtlabelname, "?", sizeof(a->v.rtlabelname)); else strlcpy(a->v.rtlabelname, name, sizeof(a->v.rtlabelname)); } } #ifdef ALTQ u_int32_t pf_qname2qid(char *qname) { return ((u_int32_t)tagname2tag(&pf_qids, qname)); } void pf_qid2qname(u_int32_t qid, char *p) { - return (tag2tagname(&pf_qids, (u_int16_t)qid, p)); + tag2tagname(&pf_qids, (u_int16_t)qid, p); } void pf_qid_unref(u_int32_t qid) { - return (tag_unref(&pf_qids, (u_int16_t)qid)); + tag_unref(&pf_qids, (u_int16_t)qid); } int pf_begin_altq(u_int32_t *ticket) { struct pf_altq *altq; int error = 0; /* Purge the old altq list */ while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); pool_put(&pf_altq_pl, altq); } if (error) return (error); *ticket = ++ticket_altqs_inactive; altqs_inactive_open = 1; return (0); } int pf_rollback_altq(u_int32_t ticket) { struct pf_altq *altq; int error = 0; if (!altqs_inactive_open || ticket != ticket_altqs_inactive) return (0); /* Purge the old altq list */ while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); } else pf_qid_unref(altq->qid); pool_put(&pf_altq_pl, altq); } altqs_inactive_open = 0; return (error); } 
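/*
 * pf_begin_altq()/pf_rollback_altq() above and pf_commit_altq() below
 * (like their pf_begin_rules/pf_commit_rules counterparts) share one
 * ticketed two-phase commit scheme: begin purges the inactive list and
 * issues a ticket, the caller loads new entries into the inactive list,
 * and commit validates the ticket and then swaps the active and inactive
 * pointers inside a critical section, so lookups never see a half-loaded
 * set.  A minimal generic sketch with hypothetical names; the kernel runs
 * the swap at splsoftnet() where this sketch simply swaps:
 */
struct txn {
	void		*active;	/* what lookups use */
	void		*inactive;	/* staging area for the next load */
	unsigned int	 ticket;	/* invalidates stale commits */
	int		 open;
};

static unsigned int
txn_begin(struct txn *t)
{
	/* caller rebuilds t->inactive from scratch after this */
	t->open = 1;
	return (++t->ticket);
}

static int
txn_commit(struct txn *t, unsigned int ticket)
{
	void *old;

	if (!t->open || ticket != t->ticket)
		return (-1);	/* the kernel returns EBUSY here */
	old = t->active;	/* swap; the old list is then purged */
	t->active = t->inactive;
	t->inactive = old;
	t->open = 0;
	return (0);
}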
int pf_commit_altq(u_int32_t ticket) { struct pf_altqqueue *old_altqs; struct pf_altq *altq; int s, err, error = 0; if (!altqs_inactive_open || ticket != ticket_altqs_inactive) return (EBUSY); /* swap altqs, keep the old. */ s = splsoftnet(); old_altqs = pf_altqs_active; pf_altqs_active = pf_altqs_inactive; pf_altqs_inactive = old_altqs; ticket_altqs_active = ticket_altqs_inactive; /* Attach new disciplines */ TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { /* attach the discipline */ error = altq_pfattach(altq); if (error == 0 && pf_altq_running) error = pf_enable_altq(altq); if (error != 0) { splx(s); return (error); } } } /* Purge the old altq list */ while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { TAILQ_REMOVE(pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0) { /* detach and destroy the discipline */ if (pf_altq_running) error = pf_disable_altq(altq); err = altq_pfdetach(altq); if (err != 0 && error == 0) error = err; err = altq_remove(altq); if (err != 0 && error == 0) error = err; } else pf_qid_unref(altq->qid); pool_put(&pf_altq_pl, altq); } splx(s); altqs_inactive_open = 0; return (error); } int pf_enable_altq(struct pf_altq *altq) { struct ifnet *ifp; struct tb_profile tb; int s, error = 0; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); if (ifp->if_snd.altq_type != ALTQT_NONE) error = altq_enable(&ifp->if_snd); /* set tokenbucket regulator */ if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { tb.rate = altq->ifbandwidth; tb.depth = altq->tbrsize; - s = splimp(); + s = splnet(); error = tbr_set(&ifp->if_snd, &tb); splx(s); } return (error); } int pf_disable_altq(struct pf_altq *altq) { struct ifnet *ifp; struct tb_profile tb; int s, error; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); /* * when the discipline is no longer referenced, it was overridden * by a new one. if so, just return. 
*/ if (altq->altq_disc != ifp->if_snd.altq_disc) return (0); error = altq_disable(&ifp->if_snd); if (error == 0) { /* clear tokenbucket regulator */ tb.rate = 0; - s = splimp(); + s = splnet(); error = tbr_set(&ifp->if_snd, &tb); splx(s); } return (error); } #endif /* ALTQ */ int pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_or_create_ruleset(anchor); if (rs == NULL) return (EINVAL); - while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } *ticket = ++rs->rules[rs_num].inactive.ticket; rs->rules[rs_num].inactive.open = 1; return (0); } int pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_ruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || rs->rules[rs_num].inactive.ticket != ticket) return (0); - while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } rs->rules[rs_num].inactive.open = 0; return (0); } +#define PF_MD5_UPD(st, elm) \ + MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm)) + +#define PF_MD5_UPD_STR(st, elm) \ + MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm)) + +#define PF_MD5_UPD_HTONL(st, elm, stor) do { \ + (stor) = htonl((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\ +} while (0) + +#define PF_MD5_UPD_HTONS(st, elm, stor) do { \ + (stor) = htons((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\ +} while (0) + +void +pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) +{ + PF_MD5_UPD(pfr, addr.type); + switch (pfr->addr.type) { + case PF_ADDR_DYNIFTL: + PF_MD5_UPD(pfr, addr.v.ifname); + PF_MD5_UPD(pfr, addr.iflags); + break; + case PF_ADDR_TABLE: + PF_MD5_UPD(pfr, addr.v.tblname); + break; + case PF_ADDR_ADDRMASK: + /* XXX ignore af? */ + PF_MD5_UPD(pfr, addr.v.a.addr.addr32); + PF_MD5_UPD(pfr, addr.v.a.mask.addr32); + break; + case PF_ADDR_RTLABEL: + PF_MD5_UPD(pfr, addr.v.rtlabelname); + break; + } + + PF_MD5_UPD(pfr, port[0]); + PF_MD5_UPD(pfr, port[1]); + PF_MD5_UPD(pfr, neg); + PF_MD5_UPD(pfr, port_op); +} + +void +pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) +{ + u_int16_t x; + u_int32_t y; + + pf_hash_rule_addr(ctx, &rule->src); + pf_hash_rule_addr(ctx, &rule->dst); + PF_MD5_UPD_STR(rule, label); + PF_MD5_UPD_STR(rule, ifname); + PF_MD5_UPD_STR(rule, match_tagname); + PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? 
*/ + PF_MD5_UPD_HTONL(rule, os_fingerprint, y); + PF_MD5_UPD_HTONL(rule, prob, y); + PF_MD5_UPD_HTONL(rule, uid.uid[0], y); + PF_MD5_UPD_HTONL(rule, uid.uid[1], y); + PF_MD5_UPD(rule, uid.op); + PF_MD5_UPD_HTONL(rule, gid.gid[0], y); + PF_MD5_UPD_HTONL(rule, gid.gid[1], y); + PF_MD5_UPD(rule, gid.op); + PF_MD5_UPD_HTONL(rule, rule_flag, y); + PF_MD5_UPD(rule, action); + PF_MD5_UPD(rule, direction); + PF_MD5_UPD(rule, af); + PF_MD5_UPD(rule, quick); + PF_MD5_UPD(rule, ifnot); + PF_MD5_UPD(rule, match_tag_not); + PF_MD5_UPD(rule, natpass); + PF_MD5_UPD(rule, keep_state); + PF_MD5_UPD(rule, proto); + PF_MD5_UPD(rule, type); + PF_MD5_UPD(rule, code); + PF_MD5_UPD(rule, flags); + PF_MD5_UPD(rule, flagset); + PF_MD5_UPD(rule, allow_opts); + PF_MD5_UPD(rule, rt); + PF_MD5_UPD(rule, tos); +} + int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; - struct pf_rule *rule; + struct pf_rule *rule, **old_array; struct pf_rulequeue *old_rules; - int s; + int s, error; + u_int32_t old_rcount; if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_ruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || ticket != rs->rules[rs_num].inactive.ticket) return (EBUSY); + /* Calculate checksum for the main ruleset */ + if (rs == &pf_main_ruleset) { + error = pf_setup_pfsync_matching(rs); + if (error != 0) + return (error); + } + /* Swap rules, keep the old. */ s = splsoftnet(); old_rules = rs->rules[rs_num].active.ptr; + old_rcount = rs->rules[rs_num].active.rcount; + old_array = rs->rules[rs_num].active.ptr_array; + rs->rules[rs_num].active.ptr = rs->rules[rs_num].inactive.ptr; + rs->rules[rs_num].active.ptr_array = + rs->rules[rs_num].inactive.ptr_array; + rs->rules[rs_num].active.rcount = + rs->rules[rs_num].inactive.rcount; rs->rules[rs_num].inactive.ptr = old_rules; + rs->rules[rs_num].inactive.ptr_array = old_array; + rs->rules[rs_num].inactive.rcount = old_rcount; + rs->rules[rs_num].active.ticket = rs->rules[rs_num].inactive.ticket; pf_calc_skip_steps(rs->rules[rs_num].active.ptr); + /* Purge the old rule list. */ while ((rule = TAILQ_FIRST(old_rules)) != NULL) pf_rm_rule(old_rules, rule); + if (rs->rules[rs_num].inactive.ptr_array) + free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); + rs->rules[rs_num].inactive.ptr_array = NULL; + rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_ruleset(rs); splx(s); return (0); } int +pf_setup_pfsync_matching(struct pf_ruleset *rs) +{ + MD5_CTX ctx; + struct pf_rule *rule; + int rs_cnt; + u_int8_t digest[PF_MD5_DIGEST_LENGTH]; + + MD5Init(&ctx); + for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) { + /* XXX PF_RULESET_SCRUB as well? 
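
[pf_setup_pfsync_matching() feeds every rule through pf_hash_rule() and ends by copying the MD5 digest into pf_status.pf_chksum, presumably so pfsync peers can cheaply compare rulesets. The PF_MD5_UPD_HTONL/PF_MD5_UPD_HTONS macros hash multi-byte fields in network byte order, so the digest does not depend on host endianness. A minimal sketch of that normalization, with FNV-1a standing in for MD5 so the example stays self-contained:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <inttypes.h>
#include <arpa/inet.h>

/* FNV-1a, a stand-in here for MD5Init/MD5Update/MD5Final. */
static uint32_t digest = 2166136261u;

static void
upd(const void *p, size_t len)
{
	const unsigned char *c = p;

	while (len--) {
		digest ^= *c++;
		digest *= 16777619u;
	}
}

/* Hash multi-byte fields in network byte order, as the
 * PF_MD5_UPD_HTONL/PF_MD5_UPD_HTONS macros do, so little- and
 * big-endian hosts derive the same digest from the same rule. */
#define UPD_HTONL(v) do { uint32_t s_ = htonl(v); upd(&s_, sizeof(s_)); } while (0)
#define UPD_HTONS(v) do { uint16_t s_ = htons(v); upd(&s_, sizeof(s_)); } while (0)

struct rule {		/* hypothetical subset of struct pf_rule */
	uint32_t prob;
	uint16_t match_tag;
	uint8_t  action;
};

int
main(void)
{
	struct rule r = { 99, 7, 1 };

	UPD_HTONL(r.prob);
	UPD_HTONS(r.match_tag);
	upd(&r.action, sizeof(r.action));	/* single bytes need no swap */
	printf("digest %08" PRIx32 "\n", digest);
	return (0);
}

Hashing field by field, rather than the whole struct, also keeps compiler padding out of the digest.]
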
*/ + if (rs_cnt == PF_RULESET_SCRUB) + continue; + + if (rs->rules[rs_cnt].inactive.ptr_array) + free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP); + rs->rules[rs_cnt].inactive.ptr_array = NULL; + + if (rs->rules[rs_cnt].inactive.rcount) { + rs->rules[rs_cnt].inactive.ptr_array = + malloc(sizeof(caddr_t) * + rs->rules[rs_cnt].inactive.rcount, + M_TEMP, M_NOWAIT); + + if (!rs->rules[rs_cnt].inactive.ptr_array) + return (ENOMEM); + } + + TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, + entries) { + pf_hash_rule(&ctx, rule); + (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule; + } + } + + MD5Final(digest, &ctx); + memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum)); + return (0); +} + +int pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) { struct pf_pooladdr *pa = NULL; struct pf_pool *pool = NULL; int s; int error = 0; /* XXX keep in sync with switch() below */ if (securelevel > 1) switch (cmd) { case DIOCGETRULES: case DIOCGETRULE: case DIOCGETADDRS: case DIOCGETADDR: case DIOCGETSTATE: case DIOCSETSTATUSIF: case DIOCGETSTATUS: case DIOCCLRSTATUS: case DIOCNATLOOK: case DIOCSETDEBUG: case DIOCGETSTATES: case DIOCGETTIMEOUT: case DIOCCLRRULECTRS: case DIOCGETLIMIT: case DIOCGETALTQS: case DIOCGETALTQ: case DIOCGETQSTATS: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCRGETTABLES: case DIOCRGETTSTATS: case DIOCRCLRTSTATS: case DIOCRCLRADDRS: case DIOCRADDADDRS: case DIOCRDELADDRS: case DIOCRSETADDRS: case DIOCRGETADDRS: case DIOCRGETASTATS: case DIOCRCLRASTATS: case DIOCRTSTADDRS: case DIOCOSFPGET: case DIOCGETSRCNODES: case DIOCCLRSRCNODES: case DIOCIGETIFACES: - case DIOCICLRISTATS: case DIOCSETIFFLAG: case DIOCCLRIFFLAG: break; case DIOCRCLRTABLES: case DIOCRADDTABLES: case DIOCRDELTABLES: case DIOCRSETTFLAGS: if (((struct pfioc_table *)addr)->pfrio_flags & PFR_FLAG_DUMMY) break; /* dummy operation ok */ return (EPERM); default: return (EPERM); } if (!(flags & FWRITE)) switch (cmd) { case DIOCGETRULES: - case DIOCGETRULE: case DIOCGETADDRS: case DIOCGETADDR: case DIOCGETSTATE: case DIOCGETSTATUS: case DIOCGETSTATES: case DIOCGETTIMEOUT: case DIOCGETLIMIT: case DIOCGETALTQS: case DIOCGETALTQ: case DIOCGETQSTATS: case DIOCGETRULESETS: case DIOCGETRULESET: + case DIOCNATLOOK: case DIOCRGETTABLES: case DIOCRGETTSTATS: case DIOCRGETADDRS: case DIOCRGETASTATS: case DIOCRTSTADDRS: case DIOCOSFPGET: case DIOCGETSRCNODES: case DIOCIGETIFACES: break; case DIOCRCLRTABLES: case DIOCRADDTABLES: case DIOCRDELTABLES: case DIOCRCLRTSTATS: case DIOCRCLRADDRS: case DIOCRADDADDRS: case DIOCRDELADDRS: case DIOCRSETADDRS: case DIOCRSETTFLAGS: if (((struct pfioc_table *)addr)->pfrio_flags & - PFR_FLAG_DUMMY) + PFR_FLAG_DUMMY) { + flags |= FWRITE; /* need write lock for dummy */ break; /* dummy operation ok */ + } return (EACCES); + case DIOCGETRULE: + if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR) + return (EACCES); + break; default: return (EACCES); } + if (flags & FWRITE) + rw_enter_write(&pf_consistency_lock); + else + rw_enter_read(&pf_consistency_lock); + s = splsoftnet(); switch (cmd) { case DIOCSTART: if (pf_status.running) error = EEXIST; else { pf_status.running = 1; pf_status.since = time_second; if (pf_status.stateid == 0) { pf_status.stateid = time_second; pf_status.stateid = pf_status.stateid << 32; } DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); } break; case DIOCSTOP: if (!pf_status.running) error = ENOENT; else { pf_status.running = 0; pf_status.since = time_second; DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); } break; case 
DIOCADDRULE: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *rule, *tail; struct pf_pooladdr *pa; int rs_num; pr->anchor[sizeof(pr->anchor) - 1] = 0; ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { error = EINVAL; break; } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { error = EINVAL; break; } if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; break; } if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) { error = EBUSY; break; } if (pr->pool_ticket != ticket_pabuf) { error = EBUSY; break; } rule = pool_get(&pf_rule_pl, PR_NOWAIT); if (rule == NULL) { error = ENOMEM; break; } bcopy(&pr->rule, rule, sizeof(struct pf_rule)); + rule->cuid = p->p_cred->p_ruid; + rule->cpid = p->p_pid; rule->anchor = NULL; rule->kif = NULL; TAILQ_INIT(&rule->rpool.list); /* initialize refcounting */ rule->states = 0; rule->src_nodes = 0; rule->entries.tqe_prev = NULL; #ifndef INET if (rule->af == AF_INET) { pool_put(&pf_rule_pl, rule); error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (rule->af == AF_INET6) { pool_put(&pf_rule_pl, rule); error = EAFNOSUPPORT; break; } #endif /* INET6 */ tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_rulequeue); if (tail) rule->nr = tail->nr + 1; else rule->nr = 0; if (rule->ifname[0]) { - rule->kif = pfi_attach_rule(rule->ifname); + rule->kif = pfi_kif_get(rule->ifname); if (rule->kif == NULL) { pool_put(&pf_rule_pl, rule); error = EINVAL; break; } + pfi_kif_ref(rule->kif, PFI_KIF_REF_RULE); } + if (rule->rtableid > 0 && !rtable_exists(rule->rtableid)) + error = EBUSY; + #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { if ((rule->qid = pf_qname2qid(rule->qname)) == 0) error = EBUSY; else if (rule->pqname[0] != 0) { if ((rule->pqid = pf_qname2qid(rule->pqname)) == 0) error = EBUSY; } else rule->pqid = rule->qid; } #endif if (rule->tagname[0]) if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) error = EBUSY; if (rule->match_tagname[0]) if ((rule->match_tag = pf_tagname2tag(rule->match_tagname)) == 0) error = EBUSY; if (rule->rt && !rule->direction) error = EINVAL; +#if NPFLOG > 0 + if (rule->logif >= PFLOGIFS_MAX) + error = EINVAL; +#endif if (pf_rtlabel_add(&rule->src.addr) || pf_rtlabel_add(&rule->dst.addr)) error = EBUSY; if (pfi_dynaddr_setup(&rule->src.addr, rule->af)) error = EINVAL; if (pfi_dynaddr_setup(&rule->dst.addr, rule->af)) error = EINVAL; if (pf_tbladdr_setup(ruleset, &rule->src.addr)) error = EINVAL; if (pf_tbladdr_setup(ruleset, &rule->dst.addr)) error = EINVAL; if (pf_anchor_setup(rule, ruleset, pr->anchor_call)) error = EINVAL; TAILQ_FOREACH(pa, &pf_pabuf, entries) if (pf_tbladdr_setup(ruleset, &pa->addr)) error = EINVAL; if (rule->overload_tblname[0]) { if ((rule->overload_tbl = pfr_attach_table(ruleset, rule->overload_tblname)) == NULL) error = EINVAL; else rule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } pf_mv_pool(&pf_pabuf, &rule->rpool.list); if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || (rule->action == PF_BINAT)) && rule->anchor == NULL) || (rule->rt > PF_FASTROUTE)) && (TAILQ_FIRST(&rule->rpool.list) == NULL)) error = EINVAL; if (error) { pf_rm_rule(NULL, rule); break; } rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); - rule->evaluations = rule->packets = rule->bytes = 0; + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); + 
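
[DIOCADDRULE above stages the new rule on the inactive list only if the caller presents the ticket issued by pf_begin_rules(); a stale ticket means another transaction has started, and the call fails with EBUSY. A toy model of that ticket discipline; begin/add_rule/commit are hypothetical stand-ins, not the pf functions:

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

/* Toy ticket transaction: begin() bumps the ticket, invalidating any
 * older in-flight loader; add_rule() and commit() must present it. */
static uint32_t	inactive_ticket;
static int	inactive_open;
static int	staged;		/* rules on the inactive list */
static int	active;		/* rules visible to the packet path */

static uint32_t
begin(void)
{
	staged = 0;
	inactive_open = 1;
	return (++inactive_ticket);
}

static int
add_rule(uint32_t t)
{
	if (!inactive_open || t != inactive_ticket)
		return (EBUSY);		/* a newer transaction has begun */
	staged++;
	return (0);
}

static int
commit(uint32_t t)
{
	if (!inactive_open || t != inactive_ticket)
		return (EBUSY);
	active = staged;	/* pf swaps the list heads here instead */
	staged = 0;
	inactive_open = 0;
	return (0);
}

int
main(void)
{
	uint32_t stale = begin();
	uint32_t t;

	(void)add_rule(stale);
	t = begin();		/* restarts the transaction */
	printf("stale add -> %d\n", add_rule(stale));	/* EBUSY */
	(void)add_rule(t);
	printf("commit -> %d, active=%d\n", commit(t), active);
	return (0);
}
]
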
ruleset->rules[rs_num].inactive.rcount++; break; } case DIOCGETRULES: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *tail; int rs_num; pr->anchor[sizeof(pr->anchor) - 1] = 0; ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { error = EINVAL; break; } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { error = EINVAL; break; } tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, pf_rulequeue); if (tail) pr->nr = tail->nr + 1; else pr->nr = 0; pr->ticket = ruleset->rules[rs_num].active.ticket; break; } case DIOCGETRULE: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *rule; int rs_num, i; pr->anchor[sizeof(pr->anchor) - 1] = 0; ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { error = EINVAL; break; } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { error = EINVAL; break; } if (pr->ticket != ruleset->rules[rs_num].active.ticket) { error = EBUSY; break; } rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); while ((rule != NULL) && (rule->nr != pr->nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { error = EBUSY; break; } bcopy(rule, &pr->rule, sizeof(struct pf_rule)); if (pf_anchor_copyout(ruleset, rule, pr)) { error = EBUSY; break; } pfi_dynaddr_copyout(&pr->rule.src.addr); pfi_dynaddr_copyout(&pr->rule.dst.addr); pf_tbladdr_copyout(&pr->rule.src.addr); pf_tbladdr_copyout(&pr->rule.dst.addr); pf_rtlabel_copyout(&pr->rule.src.addr); pf_rtlabel_copyout(&pr->rule.dst.addr); for (i = 0; i < PF_SKIP_COUNT; ++i) if (rule->skip[i].ptr == NULL) pr->rule.skip[i].nr = -1; else pr->rule.skip[i].nr = rule->skip[i].ptr->nr; + + if (pr->action == PF_GET_CLR_CNTR) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } break; } case DIOCCHANGERULE: { struct pfioc_rule *pcr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; struct pf_rule *oldrule = NULL, *newrule = NULL; u_int32_t nr = 0; int rs_num; if (!(pcr->action == PF_CHANGE_REMOVE || pcr->action == PF_CHANGE_GET_TICKET) && pcr->pool_ticket != ticket_pabuf) { error = EBUSY; break; } if (pcr->action < PF_CHANGE_ADD_HEAD || pcr->action > PF_CHANGE_GET_TICKET) { error = EINVAL; break; } ruleset = pf_find_ruleset(pcr->anchor); if (ruleset == NULL) { error = EINVAL; break; } rs_num = pf_get_ruleset_number(pcr->rule.action); if (rs_num >= PF_RULESET_MAX) { error = EINVAL; break; } if (pcr->action == PF_CHANGE_GET_TICKET) { pcr->ticket = ++ruleset->rules[rs_num].active.ticket; break; } else { if (pcr->ticket != ruleset->rules[rs_num].active.ticket) { error = EINVAL; break; } if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; break; } } if (pcr->action != PF_CHANGE_REMOVE) { newrule = pool_get(&pf_rule_pl, PR_NOWAIT); if (newrule == NULL) { error = ENOMEM; break; } bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); + newrule->cuid = p->p_cred->p_ruid; + newrule->cpid = p->p_pid; TAILQ_INIT(&newrule->rpool.list); /* initialize refcounting */ newrule->states = 0; newrule->entries.tqe_prev = NULL; #ifndef INET if (newrule->af == AF_INET) { pool_put(&pf_rule_pl, newrule); error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (newrule->af == AF_INET6) { pool_put(&pf_rule_pl, newrule); error = EAFNOSUPPORT; break; } #endif /* INET6 */ if (newrule->ifname[0]) { - newrule->kif = pfi_attach_rule(newrule->ifname); + newrule->kif = pfi_kif_get(newrule->ifname); if (newrule->kif == NULL) { 
pool_put(&pf_rule_pl, newrule); error = EINVAL; break; } + pfi_kif_ref(newrule->kif, PFI_KIF_REF_RULE); } else newrule->kif = NULL; + if (newrule->rtableid > 0 && + !rtable_exists(newrule->rtableid)) + error = EBUSY; + #ifdef ALTQ /* set queue IDs */ if (newrule->qname[0] != 0) { if ((newrule->qid = pf_qname2qid(newrule->qname)) == 0) error = EBUSY; else if (newrule->pqname[0] != 0) { if ((newrule->pqid = pf_qname2qid(newrule->pqname)) == 0) error = EBUSY; } else newrule->pqid = newrule->qid; } #endif /* ALTQ */ if (newrule->tagname[0]) if ((newrule->tag = pf_tagname2tag(newrule->tagname)) == 0) error = EBUSY; if (newrule->match_tagname[0]) if ((newrule->match_tag = pf_tagname2tag( newrule->match_tagname)) == 0) error = EBUSY; if (newrule->rt && !newrule->direction) error = EINVAL; if (pf_rtlabel_add(&newrule->src.addr) || pf_rtlabel_add(&newrule->dst.addr)) error = EBUSY; if (pfi_dynaddr_setup(&newrule->src.addr, newrule->af)) error = EINVAL; if (pfi_dynaddr_setup(&newrule->dst.addr, newrule->af)) error = EINVAL; if (pf_tbladdr_setup(ruleset, &newrule->src.addr)) error = EINVAL; if (pf_tbladdr_setup(ruleset, &newrule->dst.addr)) error = EINVAL; if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) error = EINVAL; TAILQ_FOREACH(pa, &pf_pabuf, entries) if (pf_tbladdr_setup(ruleset, &pa->addr)) error = EINVAL; if (newrule->overload_tblname[0]) { if ((newrule->overload_tbl = pfr_attach_table( ruleset, newrule->overload_tblname)) == NULL) error = EINVAL; else newrule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } pf_mv_pool(&pf_pabuf, &newrule->rpool.list); if (((((newrule->action == PF_NAT) || (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || (newrule->rt > PF_FASTROUTE)) && - !pcr->anchor[0])) && + !newrule->anchor)) && (TAILQ_FIRST(&newrule->rpool.list) == NULL)) error = EINVAL; if (error) { pf_rm_rule(NULL, newrule); break; } newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); - newrule->evaluations = newrule->packets = 0; - newrule->bytes = 0; + newrule->evaluations = 0; + newrule->packets[0] = newrule->packets[1] = 0; + newrule->bytes[0] = newrule->bytes[1] = 0; } pf_empty_pool(&pf_pabuf); if (pcr->action == PF_CHANGE_ADD_HEAD) oldrule = TAILQ_FIRST( ruleset->rules[rs_num].active.ptr); else if (pcr->action == PF_CHANGE_ADD_TAIL) oldrule = TAILQ_LAST( ruleset->rules[rs_num].active.ptr, pf_rulequeue); else { oldrule = TAILQ_FIRST( ruleset->rules[rs_num].active.ptr); while ((oldrule != NULL) && (oldrule->nr != pcr->nr)) oldrule = TAILQ_NEXT(oldrule, entries); if (oldrule == NULL) { if (newrule != NULL) pf_rm_rule(NULL, newrule); error = EINVAL; break; } } - if (pcr->action == PF_CHANGE_REMOVE) + if (pcr->action == PF_CHANGE_REMOVE) { pf_rm_rule(ruleset->rules[rs_num].active.ptr, oldrule); - else { + ruleset->rules[rs_num].active.rcount--; + } else { if (oldrule == NULL) TAILQ_INSERT_TAIL( ruleset->rules[rs_num].active.ptr, newrule, entries); else if (pcr->action == PF_CHANGE_ADD_HEAD || pcr->action == PF_CHANGE_ADD_BEFORE) TAILQ_INSERT_BEFORE(oldrule, newrule, entries); else TAILQ_INSERT_AFTER( ruleset->rules[rs_num].active.ptr, oldrule, newrule, entries); + ruleset->rules[rs_num].active.rcount++; } nr = 0; TAILQ_FOREACH(oldrule, ruleset->rules[rs_num].active.ptr, entries) oldrule->nr = nr++; ruleset->rules[rs_num].active.ticket++; pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); pf_remove_if_empty_ruleset(ruleset); break; } case DIOCCLRSTATES: { - struct pf_state *state; + struct pf_state *state, *nexts; struct pfioc_state_kill *psk = (struct pfioc_state_kill 
*)addr; int killed = 0; - RB_FOREACH(state, pf_state_tree_id, &tree_id) { + for (state = RB_MIN(pf_state_tree_id, &tree_id); state; + state = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); + if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, state->u.s.kif->pfik_name)) { - state->timeout = PFTM_PURGE; #if NPFSYNC /* don't send out individual delete messages */ state->sync_flags = PFSTATE_NOSYNC; #endif + pf_unlink_state(state); killed++; } } - pf_purge_expired_states(); - pf_status.states = 0; psk->psk_af = killed; #if NPFSYNC pfsync_clear_states(pf_status.hostid, psk->psk_ifname); #endif break; } case DIOCKILLSTATES: { - struct pf_state *state; + struct pf_state *state, *nexts; + struct pf_state_host *src, *dst; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; int killed = 0; - RB_FOREACH(state, pf_state_tree_id, &tree_id) { + for (state = RB_MIN(pf_state_tree_id, &tree_id); state; + state = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); + + if (state->direction == PF_OUT) { + src = &state->lan; + dst = &state->ext; + } else { + src = &state->ext; + dst = &state->lan; + } if ((!psk->psk_af || state->af == psk->psk_af) && (!psk->psk_proto || psk->psk_proto == state->proto) && PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, - &state->lan.addr, state->af) && + &src->addr, state->af) && PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, - &state->ext.addr, state->af) && + &dst->addr, state->af) && (psk->psk_src.port_op == 0 || pf_match_port(psk->psk_src.port_op, psk->psk_src.port[0], psk->psk_src.port[1], - state->lan.port)) && + src->port)) && (psk->psk_dst.port_op == 0 || pf_match_port(psk->psk_dst.port_op, psk->psk_dst.port[0], psk->psk_dst.port[1], - state->ext.port)) && + dst->port)) && (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, state->u.s.kif->pfik_name))) { - state->timeout = PFTM_PURGE; +#if NPFSYNC > 0 + /* send immediate delete of state */ + pfsync_delete_state(state); + state->sync_flags |= PFSTATE_NOSYNC; +#endif + pf_unlink_state(state); killed++; } } - pf_purge_expired_states(); psk->psk_af = killed; break; } case DIOCADDSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pf_state *state; struct pfi_kif *kif; if (ps->state.timeout >= PFTM_MAX && ps->state.timeout != PFTM_UNTIL_PACKET) { error = EINVAL; break; } state = pool_get(&pf_state_pl, PR_NOWAIT); if (state == NULL) { error = ENOMEM; break; } - kif = pfi_lookup_create(ps->state.u.ifname); + kif = pfi_kif_get(ps->state.u.ifname); if (kif == NULL) { pool_put(&pf_state_pl, state); error = ENOENT; break; } bcopy(&ps->state, state, sizeof(struct pf_state)); bzero(&state->u, sizeof(state->u)); state->rule.ptr = &pf_default_rule; state->nat_rule.ptr = NULL; state->anchor.ptr = NULL; state->rt_kif = NULL; state->creation = time_second; state->pfsync_time = 0; state->packets[0] = state->packets[1] = 0; state->bytes[0] = state->bytes[1] = 0; if (pf_insert_state(kif, state)) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); pool_put(&pf_state_pl, state); error = ENOMEM; } break; } case DIOCGETSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pf_state *state; u_int32_t nr; + int secs; nr = 0; RB_FOREACH(state, pf_state_tree_id, &tree_id) { if (nr >= ps->nr) break; nr++; } if (state == NULL) { error = EBUSY; break; } - bcopy(state, &ps->state, sizeof(struct pf_state)); + secs = time_second; + bcopy(state, &ps->state, sizeof(ps->state)); + 
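
[Both state-killing loops above were converted from RB_FOREACH to an explicit RB_MIN/RB_NEXT walk that saves the successor in nexts before pf_unlink_state() removes the current node; deleting the element the iterator stands on would otherwise invalidate the traversal. The same pattern, sketched on a plain singly linked list:

#include <stdio.h>
#include <stdlib.h>

struct node {
	int		 v;
	struct node	*next;
};

/* Remove matching nodes mid-walk: fetch the successor before
 * unlinking, as the RB_MIN/RB_NEXT + pf_unlink_state() loop does. */
static struct node *
purge(struct node *head, int victim)
{
	struct node **prevp = &head, *n, *next;

	for (n = head; n != NULL; n = next) {
		next = n->next;		/* grab the successor first */
		if (n->v == victim) {
			*prevp = next;	/* unlink ... */
			free(n);	/* ... then freeing is safe */
		} else
			prevp = &n->next;
	}
	return (head);
}

static struct node *
push(struct node *h, int v)
{
	struct node *n = malloc(sizeof(*n));

	n->v = v;
	n->next = h;
	return (n);
}

int
main(void)
{
	struct node *h = NULL, *n;
	int i;

	for (i = 0; i < 6; i++)
		h = push(h, i % 2);
	h = purge(h, 1);
	for (n = h; n != NULL; n = n->next)
		printf("%d ", n->v);
	printf("\n");
	return (0);
}
]
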
strlcpy(ps->state.u.ifname, state->u.s.kif->pfik_name, + sizeof(ps->state.u.ifname)); ps->state.rule.nr = state->rule.ptr->nr; ps->state.nat_rule.nr = (state->nat_rule.ptr == NULL) ? -1 : state->nat_rule.ptr->nr; ps->state.anchor.nr = (state->anchor.ptr == NULL) ? -1 : state->anchor.ptr->nr; + ps->state.creation = secs - ps->state.creation; ps->state.expire = pf_state_expires(state); - if (ps->state.expire > time_second) - ps->state.expire -= time_second; + if (ps->state.expire > secs) + ps->state.expire -= secs; else ps->state.expire = 0; break; } case DIOCGETSTATES: { struct pfioc_states *ps = (struct pfioc_states *)addr; struct pf_state *state; - struct pf_state *p, pstore; - struct pfi_kif *kif; + struct pf_state *p, *pstore; u_int32_t nr = 0; int space = ps->ps_len; if (space == 0) { - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) - nr += kif->pfik_states; + nr = pf_status.states; ps->ps_len = sizeof(struct pf_state) * nr; break; } + pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK); + p = ps->ps_states; - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) - RB_FOREACH(state, pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy) { + + state = TAILQ_FIRST(&state_list); + while (state) { + if (state->timeout != PFTM_UNLINKED) { int secs = time_second; if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len) break; - bcopy(state, &pstore, sizeof(pstore)); - strlcpy(pstore.u.ifname, kif->pfik_name, - sizeof(pstore.u.ifname)); - pstore.rule.nr = state->rule.ptr->nr; - pstore.nat_rule.nr = (state->nat_rule.ptr == + bcopy(state, pstore, sizeof(*pstore)); + strlcpy(pstore->u.ifname, + state->u.s.kif->pfik_name, + sizeof(pstore->u.ifname)); + pstore->rule.nr = state->rule.ptr->nr; + pstore->nat_rule.nr = (state->nat_rule.ptr == NULL) ? -1 : state->nat_rule.ptr->nr; - pstore.anchor.nr = (state->anchor.ptr == + pstore->anchor.nr = (state->anchor.ptr == NULL) ? 
-1 : state->anchor.ptr->nr; - pstore.creation = secs - pstore.creation; - pstore.expire = pf_state_expires(state); - if (pstore.expire > secs) - pstore.expire -= secs; + pstore->creation = secs - pstore->creation; + pstore->expire = pf_state_expires(state); + if (pstore->expire > secs) + pstore->expire -= secs; else - pstore.expire = 0; - error = copyout(&pstore, p, sizeof(*p)); - if (error) + pstore->expire = 0; + error = copyout(pstore, p, sizeof(*p)); + if (error) { + free(pstore, M_TEMP); goto fail; + } p++; nr++; } + state = TAILQ_NEXT(state, u.s.entry_list); + } + ps->ps_len = sizeof(struct pf_state) * nr; + + free(pstore, M_TEMP); break; } case DIOCGETSTATUS: { struct pf_status *s = (struct pf_status *)addr; bcopy(&pf_status, s, sizeof(struct pf_status)); pfi_fill_oldstatus(s); break; } case DIOCSETSTATUSIF: { struct pfioc_if *pi = (struct pfioc_if *)addr; if (pi->ifname[0] == 0) { bzero(pf_status.ifname, IFNAMSIZ); break; } if (ifunit(pi->ifname) == NULL) { error = EINVAL; break; } strlcpy(pf_status.ifname, pi->ifname, IFNAMSIZ); break; } case DIOCCLRSTATUS: { bzero(pf_status.counters, sizeof(pf_status.counters)); bzero(pf_status.fcounters, sizeof(pf_status.fcounters)); bzero(pf_status.scounters, sizeof(pf_status.scounters)); + pf_status.since = time_second; if (*pf_status.ifname) - pfi_clr_istats(pf_status.ifname, NULL, - PFI_FLAG_INSTANCE); + pfi_clr_istats(pf_status.ifname); break; } case DIOCNATLOOK: { struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; struct pf_state *state; - struct pf_state key; + struct pf_state_cmp key; int m = 0, direction = pnl->direction; key.af = pnl->af; key.proto = pnl->proto; if (!pnl->proto || PF_AZERO(&pnl->saddr, pnl->af) || PF_AZERO(&pnl->daddr, pnl->af) || - !pnl->dport || !pnl->sport) + ((pnl->proto == IPPROTO_TCP || + pnl->proto == IPPROTO_UDP) && + (!pnl->dport || !pnl->sport))) error = EINVAL; else { /* * userland gives us source and dest of connection, * reverse the lookup so we ask for what happens with * the return traffic, enabling us to find it in the * state tree. 
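
[As the comment says, DIOCNATLOOK searches with the connection reversed so the lookup hits the state entry keyed on the return traffic; note also that missing ports are now rejected only for TCP and UDP, where ports exist. A trivial sketch of reversing a flow 4-tuple, with a hypothetical IPv4-only key:

#include <stdio.h>
#include <stdint.h>

struct tuple {		/* hypothetical flow key, IPv4 only */
	uint32_t saddr, daddr;
	uint16_t sport, dport;
};

/* A lookup for the reply direction swaps the endpoints. */
static struct tuple
reverse(struct tuple t)
{
	struct tuple r = { t.daddr, t.saddr, t.dport, t.sport };

	return (r);
}

int
main(void)
{
	struct tuple fwd = { 0x0a000001, 0xc0a80001, 12345, 80 };
	struct tuple rev = reverse(fwd);

	printf("fwd %08x:%u -> %08x:%u\n", fwd.saddr, fwd.sport,
	    fwd.daddr, fwd.dport);
	printf("rev %08x:%u -> %08x:%u\n", rev.saddr, rev.sport,
	    rev.daddr, rev.dport);
	return (0);
}
]
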
*/ if (direction == PF_IN) { PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af); key.ext.port = pnl->dport; PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af); key.gwy.port = pnl->sport; state = pf_find_state_all(&key, PF_EXT_GWY, &m); } else { PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af); key.lan.port = pnl->dport; PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af); key.ext.port = pnl->sport; state = pf_find_state_all(&key, PF_LAN_EXT, &m); } if (m > 1) error = E2BIG; /* more than one state */ else if (state != NULL) { if (direction == PF_IN) { PF_ACPY(&pnl->rsaddr, &state->lan.addr, state->af); pnl->rsport = state->lan.port; PF_ACPY(&pnl->rdaddr, &pnl->daddr, pnl->af); pnl->rdport = pnl->dport; } else { PF_ACPY(&pnl->rdaddr, &state->gwy.addr, state->af); pnl->rdport = state->gwy.port; PF_ACPY(&pnl->rsaddr, &pnl->saddr, pnl->af); pnl->rsport = pnl->sport; } } else error = ENOENT; } break; } case DIOCSETTIMEOUT: { struct pfioc_tm *pt = (struct pfioc_tm *)addr; int old; if (pt->timeout < 0 || pt->timeout >= PFTM_MAX || pt->seconds < 0) { error = EINVAL; goto fail; } old = pf_default_rule.timeout[pt->timeout]; + if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) + pt->seconds = 1; pf_default_rule.timeout[pt->timeout] = pt->seconds; + if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) + wakeup(pf_purge_thread); pt->seconds = old; break; } case DIOCGETTIMEOUT: { struct pfioc_tm *pt = (struct pfioc_tm *)addr; if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) { error = EINVAL; goto fail; } pt->seconds = pf_default_rule.timeout[pt->timeout]; break; } case DIOCGETLIMIT: { struct pfioc_limit *pl = (struct pfioc_limit *)addr; if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) { error = EINVAL; goto fail; } pl->limit = pf_pool_limits[pl->index].limit; break; } case DIOCSETLIMIT: { struct pfioc_limit *pl = (struct pfioc_limit *)addr; int old_limit; if (pl->index < 0 || pl->index >= PF_LIMIT_MAX || pf_pool_limits[pl->index].pp == NULL) { error = EINVAL; goto fail; } if (pool_sethardlimit(pf_pool_limits[pl->index].pp, pl->limit, NULL, 0) != 0) { error = EBUSY; goto fail; } old_limit = pf_pool_limits[pl->index].limit; pf_pool_limits[pl->index].limit = pl->limit; pl->limit = old_limit; break; } case DIOCSETDEBUG: { u_int32_t *level = (u_int32_t *)addr; pf_status.debug = *level; break; } case DIOCCLRRULECTRS: { + /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */ struct pf_ruleset *ruleset = &pf_main_ruleset; struct pf_rule *rule; TAILQ_FOREACH(rule, - ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) - rule->evaluations = rule->packets = - rule->bytes = 0; + ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } break; } #ifdef ALTQ case DIOCSTARTALTQ: { struct pf_altq *altq; /* enable all altq interfaces on active list */ TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { error = pf_enable_altq(altq); if (error != 0) break; } } if (error == 0) pf_altq_running = 1; DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); break; } case DIOCSTOPALTQ: { struct pf_altq *altq; /* disable all altq interfaces on active list */ TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { error = pf_disable_altq(altq); if (error != 0) break; } } if (error == 0) pf_altq_running = 0; DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); break; } case DIOCADDALTQ: { struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq, *a; if (pa->ticket != ticket_altqs_inactive) { 
error = EBUSY; break; } altq = pool_get(&pf_altq_pl, PR_NOWAIT); if (altq == NULL) { error = ENOMEM; break; } bcopy(&pa->altq, altq, sizeof(struct pf_altq)); /* * if this is for a queue, find the discipline and * copy the necessary fields */ if (altq->qname[0] != 0) { if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { error = EBUSY; pool_put(&pf_altq_pl, altq); break; } TAILQ_FOREACH(a, pf_altqs_inactive, entries) { if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0 && a->qname[0] == 0) { altq->altq_disc = a->altq_disc; break; } } } error = altq_add(altq); if (error) { pool_put(&pf_altq_pl, altq); break; } TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries); bcopy(altq, &pa->altq, sizeof(struct pf_altq)); break; } case DIOCGETALTQS: { struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq; pa->nr = 0; TAILQ_FOREACH(altq, pf_altqs_active, entries) pa->nr++; pa->ticket = ticket_altqs_active; break; } case DIOCGETALTQ: { struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq; u_int32_t nr; if (pa->ticket != ticket_altqs_active) { error = EBUSY; break; } nr = 0; altq = TAILQ_FIRST(pf_altqs_active); while ((altq != NULL) && (nr < pa->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; } if (altq == NULL) { error = EBUSY; break; } bcopy(altq, &pa->altq, sizeof(struct pf_altq)); break; } case DIOCCHANGEALTQ: /* CHANGEALTQ not supported yet! */ error = ENODEV; break; case DIOCGETQSTATS: { struct pfioc_qstats *pq = (struct pfioc_qstats *)addr; struct pf_altq *altq; u_int32_t nr; int nbytes; if (pq->ticket != ticket_altqs_active) { error = EBUSY; break; } nbytes = pq->nbytes; nr = 0; altq = TAILQ_FIRST(pf_altqs_active); while ((altq != NULL) && (nr < pq->nr)) { altq = TAILQ_NEXT(altq, entries); nr++; } if (altq == NULL) { error = EBUSY; break; } error = altq_getqstats(altq, pq->buf, &nbytes); if (error == 0) { pq->scheduler = altq->scheduler; pq->nbytes = nbytes; } break; } #endif /* ALTQ */ case DIOCBEGINADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; pf_empty_pool(&pf_pabuf); pp->ticket = ++ticket_pabuf; break; } case DIOCADDADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; if (pp->ticket != ticket_pabuf) { error = EBUSY; break; } #ifndef INET if (pp->af == AF_INET) { error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pp->af == AF_INET6) { error = EAFNOSUPPORT; break; } #endif /* INET6 */ if (pp->addr.addr.type != PF_ADDR_ADDRMASK && pp->addr.addr.type != PF_ADDR_DYNIFTL && pp->addr.addr.type != PF_ADDR_TABLE) { error = EINVAL; break; } pa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); if (pa == NULL) { error = ENOMEM; break; } bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); if (pa->ifname[0]) { - pa->kif = pfi_attach_rule(pa->ifname); + pa->kif = pfi_kif_get(pa->ifname); if (pa->kif == NULL) { pool_put(&pf_pooladdr_pl, pa); error = EINVAL; break; } + pfi_kif_ref(pa->kif, PFI_KIF_REF_RULE); } if (pfi_dynaddr_setup(&pa->addr, pp->af)) { pfi_dynaddr_remove(&pa->addr); - pfi_detach_rule(pa->kif); + pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, pa); error = EINVAL; break; } TAILQ_INSERT_TAIL(&pf_pabuf, pa, entries); break; } case DIOCGETADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; pp->nr = 0; pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 0); if (pool == NULL) { error = EBUSY; break; } TAILQ_FOREACH(pa, &pool->list, entries) pp->nr++; break; } case DIOCGETADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; u_int32_t nr = 0; pool = 
pf_get_pool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 1); if (pool == NULL) { error = EBUSY; break; } pa = TAILQ_FIRST(&pool->list); while ((pa != NULL) && (nr < pp->nr)) { pa = TAILQ_NEXT(pa, entries); nr++; } if (pa == NULL) { error = EBUSY; break; } bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); pfi_dynaddr_copyout(&pp->addr.addr); pf_tbladdr_copyout(&pp->addr.addr); pf_rtlabel_copyout(&pp->addr.addr); break; } case DIOCCHANGEADDR: { struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr; struct pf_pooladdr *oldpa = NULL, *newpa = NULL; struct pf_ruleset *ruleset; if (pca->action < PF_CHANGE_ADD_HEAD || pca->action > PF_CHANGE_REMOVE) { error = EINVAL; break; } if (pca->addr.addr.type != PF_ADDR_ADDRMASK && pca->addr.addr.type != PF_ADDR_DYNIFTL && pca->addr.addr.type != PF_ADDR_TABLE) { error = EINVAL; break; } ruleset = pf_find_ruleset(pca->anchor); if (ruleset == NULL) { error = EBUSY; break; } pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action, pca->r_num, pca->r_last, 1, 1); if (pool == NULL) { error = EBUSY; break; } if (pca->action != PF_CHANGE_REMOVE) { newpa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); if (newpa == NULL) { error = ENOMEM; break; } bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); #ifndef INET if (pca->af == AF_INET) { pool_put(&pf_pooladdr_pl, newpa); error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pca->af == AF_INET6) { pool_put(&pf_pooladdr_pl, newpa); error = EAFNOSUPPORT; break; } #endif /* INET6 */ if (newpa->ifname[0]) { - newpa->kif = pfi_attach_rule(newpa->ifname); + newpa->kif = pfi_kif_get(newpa->ifname); if (newpa->kif == NULL) { pool_put(&pf_pooladdr_pl, newpa); error = EINVAL; break; } + pfi_kif_ref(newpa->kif, PFI_KIF_REF_RULE); } else newpa->kif = NULL; if (pfi_dynaddr_setup(&newpa->addr, pca->af) || pf_tbladdr_setup(ruleset, &newpa->addr)) { pfi_dynaddr_remove(&newpa->addr); - pfi_detach_rule(newpa->kif); + pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, newpa); error = EINVAL; break; } } if (pca->action == PF_CHANGE_ADD_HEAD) oldpa = TAILQ_FIRST(&pool->list); else if (pca->action == PF_CHANGE_ADD_TAIL) oldpa = TAILQ_LAST(&pool->list, pf_palist); else { int i = 0; oldpa = TAILQ_FIRST(&pool->list); while ((oldpa != NULL) && (i < pca->nr)) { oldpa = TAILQ_NEXT(oldpa, entries); i++; } if (oldpa == NULL) { error = EINVAL; break; } } if (pca->action == PF_CHANGE_REMOVE) { TAILQ_REMOVE(&pool->list, oldpa, entries); pfi_dynaddr_remove(&oldpa->addr); pf_tbladdr_remove(&oldpa->addr); - pfi_detach_rule(oldpa->kif); + pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, oldpa); } else { if (oldpa == NULL) TAILQ_INSERT_TAIL(&pool->list, newpa, entries); else if (pca->action == PF_CHANGE_ADD_HEAD || pca->action == PF_CHANGE_ADD_BEFORE) TAILQ_INSERT_BEFORE(oldpa, newpa, entries); else TAILQ_INSERT_AFTER(&pool->list, oldpa, newpa, entries); } pool->cur = TAILQ_FIRST(&pool->list); PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); break; } case DIOCGETRULESETS: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; struct pf_ruleset *ruleset; struct pf_anchor *anchor; pr->path[sizeof(pr->path) - 1] = 0; if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { error = EINVAL; break; } pr->nr = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) if (anchor->parent == NULL) pr->nr++; } else { RB_FOREACH(anchor, pf_anchor_node, &ruleset->anchor->children) pr->nr++; } break; } case DIOCGETRULESET: { 
struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; struct pf_ruleset *ruleset; struct pf_anchor *anchor; u_int32_t nr = 0; pr->path[sizeof(pr->path) - 1] = 0; if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { error = EINVAL; break; } pr->name[0] = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) if (anchor->parent == NULL && nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); break; } } else { RB_FOREACH(anchor, pf_anchor_node, &ruleset->anchor->children) if (nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); break; } } if (!pr->name[0]) error = EBUSY; break; } case DIOCRCLRTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != 0) { error = ENODEV; break; } error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRADDTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRDELTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } error = pfr_del_tables(io->pfrio_buffer, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRGETTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } error = pfr_get_tables(&io->pfrio_table, io->pfrio_buffer, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRGETTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_tstats)) { error = ENODEV; break; } error = pfr_get_tstats(&io->pfrio_table, io->pfrio_buffer, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRCLRTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } error = pfr_clr_tstats(io->pfrio_buffer, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRSETTFLAGS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } error = pfr_set_tflags(io->pfrio_buffer, io->pfrio_size, io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRCLRADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != 0) { error = ENODEV; break; } error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRADDADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } error = pfr_add_addrs(&io->pfrio_table, io->pfrio_buffer, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRDELADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } error = pfr_del_addrs(&io->pfrio_table, io->pfrio_buffer, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRSETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct 
pfr_addr)) { error = ENODEV; break; } error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer, io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | - PFR_FLAG_USERIOCTL); + PFR_FLAG_USERIOCTL, 0); break; } case DIOCRGETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } error = pfr_get_addrs(&io->pfrio_table, io->pfrio_buffer, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRGETASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_astats)) { error = ENODEV; break; } error = pfr_get_astats(&io->pfrio_table, io->pfrio_buffer, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRCLRASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } error = pfr_clr_astats(&io->pfrio_table, io->pfrio_buffer, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRTSTADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } error = pfr_tst_addrs(&io->pfrio_table, io->pfrio_buffer, io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCRINADEFINE: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } error = pfr_ina_define(&io->pfrio_table, io->pfrio_buffer, io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); break; } case DIOCOSFPADD: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; error = pf_osfp_add(io); break; } case DIOCOSFPGET: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; error = pf_osfp_get(io); break; } case DIOCXBEGIN: { - struct pfioc_trans *io = (struct pfioc_trans *) - addr; - static struct pfioc_trans_e ioe; - static struct pfr_table table; - int i; + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + int i; - if (io->esize != sizeof(ioe)) { + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct pfr_table *)malloc(sizeof(*table), + M_TEMP, M_WAITOK); for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0]) { + if (ioe->anchor[0]) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - if ((error = pf_begin_altq(&ioe.ticket))) + if ((error = pf_begin_altq(&ioe->ticket))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - if ((error = pfr_ina_begin(&table, - &ioe.ticket, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_begin(table, + &ioe->ticket, NULL, 0))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; + } break; default: - if ((error = pf_begin_rules(&ioe.ticket, - ioe.rs_num, ioe.anchor))) + if 
((error = pf_begin_rules(&ioe->ticket, + ioe->rs_num, ioe->anchor))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; + } break; } - if (copyout(&ioe, io->array+i, sizeof(io->array[i]))) { + if (copyout(ioe, io->array+i, sizeof(io->array[i]))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } } + free(table, M_TEMP); + free(ioe, M_TEMP); break; } case DIOCXROLLBACK: { - struct pfioc_trans *io = (struct pfioc_trans *) - addr; - static struct pfioc_trans_e ioe; - static struct pfr_table table; - int i; + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + int i; - if (io->esize != sizeof(ioe)) { + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct pfr_table *)malloc(sizeof(*table), + M_TEMP, M_WAITOK); for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0]) { + if (ioe->anchor[0]) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - if ((error = pf_rollback_altq(ioe.ticket))) + if ((error = pf_rollback_altq(ioe->ticket))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - if ((error = pfr_ina_rollback(&table, - ioe.ticket, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_rollback(table, + ioe->ticket, NULL, 0))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; default: - if ((error = pf_rollback_rules(ioe.ticket, - ioe.rs_num, ioe.anchor))) + if ((error = pf_rollback_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; } } + free(table, M_TEMP); + free(ioe, M_TEMP); break; } case DIOCXCOMMIT: { - struct pfioc_trans *io = (struct pfioc_trans *) - addr; - static struct pfioc_trans_e ioe; - static struct pfr_table table; - struct pf_ruleset *rs; - int i; + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + struct pf_ruleset *rs; + int i; - if (io->esize != sizeof(ioe)) { + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct pfr_table *)malloc(sizeof(*table), + M_TEMP, M_WAITOK); /* first makes sure everything will succeed */ for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0]) { + if (ioe->anchor[0]) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - if (!altqs_inactive_open || ioe.ticket != + if (!altqs_inactive_open || ioe->ticket != ticket_altqs_inactive) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EBUSY; goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - rs = pf_find_ruleset(ioe.anchor); - if (rs 
== NULL || !rs->topen || ioe.ticket != + rs = pf_find_ruleset(ioe->anchor); + if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EBUSY; goto fail; } break; default: - if (ioe.rs_num < 0 || ioe.rs_num >= + if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EINVAL; goto fail; } - rs = pf_find_ruleset(ioe.anchor); + rs = pf_find_ruleset(ioe->anchor); if (rs == NULL || - !rs->rules[ioe.rs_num].inactive.open || - rs->rules[ioe.rs_num].inactive.ticket != - ioe.ticket) { + !rs->rules[ioe->rs_num].inactive.open || + rs->rules[ioe->rs_num].inactive.ticket != + ioe->ticket) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EBUSY; goto fail; } break; } } /* now do the commit - no errors should happen here */ for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + free(table, M_TEMP); + free(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if ((error = pf_commit_altq(ioe.ticket))) + if ((error = pf_commit_altq(ioe->ticket))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - if ((error = pfr_ina_commit(&table, ioe.ticket, - NULL, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_commit(table, ioe->ticket, + NULL, NULL, 0))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; default: - if ((error = pf_commit_rules(ioe.ticket, - ioe.rs_num, ioe.anchor))) + if ((error = pf_commit_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + free(table, M_TEMP); + free(ioe, M_TEMP); goto fail; /* really bad */ + } break; } } + free(table, M_TEMP); + free(ioe, M_TEMP); break; } case DIOCGETSRCNODES: { struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; - struct pf_src_node *n; - struct pf_src_node *p, pstore; + struct pf_src_node *n, *p, *pstore; u_int32_t nr = 0; int space = psn->psn_len; if (space == 0) { RB_FOREACH(n, pf_src_tree, &tree_src_tracking) nr++; psn->psn_len = sizeof(struct pf_src_node) * nr; break; } + pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK); + p = psn->psn_src_nodes; RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { int secs = time_second, diff; if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) break; - bcopy(n, &pstore, sizeof(pstore)); + bcopy(n, pstore, sizeof(*pstore)); if (n->rule.ptr != NULL) - pstore.rule.nr = n->rule.ptr->nr; - pstore.creation = secs - pstore.creation; - if (pstore.expire > secs) - pstore.expire -= secs; + pstore->rule.nr = n->rule.ptr->nr; + pstore->creation = secs - pstore->creation; + if (pstore->expire > secs) + pstore->expire -= secs; else - pstore.expire = 0; + pstore->expire = 0; /* adjust the connection rate estimate */ diff = secs - n->conn_rate.last; if (diff >= n->conn_rate.seconds) - pstore.conn_rate.count = 0; + pstore->conn_rate.count = 0; else - pstore.conn_rate.count -= + pstore->conn_rate.count -= n->conn_rate.count * diff / n->conn_rate.seconds; - error = copyout(&pstore, p, sizeof(*p)); - if (error) + error = copyout(pstore, p, sizeof(*p)); + if (error) { + free(pstore, M_TEMP); goto fail; + } p++; nr++; } psn->psn_len = sizeof(struct pf_src_node) * nr; + + 
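
[The DIOCGETSRCNODES copyout above scales each source node's connection counter down by the fraction of the rate window that has already elapsed, so userland sees an estimate of the connections still inside the window. The arithmetic in isolation, as a sketch:

#include <stdio.h>
#include <stdint.h>

/* Linear decay of a windowed connection counter: if diff seconds of
 * an N-second window have passed since the last update, assume a
 * proportional share of the counted connections has aged out. */
static uint32_t
rate_estimate(uint32_t count, int window, int last, int now)
{
	int diff = now - last;

	if (diff >= window)
		return (0);		/* the whole window has elapsed */
	return (count - (uint32_t)((uint64_t)count * diff / window));
}

int
main(void)
{
	/* 40 connections in a 10 s window, queried 4 s after the update */
	printf("%u\n", rate_estimate(40, 10, 100, 104));	/* 24 */
	return (0);
}
]
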
free(pstore, M_TEMP); break; } case DIOCCLRSRCNODES: { struct pf_src_node *n; struct pf_state *state; RB_FOREACH(state, pf_state_tree_id, &tree_id) { state->src_node = NULL; state->nat_src_node = NULL; } RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { n->expire = 1; n->states = 0; } - pf_purge_expired_src_nodes(); + pf_purge_expired_src_nodes(1); pf_status.src_nodes = 0; break; } + case DIOCKILLSRCNODES: { + struct pf_src_node *sn; + struct pf_state *s; + struct pfioc_src_node_kill *psnk = \ + (struct pfioc_src_node_kill *) addr; + int killed = 0; + + RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) { + if (PF_MATCHA(psnk->psnk_src.neg, \ + &psnk->psnk_src.addr.v.a.addr, \ + &psnk->psnk_src.addr.v.a.mask, \ + &sn->addr, sn->af) && + PF_MATCHA(psnk->psnk_dst.neg, \ + &psnk->psnk_dst.addr.v.a.addr, \ + &psnk->psnk_dst.addr.v.a.mask, \ + &sn->raddr, sn->af)) { + /* Handle state to src_node linkage */ + if (sn->states != 0) { + RB_FOREACH(s, pf_state_tree_id, + &tree_id) { + if (s->src_node == sn) + s->src_node = NULL; + if (s->nat_src_node == sn) + s->nat_src_node = NULL; + } + sn->states = 0; + } + sn->expire = 1; + killed++; + } + } + + if (killed > 0) + pf_purge_expired_src_nodes(1); + + psnk->psnk_af = killed; + break; + } + case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; if (*hostid == 0) pf_status.hostid = arc4random(); else pf_status.hostid = *hostid; break; } case DIOCOSFPFLUSH: pf_osfp_flush(); break; case DIOCIGETIFACES: { struct pfioc_iface *io = (struct pfioc_iface *)addr; - if (io->pfiio_esize != sizeof(struct pfi_if)) { + if (io->pfiio_esize != sizeof(struct pfi_kif)) { error = ENODEV; break; } error = pfi_get_ifaces(io->pfiio_name, io->pfiio_buffer, - &io->pfiio_size, io->pfiio_flags); + &io->pfiio_size); break; } - case DIOCICLRISTATS: { - struct pfioc_iface *io = (struct pfioc_iface *)addr; - - error = pfi_clr_istats(io->pfiio_name, &io->pfiio_nzero, - io->pfiio_flags); - break; - } - case DIOCSETIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; error = pfi_set_flags(io->pfiio_name, io->pfiio_flags); break; } case DIOCCLRIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags); break; } default: error = ENODEV; break; } fail: splx(s); + if (flags & FWRITE) + rw_exit_write(&pf_consistency_lock); + else + rw_exit_read(&pf_consistency_lock); return (error); } Index: vendor-sys/pf/dist/sys/contrib/pf/net/pf_norm.c =================================================================== --- vendor-sys/pf/dist/sys/contrib/pf/net/pf_norm.c (revision 171163) +++ vendor-sys/pf/dist/sys/contrib/pf/net/pf_norm.c (revision 171164) @@ -1,1844 +1,1887 @@ -/* $OpenBSD: pf_norm.c,v 1.97 2004/09/21 16:59:12 aaron Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */ /* * Copyright 2001 Niels Provos * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "pflog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #include struct pf_frent { LIST_ENTRY(pf_frent) fr_next; struct ip *fr_ip; struct mbuf *fr_m; }; struct pf_frcache { LIST_ENTRY(pf_frcache) fr_next; uint16_t fr_off; uint16_t fr_end; }; #define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ #define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ #define PFFRAG_DROP 0x0004 /* Drop all fragments */ #define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) struct pf_fragment { RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; struct in_addr fr_src; struct in_addr fr_dst; u_int8_t fr_p; /* protocol of this fragment */ u_int8_t fr_flags; /* status flags */ u_int16_t fr_id; /* fragment id for reassemble */ u_int16_t fr_max; /* fragment data max */ u_int32_t fr_timeout; #define fr_queue fr_u.fru_queue #define fr_cache fr_u.fru_cache union { LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ } fr_u; }; TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; static __inline int pf_frag_compare(struct pf_fragment *, struct pf_fragment *); RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); /* Private prototypes */ void pf_ip2key(struct pf_fragment *, struct ip *); void pf_remove_fragment(struct pf_fragment *); void pf_flush_fragments(void); void pf_free_fragment(struct pf_fragment *); struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, struct pf_frent *, int); struct mbuf *pf_fragcache(struct mbuf **, struct ip*, struct pf_fragment **, int, int, int *); int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, struct tcphdr *, int); #define DPFPRINTF(x) do { \ if (pf_status.debug >= PF_DEBUG_MISC) { \ printf("%s: ", __func__); \ printf x ; \ } \ } while(0) /* Globals */ struct pool pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; struct pool pf_state_scrub_pl; int pf_nfrents, pf_ncache; void pf_normalize_init(void) { pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", NULL); pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", NULL); pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrcache", NULL); pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent", NULL); pool_init(&pf_state_scrub_pl, sizeof(struct 
pf_state_scrub), 0, 0, 0, "pfstscr", NULL); pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0); pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0); TAILQ_INIT(&pf_fragqueue); TAILQ_INIT(&pf_cachequeue); } static __inline int pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) { int diff; if ((diff = a->fr_id - b->fr_id)) return (diff); else if ((diff = a->fr_p - b->fr_p)) return (diff); else if (a->fr_src.s_addr < b->fr_src.s_addr) return (-1); else if (a->fr_src.s_addr > b->fr_src.s_addr) return (1); else if (a->fr_dst.s_addr < b->fr_dst.s_addr) return (-1); else if (a->fr_dst.s_addr > b->fr_dst.s_addr) return (1); return (0); } void pf_purge_expired_fragments(void) { struct pf_fragment *frag; u_int32_t expire = time_second - pf_default_rule.timeout[PFTM_FRAG]; while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { KASSERT(BUFFER_FRAGMENTS(frag)); if (frag->fr_timeout > expire) break; DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); pf_free_fragment(frag); } while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { KASSERT(!BUFFER_FRAGMENTS(frag)); if (frag->fr_timeout > expire) break; DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); pf_free_fragment(frag); KASSERT(TAILQ_EMPTY(&pf_cachequeue) || TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag); } } /* * Try to flush old fragments to make space for new ones */ void pf_flush_fragments(void) { struct pf_fragment *frag; int goal; goal = pf_nfrents * 9 / 10; DPFPRINTF(("trying to free > %d frents\n", pf_nfrents - goal)); while (goal < pf_nfrents) { frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); if (frag == NULL) break; pf_free_fragment(frag); } goal = pf_ncache * 9 / 10; DPFPRINTF(("trying to free > %d cache entries\n", pf_ncache - goal)); while (goal < pf_ncache) { frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue); if (frag == NULL) break; pf_free_fragment(frag); } } /* Frees the fragments and all associated entries */ void pf_free_fragment(struct pf_fragment *frag) { struct pf_frent *frent; struct pf_frcache *frcache; /* Free all fragments */ if (BUFFER_FRAGMENTS(frag)) { for (frent = LIST_FIRST(&frag->fr_queue); frent; frent = LIST_FIRST(&frag->fr_queue)) { LIST_REMOVE(frent, fr_next); m_freem(frent->fr_m); pool_put(&pf_frent_pl, frent); pf_nfrents--; } } else { for (frcache = LIST_FIRST(&frag->fr_cache); frcache; frcache = LIST_FIRST(&frag->fr_cache)) { LIST_REMOVE(frcache, fr_next); KASSERT(LIST_EMPTY(&frag->fr_cache) || LIST_FIRST(&frag->fr_cache)->fr_off > frcache->fr_end); pool_put(&pf_cent_pl, frcache); pf_ncache--; } } pf_remove_fragment(frag); } void pf_ip2key(struct pf_fragment *key, struct ip *ip) { key->fr_p = ip->ip_p; key->fr_id = ip->ip_id; key->fr_src.s_addr = ip->ip_src.s_addr; key->fr_dst.s_addr = ip->ip_dst.s_addr; } struct pf_fragment * pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) { struct pf_fragment key; struct pf_fragment *frag; pf_ip2key(&key, ip); frag = RB_FIND(pf_frag_tree, tree, &key); if (frag != NULL) { /* XXX Are we sure we want to update the timeout? 
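
[A successful pf_find_fragment() lookup refreshes fr_timeout and re-inserts the entry at the head of its queue, so the TAILQ_LAST() scans in pf_purge_expired_fragments() and pf_flush_fragments() always evict the least recently used fragment first. A move-to-front sketch of that ordering:

#include <stdio.h>
#include <string.h>

/* Move-to-front over a small array: a hit moves the entry to slot 0,
 * so the last slot always holds the eviction victim, mirroring
 * TAILQ_INSERT_HEAD on lookup and TAILQ_LAST on expiry. */
#define N 4
static int lru[N] = { 11, 22, 33, 44 };		/* head = most recent */

static void
touch(int key)
{
	int i;

	for (i = 0; i < N && lru[i] != key; i++)
		;
	if (i == N)
		return;			/* not cached */
	memmove(&lru[1], &lru[0], i * sizeof(lru[0]));
	lru[0] = key;
}

int
main(void)
{
	int i;

	touch(33);
	for (i = 0; i < N; i++)
		printf("%d ", lru[i]);
	printf(" <- %d expires first\n", lru[N - 1]);
	return (0);
}
]
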
*/ frag->fr_timeout = time_second; if (BUFFER_FRAGMENTS(frag)) { TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); } else { TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next); } } return (frag); } /* Removes a fragment from the fragment queue and frees the fragment */ void pf_remove_fragment(struct pf_fragment *frag) { if (BUFFER_FRAGMENTS(frag)) { RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); pool_put(&pf_frag_pl, frag); } else { RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag); TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); pool_put(&pf_cache_pl, frag); } } #define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) struct mbuf * pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, struct pf_frent *frent, int mff) { struct mbuf *m = *m0, *m2; struct pf_frent *frea, *next; struct pf_frent *frep = NULL; struct ip *ip = frent->fr_ip; int hlen = ip->ip_hl << 2; u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; u_int16_t max = ip_len + off; KASSERT(*frag == NULL || BUFFER_FRAGMENTS(*frag)); /* Strip off ip header */ m->m_data += hlen; m->m_len -= hlen; /* Create a new reassembly queue for this packet */ if (*frag == NULL) { *frag = pool_get(&pf_frag_pl, PR_NOWAIT); if (*frag == NULL) { pf_flush_fragments(); *frag = pool_get(&pf_frag_pl, PR_NOWAIT); if (*frag == NULL) goto drop_fragment; } (*frag)->fr_flags = 0; (*frag)->fr_max = 0; (*frag)->fr_src = frent->fr_ip->ip_src; (*frag)->fr_dst = frent->fr_ip->ip_dst; (*frag)->fr_p = frent->fr_ip->ip_p; (*frag)->fr_id = frent->fr_ip->ip_id; (*frag)->fr_timeout = time_second; LIST_INIT(&(*frag)->fr_queue); RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); /* We do not have a previous fragment */ frep = NULL; goto insert; } /* * Find a fragment after the current one: * - off contains the real shifted offset. 
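* The queue is kept sorted by fragment offset, so the single linear
* scan that follows yields both the last entry at or before this
* offset (frep) and the first entry past it (frea). The same scan in
* a stand-alone form (hypothetical "frag" type, not pf code):
*
*	#include <stdint.h>
*
*	struct frag { uint16_t off; struct frag *next; };
*
*	// Returns the predecessor of 'off' (or NULL) and stores the
*	// first fragment lying beyond 'off' in *succ.
*	static struct frag *
*	find_slot(struct frag *head, uint16_t off, struct frag **succ)
*	{
*		struct frag *prev = NULL, *f;
*
*		for (f = head; f != NULL; f = f->next) {
*			if (f->off > off)
*				break;
*			prev = f;
*		}
*		*succ = f;
*		return (prev);
*	}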
*/ LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) { if (FR_IP_OFF(frea) > off) break; frep = frea; } KASSERT(frep != NULL || frea != NULL); if (frep != NULL && FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4 > off) { u_int16_t precut; precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4 - off; if (precut >= ip_len) goto drop_fragment; m_adj(frent->fr_m, precut); DPFPRINTF(("overlap -%d\n", precut)); /* Enforce 8 byte boundaries */ ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3)); off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; ip_len -= precut; ip->ip_len = htons(ip_len); } for (; frea != NULL && ip_len + off > FR_IP_OFF(frea); frea = next) { u_int16_t aftercut; aftercut = ip_len + off - FR_IP_OFF(frea); DPFPRINTF(("adjust overlap %d\n", aftercut)); if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl * 4) { frea->fr_ip->ip_len = htons(ntohs(frea->fr_ip->ip_len) - aftercut); frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) + (aftercut >> 3)); m_adj(frea->fr_m, aftercut); break; } - /* This fragment is completely overlapped, loose it */ + /* This fragment is completely overlapped, lose it */ next = LIST_NEXT(frea, fr_next); m_freem(frea->fr_m); LIST_REMOVE(frea, fr_next); pool_put(&pf_frent_pl, frea); pf_nfrents--; } insert: /* Update maximum data size */ if ((*frag)->fr_max < max) (*frag)->fr_max = max; /* This is the last segment */ if (!mff) (*frag)->fr_flags |= PFFRAG_SEENLAST; if (frep == NULL) LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next); else LIST_INSERT_AFTER(frep, frent, fr_next); /* Check if we are completely reassembled */ if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) return (NULL); /* Check if we have all the data */ off = 0; for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) { next = LIST_NEXT(frep, fr_next); off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4; if (off < (*frag)->fr_max && (next == NULL || FR_IP_OFF(next) != off)) { DPFPRINTF(("missing fragment at %d, next %d, max %d\n", off, next == NULL ? 
-1 : FR_IP_OFF(next), (*frag)->fr_max)); return (NULL); } } DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); if (off < (*frag)->fr_max) return (NULL); /* We have all the data */ frent = LIST_FIRST(&(*frag)->fr_queue); KASSERT(frent != NULL); if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { DPFPRINTF(("drop: too big: %d\n", off)); pf_free_fragment(*frag); *frag = NULL; return (NULL); } next = LIST_NEXT(frent, fr_next); /* Magic from ip_input */ ip = frent->fr_ip; m = frent->fr_m; m2 = m->m_next; m->m_next = NULL; m_cat(m, m2); pool_put(&pf_frent_pl, frent); pf_nfrents--; for (frent = next; frent != NULL; frent = next) { next = LIST_NEXT(frent, fr_next); m2 = frent->fr_m; pool_put(&pf_frent_pl, frent); pf_nfrents--; m_cat(m, m2); } ip->ip_src = (*frag)->fr_src; ip->ip_dst = (*frag)->fr_dst; /* Remove from fragment queue */ pf_remove_fragment(*frag); *frag = NULL; hlen = ip->ip_hl << 2; ip->ip_len = htons(off + hlen); m->m_len += hlen; m->m_data -= hlen; /* some debugging cruft by sklower, below, will go away soon */ /* XXX this should be done elsewhere */ if (m->m_flags & M_PKTHDR) { int plen = 0; for (m2 = m; m2; m2 = m2->m_next) plen += m2->m_len; m->m_pkthdr.len = plen; } DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); return (m); drop_fragment: /* Oops - fail safe - drop packet */ pool_put(&pf_frent_pl, frent); pf_nfrents--; m_freem(m); return (NULL); } struct mbuf * pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, int drop, int *nomem) { struct mbuf *m = *m0; struct pf_frcache *frp, *fra, *cur = NULL; int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2); u_int16_t off = ntohs(h->ip_off) << 3; u_int16_t max = ip_len + off; int hosed = 0; KASSERT(*frag == NULL || !BUFFER_FRAGMENTS(*frag)); /* Create a new range queue for this packet */ if (*frag == NULL) { *frag = pool_get(&pf_cache_pl, PR_NOWAIT); if (*frag == NULL) { pf_flush_fragments(); *frag = pool_get(&pf_cache_pl, PR_NOWAIT); if (*frag == NULL) goto no_mem; } /* Get an entry for the queue */ cur = pool_get(&pf_cent_pl, PR_NOWAIT); if (cur == NULL) { pool_put(&pf_cache_pl, *frag); *frag = NULL; goto no_mem; } pf_ncache++; (*frag)->fr_flags = PFFRAG_NOBUFFER; (*frag)->fr_max = 0; (*frag)->fr_src = h->ip_src; (*frag)->fr_dst = h->ip_dst; (*frag)->fr_p = h->ip_p; (*frag)->fr_id = h->ip_id; (*frag)->fr_timeout = time_second; cur->fr_off = off; cur->fr_end = max; LIST_INIT(&(*frag)->fr_cache); LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max)); goto pass; } /* * Find a fragment after the current one: * - off contains the real shifted offset. */ frp = NULL; LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) { if (fra->fr_off > off) break; frp = fra; } KASSERT(frp != NULL || fra != NULL); if (frp != NULL) { int precut; precut = frp->fr_end - off; if (precut >= ip_len) { /* Fragment is entirely a duplicate */ DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n", h->ip_id, frp->fr_off, frp->fr_end, off, max)); goto drop_fragment; } if (precut == 0) { /* They are adjacent. Fixup cache entry */ DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n", h->ip_id, frp->fr_off, frp->fr_end, off, max)); frp->fr_end = max; } else if (precut > 0) { /* The first part of this payload overlaps with a * fragment that has already been passed. * Need to trim off the first part of the payload. 
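* For instance, if bytes 0-40 have already been passed (frp->fr_end
* 40) and this fragment carries bytes 24-88 (off 24, ip_len 64),
* precut is 16: after the chop it carries bytes 40-88, off becomes 40
* and ip_off grows by precut >> 3, much as the buffering path above
* adjusts the header after its m_adj().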
* But to do so easily, we need to create another * mbuf to throw the original header into. */ DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n", h->ip_id, precut, frp->fr_off, frp->fr_end, off, max)); off += precut; max -= precut; /* Update the previous frag to encompass this one */ frp->fr_end = max; if (!drop) { /* XXX Optimization opportunity * This is a very heavy way to trim the payload. * we could do it much faster by diddling mbuf * internals but that would be even less legible * than this mbuf magic. For my next trick, * I'll pull a rabbit out of my laptop. */ *m0 = m_copym2(m, 0, h->ip_hl << 2, M_NOWAIT); if (*m0 == NULL) goto no_mem; KASSERT((*m0)->m_next == NULL); m_adj(m, precut + (h->ip_hl << 2)); m_cat(*m0, m); m = *m0; if (m->m_flags & M_PKTHDR) { int plen = 0; struct mbuf *t; for (t = m; t; t = t->m_next) plen += t->m_len; m->m_pkthdr.len = plen; } h = mtod(m, struct ip *); KASSERT((int)m->m_len == ntohs(h->ip_len) - precut); h->ip_off = htons(ntohs(h->ip_off) + (precut >> 3)); h->ip_len = htons(ntohs(h->ip_len) - precut); } else { hosed++; } } else { /* There is a gap between fragments */ DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n", h->ip_id, -precut, frp->fr_off, frp->fr_end, off, max)); cur = pool_get(&pf_cent_pl, PR_NOWAIT); if (cur == NULL) goto no_mem; pf_ncache++; cur->fr_off = off; cur->fr_end = max; LIST_INSERT_AFTER(frp, cur, fr_next); } } if (fra != NULL) { int aftercut; int merge = 0; aftercut = max - fra->fr_off; if (aftercut == 0) { /* Adjacent fragments */ DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n", h->ip_id, off, max, fra->fr_off, fra->fr_end)); fra->fr_off = off; merge = 1; } else if (aftercut > 0) { /* Need to chop off the tail of this fragment */ DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n", h->ip_id, aftercut, off, max, fra->fr_off, fra->fr_end)); fra->fr_off = off; max -= aftercut; merge = 1; if (!drop) { m_adj(m, -aftercut); if (m->m_flags & M_PKTHDR) { int plen = 0; struct mbuf *t; for (t = m; t; t = t->m_next) plen += t->m_len; m->m_pkthdr.len = plen; } h = mtod(m, struct ip *); KASSERT((int)m->m_len == ntohs(h->ip_len) - aftercut); h->ip_len = htons(ntohs(h->ip_len) - aftercut); } else { hosed++; } - } else { + } else if (frp == NULL) { /* There is a gap between fragments */ DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", h->ip_id, -aftercut, off, max, fra->fr_off, fra->fr_end)); cur = pool_get(&pf_cent_pl, PR_NOWAIT); if (cur == NULL) goto no_mem; pf_ncache++; cur->fr_off = off; cur->fr_end = max; LIST_INSERT_BEFORE(fra, cur, fr_next); } /* Need to glue together two separate fragment descriptors */ if (merge) { if (cur && fra->fr_off <= cur->fr_end) { /* Need to merge in a previous 'cur' */ DPFPRINTF(("fragcache[%d]: adjacent(merge " "%d-%d) %d-%d (%d-%d)\n", h->ip_id, cur->fr_off, cur->fr_end, off, max, fra->fr_off, fra->fr_end)); fra->fr_off = cur->fr_off; LIST_REMOVE(cur, fr_next); pool_put(&pf_cent_pl, cur); pf_ncache--; cur = NULL; } else if (frp && fra->fr_off <= frp->fr_end) { /* Need to merge in a modified 'frp' */ KASSERT(cur == NULL); DPFPRINTF(("fragcache[%d]: adjacent(merge " "%d-%d) %d-%d (%d-%d)\n", h->ip_id, frp->fr_off, frp->fr_end, off, max, fra->fr_off, fra->fr_end)); fra->fr_off = frp->fr_off; LIST_REMOVE(frp, fr_next); pool_put(&pf_cent_pl, frp); pf_ncache--; frp = NULL; } } } if (hosed) { /* * We must keep tracking the overall fragment even when * we're going to drop it anyway so that we know when to * free the overall descriptor. Thus we drop the frag late. 
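* In effect the cache entry is a sorted list of disjoint byte ranges
* that only ever grows and merges; once the last fragment has been
* seen, the datagram is done exactly when the list has collapsed to
* the single range 0 to fr_max, which is what the completeness test
* under the pass: label below checks. The same test in a stand-alone
* form (hypothetical "range" type, not pf code):
*
*	#include <stdint.h>
*
*	struct range { uint16_t off, end; };
*
*	// A datagram is complete when one surviving range spans it all.
*	static int
*	range_complete(const struct range *first, int nranges, uint16_t max)
*	{
*		return (nranges == 1 && first->off == 0 && first->end == max);
*	}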
*/ goto drop_fragment; } pass: /* Update maximum data size */ if ((*frag)->fr_max < max) (*frag)->fr_max = max; /* This is the last segment */ if (!mff) (*frag)->fr_flags |= PFFRAG_SEENLAST; /* Check if we are completely reassembled */ if (((*frag)->fr_flags & PFFRAG_SEENLAST) && LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 && LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) { /* Remove from fragment queue */ DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, (*frag)->fr_max)); pf_free_fragment(*frag); *frag = NULL; } return (m); no_mem: *nomem = 1; /* Still need to pay attention to !IP_MF */ if (!mff && *frag != NULL) (*frag)->fr_flags |= PFFRAG_SEENLAST; m_freem(m); return (NULL); drop_fragment: /* Still need to pay attention to !IP_MF */ if (!mff && *frag != NULL) (*frag)->fr_flags |= PFFRAG_SEENLAST; if (drop) { /* This fragment has been deemed bad. Don't reass */ if (((*frag)->fr_flags & PFFRAG_DROP) == 0) DPFPRINTF(("fragcache[%d]: dropping overall fragment\n", h->ip_id)); (*frag)->fr_flags |= PFFRAG_DROP; } m_freem(m); return (NULL); } int pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, struct pf_pdesc *pd) { struct mbuf *m = *m0; struct pf_rule *r; struct pf_frent *frent; struct pf_fragment *frag = NULL; struct ip *h = mtod(m, struct ip *); int mff = (ntohs(h->ip_off) & IP_MF); int hlen = h->ip_hl << 2; u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; u_int16_t max; int ip_len; int ip_off; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != AF_INET) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != h->ip_p) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, - (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.neg)) + (struct pf_addr *)&h->ip_src.s_addr, AF_INET, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, - (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg)) + (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; } - if (r == NULL) + if (r == NULL || r->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } /* Check for illegal packets */ if (hlen < (int)sizeof(struct ip)) goto drop; if (hlen > ntohs(h->ip_len)) goto drop; /* Clear IP_DF if the rule uses the no-df option */ - if (r->rule_flag & PFRULE_NODF) + if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + h->ip_off &= htons(~IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } /* We will need other tests here */ if (!fragoff && !mff) goto no_fragment; /* We're dealing with a fragment now. Don't allow fragments * with IP_DF to enter the cache. If the flag was cleared by * no-df above, fine. Otherwise drop it. 
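* When no-df does clear the bit above, the header checksum is patched
* incrementally rather than recomputed. A stand-alone sketch of that
* one's-complement fix-up in the RFC 1624 style (the kernel's
* pf_cksum_fixup() is the authoritative version; this ignores its
* UDP zero-checksum special case):
*
*	#include <stdint.h>
*
*	// Replace the 16-bit word 'o' by 'n' under checksum field 'sum'.
*	static uint16_t
*	cksum_fixup(uint16_t sum, uint16_t o, uint16_t n)
*	{
*		uint32_t l = sum + o + (n ^ 0xffff);	// HC' = HC + m + ~m'
*
*		l = (l >> 16) + (l & 0xffff);		// fold the carry
*		l += l >> 16;
*		return ((uint16_t)l);
*	}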
*/ if (h->ip_off & htons(IP_DF)) { DPFPRINTF(("IP_DF\n")); goto bad; } ip_len = ntohs(h->ip_len) - hlen; ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3; /* All fragments are 8 byte aligned */ if (mff && (ip_len & 0x7)) { DPFPRINTF(("mff and %d\n", ip_len)); goto bad; } /* Respect maximum length */ if (fragoff + ip_len > IP_MAXPACKET) { DPFPRINTF(("max packet %d\n", fragoff + ip_len)); goto bad; } max = fragoff + ip_len; if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { /* Fully buffer all of the fragments */ frag = pf_find_fragment(h, &pf_frag_tree); /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && max > frag->fr_max) goto bad; /* Get an entry for the fragment queue */ frent = pool_get(&pf_frent_pl, PR_NOWAIT); if (frent == NULL) { REASON_SET(reason, PFRES_MEMORY); return (PF_DROP); } pf_nfrents++; frent->fr_ip = h; frent->fr_m = m; /* Might return a completely reassembled mbuf, or NULL */ DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); *m0 = m = pf_reassemble(m0, &frag, frent, mff); if (m == NULL) return (PF_DROP); + /* use mtag from concatenated mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + printf("%s: pf_find_mtag returned NULL(1)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; + goto no_mem; + } + } +#endif if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; h = mtod(m, struct ip *); } else { /* non-buffering fragment cache (drops or masks overlaps) */ int nomem = 0; - if (dir == PF_OUT) { - if (m_tag_find(m, PACKET_TAG_PF_FRAGCACHE, NULL) != - NULL) { - /* Already passed the fragment cache in the - * input direction. If we continued, it would - * appear to be a dup and would be dropped. - */ - goto fragment_pass; - } + if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) { + /* + * Already passed the fragment cache in the + * input direction. If we continued, it would + * appear to be a dup and would be dropped. + */ + goto fragment_pass; } frag = pf_find_fragment(h, &pf_cache_tree); /* Check if we saw the last fragment already */ if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && max > frag->fr_max) { if (r->rule_flag & PFRULE_FRAGDROP) frag->fr_flags |= PFFRAG_DROP; goto bad; } *m0 = m = pf_fragcache(m0, h, &frag, mff, (r->rule_flag & PFRULE_FRAGDROP) ? 
1 : 0, &nomem); if (m == NULL) { if (nomem) goto no_mem; goto drop; } - if (dir == PF_IN) { - struct m_tag *mtag; - - mtag = m_tag_get(PACKET_TAG_PF_FRAGCACHE, 0, M_NOWAIT); - if (mtag == NULL) + /* use mtag from copied and trimmed mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + printf("%s: pf_find_mtag returned NULL(2)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; goto no_mem; - m_tag_prepend(m, mtag); + } } +#endif + if (dir == PF_IN) + pd->pf_mtag->flags |= PF_TAG_FRAGCACHE; + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; goto fragment_pass; } no_fragment: /* At this point, only IP_DF is allowed in ip_off */ - h->ip_off &= htons(IP_DF); + if (h->ip_off & ~htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + h->ip_off &= htons(IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } + /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) + if (r->min_ttl && h->ip_ttl < r->min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + h->ip_ttl = r->min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } if (r->rule_flag & PFRULE_RANDOMID) { u_int16_t ip_id = h->ip_id; h->ip_id = ip_randomid(); h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); } if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) pd->flags |= PFDESC_IP_REAS; return (PF_PASS); fragment_pass: /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) + if (r->min_ttl && h->ip_ttl < r->min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + h->ip_ttl = r->min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) pd->flags |= PFDESC_IP_REAS; return (PF_PASS); no_mem: REASON_SET(reason, PFRES_MEMORY); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); bad: DPFPRINTF(("dropping bad fragment\n")); /* Free associated fragments */ if (frag != NULL) pf_free_fragment(frag); REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); } #ifdef INET6 int pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, struct pf_pdesc *pd) { struct mbuf *m = *m0; struct pf_rule *r; struct ip6_hdr *h = mtod(m, struct ip6_hdr *); int off; struct ip6_ext ext; struct ip6_opt opt; struct ip6_opt_jumbo jumbo; struct ip6_frag frag; u_int32_t jumbolen = 0, plen; u_int16_t fragoff = 0; int optend; int ooff; u_int8_t proto; int terminal; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != AF_INET6) r = r->skip[PF_SKIP_AF].ptr; #if 0 /* header chain! 
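ip6_nxt only names the first extension header, not the transport protocol, so a straight comparison would misclassify any packet carrying extension headers; with a hop-by-hop header in front of TCP, for example, ip6_nxt is IPPROTO_HOPOPTS (0) and the TCP protocol number only shows up inside that extension header. The chain walk below recovers the real protocol first. The shape of such a walk, stripped of the AH special case and of the bounds checking the real code performs (hypothetical helper, not pf code):

	#include <sys/types.h>
	#include <stdint.h>
	#include <string.h>
	#include <netinet/in.h>
	#include <netinet/ip6.h>

	// Follow ip6e_nxt until something that is not a skippable
	// extension header is reached.
	static uint8_t
	final_proto(const uint8_t *pkt, size_t off, uint8_t nxt)
	{
		struct ip6_ext ext;

		while (nxt == IPPROTO_HOPOPTS || nxt == IPPROTO_ROUTING ||
		    nxt == IPPROTO_DSTOPTS) {
			memcpy(&ext, pkt + off, sizeof(ext));
			off += (ext.ip6e_len + 1) * 8;
			nxt = ext.ip6e_nxt;
		}
		return (nxt);
	}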
*/ else if (r->proto && r->proto != h->ip6_nxt) r = r->skip[PF_SKIP_PROTO].ptr; #endif else if (PF_MISMATCHAW(&r->src.addr, - (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.neg)) + (struct pf_addr *)&h->ip6_src, AF_INET6, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, - (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.neg)) + (struct pf_addr *)&h->ip6_dst, AF_INET6, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; } - if (r == NULL) + if (r == NULL || r->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } /* Check for illegal packets */ if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) goto drop; off = sizeof(struct ip6_hdr); proto = h->ip6_nxt; terminal = 0; do { switch (proto) { case IPPROTO_FRAGMENT: goto fragment; break; case IPPROTO_AH: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, NULL, AF_INET6)) goto shortpkt; if (proto == IPPROTO_AH) off += (ext.ip6e_len + 2) * 4; else off += (ext.ip6e_len + 1) * 8; proto = ext.ip6e_nxt; break; case IPPROTO_HOPOPTS: if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, NULL, AF_INET6)) goto shortpkt; optend = off + (ext.ip6e_len + 1) * 8; ooff = off + sizeof(ext); do { if (!pf_pull_hdr(m, ooff, &opt.ip6o_type, sizeof(opt.ip6o_type), NULL, NULL, AF_INET6)) goto shortpkt; if (opt.ip6o_type == IP6OPT_PAD1) { ooff++; continue; } if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt), NULL, NULL, AF_INET6)) goto shortpkt; if (ooff + sizeof(opt) + opt.ip6o_len > optend) goto drop; switch (opt.ip6o_type) { case IP6OPT_JUMBO: if (h->ip6_plen != 0) goto drop; if (!pf_pull_hdr(m, ooff, &jumbo, sizeof(jumbo), NULL, NULL, AF_INET6)) goto shortpkt; memcpy(&jumbolen, jumbo.ip6oj_jumbo_len, sizeof(jumbolen)); jumbolen = ntohl(jumbolen); if (jumbolen <= IPV6_MAXPACKET) goto drop; if (sizeof(struct ip6_hdr) + jumbolen != m->m_pkthdr.len) goto drop; break; default: break; } ooff += sizeof(opt) + opt.ip6o_len; } while (ooff < optend); off = optend; proto = ext.ip6e_nxt; break; default: terminal = 1; break; } } while (!terminal); /* jumbo payload option must be present, or plen > 0 */ if (ntohs(h->ip6_plen) == 0) plen = jumbolen; else plen = ntohs(h->ip6_plen); if (plen == 0) goto drop; if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) goto shortpkt; /* Enforce a minimum ttl, may cause endless packet loops */ if (r->min_ttl && h->ip6_hlim < r->min_ttl) h->ip6_hlim = r->min_ttl; return (PF_PASS); fragment: if (ntohs(h->ip6_plen) == 0 || jumbolen) goto drop; plen = ntohs(h->ip6_plen); if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) goto shortpkt; fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET) goto badfrag; /* do something about it */ /* remember to set pd->flags |= PFDESC_IP_REAS */ return (PF_PASS); shortpkt: REASON_SET(reason, PFRES_SHORT); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); badfrag: REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, 
h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); } #endif /* INET6 */ int pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, int off, void *h, struct pf_pdesc *pd) { struct pf_rule *r, *rm = NULL; struct tcphdr *th = pd->hdr.tcp; int rewrite = 0; u_short reason; u_int8_t flags; sa_family_t af = pd->af; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint)) r = TAILQ_NEXT(r, entries); else { rm = r; break; } } if (rm == NULL || rm->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) pd->flags |= PFDESC_TCP_NORM; flags = th->th_flags; if (flags & TH_SYN) { /* Illegal packet */ if (flags & TH_RST) goto tcp_drop; if (flags & TH_FIN) flags &= ~TH_FIN; } else { /* Illegal packet */ if (!(flags & (TH_ACK|TH_RST))) goto tcp_drop; } if (!(flags & TH_ACK)) { /* These flags are only valid if ACK is set */ if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) goto tcp_drop; } /* Check for illegal header length */ if (th->th_off < (sizeof(struct tcphdr) >> 2)) goto tcp_drop; /* If flags changed, or reserved data set, then adjust */ if (flags != th->th_flags || th->th_x2 != 0) { u_int16_t ov, nv; ov = *(u_int16_t *)(&th->th_ack + 1); th->th_flags = flags; th->th_x2 = 0; nv = *(u_int16_t *)(&th->th_ack + 1); th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0); rewrite = 1; } /* Remove urgent pointer, if TH_URG is not set */ if (!(flags & TH_URG) && th->th_urp) { th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0); th->th_urp = 0; rewrite = 1; } /* Process options */ if (r->max_mss && pf_normalize_tcpopt(r, m, th, off)) rewrite = 1; /* copy back packet headers if we sanitized */ if (rewrite) m_copyback(m, off, sizeof(*th), th); return (PF_PASS); tcp_drop: REASON_SET(&reason, PFRES_NORM); if (rm != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd); return (PF_DROP); } int pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) { u_int32_t tsval, tsecr; u_int8_t hdr[60]; u_int8_t *opt; KASSERT(src->scrub == NULL); src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); if (src->scrub == NULL) 
return (1); bzero(src->scrub, sizeof(*src->scrub)); switch (pd->af) { #ifdef INET case AF_INET: { struct ip *h = mtod(m, struct ip *); src->scrub->pfss_ttl = h->ip_ttl; break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { struct ip6_hdr *h = mtod(m, struct ip6_hdr *); src->scrub->pfss_ttl = h->ip6_hlim; break; } #endif /* INET6 */ } /* * All normalizations below are only begun if we see the start of * the connections. They must all set an enabled bit in pfss_flags */ if ((th->th_flags & TH_SYN) == 0) return (0); if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { /* Diddle with TCP options */ int hlen; opt = hdr + sizeof(struct tcphdr); hlen = (th->th_off << 2) - sizeof(struct tcphdr); while (hlen >= TCPOLEN_TIMESTAMP) { switch (*opt) { case TCPOPT_EOL: /* FALLTHROUGH */ case TCPOPT_NOP: opt++; hlen--; break; case TCPOPT_TIMESTAMP: if (opt[1] >= TCPOLEN_TIMESTAMP) { src->scrub->pfss_flags |= PFSS_TIMESTAMP; src->scrub->pfss_ts_mod = htonl(arc4random()); /* note PFSS_PAWS not set yet */ memcpy(&tsval, &opt[2], sizeof(u_int32_t)); memcpy(&tsecr, &opt[6], sizeof(u_int32_t)); src->scrub->pfss_tsval0 = ntohl(tsval); src->scrub->pfss_tsval = ntohl(tsval); src->scrub->pfss_tsecr = ntohl(tsecr); getmicrouptime(&src->scrub->pfss_last); } /* FALLTHROUGH */ default: hlen -= MAX(opt[1], 2); opt += MAX(opt[1], 2); break; } } } return (0); } void pf_normalize_tcp_cleanup(struct pf_state *state) { if (state->src.scrub) pool_put(&pf_state_scrub_pl, state->src.scrub); if (state->dst.scrub) pool_put(&pf_state_scrub_pl, state->dst.scrub); /* Someday... flush the TCP segment reassembly descriptors. */ } int pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, u_short *reason, struct tcphdr *th, struct pf_state *state, struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) { struct timeval uptime; u_int32_t tsval, tsecr; u_int tsval_from_last; u_int8_t hdr[60]; u_int8_t *opt; int copyback = 0; int got_ts = 0; KASSERT(src->scrub || dst->scrub); /* * Enforce the minimum TTL seen for this connection. Negate a common * technique to evade an intrusion detection system and confuse * firewall state code. */ switch (pd->af) { #ifdef INET case AF_INET: { if (src->scrub) { struct ip *h = mtod(m, struct ip *); if (h->ip_ttl > src->scrub->pfss_ttl) src->scrub->pfss_ttl = h->ip_ttl; h->ip_ttl = src->scrub->pfss_ttl; } break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { if (src->scrub) { struct ip6_hdr *h = mtod(m, struct ip6_hdr *); if (h->ip6_hlim > src->scrub->pfss_ttl) src->scrub->pfss_ttl = h->ip6_hlim; h->ip6_hlim = src->scrub->pfss_ttl; } break; } #endif /* INET6 */ } if (th->th_off > (sizeof(struct tcphdr) >> 2) && ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { /* Diddle with TCP options */ int hlen; opt = hdr + sizeof(struct tcphdr); hlen = (th->th_off << 2) - sizeof(struct tcphdr); while (hlen >= TCPOLEN_TIMESTAMP) { switch (*opt) { case TCPOPT_EOL: /* FALLTHROUGH */ case TCPOPT_NOP: opt++; hlen--; break; case TCPOPT_TIMESTAMP: /* Modulate the timestamps. Can be used for * NAT detection, OS uptime determination or * reboot detection. */ if (got_ts) { /* Huh? Multiple timestamps!? 
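A well-formed header carries at most one timestamp option, so a second occurrence is either a broken stack or crafted input, and the packet is dropped. A stand-alone option walk enforcing that rule (hypothetical helper, not pf code; 0, 1 and 8 are TCPOPT_EOL, TCPOPT_NOP and TCPOPT_TIMESTAMP):

	#include <stdint.h>

	// Returns the number of timestamp options, or -1 if the option
	// list is malformed or carries more than one of them.
	static int
	count_ts(const uint8_t *opt, int len)
	{
		int seen = 0;

		while (len > 0 && opt[0] != 0) {
			if (opt[0] == 1) {
				opt++;
				len--;
				continue;
			}
			if (len < 2 || opt[1] < 2 || opt[1] > len)
				return (-1);
			if (opt[0] == 8 && ++seen > 1)
				return (-1);
			len -= opt[1];
			opt += opt[1];
		}
		return (seen);
	}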
*/ if (pf_status.debug >= PF_DEBUG_MISC) { DPFPRINTF(("multiple TS??")); pf_print_state(state); printf("\n"); } REASON_SET(reason, PFRES_TS); return (PF_DROP); } if (opt[1] >= TCPOLEN_TIMESTAMP) { memcpy(&tsval, &opt[2], sizeof(u_int32_t)); if (tsval && src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) { tsval = ntohl(tsval); pf_change_a(&opt[2], &th->th_sum, htonl(tsval + src->scrub->pfss_ts_mod), 0); copyback = 1; } /* Modulate TS reply iff valid (!0) */ memcpy(&tsecr, &opt[6], sizeof(u_int32_t)); if (tsecr && dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { tsecr = ntohl(tsecr) - dst->scrub->pfss_ts_mod; pf_change_a(&opt[6], &th->th_sum, htonl(tsecr), 0); copyback = 1; } got_ts = 1; } /* FALLTHROUGH */ default: hlen -= MAX(opt[1], 2); opt += MAX(opt[1], 2); break; } } if (copyback) { /* Copyback the options, caller copies back header */ *writeback = 1; m_copyback(m, off + sizeof(struct tcphdr), (th->th_off << 2) - sizeof(struct tcphdr), hdr + sizeof(struct tcphdr)); } } /* * Must invalidate PAWS checks on connections idle for too long. * The fastest allowed timestamp clock is 1ms. That turns out to * be about 24 days before it wraps. XXX Right now our lowerbound * TS echo check only works for the first 12 days of a connection * when the TS has exhausted half its 32bit space */ #define TS_MAX_IDLE (24*24*60*60) #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ getmicrouptime(&uptime); if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || time_second - state->creation > TS_MAX_CONN)) { if (pf_status.debug >= PF_DEBUG_MISC) { DPFPRINTF(("src idled out of PAWS\n")); pf_print_state(state); printf("\n"); } src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED; } if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { if (pf_status.debug >= PF_DEBUG_MISC) { DPFPRINTF(("dst idled out of PAWS\n")); pf_print_state(state); printf("\n"); } dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED; } if (got_ts && src->scrub && dst->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && (dst->scrub->pfss_flags & PFSS_PAWS)) { /* Validate that the timestamps are "in-window". * RFC1323 describes TCP Timestamp options that allow * measurement of RTT (round trip time) and PAWS * (protection against wrapped sequence numbers). PAWS * gives us a set of rules for rejecting packets on * long fat pipes (packets that were somehow delayed * in transit longer than the time it took to send the * full TCP sequence space of 4Gb). We can use these * rules and infer a few others that will let us treat * the 32bit timestamp and the 32bit echoed timestamp * as sequence numbers to prevent a blind attacker from * inserting packets into a connection. * * RFC1323 tells us: * - The timestamp on this packet must be greater than * or equal to the last value echoed by the other * endpoint. The RFC says those will be discarded * since it is a dup that has already been acked. * This gives us a lowerbound on the timestamp. * timestamp >= other last echoed timestamp * - The timestamp will be less than or equal to * the last timestamp plus the time between the * last packet and now. The RFC defines the max * clock rate as 1ms. We will allow clocks to be * up to 10% fast and will allow a total difference * of 30 seconds due to a route change. And this * gives us an upperbound on the timestamp.
* timestamp <= last timestamp + max ticks * We have to be careful here. Windows will send an * initial timestamp of zero and then initialize it * to a random value after the 3whs; presumably to * avoid a DoS by having to call an expensive RNG * during a SYN flood. Proof MS has at least one * good security geek. * * - The TCP timestamp option must also echo the other * endpoint's timestamp. The timestamp echoed is the * one carried on the earliest unacknowledged segment * on the left edge of the sequence window. The RFC * states that the host will reject any echoed * timestamps that were larger than any ever sent. * This gives us an upperbound on the TS echo. * tsecr <= largest_tsval * - The lowerbound on the TS echo is a little more * tricky to determine. The other endpoint's echoed * values will not decrease. But there may be * network conditions that re-order packets and * cause our view of them to decrease. For now the * only lowerbound we can safely determine is that * the TS echo will never be less than the original * TS. XXX There is probably a better lowerbound. * Remove TS_MAX_CONN with better lowerbound check. * tsecr >= other original TS * * It is also important to note that the fastest * timestamp clock of 1ms will wrap its 32bit space in * 24 days. So we just disable TS checking after 24 * days of idle time. We actually must use a 12d * connection limit until we can come up with a better * lowerbound to the TS echo check. */ struct timeval delta_ts; int ts_fudge; /* * PFTM_TS_DIFF is how many seconds of leeway to allow * a host's timestamp. This can happen if the previous * packet got delayed in transit for much longer than * this packet. */ if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; /* Calculate max ticks since the last timestamp */ #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ #define TS_MICROSECS 1000000 /* microseconds per second */ timersub(&uptime, &src->scrub->pfss_last, &delta_ts); tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); if ((src->state >= TCPS_ESTABLISHED && dst->state >= TCPS_ESTABLISHED) && (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { /* Bad RFC1323 implementation or an insertion attack. * * - Solaris 2.6 and 2.7 are known to send another ACK * after the FIN,FIN|ACK,ACK closing that carries * an old timestamp. */ DPFPRINTF(("Timestamp failed %c%c%c%c\n", SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ? '1' : ' ', SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', SEQ_LT(tsecr, dst->scrub->pfss_tsval0)?
'3' : ' ')); DPFPRINTF((" tsval: %lu tsecr: %lu +ticks: %lu " "idle: %lus %lums\n", tsval, tsecr, tsval_from_last, delta_ts.tv_sec, delta_ts.tv_usec / 1000)); DPFPRINTF((" src->tsval: %lu tsecr: %lu\n", src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); DPFPRINTF((" dst->tsval: %lu tsecr: %lu tsval0: %lu" "\n", dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); if (pf_status.debug >= PF_DEBUG_MISC) { pf_print_state(state); pf_print_flags(th->th_flags); printf("\n"); } REASON_SET(reason, PFRES_TS); return (PF_DROP); } /* XXX I'd really like to require tsecr but it's optional */ } else if (!got_ts && (th->th_flags & TH_RST) == 0 && ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) || pd->p_len > 0 || (th->th_flags & TH_SYN)) && src->scrub && dst->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && (dst->scrub->pfss_flags & PFSS_PAWS)) { /* Didn't send a timestamp. Timestamps aren't really useful * when: * - connection opening or closing (often not even sent). * but we must not let an attacker to put a FIN on a * data packet to sneak it through our ESTABLISHED check. * - on a TCP reset. RFC suggests not even looking at TS. * - on an empty ACK. The TS will not be echoed so it will * probably not help keep the RTT calculation in sync and * there isn't as much danger when the sequence numbers * got wrapped. So some stacks don't include TS on empty * ACKs :-( * * To minimize the disruption to mostly RFC1323 conformant * stacks, we will only require timestamps on data packets. * * And what do ya know, we cannot require timestamps on data * packets. There appear to be devices that do legitimate * TCP connection hijacking. There are HTTP devices that allow * a 3whs (with timestamps) and then buffer the HTTP request. * If the intermediate device has the HTTP response cache, it * will spoof the response but not bother timestamping its * packets. So we can look for the presence of a timestamp in * the first data packet and if there, require it in all future * packets. */ if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { /* * Hey! Someone tried to sneak a packet in. Or the * stack changed its RFC1323 behavior?!?! */ if (pf_status.debug >= PF_DEBUG_MISC) { DPFPRINTF(("Did not receive expected RFC1323 " "timestamp\n")); pf_print_state(state); pf_print_flags(th->th_flags); printf("\n"); } REASON_SET(reason, PFRES_TS); return (PF_DROP); } } /* * We will note if a host sends his data packets with or without * timestamps. And require all data packets to contain a timestamp * if the first does. PAWS implicitly requires that all data packets be * timestamped. But I think there are middle-man devices that hijack - * TCP streams immedietly after the 3whs and don't timestamp their + * TCP streams immediately after the 3whs and don't timestamp their * packets (seen in a WWW accelerator or cache). */ if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { if (got_ts) src->scrub->pfss_flags |= PFSS_DATA_TS; else { src->scrub->pfss_flags |= PFSS_DATA_NOTS; if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { /* Don't warn if other host rejected RFC1323 */ DPFPRINTF(("Broken RFC1323 stack did not " "timestamp data packet. 
Disabled PAWS " "security.\n")); pf_print_state(state); pf_print_flags(th->th_flags); printf("\n"); } } } /* * Update PAWS values */ if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { getmicrouptime(&src->scrub->pfss_last); if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || (src->scrub->pfss_flags & PFSS_PAWS) == 0) src->scrub->pfss_tsval = tsval; if (tsecr) { if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || (src->scrub->pfss_flags & PFSS_PAWS) == 0) src->scrub->pfss_tsecr = tsecr; if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && (SEQ_LT(tsval, src->scrub->pfss_tsval0) || src->scrub->pfss_tsval0 == 0)) { /* tsval0 MUST be the lowest timestamp */ src->scrub->pfss_tsval0 = tsval; } /* Only fully initialized after a TS gets echoed */ if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) src->scrub->pfss_flags |= PFSS_PAWS; } } /* I have a dream.... TCP segment reassembly.... */ return (0); } int pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, int off) { u_int16_t *mss; int thoff; int opt, cnt, optlen = 0; int rewrite = 0; u_char *optp; thoff = th->th_off << 2; cnt = thoff - sizeof(struct tcphdr); optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr); for (; cnt > 0; cnt -= optlen, optp += optlen) { opt = optp[0]; if (opt == TCPOPT_EOL) break; if (opt == TCPOPT_NOP) optlen = 1; else { if (cnt < 2) break; optlen = optp[1]; if (optlen < 2 || optlen > cnt) break; } switch (opt) { case TCPOPT_MAXSEG: mss = (u_int16_t *)(optp + 2); if ((ntohs(*mss)) > r->max_mss) { th->th_sum = pf_cksum_fixup(th->th_sum, *mss, htons(r->max_mss), 0); *mss = htons(r->max_mss); rewrite = 1; } break; default: break; } } return (rewrite); } Index: vendor-sys/pf/dist/sys/contrib/pf/net/pf_osfp.c =================================================================== --- vendor-sys/pf/dist/sys/contrib/pf/net/pf_osfp.c (revision 171163) +++ vendor-sys/pf/dist/sys/contrib/pf/net/pf_osfp.c (revision 171164) @@ -1,525 +1,585 @@ -/* $OpenBSD: pf_osfp.c,v 1.10 2004/04/09 19:30:41 frantzen Exp $ */ +/* $OpenBSD: pf_osfp.c,v 1.12 2006/12/13 18:14:10 itojun Exp $ */ /* * Copyright (c) 2003 Mike Frantzen * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * */ #include #include #ifdef _KERNEL # include #endif /* _KERNEL */ #include #include #include #include #include #include #include -#ifdef INET6 #include -#endif /* INET6 */ +#ifdef _KERNEL +#include +#endif #ifdef _KERNEL # define DPFPRINTF(format, x...) \ if (pf_status.debug >= PF_DEBUG_NOISY) \ printf(format , ##x) typedef struct pool pool_t; #else /* Userland equivalents so we can lend code to tcpdump et al. 
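The shims below reduce a kernel pool to an int holding the object size: pool_init() records the size and pool_get() simply mallocs that many bytes, while kernel-only arguments such as PR_NOWAIT vanish during macro expansion and so never need to be defined. Under those definitions a kernel-style sequence like the following compiles unchanged in userland (illustrative only):

	pool_t pl;
	struct pf_os_fingerprint *fp;

	pool_init(&pl, sizeof(*fp), 0, 0, 0, "pfosfp", NULL);
	fp = pool_get(&pl, PR_NOWAIT);	// effectively malloc(sizeof(*fp))
	pool_put(&pl, fp);		// expands to free(fp)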
*/ # include # include # include # include # include +# include # define pool_t int # define pool_get(pool, flags) malloc(*(pool)) # define pool_put(pool, item) free(item) # define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size) # ifdef PFDEBUG # include # define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) # else # define DPFPRINTF(format, x...) ((void)0) # endif /* PFDEBUG */ #endif /* _KERNEL */ SLIST_HEAD(pf_osfp_list, pf_os_fingerprint) pf_osfp_list; pool_t pf_osfp_entry_pl; pool_t pf_osfp_pl; struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, struct pf_os_fingerprint *, u_int8_t); struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *, struct pf_os_fingerprint *); void pf_osfp_insert(struct pf_osfp_list *, struct pf_os_fingerprint *); #ifdef _KERNEL /* * Passively fingerprint the OS of the host (IPv4 TCP SYN packets only) * Returns the list of possible OSes. */ struct pf_osfp_enlist * pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off, const struct tcphdr *tcp) { struct ip *ip; + struct ip6_hdr *ip6; char hdr[60]; - /* XXX don't have a fingerprint database for IPv6 :-( */ - if (pd->af != PF_INET || pd->proto != IPPROTO_TCP || (tcp->th_off << 2) - < sizeof(*tcp)) + if ((pd->af != PF_INET && pd->af != PF_INET6) || + pd->proto != IPPROTO_TCP || (tcp->th_off << 2) < sizeof(*tcp)) return (NULL); - ip = mtod(m, struct ip *); - if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, pd->af)) - return (NULL); + if (pd->af == PF_INET) { + ip = mtod(m, struct ip *); + ip6 = (struct ip6_hdr *)NULL; + } else { + ip = (struct ip *)NULL; + ip6 = mtod(m, struct ip6_hdr *); + } + if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, + pd->af)) return (NULL); - return (pf_osfp_fingerprint_hdr(ip, (struct tcphdr *)hdr)); + return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); } #endif /* _KERNEL */ struct pf_osfp_enlist * -pf_osfp_fingerprint_hdr(const struct ip *ip, const struct tcphdr *tcp) +pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const struct tcphdr *tcp) { struct pf_os_fingerprint fp, *fpresult; int cnt, optlen = 0; const u_int8_t *optp; +#ifdef _KERNEL + char srcname[128]; +#else + char srcname[NI_MAXHOST]; +#endif - if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN || (ip->ip_off & - htons(IP_OFFMASK))) + if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) return (NULL); + if (ip) { + if ((ip->ip_off & htons(IP_OFFMASK)) != 0) + return (NULL); + } memset(&fp, 0, sizeof(fp)); - fp.fp_psize = ntohs(ip->ip_len); - fp.fp_ttl = ip->ip_ttl; - if (ip->ip_off & htons(IP_DF)) + if (ip) { +#ifndef _KERNEL + struct sockaddr_in sin; +#endif + + fp.fp_psize = ntohs(ip->ip_len); + fp.fp_ttl = ip->ip_ttl; + if (ip->ip_off & htons(IP_DF)) + fp.fp_flags |= PF_OSFP_DF; +#ifdef _KERNEL + strlcpy(srcname, inet_ntoa(ip->ip_src), sizeof(srcname)); +#else + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_addr = ip->ip_src; + (void)getnameinfo((struct sockaddr *)&sin, + sizeof(struct sockaddr_in), srcname, sizeof(srcname), + NULL, 0, NI_NUMERICHOST); +#endif + } +#ifdef INET6 + else if (ip6) { +#ifndef _KERNEL + struct sockaddr_in6 sin6; +#endif + + /* jumbo payload? 
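For a jumbogram, ip6_plen is zero and the real length rides in a hop-by-hop IP6OPT_JUMBO option, so the sum below would degenerate to bare sizeof(struct ip6_hdr): plen 0 gives fp_psize 40 no matter how large the payload actually is - hence the question mark.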
*/ + fp.fp_psize = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); + fp.fp_ttl = ip6->ip6_hlim; fp.fp_flags |= PF_OSFP_DF; + fp.fp_flags |= PF_OSFP_INET6; +#ifdef _KERNEL + strlcpy(srcname, ip6_sprintf((struct in6_addr *)&ip6->ip6_src), + sizeof(srcname)); +#else + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_addr = ip6->ip6_src; + (void)getnameinfo((struct sockaddr *)&sin6, + sizeof(struct sockaddr_in6), srcname, sizeof(srcname), + NULL, 0, NI_NUMERICHOST); +#endif + } +#endif + else + return (NULL); fp.fp_wsize = ntohs(tcp->th_win); cnt = (tcp->th_off << 2) - sizeof(*tcp); optp = (const u_int8_t *)((const char *)tcp + sizeof(*tcp)); for (; cnt > 0; cnt -= optlen, optp += optlen) { if (*optp == TCPOPT_EOL) break; fp.fp_optcnt++; if (*optp == TCPOPT_NOP) { fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_NOP; optlen = 1; } else { if (cnt < 2) return (NULL); optlen = optp[1]; if (optlen > cnt || optlen < 2) return (NULL); switch (*optp) { case TCPOPT_MAXSEG: if (optlen >= TCPOLEN_MAXSEG) memcpy(&fp.fp_mss, &optp[2], sizeof(fp.fp_mss)); fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_MSS; NTOHS(fp.fp_mss); break; case TCPOPT_WINDOW: if (optlen >= TCPOLEN_WINDOW) memcpy(&fp.fp_wscale, &optp[2], sizeof(fp.fp_wscale)); NTOHS(fp.fp_wscale); fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_WSCALE; break; case TCPOPT_SACK_PERMITTED: fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_SACK; break; case TCPOPT_TIMESTAMP: if (optlen >= TCPOLEN_TIMESTAMP) { u_int32_t ts; memcpy(&ts, &optp[2], sizeof(ts)); if (ts == 0) fp.fp_flags |= PF_OSFP_TS0; } fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_TS; break; default: return (NULL); } } optlen = MAX(optlen, 1); /* paranoia */ } DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " "(TS=%s,M=%s%d,W=%s%d)\n", - inet_ntoa(ip->ip_src), ntohs(tcp->th_sport), + srcname, ntohs(tcp->th_sport), fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0, fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt, (fp.fp_flags & PF_OSFP_TS0) ? "0" : "", (fp.fp_flags & PF_OSFP_MSS_MOD) ? "%" : (fp.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", fp.fp_mss, (fp.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : (fp.fp_flags & PF_OSFP_WSCALE_DC) ? 
"*" : "", fp.fp_wscale); if ((fpresult = pf_osfp_find(&pf_osfp_list, &fp, PF_OSFP_MAXTTL_OFFSET))) return (&fpresult->fp_oses); return (NULL); } /* Match a fingerprint ID against a list of OSes */ int pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) { struct pf_osfp_entry *entry; int os_class, os_version, os_subtype; int en_class, en_version, en_subtype; if (os == PF_OSFP_ANY) return (1); if (list == NULL) { DPFPRINTF("osfp no match against %x\n", os); return (os == PF_OSFP_UNKNOWN); } PF_OSFP_UNPACK(os, os_class, os_version, os_subtype); SLIST_FOREACH(entry, list, fp_entry) { PF_OSFP_UNPACK(entry->fp_os, en_class, en_version, en_subtype); if ((os_class == PF_OSFP_ANY || en_class == os_class) && (os_version == PF_OSFP_ANY || en_version == os_version) && (os_subtype == PF_OSFP_ANY || en_subtype == os_subtype)) { DPFPRINTF("osfp matched %s %s %s %x==%x\n", entry->fp_class_nm, entry->fp_version_nm, entry->fp_subtype_nm, os, entry->fp_os); return (1); } } DPFPRINTF("fingerprint 0x%x didn't match\n", os); return (0); } /* Initialize the OS fingerprint system */ void pf_osfp_initialize(void) { pool_init(&pf_osfp_entry_pl, sizeof(struct pf_osfp_entry), 0, 0, 0, "pfosfpen", &pool_allocator_nointr); pool_init(&pf_osfp_pl, sizeof(struct pf_os_fingerprint), 0, 0, 0, "pfosfp", &pool_allocator_nointr); SLIST_INIT(&pf_osfp_list); } /* Flush the fingerprint list */ void pf_osfp_flush(void) { struct pf_os_fingerprint *fp; struct pf_osfp_entry *entry; while ((fp = SLIST_FIRST(&pf_osfp_list))) { SLIST_REMOVE_HEAD(&pf_osfp_list, fp_next); while ((entry = SLIST_FIRST(&fp->fp_oses))) { SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry); pool_put(&pf_osfp_entry_pl, entry); } pool_put(&pf_osfp_pl, fp); } } /* Add a fingerprint */ int pf_osfp_add(struct pf_osfp_ioctl *fpioc) { struct pf_os_fingerprint *fp, fpadd; struct pf_osfp_entry *entry; memset(&fpadd, 0, sizeof(fpadd)); fpadd.fp_tcpopts = fpioc->fp_tcpopts; fpadd.fp_wsize = fpioc->fp_wsize; fpadd.fp_psize = fpioc->fp_psize; fpadd.fp_mss = fpioc->fp_mss; fpadd.fp_flags = fpioc->fp_flags; fpadd.fp_optcnt = fpioc->fp_optcnt; fpadd.fp_wscale = fpioc->fp_wscale; fpadd.fp_ttl = fpioc->fp_ttl; DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d " "(TS=%s,M=%s%d,W=%s%d) %x\n", fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm, fpioc->fp_os.fp_subtype_nm, (fpadd.fp_flags & PF_OSFP_WSIZE_MOD) ? "%" : (fpadd.fp_flags & PF_OSFP_WSIZE_MSS) ? "S" : (fpadd.fp_flags & PF_OSFP_WSIZE_MTU) ? "T" : (fpadd.fp_flags & PF_OSFP_WSIZE_DC) ? "*" : "", fpadd.fp_wsize, fpadd.fp_ttl, (fpadd.fp_flags & PF_OSFP_DF) ? 1 : 0, (fpadd.fp_flags & PF_OSFP_PSIZE_MOD) ? "%" : (fpadd.fp_flags & PF_OSFP_PSIZE_DC) ? "*" : "", fpadd.fp_psize, (long long int)fpadd.fp_tcpopts, fpadd.fp_optcnt, (fpadd.fp_flags & PF_OSFP_TS0) ? "0" : "", (fpadd.fp_flags & PF_OSFP_MSS_MOD) ? "%" : (fpadd.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", fpadd.fp_mss, (fpadd.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : (fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? 
"*" : "", fpadd.fp_wscale, fpioc->fp_os.fp_os); if ((fp = pf_osfp_find_exact(&pf_osfp_list, &fpadd))) { SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os)) return (EEXIST); } if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) return (ENOMEM); } else { if ((fp = pool_get(&pf_osfp_pl, PR_NOWAIT)) == NULL) return (ENOMEM); memset(fp, 0, sizeof(*fp)); fp->fp_tcpopts = fpioc->fp_tcpopts; fp->fp_wsize = fpioc->fp_wsize; fp->fp_psize = fpioc->fp_psize; fp->fp_mss = fpioc->fp_mss; fp->fp_flags = fpioc->fp_flags; fp->fp_optcnt = fpioc->fp_optcnt; fp->fp_wscale = fpioc->fp_wscale; fp->fp_ttl = fpioc->fp_ttl; SLIST_INIT(&fp->fp_oses); if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) { pool_put(&pf_osfp_pl, fp); return (ENOMEM); } pf_osfp_insert(&pf_osfp_list, fp); } memcpy(entry, &fpioc->fp_os, sizeof(*entry)); /* Make sure the strings are NUL terminated */ entry->fp_class_nm[sizeof(entry->fp_class_nm)-1] = '\0'; entry->fp_version_nm[sizeof(entry->fp_version_nm)-1] = '\0'; entry->fp_subtype_nm[sizeof(entry->fp_subtype_nm)-1] = '\0'; SLIST_INSERT_HEAD(&fp->fp_oses, entry, fp_entry); #ifdef PFDEBUG if ((fp = pf_osfp_validate())) printf("Invalid fingerprint list\n"); #endif /* PFDEBUG */ return (0); } /* Find a fingerprint in the list */ struct pf_os_fingerprint * pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, u_int8_t ttldiff) { struct pf_os_fingerprint *f; #define MATCH_INT(_MOD, _DC, _field) \ if ((f->fp_flags & _DC) == 0) { \ if ((f->fp_flags & _MOD) == 0) { \ if (f->_field != find->_field) \ continue; \ } else { \ if (f->_field == 0 || find->_field % f->_field) \ continue; \ } \ } SLIST_FOREACH(f, list, fp_next) { if (f->fp_tcpopts != find->fp_tcpopts || f->fp_optcnt != find->fp_optcnt || f->fp_ttl < find->fp_ttl || f->fp_ttl - find->fp_ttl > ttldiff || (f->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0)) != (find->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0))) continue; MATCH_INT(PF_OSFP_PSIZE_MOD, PF_OSFP_PSIZE_DC, fp_psize) MATCH_INT(PF_OSFP_MSS_MOD, PF_OSFP_MSS_DC, fp_mss) MATCH_INT(PF_OSFP_WSCALE_MOD, PF_OSFP_WSCALE_DC, fp_wscale) if ((f->fp_flags & PF_OSFP_WSIZE_DC) == 0) { if (f->fp_flags & PF_OSFP_WSIZE_MSS) { if (find->fp_mss == 0) continue; /* Some "smart" NAT devices and DSL routers will tweak the MSS size and * will set it to whatever is suitable for the link type. 
*/ #define SMART_MSS 1460 if ((find->fp_wsize % find->fp_mss || find->fp_wsize / find->fp_mss != f->fp_wsize) && (find->fp_wsize % SMART_MSS || find->fp_wsize / SMART_MSS != f->fp_wsize)) continue; } else if (f->fp_flags & PF_OSFP_WSIZE_MTU) { if (find->fp_mss == 0) continue; #define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr)) #define SMART_MTU (SMART_MSS + MTUOFF) if ((find->fp_wsize % (find->fp_mss + MTUOFF) || find->fp_wsize / (find->fp_mss + MTUOFF) != f->fp_wsize) && (find->fp_wsize % SMART_MTU || find->fp_wsize / SMART_MTU != f->fp_wsize)) continue; } else if (f->fp_flags & PF_OSFP_WSIZE_MOD) { if (f->fp_wsize == 0 || find->fp_wsize % f->fp_wsize) continue; } else { if (f->fp_wsize != find->fp_wsize) continue; } } return (f); } return (NULL); } /* Find an exact fingerprint in the list */ struct pf_os_fingerprint * pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find) { struct pf_os_fingerprint *f; SLIST_FOREACH(f, list, fp_next) { if (f->fp_tcpopts == find->fp_tcpopts && f->fp_wsize == find->fp_wsize && f->fp_psize == find->fp_psize && f->fp_mss == find->fp_mss && f->fp_flags == find->fp_flags && f->fp_optcnt == find->fp_optcnt && f->fp_wscale == find->fp_wscale && f->fp_ttl == find->fp_ttl) return (f); } return (NULL); } /* Insert a fingerprint into the list */ void pf_osfp_insert(struct pf_osfp_list *list, struct pf_os_fingerprint *ins) { struct pf_os_fingerprint *f, *prev = NULL; /* XXX need to go semi tree based. can key on tcp options */ SLIST_FOREACH(f, list, fp_next) prev = f; if (prev) SLIST_INSERT_AFTER(prev, ins, fp_next); else SLIST_INSERT_HEAD(list, ins, fp_next); } /* Fill a fingerprint by its number (from an ioctl) */ int pf_osfp_get(struct pf_osfp_ioctl *fpioc) { struct pf_os_fingerprint *fp; struct pf_osfp_entry *entry; int num = fpioc->fp_getnum; int i = 0; memset(fpioc, 0, sizeof(*fpioc)); SLIST_FOREACH(fp, &pf_osfp_list, fp_next) { SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { if (i++ == num) { fpioc->fp_mss = fp->fp_mss; fpioc->fp_wsize = fp->fp_wsize; fpioc->fp_flags = fp->fp_flags; fpioc->fp_psize = fp->fp_psize; fpioc->fp_ttl = fp->fp_ttl; fpioc->fp_wscale = fp->fp_wscale; fpioc->fp_getnum = num; memcpy(&fpioc->fp_os, entry, sizeof(fpioc->fp_os)); return (0); } } } return (EBUSY); } /* Validate that each signature is reachable */ struct pf_os_fingerprint * pf_osfp_validate(void) { struct pf_os_fingerprint *f, *f2, find; SLIST_FOREACH(f, &pf_osfp_list, fp_next) { memcpy(&find, f, sizeof(find)); /* We do a few MSS/th_win percolations to make things unique */ if (find.fp_mss == 0) find.fp_mss = 128; if (f->fp_flags & PF_OSFP_WSIZE_MSS) find.fp_wsize *= find.fp_mss, 1; else if (f->fp_flags & PF_OSFP_WSIZE_MTU) find.fp_wsize *= (find.fp_mss + 40); else if (f->fp_flags & PF_OSFP_WSIZE_MOD) find.fp_wsize *= 2; if (f != (f2 = pf_osfp_find(&pf_osfp_list, &find, 0))) { if (f2) printf("Found \"%s %s %s\" instead of " "\"%s %s %s\"\n", SLIST_FIRST(&f2->fp_oses)->fp_class_nm, SLIST_FIRST(&f2->fp_oses)->fp_version_nm, SLIST_FIRST(&f2->fp_oses)->fp_subtype_nm, SLIST_FIRST(&f->fp_oses)->fp_class_nm, SLIST_FIRST(&f->fp_oses)->fp_version_nm, SLIST_FIRST(&f->fp_oses)->fp_subtype_nm); else printf("Couldn't find \"%s %s %s\"\n", SLIST_FIRST(&f->fp_oses)->fp_class_nm, SLIST_FIRST(&f->fp_oses)->fp_version_nm, SLIST_FIRST(&f->fp_oses)->fp_subtype_nm); return (f); } } return (NULL); } Index: vendor-sys/pf/dist/sys/contrib/pf/net/pf_ruleset.c =================================================================== --- 
vendor-sys/pf/dist/sys/contrib/pf/net/pf_ruleset.c (nonexistent) +++ vendor-sys/pf/dist/sys/contrib/pf/net/pf_ruleset.c (revision 171164) @@ -0,0 +1,415 @@ +/* $OpenBSD: pf_ruleset.c,v 1.1 2006/10/27 13:56:51 mcbride Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + */ + +#include +#include +#ifdef _KERNEL +# include +#endif /* _KERNEL */ +#include + +#include +#include +#include +#include + +#include +#include + +#ifdef INET6 +#include +#endif /* INET6 */ + + +#ifdef _KERNEL +# define DPFPRINTF(format, x...) \ + if (pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) +#define rs_malloc(x) malloc(x, M_TEMP, M_WAITOK) +#define rs_free(x) free(x, M_TEMP) + +#else +/* Userland equivalents so we can lend code to pfctl et al. */ + +# include +# include +# include +# include +# include +# define rs_malloc(x) malloc(x) +# define rs_free(x) free(x) + +# ifdef PFDEBUG +# include +# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +# else +# define DPFPRINTF(format, x...) ((void)0) +# endif /* PFDEBUG */ +#endif /* _KERNEL */ + + +struct pf_anchor_global pf_anchors; +struct pf_anchor pf_main_anchor; + +int pf_get_ruleset_number(u_int8_t); +void pf_init_ruleset(struct pf_ruleset *); +int pf_anchor_setup(struct pf_rule *, + const struct pf_ruleset *, const char *); +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); + +static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); + +RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); +RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); + +static __inline int +pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b) +{ + int c = strcmp(a->path, b->path); + + return (c ? (c < 0 ? 
-1 : 1) : 0); +} + +int +pf_get_ruleset_number(u_int8_t action) +{ + switch (action) { + case PF_SCRUB: + case PF_NOSCRUB: + return (PF_RULESET_SCRUB); + break; + case PF_PASS: + case PF_DROP: + return (PF_RULESET_FILTER); + break; + case PF_NAT: + case PF_NONAT: + return (PF_RULESET_NAT); + break; + case PF_BINAT: + case PF_NOBINAT: + return (PF_RULESET_BINAT); + break; + case PF_RDR: + case PF_NORDR: + return (PF_RULESET_RDR); + break; + default: + return (PF_RULESET_MAX); + break; + } +} + +void +pf_init_ruleset(struct pf_ruleset *ruleset) +{ + int i; + + memset(ruleset, 0, sizeof(struct pf_ruleset)); + for (i = 0; i < PF_RULESET_MAX; i++) { + TAILQ_INIT(&ruleset->rules[i].queues[0]); + TAILQ_INIT(&ruleset->rules[i].queues[1]); + ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; + ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; + } +} + +struct pf_anchor * +pf_find_anchor(const char *path) +{ + struct pf_anchor *key, *found; + + key = (struct pf_anchor *)rs_malloc(sizeof(*key)); + memset(key, 0, sizeof(*key)); + strlcpy(key->path, path, sizeof(key->path)); + found = RB_FIND(pf_anchor_global, &pf_anchors, key); + rs_free(key); + return (found); +} + +struct pf_ruleset * +pf_find_ruleset(const char *path) +{ + struct pf_anchor *anchor; + + while (*path == '/') + path++; + if (!*path) + return (&pf_main_ruleset); + anchor = pf_find_anchor(path); + if (anchor == NULL) + return (NULL); + else + return (&anchor->ruleset); +} + +struct pf_ruleset * +pf_find_or_create_ruleset(const char *path) +{ + char *p, *q, *r; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor, *dup, *parent = NULL; + + if (path[0] == 0) + return (&pf_main_ruleset); + while (*path == '/') + path++; + ruleset = pf_find_ruleset(path); + if (ruleset != NULL) + return (ruleset); + p = (char *)rs_malloc(MAXPATHLEN); + bzero(p, MAXPATHLEN); + strlcpy(p, path, MAXPATHLEN); + while (parent == NULL && (q = strrchr(p, '/')) != NULL) { + *q = 0; + if ((ruleset = pf_find_ruleset(p)) != NULL) { + parent = ruleset->anchor; + break; + } + } + if (q == NULL) + q = p; + else + q++; + strlcpy(p, path, MAXPATHLEN); + if (!*q) { + rs_free(p); + return (NULL); + } + while ((r = strchr(q, '/')) != NULL || *q) { + if (r != NULL) + *r = 0; + if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE || + (parent != NULL && strlen(parent->path) >= + MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) { + rs_free(p); + return (NULL); + } + anchor = (struct pf_anchor *)rs_malloc(sizeof(*anchor)); + if (anchor == NULL) { + rs_free(p); + return (NULL); + } + memset(anchor, 0, sizeof(*anchor)); + RB_INIT(&anchor->children); + strlcpy(anchor->name, q, sizeof(anchor->name)); + if (parent != NULL) { + strlcpy(anchor->path, parent->path, + sizeof(anchor->path)); + strlcat(anchor->path, "/", sizeof(anchor->path)); + } + strlcat(anchor->path, anchor->name, sizeof(anchor->path)); + if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != + NULL) { + printf("pf_find_or_create_ruleset: RB_INSERT1 " + "'%s' '%s' collides with '%s' '%s'\n", + anchor->path, anchor->name, dup->path, dup->name); + rs_free(anchor); + rs_free(p); + return (NULL); + } + if (parent != NULL) { + anchor->parent = parent; + if ((dup = RB_INSERT(pf_anchor_node, &parent->children, + anchor)) != NULL) { + printf("pf_find_or_create_ruleset: " + "RB_INSERT2 '%s' '%s' collides with " + "'%s' '%s'\n", anchor->path, anchor->name, + dup->path, dup->name); + RB_REMOVE(pf_anchor_global, &pf_anchors, + anchor); + rs_free(anchor); + rs_free(p); + return (NULL); + } + } + 
pf_init_ruleset(&anchor->ruleset); + anchor->ruleset.anchor = anchor; + parent = anchor; + if (r != NULL) + q = r + 1; + else + *q = 0; + } + rs_free(p); + return (&anchor->ruleset); +} + +void +pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) +{ + struct pf_anchor *parent; + int i; + + while (ruleset != NULL) { + if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || + !RB_EMPTY(&ruleset->anchor->children) || + ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || + ruleset->topen) + return; + for (i = 0; i < PF_RULESET_MAX; ++i) + if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || + !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || + ruleset->rules[i].inactive.open) + return; + RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); + if ((parent = ruleset->anchor->parent) != NULL) + RB_REMOVE(pf_anchor_node, &parent->children, + ruleset->anchor); + rs_free(ruleset->anchor); + if (parent == NULL) + return; + ruleset = &parent->ruleset; + } +} + +int +pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, + const char *name) +{ + char *p, *path; + struct pf_ruleset *ruleset; + + r->anchor = NULL; + r->anchor_relative = 0; + r->anchor_wildcard = 0; + if (!name[0]) + return (0); + path = (char *)rs_malloc(MAXPATHLEN); + bzero(path, MAXPATHLEN); + if (name[0] == '/') + strlcpy(path, name + 1, MAXPATHLEN); + else { + /* relative path */ + r->anchor_relative = 1; + if (s->anchor == NULL || !s->anchor->path[0]) + path[0] = 0; + else + strlcpy(path, s->anchor->path, MAXPATHLEN); + while (name[0] == '.' && name[1] == '.' && name[2] == '/') { + if (!path[0]) { + printf("pf_anchor_setup: .. beyond root\n"); + rs_free(path); + return (1); + } + if ((p = strrchr(path, '/')) != NULL) + *p = 0; + else + path[0] = 0; + r->anchor_relative++; + name += 3; + } + if (path[0]) + strlcat(path, "/", MAXPATHLEN); + strlcat(path, name, MAXPATHLEN); + } + if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) { + r->anchor_wildcard = 1; + *p = 0; + } + ruleset = pf_find_or_create_ruleset(path); + rs_free(path); + if (ruleset == NULL || ruleset->anchor == NULL) { + printf("pf_anchor_setup: ruleset\n"); + return (1); + } + r->anchor = ruleset->anchor; + r->anchor->refcnt++; + return (0); +} + +int +pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, + struct pfioc_rule *pr) +{ + pr->anchor_call[0] = 0; + if (r->anchor == NULL) + return (0); + if (!r->anchor_relative) { + strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call)); + strlcat(pr->anchor_call, r->anchor->path, + sizeof(pr->anchor_call)); + } else { + char *a, *p; + int i; + + a = (char *)rs_malloc(MAXPATHLEN); + bzero(a, MAXPATHLEN); + if (rs->anchor == NULL) + a[0] = 0; + else + strlcpy(a, rs->anchor->path, MAXPATHLEN); + for (i = 1; i < r->anchor_relative; ++i) { + if ((p = strrchr(a, '/')) == NULL) + p = a; + *p = 0; + strlcat(pr->anchor_call, "../", + sizeof(pr->anchor_call)); + } + if (strncmp(a, r->anchor->path, strlen(a))) { + printf("pf_anchor_copyout: '%s' '%s'\n", a, + r->anchor->path); + rs_free(a); + return (1); + } + if (strlen(r->anchor->path) > strlen(a)) + strlcat(pr->anchor_call, r->anchor->path + (a[0] ? + strlen(a) + 1 : 0), sizeof(pr->anchor_call)); + rs_free(a); + } + if (r->anchor_wildcard) + strlcat(pr->anchor_call, pr->anchor_call[0] ? 
"/*" : "*", + sizeof(pr->anchor_call)); + return (0); +} + +void +pf_anchor_remove(struct pf_rule *r) +{ + if (r->anchor == NULL) + return; + if (r->anchor->refcnt <= 0) { + printf("pf_anchor_remove: broken refcount\n"); + r->anchor = NULL; + return; + } + if (!--r->anchor->refcnt) + pf_remove_if_empty_ruleset(&r->anchor->ruleset); + r->anchor = NULL; +} Property changes on: vendor-sys/pf/dist/sys/contrib/pf/net/pf_ruleset.c ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: vendor-sys/pf/dist/sys/contrib/pf/net/pf_table.c =================================================================== --- vendor-sys/pf/dist/sys/contrib/pf/net/pf_table.c (revision 171163) +++ vendor-sys/pf/dist/sys/contrib/pf/net/pf_table.c (revision 171164) @@ -1,2212 +1,2214 @@ -/* $OpenBSD: pf_table.c,v 1.62 2004/12/07 18:02:04 mcbride Exp $ */ +/* $OpenBSD: pf_table.c,v 1.68 2006/05/02 10:08:45 dhartmei Exp $ */ /* * Copyright (c) 2002 Cedric Berger * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #include #include #include #include #include #include #include #include #include #include #define ACCEPT_FLAGS(oklist) \ do { \ if ((flags & ~(oklist)) & \ PFR_FLAG_ALLMASK) \ return (EINVAL); \ } while (0) #define COPYIN(from, to, size) \ ((flags & PFR_FLAG_USERIOCTL) ? \ copyin((from), (to), (size)) : \ (bcopy((from), (to), (size)), 0)) #define COPYOUT(from, to, size) \ ((flags & PFR_FLAG_USERIOCTL) ? \ copyout((from), (to), (size)) : \ (bcopy((from), (to), (size)), 0)) #define FILLIN_SIN(sin, addr) \ do { \ (sin).sin_len = sizeof(sin); \ (sin).sin_family = AF_INET; \ (sin).sin_addr = (addr); \ } while (0) #define FILLIN_SIN6(sin6, addr) \ do { \ (sin6).sin6_len = sizeof(sin6); \ (sin6).sin6_family = AF_INET6; \ (sin6).sin6_addr = (addr); \ } while (0) #define SWAP(type, a1, a2) \ do { \ type tmp = a1; \ a1 = a2; \ a2 = tmp; \ } while (0) #define SUNION2PF(su, af) (((af)==AF_INET) ? 
\ (struct pf_addr *)&(su)->sin.sin_addr : \ (struct pf_addr *)&(su)->sin6.sin6_addr) #define AF_BITS(af) (((af)==AF_INET)?32:128) #define ADDR_NETWORK(ad) ((ad)->pfra_net < AF_BITS((ad)->pfra_af)) #define KENTRY_NETWORK(ke) ((ke)->pfrke_net < AF_BITS((ke)->pfrke_af)) #define KENTRY_RNF_ROOT(ke) \ ((((struct radix_node *)(ke))->rn_flags & RNF_ROOT) != 0) #define NO_ADDRESSES (-1) #define ENQUEUE_UNMARKED_ONLY (1) #define INVERT_NEG_FLAG (1) struct pfr_walktree { enum pfrw_op { PFRW_MARK, PFRW_SWEEP, PFRW_ENQUEUE, PFRW_GET_ADDRS, PFRW_GET_ASTATS, PFRW_POOL_GET, PFRW_DYNADDR_UPDATE } pfrw_op; union { struct pfr_addr *pfrw1_addr; struct pfr_astats *pfrw1_astats; struct pfr_kentryworkq *pfrw1_workq; struct pfr_kentry *pfrw1_kentry; struct pfi_dynaddr *pfrw1_dyn; } pfrw_1; int pfrw_free; int pfrw_flags; }; #define pfrw_addr pfrw_1.pfrw1_addr #define pfrw_astats pfrw_1.pfrw1_astats #define pfrw_workq pfrw_1.pfrw1_workq #define pfrw_kentry pfrw_1.pfrw1_kentry #define pfrw_dyn pfrw_1.pfrw1_dyn #define pfrw_cnt pfrw_free #define senderr(e) do { rv = (e); goto _bad; } while (0) struct pool pfr_ktable_pl; struct pool pfr_kentry_pl; struct pool pfr_kentry_pl2; struct sockaddr_in pfr_sin; struct sockaddr_in6 pfr_sin6; union sockaddr_union pfr_mask; struct pf_addr pfr_ffaddr; void pfr_copyout_addr(struct pfr_addr *, struct pfr_kentry *ke); int pfr_validate_addr(struct pfr_addr *); void pfr_enqueue_addrs(struct pfr_ktable *, struct pfr_kentryworkq *, int *, int); void pfr_mark_addrs(struct pfr_ktable *); struct pfr_kentry *pfr_lookup_addr(struct pfr_ktable *, struct pfr_addr *, int); struct pfr_kentry *pfr_create_kentry(struct pfr_addr *, int); void pfr_destroy_kentries(struct pfr_kentryworkq *); void pfr_destroy_kentry(struct pfr_kentry *); void pfr_insert_kentries(struct pfr_ktable *, struct pfr_kentryworkq *, long); void pfr_remove_kentries(struct pfr_ktable *, struct pfr_kentryworkq *); void pfr_clstats_kentries(struct pfr_kentryworkq *, long, int); void pfr_reset_feedback(struct pfr_addr *, int, int); void pfr_prepare_network(union sockaddr_union *, int, int); int pfr_route_kentry(struct pfr_ktable *, struct pfr_kentry *); int pfr_unroute_kentry(struct pfr_ktable *, struct pfr_kentry *); int pfr_walktree(struct radix_node *, void *); int pfr_validate_table(struct pfr_table *, int, int); int pfr_fix_anchor(char *); void pfr_commit_ktable(struct pfr_ktable *, long); void pfr_insert_ktables(struct pfr_ktableworkq *); void pfr_insert_ktable(struct pfr_ktable *); void pfr_setflags_ktables(struct pfr_ktableworkq *); void pfr_setflags_ktable(struct pfr_ktable *, int); void pfr_clstats_ktables(struct pfr_ktableworkq *, long, int); void pfr_clstats_ktable(struct pfr_ktable *, long, int); struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int); void pfr_destroy_ktables(struct pfr_ktableworkq *, int); void pfr_destroy_ktable(struct pfr_ktable *, int); int pfr_ktable_compare(struct pfr_ktable *, struct pfr_ktable *); struct pfr_ktable *pfr_lookup_table(struct pfr_table *); void pfr_clean_node_mask(struct pfr_ktable *, struct pfr_kentryworkq *); int pfr_table_count(struct pfr_table *, int); int pfr_skip_table(struct pfr_table *, struct pfr_ktable *, int); struct pfr_kentry *pfr_kentry_byidx(struct pfr_ktable *, int, int); RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); struct pfr_ktablehead pfr_ktables; struct pfr_table pfr_nulltable; int pfr_ktable_cnt; void pfr_initialize(void) { 
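	/*
	 * The pool_init() calls below back the table/entry allocators
	 * declared above; pfr_kentry_pl2 exists so that pfr_create_kentry()
	 * can also allocate in interrupt context (intr != 0).  pfr_sin and
	 * pfr_sin6 are pre-filled template sockaddrs reused for every radix
	 * lookup, and pfr_ffaddr is the all-ones address later handed to
	 * PF_POOLMASK().  A minimal sketch of how the rest of this file
	 * uses such a pool:
	 *
	 *	struct pfr_kentry *ke = pool_get(&pfr_kentry_pl, PR_NOWAIT);
	 *	if (ke != NULL) {
	 *		bzero(ke, sizeof(*ke));
	 *		... fill in and route the entry ...
	 *		pool_put(&pfr_kentry_pl, ke);
	 *	}
	 */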
pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable), 0, 0, 0, "pfrktable", &pool_allocator_oldnointr); pool_init(&pfr_kentry_pl, sizeof(struct pfr_kentry), 0, 0, 0, "pfrkentry", &pool_allocator_oldnointr); pool_init(&pfr_kentry_pl2, sizeof(struct pfr_kentry), 0, 0, 0, "pfrkentry2", NULL); pfr_sin.sin_len = sizeof(pfr_sin); pfr_sin.sin_family = AF_INET; pfr_sin6.sin6_len = sizeof(pfr_sin6); pfr_sin6.sin6_family = AF_INET6; memset(&pfr_ffaddr, 0xff, sizeof(pfr_ffaddr)); } int pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) { struct pfr_ktable *kt; struct pfr_kentryworkq workq; int s; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); pfr_enqueue_addrs(kt, &workq, ndel, 0); if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_remove_kentries(kt, &workq); if (flags & PFR_FLAG_ATOMIC) splx(s); if (kt->pfrkt_cnt) { printf("pfr_clr_addrs: corruption detected (%d).\n", kt->pfrkt_cnt); kt->pfrkt_cnt = 0; } } return (0); } int pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, int *nadd, int flags) { struct pfr_ktable *kt, *tmpkt; struct pfr_kentryworkq workq; struct pfr_kentry *p, *q; struct pfr_addr ad; int i, rv, s, xadd = 0; long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); if (tmpkt == NULL) return (ENOMEM); SLIST_INIT(&workq); for (i = 0; i < size; i++) { if (COPYIN(addr+i, &ad, sizeof(ad))) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); p = pfr_lookup_addr(kt, &ad, 1); q = pfr_lookup_addr(tmpkt, &ad, 1); if (flags & PFR_FLAG_FEEDBACK) { if (q != NULL) ad.pfra_fback = PFR_FB_DUPLICATE; else if (p == NULL) ad.pfra_fback = PFR_FB_ADDED; else if (p->pfrke_not != ad.pfra_not) ad.pfra_fback = PFR_FB_CONFLICT; else ad.pfra_fback = PFR_FB_NONE; } if (p == NULL && q == NULL) { p = pfr_create_kentry(&ad, 0); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { pfr_destroy_kentry(p); ad.pfra_fback = PFR_FB_NONE; } else { SLIST_INSERT_HEAD(&workq, p, pfrke_workq); xadd++; } } if (flags & PFR_FLAG_FEEDBACK) if (COPYOUT(&ad, addr+i, sizeof(ad))) senderr(EFAULT); } pfr_clean_node_mask(tmpkt, &workq); if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_insert_kentries(kt, &workq, tzero); if (flags & PFR_FLAG_ATOMIC) splx(s); } else pfr_destroy_kentries(&workq); if (nadd != NULL) *nadd = xadd; pfr_destroy_ktable(tmpkt, 0); return (0); _bad: pfr_clean_node_mask(tmpkt, &workq); pfr_destroy_kentries(&workq); if (flags & PFR_FLAG_FEEDBACK) pfr_reset_feedback(addr, size, flags); pfr_destroy_ktable(tmpkt, 0); return (rv); } int pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, int *ndel, int flags) { struct pfr_ktable *kt; struct pfr_kentryworkq workq; struct pfr_kentry *p; struct pfr_addr ad; int i, rv, s, xdel = 0, log = 1; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & 
PFR_TFLAG_ACTIVE)) return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); /* * there are two algorithms to choose from here. * with: * n: number of addresses to delete * N: number of addresses in the table * * one is O(N) and is better for large 'n' * one is O(n*LOG(N)) and is better for small 'n' * * following code try to decide which one is best. */ for (i = kt->pfrkt_cnt; i > 0; i >>= 1) log++; if (size > kt->pfrkt_cnt/log) { /* full table scan */ pfr_mark_addrs(kt); } else { /* iterate over addresses to delete */ for (i = 0; i < size; i++) { if (COPYIN(addr+i, &ad, sizeof(ad))) return (EFAULT); if (pfr_validate_addr(&ad)) return (EINVAL); p = pfr_lookup_addr(kt, &ad, 1); if (p != NULL) p->pfrke_mark = 0; } } SLIST_INIT(&workq); for (i = 0; i < size; i++) { if (COPYIN(addr+i, &ad, sizeof(ad))) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); p = pfr_lookup_addr(kt, &ad, 1); if (flags & PFR_FLAG_FEEDBACK) { if (p == NULL) ad.pfra_fback = PFR_FB_NONE; else if (p->pfrke_not != ad.pfra_not) ad.pfra_fback = PFR_FB_CONFLICT; else if (p->pfrke_mark) ad.pfra_fback = PFR_FB_DUPLICATE; else ad.pfra_fback = PFR_FB_DELETED; } if (p != NULL && p->pfrke_not == ad.pfra_not && !p->pfrke_mark) { p->pfrke_mark = 1; SLIST_INSERT_HEAD(&workq, p, pfrke_workq); xdel++; } if (flags & PFR_FLAG_FEEDBACK) if (COPYOUT(&ad, addr+i, sizeof(ad))) senderr(EFAULT); } if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_remove_kentries(kt, &workq); if (flags & PFR_FLAG_ATOMIC) splx(s); } if (ndel != NULL) *ndel = xdel; return (0); _bad: if (flags & PFR_FLAG_FEEDBACK) pfr_reset_feedback(addr, size, flags); return (rv); } int pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, - int *size2, int *nadd, int *ndel, int *nchange, int flags) + int *size2, int *nadd, int *ndel, int *nchange, int flags, + u_int32_t ignore_pfrt_flags) { struct pfr_ktable *kt, *tmpkt; struct pfr_kentryworkq addq, delq, changeq; struct pfr_kentry *p, *q; struct pfr_addr ad; int i, rv, s, xadd = 0, xdel = 0, xchange = 0; long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); - if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & + PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); if (tmpkt == NULL) return (ENOMEM); pfr_mark_addrs(kt); SLIST_INIT(&addq); SLIST_INIT(&delq); SLIST_INIT(&changeq); for (i = 0; i < size; i++) { if (COPYIN(addr+i, &ad, sizeof(ad))) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); ad.pfra_fback = PFR_FB_NONE; p = pfr_lookup_addr(kt, &ad, 1); if (p != NULL) { if (p->pfrke_mark) { ad.pfra_fback = PFR_FB_DUPLICATE; goto _skip; } p->pfrke_mark = 1; if (p->pfrke_not != ad.pfra_not) { SLIST_INSERT_HEAD(&changeq, p, pfrke_workq); ad.pfra_fback = PFR_FB_CHANGED; xchange++; } } else { q = pfr_lookup_addr(tmpkt, &ad, 1); if (q != NULL) { ad.pfra_fback = PFR_FB_DUPLICATE; goto _skip; } p = pfr_create_kentry(&ad, 0); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { pfr_destroy_kentry(p); ad.pfra_fback = PFR_FB_NONE; } else { SLIST_INSERT_HEAD(&addq, p, pfrke_workq); ad.pfra_fback = PFR_FB_ADDED; xadd++; } } _skip: if (flags & PFR_FLAG_FEEDBACK) if (COPYOUT(&ad, addr+i, sizeof(ad))) senderr(EFAULT); } pfr_enqueue_addrs(kt, &delq, &xdel, 
ENQUEUE_UNMARKED_ONLY); if ((flags & PFR_FLAG_FEEDBACK) && *size2) { if (*size2 < size+xdel) { *size2 = size+xdel; senderr(0); } i = 0; SLIST_FOREACH(p, &delq, pfrke_workq) { pfr_copyout_addr(&ad, p); ad.pfra_fback = PFR_FB_DELETED; if (COPYOUT(&ad, addr+size+i, sizeof(ad))) senderr(EFAULT); i++; } } pfr_clean_node_mask(tmpkt, &addq); if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_insert_kentries(kt, &addq, tzero); pfr_remove_kentries(kt, &delq); pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG); if (flags & PFR_FLAG_ATOMIC) splx(s); } else pfr_destroy_kentries(&addq); if (nadd != NULL) *nadd = xadd; if (ndel != NULL) *ndel = xdel; if (nchange != NULL) *nchange = xchange; if ((flags & PFR_FLAG_FEEDBACK) && size2) *size2 = size+xdel; pfr_destroy_ktable(tmpkt, 0); return (0); _bad: pfr_clean_node_mask(tmpkt, &addq); pfr_destroy_kentries(&addq); if (flags & PFR_FLAG_FEEDBACK) pfr_reset_feedback(addr, size, flags); pfr_destroy_ktable(tmpkt, 0); return (rv); } int pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, int *nmatch, int flags) { struct pfr_ktable *kt; struct pfr_kentry *p; struct pfr_addr ad; int i, xmatch = 0; ACCEPT_FLAGS(PFR_FLAG_REPLACE); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (ESRCH); for (i = 0; i < size; i++) { if (COPYIN(addr+i, &ad, sizeof(ad))) return (EFAULT); if (pfr_validate_addr(&ad)) return (EINVAL); if (ADDR_NETWORK(&ad)) return (EINVAL); p = pfr_lookup_addr(kt, &ad, 0); if (flags & PFR_FLAG_REPLACE) pfr_copyout_addr(&ad, p); ad.pfra_fback = (p == NULL) ? PFR_FB_NONE : (p->pfrke_not ? PFR_FB_NOTMATCH : PFR_FB_MATCH); if (p != NULL && !p->pfrke_not) xmatch++; if (COPYOUT(&ad, addr+i, sizeof(ad))) return (EFAULT); } if (nmatch != NULL) *nmatch = xmatch; return (0); } int pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, int flags) { struct pfr_ktable *kt; struct pfr_walktree w; int rv; ACCEPT_FLAGS(0); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (ESRCH); if (kt->pfrkt_cnt > *size) { *size = kt->pfrkt_cnt; return (0); } bzero(&w, sizeof(w)); w.pfrw_op = PFRW_GET_ADDRS; w.pfrw_addr = addr; w.pfrw_free = kt->pfrkt_cnt; w.pfrw_flags = flags; rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); if (!rv) rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); if (rv) return (rv); if (w.pfrw_free) { printf("pfr_get_addrs: corruption detected (%d).\n", w.pfrw_free); return (ENOTTY); } *size = kt->pfrkt_cnt; return (0); } int pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, int flags) { struct pfr_ktable *kt; struct pfr_walktree w; struct pfr_kentryworkq workq; int rv, s; long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC); /* XXX PFR_FLAG_CLSTATS disabled */ if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (ESRCH); if (kt->pfrkt_cnt > *size) { *size = kt->pfrkt_cnt; return (0); } bzero(&w, sizeof(w)); w.pfrw_op = PFRW_GET_ASTATS; w.pfrw_astats = addr; w.pfrw_free = kt->pfrkt_cnt; w.pfrw_flags = flags; if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); if (!rv) rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); if (!rv && (flags & PFR_FLAG_CLSTATS)) { pfr_enqueue_addrs(kt, &workq, NULL, 0); pfr_clstats_kentries(&workq, tzero, 0); } if 
(flags & PFR_FLAG_ATOMIC) splx(s); if (rv) return (rv); if (w.pfrw_free) { printf("pfr_get_astats: corruption detected (%d).\n", w.pfrw_free); return (ENOTTY); } *size = kt->pfrkt_cnt; return (0); } int pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, int *nzero, int flags) { struct pfr_ktable *kt; struct pfr_kentryworkq workq; struct pfr_kentry *p; struct pfr_addr ad; int i, rv, s, xzero = 0; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, 0)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (ESRCH); SLIST_INIT(&workq); for (i = 0; i < size; i++) { if (COPYIN(addr+i, &ad, sizeof(ad))) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); p = pfr_lookup_addr(kt, &ad, 1); if (flags & PFR_FLAG_FEEDBACK) { ad.pfra_fback = (p != NULL) ? PFR_FB_CLEARED : PFR_FB_NONE; if (COPYOUT(&ad, addr+i, sizeof(ad))) senderr(EFAULT); } if (p != NULL) { SLIST_INSERT_HEAD(&workq, p, pfrke_workq); xzero++; } } if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_clstats_kentries(&workq, 0, 0); if (flags & PFR_FLAG_ATOMIC) splx(s); } if (nzero != NULL) *nzero = xzero; return (0); _bad: if (flags & PFR_FLAG_FEEDBACK) pfr_reset_feedback(addr, size, flags); return (rv); } int pfr_validate_addr(struct pfr_addr *ad) { int i; switch (ad->pfra_af) { #ifdef INET case AF_INET: if (ad->pfra_net > 32) return (-1); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (ad->pfra_net > 128) return (-1); break; #endif /* INET6 */ default: return (-1); } if (ad->pfra_net < 128 && (((caddr_t)ad)[ad->pfra_net/8] & (0xFF >> (ad->pfra_net%8)))) return (-1); for (i = (ad->pfra_net+7)/8; i < sizeof(ad->pfra_u); i++) if (((caddr_t)ad)[i]) return (-1); if (ad->pfra_not && ad->pfra_not != 1) return (-1); if (ad->pfra_fback) return (-1); return (0); } void pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, int *naddr, int sweep) { struct pfr_walktree w; SLIST_INIT(workq); bzero(&w, sizeof(w)); w.pfrw_op = sweep ? 
PFRW_SWEEP : PFRW_ENQUEUE; w.pfrw_workq = workq; if (kt->pfrkt_ip4 != NULL) if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) printf("pfr_enqueue_addrs: IPv4 walktree failed.\n"); if (kt->pfrkt_ip6 != NULL) if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) printf("pfr_enqueue_addrs: IPv6 walktree failed.\n"); if (naddr != NULL) *naddr = w.pfrw_cnt; } void pfr_mark_addrs(struct pfr_ktable *kt) { struct pfr_walktree w; bzero(&w, sizeof(w)); w.pfrw_op = PFRW_MARK; if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) printf("pfr_mark_addrs: IPv4 walktree failed.\n"); if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) printf("pfr_mark_addrs: IPv6 walktree failed.\n"); } struct pfr_kentry * pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) { union sockaddr_union sa, mask; struct radix_node_head *head; struct pfr_kentry *ke; int s; bzero(&sa, sizeof(sa)); if (ad->pfra_af == AF_INET) { FILLIN_SIN(sa.sin, ad->pfra_ip4addr); head = kt->pfrkt_ip4; } else if ( ad->pfra_af == AF_INET6 ) { FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr); head = kt->pfrkt_ip6; } if (ADDR_NETWORK(ad)) { pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net); s = splsoftnet(); /* rn_lookup makes use of globals */ ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head); splx(s); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; } else { ke = (struct pfr_kentry *)rn_match(&sa, head); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; if (exact && ke && KENTRY_NETWORK(ke)) ke = NULL; } return (ke); } struct pfr_kentry * pfr_create_kentry(struct pfr_addr *ad, int intr) { struct pfr_kentry *ke; if (intr) ke = pool_get(&pfr_kentry_pl2, PR_NOWAIT); else ke = pool_get(&pfr_kentry_pl, PR_NOWAIT); if (ke == NULL) return (NULL); bzero(ke, sizeof(*ke)); if (ad->pfra_af == AF_INET) FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr); else if (ad->pfra_af == AF_INET6) FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr); ke->pfrke_af = ad->pfra_af; ke->pfrke_net = ad->pfra_net; ke->pfrke_not = ad->pfra_not; ke->pfrke_intrpool = intr; return (ke); } void pfr_destroy_kentries(struct pfr_kentryworkq *workq) { struct pfr_kentry *p, *q; for (p = SLIST_FIRST(workq); p != NULL; p = q) { q = SLIST_NEXT(p, pfrke_workq); pfr_destroy_kentry(p); } } void pfr_destroy_kentry(struct pfr_kentry *ke) { if (ke->pfrke_intrpool) pool_put(&pfr_kentry_pl2, ke); else pool_put(&pfr_kentry_pl, ke); } void pfr_insert_kentries(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, long tzero) { struct pfr_kentry *p; int rv, n = 0; SLIST_FOREACH(p, workq, pfrke_workq) { rv = pfr_route_kentry(kt, p); if (rv) { printf("pfr_insert_kentries: cannot route entry " "(code=%d).\n", rv); break; } p->pfrke_tzero = tzero; n++; } kt->pfrkt_cnt += n; } int pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero) { struct pfr_kentry *p; int rv; p = pfr_lookup_addr(kt, ad, 1); if (p != NULL) return (0); p = pfr_create_kentry(ad, 1); if (p == NULL) return (EINVAL); rv = pfr_route_kentry(kt, p); if (rv) return (rv); p->pfrke_tzero = tzero; kt->pfrkt_cnt++; return (0); } void pfr_remove_kentries(struct pfr_ktable *kt, struct pfr_kentryworkq *workq) { struct pfr_kentry *p; int n = 0; SLIST_FOREACH(p, workq, pfrke_workq) { pfr_unroute_kentry(kt, p); n++; } kt->pfrkt_cnt -= n; pfr_destroy_kentries(workq); } void pfr_clean_node_mask(struct pfr_ktable *kt, struct pfr_kentryworkq *workq) { struct pfr_kentry *p; SLIST_FOREACH(p, workq, pfrke_workq) pfr_unroute_kentry(kt, p); } void pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange) { struct pfr_kentry *p; int s; 
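	/*
	 * Counters are cleared one entry at a time inside splsoftnet() so
	 * the packet path cannot update them halfway through the bzero().
	 * When negchange (INVERT_NEG_FLAG) is set the negate bit is toggled
	 * as well; this is how pfr_set_addrs() and pfr_commit_ktable() flip
	 * an existing entry between "addr" and "!addr" without unrouting
	 * and re-routing it.  Resetting pfrke_tzero restarts the statistics
	 * epoch at tzero.
	 */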
SLIST_FOREACH(p, workq, pfrke_workq) { s = splsoftnet(); if (negchange) p->pfrke_not = !p->pfrke_not; bzero(p->pfrke_packets, sizeof(p->pfrke_packets)); bzero(p->pfrke_bytes, sizeof(p->pfrke_bytes)); splx(s); p->pfrke_tzero = tzero; } } void pfr_reset_feedback(struct pfr_addr *addr, int size, int flags) { struct pfr_addr ad; int i; for (i = 0; i < size; i++) { if (COPYIN(addr+i, &ad, sizeof(ad))) break; ad.pfra_fback = PFR_FB_NONE; if (COPYOUT(&ad, addr+i, sizeof(ad))) break; } } void pfr_prepare_network(union sockaddr_union *sa, int af, int net) { int i; bzero(sa, sizeof(*sa)); if (af == AF_INET) { sa->sin.sin_len = sizeof(sa->sin); sa->sin.sin_family = AF_INET; sa->sin.sin_addr.s_addr = net ? htonl(-1 << (32-net)) : 0; } else if (af == AF_INET6) { sa->sin6.sin6_len = sizeof(sa->sin6); sa->sin6.sin6_family = AF_INET6; for (i = 0; i < 4; i++) { if (net <= 32) { sa->sin6.sin6_addr.s6_addr32[i] = net ? htonl(-1 << (32-net)) : 0; break; } sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF; net -= 32; } } } int pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; struct radix_node_head *head; int s; bzero(ke->pfrke_node, sizeof(ke->pfrke_node)); if (ke->pfrke_af == AF_INET) head = kt->pfrkt_ip4; else if (ke->pfrke_af == AF_INET6) head = kt->pfrkt_ip6; s = splsoftnet(); if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node); } else rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node); splx(s); return (rn == NULL ? -1 : 0); } int pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; struct radix_node_head *head; int s; if (ke->pfrke_af == AF_INET) head = kt->pfrkt_ip4; else if (ke->pfrke_af == AF_INET6) head = kt->pfrkt_ip6; s = splsoftnet(); if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); rn = rn_delete(&ke->pfrke_sa, &mask, head, NULL); } else rn = rn_delete(&ke->pfrke_sa, NULL, head, NULL); splx(s); if (rn == NULL) { printf("pfr_unroute_kentry: delete failed.\n"); return (-1); } return (0); } void pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke) { bzero(ad, sizeof(*ad)); if (ke == NULL) return; ad->pfra_af = ke->pfrke_af; ad->pfra_net = ke->pfrke_net; ad->pfra_not = ke->pfrke_not; if (ad->pfra_af == AF_INET) ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr; else if (ad->pfra_af == AF_INET6) ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr; } int pfr_walktree(struct radix_node *rn, void *arg) { struct pfr_kentry *ke = (struct pfr_kentry *)rn; struct pfr_walktree *w = arg; int s, flags = w->pfrw_flags; switch (w->pfrw_op) { case PFRW_MARK: ke->pfrke_mark = 0; break; case PFRW_SWEEP: if (ke->pfrke_mark) break; /* FALLTHROUGH */ case PFRW_ENQUEUE: SLIST_INSERT_HEAD(w->pfrw_workq, ke, pfrke_workq); w->pfrw_cnt++; break; case PFRW_GET_ADDRS: if (w->pfrw_free-- > 0) { struct pfr_addr ad; pfr_copyout_addr(&ad, ke); if (copyout(&ad, w->pfrw_addr, sizeof(ad))) return (EFAULT); w->pfrw_addr++; } break; case PFRW_GET_ASTATS: if (w->pfrw_free-- > 0) { struct pfr_astats as; pfr_copyout_addr(&as.pfras_a, ke); s = splsoftnet(); bcopy(ke->pfrke_packets, as.pfras_packets, sizeof(as.pfras_packets)); bcopy(ke->pfrke_bytes, as.pfras_bytes, sizeof(as.pfras_bytes)); splx(s); as.pfras_tzero = ke->pfrke_tzero; if (COPYOUT(&as, w->pfrw_astats, sizeof(as))) return (EFAULT); w->pfrw_astats++; } break; case PFRW_POOL_GET: if (ke->pfrke_not) break; /* negative entries 
are ignored */ if (!w->pfrw_cnt--) { w->pfrw_kentry = ke; return (1); /* finish search */ } break; case PFRW_DYNADDR_UPDATE: if (ke->pfrke_af == AF_INET) { if (w->pfrw_dyn->pfid_acnt4++ > 0) break; pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net); w->pfrw_dyn->pfid_addr4 = *SUNION2PF( &ke->pfrke_sa, AF_INET); w->pfrw_dyn->pfid_mask4 = *SUNION2PF( &pfr_mask, AF_INET); } else if (ke->pfrke_af == AF_INET6){ if (w->pfrw_dyn->pfid_acnt6++ > 0) break; pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net); w->pfrw_dyn->pfid_addr6 = *SUNION2PF( &ke->pfrke_sa, AF_INET6); w->pfrw_dyn->pfid_mask6 = *SUNION2PF( &pfr_mask, AF_INET6); } break; } return (0); } int pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p; int s, xdel = 0; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); if (pfr_table_count(filter, flags) < 0) return (ENOENT); SLIST_INIT(&workq); RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { if (pfr_skip_table(filter, p, flags)) continue; if (!strcmp(p->pfrkt_anchor, PF_RESERVED_ANCHOR)) continue; if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) continue; p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); xdel++; } if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_setflags_ktables(&workq); if (flags & PFR_FLAG_ATOMIC) splx(s); } if (ndel != NULL) *ndel = xdel; return (0); } int pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) { struct pfr_ktableworkq addq, changeq; struct pfr_ktable *p, *q, *r, key; int i, rv, s, xadd = 0; long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); SLIST_INIT(&addq); SLIST_INIT(&changeq); for (i = 0; i < size; i++) { if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) senderr(EFAULT); if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK, flags & PFR_FLAG_USERIOCTL)) senderr(EINVAL); key.pfrkt_flags |= PFR_TFLAG_ACTIVE; p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (p == NULL) { p = pfr_create_ktable(&key.pfrkt_t, tzero, 1); if (p == NULL) senderr(ENOMEM); SLIST_FOREACH(q, &addq, pfrkt_workq) { if (!pfr_ktable_compare(p, q)) goto _skip; } SLIST_INSERT_HEAD(&addq, p, pfrkt_workq); xadd++; if (!key.pfrkt_anchor[0]) goto _skip; /* find or create root table */ bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor)); r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (r != NULL) { p->pfrkt_root = r; goto _skip; } SLIST_FOREACH(q, &addq, pfrkt_workq) { if (!pfr_ktable_compare(&key, q)) { p->pfrkt_root = q; goto _skip; } } key.pfrkt_flags = 0; r = pfr_create_ktable(&key.pfrkt_t, 0, 1); if (r == NULL) senderr(ENOMEM); SLIST_INSERT_HEAD(&addq, r, pfrkt_workq); p->pfrkt_root = r; } else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { SLIST_FOREACH(q, &changeq, pfrkt_workq) if (!pfr_ktable_compare(&key, q)) goto _skip; p->pfrkt_nflags = (p->pfrkt_flags & ~PFR_TFLAG_USRMASK) | key.pfrkt_flags; SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq); xadd++; } _skip: ; } if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_insert_ktables(&addq); pfr_setflags_ktables(&changeq); if (flags & PFR_FLAG_ATOMIC) splx(s); } else pfr_destroy_ktables(&addq, 0); if (nadd != NULL) *nadd = xadd; return (0); _bad: pfr_destroy_ktables(&addq, 0); return (rv); } int pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p, *q, key; int i, s, xdel = 0; 
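	/*
	 * Like most of the table ioctls this works in two phases: the loop
	 * below only records the intended flag change in pfrkt_nflags and
	 * queues the table, and pfr_setflags_ktables() later applies it,
	 * destroying a table outright once no PFR_TFLAG_SETMASK bit is
	 * left.  When PFR_FLAG_ATOMIC was requested the apply step runs
	 * under splsoftnet(); with PFR_FLAG_DUMMY it is skipped entirely.
	 */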
ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); SLIST_INIT(&workq); for (i = 0; i < size; i++) { if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) return (EFAULT); if (pfr_validate_table(&key.pfrkt_t, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { SLIST_FOREACH(q, &workq, pfrkt_workq) if (!pfr_ktable_compare(p, q)) goto _skip; p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); xdel++; } _skip: ; } if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_setflags_ktables(&workq); if (flags & PFR_FLAG_ATOMIC) splx(s); } if (ndel != NULL) *ndel = xdel; return (0); } int pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, int flags) { struct pfr_ktable *p; int n, nn; ACCEPT_FLAGS(PFR_FLAG_ALLRSETS); if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); n = nn = pfr_table_count(filter, flags); if (n < 0) return (ENOENT); if (n > *size) { *size = n; return (0); } RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { if (pfr_skip_table(filter, p, flags)) continue; if (n-- <= 0) continue; if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl))) return (EFAULT); } if (n) { printf("pfr_get_tables: corruption detected (%d).\n", n); return (ENOTTY); } *size = nn; return (0); } int pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, int flags) { struct pfr_ktable *p; struct pfr_ktableworkq workq; int s, n, nn; long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC|PFR_FLAG_ALLRSETS); /* XXX PFR_FLAG_CLSTATS disabled */ if (pfr_fix_anchor(filter->pfrt_anchor)) return (EINVAL); n = nn = pfr_table_count(filter, flags); if (n < 0) return (ENOENT); if (n > *size) { *size = n; return (0); } SLIST_INIT(&workq); if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { if (pfr_skip_table(filter, p, flags)) continue; if (n-- <= 0) continue; if (!(flags & PFR_FLAG_ATOMIC)) s = splsoftnet(); if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl))) { splx(s); return (EFAULT); } if (!(flags & PFR_FLAG_ATOMIC)) splx(s); SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); } if (flags & PFR_FLAG_CLSTATS) pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO); if (flags & PFR_FLAG_ATOMIC) splx(s); if (n) { printf("pfr_get_tstats: corruption detected (%d).\n", n); return (ENOTTY); } *size = nn; return (0); } int pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p, key; int i, s, xzero = 0; long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ADDRSTOO); SLIST_INIT(&workq); for (i = 0; i < size; i++) { if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) return (EFAULT); if (pfr_validate_table(&key.pfrkt_t, 0, 0)) return (EINVAL); p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (p != NULL) { SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); xzero++; } } if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO); if (flags & PFR_FLAG_ATOMIC) splx(s); } if (nzero != NULL) *nzero = xzero; return (0); } int pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, int *nchange, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p, *q, key; int i, s, xchange = 0, xdel = 0; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); if ((setflag & ~PFR_TFLAG_USRMASK) || (clrflag & 
~PFR_TFLAG_USRMASK) || (setflag & clrflag)) return (EINVAL); SLIST_INIT(&workq); for (i = 0; i < size; i++) { if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) return (EFAULT); if (pfr_validate_table(&key.pfrkt_t, 0, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { p->pfrkt_nflags = (p->pfrkt_flags | setflag) & ~clrflag; if (p->pfrkt_nflags == p->pfrkt_flags) goto _skip; SLIST_FOREACH(q, &workq, pfrkt_workq) if (!pfr_ktable_compare(p, q)) goto _skip; SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); if ((p->pfrkt_flags & PFR_TFLAG_PERSIST) && (clrflag & PFR_TFLAG_PERSIST) && !(p->pfrkt_flags & PFR_TFLAG_REFERENCED)) xdel++; else xchange++; } _skip: ; } if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); pfr_setflags_ktables(&workq); if (flags & PFR_FLAG_ATOMIC) splx(s); } if (nchange != NULL) *nchange = xchange; if (ndel != NULL) *ndel = xdel; return (0); } int pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p; struct pf_ruleset *rs; int xdel = 0; ACCEPT_FLAGS(PFR_FLAG_DUMMY); rs = pf_find_or_create_ruleset(trs->pfrt_anchor); if (rs == NULL) return (ENOMEM); SLIST_INIT(&workq); RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || pfr_skip_table(trs, p, 0)) continue; p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE; SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); xdel++; } if (!(flags & PFR_FLAG_DUMMY)) { pfr_setflags_ktables(&workq); if (ticket != NULL) *ticket = ++rs->tticket; rs->topen = 1; } else pf_remove_if_empty_ruleset(rs); if (ndel != NULL) *ndel = xdel; return (0); } int pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, int *nadd, int *naddr, u_int32_t ticket, int flags) { struct pfr_ktableworkq tableq; struct pfr_kentryworkq addrq; struct pfr_ktable *kt, *rt, *shadow, key; struct pfr_kentry *p; struct pfr_addr ad; struct pf_ruleset *rs; int i, rv, xadd = 0, xaddr = 0; ACCEPT_FLAGS(PFR_FLAG_DUMMY|PFR_FLAG_ADDRSTOO); if (size && !(flags & PFR_FLAG_ADDRSTOO)) return (EINVAL); if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); rs = pf_find_ruleset(tbl->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (EBUSY); tbl->pfrt_flags |= PFR_TFLAG_INACTIVE; SLIST_INIT(&tableq); kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl); if (kt == NULL) { kt = pfr_create_ktable(tbl, 0, 1); if (kt == NULL) return (ENOMEM); SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq); xadd++; if (!tbl->pfrt_anchor[0]) goto _skip; /* find or create root table */ bzero(&key, sizeof(key)); strlcpy(key.pfrkt_name, tbl->pfrt_name, sizeof(key.pfrkt_name)); rt = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (rt != NULL) { kt->pfrkt_root = rt; goto _skip; } rt = pfr_create_ktable(&key.pfrkt_t, 0, 1); if (rt == NULL) { pfr_destroy_ktables(&tableq, 0); return (ENOMEM); } SLIST_INSERT_HEAD(&tableq, rt, pfrkt_workq); kt->pfrkt_root = rt; } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE)) xadd++; _skip: shadow = pfr_create_ktable(tbl, 0, 0); if (shadow == NULL) { pfr_destroy_ktables(&tableq, 0); return (ENOMEM); } SLIST_INIT(&addrq); for (i = 0; i < size; i++) { if (COPYIN(addr+i, &ad, sizeof(ad))) senderr(EFAULT); if (pfr_validate_addr(&ad)) senderr(EINVAL); if (pfr_lookup_addr(shadow, &ad, 1) != NULL) continue; p = pfr_create_kentry(&ad, 0); if (p == NULL) senderr(ENOMEM); if 
(pfr_route_kentry(shadow, p)) { pfr_destroy_kentry(p); continue; } SLIST_INSERT_HEAD(&addrq, p, pfrke_workq); xaddr++; } if (!(flags & PFR_FLAG_DUMMY)) { if (kt->pfrkt_shadow != NULL) pfr_destroy_ktable(kt->pfrkt_shadow, 1); kt->pfrkt_flags |= PFR_TFLAG_INACTIVE; pfr_insert_ktables(&tableq); shadow->pfrkt_cnt = (flags & PFR_FLAG_ADDRSTOO) ? xaddr : NO_ADDRESSES; kt->pfrkt_shadow = shadow; } else { pfr_clean_node_mask(shadow, &addrq); pfr_destroy_ktable(shadow, 0); pfr_destroy_ktables(&tableq, 0); pfr_destroy_kentries(&addrq); } if (nadd != NULL) *nadd = xadd; if (naddr != NULL) *naddr = xaddr; return (0); _bad: pfr_destroy_ktable(shadow, 0); pfr_destroy_ktables(&tableq, 0); pfr_destroy_kentries(&addrq); return (rv); } int pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags) { struct pfr_ktableworkq workq; struct pfr_ktable *p; struct pf_ruleset *rs; int xdel = 0; ACCEPT_FLAGS(PFR_FLAG_DUMMY); rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (0); SLIST_INIT(&workq); RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || pfr_skip_table(trs, p, 0)) continue; p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE; SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); xdel++; } if (!(flags & PFR_FLAG_DUMMY)) { pfr_setflags_ktables(&workq); rs->topen = 0; pf_remove_if_empty_ruleset(rs); } if (ndel != NULL) *ndel = xdel; return (0); } int pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, int *nchange, int flags) { struct pfr_ktable *p, *q; struct pfr_ktableworkq workq; struct pf_ruleset *rs; int s, xadd = 0, xchange = 0; long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (EBUSY); SLIST_INIT(&workq); RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || pfr_skip_table(trs, p, 0)) continue; SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); if (p->pfrkt_flags & PFR_TFLAG_ACTIVE) xchange++; else xadd++; } if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) s = splsoftnet(); for (p = SLIST_FIRST(&workq); p != NULL; p = q) { q = SLIST_NEXT(p, pfrkt_workq); pfr_commit_ktable(p, tzero); } if (flags & PFR_FLAG_ATOMIC) splx(s); rs->topen = 0; pf_remove_if_empty_ruleset(rs); } if (nadd != NULL) *nadd = xadd; if (nchange != NULL) *nchange = xchange; return (0); } void pfr_commit_ktable(struct pfr_ktable *kt, long tzero) { struct pfr_ktable *shadow = kt->pfrkt_shadow; int nflags; if (shadow->pfrkt_cnt == NO_ADDRESSES) { if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) pfr_clstats_ktable(kt, tzero, 1); } else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) { /* kt might contain addresses */ struct pfr_kentryworkq addrq, addq, changeq, delq, garbageq; struct pfr_kentry *p, *q, *next; struct pfr_addr ad; pfr_enqueue_addrs(shadow, &addrq, NULL, 0); pfr_mark_addrs(kt); SLIST_INIT(&addq); SLIST_INIT(&changeq); SLIST_INIT(&delq); SLIST_INIT(&garbageq); pfr_clean_node_mask(shadow, &addrq); for (p = SLIST_FIRST(&addrq); p != NULL; p = next) { next = SLIST_NEXT(p, pfrke_workq); /* XXX */ pfr_copyout_addr(&ad, p); q = pfr_lookup_addr(kt, &ad, 1); if (q != NULL) { if (q->pfrke_not != p->pfrke_not) SLIST_INSERT_HEAD(&changeq, q, pfrke_workq); q->pfrke_mark = 1; SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq); } else { p->pfrke_tzero = tzero; SLIST_INSERT_HEAD(&addq, p, pfrke_workq); } } pfr_enqueue_addrs(kt, &delq, NULL, ENQUEUE_UNMARKED_ONLY); 
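		/*
		 * The merge is now fully classified: addq holds shadow
		 * entries missing from kt, changeq holds kt entries whose
		 * negate bit changed, delq holds kt entries left unmarked
		 * (i.e. absent from the shadow), and garbageq holds shadow
		 * duplicates of entries that survive in kt.  The calls
		 * below apply these sets so the table is rewritten in
		 * place rather than swapped wholesale.
		 */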
pfr_insert_kentries(kt, &addq, tzero); pfr_remove_kentries(kt, &delq); pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG); pfr_destroy_kentries(&garbageq); } else { /* kt cannot contain addresses */ SWAP(struct radix_node_head *, kt->pfrkt_ip4, shadow->pfrkt_ip4); SWAP(struct radix_node_head *, kt->pfrkt_ip6, shadow->pfrkt_ip6); SWAP(int, kt->pfrkt_cnt, shadow->pfrkt_cnt); pfr_clstats_ktable(kt, tzero, 1); } nflags = ((shadow->pfrkt_flags & PFR_TFLAG_USRMASK) | (kt->pfrkt_flags & PFR_TFLAG_SETMASK) | PFR_TFLAG_ACTIVE) & ~PFR_TFLAG_INACTIVE; pfr_destroy_ktable(shadow, 0); kt->pfrkt_shadow = NULL; pfr_setflags_ktable(kt, nflags); } int pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved) { int i; if (!tbl->pfrt_name[0]) return (-1); if (no_reserved && !strcmp(tbl->pfrt_anchor, PF_RESERVED_ANCHOR)) return (-1); if (tbl->pfrt_name[PF_TABLE_NAME_SIZE-1]) return (-1); for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++) if (tbl->pfrt_name[i]) return (-1); if (pfr_fix_anchor(tbl->pfrt_anchor)) return (-1); if (tbl->pfrt_flags & ~allowedflags) return (-1); return (0); } /* * Rewrite anchors referenced by tables to remove slashes * and check for validity. */ int pfr_fix_anchor(char *anchor) { size_t siz = MAXPATHLEN; int i; if (anchor[0] == '/') { char *path; int off; path = anchor; off = 1; while (*++path == '/') off++; bcopy(path, anchor, siz - off); memset(anchor + siz - off, 0, off); } if (anchor[siz - 1]) return (-1); for (i = strlen(anchor); i < siz; i++) if (anchor[i]) return (-1); return (0); } int pfr_table_count(struct pfr_table *filter, int flags) { struct pf_ruleset *rs; if (flags & PFR_FLAG_ALLRSETS) return (pfr_ktable_cnt); if (filter->pfrt_anchor[0]) { rs = pf_find_ruleset(filter->pfrt_anchor); return ((rs != NULL) ? 
rs->tables : -1); } return (pf_main_ruleset.tables); } int pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags) { if (flags & PFR_FLAG_ALLRSETS) return (0); if (strcmp(filter->pfrt_anchor, kt->pfrkt_anchor)) return (1); return (0); } void pfr_insert_ktables(struct pfr_ktableworkq *workq) { struct pfr_ktable *p; SLIST_FOREACH(p, workq, pfrkt_workq) pfr_insert_ktable(p); } void pfr_insert_ktable(struct pfr_ktable *kt) { RB_INSERT(pfr_ktablehead, &pfr_ktables, kt); pfr_ktable_cnt++; if (kt->pfrkt_root != NULL) if (!kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]++) pfr_setflags_ktable(kt->pfrkt_root, kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR); } void pfr_setflags_ktables(struct pfr_ktableworkq *workq) { struct pfr_ktable *p, *q; for (p = SLIST_FIRST(workq); p; p = q) { q = SLIST_NEXT(p, pfrkt_workq); pfr_setflags_ktable(p, p->pfrkt_nflags); } } void pfr_setflags_ktable(struct pfr_ktable *kt, int newf) { struct pfr_kentryworkq addrq; if (!(newf & PFR_TFLAG_REFERENCED) && !(newf & PFR_TFLAG_PERSIST)) newf &= ~PFR_TFLAG_ACTIVE; if (!(newf & PFR_TFLAG_ACTIVE)) newf &= ~PFR_TFLAG_USRMASK; if (!(newf & PFR_TFLAG_SETMASK)) { RB_REMOVE(pfr_ktablehead, &pfr_ktables, kt); if (kt->pfrkt_root != NULL) if (!--kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]) pfr_setflags_ktable(kt->pfrkt_root, kt->pfrkt_root->pfrkt_flags & ~PFR_TFLAG_REFDANCHOR); pfr_destroy_ktable(kt, 1); pfr_ktable_cnt--; return; } if (!(newf & PFR_TFLAG_ACTIVE) && kt->pfrkt_cnt) { pfr_enqueue_addrs(kt, &addrq, NULL, 0); pfr_remove_kentries(kt, &addrq); } if (!(newf & PFR_TFLAG_INACTIVE) && kt->pfrkt_shadow != NULL) { pfr_destroy_ktable(kt->pfrkt_shadow, 1); kt->pfrkt_shadow = NULL; } kt->pfrkt_flags = newf; } void pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse) { struct pfr_ktable *p; SLIST_FOREACH(p, workq, pfrkt_workq) pfr_clstats_ktable(p, tzero, recurse); } void pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse) { struct pfr_kentryworkq addrq; int s; if (recurse) { pfr_enqueue_addrs(kt, &addrq, NULL, 0); pfr_clstats_kentries(&addrq, tzero, 0); } s = splsoftnet(); bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets)); bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes)); kt->pfrkt_match = kt->pfrkt_nomatch = 0; splx(s); kt->pfrkt_tzero = tzero; } struct pfr_ktable * pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) { struct pfr_ktable *kt; struct pf_ruleset *rs; kt = pool_get(&pfr_ktable_pl, PR_NOWAIT); if (kt == NULL) return (NULL); bzero(kt, sizeof(*kt)); kt->pfrkt_t = *tbl; if (attachruleset) { rs = pf_find_or_create_ruleset(tbl->pfrt_anchor); if (!rs) { pfr_destroy_ktable(kt, 0); return (NULL); } kt->pfrkt_rs = rs; rs->tables++; } if (!rn_inithead((void **)&kt->pfrkt_ip4, offsetof(struct sockaddr_in, sin_addr) * 8) || !rn_inithead((void **)&kt->pfrkt_ip6, offsetof(struct sockaddr_in6, sin6_addr) * 8)) { pfr_destroy_ktable(kt, 0); return (NULL); } kt->pfrkt_tzero = tzero; return (kt); } void pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) { struct pfr_ktable *p, *q; for (p = SLIST_FIRST(workq); p; p = q) { q = SLIST_NEXT(p, pfrkt_workq); pfr_destroy_ktable(p, flushaddr); } } void pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) { struct pfr_kentryworkq addrq; if (flushaddr) { pfr_enqueue_addrs(kt, &addrq, NULL, 0); pfr_clean_node_mask(kt, &addrq); pfr_destroy_kentries(&addrq); } if (kt->pfrkt_ip4 != NULL) free((caddr_t)kt->pfrkt_ip4, M_RTABLE); if (kt->pfrkt_ip6 != NULL) free((caddr_t)kt->pfrkt_ip6, M_RTABLE); if 
(kt->pfrkt_shadow != NULL) pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr); if (kt->pfrkt_rs != NULL) { kt->pfrkt_rs->tables--; pf_remove_if_empty_ruleset(kt->pfrkt_rs); } pool_put(&pfr_ktable_pl, kt); } int pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) { int d; if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE))) return (d); return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor)); } struct pfr_ktable * pfr_lookup_table(struct pfr_table *tbl) { /* struct pfr_ktable start like a struct pfr_table */ return (RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl)); } int pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) { struct pfr_kentry *ke = NULL; int match; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (0); switch (af) { #ifdef INET case AF_INET: pfr_sin.sin_addr.s_addr = a->addr32[0]; ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; #endif /* INET */ #ifdef INET6 case AF_INET6: bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; #endif /* INET6 */ } match = (ke && !ke->pfrke_not); if (match) kt->pfrkt_match++; else kt->pfrkt_nomatch++; return (match); } void pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, u_int64_t len, int dir_out, int op_pass, int notrule) { struct pfr_kentry *ke = NULL; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return; switch (af) { #ifdef INET case AF_INET: pfr_sin.sin_addr.s_addr = a->addr32[0]; ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; #endif /* INET */ #ifdef INET6 case AF_INET6: bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; #endif /* INET6 */ default: ; } if ((ke == NULL || ke->pfrke_not) != notrule) { if (op_pass != PFR_OP_PASS) printf("pfr_update_stats: assertion failed.\n"); op_pass = PFR_OP_XPASS; } kt->pfrkt_packets[dir_out][op_pass]++; kt->pfrkt_bytes[dir_out][op_pass] += len; if (ke != NULL && op_pass != PFR_OP_XPASS) { ke->pfrke_packets[dir_out][op_pass]++; ke->pfrke_bytes[dir_out][op_pass] += len; } } struct pfr_ktable * pfr_attach_table(struct pf_ruleset *rs, char *name) { struct pfr_ktable *kt, *rt; struct pfr_table tbl; struct pf_anchor *ac = rs->anchor; bzero(&tbl, sizeof(tbl)); strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)); if (ac != NULL) - strlcpy(tbl.pfrt_anchor, ac->name, sizeof(tbl.pfrt_anchor)); + strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor)); kt = pfr_lookup_table(&tbl); if (kt == NULL) { kt = pfr_create_ktable(&tbl, time_second, 1); if (kt == NULL) return (NULL); if (ac != NULL) { bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor)); rt = pfr_lookup_table(&tbl); if (rt == NULL) { rt = pfr_create_ktable(&tbl, 0, 1); if (rt == NULL) { pfr_destroy_ktable(kt, 0); return (NULL); } pfr_insert_ktable(rt); } kt->pfrkt_root = rt; } pfr_insert_ktable(kt); } if (!kt->pfrkt_refcnt[PFR_REFCNT_RULE]++) pfr_setflags_ktable(kt, kt->pfrkt_flags|PFR_TFLAG_REFERENCED); return (kt); } void pfr_detach_table(struct pfr_ktable *kt) { if (kt->pfrkt_refcnt[PFR_REFCNT_RULE] <= 0) printf("pfr_detach_table: refcount = 
%d.\n", kt->pfrkt_refcnt[PFR_REFCNT_RULE]); else if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE]) pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED); } int pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, struct pf_addr **raddr, struct pf_addr **rmask, sa_family_t af) { struct pfr_kentry *ke, *ke2; struct pf_addr *addr; union sockaddr_union mask; int idx = -1, use_counter = 0; if (af == AF_INET) addr = (struct pf_addr *)&pfr_sin.sin_addr; else if (af == AF_INET6) addr = (struct pf_addr *)&pfr_sin6.sin6_addr; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) return (-1); if (pidx != NULL) idx = *pidx; if (counter != NULL && idx >= 0) use_counter = 1; if (idx < 0) idx = 0; _next_block: ke = pfr_kentry_byidx(kt, idx, af); if (ke == NULL) return (1); pfr_prepare_network(&pfr_mask, af, ke->pfrke_net); *raddr = SUNION2PF(&ke->pfrke_sa, af); *rmask = SUNION2PF(&pfr_mask, af); if (use_counter) { /* is supplied address within block? */ if (!PF_MATCHA(0, *raddr, *rmask, counter, af)) { /* no, go to next block in table */ idx++; use_counter = 0; goto _next_block; } PF_ACPY(addr, counter, af); } else { /* use first address of block */ PF_ACPY(addr, *raddr, af); } if (!KENTRY_NETWORK(ke)) { /* this is a single IP address - no possible nested block */ PF_ACPY(counter, addr, af); *pidx = idx; return (0); } for (;;) { /* we don't want to use a nested block */ if (af == AF_INET) ke2 = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); else if (af == AF_INET6) ke2 = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); /* no need to check KENTRY_RNF_ROOT() here */ if (ke2 == ke) { /* lookup return the same block - perfect */ PF_ACPY(counter, addr, af); *pidx = idx; return (0); } /* we need to increase the counter past the nested block */ pfr_prepare_network(&mask, AF_INET, ke2->pfrke_net); PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af); PF_AINC(addr, af); if (!PF_MATCHA(0, *raddr, *rmask, addr, af)) { /* ok, we reached the end of our main block */ /* go to next block in table */ idx++; use_counter = 0; goto _next_block; } } } struct pfr_kentry * pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af) { struct pfr_walktree w; bzero(&w, sizeof(w)); w.pfrw_op = PFRW_POOL_GET; w.pfrw_cnt = idx; switch (af) { #ifdef INET case AF_INET: rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); return (w.pfrw_kentry); #endif /* INET */ #ifdef INET6 case AF_INET6: rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); return (w.pfrw_kentry); #endif /* INET6 */ default: return (NULL); } } void pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn) { struct pfr_walktree w; int s; bzero(&w, sizeof(w)); w.pfrw_op = PFRW_DYNADDR_UPDATE; w.pfrw_dyn = dyn; s = splsoftnet(); dyn->pfid_acnt4 = 0; dyn->pfid_acnt6 = 0; if (!dyn->pfid_af || dyn->pfid_af == AF_INET) rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w); if (!dyn->pfid_af || dyn->pfid_af == AF_INET6) rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w); splx(s); } Index: vendor-sys/pf/dist/sys/contrib/pf/net/pfvar.h =================================================================== --- vendor-sys/pf/dist/sys/contrib/pf/net/pfvar.h (revision 171163) +++ vendor-sys/pf/dist/sys/contrib/pf/net/pfvar.h (revision 171164) @@ -1,1577 +1,1660 @@ -/* $OpenBSD: pfvar.h,v 1.213 2005/03/03 07:13:39 dhartmei Exp $ */ +/* $OpenBSD: pfvar.h,v 1.244 2007/02/23 21:31:51 deraadt Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #ifndef _NET_PFVAR_H_ #define _NET_PFVAR_H_ #include #include #include #include +#include #include #include #include #include struct ip; +struct ip6_hdr; #define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0) #define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1) +#define PF_MD5_DIGEST_LENGTH 16 +#ifdef MD5_DIGEST_LENGTH +#if PF_MD5_DIGEST_LENGTH != MD5_DIGEST_LENGTH +#error +#endif +#endif + enum { PF_INOUT, PF_IN, PF_OUT }; enum { PF_LAN_EXT, PF_EXT_GWY, PF_ID }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP }; enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT, PF_OP_LE, PF_OP_GT, PF_OP_GE, PF_OP_XRG, PF_OP_RRG }; enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY }; enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; +enum { PF_GET_NONE, PF_GET_CLR_CNTR }; + /* * Note about PFTM_*: real indices into pf_rule.timeout[] come before * PFTM_MAX, special cases afterwards. See pf_state_expires(). 
*/ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, PFTM_TCP_CLOSING, PFTM_TCP_FIN_WAIT, PFTM_TCP_CLOSED, PFTM_UDP_FIRST_PACKET, PFTM_UDP_SINGLE, PFTM_UDP_MULTIPLE, PFTM_ICMP_FIRST_PACKET, PFTM_ICMP_ERROR_REPLY, PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE, PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL, PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE, - PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNTIL_PACKET }; + PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED, + PFTM_UNTIL_PACKET }; /* PFTM default values */ #define PFTM_TCP_FIRST_PACKET_VAL 120 /* First TCP packet */ #define PFTM_TCP_OPENING_VAL 30 /* No response yet */ #define PFTM_TCP_ESTABLISHED_VAL 24*60*60/* Established */ #define PFTM_TCP_CLOSING_VAL 15 * 60 /* Half closed */ #define PFTM_TCP_FIN_WAIT_VAL 45 /* Got both FINs */ #define PFTM_TCP_CLOSED_VAL 90 /* Got a RST */ #define PFTM_UDP_FIRST_PACKET_VAL 60 /* First UDP packet */ #define PFTM_UDP_SINGLE_VAL 30 /* Unidirectional */ #define PFTM_UDP_MULTIPLE_VAL 60 /* Bidirectional */ #define PFTM_ICMP_FIRST_PACKET_VAL 20 /* First ICMP packet */ #define PFTM_ICMP_ERROR_REPLY_VAL 10 /* Got error response */ #define PFTM_OTHER_FIRST_PACKET_VAL 60 /* First packet */ #define PFTM_OTHER_SINGLE_VAL 30 /* Unidirectional */ #define PFTM_OTHER_MULTIPLE_VAL 60 /* Bidirectional */ #define PFTM_FRAG_VAL 30 /* Fragment expire */ #define PFTM_INTERVAL_VAL 10 /* Expire interval */ #define PFTM_SRC_NODE_VAL 0 /* Source tracking */ #define PFTM_TS_DIFF_VAL 30 /* Allowed TS diff */ enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; -enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, PF_LIMIT_MAX }; +enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, + PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; #define PF_POOL_IDMASK 0x0f enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, - PF_ADDR_TABLE, PF_ADDR_RTLABEL }; + PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED }; #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 #define PF_WSCALE_FLAG 0x80 #define PF_WSCALE_MASK 0x0f +#define PF_LOG 0x01 +#define PF_LOG_ALL 0x02 +#define PF_LOG_SOCKET_LOOKUP 0x04 + struct pf_addr { union { struct in_addr v4; struct in6_addr v6; u_int8_t addr8[16]; u_int16_t addr16[8]; u_int32_t addr32[4]; } pfa; /* 128-bit address */ #define v4 pfa.v4 #define v6 pfa.v6 #define addr8 pfa.addr8 #define addr16 pfa.addr16 #define addr32 pfa.addr32 }; #define PF_TABLE_NAME_SIZE 32 #define PFI_AFLAG_NETWORK 0x01 #define PFI_AFLAG_BROADCAST 0x02 #define PFI_AFLAG_PEER 0x04 #define PFI_AFLAG_MODEMASK 0x07 #define PFI_AFLAG_NOALIAS 0x08 struct pf_addr_wrap { union { struct { struct pf_addr addr; struct pf_addr mask; } a; char ifname[IFNAMSIZ]; char tblname[PF_TABLE_NAME_SIZE]; char rtlabelname[RTLABEL_LEN]; u_int32_t rtlabel; } v; union { struct pfi_dynaddr *dyn; struct pfr_ktable *tbl; int dyncnt; int tblcnt; } p; u_int8_t type; /* PF_ADDR_* */ u_int8_t iflags; /* PFI_AFLAG_* */ }; #ifdef _KERNEL struct pfi_dynaddr { - struct pf_addr pfid_addr4; - struct pf_addr pfid_mask4; - struct pf_addr pfid_addr6; - struct pf_addr pfid_mask6; - struct pfr_ktable *pfid_kt; - struct pfi_kif *pfid_kif; - void *pfid_hook_cookie; - int pfid_net; /* optional mask, or 128 */ - int pfid_acnt4; /* address count, IPv4 */ - int pfid_acnt6; /* address count, IPv6 */ - sa_family_t pfid_af; /* rule address family */ - u_int8_t pfid_iflags; /* PFI_AFLAG_* */ + 
TAILQ_ENTRY(pfi_dynaddr) entry; + struct pf_addr pfid_addr4; + struct pf_addr pfid_mask4; + struct pf_addr pfid_addr6; + struct pf_addr pfid_mask6; + struct pfr_ktable *pfid_kt; + struct pfi_kif *pfid_kif; + void *pfid_hook_cookie; + int pfid_net; /* mask or 128 */ + int pfid_acnt4; /* address count IPv4 */ + int pfid_acnt6; /* address count IPv6 */ + sa_family_t pfid_af; /* rule af */ + u_int8_t pfid_iflags; /* PFI_AFLAG_* */ }; /* * Address manipulation macros */ #ifdef INET #ifndef INET6 #define PF_INET_ONLY #endif /* ! INET6 */ #endif /* INET */ #ifdef INET6 #ifndef INET #define PF_INET6_ONLY #endif /* ! INET */ #endif /* INET6 */ #ifdef INET #ifdef INET6 #define PF_INET_INET6 #endif /* INET6 */ #endif /* INET */ #else #define PF_INET_INET6 #endif /* _KERNEL */ /* Both IPv4 and IPv6 */ #ifdef PF_INET_INET6 #define PF_AEQ(a, b, c) \ ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \ ((a)->addr32[3] == (b)->addr32[3] && \ (a)->addr32[2] == (b)->addr32[2] && \ (a)->addr32[1] == (b)->addr32[1] && \ (a)->addr32[0] == (b)->addr32[0])) \ #define PF_ANEQ(a, b, c) \ ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \ ((a)->addr32[3] != (b)->addr32[3] || \ (a)->addr32[2] != (b)->addr32[2] || \ (a)->addr32[1] != (b)->addr32[1] || \ (a)->addr32[0] != (b)->addr32[0])) \ #define PF_AZERO(a, c) \ ((c == AF_INET && !(a)->addr32[0]) || \ (!(a)->addr32[0] && !(a)->addr32[1] && \ !(a)->addr32[2] && !(a)->addr32[3] )) \ #define PF_MATCHA(n, a, m, b, f) \ pf_match_addr(n, a, m, b, f) #define PF_ACPY(a, b, f) \ pf_addrcpy(a, b, f) #define PF_AINC(a, f) \ pf_addr_inc(a, f) #define PF_POOLMASK(a, b, c, d, f) \ pf_poolmask(a, b, c, d, f) #else /* Just IPv6 */ #ifdef PF_INET6_ONLY #define PF_AEQ(a, b, c) \ ((a)->addr32[3] == (b)->addr32[3] && \ (a)->addr32[2] == (b)->addr32[2] && \ (a)->addr32[1] == (b)->addr32[1] && \ (a)->addr32[0] == (b)->addr32[0]) \ #define PF_ANEQ(a, b, c) \ ((a)->addr32[3] != (b)->addr32[3] || \ (a)->addr32[2] != (b)->addr32[2] || \ (a)->addr32[1] != (b)->addr32[1] || \ (a)->addr32[0] != (b)->addr32[0]) \ #define PF_AZERO(a, c) \ (!(a)->addr32[0] && \ !(a)->addr32[1] && \ !(a)->addr32[2] && \ !(a)->addr32[3] ) \ #define PF_MATCHA(n, a, m, b, f) \ pf_match_addr(n, a, m, b, f) #define PF_ACPY(a, b, f) \ pf_addrcpy(a, b, f) #define PF_AINC(a, f) \ pf_addr_inc(a, f) #define PF_POOLMASK(a, b, c, d, f) \ pf_poolmask(a, b, c, d, f) #else /* Just IPv4 */ #ifdef PF_INET_ONLY #define PF_AEQ(a, b, c) \ ((a)->addr32[0] == (b)->addr32[0]) #define PF_ANEQ(a, b, c) \ ((a)->addr32[0] != (b)->addr32[0]) #define PF_AZERO(a, c) \ (!(a)->addr32[0]) #define PF_MATCHA(n, a, m, b, f) \ pf_match_addr(n, a, m, b, f) #define PF_ACPY(a, b, f) \ (a)->v4.s_addr = (b)->v4.s_addr #define PF_AINC(a, f) \ do { \ (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \ } while (0) #define PF_POOLMASK(a, b, c, d, f) \ do { \ (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \ (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \ } while (0) #endif /* PF_INET_ONLY */ #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ -#define PF_MISMATCHAW(aw, x, af, neg) \ - ( \ - (((aw)->type == PF_ADDR_NOROUTE && \ - pf_routable((x), (af))) || \ - ((aw)->type == PF_ADDR_RTLABEL && \ - !pf_rtlabel_match((x), (af), (aw))) || \ - ((aw)->type == PF_ADDR_TABLE && \ - !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ - ((aw)->type == PF_ADDR_DYNIFTL && \ - !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ - ((aw)->type == PF_ADDR_ADDRMASK && \ - !PF_AZERO(&(aw)->v.a.mask, (af)) && \ - !PF_MATCHA(0, &(aw)->v.a.addr, \ - &(aw)->v.a.mask, 
(x), (af)))) != \ - (neg) \ +#define PF_MISMATCHAW(aw, x, af, neg, ifp) \ + ( \ + (((aw)->type == PF_ADDR_NOROUTE && \ + pf_routable((x), (af), NULL)) || \ + (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ + pf_routable((x), (af), (ifp))) || \ + ((aw)->type == PF_ADDR_RTLABEL && \ + !pf_rtlabel_match((x), (af), (aw))) || \ + ((aw)->type == PF_ADDR_TABLE && \ + !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ + ((aw)->type == PF_ADDR_DYNIFTL && \ + !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + ((aw)->type == PF_ADDR_ADDRMASK && \ + !PF_AZERO(&(aw)->v.a.mask, (af)) && \ + !PF_MATCHA(0, &(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af))))) != \ + (neg) \ ) + struct pf_rule_uid { uid_t uid[2]; u_int8_t op; }; struct pf_rule_gid { uid_t gid[2]; u_int8_t op; }; struct pf_rule_addr { struct pf_addr_wrap addr; u_int16_t port[2]; u_int8_t neg; u_int8_t port_op; }; struct pf_pooladdr { struct pf_addr_wrap addr; TAILQ_ENTRY(pf_pooladdr) entries; char ifname[IFNAMSIZ]; struct pfi_kif *kif; }; TAILQ_HEAD(pf_palist, pf_pooladdr); struct pf_poolhashkey { union { u_int8_t key8[16]; u_int16_t key16[8]; u_int32_t key32[4]; } pfk; /* 128-bit hash key */ #define key8 pfk.key8 #define key16 pfk.key16 #define key32 pfk.key32 }; struct pf_pool { struct pf_palist list; struct pf_pooladdr *cur; struct pf_poolhashkey key; struct pf_addr counter; int tblidx; u_int16_t proxy_port[2]; u_int8_t port_op; u_int8_t opts; }; /* A packed Operating System description for fingerprinting */ typedef u_int32_t pf_osfp_t; #define PF_OSFP_ANY ((pf_osfp_t)0) #define PF_OSFP_UNKNOWN ((pf_osfp_t)-1) #define PF_OSFP_NOMATCH ((pf_osfp_t)-2) struct pf_osfp_entry { SLIST_ENTRY(pf_osfp_entry) fp_entry; pf_osfp_t fp_os; int fp_enflags; #define PF_OSFP_EXPANDED 0x001 /* expanded entry */ #define PF_OSFP_GENERIC 0x002 /* generic signature */ #define PF_OSFP_NODETAIL 0x004 /* no p0f details */ #define PF_OSFP_LEN 32 char fp_class_nm[PF_OSFP_LEN]; char fp_version_nm[PF_OSFP_LEN]; char fp_subtype_nm[PF_OSFP_LEN]; }; #define PF_OSFP_ENTRY_EQ(a, b) \ ((a)->fp_os == (b)->fp_os && \ memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && \ memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \ memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0) /* handle pf_osfp_t packing */ #define _FP_RESERVED_BIT 1 /* For the special negative #defines */ #define _FP_UNUSED_BITS 1 #define _FP_CLASS_BITS 10 /* OS Class (Windows, Linux) */ #define _FP_VERSION_BITS 10 /* OS version (95, 98, NT, 2.4.54, 3.2) */ #define _FP_SUBTYPE_BITS 10 /* patch level (NT SP4, SP3, ECN patch) */ #define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \ (class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \ ((1 << _FP_CLASS_BITS) - 1); \ (version) = ((osfp) >> _FP_SUBTYPE_BITS) & \ ((1 << _FP_VERSION_BITS) - 1);\ (subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \ } while(0) #define PF_OSFP_PACK(osfp, class, version, subtype) do { \ (osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \ + _FP_SUBTYPE_BITS); \ (osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \ _FP_SUBTYPE_BITS; \ (osfp) |= (subtype) & ((1 << _FP_SUBTYPE_BITS) - 1); \ } while(0) /* the fingerprint of an OSes TCP SYN packet */ typedef u_int64_t pf_tcpopts_t; struct pf_os_fingerprint { SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */ pf_tcpopts_t fp_tcpopts; /* packed TCP options */ u_int16_t fp_wsize; /* TCP window size */ u_int16_t fp_psize; /* ip->ip_len */ u_int16_t fp_mss; /* TCP MSS */ u_int16_t fp_flags; 
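	/*
	 * For illustration: PF_OSFP_PACK() above packs three 10-bit fields
	 * into one pf_osfp_t - class in bits 20-29, version in bits 10-19,
	 * subtype in bits 0-9.  E.g. class 5, version 3, subtype 1 packs to
	 * (5 << 20) | (3 << 10) | 1 = 0x500C01.
	 */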
#define PF_OSFP_WSIZE_MOD 0x0001 /* Window modulus */ #define PF_OSFP_WSIZE_DC 0x0002 /* Window don't care */ #define PF_OSFP_WSIZE_MSS 0x0004 /* Window multiple of MSS */ #define PF_OSFP_WSIZE_MTU 0x0008 /* Window multiple of MTU */ #define PF_OSFP_PSIZE_MOD 0x0010 /* packet size modulus */ #define PF_OSFP_PSIZE_DC 0x0020 /* packet size don't care */ #define PF_OSFP_WSCALE 0x0040 /* TCP window scaling */ #define PF_OSFP_WSCALE_MOD 0x0080 /* TCP window scale modulus */ #define PF_OSFP_WSCALE_DC 0x0100 /* TCP window scale dont-care */ #define PF_OSFP_MSS 0x0200 /* TCP MSS */ #define PF_OSFP_MSS_MOD 0x0400 /* TCP MSS modulus */ #define PF_OSFP_MSS_DC 0x0800 /* TCP MSS dont-care */ #define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */ #define PF_OSFP_TS0 0x2000 /* Zero timestamp */ +#define PF_OSFP_INET6 0x4000 /* IPv6 */ u_int8_t fp_optcnt; /* TCP option count */ u_int8_t fp_wscale; /* TCP window scaling */ u_int8_t fp_ttl; /* IPv4 TTL */ #define PF_OSFP_MAXTTL_OFFSET 40 /* TCP options packing */ #define PF_OSFP_TCPOPT_NOP 0x0 /* TCP NOP option */ #define PF_OSFP_TCPOPT_WSCALE 0x1 /* TCP window scaling option */ #define PF_OSFP_TCPOPT_MSS 0x2 /* TCP max segment size opt */ #define PF_OSFP_TCPOPT_SACK 0x3 /* TCP SACK OK option */ #define PF_OSFP_TCPOPT_TS 0x4 /* TCP timestamp option */ #define PF_OSFP_TCPOPT_BITS 3 /* bits used by each option */ #define PF_OSFP_MAX_OPTS \ (sizeof(((struct pf_os_fingerprint *)0)->fp_tcpopts) * 8) \ / PF_OSFP_TCPOPT_BITS SLIST_ENTRY(pf_os_fingerprint) fp_next; }; struct pf_osfp_ioctl { struct pf_osfp_entry fp_os; pf_tcpopts_t fp_tcpopts; /* packed TCP options */ u_int16_t fp_wsize; /* TCP window size */ u_int16_t fp_psize; /* ip->ip_len */ u_int16_t fp_mss; /* TCP MSS */ u_int16_t fp_flags; u_int8_t fp_optcnt; /* TCP option count */ u_int8_t fp_wscale; /* TCP window scaling */ u_int8_t fp_ttl; /* IPv4 TTL */ int fp_getnum; /* DIOCOSFPGET number */ }; union pf_rule_ptr { struct pf_rule *ptr; u_int32_t nr; }; #define PF_ANCHOR_NAME_SIZE 64 struct pf_rule { struct pf_rule_addr src; struct pf_rule_addr dst; #define PF_SKIP_IFP 0 #define PF_SKIP_DIR 1 #define PF_SKIP_AF 2 #define PF_SKIP_PROTO 3 #define PF_SKIP_SRC_ADDR 4 #define PF_SKIP_SRC_PORT 5 #define PF_SKIP_DST_ADDR 6 #define PF_SKIP_DST_PORT 7 #define PF_SKIP_COUNT 8 union pf_rule_ptr skip[PF_SKIP_COUNT]; #define PF_RULE_LABEL_SIZE 64 char label[PF_RULE_LABEL_SIZE]; -#define PF_QNAME_SIZE 16 +#define PF_QNAME_SIZE 64 char ifname[IFNAMSIZ]; char qname[PF_QNAME_SIZE]; char pqname[PF_QNAME_SIZE]; -#define PF_TAG_NAME_SIZE 16 +#define PF_TAG_NAME_SIZE 64 char tagname[PF_TAG_NAME_SIZE]; char match_tagname[PF_TAG_NAME_SIZE]; char overload_tblname[PF_TABLE_NAME_SIZE]; TAILQ_ENTRY(pf_rule) entries; struct pf_pool rpool; u_int64_t evaluations; - u_int64_t packets; - u_int64_t bytes; + u_int64_t packets[2]; + u_int64_t bytes[2]; struct pfi_kif *kif; struct pf_anchor *anchor; struct pfr_ktable *overload_tbl; pf_osfp_t os_fingerprint; + int rtableid; u_int32_t timeout[PFTM_MAX]; u_int32_t states; u_int32_t max_states; u_int32_t src_nodes; u_int32_t max_src_nodes; u_int32_t max_src_states; u_int32_t max_src_conn; struct { u_int32_t limit; u_int32_t seconds; } max_src_conn_rate; u_int32_t qid; u_int32_t pqid; u_int32_t rt_listid; u_int32_t nr; u_int32_t prob; + uid_t cuid; + pid_t cpid; u_int16_t return_icmp; u_int16_t return_icmp6; u_int16_t max_mss; u_int16_t tag; u_int16_t match_tag; struct pf_rule_uid uid; struct pf_rule_gid gid; u_int32_t rule_flag; u_int8_t action; u_int8_t direction; u_int8_t log; + u_int8_t 
logif; u_int8_t quick; u_int8_t ifnot; u_int8_t match_tag_not; u_int8_t natpass; #define PF_STATE_NORMAL 0x1 #define PF_STATE_MODULATE 0x2 #define PF_STATE_SYNPROXY 0x3 u_int8_t keep_state; sa_family_t af; u_int8_t proto; u_int8_t type; u_int8_t code; u_int8_t flags; u_int8_t flagset; u_int8_t min_ttl; u_int8_t allow_opts; u_int8_t rt; u_int8_t return_ttl; u_int8_t tos; u_int8_t anchor_relative; u_int8_t anchor_wildcard; #define PF_FLUSH 0x01 #define PF_FLUSH_GLOBAL 0x02 u_int8_t flush; }; /* rule flags */ #define PFRULE_DROP 0x0000 #define PFRULE_RETURNRST 0x0001 #define PFRULE_FRAGMENT 0x0002 #define PFRULE_RETURNICMP 0x0004 #define PFRULE_RETURN 0x0008 #define PFRULE_NOSYNC 0x0010 #define PFRULE_SRCTRACK 0x0020 /* track source states */ #define PFRULE_RULESRCTRACK 0x0040 /* per rule */ /* scrub flags */ #define PFRULE_NODF 0x0100 #define PFRULE_FRAGCROP 0x0200 /* non-buffering frag cache */ #define PFRULE_FRAGDROP 0x0400 /* drop funny fragments */ #define PFRULE_RANDOMID 0x0800 #define PFRULE_REASSEMBLE_TCP 0x1000 /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ -#define PFRULE_GRBOUND 0x00020000 /* group-bound */ #define PFSTATE_HIWAT 10000 /* default state table size */ +#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ +#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */ struct pf_threshold { u_int32_t limit; #define PF_THRESHOLD_MULT 1000 #define PF_THRESHOLD_MAX 0xffffffff / PF_THRESHOLD_MULT u_int32_t seconds; u_int32_t count; u_int32_t last; }; struct pf_src_node { RB_ENTRY(pf_src_node) entry; struct pf_addr addr; struct pf_addr raddr; union pf_rule_ptr rule; struct pfi_kif *kif; - u_int32_t bytes; - u_int32_t packets; + u_int64_t bytes[2]; + u_int64_t packets[2]; u_int32_t states; u_int32_t conn; struct pf_threshold conn_rate; u_int32_t creation; u_int32_t expire; sa_family_t af; u_int8_t ruletype; }; #define PFSNODE_HIWAT 10000 /* default source node table size */ struct pf_state_scrub { struct timeval pfss_last; /* time received last packet */ u_int32_t pfss_tsecr; /* last echoed timestamp */ u_int32_t pfss_tsval; /* largest timestamp */ u_int32_t pfss_tsval0; /* original timestamp */ u_int16_t pfss_flags; #define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ #define PFSS_PAWS 0x0010 /* stricter PAWS checks */ #define PFSS_PAWS_IDLED 0x0020 /* was idle too long. 
no PAWS */ #define PFSS_DATA_TS 0x0040 /* timestamp on data packets */ #define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */ u_int8_t pfss_ttl; /* stashed TTL */ u_int8_t pad; u_int32_t pfss_ts_mod; /* timestamp modulation */ }; struct pf_state_host { struct pf_addr addr; u_int16_t port; u_int16_t pad; }; struct pf_state_peer { u_int32_t seqlo; /* Max sequence number sent */ u_int32_t seqhi; /* Max the other end ACKd + win */ u_int32_t seqdiff; /* Sequence number modulator */ u_int16_t max_win; /* largest window (pre scaling) */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ u_int16_t mss; /* Maximum segment size option */ + u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ struct pf_state_scrub *scrub; /* state is scrubbed */ + u_int8_t pad[3]; }; TAILQ_HEAD(pf_state_queue, pf_state); +/* keep synced with struct pf_state, used in RB_FIND */ +struct pf_state_cmp { + u_int64_t id; + u_int32_t creatorid; + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t pad; +}; + struct pf_state { u_int64_t id; + u_int32_t creatorid; + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t pad; + u_int8_t log; + u_int8_t allow_opts; + u_int8_t timeout; + u_int8_t sync_flags; +#define PFSTATE_NOSYNC 0x01 +#define PFSTATE_FROMSYNC 0x02 +#define PFSTATE_STALE 0x04 union { struct { RB_ENTRY(pf_state) entry_lan_ext; RB_ENTRY(pf_state) entry_ext_gwy; RB_ENTRY(pf_state) entry_id; - TAILQ_ENTRY(pf_state) entry_updates; + TAILQ_ENTRY(pf_state) entry_list; struct pfi_kif *kif; } s; char ifname[IFNAMSIZ]; } u; - struct pf_state_host lan; - struct pf_state_host gwy; - struct pf_state_host ext; struct pf_state_peer src; struct pf_state_peer dst; union pf_rule_ptr rule; union pf_rule_ptr anchor; union pf_rule_ptr nat_rule; struct pf_addr rt_addr; struct pfi_kif *rt_kif; struct pf_src_node *src_node; struct pf_src_node *nat_src_node; + u_int64_t packets[2]; + u_int64_t bytes[2]; u_int32_t creation; u_int32_t expire; u_int32_t pfsync_time; - u_int32_t packets[2]; - u_int32_t bytes[2]; - u_int32_t creatorid; u_int16_t tag; - sa_family_t af; - u_int8_t proto; - u_int8_t direction; - u_int8_t log; - u_int8_t allow_opts; - u_int8_t timeout; - u_int8_t sync_flags; -#define PFSTATE_NOSYNC 0x01 -#define PFSTATE_FROMSYNC 0x02 -#define PFSTATE_STALE 0x04 - u_int8_t pad; }; TAILQ_HEAD(pf_rulequeue, pf_rule); struct pf_anchor; struct pf_ruleset { struct { struct pf_rulequeue queues[2]; struct { struct pf_rulequeue *ptr; + struct pf_rule **ptr_array; + u_int32_t rcount; u_int32_t ticket; int open; } active, inactive; } rules[PF_RULESET_MAX]; struct pf_anchor *anchor; u_int32_t tticket; int tables; int topen; }; RB_HEAD(pf_anchor_global, pf_anchor); RB_HEAD(pf_anchor_node, pf_anchor); struct pf_anchor { RB_ENTRY(pf_anchor) entry_global; RB_ENTRY(pf_anchor) entry_node; struct pf_anchor *parent; struct pf_anchor_node children; char name[PF_ANCHOR_NAME_SIZE]; char path[MAXPATHLEN]; struct pf_ruleset ruleset; int refcnt; /* anchor rules */ + int match; }; RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); #define PF_RESERVED_ANCHOR "_pf" #define PFR_TFLAG_PERSIST 0x00000001 #define PFR_TFLAG_CONST 0x00000002 #define PFR_TFLAG_ACTIVE 0x00000004 #define PFR_TFLAG_INACTIVE 0x00000008 #define 
PFR_TFLAG_REFERENCED 0x00000010 #define PFR_TFLAG_REFDANCHOR 0x00000020 #define PFR_TFLAG_USRMASK 0x00000003 #define PFR_TFLAG_SETMASK 0x0000003C #define PFR_TFLAG_ALLMASK 0x0000003F struct pfr_table { char pfrt_anchor[MAXPATHLEN]; char pfrt_name[PF_TABLE_NAME_SIZE]; u_int32_t pfrt_flags; u_int8_t pfrt_fback; }; enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED, PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE, PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_MAX }; struct pfr_addr { union { struct in_addr _pfra_ip4addr; struct in6_addr _pfra_ip6addr; } pfra_u; u_int8_t pfra_af; u_int8_t pfra_net; u_int8_t pfra_not; u_int8_t pfra_fback; }; #define pfra_ip4addr pfra_u._pfra_ip4addr #define pfra_ip6addr pfra_u._pfra_ip6addr enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX }; enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX }; #define PFR_OP_XPASS PFR_OP_ADDR_MAX struct pfr_astats { struct pfr_addr pfras_a; u_int64_t pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; long pfras_tzero; }; enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX }; struct pfr_tstats { struct pfr_table pfrts_t; u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; u_int64_t pfrts_match; u_int64_t pfrts_nomatch; long pfrts_tzero; int pfrts_cnt; int pfrts_refcnt[PFR_REFCNT_MAX]; }; #define pfrts_name pfrts_t.pfrt_name #define pfrts_flags pfrts_t.pfrt_flags SLIST_HEAD(pfr_kentryworkq, pfr_kentry); struct pfr_kentry { struct radix_node pfrke_node[2]; union sockaddr_union pfrke_sa; u_int64_t pfrke_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; u_int64_t pfrke_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; SLIST_ENTRY(pfr_kentry) pfrke_workq; long pfrke_tzero; u_int8_t pfrke_af; u_int8_t pfrke_net; u_int8_t pfrke_not; u_int8_t pfrke_mark; u_int8_t pfrke_intrpool; }; SLIST_HEAD(pfr_ktableworkq, pfr_ktable); RB_HEAD(pfr_ktablehead, pfr_ktable); struct pfr_ktable { struct pfr_tstats pfrkt_ts; RB_ENTRY(pfr_ktable) pfrkt_tree; SLIST_ENTRY(pfr_ktable) pfrkt_workq; struct radix_node_head *pfrkt_ip4; struct radix_node_head *pfrkt_ip6; struct pfr_ktable *pfrkt_shadow; struct pfr_ktable *pfrkt_root; struct pf_ruleset *pfrkt_rs; long pfrkt_larg; int pfrkt_nflags; }; #define pfrkt_t pfrkt_ts.pfrts_t #define pfrkt_name pfrkt_t.pfrt_name #define pfrkt_anchor pfrkt_t.pfrt_anchor #define pfrkt_ruleset pfrkt_t.pfrt_ruleset #define pfrkt_flags pfrkt_t.pfrt_flags #define pfrkt_cnt pfrkt_ts.pfrts_cnt #define pfrkt_refcnt pfrkt_ts.pfrts_refcnt #define pfrkt_packets pfrkt_ts.pfrts_packets #define pfrkt_bytes pfrkt_ts.pfrts_bytes #define pfrkt_match pfrkt_ts.pfrts_match #define pfrkt_nomatch pfrkt_ts.pfrts_nomatch #define pfrkt_tzero pfrkt_ts.pfrts_tzero RB_HEAD(pf_state_tree_lan_ext, pf_state); RB_PROTOTYPE(pf_state_tree_lan_ext, pf_state, u.s.entry_lan_ext, pf_state_compare_lan_ext); RB_HEAD(pf_state_tree_ext_gwy, pf_state); RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state, u.s.entry_ext_gwy, pf_state_compare_ext_gwy); -struct pfi_if { - char pfif_name[IFNAMSIZ]; - u_int64_t pfif_packets[2][2][2]; - u_int64_t pfif_bytes[2][2][2]; - u_int64_t pfif_addcnt; - u_int64_t pfif_delcnt; - long pfif_tzero; - int pfif_states; - int pfif_rules; - int pfif_flags; -}; - -TAILQ_HEAD(pfi_grouphead, pfi_kif); TAILQ_HEAD(pfi_statehead, pfi_kif); RB_HEAD(pfi_ifhead, pfi_kif); + +/* keep synced with pfi_kif, used in RB_FIND */ +struct pfi_kif_cmp { + char pfik_name[IFNAMSIZ]; +}; + struct pfi_kif { - struct pfi_if pfik_if; + char pfik_name[IFNAMSIZ]; 
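	/*
	 * Note (illustration): pfik_name has to stay the first member so
	 * that struct pfi_kif_cmp above can serve as the search key - the
	 * RB compare only looks at the name, so lookups can fill a small
	 * key struct instead of a whole pfi_kif.
	 */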
RB_ENTRY(pfi_kif) pfik_tree; + u_int64_t pfik_packets[2][2][2]; + u_int64_t pfik_bytes[2][2][2]; + u_int32_t pfik_tzero; + int pfik_flags; struct pf_state_tree_lan_ext pfik_lan_ext; struct pf_state_tree_ext_gwy pfik_ext_gwy; - struct pfi_grouphead pfik_grouphead; - TAILQ_ENTRY(pfi_kif) pfik_instances; TAILQ_ENTRY(pfi_kif) pfik_w_states; - struct hook_desc_head *pfik_ah_head; void *pfik_ah_cookie; - struct pfi_kif *pfik_parent; struct ifnet *pfik_ifp; + struct ifg_group *pfik_group; int pfik_states; int pfik_rules; + TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; }; -#define pfik_name pfik_if.pfif_name -#define pfik_packets pfik_if.pfif_packets -#define pfik_bytes pfik_if.pfif_bytes -#define pfik_tzero pfik_if.pfif_tzero -#define pfik_flags pfik_if.pfif_flags -#define pfik_addcnt pfik_if.pfif_addcnt -#define pfik_delcnt pfik_if.pfif_delcnt -#define pfik_states pfik_if.pfif_states -#define pfik_rules pfik_if.pfif_rules -#define PFI_IFLAG_GROUP 0x0001 /* group of interfaces */ -#define PFI_IFLAG_INSTANCE 0x0002 /* single instance */ -#define PFI_IFLAG_CLONABLE 0x0010 /* clonable group */ -#define PFI_IFLAG_DYNAMIC 0x0020 /* dynamic group */ -#define PFI_IFLAG_ATTACHED 0x0040 /* interface attached */ +enum pfi_kif_refs { + PFI_KIF_REF_NONE, + PFI_KIF_REF_STATE, + PFI_KIF_REF_RULE +}; + #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ -#define PFI_IFLAG_SETABLE_MASK 0x0100 /* setable via DIOC{SET,CLR}IFFLAG */ struct pf_pdesc { + struct { + int done; + uid_t uid; + gid_t gid; + pid_t pid; + } lookup; u_int64_t tot_len; /* Make Mickey money */ union { struct tcphdr *tcp; struct udphdr *udp; struct icmp *icmp; #ifdef INET6 struct icmp6_hdr *icmp6; #endif /* INET6 */ void *any; } hdr; struct pf_addr baddr; /* address before translation */ struct pf_addr naddr; /* address after translation */ struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ struct pf_addr *src; struct pf_addr *dst; struct ether_header *eh; + struct pf_mtag *pf_mtag; u_int16_t *ip_sum; u_int32_t p_len; /* total length of payload */ u_int16_t flags; /* Let SCRUB trigger behavior in * state code. 
Easier than tags */ #define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ #define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */ sa_family_t af; u_int8_t proto; u_int8_t tos; }; /* flags for RDR options */ #define PF_DPORT_RANGE 0x01 /* Dest port uses range */ #define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */ /* Reasons code for passing/dropping a packet */ #define PFRES_MATCH 0 /* Explicit match of a rule */ #define PFRES_BADOFF 1 /* Bad offset for pull_hdr */ #define PFRES_FRAG 2 /* Dropping following fragment */ #define PFRES_SHORT 3 /* Dropping short packet */ #define PFRES_NORM 4 /* Dropping by normalizer */ #define PFRES_MEMORY 5 /* Dropped due to lacking mem */ #define PFRES_TS 6 /* Bad TCP Timestamp (RFC1323) */ #define PFRES_CONGEST 7 /* Congestion (of ipintrq) */ #define PFRES_IPOPTIONS 8 /* IP option */ #define PFRES_PROTCKSUM 9 /* Protocol checksum invalid */ #define PFRES_BADSTATE 10 /* State mismatch */ #define PFRES_STATEINS 11 /* State insertion failure */ #define PFRES_MAXSTATES 12 /* State limit */ #define PFRES_SRCLIMIT 13 /* Source node/conn limit */ #define PFRES_SYNPROXY 14 /* SYN proxy */ #define PFRES_MAX 15 /* total+1 */ #define PFRES_NAMES { \ "match", \ "bad-offset", \ "fragment", \ "short", \ "normalize", \ "memory", \ "bad-timestamp", \ "congestion", \ "ip-option", \ "proto-cksum", \ "state-mismatch", \ "state-insert", \ "state-limit", \ "src-limit", \ "synproxy", \ NULL \ } /* Counters for other things we want to keep track of */ #define LCNT_STATES 0 /* states */ #define LCNT_SRCSTATES 1 /* max-src-states */ #define LCNT_SRCNODES 2 /* max-src-nodes */ #define LCNT_SRCCONN 3 /* max-src-conn */ #define LCNT_SRCCONNRATE 4 /* max-src-conn-rate */ #define LCNT_OVERLOAD_TABLE 5 /* entry added to overload table */ #define LCNT_OVERLOAD_FLUSH 6 /* state entries flushed */ #define LCNT_MAX 7 /* total+1 */ #define LCNT_NAMES { \ "max states per rule", \ "max-src-states", \ "max-src-nodes", \ "max-src-conn", \ "max-src-conn-rate", \ "overload table insertion", \ "overload flush states", \ NULL \ } /* UDP state enumeration */ #define PFUDPS_NO_TRAFFIC 0 #define PFUDPS_SINGLE 1 #define PFUDPS_MULTIPLE 2 #define PFUDPS_NSTATES 3 /* number of state levels */ #define PFUDPS_NAMES { \ "NO_TRAFFIC", \ "SINGLE", \ "MULTIPLE", \ NULL \ } /* Other protocol state enumeration */ #define PFOTHERS_NO_TRAFFIC 0 #define PFOTHERS_SINGLE 1 #define PFOTHERS_MULTIPLE 2 #define PFOTHERS_NSTATES 3 /* number of state levels */ #define PFOTHERS_NAMES { \ "NO_TRAFFIC", \ "SINGLE", \ "MULTIPLE", \ NULL \ } #define FCNT_STATE_SEARCH 0 #define FCNT_STATE_INSERT 1 #define FCNT_STATE_REMOVALS 2 #define FCNT_MAX 3 #define SCNT_SRC_NODE_SEARCH 0 #define SCNT_SRC_NODE_INSERT 1 #define SCNT_SRC_NODE_REMOVALS 2 #define SCNT_MAX 3 #define ACTION_SET(a, x) \ do { \ if ((a) != NULL) \ *(a) = (x); \ } while (0) #define REASON_SET(a, x) \ do { \ if ((a) != NULL) \ *(a) = (x); \ if (x < PFRES_MAX) \ pf_status.counters[x]++; \ } while (0) struct pf_status { u_int64_t counters[PFRES_MAX]; u_int64_t lcounters[LCNT_MAX]; /* limit counters */ u_int64_t fcounters[FCNT_MAX]; u_int64_t scounters[SCNT_MAX]; u_int64_t pcounters[2][2][3]; u_int64_t bcounters[2][2]; u_int64_t stateid; u_int32_t running; u_int32_t states; u_int32_t src_nodes; u_int32_t since; u_int32_t debug; u_int32_t hostid; char ifname[IFNAMSIZ]; + u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; }; struct cbq_opts { u_int minburst; u_int maxburst; u_int pktsize; u_int maxpktsize; u_int ns_per_byte; u_int maxidle; int 
minidle; u_int offtime; int flags; }; struct priq_opts { int flags; }; struct hfsc_opts { /* real-time service curve */ u_int rtsc_m1; /* slope of the 1st segment in bps */ u_int rtsc_d; /* the x-projection of m1 in msec */ u_int rtsc_m2; /* slope of the 2nd segment in bps */ /* link-sharing service curve */ u_int lssc_m1; u_int lssc_d; u_int lssc_m2; /* upper-limit service curve */ u_int ulsc_m1; u_int ulsc_d; u_int ulsc_m2; int flags; }; struct pf_altq { char ifname[IFNAMSIZ]; void *altq_disc; /* discipline-specific state */ TAILQ_ENTRY(pf_altq) entries; /* scheduler spec */ u_int8_t scheduler; /* scheduler type */ u_int16_t tbrsize; /* tokenbucket regulator size */ u_int32_t ifbandwidth; /* interface bandwidth */ /* queue spec */ char qname[PF_QNAME_SIZE]; /* queue name */ char parent[PF_QNAME_SIZE]; /* parent name */ u_int32_t parent_qid; /* parent queue id */ u_int32_t bandwidth; /* queue bandwidth */ u_int8_t priority; /* priority */ u_int16_t qlimit; /* queue size limit */ u_int16_t flags; /* misc flags */ union { struct cbq_opts cbq_opts; struct priq_opts priq_opts; struct hfsc_opts hfsc_opts; } pq_u; u_int32_t qid; /* return value */ }; +#define PF_TAG_GENERATED 0x01 +#define PF_TAG_FRAGCACHE 0x02 +#define PF_TAG_TRANSLATE_LOCALHOST 0x04 + +struct pf_mtag { + void *hdr; /* saved hdr pos in mbuf, for ECN */ + u_int rtableid; /* alternate routing table id */ + u_int32_t qid; /* queue id */ + u_int16_t tag; /* tag id */ + u_int8_t flags; + u_int8_t routed; + sa_family_t af; /* for ECN */ +}; + struct pf_tag { u_int16_t tag; /* tag id */ }; struct pf_tagname { TAILQ_ENTRY(pf_tagname) entries; char name[PF_TAG_NAME_SIZE]; u_int16_t tag; int ref; }; #define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ #define PFFRAG_FRAG_HIWAT 1000 /* Number of fragmented packets */ #define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ #define PFFRAG_FRCACHE_HIWAT 10000 /* Number of fragment descriptors */ +#define PFR_KTABLE_HIWAT 1000 /* Number of tables */ +#define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ +#define PFR_KENTRY_HIWAT_SMALL 100000 /* Number of table entries (tiny hosts) */ + /* * ioctl parameter structures */ struct pfioc_pooladdr { u_int32_t action; u_int32_t ticket; u_int32_t nr; u_int32_t r_num; u_int8_t r_action; u_int8_t r_last; u_int8_t af; char anchor[MAXPATHLEN]; struct pf_pooladdr addr; }; struct pfioc_rule { u_int32_t action; u_int32_t ticket; u_int32_t pool_ticket; u_int32_t nr; char anchor[MAXPATHLEN]; char anchor_call[MAXPATHLEN]; struct pf_rule rule; }; struct pfioc_natlook { struct pf_addr saddr; struct pf_addr daddr; struct pf_addr rsaddr; struct pf_addr rdaddr; u_int16_t sport; u_int16_t dport; u_int16_t rsport; u_int16_t rdport; sa_family_t af; u_int8_t proto; u_int8_t direction; }; struct pfioc_state { u_int32_t nr; struct pf_state state; }; +struct pfioc_src_node_kill { + /* XXX returns the number of src nodes killed in psnk_af */ + sa_family_t psnk_af; + struct pf_rule_addr psnk_src; + struct pf_rule_addr psnk_dst; +}; + struct pfioc_state_kill { /* XXX returns the number of states killed in psk_af */ sa_family_t psk_af; int psk_proto; struct pf_rule_addr psk_src; struct pf_rule_addr psk_dst; char psk_ifname[IFNAMSIZ]; }; struct pfioc_states { int ps_len; union { caddr_t psu_buf; struct pf_state *psu_states; } ps_u; #define ps_buf ps_u.psu_buf #define ps_states ps_u.psu_states }; struct pfioc_src_nodes { int psn_len; union { caddr_t psu_buf; struct pf_src_node *psu_src_nodes; } psn_u; #define psn_buf psn_u.psu_buf #define 
psn_src_nodes psn_u.psu_src_nodes }; struct pfioc_if { char ifname[IFNAMSIZ]; }; struct pfioc_tm { int timeout; int seconds; }; struct pfioc_limit { int index; unsigned limit; }; struct pfioc_altq { u_int32_t action; u_int32_t ticket; u_int32_t nr; struct pf_altq altq; }; struct pfioc_qstats { u_int32_t ticket; u_int32_t nr; void *buf; int nbytes; u_int8_t scheduler; }; struct pfioc_ruleset { u_int32_t nr; char path[MAXPATHLEN]; char name[PF_ANCHOR_NAME_SIZE]; }; #define PF_RULESET_ALTQ (PF_RULESET_MAX) #define PF_RULESET_TABLE (PF_RULESET_MAX+1) struct pfioc_trans { int size; /* number of elements */ int esize; /* size of each element in bytes */ struct pfioc_trans_e { int rs_num; char anchor[MAXPATHLEN]; u_int32_t ticket; } *array; }; #define PFR_FLAG_ATOMIC 0x00000001 #define PFR_FLAG_DUMMY 0x00000002 #define PFR_FLAG_FEEDBACK 0x00000004 #define PFR_FLAG_CLSTATS 0x00000008 #define PFR_FLAG_ADDRSTOO 0x00000010 #define PFR_FLAG_REPLACE 0x00000020 #define PFR_FLAG_ALLRSETS 0x00000040 #define PFR_FLAG_ALLMASK 0x0000007F #ifdef _KERNEL #define PFR_FLAG_USERIOCTL 0x10000000 #endif struct pfioc_table { struct pfr_table pfrio_table; void *pfrio_buffer; int pfrio_esize; int pfrio_size; int pfrio_size2; int pfrio_nadd; int pfrio_ndel; int pfrio_nchange; int pfrio_flags; u_int32_t pfrio_ticket; }; #define pfrio_exists pfrio_nadd #define pfrio_nzero pfrio_nadd #define pfrio_nmatch pfrio_nadd #define pfrio_naddr pfrio_size2 #define pfrio_setflag pfrio_size2 #define pfrio_clrflag pfrio_nadd - -#define PFI_FLAG_GROUP 0x0001 /* gets groups of interfaces */ -#define PFI_FLAG_INSTANCE 0x0002 /* gets single interfaces */ -#define PFI_FLAG_ALLMASK 0x0003 - struct pfioc_iface { char pfiio_name[IFNAMSIZ]; void *pfiio_buffer; int pfiio_esize; int pfiio_size; int pfiio_nzero; int pfiio_flags; }; /* * ioctl operations */ #define DIOCSTART _IO ('D', 1) #define DIOCSTOP _IO ('D', 2) #define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule) #define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule) #define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule) /* XXX cut 8 - 17 */ #define DIOCCLRSTATES _IOWR('D', 18, struct pfioc_state_kill) #define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state) #define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if) #define DIOCGETSTATUS _IOWR('D', 21, struct pf_status) #define DIOCCLRSTATUS _IO ('D', 22) #define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook) #define DIOCSETDEBUG _IOWR('D', 24, u_int32_t) #define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states) #define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule) /* XXX cut 26 - 28 */ #define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm) #define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm) #define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state) #define DIOCCLRRULECTRS _IO ('D', 38) #define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit) #define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit) #define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill) #define DIOCSTARTALTQ _IO ('D', 42) #define DIOCSTOPALTQ _IO ('D', 43) #define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq) #define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq) #define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq) #define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq) #define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats) #define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr) #define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr) #define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr) #define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr) #define 
DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr) /* XXX cut 55 - 57 */ #define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset) #define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset) #define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table) #define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table) #define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table) #define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table) #define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table) #define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) #define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table) #define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table) #define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table) #define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table) #define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table) #define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table) #define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) #define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table) #define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table) #define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) #define DIOCOSFPFLUSH _IO('D', 78) #define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) #define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) #define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans) #define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans) #define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans) #define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes) #define DIOCCLRSRCNODES _IO('D', 85) #define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) #define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) -#define DIOCICLRISTATS _IOWR('D', 88, struct pfioc_iface) #define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) #define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) +#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) #ifdef _KERNEL RB_HEAD(pf_src_tree, pf_src_node); RB_PROTOTYPE(pf_src_tree, pf_src_node, entry, pf_src_compare); extern struct pf_src_tree tree_src_tracking; RB_HEAD(pf_state_tree_id, pf_state); RB_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); extern struct pf_state_tree_id tree_id; -extern struct pf_state_queue state_updates; +extern struct pf_state_queue state_list; -extern struct pf_anchor_global pf_anchors; -extern struct pf_ruleset pf_main_ruleset; TAILQ_HEAD(pf_poolqueue, pf_pool); extern struct pf_poolqueue pf_pools[2]; TAILQ_HEAD(pf_altqqueue, pf_altq); extern struct pf_altqqueue pf_altqs[2]; extern struct pf_palist pf_pabuf; -extern struct pfi_kif **pfi_index2kif; extern u_int32_t ticket_altqs_active; extern u_int32_t ticket_altqs_inactive; extern int altqs_inactive_open; extern u_int32_t ticket_pabuf; extern struct pf_altqqueue *pf_altqs_active; extern struct pf_altqqueue *pf_altqs_inactive; extern struct pf_poolqueue *pf_pools_active; extern struct pf_poolqueue *pf_pools_inactive; extern int pf_tbladdr_setup(struct pf_ruleset *, struct pf_addr_wrap *); extern void pf_tbladdr_remove(struct pf_addr_wrap *); extern void pf_tbladdr_copyout(struct pf_addr_wrap *); extern void pf_calc_skip_steps(struct pf_rulequeue *); extern struct pool pf_src_tree_pl, pf_rule_pl; extern struct pool pf_state_pl, pf_altq_pl, pf_pooladdr_pl; extern struct pool pf_state_scrub_pl; -extern void pf_purge_timeout(void *); -extern void pf_purge_expired_src_nodes(void); -extern void pf_purge_expired_states(void); -extern void pf_purge_expired_state(struct pf_state *); +extern void pf_purge_thread(void *); +extern void 
pf_purge_expired_src_nodes(int); +extern void pf_purge_expired_states(u_int32_t); +extern void pf_unlink_state(struct pf_state *); +extern void pf_free_state(struct pf_state *); extern int pf_insert_state(struct pfi_kif *, struct pf_state *); extern int pf_insert_src_node(struct pf_src_node **, struct pf_rule *, struct pf_addr *, sa_family_t); void pf_src_tree_remove_state(struct pf_state *); -extern struct pf_state *pf_find_state_byid(struct pf_state *); -extern struct pf_state *pf_find_state_all(struct pf_state *key, +extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *); +extern struct pf_state *pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more); extern void pf_print_state(struct pf_state *); extern void pf_print_flags(u_int8_t); -extern struct pf_anchor *pf_find_anchor(const char *); -extern struct pf_ruleset *pf_find_ruleset(const char *); -extern struct pf_ruleset *pf_find_or_create_ruleset(const char *); -extern void pf_remove_if_empty_ruleset( - struct pf_ruleset *); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); extern struct ifnet *sync_ifp; extern struct pf_rule pf_default_rule; extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, u_int8_t); void pf_rm_rule(struct pf_rulequeue *, struct pf_rule *); #ifdef INET int pf_test(int, struct ifnet *, struct mbuf **, struct ether_header *); #endif /* INET */ #ifdef INET6 int pf_test6(int, struct ifnet *, struct mbuf **, struct ether_header *); void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, u_int8_t); void pf_addr_inc(struct pf_addr *, sa_family_t); #endif /* INET6 */ void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, sa_family_t); void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); int pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, - u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *); + u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, + struct pf_pdesc *); int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t); int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t); int pf_match_gid(u_int8_t, gid_t, gid_t, gid_t); void pf_normalize_init(void); int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *, struct pf_pdesc *); int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *, struct pf_pdesc *); int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *, struct pf_pdesc *); void pf_normalize_tcp_cleanup(struct pf_state *); int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *); int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, u_short *, struct tcphdr *, struct pf_state *, struct pf_state_peer *, struct pf_state_peer *, int *); u_int32_t pf_state_expires(const struct pf_state *); void pf_purge_expired_fragments(void); -int pf_routable(struct pf_addr *addr, sa_family_t af); +int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *); int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *); +int pf_socket_lookup(int, struct pf_pdesc *); void pfr_initialize(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, u_int64_t, int, int, int); 
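/*
 * Illustrative sketch (not part of the vendor header): the usual kernel
 * pattern pairs a pfr_match_addr() test with a pfr_update_stats() call
 * once the verdict is known.  The function name and the direction/notrule
 * arguments below are hypothetical examples only.
 */
#if 0
static int
example_filter_check(struct pfr_ktable *kt, struct pf_addr *a,
    sa_family_t af, u_int64_t len)
{
	int	block;

	block = pfr_match_addr(kt, a, af);	/* nonzero: a is in table */
	pfr_update_stats(kt, a, af, len, /* dir_out */ 0,
	    block ? PFR_OP_BLOCK : PFR_OP_PASS, /* notrule */ 0);
	return (block);
}
#endif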
int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, struct pf_addr **, struct pf_addr **, sa_family_t); void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); struct pfr_ktable * pfr_attach_table(struct pf_ruleset *, char *); void pfr_detach_table(struct pfr_ktable *); int pfr_clr_tables(struct pfr_table *, int *, int); int pfr_add_tables(struct pfr_table *, int, int *, int); int pfr_del_tables(struct pfr_table *, int, int *, int); int pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int); int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int); int pfr_clr_tstats(struct pfr_table *, int, int *, int); int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int); int pfr_clr_addrs(struct pfr_table *, int *, int); int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long); int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *, - int *, int *, int *, int); + int *, int *, int *, int, u_int32_t); int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int); int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int); int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int); int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int); int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); +extern struct pfi_statehead pfi_statehead; +extern struct pfi_kif *pfi_all; + void pfi_initialize(void); -void pfi_attach_clone(struct if_clone *); +struct pfi_kif *pfi_kif_get(const char *); +void pfi_kif_ref(struct pfi_kif *, enum pfi_kif_refs); +void pfi_kif_unref(struct pfi_kif *, enum pfi_kif_refs); +int pfi_kif_match(struct pfi_kif *, struct pfi_kif *); void pfi_attach_ifnet(struct ifnet *); void pfi_detach_ifnet(struct ifnet *); -struct pfi_kif *pfi_lookup_create(const char *); -struct pfi_kif *pfi_lookup_if(const char *); -int pfi_maybe_destroy(struct pfi_kif *); -struct pfi_kif *pfi_attach_rule(const char *); -void pfi_detach_rule(struct pfi_kif *); -void pfi_attach_state(struct pfi_kif *); -void pfi_detach_state(struct pfi_kif *); +void pfi_attach_ifgroup(struct ifg_group *); +void pfi_detach_ifgroup(struct ifg_group *); +void pfi_group_change(const char *); +int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, + sa_family_t); int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); -void pfi_dynaddr_copyout(struct pf_addr_wrap *); void pfi_dynaddr_remove(struct pf_addr_wrap *); +void pfi_dynaddr_copyout(struct pf_addr_wrap *); void pfi_fill_oldstatus(struct pf_status *); -int pfi_clr_istats(const char *, int *, int); -int pfi_get_ifaces(const char *, struct pfi_if *, int *, int); +int pfi_clr_istats(const char *); +int pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_set_flags(const char *, int); int pfi_clear_flags(const char *, int); -int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, - sa_family_t); -extern struct pfi_statehead pfi_statehead; +u_int16_t pf_tagname2tag(char *); +void pf_tag2tagname(u_int16_t, char *); +void pf_tag_ref(u_int16_t); +void pf_tag_unref(u_int16_t); +int pf_tag_packet(struct mbuf *, struct pf_mtag *, int, int); +u_int32_t 
pf_qname2qid(char *); +void pf_qid2qname(u_int32_t, char *); +void pf_qid_unref(u_int32_t); +struct pf_mtag *pf_find_mtag(struct mbuf *); +struct pf_mtag *pf_get_mtag(struct mbuf *); -u_int16_t pf_tagname2tag(char *); -void pf_tag2tagname(u_int16_t, char *); -void pf_tag_ref(u_int16_t); -void pf_tag_unref(u_int16_t); -int pf_tag_packet(struct mbuf *, struct pf_tag *, int); -u_int32_t pf_qname2qid(char *); -void pf_qid2qname(u_int32_t, char *); -void pf_qid_unref(u_int32_t); - extern struct pf_status pf_status; extern struct pool pf_frent_pl, pf_frag_pl; +extern struct rwlock pf_consistency_lock; struct pf_pool_limit { void *pp; unsigned limit; }; extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; #endif /* _KERNEL */ +extern struct pf_anchor_global pf_anchors; +extern struct pf_anchor pf_main_anchor; +#define pf_main_ruleset pf_main_anchor.ruleset + +/* these ruleset functions can be linked into userland programs (pfctl) */ +int pf_get_ruleset_number(u_int8_t); +void pf_init_ruleset(struct pf_ruleset *); +int pf_anchor_setup(struct pf_rule *, + const struct pf_ruleset *, const char *); +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); +void pf_remove_if_empty_ruleset(struct pf_ruleset *); +struct pf_anchor *pf_find_anchor(const char *); +struct pf_ruleset *pf_find_ruleset(const char *); +struct pf_ruleset *pf_find_or_create_ruleset(const char *); +void pf_rs_initialize(void); + +#ifdef _KERNEL +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); + +#endif /* _KERNEL */ + /* The fingerprint functions can be linked into userland programs (tcpdump) */ int pf_osfp_add(struct pf_osfp_ioctl *); #ifdef _KERNEL struct pf_osfp_enlist * pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int, const struct tcphdr *); #endif /* _KERNEL */ struct pf_osfp_enlist * - pf_osfp_fingerprint_hdr(const struct ip *, const struct tcphdr *); + pf_osfp_fingerprint_hdr(const struct ip *, const struct ip6_hdr *, + const struct tcphdr *); void pf_osfp_flush(void); int pf_osfp_get(struct pf_osfp_ioctl *); void pf_osfp_initialize(void); int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); struct pf_os_fingerprint * pf_osfp_validate(void); #endif /* _NET_PFVAR_H_ */
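/*
 * Illustrative sketch (not part of the vendor header): struct pf_state_cmp
 * mirrors the key fields at the head of struct pf_state, so a lookup can
 * fill a small key instead of a whole state.  A minimal lookup by id,
 * assuming the state trees are populated:
 */
#if 0
static struct pf_state *
example_lookup_by_id(u_int64_t id, u_int32_t creatorid)
{
	struct pf_state_cmp	 key;

	bzero(&key, sizeof(key));
	key.id = id;
	key.creatorid = creatorid;
	return (pf_find_state_byid(&key));
}
#endif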