diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index ed371f61a999..e0c5fb5214a8 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1,2499 +1,2490 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Daniel Hartmeier * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $ * $FreeBSD$ */ #ifndef _NET_PFVAR_H_ #define _NET_PFVAR_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #include #include #include #include #endif #include #include #include #ifdef _KERNEL #if defined(__arm__) #define PF_WANT_32_TO_64_COUNTER #endif /* * A hybrid of 32-bit and 64-bit counters which can be used on platforms where * counter(9) is very expensive. * * As 32-bit counters are expected to overflow, a periodic job sums them up to * a saved 64-bit state. Fetching the value still walks all CPUs to get the most * current snapshot. */ #ifdef PF_WANT_32_TO_64_COUNTER struct pf_counter_u64_pcpu { u_int32_t current; u_int32_t snapshot; }; struct pf_counter_u64 { struct pf_counter_u64_pcpu *pfcu64_pcpu; u_int64_t pfcu64_value; seqc_t pfcu64_seqc; }; static inline int pf_counter_u64_init(struct pf_counter_u64 *pfcu64, int flags) { pfcu64->pfcu64_value = 0; pfcu64->pfcu64_seqc = 0; pfcu64->pfcu64_pcpu = uma_zalloc_pcpu(pcpu_zone_8, flags | M_ZERO); if (__predict_false(pfcu64->pfcu64_pcpu == NULL)) return (ENOMEM); return (0); } static inline void pf_counter_u64_deinit(struct pf_counter_u64 *pfcu64) { uma_zfree_pcpu(pcpu_zone_8, pfcu64->pfcu64_pcpu); } static inline void pf_counter_u64_critical_enter(void) { critical_enter(); } static inline void pf_counter_u64_critical_exit(void) { critical_exit(); } static inline void pf_counter_u64_add_protected(struct pf_counter_u64 *pfcu64, uint32_t n) { struct pf_counter_u64_pcpu *pcpu; u_int32_t val; MPASS(curthread->td_critnest > 0); pcpu = zpcpu_get(pfcu64->pfcu64_pcpu); val = atomic_load_int(&pcpu->current); atomic_store_int(&pcpu->current, val + n); } static inline void pf_counter_u64_add(struct pf_counter_u64 *pfcu64, uint32_t n) { critical_enter(); pf_counter_u64_add_protected(pfcu64, n); critical_exit(); } static inline u_int64_t pf_counter_u64_periodic(struct pf_counter_u64 *pfcu64) { struct pf_counter_u64_pcpu *pcpu; u_int64_t sum; u_int32_t val; int cpu; MPASS(curthread->td_critnest > 0); seqc_write_begin(&pfcu64->pfcu64_seqc); sum = pfcu64->pfcu64_value; CPU_FOREACH(cpu) { pcpu = zpcpu_get_cpu(pfcu64->pfcu64_pcpu, cpu); val = atomic_load_int(&pcpu->current); sum += (uint32_t)(val - pcpu->snapshot); pcpu->snapshot = val; } pfcu64->pfcu64_value = sum; seqc_write_end(&pfcu64->pfcu64_seqc); return (sum); } static inline u_int64_t pf_counter_u64_fetch(const struct pf_counter_u64 *pfcu64) { struct pf_counter_u64_pcpu *pcpu; u_int64_t sum; seqc_t seqc; int cpu; for (;;) { seqc = seqc_read(&pfcu64->pfcu64_seqc); sum = 0; CPU_FOREACH(cpu) { pcpu = zpcpu_get_cpu(pfcu64->pfcu64_pcpu, cpu); sum += (uint32_t)(atomic_load_int(&pcpu->current) -pcpu->snapshot); } sum += pfcu64->pfcu64_value; if (seqc_consistent(&pfcu64->pfcu64_seqc, seqc)) break; } return (sum); } static inline void pf_counter_u64_zero_protected(struct pf_counter_u64 *pfcu64) { struct pf_counter_u64_pcpu *pcpu; int cpu; MPASS(curthread->td_critnest > 0); seqc_write_begin(&pfcu64->pfcu64_seqc); CPU_FOREACH(cpu) { pcpu = zpcpu_get_cpu(pfcu64->pfcu64_pcpu, cpu); pcpu->snapshot = atomic_load_int(&pcpu->current); } pfcu64->pfcu64_value = 0; seqc_write_end(&pfcu64->pfcu64_seqc); } static inline void pf_counter_u64_zero(struct pf_counter_u64 *pfcu64) { critical_enter(); pf_counter_u64_zero_protected(pfcu64); critical_exit(); } #else struct pf_counter_u64 { counter_u64_t counter; }; static inline int pf_counter_u64_init(struct pf_counter_u64 *pfcu64, int flags) { pfcu64->counter = counter_u64_alloc(flags); if (__predict_false(pfcu64->counter == NULL)) return (ENOMEM); return (0); } static inline void pf_counter_u64_deinit(struct pf_counter_u64 *pfcu64) { counter_u64_free(pfcu64->counter); } static inline void pf_counter_u64_critical_enter(void) { } static inline void pf_counter_u64_critical_exit(void) { } static inline void pf_counter_u64_add_protected(struct pf_counter_u64 *pfcu64, uint32_t n) { counter_u64_add(pfcu64->counter, n); } static inline void pf_counter_u64_add(struct pf_counter_u64 *pfcu64, uint32_t n) { pf_counter_u64_add_protected(pfcu64, n); } static inline u_int64_t pf_counter_u64_fetch(const struct pf_counter_u64 *pfcu64) { return (counter_u64_fetch(pfcu64->counter)); } static inline void pf_counter_u64_zero_protected(struct pf_counter_u64 *pfcu64) { counter_u64_zero(pfcu64->counter); } static inline void pf_counter_u64_zero(struct pf_counter_u64 *pfcu64) { pf_counter_u64_zero_protected(pfcu64); } #endif #define pf_get_timestamp(prule)({ \ uint32_t _ts = 0; \ uint32_t __ts; \ int cpu; \ CPU_FOREACH(cpu) { \ __ts = *zpcpu_get_cpu(prule->timestamp, cpu); \ if (__ts > _ts) \ _ts = __ts; \ } \ _ts; \ }) #define pf_update_timestamp(prule) \ do { \ critical_enter(); \ *zpcpu_get((prule)->timestamp) = time_second; \ critical_exit(); \ } while (0) #define pf_timestamp_pcpu_zone (sizeof(time_t) == 4 ? pcpu_zone_4 : pcpu_zone_8) _Static_assert(sizeof(time_t) == 4 || sizeof(time_t) == 8, "unexpected time_t size"); SYSCTL_DECL(_net_pf); MALLOC_DECLARE(M_PFHASH); MALLOC_DECLARE(M_PF_RULE_ITEM); SDT_PROVIDER_DECLARE(pf); struct pfi_dynaddr { TAILQ_ENTRY(pfi_dynaddr) entry; struct pf_addr pfid_addr4; struct pf_addr pfid_mask4; struct pf_addr pfid_addr6; struct pf_addr pfid_mask6; struct pfr_ktable *pfid_kt; struct pfi_kkif *pfid_kif; int pfid_net; /* mask or 128 */ int pfid_acnt4; /* address count IPv4 */ int pfid_acnt6; /* address count IPv6 */ sa_family_t pfid_af; /* rule af */ u_int8_t pfid_iflags; /* PFI_AFLAG_* */ }; /* * Address manipulation macros */ #define HTONL(x) (x) = htonl((__uint32_t)(x)) #define HTONS(x) (x) = htons((__uint16_t)(x)) #define NTOHL(x) (x) = ntohl((__uint32_t)(x)) #define NTOHS(x) (x) = ntohs((__uint16_t)(x)) #define PF_NAME "pf" #define PF_HASHROW_ASSERT(h) mtx_assert(&(h)->lock, MA_OWNED) #define PF_HASHROW_LOCK(h) mtx_lock(&(h)->lock) #define PF_HASHROW_UNLOCK(h) mtx_unlock(&(h)->lock) #ifdef INVARIANTS #define PF_STATE_LOCK(s) \ do { \ struct pf_kstate *_s = (s); \ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(_s)]; \ MPASS(_s->lock == &_ih->lock); \ mtx_lock(_s->lock); \ } while (0) #define PF_STATE_UNLOCK(s) \ do { \ struct pf_kstate *_s = (s); \ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(_s)]; \ MPASS(_s->lock == &_ih->lock); \ mtx_unlock(_s->lock); \ } while (0) #else #define PF_STATE_LOCK(s) mtx_lock(s->lock) #define PF_STATE_UNLOCK(s) mtx_unlock(s->lock) #endif #ifdef INVARIANTS #define PF_STATE_LOCK_ASSERT(s) \ do { \ struct pf_kstate *_s = (s); \ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(_s)]; \ MPASS(_s->lock == &_ih->lock); \ PF_HASHROW_ASSERT(_ih); \ } while (0) #else /* !INVARIANTS */ #define PF_STATE_LOCK_ASSERT(s) do {} while (0) #endif /* INVARIANTS */ #ifdef INVARIANTS #define PF_SRC_NODE_LOCK(sn) \ do { \ struct pf_ksrc_node *_sn = (sn); \ struct pf_srchash *_sh = &V_pf_srchash[ \ pf_hashsrc(&_sn->addr, _sn->af)]; \ MPASS(_sn->lock == &_sh->lock); \ mtx_lock(_sn->lock); \ } while (0) #define PF_SRC_NODE_UNLOCK(sn) \ do { \ struct pf_ksrc_node *_sn = (sn); \ struct pf_srchash *_sh = &V_pf_srchash[ \ pf_hashsrc(&_sn->addr, _sn->af)]; \ MPASS(_sn->lock == &_sh->lock); \ mtx_unlock(_sn->lock); \ } while (0) #else #define PF_SRC_NODE_LOCK(sn) mtx_lock((sn)->lock) #define PF_SRC_NODE_UNLOCK(sn) mtx_unlock((sn)->lock) #endif #ifdef INVARIANTS #define PF_SRC_NODE_LOCK_ASSERT(sn) \ do { \ struct pf_ksrc_node *_sn = (sn); \ struct pf_srchash *_sh = &V_pf_srchash[ \ pf_hashsrc(&_sn->addr, _sn->af)]; \ MPASS(_sn->lock == &_sh->lock); \ PF_HASHROW_ASSERT(_sh); \ } while (0) #else /* !INVARIANTS */ #define PF_SRC_NODE_LOCK_ASSERT(sn) do {} while (0) #endif /* INVARIANTS */ extern struct mtx_padalign pf_unlnkdrules_mtx; #define PF_UNLNKDRULES_LOCK() mtx_lock(&pf_unlnkdrules_mtx) #define PF_UNLNKDRULES_UNLOCK() mtx_unlock(&pf_unlnkdrules_mtx) #define PF_UNLNKDRULES_ASSERT() mtx_assert(&pf_unlnkdrules_mtx, MA_OWNED) extern struct sx pf_config_lock; #define PF_CONFIG_LOCK() sx_xlock(&pf_config_lock) #define PF_CONFIG_UNLOCK() sx_xunlock(&pf_config_lock) #define PF_CONFIG_ASSERT() sx_assert(&pf_config_lock, SA_XLOCKED) VNET_DECLARE(struct rmlock, pf_rules_lock); #define V_pf_rules_lock VNET(pf_rules_lock) #define PF_RULES_RLOCK_TRACKER struct rm_priotracker _pf_rules_tracker #define PF_RULES_RLOCK() rm_rlock(&V_pf_rules_lock, &_pf_rules_tracker) #define PF_RULES_RUNLOCK() rm_runlock(&V_pf_rules_lock, &_pf_rules_tracker) #define PF_RULES_WLOCK() rm_wlock(&V_pf_rules_lock) #define PF_RULES_WUNLOCK() rm_wunlock(&V_pf_rules_lock) #define PF_RULES_WOWNED() rm_wowned(&V_pf_rules_lock) #define PF_RULES_ASSERT() rm_assert(&V_pf_rules_lock, RA_LOCKED) #define PF_RULES_RASSERT() rm_assert(&V_pf_rules_lock, RA_RLOCKED) #define PF_RULES_WASSERT() rm_assert(&V_pf_rules_lock, RA_WLOCKED) extern struct mtx_padalign pf_table_stats_lock; #define PF_TABLE_STATS_LOCK() mtx_lock(&pf_table_stats_lock) #define PF_TABLE_STATS_UNLOCK() mtx_unlock(&pf_table_stats_lock) #define PF_TABLE_STATS_OWNED() mtx_owned(&pf_table_stats_lock) #define PF_TABLE_STATS_ASSERT() mtx_assert(&pf_table_stats_lock, MA_OWNED) extern struct sx pf_end_lock; #define PF_MODVER 1 #define PFLOG_MODVER 1 #define PFSYNC_MODVER 1 #define PFLOG_MINVER 1 #define PFLOG_PREFVER PFLOG_MODVER #define PFLOG_MAXVER 1 #define PFSYNC_MINVER 1 #define PFSYNC_PREFVER PFSYNC_MODVER #define PFSYNC_MAXVER 1 #ifdef INET #ifndef INET6 #define PF_INET_ONLY #endif /* ! INET6 */ #endif /* INET */ #ifdef INET6 #ifndef INET #define PF_INET6_ONLY #endif /* ! INET */ #endif /* INET6 */ #ifdef INET #ifdef INET6 #define PF_INET_INET6 #endif /* INET6 */ #endif /* INET */ #else #define PF_INET_INET6 #endif /* _KERNEL */ /* Both IPv4 and IPv6 */ #ifdef PF_INET_INET6 #define PF_AEQ(a, b, c) \ ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \ (c == AF_INET6 && (a)->addr32[3] == (b)->addr32[3] && \ (a)->addr32[2] == (b)->addr32[2] && \ (a)->addr32[1] == (b)->addr32[1] && \ (a)->addr32[0] == (b)->addr32[0])) \ #define PF_ANEQ(a, b, c) \ ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \ (c == AF_INET6 && ((a)->addr32[0] != (b)->addr32[0] || \ (a)->addr32[1] != (b)->addr32[1] || \ (a)->addr32[2] != (b)->addr32[2] || \ (a)->addr32[3] != (b)->addr32[3]))) \ #define PF_AZERO(a, c) \ ((c == AF_INET && !(a)->addr32[0]) || \ (c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \ !(a)->addr32[2] && !(a)->addr32[3] )) \ #define PF_MATCHA(n, a, m, b, f) \ pf_match_addr(n, a, m, b, f) #define PF_ACPY(a, b, f) \ pf_addrcpy(a, b, f) #define PF_AINC(a, f) \ pf_addr_inc(a, f) #define PF_POOLMASK(a, b, c, d, f) \ pf_poolmask(a, b, c, d, f) #else /* Just IPv6 */ #ifdef PF_INET6_ONLY #define PF_AEQ(a, b, c) \ ((a)->addr32[3] == (b)->addr32[3] && \ (a)->addr32[2] == (b)->addr32[2] && \ (a)->addr32[1] == (b)->addr32[1] && \ (a)->addr32[0] == (b)->addr32[0]) \ #define PF_ANEQ(a, b, c) \ ((a)->addr32[3] != (b)->addr32[3] || \ (a)->addr32[2] != (b)->addr32[2] || \ (a)->addr32[1] != (b)->addr32[1] || \ (a)->addr32[0] != (b)->addr32[0]) \ #define PF_AZERO(a, c) \ (!(a)->addr32[0] && \ !(a)->addr32[1] && \ !(a)->addr32[2] && \ !(a)->addr32[3] ) \ #define PF_MATCHA(n, a, m, b, f) \ pf_match_addr(n, a, m, b, f) #define PF_ACPY(a, b, f) \ pf_addrcpy(a, b, f) #define PF_AINC(a, f) \ pf_addr_inc(a, f) #define PF_POOLMASK(a, b, c, d, f) \ pf_poolmask(a, b, c, d, f) #else /* Just IPv4 */ #ifdef PF_INET_ONLY #define PF_AEQ(a, b, c) \ ((a)->addr32[0] == (b)->addr32[0]) #define PF_ANEQ(a, b, c) \ ((a)->addr32[0] != (b)->addr32[0]) #define PF_AZERO(a, c) \ (!(a)->addr32[0]) #define PF_MATCHA(n, a, m, b, f) \ pf_match_addr(n, a, m, b, f) #define PF_ACPY(a, b, f) \ (a)->v4.s_addr = (b)->v4.s_addr #define PF_AINC(a, f) \ do { \ (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \ } while (0) #define PF_POOLMASK(a, b, c, d, f) \ do { \ (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \ (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \ } while (0) #endif /* PF_INET_ONLY */ #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ /* * XXX callers not FIB-aware in our version of pf yet. * OpenBSD fixed it later it seems, 2010/05/07 13:33:16 claudio. */ #define PF_MISMATCHAW(aw, x, af, neg, ifp, rtid) \ ( \ (((aw)->type == PF_ADDR_NOROUTE && \ pf_routable((x), (af), NULL, (rtid))) || \ (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ pf_routable((x), (af), (ifp), (rtid))) || \ ((aw)->type == PF_ADDR_TABLE && \ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ ((aw)->type == PF_ADDR_DYNIFTL && \ !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ ((aw)->type == PF_ADDR_RANGE && \ !pf_match_addr_range(&(aw)->v.a.addr, \ &(aw)->v.a.mask, (x), (af))) || \ ((aw)->type == PF_ADDR_ADDRMASK && \ !PF_AZERO(&(aw)->v.a.mask, (af)) && \ !PF_MATCHA(0, &(aw)->v.a.addr, \ &(aw)->v.a.mask, (x), (af))))) != \ (neg) \ ) #define PF_ALGNMNT(off) (((off) % 2) == 0) #ifdef _KERNEL struct pf_kpooladdr { struct pf_addr_wrap addr; TAILQ_ENTRY(pf_kpooladdr) entries; char ifname[IFNAMSIZ]; struct pfi_kkif *kif; }; TAILQ_HEAD(pf_kpalist, pf_kpooladdr); struct pf_kpool { struct mtx mtx; struct pf_kpalist list; struct pf_kpooladdr *cur; struct pf_poolhashkey key; struct pf_addr counter; struct pf_mape_portset mape; int tblidx; u_int16_t proxy_port[2]; u_int8_t opts; }; struct pf_rule_actions { int32_t rtableid; uint16_t qid; uint16_t pqid; uint16_t max_mss; uint8_t log; uint8_t set_tos; uint8_t min_ttl; uint16_t dnpipe; uint16_t dnrpipe; /* Reverse direction pipe */ uint32_t flags; uint8_t set_prio[2]; }; union pf_keth_rule_ptr { struct pf_keth_rule *ptr; uint32_t nr; }; struct pf_keth_rule_addr { uint8_t addr[ETHER_ADDR_LEN]; uint8_t mask[ETHER_ADDR_LEN]; bool neg; uint8_t isset; }; struct pf_keth_anchor; TAILQ_HEAD(pf_keth_ruleq, pf_keth_rule); struct pf_keth_ruleset { struct pf_keth_ruleq rules[2]; struct pf_keth_rules { struct pf_keth_ruleq *rules; int open; uint32_t ticket; } active, inactive; struct epoch_context epoch_ctx; struct vnet *vnet; struct pf_keth_anchor *anchor; }; RB_HEAD(pf_keth_anchor_global, pf_keth_anchor); RB_HEAD(pf_keth_anchor_node, pf_keth_anchor); struct pf_keth_anchor { RB_ENTRY(pf_keth_anchor) entry_node; RB_ENTRY(pf_keth_anchor) entry_global; struct pf_keth_anchor *parent; struct pf_keth_anchor_node children; char name[PF_ANCHOR_NAME_SIZE]; char path[MAXPATHLEN]; struct pf_keth_ruleset ruleset; int refcnt; /* anchor rules */ uint8_t anchor_relative; uint8_t anchor_wildcard; }; RB_PROTOTYPE(pf_keth_anchor_node, pf_keth_anchor, entry_node, pf_keth_anchor_compare); RB_PROTOTYPE(pf_keth_anchor_global, pf_keth_anchor, entry_global, pf_keth_anchor_compare); struct pf_keth_rule { #define PFE_SKIP_IFP 0 #define PFE_SKIP_DIR 1 #define PFE_SKIP_PROTO 2 #define PFE_SKIP_SRC_ADDR 3 #define PFE_SKIP_DST_ADDR 4 #define PFE_SKIP_SRC_IP_ADDR 5 #define PFE_SKIP_DST_IP_ADDR 6 #define PFE_SKIP_COUNT 7 union pf_keth_rule_ptr skip[PFE_SKIP_COUNT]; TAILQ_ENTRY(pf_keth_rule) entries; struct pf_keth_anchor *anchor; u_int8_t anchor_relative; u_int8_t anchor_wildcard; uint32_t nr; bool quick; /* Filter */ char ifname[IFNAMSIZ]; struct pfi_kkif *kif; bool ifnot; uint8_t direction; uint16_t proto; struct pf_keth_rule_addr src, dst; struct pf_rule_addr ipsrc, ipdst; char match_tagname[PF_TAG_NAME_SIZE]; uint16_t match_tag; bool match_tag_not; /* Stats */ counter_u64_t evaluations; counter_u64_t packets[2]; counter_u64_t bytes[2]; time_t *timestamp; /* Action */ char qname[PF_QNAME_SIZE]; int qid; char tagname[PF_TAG_NAME_SIZE]; uint16_t tag; char bridge_to_name[IFNAMSIZ]; struct pfi_kkif *bridge_to; uint8_t action; uint16_t dnpipe; uint32_t dnflags; char label[PF_RULE_MAX_LABEL_COUNT][PF_RULE_LABEL_SIZE]; uint32_t ridentifier; }; union pf_krule_ptr { struct pf_krule *ptr; u_int32_t nr; }; RB_HEAD(pf_krule_global, pf_krule); RB_PROTOTYPE(pf_krule_global, pf_krule, entry_global, pf_krule_compare); struct pf_krule { struct pf_rule_addr src; struct pf_rule_addr dst; union pf_krule_ptr skip[PF_SKIP_COUNT]; char label[PF_RULE_MAX_LABEL_COUNT][PF_RULE_LABEL_SIZE]; uint32_t ridentifier; char ifname[IFNAMSIZ]; char qname[PF_QNAME_SIZE]; char pqname[PF_QNAME_SIZE]; char tagname[PF_TAG_NAME_SIZE]; char match_tagname[PF_TAG_NAME_SIZE]; char overload_tblname[PF_TABLE_NAME_SIZE]; TAILQ_ENTRY(pf_krule) entries; struct pf_kpool rpool; struct pf_counter_u64 evaluations; struct pf_counter_u64 packets[2]; struct pf_counter_u64 bytes[2]; time_t *timestamp; struct pfi_kkif *kif; struct pf_kanchor *anchor; struct pfr_ktable *overload_tbl; pf_osfp_t os_fingerprint; int32_t rtableid; u_int32_t timeout[PFTM_MAX]; u_int32_t max_states; u_int32_t max_src_nodes; u_int32_t max_src_states; u_int32_t max_src_conn; struct { u_int32_t limit; u_int32_t seconds; } max_src_conn_rate; u_int16_t qid; u_int16_t pqid; u_int16_t dnpipe; u_int16_t dnrpipe; u_int32_t free_flags; u_int32_t nr; u_int32_t prob; uid_t cuid; pid_t cpid; counter_u64_t states_cur; counter_u64_t states_tot; counter_u64_t src_nodes; u_int16_t return_icmp; u_int16_t return_icmp6; u_int16_t max_mss; u_int16_t tag; u_int16_t match_tag; u_int16_t scrub_flags; struct pf_rule_uid uid; struct pf_rule_gid gid; u_int32_t rule_flag; uint32_t rule_ref; u_int8_t action; u_int8_t direction; u_int8_t log; u_int8_t logif; u_int8_t quick; u_int8_t ifnot; u_int8_t match_tag_not; u_int8_t natpass; u_int8_t keep_state; sa_family_t af; u_int8_t proto; u_int8_t type; u_int8_t code; u_int8_t flags; u_int8_t flagset; u_int8_t min_ttl; u_int8_t allow_opts; u_int8_t rt; u_int8_t return_ttl; u_int8_t tos; u_int8_t set_tos; u_int8_t anchor_relative; u_int8_t anchor_wildcard; u_int8_t flush; u_int8_t prio; u_int8_t set_prio[2]; struct { struct pf_addr addr; u_int16_t port; } divert; u_int8_t md5sum[PF_MD5_DIGEST_LENGTH]; RB_ENTRY(pf_krule) entry_global; #ifdef PF_WANT_32_TO_64_COUNTER LIST_ENTRY(pf_krule) allrulelist; bool allrulelinked; #endif }; struct pf_krule_item { SLIST_ENTRY(pf_krule_item) entry; struct pf_krule *r; }; SLIST_HEAD(pf_krule_slist, pf_krule_item); struct pf_ksrc_node { LIST_ENTRY(pf_ksrc_node) entry; struct pf_addr addr; struct pf_addr raddr; struct pf_krule_slist match_rules; union pf_krule_ptr rule; struct pfi_kkif *kif; counter_u64_t bytes[2]; counter_u64_t packets[2]; u_int32_t states; u_int32_t conn; struct pf_threshold conn_rate; u_int32_t creation; u_int32_t expire; sa_family_t af; u_int8_t ruletype; struct mtx *lock; }; #endif struct pf_state_scrub { struct timeval pfss_last; /* time received last packet */ u_int32_t pfss_tsecr; /* last echoed timestamp */ u_int32_t pfss_tsval; /* largest timestamp */ u_int32_t pfss_tsval0; /* original timestamp */ u_int16_t pfss_flags; #define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ #define PFSS_PAWS 0x0010 /* stricter PAWS checks */ #define PFSS_PAWS_IDLED 0x0020 /* was idle too long. no PAWS */ #define PFSS_DATA_TS 0x0040 /* timestamp on data packets */ #define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */ u_int8_t pfss_ttl; /* stashed TTL */ u_int8_t pad; u_int32_t pfss_ts_mod; /* timestamp modulation */ }; struct pf_state_host { struct pf_addr addr; u_int16_t port; u_int16_t pad; }; struct pf_state_peer { struct pf_state_scrub *scrub; /* state is scrubbed */ u_int32_t seqlo; /* Max sequence number sent */ u_int32_t seqhi; /* Max the other end ACKd + win */ u_int32_t seqdiff; /* Sequence number modulator */ u_int16_t max_win; /* largest window (pre scaling) */ u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ u_int8_t pad[1]; }; /* Keep synced with struct pf_state_key. */ struct pf_state_key_cmp { struct pf_addr addr[2]; u_int16_t port[2]; sa_family_t af; u_int8_t proto; u_int8_t pad[2]; }; struct pf_state_key { struct pf_addr addr[2]; u_int16_t port[2]; sa_family_t af; u_int8_t proto; u_int8_t pad[2]; LIST_ENTRY(pf_state_key) entry; TAILQ_HEAD(, pf_kstate) states[2]; }; /* Keep synced with struct pf_kstate. */ struct pf_state_cmp { u_int64_t id; u_int32_t creatorid; u_int8_t direction; u_int8_t pad[3]; }; struct pf_state_scrub_export { uint16_t pfss_flags; uint8_t pfss_ttl; /* stashed TTL */ #define PF_SCRUB_FLAG_VALID 0x01 uint8_t scrub_flag; uint32_t pfss_ts_mod; /* timestamp modulation */ }; struct pf_state_key_export { struct pf_addr addr[2]; uint16_t port[2]; }; struct pf_state_peer_export { struct pf_state_scrub_export scrub; /* state is scrubbed */ uint32_t seqlo; /* Max sequence number sent */ uint32_t seqhi; /* Max the other end ACKd + win */ uint32_t seqdiff; /* Sequence number modulator */ uint16_t max_win; /* largest window (pre scaling) */ uint16_t mss; /* Maximum segment size option */ uint8_t state; /* active state level */ uint8_t wscale; /* window scaling factor */ uint8_t dummy[6]; }; _Static_assert(sizeof(struct pf_state_peer_export) == 32, "size incorrect"); struct pf_state_export { uint64_t version; #define PF_STATE_VERSION 20230404 uint64_t id; char ifname[IFNAMSIZ]; char orig_ifname[IFNAMSIZ]; struct pf_state_key_export key[2]; struct pf_state_peer_export src; struct pf_state_peer_export dst; struct pf_addr rt_addr; uint32_t rule; uint32_t anchor; uint32_t nat_rule; uint32_t creation; uint32_t expire; uint32_t spare0; uint64_t packets[2]; uint64_t bytes[2]; uint32_t creatorid; uint32_t spare1; sa_family_t af; uint8_t proto; uint8_t direction; uint8_t log; uint8_t state_flags_compat; uint8_t timeout; uint8_t sync_flags; uint8_t updates; uint16_t state_flags; uint16_t qid; uint16_t pqid; uint16_t dnpipe; uint16_t dnrpipe; int32_t rtableid; uint8_t min_ttl; uint8_t set_tos; uint16_t max_mss; uint8_t set_prio[2]; uint8_t rt; char rt_ifname[IFNAMSIZ]; uint8_t spare[72]; }; _Static_assert(sizeof(struct pf_state_export) == 384, "size incorrect"); #ifdef _KERNEL struct pf_kstate { /* * Area shared with pf_state_cmp */ u_int64_t id; u_int32_t creatorid; u_int8_t direction; u_int8_t pad[3]; /* * end of the area */ u_int16_t state_flags; u_int8_t timeout; u_int8_t sync_state; /* PFSYNC_S_x */ u_int8_t sync_updates; /* XXX */ u_int refs; struct mtx *lock; TAILQ_ENTRY(pf_kstate) sync_list; TAILQ_ENTRY(pf_kstate) key_list[2]; LIST_ENTRY(pf_kstate) entry; struct pf_state_peer src; struct pf_state_peer dst; struct pf_krule_slist match_rules; union pf_krule_ptr rule; union pf_krule_ptr anchor; union pf_krule_ptr nat_rule; struct pf_addr rt_addr; struct pf_state_key *key[2]; /* addresses stack and wire */ struct pfi_kkif *kif; struct pfi_kkif *orig_kif; /* The real kif, even if we're a floating state (i.e. if == V_pfi_all). */ struct pfi_kkif *rt_kif; struct pf_ksrc_node *src_node; struct pf_ksrc_node *nat_src_node; u_int64_t packets[2]; u_int64_t bytes[2]; u_int32_t creation; u_int32_t expire; u_int32_t pfsync_time; - u_int16_t qid; - u_int16_t pqid; - u_int16_t dnpipe; - u_int16_t dnrpipe; + struct pf_rule_actions act; u_int16_t tag; - u_int8_t log; - int32_t rtableid; - u_int8_t min_ttl; - u_int8_t set_tos; - u_int16_t max_mss; u_int8_t rt; - u_int8_t set_prio[2]; }; /* * Size <= fits 11 objects per page on LP64. Try to not grow the struct beyond that. */ _Static_assert(sizeof(struct pf_kstate) <= 368, "pf_kstate size crosses 368 bytes"); #endif /* * Unified state structures for pulling states out of the kernel * used by pfsync(4) and the pf(4) ioctl. */ struct pfsync_state_scrub { u_int16_t pfss_flags; u_int8_t pfss_ttl; /* stashed TTL */ #define PFSYNC_SCRUB_FLAG_VALID 0x01 u_int8_t scrub_flag; u_int32_t pfss_ts_mod; /* timestamp modulation */ } __packed; struct pfsync_state_peer { struct pfsync_state_scrub scrub; /* state is scrubbed */ u_int32_t seqlo; /* Max sequence number sent */ u_int32_t seqhi; /* Max the other end ACKd + win */ u_int32_t seqdiff; /* Sequence number modulator */ u_int16_t max_win; /* largest window (pre scaling) */ u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ u_int8_t pad[6]; } __packed; struct pfsync_state_key { struct pf_addr addr[2]; u_int16_t port[2]; }; struct pfsync_state_1301 { u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; struct pfsync_state_peer src; struct pfsync_state_peer dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; u_int32_t nat_rule; u_int32_t creation; u_int32_t expire; u_int32_t packets[2][2]; u_int32_t bytes[2][2]; u_int32_t creatorid; sa_family_t af; u_int8_t proto; u_int8_t direction; u_int8_t __spare[2]; u_int8_t log; u_int8_t state_flags; u_int8_t timeout; u_int8_t sync_flags; u_int8_t updates; } __packed; struct pfsync_state_1400 { /* The beginning of the struct is compatible with previous versions */ u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; struct pfsync_state_peer src; struct pfsync_state_peer dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; u_int32_t nat_rule; u_int32_t creation; u_int32_t expire; u_int32_t packets[2][2]; u_int32_t bytes[2][2]; u_int32_t creatorid; sa_family_t af; u_int8_t proto; u_int8_t direction; u_int16_t state_flags; u_int8_t log; u_int8_t __spare; u_int8_t timeout; u_int8_t sync_flags; u_int8_t updates; /* The rest is not */ u_int16_t qid; u_int16_t pqid; u_int16_t dnpipe; u_int16_t dnrpipe; int32_t rtableid; u_int8_t min_ttl; u_int8_t set_tos; u_int16_t max_mss; u_int8_t set_prio[2]; u_int8_t rt; char rt_ifname[IFNAMSIZ]; } __packed; union pfsync_state_union { struct pfsync_state_1301 pfs_1301; struct pfsync_state_1400 pfs_1400; } __packed; #ifdef _KERNEL /* pfsync */ typedef int pfsync_state_import_t(union pfsync_state_union *, int, int); typedef void pfsync_insert_state_t(struct pf_kstate *); typedef void pfsync_update_state_t(struct pf_kstate *); typedef void pfsync_delete_state_t(struct pf_kstate *); typedef void pfsync_clear_states_t(u_int32_t, const char *); typedef int pfsync_defer_t(struct pf_kstate *, struct mbuf *); typedef void pfsync_detach_ifnet_t(struct ifnet *); VNET_DECLARE(pfsync_state_import_t *, pfsync_state_import_ptr); #define V_pfsync_state_import_ptr VNET(pfsync_state_import_ptr) VNET_DECLARE(pfsync_insert_state_t *, pfsync_insert_state_ptr); #define V_pfsync_insert_state_ptr VNET(pfsync_insert_state_ptr) VNET_DECLARE(pfsync_update_state_t *, pfsync_update_state_ptr); #define V_pfsync_update_state_ptr VNET(pfsync_update_state_ptr) VNET_DECLARE(pfsync_delete_state_t *, pfsync_delete_state_ptr); #define V_pfsync_delete_state_ptr VNET(pfsync_delete_state_ptr) VNET_DECLARE(pfsync_clear_states_t *, pfsync_clear_states_ptr); #define V_pfsync_clear_states_ptr VNET(pfsync_clear_states_ptr) VNET_DECLARE(pfsync_defer_t *, pfsync_defer_ptr); #define V_pfsync_defer_ptr VNET(pfsync_defer_ptr) extern pfsync_detach_ifnet_t *pfsync_detach_ifnet_ptr; void pfsync_state_export(union pfsync_state_union *, struct pf_kstate *, int); void pf_state_export(struct pf_state_export *, struct pf_kstate *); /* pflog */ struct pf_kruleset; struct pf_pdesc; typedef int pflog_packet_t(struct pfi_kkif *, struct mbuf *, sa_family_t, u_int8_t, struct pf_krule *, struct pf_krule *, struct pf_kruleset *, struct pf_pdesc *, int); extern pflog_packet_t *pflog_packet_ptr; #endif /* _KERNEL */ #define PFSYNC_FLAG_SRCNODE 0x04 #define PFSYNC_FLAG_NATSRCNODE 0x08 /* for copies to/from network byte order */ /* ioctl interface also uses network byte order */ #define pf_state_peer_hton(s,d) do { \ (d)->seqlo = htonl((s)->seqlo); \ (d)->seqhi = htonl((s)->seqhi); \ (d)->seqdiff = htonl((s)->seqdiff); \ (d)->max_win = htons((s)->max_win); \ (d)->mss = htons((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ if ((s)->scrub) { \ (d)->scrub.pfss_flags = \ htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ } \ } while (0) #define pf_state_peer_ntoh(s,d) do { \ (d)->seqlo = ntohl((s)->seqlo); \ (d)->seqhi = ntohl((s)->seqhi); \ (d)->seqdiff = ntohl((s)->seqdiff); \ (d)->max_win = ntohs((s)->max_win); \ (d)->mss = ntohs((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ (d)->scrub != NULL) { \ (d)->scrub->pfss_flags = \ ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ } \ } while (0) #define pf_state_counter_hton(s,d) do { \ d[0] = htonl((s>>32)&0xffffffff); \ d[1] = htonl(s&0xffffffff); \ } while (0) #define pf_state_counter_from_pfsync(s) \ (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1])) #define pf_state_counter_ntoh(s,d) do { \ d = ntohl(s[0]); \ d = d<<32; \ d += ntohl(s[1]); \ } while (0) TAILQ_HEAD(pf_krulequeue, pf_krule); struct pf_kanchor; struct pf_kruleset { struct { struct pf_krulequeue queues[2]; struct { struct pf_krulequeue *ptr; struct pf_krule **ptr_array; u_int32_t rcount; u_int32_t ticket; int open; struct pf_krule_global *tree; } active, inactive; } rules[PF_RULESET_MAX]; struct pf_kanchor *anchor; u_int32_t tticket; int tables; int topen; }; RB_HEAD(pf_kanchor_global, pf_kanchor); RB_HEAD(pf_kanchor_node, pf_kanchor); struct pf_kanchor { RB_ENTRY(pf_kanchor) entry_global; RB_ENTRY(pf_kanchor) entry_node; struct pf_kanchor *parent; struct pf_kanchor_node children; char name[PF_ANCHOR_NAME_SIZE]; char path[MAXPATHLEN]; struct pf_kruleset ruleset; int refcnt; /* anchor rules */ }; RB_PROTOTYPE(pf_kanchor_global, pf_kanchor, entry_global, pf_anchor_compare); RB_PROTOTYPE(pf_kanchor_node, pf_kanchor, entry_node, pf_kanchor_compare); #define PF_RESERVED_ANCHOR "_pf" #define PFR_TFLAG_PERSIST 0x00000001 #define PFR_TFLAG_CONST 0x00000002 #define PFR_TFLAG_ACTIVE 0x00000004 #define PFR_TFLAG_INACTIVE 0x00000008 #define PFR_TFLAG_REFERENCED 0x00000010 #define PFR_TFLAG_REFDANCHOR 0x00000020 #define PFR_TFLAG_COUNTERS 0x00000040 /* Adjust masks below when adding flags. */ #define PFR_TFLAG_USRMASK (PFR_TFLAG_PERSIST | \ PFR_TFLAG_CONST | \ PFR_TFLAG_COUNTERS) #define PFR_TFLAG_SETMASK (PFR_TFLAG_ACTIVE | \ PFR_TFLAG_INACTIVE | \ PFR_TFLAG_REFERENCED | \ PFR_TFLAG_REFDANCHOR) #define PFR_TFLAG_ALLMASK (PFR_TFLAG_PERSIST | \ PFR_TFLAG_CONST | \ PFR_TFLAG_ACTIVE | \ PFR_TFLAG_INACTIVE | \ PFR_TFLAG_REFERENCED | \ PFR_TFLAG_REFDANCHOR | \ PFR_TFLAG_COUNTERS) struct pf_kanchor_stackframe; struct pf_keth_anchor_stackframe; struct pfr_table { char pfrt_anchor[MAXPATHLEN]; char pfrt_name[PF_TABLE_NAME_SIZE]; u_int32_t pfrt_flags; u_int8_t pfrt_fback; }; enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED, PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE, PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX }; struct pfr_addr { union { struct in_addr _pfra_ip4addr; struct in6_addr _pfra_ip6addr; } pfra_u; u_int8_t pfra_af; u_int8_t pfra_net; u_int8_t pfra_not; u_int8_t pfra_fback; }; #define pfra_ip4addr pfra_u._pfra_ip4addr #define pfra_ip6addr pfra_u._pfra_ip6addr enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX }; enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX }; enum { PFR_TYPE_PACKETS, PFR_TYPE_BYTES, PFR_TYPE_MAX }; #define PFR_NUM_COUNTERS (PFR_DIR_MAX * PFR_OP_ADDR_MAX * PFR_TYPE_MAX) #define PFR_OP_XPASS PFR_OP_ADDR_MAX struct pfr_astats { struct pfr_addr pfras_a; u_int64_t pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; long pfras_tzero; }; enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX }; struct pfr_tstats { struct pfr_table pfrts_t; u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; u_int64_t pfrts_match; u_int64_t pfrts_nomatch; long pfrts_tzero; int pfrts_cnt; int pfrts_refcnt[PFR_REFCNT_MAX]; }; #ifdef _KERNEL struct pfr_kstate_counter { counter_u64_t pkc_pcpu; u_int64_t pkc_zero; }; static inline int pfr_kstate_counter_init(struct pfr_kstate_counter *pfrc, int flags) { pfrc->pkc_zero = 0; pfrc->pkc_pcpu = counter_u64_alloc(flags); if (pfrc->pkc_pcpu == NULL) return (ENOMEM); return (0); } static inline void pfr_kstate_counter_deinit(struct pfr_kstate_counter *pfrc) { counter_u64_free(pfrc->pkc_pcpu); } static inline u_int64_t pfr_kstate_counter_fetch(struct pfr_kstate_counter *pfrc) { u_int64_t c; c = counter_u64_fetch(pfrc->pkc_pcpu); c -= pfrc->pkc_zero; return (c); } static inline void pfr_kstate_counter_zero(struct pfr_kstate_counter *pfrc) { u_int64_t c; c = counter_u64_fetch(pfrc->pkc_pcpu); pfrc->pkc_zero = c; } static inline void pfr_kstate_counter_add(struct pfr_kstate_counter *pfrc, int64_t n) { counter_u64_add(pfrc->pkc_pcpu, n); } struct pfr_ktstats { struct pfr_table pfrts_t; struct pfr_kstate_counter pfrkts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; struct pfr_kstate_counter pfrkts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; struct pfr_kstate_counter pfrkts_match; struct pfr_kstate_counter pfrkts_nomatch; long pfrkts_tzero; int pfrkts_cnt; int pfrkts_refcnt[PFR_REFCNT_MAX]; }; #endif /* _KERNEL */ #define pfrts_name pfrts_t.pfrt_name #define pfrts_flags pfrts_t.pfrt_flags #ifndef _SOCKADDR_UNION_DEFINED #define _SOCKADDR_UNION_DEFINED union sockaddr_union { struct sockaddr sa; struct sockaddr_in sin; struct sockaddr_in6 sin6; }; #endif /* _SOCKADDR_UNION_DEFINED */ struct pfr_kcounters { counter_u64_t pfrkc_counters; long pfrkc_tzero; }; #define pfr_kentry_counter(kc, dir, op, t) \ ((kc)->pfrkc_counters + \ (dir) * PFR_OP_ADDR_MAX * PFR_TYPE_MAX + (op) * PFR_TYPE_MAX + (t)) #ifdef _KERNEL SLIST_HEAD(pfr_kentryworkq, pfr_kentry); struct pfr_kentry { struct radix_node pfrke_node[2]; union sockaddr_union pfrke_sa; SLIST_ENTRY(pfr_kentry) pfrke_workq; struct pfr_kcounters pfrke_counters; u_int8_t pfrke_af; u_int8_t pfrke_net; u_int8_t pfrke_not; u_int8_t pfrke_mark; }; SLIST_HEAD(pfr_ktableworkq, pfr_ktable); RB_HEAD(pfr_ktablehead, pfr_ktable); struct pfr_ktable { struct pfr_ktstats pfrkt_kts; RB_ENTRY(pfr_ktable) pfrkt_tree; SLIST_ENTRY(pfr_ktable) pfrkt_workq; struct radix_node_head *pfrkt_ip4; struct radix_node_head *pfrkt_ip6; struct pfr_ktable *pfrkt_shadow; struct pfr_ktable *pfrkt_root; struct pf_kruleset *pfrkt_rs; long pfrkt_larg; int pfrkt_nflags; }; #define pfrkt_t pfrkt_kts.pfrts_t #define pfrkt_name pfrkt_t.pfrt_name #define pfrkt_anchor pfrkt_t.pfrt_anchor #define pfrkt_ruleset pfrkt_t.pfrt_ruleset #define pfrkt_flags pfrkt_t.pfrt_flags #define pfrkt_cnt pfrkt_kts.pfrkts_cnt #define pfrkt_refcnt pfrkt_kts.pfrkts_refcnt #define pfrkt_packets pfrkt_kts.pfrkts_packets #define pfrkt_bytes pfrkt_kts.pfrkts_bytes #define pfrkt_match pfrkt_kts.pfrkts_match #define pfrkt_nomatch pfrkt_kts.pfrkts_nomatch #define pfrkt_tzero pfrkt_kts.pfrkts_tzero #endif #ifdef _KERNEL struct pfi_kkif { char pfik_name[IFNAMSIZ]; union { RB_ENTRY(pfi_kkif) _pfik_tree; LIST_ENTRY(pfi_kkif) _pfik_list; } _pfik_glue; #define pfik_tree _pfik_glue._pfik_tree #define pfik_list _pfik_glue._pfik_list struct pf_counter_u64 pfik_packets[2][2][2]; struct pf_counter_u64 pfik_bytes[2][2][2]; u_int32_t pfik_tzero; u_int pfik_flags; struct ifnet *pfik_ifp; struct ifg_group *pfik_group; u_int pfik_rulerefs; TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; #ifdef PF_WANT_32_TO_64_COUNTER LIST_ENTRY(pfi_kkif) pfik_allkiflist; #endif }; #endif #define PFI_IFLAG_REFS 0x0001 /* has state references */ #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ #ifdef _KERNEL struct pf_pdesc { struct { int done; uid_t uid; gid_t gid; } lookup; u_int64_t tot_len; /* Make Mickey money */ union pf_headers { struct tcphdr tcp; struct udphdr udp; struct icmp icmp; #ifdef INET6 struct icmp6_hdr icmp6; #endif /* INET6 */ char any[0]; } hdr; struct pf_krule *nat_rule; /* nat/rdr rule applied to packet */ struct pf_addr *src; /* src address */ struct pf_addr *dst; /* dst address */ u_int16_t *sport; u_int16_t *dport; struct pf_mtag *pf_mtag; struct pf_rule_actions act; u_int32_t p_len; /* total length of payload */ u_int16_t *ip_sum; u_int16_t *proto_sum; u_int16_t flags; /* Let SCRUB trigger behavior in * state code. Easier than tags */ #define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ #define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */ sa_family_t af; u_int8_t proto; u_int8_t tos; u_int8_t dir; /* direction */ u_int8_t sidx; /* key index for source */ u_int8_t didx; /* key index for destination */ }; #endif /* flags for RDR options */ #define PF_DPORT_RANGE 0x01 /* Dest port uses range */ #define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */ /* UDP state enumeration */ #define PFUDPS_NO_TRAFFIC 0 #define PFUDPS_SINGLE 1 #define PFUDPS_MULTIPLE 2 #define PFUDPS_NSTATES 3 /* number of state levels */ #define PFUDPS_NAMES { \ "NO_TRAFFIC", \ "SINGLE", \ "MULTIPLE", \ NULL \ } /* Other protocol state enumeration */ #define PFOTHERS_NO_TRAFFIC 0 #define PFOTHERS_SINGLE 1 #define PFOTHERS_MULTIPLE 2 #define PFOTHERS_NSTATES 3 /* number of state levels */ #define PFOTHERS_NAMES { \ "NO_TRAFFIC", \ "SINGLE", \ "MULTIPLE", \ NULL \ } #define ACTION_SET(a, x) \ do { \ if ((a) != NULL) \ *(a) = (x); \ } while (0) #define REASON_SET(a, x) \ do { \ if ((a) != NULL) \ *(a) = (x); \ if (x < PFRES_MAX) \ counter_u64_add(V_pf_status.counters[x], 1); \ } while (0) enum pf_syncookies_mode { PF_SYNCOOKIES_NEVER = 0, PF_SYNCOOKIES_ALWAYS = 1, PF_SYNCOOKIES_ADAPTIVE = 2, PF_SYNCOOKIES_MODE_MAX = PF_SYNCOOKIES_ADAPTIVE }; #define PF_SYNCOOKIES_HIWATPCT 25 #define PF_SYNCOOKIES_LOWATPCT (PF_SYNCOOKIES_HIWATPCT / 2) #ifdef _KERNEL struct pf_kstatus { counter_u64_t counters[PFRES_MAX]; /* reason for passing/dropping */ counter_u64_t lcounters[KLCNT_MAX]; /* limit counters */ struct pf_counter_u64 fcounters[FCNT_MAX]; /* state operation counters */ counter_u64_t scounters[SCNT_MAX]; /* src_node operation counters */ uint32_t states; uint32_t src_nodes; uint32_t running; uint32_t since; uint32_t debug; uint32_t hostid; char ifname[IFNAMSIZ]; uint8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; bool keep_counters; enum pf_syncookies_mode syncookies_mode; bool syncookies_active; uint64_t syncookies_inflight[2]; uint32_t states_halfopen; uint32_t reass; }; #endif struct pf_divert { union { struct in_addr ipv4; struct in6_addr ipv6; } addr; u_int16_t port; }; #define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ #define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ /* * Limit the length of the fragment queue traversal. Remember * search entry points based on the fragment offset. */ #define PF_FRAG_ENTRY_POINTS 16 /* * The number of entries in the fragment queue must be limited * to avoid DoS by linear searching. Instead of a global limit, * use a limit per entry point. For large packets these sum up. */ #define PF_FRAG_ENTRY_LIMIT 64 /* * ioctl parameter structures */ struct pfioc_pooladdr { u_int32_t action; u_int32_t ticket; u_int32_t nr; u_int32_t r_num; u_int8_t r_action; u_int8_t r_last; u_int8_t af; char anchor[MAXPATHLEN]; struct pf_pooladdr addr; }; struct pfioc_rule { u_int32_t action; u_int32_t ticket; u_int32_t pool_ticket; u_int32_t nr; char anchor[MAXPATHLEN]; char anchor_call[MAXPATHLEN]; struct pf_rule rule; }; struct pfioc_natlook { struct pf_addr saddr; struct pf_addr daddr; struct pf_addr rsaddr; struct pf_addr rdaddr; u_int16_t sport; u_int16_t dport; u_int16_t rsport; u_int16_t rdport; sa_family_t af; u_int8_t proto; u_int8_t direction; }; struct pfioc_state { struct pfsync_state_1301 state; }; struct pfioc_src_node_kill { sa_family_t psnk_af; struct pf_rule_addr psnk_src; struct pf_rule_addr psnk_dst; u_int psnk_killed; }; #ifdef _KERNEL struct pf_kstate_kill { struct pf_state_cmp psk_pfcmp; sa_family_t psk_af; int psk_proto; struct pf_rule_addr psk_src; struct pf_rule_addr psk_dst; struct pf_rule_addr psk_rt_addr; char psk_ifname[IFNAMSIZ]; char psk_label[PF_RULE_LABEL_SIZE]; u_int psk_killed; bool psk_kill_match; }; #endif struct pfioc_state_kill { struct pf_state_cmp psk_pfcmp; sa_family_t psk_af; int psk_proto; struct pf_rule_addr psk_src; struct pf_rule_addr psk_dst; char psk_ifname[IFNAMSIZ]; char psk_label[PF_RULE_LABEL_SIZE]; u_int psk_killed; }; struct pfioc_states { int ps_len; union { void *ps_buf; struct pfsync_state_1301 *ps_states; }; }; struct pfioc_states_v2 { int ps_len; uint64_t ps_req_version; union { void *ps_buf; struct pf_state_export *ps_states; }; }; struct pfioc_src_nodes { int psn_len; union { void *psn_buf; struct pf_src_node *psn_src_nodes; }; }; struct pfioc_if { char ifname[IFNAMSIZ]; }; struct pfioc_tm { int timeout; int seconds; }; struct pfioc_limit { int index; unsigned limit; }; struct pfioc_altq_v0 { u_int32_t action; u_int32_t ticket; u_int32_t nr; struct pf_altq_v0 altq; }; struct pfioc_altq_v1 { u_int32_t action; u_int32_t ticket; u_int32_t nr; /* * Placed here so code that only uses the above parameters can be * written entirely in terms of the v0 or v1 type. */ u_int32_t version; struct pf_altq_v1 altq; }; /* * Latest version of struct pfioc_altq_vX. This must move in lock-step with * the latest version of struct pf_altq_vX as it has that struct as a * member. */ #define PFIOC_ALTQ_VERSION PF_ALTQ_VERSION struct pfioc_qstats_v0 { u_int32_t ticket; u_int32_t nr; void *buf; int nbytes; u_int8_t scheduler; }; struct pfioc_qstats_v1 { u_int32_t ticket; u_int32_t nr; void *buf; int nbytes; u_int8_t scheduler; /* * Placed here so code that only uses the above parameters can be * written entirely in terms of the v0 or v1 type. */ u_int32_t version; /* Requested version of stats struct */ }; /* Latest version of struct pfioc_qstats_vX */ #define PFIOC_QSTATS_VERSION 1 struct pfioc_ruleset { u_int32_t nr; char path[MAXPATHLEN]; char name[PF_ANCHOR_NAME_SIZE]; }; #define PF_RULESET_ALTQ (PF_RULESET_MAX) #define PF_RULESET_TABLE (PF_RULESET_MAX+1) #define PF_RULESET_ETH (PF_RULESET_MAX+2) struct pfioc_trans { int size; /* number of elements */ int esize; /* size of each element in bytes */ struct pfioc_trans_e { int rs_num; char anchor[MAXPATHLEN]; u_int32_t ticket; } *array; }; #define PFR_FLAG_ATOMIC 0x00000001 /* unused */ #define PFR_FLAG_DUMMY 0x00000002 #define PFR_FLAG_FEEDBACK 0x00000004 #define PFR_FLAG_CLSTATS 0x00000008 #define PFR_FLAG_ADDRSTOO 0x00000010 #define PFR_FLAG_REPLACE 0x00000020 #define PFR_FLAG_ALLRSETS 0x00000040 #define PFR_FLAG_ALLMASK 0x0000007F #ifdef _KERNEL #define PFR_FLAG_USERIOCTL 0x10000000 #endif struct pfioc_table { struct pfr_table pfrio_table; void *pfrio_buffer; int pfrio_esize; int pfrio_size; int pfrio_size2; int pfrio_nadd; int pfrio_ndel; int pfrio_nchange; int pfrio_flags; u_int32_t pfrio_ticket; }; #define pfrio_exists pfrio_nadd #define pfrio_nzero pfrio_nadd #define pfrio_nmatch pfrio_nadd #define pfrio_naddr pfrio_size2 #define pfrio_setflag pfrio_size2 #define pfrio_clrflag pfrio_nadd struct pfioc_iface { char pfiio_name[IFNAMSIZ]; void *pfiio_buffer; int pfiio_esize; int pfiio_size; int pfiio_nzero; int pfiio_flags; }; /* * ioctl operations */ #define DIOCSTART _IO ('D', 1) #define DIOCSTOP _IO ('D', 2) #define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule) #define DIOCADDRULENV _IOWR('D', 4, struct pfioc_nv) #define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule) #define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule) #define DIOCGETRULENV _IOWR('D', 7, struct pfioc_nv) /* XXX cut 8 - 17 */ #define DIOCCLRSTATES _IOWR('D', 18, struct pfioc_state_kill) #define DIOCCLRSTATESNV _IOWR('D', 18, struct pfioc_nv) #define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state) #define DIOCGETSTATENV _IOWR('D', 19, struct pfioc_nv) #define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if) #define DIOCGETSTATUS _IOWR('D', 21, struct pf_status) #define DIOCGETSTATUSNV _IOWR('D', 21, struct pfioc_nv) #define DIOCCLRSTATUS _IO ('D', 22) #define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook) #define DIOCSETDEBUG _IOWR('D', 24, u_int32_t) #define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states) #define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule) /* XXX cut 26 - 28 */ #define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm) #define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm) #define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state) #define DIOCCLRRULECTRS _IO ('D', 38) #define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit) #define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit) #define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill) #define DIOCKILLSTATESNV _IOWR('D', 41, struct pfioc_nv) #define DIOCSTARTALTQ _IO ('D', 42) #define DIOCSTOPALTQ _IO ('D', 43) #define DIOCADDALTQV0 _IOWR('D', 45, struct pfioc_altq_v0) #define DIOCADDALTQV1 _IOWR('D', 45, struct pfioc_altq_v1) #define DIOCGETALTQSV0 _IOWR('D', 47, struct pfioc_altq_v0) #define DIOCGETALTQSV1 _IOWR('D', 47, struct pfioc_altq_v1) #define DIOCGETALTQV0 _IOWR('D', 48, struct pfioc_altq_v0) #define DIOCGETALTQV1 _IOWR('D', 48, struct pfioc_altq_v1) #define DIOCCHANGEALTQV0 _IOWR('D', 49, struct pfioc_altq_v0) #define DIOCCHANGEALTQV1 _IOWR('D', 49, struct pfioc_altq_v1) #define DIOCGETQSTATSV0 _IOWR('D', 50, struct pfioc_qstats_v0) #define DIOCGETQSTATSV1 _IOWR('D', 50, struct pfioc_qstats_v1) #define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr) #define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr) #define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr) #define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr) #define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr) /* XXX cut 55 - 57 */ #define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset) #define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset) #define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table) #define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table) #define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table) #define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table) #define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table) #define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) #define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table) #define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table) #define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table) #define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table) #define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table) #define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table) #define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) #define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table) #define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table) #define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) #define DIOCOSFPFLUSH _IO('D', 78) #define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) #define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) #define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans) #define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans) #define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans) #define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes) #define DIOCCLRSRCNODES _IO('D', 85) #define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) #define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) #define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) #define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) #define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) #define DIOCGIFSPEEDV0 _IOWR('D', 92, struct pf_ifspeed_v0) #define DIOCGIFSPEEDV1 _IOWR('D', 92, struct pf_ifspeed_v1) #define DIOCGETSTATESV2 _IOWR('D', 93, struct pfioc_states_v2) #define DIOCGETSYNCOOKIES _IOWR('D', 94, struct pfioc_nv) #define DIOCSETSYNCOOKIES _IOWR('D', 95, struct pfioc_nv) #define DIOCKEEPCOUNTERS _IOWR('D', 96, struct pfioc_nv) #define DIOCKEEPCOUNTERS_FREEBSD13 _IOWR('D', 92, struct pfioc_nv) #define DIOCADDETHRULE _IOWR('D', 97, struct pfioc_nv) #define DIOCGETETHRULE _IOWR('D', 98, struct pfioc_nv) #define DIOCGETETHRULES _IOWR('D', 99, struct pfioc_nv) #define DIOCGETETHRULESETS _IOWR('D', 100, struct pfioc_nv) #define DIOCGETETHRULESET _IOWR('D', 101, struct pfioc_nv) #define DIOCSETREASS _IOWR('D', 102, u_int32_t) struct pf_ifspeed_v0 { char ifname[IFNAMSIZ]; u_int32_t baudrate; }; struct pf_ifspeed_v1 { char ifname[IFNAMSIZ]; u_int32_t baudrate32; /* layout identical to struct pf_ifspeed_v0 up to this point */ u_int64_t baudrate; }; /* Latest version of struct pf_ifspeed_vX */ #define PF_IFSPEED_VERSION 1 /* * Compatibility and convenience macros */ #ifndef _KERNEL #ifdef PFIOC_USE_LATEST /* * Maintaining in-tree consumers of the ioctl interface is easier when that * code can be written in terms old names that refer to the latest interface * version as that reduces the required changes in the consumers to those * that are functionally necessary to accommodate a new interface version. */ #define pfioc_altq __CONCAT(pfioc_altq_v, PFIOC_ALTQ_VERSION) #define pfioc_qstats __CONCAT(pfioc_qstats_v, PFIOC_QSTATS_VERSION) #define pf_ifspeed __CONCAT(pf_ifspeed_v, PF_IFSPEED_VERSION) #define DIOCADDALTQ __CONCAT(DIOCADDALTQV, PFIOC_ALTQ_VERSION) #define DIOCGETALTQS __CONCAT(DIOCGETALTQSV, PFIOC_ALTQ_VERSION) #define DIOCGETALTQ __CONCAT(DIOCGETALTQV, PFIOC_ALTQ_VERSION) #define DIOCCHANGEALTQ __CONCAT(DIOCCHANGEALTQV, PFIOC_ALTQ_VERSION) #define DIOCGETQSTATS __CONCAT(DIOCGETQSTATSV, PFIOC_QSTATS_VERSION) #define DIOCGIFSPEED __CONCAT(DIOCGIFSPEEDV, PF_IFSPEED_VERSION) #else /* * When building out-of-tree code that is written for the old interface, * such as may exist in ports for example, resolve the old struct tags and * ioctl command names to the v0 versions. */ #define pfioc_altq __CONCAT(pfioc_altq_v, 0) #define pfioc_qstats __CONCAT(pfioc_qstats_v, 0) #define pf_ifspeed __CONCAT(pf_ifspeed_v, 0) #define DIOCADDALTQ __CONCAT(DIOCADDALTQV, 0) #define DIOCGETALTQS __CONCAT(DIOCGETALTQSV, 0) #define DIOCGETALTQ __CONCAT(DIOCGETALTQV, 0) #define DIOCCHANGEALTQ __CONCAT(DIOCCHANGEALTQV, 0) #define DIOCGETQSTATS __CONCAT(DIOCGETQSTATSV, 0) #define DIOCGIFSPEED __CONCAT(DIOCGIFSPEEDV, 0) #endif /* PFIOC_USE_LATEST */ #endif /* _KERNEL */ #ifdef _KERNEL LIST_HEAD(pf_ksrc_node_list, pf_ksrc_node); struct pf_srchash { struct pf_ksrc_node_list nodes; struct mtx lock; }; struct pf_keyhash { LIST_HEAD(, pf_state_key) keys; struct mtx lock; }; struct pf_idhash { LIST_HEAD(, pf_kstate) states; struct mtx lock; }; extern u_long pf_ioctl_maxcount; extern u_long pf_hashmask; extern u_long pf_srchashmask; #define PF_HASHSIZ (131072) #define PF_SRCHASHSIZ (PF_HASHSIZ/4) VNET_DECLARE(struct pf_keyhash *, pf_keyhash); VNET_DECLARE(struct pf_idhash *, pf_idhash); #define V_pf_keyhash VNET(pf_keyhash) #define V_pf_idhash VNET(pf_idhash) VNET_DECLARE(struct pf_srchash *, pf_srchash); #define V_pf_srchash VNET(pf_srchash) #define PF_IDHASH(s) (be64toh((s)->id) % (pf_hashmask + 1)) VNET_DECLARE(void *, pf_swi_cookie); #define V_pf_swi_cookie VNET(pf_swi_cookie) VNET_DECLARE(struct intr_event *, pf_swi_ie); #define V_pf_swi_ie VNET(pf_swi_ie) VNET_DECLARE(struct unrhdr64, pf_stateid); #define V_pf_stateid VNET(pf_stateid) TAILQ_HEAD(pf_altqqueue, pf_altq); VNET_DECLARE(struct pf_altqqueue, pf_altqs[4]); #define V_pf_altqs VNET(pf_altqs) VNET_DECLARE(struct pf_kpalist, pf_pabuf); #define V_pf_pabuf VNET(pf_pabuf) VNET_DECLARE(u_int32_t, ticket_altqs_active); #define V_ticket_altqs_active VNET(ticket_altqs_active) VNET_DECLARE(u_int32_t, ticket_altqs_inactive); #define V_ticket_altqs_inactive VNET(ticket_altqs_inactive) VNET_DECLARE(int, altqs_inactive_open); #define V_altqs_inactive_open VNET(altqs_inactive_open) VNET_DECLARE(u_int32_t, ticket_pabuf); #define V_ticket_pabuf VNET(ticket_pabuf) VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active); #define V_pf_altqs_active VNET(pf_altqs_active) VNET_DECLARE(struct pf_altqqueue *, pf_altq_ifs_active); #define V_pf_altq_ifs_active VNET(pf_altq_ifs_active) VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive); #define V_pf_altqs_inactive VNET(pf_altqs_inactive) VNET_DECLARE(struct pf_altqqueue *, pf_altq_ifs_inactive); #define V_pf_altq_ifs_inactive VNET(pf_altq_ifs_inactive) VNET_DECLARE(struct pf_krulequeue, pf_unlinked_rules); #define V_pf_unlinked_rules VNET(pf_unlinked_rules) #ifdef PF_WANT_32_TO_64_COUNTER LIST_HEAD(allkiflist_head, pfi_kkif); VNET_DECLARE(struct allkiflist_head, pf_allkiflist); #define V_pf_allkiflist VNET(pf_allkiflist) VNET_DECLARE(size_t, pf_allkifcount); #define V_pf_allkifcount VNET(pf_allkifcount) VNET_DECLARE(struct pfi_kkif *, pf_kifmarker); #define V_pf_kifmarker VNET(pf_kifmarker) LIST_HEAD(allrulelist_head, pf_krule); VNET_DECLARE(struct allrulelist_head, pf_allrulelist); #define V_pf_allrulelist VNET(pf_allrulelist) VNET_DECLARE(size_t, pf_allrulecount); #define V_pf_allrulecount VNET(pf_allrulecount) VNET_DECLARE(struct pf_krule *, pf_rulemarker); #define V_pf_rulemarker VNET(pf_rulemarker) #endif void pf_initialize(void); void pf_mtag_initialize(void); void pf_mtag_cleanup(void); void pf_cleanup(void); struct pf_mtag *pf_get_mtag(struct mbuf *); extern void pf_calc_skip_steps(struct pf_krulequeue *); #ifdef ALTQ extern void pf_altq_ifnet_event(struct ifnet *, int); #endif VNET_DECLARE(uma_zone_t, pf_state_z); #define V_pf_state_z VNET(pf_state_z) VNET_DECLARE(uma_zone_t, pf_state_key_z); #define V_pf_state_key_z VNET(pf_state_key_z) VNET_DECLARE(uma_zone_t, pf_state_scrub_z); #define V_pf_state_scrub_z VNET(pf_state_scrub_z) extern void pf_purge_thread(void *); extern void pf_unload_vnet_purge(void); extern void pf_intr(void *); extern void pf_purge_expired_src_nodes(void); extern int pf_unlink_state(struct pf_kstate *); extern int pf_state_insert(struct pfi_kkif *, struct pfi_kkif *, struct pf_state_key *, struct pf_state_key *, struct pf_kstate *); extern struct pf_kstate *pf_alloc_state(int); extern void pf_free_state(struct pf_kstate *); static __inline void pf_ref_state(struct pf_kstate *s) { refcount_acquire(&s->refs); } static __inline int pf_release_state(struct pf_kstate *s) { if (refcount_release(&s->refs)) { pf_free_state(s); return (1); } else return (0); } static __inline int pf_release_staten(struct pf_kstate *s, u_int n) { if (refcount_releasen(&s->refs, n)) { pf_free_state(s); return (1); } else return (0); } extern struct pf_kstate *pf_find_state_byid(uint64_t, uint32_t); extern struct pf_kstate *pf_find_state_all(struct pf_state_key_cmp *, u_int, int *); extern bool pf_find_state_all_exists(struct pf_state_key_cmp *, u_int); extern struct pf_ksrc_node *pf_find_src_node(struct pf_addr *, struct pf_krule *, sa_family_t, struct pf_srchash **, bool); extern void pf_unlink_src_node(struct pf_ksrc_node *); extern u_int pf_free_src_nodes(struct pf_ksrc_node_list *); extern void pf_print_state(struct pf_kstate *); extern void pf_print_flags(u_int8_t); extern int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t, u_int16_t, u_int16_t, u_int8_t); VNET_DECLARE(struct ifnet *, sync_ifp); #define V_sync_ifp VNET(sync_ifp); VNET_DECLARE(struct pf_krule, pf_default_rule); #define V_pf_default_rule VNET(pf_default_rule) extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, sa_family_t); void pf_free_rule(struct pf_krule *); int pf_test_eth(int, int, struct ifnet *, struct mbuf **, struct inpcb *); #ifdef INET int pf_test(int, int, struct ifnet *, struct mbuf **, struct inpcb *, struct pf_rule_actions *); int pf_normalize_ip(struct mbuf **, struct pfi_kkif *, u_short *, struct pf_pdesc *); #endif /* INET */ #ifdef INET6 int pf_test6(int, int, struct ifnet *, struct mbuf **, struct inpcb *, struct pf_rule_actions *); int pf_normalize_ip6(struct mbuf **, struct pfi_kkif *, u_short *, struct pf_pdesc *); void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, sa_family_t); void pf_addr_inc(struct pf_addr *, sa_family_t); int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *, bool); #endif /* INET6 */ u_int32_t pf_new_isn(struct pf_kstate *); void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, sa_family_t); void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t, u_int8_t); void pf_change_tcp_a(struct mbuf *, void *, u_int16_t *, u_int32_t); void pf_patch_16_unaligned(struct mbuf *, u_int16_t *, void *, u_int16_t, bool, u_int8_t); void pf_patch_32_unaligned(struct mbuf *, u_int16_t *, void *, u_int32_t, bool, u_int8_t); void pf_send_deferred_syn(struct pf_kstate *); int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); int pf_match_addr_range(struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); void pf_normalize_init(void); void pf_normalize_cleanup(void); int pf_normalize_tcp(struct pfi_kkif *, struct mbuf *, int, int, void *, struct pf_pdesc *); void pf_normalize_tcp_cleanup(struct pf_kstate *); int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *); int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, u_short *, struct tcphdr *, struct pf_kstate *, struct pf_state_peer *, struct pf_state_peer *, int *); u_int32_t pf_state_expires(const struct pf_kstate *); void pf_purge_expired_fragments(void); void pf_purge_fragments(uint32_t); int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kkif *, int); int pf_socket_lookup(struct pf_pdesc *, struct mbuf *); struct pf_state_key *pf_alloc_state_key(int); void pfr_initialize(void); void pfr_cleanup(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, u_int64_t, int, int, int); int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t); void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); struct pfr_ktable * pfr_attach_table(struct pf_kruleset *, char *); struct pfr_ktable * pfr_eth_attach_table(struct pf_keth_ruleset *, char *); void pfr_detach_table(struct pfr_ktable *); int pfr_clr_tables(struct pfr_table *, int *, int); int pfr_add_tables(struct pfr_table *, int, int *, int); int pfr_del_tables(struct pfr_table *, int, int *, int); int pfr_table_count(struct pfr_table *, int); int pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int); int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int); int pfr_clr_tstats(struct pfr_table *, int, int *, int); int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int); int pfr_clr_addrs(struct pfr_table *, int *, int); int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long); int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int *, int *, int *, int, u_int32_t); int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int); int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int); int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int); int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int); int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); MALLOC_DECLARE(PFI_MTYPE); VNET_DECLARE(struct pfi_kkif *, pfi_all); #define V_pfi_all VNET(pfi_all) void pfi_initialize(void); void pfi_initialize_vnet(void); void pfi_cleanup(void); void pfi_cleanup_vnet(void); void pfi_kkif_ref(struct pfi_kkif *); void pfi_kkif_unref(struct pfi_kkif *); struct pfi_kkif *pfi_kkif_find(const char *); struct pfi_kkif *pfi_kkif_attach(struct pfi_kkif *, const char *); int pfi_kkif_match(struct pfi_kkif *, struct pfi_kkif *); void pfi_kkif_purge(void); int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, sa_family_t); int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); void pfi_dynaddr_remove(struct pfi_dynaddr *); void pfi_dynaddr_copyout(struct pf_addr_wrap *); void pfi_update_status(const char *, struct pf_status *); void pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_set_flags(const char *, int); int pfi_clear_flags(const char *, int); int pf_match_tag(struct mbuf *, struct pf_krule *, int *, int); int pf_tag_packet(struct mbuf *, struct pf_pdesc *, int); int pf_addr_cmp(struct pf_addr *, struct pf_addr *, sa_family_t); u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, sa_family_t); u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); struct mbuf *pf_build_tcp(const struct pf_krule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, bool, u_int16_t, u_int16_t, int); void pf_send_tcp(const struct pf_krule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, bool, u_int16_t, u_int16_t, int); void pf_syncookies_init(void); void pf_syncookies_cleanup(void); int pf_get_syncookies(struct pfioc_nv *); int pf_set_syncookies(struct pfioc_nv *); int pf_synflood_check(struct pf_pdesc *); void pf_syncookie_send(struct mbuf *m, int off, struct pf_pdesc *); bool pf_syncookie_check(struct pf_pdesc *); u_int8_t pf_syncookie_validate(struct pf_pdesc *); struct mbuf * pf_syncookie_recreate_syn(uint8_t, int, struct pf_pdesc *); VNET_DECLARE(struct pf_kstatus, pf_status); #define V_pf_status VNET(pf_status) struct pf_limit { uma_zone_t zone; u_int limit; }; VNET_DECLARE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); #define V_pf_limits VNET(pf_limits) #endif /* _KERNEL */ #ifdef _KERNEL VNET_DECLARE(struct pf_kanchor_global, pf_anchors); #define V_pf_anchors VNET(pf_anchors) VNET_DECLARE(struct pf_kanchor, pf_main_anchor); #define V_pf_main_anchor VNET(pf_main_anchor) VNET_DECLARE(struct pf_keth_anchor_global, pf_keth_anchors); #define V_pf_keth_anchors VNET(pf_keth_anchors) #define pf_main_ruleset V_pf_main_anchor.ruleset VNET_DECLARE(struct pf_keth_anchor, pf_main_keth_anchor); #define V_pf_main_keth_anchor VNET(pf_main_keth_anchor) VNET_DECLARE(struct pf_keth_ruleset*, pf_keth); #define V_pf_keth VNET(pf_keth) void pf_init_kruleset(struct pf_kruleset *); void pf_init_keth(struct pf_keth_ruleset *); int pf_kanchor_setup(struct pf_krule *, const struct pf_kruleset *, const char *); int pf_kanchor_nvcopyout(const struct pf_kruleset *, const struct pf_krule *, nvlist_t *); int pf_kanchor_copyout(const struct pf_kruleset *, const struct pf_krule *, struct pfioc_rule *); void pf_kanchor_remove(struct pf_krule *); void pf_remove_if_empty_kruleset(struct pf_kruleset *); struct pf_kruleset *pf_find_kruleset(const char *); struct pf_kruleset *pf_find_or_create_kruleset(const char *); void pf_rs_initialize(void); struct pf_krule *pf_krule_alloc(void); void pf_remove_if_empty_keth_ruleset( struct pf_keth_ruleset *); struct pf_keth_ruleset *pf_find_keth_ruleset(const char *); struct pf_keth_anchor *pf_find_keth_anchor(const char *); int pf_keth_anchor_setup(struct pf_keth_rule *, const struct pf_keth_ruleset *, const char *); int pf_keth_anchor_nvcopyout( const struct pf_keth_ruleset *, const struct pf_keth_rule *, nvlist_t *); struct pf_keth_ruleset *pf_find_or_create_keth_ruleset(const char *); void pf_keth_anchor_remove(struct pf_keth_rule *); void pf_krule_free(struct pf_krule *); #endif /* The fingerprint functions can be linked into userland programs (tcpdump) */ int pf_osfp_add(struct pf_osfp_ioctl *); #ifdef _KERNEL struct pf_osfp_enlist * pf_osfp_fingerprint(struct pf_pdesc *, struct mbuf *, int, const struct tcphdr *); #endif /* _KERNEL */ void pf_osfp_flush(void); int pf_osfp_get(struct pf_osfp_ioctl *); int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); #ifdef _KERNEL void pf_print_host(struct pf_addr *, u_int16_t, sa_family_t); void pf_step_into_anchor(struct pf_kanchor_stackframe *, int *, struct pf_kruleset **, int, struct pf_krule **, struct pf_krule **, int *); int pf_step_out_of_anchor(struct pf_kanchor_stackframe *, int *, struct pf_kruleset **, int, struct pf_krule **, struct pf_krule **, int *); void pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *, int *, struct pf_keth_ruleset **, struct pf_keth_rule **, struct pf_keth_rule **, int *); int pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *, int *, struct pf_keth_ruleset **, struct pf_keth_rule **, struct pf_keth_rule **, int *); u_short pf_map_addr(u_int8_t, struct pf_krule *, struct pf_addr *, struct pf_addr *, struct pf_addr *, struct pf_ksrc_node **); struct pf_krule *pf_get_translation(struct pf_pdesc *, struct mbuf *, int, struct pfi_kkif *, struct pf_ksrc_node **, struct pf_state_key **, struct pf_state_key **, struct pf_addr *, struct pf_addr *, uint16_t, uint16_t, struct pf_kanchor_stackframe *); struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t); struct pf_state_key *pf_state_key_clone(struct pf_state_key *); - +void pf_rule_to_actions(struct pf_krule *, + struct pf_rule_actions *); int pf_normalize_mss(struct mbuf *m, int off, - struct pf_pdesc *pd, u_int16_t maxmss); -u_int16_t pf_rule_to_scrub_flags(u_int32_t); + struct pf_pdesc *pd); #ifdef INET -void pf_scrub_ip(struct mbuf **, uint32_t, uint8_t, uint8_t); +void pf_scrub_ip(struct mbuf **, struct pf_pdesc *); #endif /* INET */ #ifdef INET6 -void pf_scrub_ip6(struct mbuf **, uint32_t, uint8_t, uint8_t); +void pf_scrub_ip6(struct mbuf **, struct pf_pdesc *); #endif /* INET6 */ struct pfi_kkif *pf_kkif_create(int); void pf_kkif_free(struct pfi_kkif *); void pf_kkif_zero(struct pfi_kkif *); #endif /* _KERNEL */ #endif /* _NET_PFVAR_H_ */ diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 2af0b57eba45..4fe916c77ab8 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -1,2898 +1,2898 @@ /*- * SPDX-License-Identifier: (BSD-2-Clause AND ISC) * * Copyright (c) 2002 Michael Shalayeff * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2009 David Gwynne * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ * * Revisions picked from OpenBSD after revision 1.110 import: * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates * 1.120, 1.175 - use monotonic time_uptime * 1.122 - reduce number of updates for non-TCP sessions * 1.125, 1.127 - rewrite merge or stale processing * 1.128 - cleanups * 1.146 - bzero() mbuf before sparsely filling it with data * 1.170 - SIOCSIFMTU checks * 1.126, 1.142 - deferred packets processing * 1.173 - correct expire time processing */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_pf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct pfsync_bucket; struct pfsync_softc; union inet_template { struct ip ipv4; }; #define PFSYNC_MINPKT ( \ sizeof(union inet_template) + \ sizeof(struct pfsync_header) + \ sizeof(struct pfsync_subheader) ) static int pfsync_upd_tcp(struct pf_kstate *, struct pfsync_state_peer *, struct pfsync_state_peer *); static int pfsync_in_clr(struct mbuf *, int, int, int, int); static int pfsync_in_ins(struct mbuf *, int, int, int, int); static int pfsync_in_iack(struct mbuf *, int, int, int, int); static int pfsync_in_upd(struct mbuf *, int, int, int, int); static int pfsync_in_upd_c(struct mbuf *, int, int, int, int); static int pfsync_in_ureq(struct mbuf *, int, int, int, int); static int pfsync_in_del_c(struct mbuf *, int, int, int, int); static int pfsync_in_bus(struct mbuf *, int, int, int, int); static int pfsync_in_tdb(struct mbuf *, int, int, int, int); static int pfsync_in_eof(struct mbuf *, int, int, int, int); static int pfsync_in_error(struct mbuf *, int, int, int, int); static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = { pfsync_in_clr, /* PFSYNC_ACT_CLR */ pfsync_in_ins, /* PFSYNC_ACT_INS_1301 */ pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ pfsync_in_upd, /* PFSYNC_ACT_UPD_1301 */ pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ pfsync_in_error, /* PFSYNC_ACT_DEL */ pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ pfsync_in_error, /* PFSYNC_ACT_INS_F */ pfsync_in_error, /* PFSYNC_ACT_DEL_F */ pfsync_in_bus, /* PFSYNC_ACT_BUS */ pfsync_in_tdb, /* PFSYNC_ACT_TDB */ pfsync_in_eof, /* PFSYNC_ACT_EOF */ pfsync_in_ins, /* PFSYNC_ACT_INS_1400 */ pfsync_in_upd, /* PFSYNC_ACT_UPD_1400 */ }; struct pfsync_q { void (*write)(struct pf_kstate *, void *); size_t len; u_int8_t action; }; /* We have the following sync queues */ enum pfsync_q_id { PFSYNC_Q_INS_1301, PFSYNC_Q_INS_1400, PFSYNC_Q_IACK, PFSYNC_Q_UPD_1301, PFSYNC_Q_UPD_1400, PFSYNC_Q_UPD_C, PFSYNC_Q_DEL_C, PFSYNC_Q_COUNT, }; /* Functions for building messages for given queue */ static void pfsync_out_state_1301(struct pf_kstate *, void *); static void pfsync_out_state_1400(struct pf_kstate *, void *); static void pfsync_out_iack(struct pf_kstate *, void *); static void pfsync_out_upd_c(struct pf_kstate *, void *); static void pfsync_out_del_c(struct pf_kstate *, void *); /* Attach those functions to queue */ static struct pfsync_q pfsync_qs[] = { { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 }, { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 }, { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 }, { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 }, { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, { pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } }; /* Map queue to pf_kstate->sync_state */ static u_int8_t pfsync_qid_sstate[] = { PFSYNC_S_INS, /* PFSYNC_Q_INS_1301 */ PFSYNC_S_INS, /* PFSYNC_Q_INS_1400 */ PFSYNC_S_IACK, /* PFSYNC_Q_IACK */ PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1301 */ PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1400 */ PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */ PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */ }; /* Map pf_kstate->sync_state to queue */ static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t); static void pfsync_q_ins(struct pf_kstate *, int sync_state, bool); static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); static void pfsync_update_state(struct pf_kstate *); static void pfsync_tx(struct pfsync_softc *, struct mbuf *); struct pfsync_upd_req_item { TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; struct pfsync_upd_req ur_msg; }; struct pfsync_deferral { struct pfsync_softc *pd_sc; TAILQ_ENTRY(pfsync_deferral) pd_entry; struct callout pd_tmo; struct pf_kstate *pd_st; struct mbuf *pd_m; }; struct pfsync_bucket { int b_id; struct pfsync_softc *b_sc; struct mtx b_mtx; struct callout b_tmo; int b_flags; #define PFSYNCF_BUCKET_PUSH 0x00000001 size_t b_len; TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_Q_COUNT]; TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; TAILQ_HEAD(, pfsync_deferral) b_deferrals; u_int b_deferred; void *b_plus; size_t b_pluslen; struct ifaltq b_snd; }; struct pfsync_softc { /* Configuration */ struct ifnet *sc_ifp; struct ifnet *sc_sync_if; struct ip_moptions sc_imo; struct sockaddr_storage sc_sync_peer; uint32_t sc_flags; uint8_t sc_maxupdates; union inet_template sc_template; struct mtx sc_mtx; uint32_t sc_version; /* Queued data */ struct pfsync_bucket *sc_buckets; /* Bulk update info */ struct mtx sc_bulk_mtx; uint32_t sc_ureq_sent; int sc_bulk_tries; uint32_t sc_ureq_received; int sc_bulk_hashid; uint64_t sc_bulk_stateid; uint32_t sc_bulk_creatorid; struct callout sc_bulk_tmo; struct callout sc_bulkfail_tmo; }; #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) #define PFSYNC_DEFER_TIMEOUT 20 static const char pfsyncname[] = "pfsync"; static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; #define V_pfsyncif VNET(pfsyncif) VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); #define V_pfsync_swi_ie VNET(pfsync_swi_ie) VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); #define V_pfsyncstats VNET(pfsyncstats) VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; #define V_pfsync_carp_adj VNET(pfsync_carp_adj) VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT; #define V_pfsync_defer_timeout VNET(pfsync_defer_timeout) static void pfsync_timeout(void *); static void pfsync_push(struct pfsync_bucket *); static void pfsync_push_all(struct pfsync_softc *); static void pfsyncintr(void *); static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, struct in_mfilter *imf); static void pfsync_multicast_cleanup(struct pfsync_softc *); static void pfsync_pointers_init(void); static void pfsync_pointers_uninit(void); static int pfsync_init(void); static void pfsync_uninit(void); static unsigned long pfsync_buckets; SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "PFSYNC"); SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsyncstats), pfsyncstats, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, &pfsync_buckets, 0, "Number of pfsync hash buckets"); SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)"); static int pfsync_clone_create(struct if_clone *, int, caddr_t); static void pfsync_clone_destroy(struct ifnet *); static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, struct pf_state_peer *); static int pfsyncoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int pfsyncioctl(struct ifnet *, u_long, caddr_t); static int pfsync_defer(struct pf_kstate *, struct mbuf *); static void pfsync_undefer(struct pfsync_deferral *, int); static void pfsync_undefer_state_locked(struct pf_kstate *, int); static void pfsync_undefer_state(struct pf_kstate *, int); static void pfsync_defer_tmo(void *); static void pfsync_request_update(u_int32_t, u_int64_t); static bool pfsync_update_state_req(struct pf_kstate *); static void pfsync_drop(struct pfsync_softc *); static void pfsync_sendout(int, int); static void pfsync_send_plus(void *, size_t); static void pfsync_bulk_start(void); static void pfsync_bulk_status(u_int8_t); static void pfsync_bulk_update(void *); static void pfsync_bulk_fail(void *); static void pfsync_detach_ifnet(struct ifnet *); static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, struct pfsync_kstatus *); static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, struct pfsync_softc *); #ifdef IPSEC static void pfsync_update_net_tdb(struct pfsync_tdb *); #endif static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, struct pf_kstate *); #define PFSYNC_MAX_BULKTRIES 12 VNET_DEFINE(struct if_clone *, pfsync_cloner); #define V_pfsync_cloner VNET(pfsync_cloner) static int pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) { struct pfsync_softc *sc; struct ifnet *ifp; struct pfsync_bucket *b; int c; enum pfsync_q_id q; if (unit != 0) return (EINVAL); if (! pfsync_buckets) pfsync_buckets = mp_ncpus * 2; sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); sc->sc_flags |= PFSYNCF_OK; sc->sc_maxupdates = 128; sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); if (ifp == NULL) { free(sc, M_PFSYNC); return (ENOSPC); } if_initname(ifp, pfsyncname, unit); ifp->if_softc = sc; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_type = IFT_PFSYNC; ifp->if_hdrlen = sizeof(struct pfsync_header); ifp->if_mtu = ETHERMTU; mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); if_attach(ifp); bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), M_PFSYNC, M_ZERO | M_WAITOK); for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); b->b_id = c; b->b_sc = sc; b->b_len = PFSYNC_MINPKT; for (q = 0; q < PFSYNC_Q_COUNT; q++) TAILQ_INIT(&b->b_qs[q]); TAILQ_INIT(&b->b_upd_req_list); TAILQ_INIT(&b->b_deferrals); callout_init(&b->b_tmo, 1); b->b_snd.ifq_maxlen = ifqmaxlen; } V_pfsyncif = sc; return (0); } static void pfsync_clone_destroy(struct ifnet *ifp) { struct pfsync_softc *sc = ifp->if_softc; struct pfsync_bucket *b; int c, ret; for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; /* * At this stage, everything should have already been * cleared by pfsync_uninit(), and we have only to * drain callouts. */ PFSYNC_BUCKET_LOCK(b); while (b->b_deferred > 0) { struct pfsync_deferral *pd = TAILQ_FIRST(&b->b_deferrals); ret = callout_stop(&pd->pd_tmo); PFSYNC_BUCKET_UNLOCK(b); if (ret > 0) { pfsync_undefer(pd, 1); } else { callout_drain(&pd->pd_tmo); } PFSYNC_BUCKET_LOCK(b); } MPASS(b->b_deferred == 0); MPASS(TAILQ_EMPTY(&b->b_deferrals)); PFSYNC_BUCKET_UNLOCK(b); callout_drain(&b->b_tmo); } callout_drain(&sc->sc_bulkfail_tmo); callout_drain(&sc->sc_bulk_tmo); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); bpfdetach(ifp); if_detach(ifp); pfsync_drop(sc); if_free(ifp); pfsync_multicast_cleanup(sc); mtx_destroy(&sc->sc_mtx); mtx_destroy(&sc->sc_bulk_mtx); free(sc->sc_buckets, M_PFSYNC); free(sc, M_PFSYNC); V_pfsyncif = NULL; } static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); if (d->scrub == NULL) return (ENOMEM); } return (0); } static int pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) { struct pfsync_softc *sc = V_pfsyncif; #ifndef __NO_STRICT_ALIGNMENT struct pfsync_state_key key[2]; #endif struct pfsync_state_key *kw, *ks; struct pf_kstate *st = NULL; struct pf_state_key *skw = NULL, *sks = NULL; struct pf_krule *r = NULL; struct pfi_kkif *kif; int error; PF_RULES_RASSERT(); if (sp->pfs_1301.creatorid == 0) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid creator id: %08x\n", __func__, ntohl(sp->pfs_1301.creatorid)); return (EINVAL); } if ((kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: unknown interface: %s\n", __func__, sp->pfs_1301.ifname); if (flags & PFSYNC_SI_IOCTL) return (EINVAL); return (0); /* skip this state */ } /* * If the ruleset checksums match or the state is coming from the ioctl, * it's safe to associate the state with the rule of that number. */ if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) && (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) < pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) r = pf_main_ruleset.rules[ PF_RULESET_FILTER].active.ptr_array[ntohl(sp->pfs_1301.rule)]; else r = &V_pf_default_rule; if ((r->max_states && counter_u64_fetch(r->states_cur) >= r->max_states)) goto cleanup; /* * XXXGL: consider M_WAITOK in ioctl path after. */ st = pf_alloc_state(M_NOWAIT); if (__predict_false(st == NULL)) goto cleanup; if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) goto cleanup; #ifndef __NO_STRICT_ALIGNMENT bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2); kw = &key[PF_SK_WIRE]; ks = &key[PF_SK_STACK]; #else kw = &sp->pfs_1301.key[PF_SK_WIRE]; ks = &sp->pfs_1301.key[PF_SK_STACK]; #endif if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->pfs_1301.af) || PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->pfs_1301.af) || kw->port[0] != ks->port[0] || kw->port[1] != ks->port[1]) { sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); if (sks == NULL) goto cleanup; } else sks = skw; /* allocate memory for scrub info */ if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) || pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst)) goto cleanup; /* Copy to state key(s). */ skw->addr[0] = kw->addr[0]; skw->addr[1] = kw->addr[1]; skw->port[0] = kw->port[0]; skw->port[1] = kw->port[1]; skw->proto = sp->pfs_1301.proto; skw->af = sp->pfs_1301.af; if (sks != skw) { sks->addr[0] = ks->addr[0]; sks->addr[1] = ks->addr[1]; sks->port[0] = ks->port[0]; sks->port[1] = ks->port[1]; sks->proto = sp->pfs_1301.proto; sks->af = sp->pfs_1301.af; } /* copy to state */ bcopy(&sp->pfs_1301.rt_addr, &st->rt_addr, sizeof(st->rt_addr)); st->creation = time_uptime - ntohl(sp->pfs_1301.creation); st->expire = time_uptime; if (sp->pfs_1301.expire) { uint32_t timeout; timeout = r->timeout[sp->pfs_1301.timeout]; if (!timeout) timeout = V_pf_default_rule.timeout[sp->pfs_1301.timeout]; /* sp->expire may have been adaptively scaled by export. */ st->expire -= timeout - ntohl(sp->pfs_1301.expire); } st->direction = sp->pfs_1301.direction; - st->log = sp->pfs_1301.log; + st->act.log = sp->pfs_1301.log; st->timeout = sp->pfs_1301.timeout; switch (msg_version) { case PFSYNC_MSG_VERSION_1301: st->state_flags = sp->pfs_1301.state_flags; /* * In FreeBSD 13 pfsync lacks many attributes. Copy them * from the rule if possible. If rule can't be matched * clear any set options as we can't recover their * parameters. */ if (r == &V_pf_default_rule) { st->state_flags &= ~PFSTATE_SETMASK; } else { /* * Similar to pf_rule_to_actions(). This code * won't set the actions properly if they come * from multiple "match" rules as only rule * creating the state is send over pfsync. */ - st->qid = r->qid; - st->pqid = r->pqid; - st->rtableid = r->rtableid; + st->act.qid = r->qid; + st->act.pqid = r->pqid; + st->act.rtableid = r->rtableid; if (r->scrub_flags & PFSTATE_SETTOS) - st->set_tos = r->set_tos; - st->min_ttl = r->min_ttl; - st->max_mss = r->max_mss; + st->act.set_tos = r->set_tos; + st->act.min_ttl = r->min_ttl; + st->act.max_mss = r->max_mss; st->state_flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| PFSTATE_SETTOS|PFSTATE_SCRUB_TCP| PFSTATE_SETPRIO)); if (r->dnpipe || r->dnrpipe) { if (r->free_flags & PFRULE_DN_IS_PIPE) st->state_flags |= PFSTATE_DN_IS_PIPE; else st->state_flags &= ~PFSTATE_DN_IS_PIPE; } - st->dnpipe = r->dnpipe; - st->dnrpipe = r->dnrpipe; + st->act.dnpipe = r->dnpipe; + st->act.dnrpipe = r->dnrpipe; } break; case PFSYNC_MSG_VERSION_1400: st->state_flags = ntohs(sp->pfs_1400.state_flags); - st->qid = ntohs(sp->pfs_1400.qid); - st->pqid = ntohs(sp->pfs_1400.pqid); - st->dnpipe = ntohs(sp->pfs_1400.dnpipe); - st->dnrpipe = ntohs(sp->pfs_1400.dnrpipe); - st->rtableid = ntohl(sp->pfs_1400.rtableid); - st->min_ttl = sp->pfs_1400.min_ttl; - st->set_tos = sp->pfs_1400.set_tos; - st->max_mss = ntohs(sp->pfs_1400.max_mss); - st->set_prio[0] = sp->pfs_1400.set_prio[0]; - st->set_prio[1] = sp->pfs_1400.set_prio[1]; + st->act.qid = ntohs(sp->pfs_1400.qid); + st->act.pqid = ntohs(sp->pfs_1400.pqid); + st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe); + st->act.dnrpipe = ntohs(sp->pfs_1400.dnrpipe); + st->act.rtableid = ntohl(sp->pfs_1400.rtableid); + st->act.min_ttl = sp->pfs_1400.min_ttl; + st->act.set_tos = sp->pfs_1400.set_tos; + st->act.max_mss = ntohs(sp->pfs_1400.max_mss); + st->act.set_prio[0] = sp->pfs_1400.set_prio[0]; + st->act.set_prio[1] = sp->pfs_1400.set_prio[1]; st->rt = sp->pfs_1400.rt; if (st->rt && (st->rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname)) == NULL) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: unknown route interface: %s\n", __func__, sp->pfs_1400.rt_ifname); if (flags & PFSYNC_SI_IOCTL) return (EINVAL); return (0); /* skip this state */ } break; default: panic("%s: Unsupported pfsync_msg_version %d", __func__, msg_version); } st->id = sp->pfs_1301.id; st->creatorid = sp->pfs_1301.creatorid; pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); st->rule.ptr = r; st->nat_rule.ptr = NULL; st->anchor.ptr = NULL; st->pfsync_time = time_uptime; st->sync_state = PFSYNC_S_NONE; if (!(flags & PFSYNC_SI_IOCTL)) st->state_flags |= PFSTATE_NOSYNC; if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) goto cleanup_state; /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ counter_u64_add(r->states_cur, 1); counter_u64_add(r->states_tot, 1); if (!(flags & PFSYNC_SI_IOCTL)) { st->state_flags &= ~PFSTATE_NOSYNC; if (st->state_flags & PFSTATE_ACK) { struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK(b); pfsync_q_ins(st, PFSYNC_S_IACK, true); PFSYNC_BUCKET_UNLOCK(b); pfsync_push_all(sc); } } st->state_flags &= ~PFSTATE_ACK; PF_STATE_UNLOCK(st); return (0); cleanup: error = ENOMEM; if (skw == sks) sks = NULL; uma_zfree(V_pf_state_key_z, skw); uma_zfree(V_pf_state_key_z, sks); cleanup_state: /* pf_state_insert() frees the state keys. */ if (st) { st->timeout = PFTM_UNLINKED; /* appease an assert */ pf_free_state(st); } return (error); } #ifdef INET static int pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) { struct pfsync_softc *sc = V_pfsyncif; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct pfsync_header *ph; struct pfsync_subheader subh; int offset, len, flags = 0; int rv; uint16_t count; PF_RULES_RLOCK_TRACKER; *mp = NULL; V_pfsyncstats.pfsyncs_ipackets++; /* Verify that we have a sync interface configured. */ if (!sc || !sc->sc_sync_if || !V_pf_status.running || (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) goto done; /* verify that the packet came in on the right interface */ if (sc->sc_sync_if != m->m_pkthdr.rcvif) { V_pfsyncstats.pfsyncs_badif++; goto done; } if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* verify that the IP TTL is 255. */ if (ip->ip_ttl != PFSYNC_DFLTTL) { V_pfsyncstats.pfsyncs_badttl++; goto done; } offset = ip->ip_hl << 2; if (m->m_pkthdr.len < offset + sizeof(*ph)) { V_pfsyncstats.pfsyncs_hdrops++; goto done; } if (offset + sizeof(*ph) > m->m_len) { if (m_pullup(m, offset + sizeof(*ph)) == NULL) { V_pfsyncstats.pfsyncs_hdrops++; return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } ph = (struct pfsync_header *)((char *)ip + offset); /* verify the version */ if (ph->version != PFSYNC_VERSION) { V_pfsyncstats.pfsyncs_badver++; goto done; } len = ntohs(ph->len) + offset; if (m->m_pkthdr.len < len) { V_pfsyncstats.pfsyncs_badlen++; goto done; } /* * Trusting pf_chksum during packet processing, as well as seeking * in interface name tree, require holding PF_RULES_RLOCK(). */ PF_RULES_RLOCK(); if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) flags = PFSYNC_SI_CKSUM; offset += sizeof(*ph); while (offset <= len - sizeof(subh)) { m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); offset += sizeof(subh); if (subh.action >= PFSYNC_ACT_MAX) { V_pfsyncstats.pfsyncs_badact++; PF_RULES_RUNLOCK(); goto done; } count = ntohs(subh.count); V_pfsyncstats.pfsyncs_iacts[subh.action] += count; rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); if (rv == -1) { PF_RULES_RUNLOCK(); return (IPPROTO_DONE); } offset += rv; } PF_RULES_RUNLOCK(); done: m_freem(m); return (IPPROTO_DONE); } #endif static int pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_clr *clr; struct mbuf *mp; int len = sizeof(*clr) * count; int i, offp; u_int32_t creatorid; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } clr = (struct pfsync_clr *)(mp->m_data + offp); for (i = 0; i < count; i++) { creatorid = clr[i].creatorid; if (clr[i].ifname[0] != '\0' && pfi_kkif_find(clr[i].ifname) == NULL) continue; for (int i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_kstate *s; relock: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { if (s->creatorid == creatorid) { s->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(s); goto relock; } } PF_HASHROW_UNLOCK(ih); } } return (len); } static int pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) { struct mbuf *mp; union pfsync_state_union *sa, *sp; int i, offp, len, msg_version; switch (action) { case PFSYNC_ACT_INS_1301: len = sizeof(struct pfsync_state_1301) * count; msg_version = PFSYNC_MSG_VERSION_1301; break; case PFSYNC_ACT_INS_1400: len = sizeof(struct pfsync_state_1400) * count; msg_version = PFSYNC_MSG_VERSION_1400; break; default: V_pfsyncstats.pfsyncs_badact++; return (-1); } mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (union pfsync_state_union *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; /* Check for invalid values. */ if (sp->pfs_1301.timeout >= PFTM_MAX || sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST || sp->pfs_1301.direction > PF_OUT || (sp->pfs_1301.af != AF_INET && sp->pfs_1301.af != AF_INET6)) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid value\n", __func__); V_pfsyncstats.pfsyncs_badval++; continue; } if (pfsync_state_import(sp, flags, msg_version) == ENOMEM) /* Drop out, but process the rest of the actions. */ break; } return (len); } static int pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_ins_ack *ia, *iaa; struct pf_kstate *st; struct mbuf *mp; int len = count * sizeof(*ia); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); for (i = 0; i < count; i++) { ia = &iaa[i]; st = pf_find_state_byid(ia->id, ia->creatorid); if (st == NULL) continue; if (st->state_flags & PFSTATE_ACK) { pfsync_undefer_state(st, 0); } PF_STATE_UNLOCK(st); } /* * XXX this is not yet implemented, but we know the size of the * message so we can skip it. */ return (count * sizeof(struct pfsync_ins_ack)); } static int pfsync_upd_tcp(struct pf_kstate *st, struct pfsync_state_peer *src, struct pfsync_state_peer *dst) { int sync = 0; PF_STATE_LOCK_ASSERT(st); /* * The state should never go backwards except * for syn-proxy states. Neither should the * sequence window slide backwards. */ if ((st->src.state > src->state && (st->src.state < PF_TCPS_PROXY_SRC || src->state >= PF_TCPS_PROXY_SRC)) || (st->src.state == src->state && SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) sync++; else pf_state_peer_ntoh(src, &st->src); if ((st->dst.state > dst->state) || (st->dst.state >= TCPS_SYN_SENT && SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) sync++; else pf_state_peer_ntoh(dst, &st->dst); return (sync); } static int pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_softc *sc = V_pfsyncif; union pfsync_state_union *sa, *sp; struct pf_kstate *st; struct mbuf *mp; int sync, offp, i, len, msg_version; switch (action) { case PFSYNC_ACT_UPD_1301: len = sizeof(struct pfsync_state_1301) * count; msg_version = PFSYNC_MSG_VERSION_1301; break; case PFSYNC_ACT_UPD_1400: len = sizeof(struct pfsync_state_1400) * count; msg_version = PFSYNC_MSG_VERSION_1400; break; default: V_pfsyncstats.pfsyncs_badact++; return (-1); } mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (union pfsync_state_union *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; /* check for invalid values */ if (sp->pfs_1301.timeout >= PFTM_MAX || sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pfsync_input: PFSYNC_ACT_UPD: " "invalid value\n"); } V_pfsyncstats.pfsyncs_badval++; continue; } st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid); if (st == NULL) { /* insert the update */ if (pfsync_state_import(sp, flags, msg_version)) V_pfsyncstats.pfsyncs_badstate++; continue; } if (st->state_flags & PFSTATE_ACK) { pfsync_undefer_state(st, 1); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst); else { sync = 0; /* * Non-TCP protocol state machine always go * forwards */ if (st->src.state > sp->pfs_1301.src.state) sync++; else pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); if (st->dst.state > sp->pfs_1301.dst.state) sync++; else pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); } if (sync < 2) { pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst); pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); st->expire = time_uptime; st->timeout = sp->pfs_1301.timeout; } st->pfsync_time = time_uptime; if (sync) { V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); PF_STATE_UNLOCK(st); pfsync_push_all(sc); continue; } PF_STATE_UNLOCK(st); } return (len); } static int pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_upd_c *ua, *up; struct pf_kstate *st; int len = count * sizeof(*up); int sync; struct mbuf *mp; int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } ua = (struct pfsync_upd_c *)(mp->m_data + offp); for (i = 0; i < count; i++) { up = &ua[i]; /* check for invalid values */ if (up->timeout >= PFTM_MAX || up->src.state > PF_TCPS_PROXY_DST || up->dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pfsync_input: " "PFSYNC_ACT_UPD_C: " "invalid value\n"); } V_pfsyncstats.pfsyncs_badval++; continue; } st = pf_find_state_byid(up->id, up->creatorid); if (st == NULL) { /* We don't have this state. Ask for it. */ PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); pfsync_request_update(up->creatorid, up->id); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); continue; } if (st->state_flags & PFSTATE_ACK) { pfsync_undefer_state(st, 1); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) sync = pfsync_upd_tcp(st, &up->src, &up->dst); else { sync = 0; /* * Non-TCP protocol state machine always go * forwards */ if (st->src.state > up->src.state) sync++; else pf_state_peer_ntoh(&up->src, &st->src); if (st->dst.state > up->dst.state) sync++; else pf_state_peer_ntoh(&up->dst, &st->dst); } if (sync < 2) { pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->dst, &st->dst); st->expire = time_uptime; st->timeout = up->timeout; } st->pfsync_time = time_uptime; if (sync) { V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); PF_STATE_UNLOCK(st); pfsync_push_all(sc); continue; } PF_STATE_UNLOCK(st); } return (len); } static int pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_upd_req *ur, *ura; struct mbuf *mp; int len = count * sizeof(*ur); int i, offp; struct pf_kstate *st; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } ura = (struct pfsync_upd_req *)(mp->m_data + offp); for (i = 0; i < count; i++) { ur = &ura[i]; if (ur->id == 0 && ur->creatorid == 0) pfsync_bulk_start(); else { st = pf_find_state_byid(ur->id, ur->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } if (st->state_flags & PFSTATE_NOSYNC) { PF_STATE_UNLOCK(st); continue; } pfsync_update_state_req(st); PF_STATE_UNLOCK(st); } } return (len); } static int pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action) { struct mbuf *mp; struct pfsync_del_c *sa, *sp; struct pf_kstate *st; int len = count * sizeof(*sp); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_del_c *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } st->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(st); } return (len); } static int pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bus *bus; struct mbuf *mp; int len = count * sizeof(*bus); int offp; PFSYNC_BLOCK(sc); /* If we're not waiting for a bulk update, who cares. */ if (sc->sc_ureq_sent == 0) { PFSYNC_BUNLOCK(sc); return (len); } mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { PFSYNC_BUNLOCK(sc); V_pfsyncstats.pfsyncs_badlen++; return (-1); } bus = (struct pfsync_bus *)(mp->m_data + offp); switch (bus->status) { case PFSYNC_BUS_START: callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + V_pf_limits[PF_LIMIT_STATES].limit / ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / sizeof(union pfsync_state_union)), pfsync_bulk_fail, sc); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received bulk update start\n"); break; case PFSYNC_BUS_END: if (time_uptime - ntohl(bus->endtime) >= sc->sc_ureq_sent) { /* that's it, we're happy */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; callout_stop(&sc->sc_bulkfail_tmo); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk done"); sc->sc_flags |= PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received valid " "bulk update end\n"); } else { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received invalid " "bulk update end: bad timestamp\n"); } break; } PFSYNC_BUNLOCK(sc); return (len); } static int pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action) { int len = count * sizeof(struct pfsync_tdb); #if defined(IPSEC) struct pfsync_tdb *tp; struct mbuf *mp; int offp; int i; int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } tp = (struct pfsync_tdb *)(mp->m_data + offp); for (i = 0; i < count; i++) pfsync_update_net_tdb(&tp[i]); #endif return (len); } #if defined(IPSEC) /* Update an in-kernel tdb. Silently fail if no tdb is found. */ static void pfsync_update_net_tdb(struct pfsync_tdb *pt) { struct tdb *tdb; int s; /* check for invalid values */ if (ntohl(pt->spi) <= SPI_RESERVED_MAX || (pt->dst.sa.sa_family != AF_INET && pt->dst.sa.sa_family != AF_INET6)) goto bad; tdb = gettdb(pt->spi, &pt->dst, pt->sproto); if (tdb) { pt->rpl = ntohl(pt->rpl); pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); /* Neither replay nor byte counter should ever decrease. */ if (pt->rpl < tdb->tdb_rpl || pt->cur_bytes < tdb->tdb_cur_bytes) { goto bad; } tdb->tdb_rpl = pt->rpl; tdb->tdb_cur_bytes = pt->cur_bytes; } return; bad: if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " "invalid value\n"); V_pfsyncstats.pfsyncs_badstate++; return; } #endif static int pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action) { /* check if we are at the right place in the packet */ if (offset != m->m_pkthdr.len) V_pfsyncstats.pfsyncs_badlen++; /* we're done. free and let the caller return */ m_freem(m); return (-1); } static int pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action) { V_pfsyncstats.pfsyncs_badact++; m_freem(m); return (-1); } static int pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *rt) { m_freem(m); return (0); } /* ARGSUSED */ static int pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct pfsync_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct pfsyncreq pfsyncr; size_t nvbuflen; int error; int c; switch (cmd) { case SIOCSIFFLAGS: PFSYNC_LOCK(sc); if (ifp->if_flags & IFF_UP) { ifp->if_drv_flags |= IFF_DRV_RUNNING; PFSYNC_UNLOCK(sc); pfsync_pointers_init(); } else { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; PFSYNC_UNLOCK(sc); pfsync_pointers_uninit(); } break; case SIOCSIFMTU: if (!sc->sc_sync_if || ifr->ifr_mtu <= PFSYNC_MINPKT || ifr->ifr_mtu > sc->sc_sync_if->if_mtu) return (EINVAL); if (ifr->ifr_mtu < ifp->if_mtu) { for (c = 0; c < pfsync_buckets; c++) { PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) pfsync_sendout(1, c); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); } } ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); PFSYNC_LOCK(sc); if (sc->sc_sync_if) { strlcpy(pfsyncr.pfsyncr_syncdev, sc->sc_sync_if->if_xname, IFNAMSIZ); } pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; pfsyncr.pfsyncr_defer = sc->sc_flags; PFSYNC_UNLOCK(sc); return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), sizeof(pfsyncr))); case SIOCGETPFSYNCNV: { nvlist_t *nvl_syncpeer; nvlist_t *nvl = nvlist_create(0); if (nvl == NULL) return (ENOMEM); if (sc->sc_sync_if) nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); nvlist_add_number(nvl, "flags", sc->sc_flags); nvlist_add_number(nvl, "version", sc->sc_version); if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); void *packed = NULL; packed = nvlist_pack(nvl, &nvbuflen); if (packed == NULL) { free(packed, M_NVLIST); nvlist_destroy(nvl); return (ENOMEM); } if (nvbuflen > ifr->ifr_cap_nv.buf_length) { ifr->ifr_cap_nv.length = nvbuflen; ifr->ifr_cap_nv.buffer = NULL; free(packed, M_NVLIST); nvlist_destroy(nvl); return (EFBIG); } ifr->ifr_cap_nv.length = nvbuflen; error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); nvlist_destroy(nvl); nvlist_destroy(nvl_syncpeer); free(packed, M_NVLIST); break; } case SIOCSETPFSYNC: { struct pfsync_kstatus status; if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) return (error); if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, sizeof(pfsyncr)))) return (error); memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); error = pfsync_kstatus_to_softc(&status, sc); return (error); } case SIOCSETPFSYNCNV: { struct pfsync_kstatus status; void *data; nvlist_t *nvl; if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) return (error); if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) return (EINVAL); data = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); if ((error = copyin(ifr->ifr_cap_nv.buffer, data, ifr->ifr_cap_nv.length)) != 0) { free(data, M_TEMP); return (error); } if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { free(data, M_TEMP); return (EINVAL); } memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); pfsync_nvstatus_to_kstatus(nvl, &status); nvlist_destroy(nvl); free(data, M_TEMP); error = pfsync_kstatus_to_softc(&status, sc); return (error); } default: return (ENOTTY); } return (0); } static void pfsync_out_state_1301(struct pf_kstate *st, void *buf) { union pfsync_state_union *sp = buf; pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1301); } static void pfsync_out_state_1400(struct pf_kstate *st, void *buf) { union pfsync_state_union *sp = buf; pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1400); } static void pfsync_out_iack(struct pf_kstate *st, void *buf) { struct pfsync_ins_ack *iack = buf; iack->id = st->id; iack->creatorid = st->creatorid; } static void pfsync_out_upd_c(struct pf_kstate *st, void *buf) { struct pfsync_upd_c *up = buf; bzero(up, sizeof(*up)); up->id = st->id; pf_state_peer_hton(&st->src, &up->src); pf_state_peer_hton(&st->dst, &up->dst); up->creatorid = st->creatorid; up->timeout = st->timeout; } static void pfsync_out_del_c(struct pf_kstate *st, void *buf) { struct pfsync_del_c *dp = buf; dp->id = st->id; dp->creatorid = st->creatorid; st->state_flags |= PFSTATE_NOSYNC; } static void pfsync_drop(struct pfsync_softc *sc) { struct pf_kstate *st, *next; struct pfsync_upd_req_item *ur; struct pfsync_bucket *b; int c; enum pfsync_q_id q; for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; for (q = 0; q < PFSYNC_Q_COUNT; q++) { if (TAILQ_EMPTY(&b->b_qs[q])) continue; TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { KASSERT(st->sync_state == pfsync_qid_sstate[q], ("%s: st->sync_state == q", __func__)); st->sync_state = PFSYNC_S_NONE; pf_release_state(st); } TAILQ_INIT(&b->b_qs[q]); } while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); free(ur, M_PFSYNC); } b->b_len = PFSYNC_MINPKT; b->b_plus = NULL; } } static void pfsync_sendout(int schedswi, int c) { struct pfsync_softc *sc = V_pfsyncif; struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; struct pfsync_header *ph; struct pfsync_subheader *subh; struct pf_kstate *st, *st_next; struct pfsync_upd_req_item *ur; struct pfsync_bucket *b = &sc->sc_buckets[c]; int aflen, offset, count = 0; enum pfsync_q_id q; KASSERT(sc != NULL, ("%s: null sc", __func__)); KASSERT(b->b_len > PFSYNC_MINPKT, ("%s: sc_len %zu", __func__, b->b_len)); PFSYNC_BUCKET_LOCK_ASSERT(b); if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { pfsync_drop(sc); return; } m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); V_pfsyncstats.pfsyncs_onomem++; return; } m->m_data += max_linkhdr; m->m_len = m->m_pkthdr.len = b->b_len; /* build the ip header */ switch (sc->sc_sync_peer.ss_family) { #ifdef INET case AF_INET: { struct ip *ip; ip = mtod(m, struct ip *); bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); aflen = offset = sizeof(*ip); ip->ip_len = htons(m->m_pkthdr.len); ip_fillid(ip); break; } #endif default: m_freem(m); return; } /* build the pfsync header */ ph = (struct pfsync_header *)(m->m_data + offset); bzero(ph, sizeof(*ph)); offset += sizeof(*ph); ph->version = PFSYNC_VERSION; ph->len = htons(b->b_len - aflen); bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); /* walk the queues */ for (q = 0; q < PFSYNC_Q_COUNT; q++) { if (TAILQ_EMPTY(&b->b_qs[q])) continue; subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { KASSERT(st->sync_state == pfsync_qid_sstate[q], ("%s: st->sync_state == q", __func__)); /* * XXXGL: some of write methods do unlocked reads * of state data :( */ pfsync_qs[q].write(st, m->m_data + offset); offset += pfsync_qs[q].len; st->sync_state = PFSYNC_S_NONE; pf_release_state(st); count++; } TAILQ_INIT(&b->b_qs[q]); bzero(subh, sizeof(*subh)); subh->action = pfsync_qs[q].action; subh->count = htons(count); V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; } if (!TAILQ_EMPTY(&b->b_upd_req_list)) { subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); bcopy(&ur->ur_msg, m->m_data + offset, sizeof(ur->ur_msg)); offset += sizeof(ur->ur_msg); free(ur, M_PFSYNC); count++; } bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_UPD_REQ; subh->count = htons(count); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; } /* has someone built a custom region for us to add? */ if (b->b_plus != NULL) { bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); offset += b->b_pluslen; b->b_plus = NULL; } subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_EOF; subh->count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; /* we're done, let's put it on the wire */ if (ifp->if_bpf) { m->m_data += aflen; m->m_len = m->m_pkthdr.len = b->b_len - aflen; BPF_MTAP(ifp, m); m->m_data -= aflen; m->m_len = m->m_pkthdr.len = b->b_len; } if (sc->sc_sync_if == NULL) { b->b_len = PFSYNC_MINPKT; m_freem(m); return; } if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); b->b_len = PFSYNC_MINPKT; if (!_IF_QFULL(&b->b_snd)) _IF_ENQUEUE(&b->b_snd, m); else { m_freem(m); if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); } if (schedswi) swi_sched(V_pfsync_swi_cookie, 0); } static void pfsync_insert_state(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); if (st->state_flags & PFSTATE_NOSYNC) return; if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { st->state_flags |= PFSTATE_NOSYNC; return; } KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); PFSYNC_BUCKET_LOCK(b); if (b->b_len == PFSYNC_MINPKT) callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); pfsync_q_ins(st, PFSYNC_S_INS, true); PFSYNC_BUCKET_UNLOCK(b); st->sync_updates = 0; } static int pfsync_defer(struct pf_kstate *st, struct mbuf *m) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; struct pfsync_bucket *b; if (m->m_flags & (M_BCAST|M_MCAST)) return (0); if (sc == NULL) return (0); b = pfsync_get_bucket(sc, st); PFSYNC_LOCK(sc); if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || !(sc->sc_flags & PFSYNCF_DEFER)) { PFSYNC_UNLOCK(sc); return (0); } PFSYNC_BUCKET_LOCK(b); PFSYNC_UNLOCK(sc); if (b->b_deferred >= 128) pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); if (pd == NULL) { PFSYNC_BUCKET_UNLOCK(b); return (0); } b->b_deferred++; m->m_flags |= M_SKIP_FIREWALL; st->state_flags |= PFSTATE_ACK; pd->pd_sc = sc; pd->pd_st = st; pf_ref_state(st); pd->pd_m = m; TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000, pfsync_defer_tmo, pd); pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); return (1); } static void pfsync_undefer(struct pfsync_deferral *pd, int drop) { struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_kstate *st = pd->pd_st; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK_ASSERT(b); TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); b->b_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ free(pd, M_PFSYNC); pf_release_state(st); if (drop) m_freem(m); else { _IF_ENQUEUE(&b->b_snd, m); pfsync_push(b); } } static void pfsync_defer_tmo(void *arg) { struct epoch_tracker et; struct pfsync_deferral *pd = arg; struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_kstate *st = pd->pd_st; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK_ASSERT(b); TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); b->b_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ PFSYNC_BUCKET_UNLOCK(b); free(pd, M_PFSYNC); if (sc->sc_sync_if == NULL) { pf_release_state(st); m_freem(m); return; } NET_EPOCH_ENTER(et); CURVNET_SET(sc->sc_sync_if->if_vnet); pfsync_tx(sc, m); pf_release_state(st); CURVNET_RESTORE(); NET_EPOCH_EXIT(et); } static void pfsync_undefer_state_locked(struct pf_kstate *st, int drop) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK_ASSERT(b); TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { if (pd->pd_st == st) { if (callout_stop(&pd->pd_tmo) > 0) pfsync_undefer(pd, drop); return; } } panic("%s: unable to find deferred state", __func__); } static void pfsync_undefer_state(struct pf_kstate *st, int drop) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK(b); pfsync_undefer_state_locked(st, drop); PFSYNC_BUCKET_UNLOCK(b); } static struct pfsync_bucket* pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) { int c = PF_IDHASH(st) % pfsync_buckets; return &sc->sc_buckets[c]; } static void pfsync_update_state(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; bool sync = false, ref = true; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PF_STATE_LOCK_ASSERT(st); PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state_locked(st, 0); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true, b); PFSYNC_BUCKET_UNLOCK(b); return; } if (b->b_len == PFSYNC_MINPKT) callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_INS: /* we're already handling it */ if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { st->sync_updates++; if (st->sync_updates >= sc->sc_maxupdates) sync = true; } break; case PFSYNC_S_IACK: pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); st->sync_updates = 0; break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } if (sync || (time_uptime - st->pfsync_time) < 2) pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_request_update(u_int32_t creatorid, u_int64_t id) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = &sc->sc_buckets[0]; struct pfsync_upd_req_item *item; size_t nlen = sizeof(struct pfsync_upd_req); PFSYNC_BUCKET_LOCK_ASSERT(b); /* * This code does a bit to prevent multiple update requests for the * same state being generated. It searches current subheader queue, * but it doesn't lookup into queue of already packed datagrams. */ TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) if (item->ur_msg.id == id && item->ur_msg.creatorid == creatorid) return; item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); if (item == NULL) return; /* XXX stats */ item->ur_msg.id = id; item->ur_msg.creatorid = creatorid; if (TAILQ_EMPTY(&b->b_upd_req_list)) nlen += sizeof(struct pfsync_subheader); if (b->b_len + nlen > sc->sc_ifp->if_mtu) { pfsync_sendout(0, 0); nlen = sizeof(struct pfsync_subheader) + sizeof(struct pfsync_upd_req); } TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); b->b_len += nlen; pfsync_push(b); } static bool pfsync_update_state_req(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; bool ref = true, full = false; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PF_STATE_LOCK_ASSERT(st); PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true, b); PFSYNC_BUCKET_UNLOCK(b); return (full); } switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_IACK: pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD, ref); pfsync_push(b); break; case PFSYNC_S_INS: case PFSYNC_S_UPD: case PFSYNC_S_DEL_C: /* we're already handling it */ break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union)) full = true; PFSYNC_BUCKET_UNLOCK(b); return (full); } static void pfsync_delete_state(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); bool ref = true; PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state_locked(st, 1); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true, b); PFSYNC_BUCKET_UNLOCK(b); return; } if (b->b_len == PFSYNC_MINPKT) callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); switch (st->sync_state) { case PFSYNC_S_INS: /* We never got to tell the world so just forget about it. */ pfsync_q_del(st, true, b); break; case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_IACK: pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_DEL_C, ref); break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_clear_states(u_int32_t creatorid, const char *ifname) { struct { struct pfsync_subheader subh; struct pfsync_clr clr; } __packed r; bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_CLR; r.subh.count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); r.clr.creatorid = creatorid; pfsync_send_plus(&r, sizeof(r)); } static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t sync_state) { struct pfsync_softc *sc = V_pfsyncif; switch (sync_state) { case PFSYNC_S_INS: switch (sc->sc_version) { case PFSYNC_MSG_VERSION_1301: return PFSYNC_Q_INS_1301; case PFSYNC_MSG_VERSION_1400: return PFSYNC_Q_INS_1400; } break; case PFSYNC_S_IACK: return PFSYNC_Q_IACK; case PFSYNC_S_UPD: switch (sc->sc_version) { case PFSYNC_MSG_VERSION_1301: return PFSYNC_Q_UPD_1301; case PFSYNC_MSG_VERSION_1400: return PFSYNC_Q_UPD_1400; } break; case PFSYNC_S_UPD_C: return PFSYNC_Q_UPD_C; case PFSYNC_S_DEL_C: return PFSYNC_Q_DEL_C; default: panic("%s: Unsupported st->sync_state 0x%02x", __func__, sync_state); } panic("%s: Unsupported pfsync_msg_version %d", __func__, sc->sc_version); } static void pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref) { enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state); struct pfsync_softc *sc = V_pfsyncif; size_t nlen = pfsync_qs[q].len; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK_ASSERT(b); KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", b->b_len)); if (TAILQ_EMPTY(&b->b_qs[q])) nlen += sizeof(struct pfsync_subheader); if (b->b_len + nlen > sc->sc_ifp->if_mtu) { pfsync_sendout(1, b->b_id); nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; } b->b_len += nlen; TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); st->sync_state = pfsync_qid_sstate[q]; if (ref) pf_ref_state(st); } static void pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) { enum pfsync_q_id q; PFSYNC_BUCKET_LOCK_ASSERT(b); KASSERT(st->sync_state != PFSYNC_S_NONE, ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); q = pfsync_sstate_to_qid(st->sync_state); b->b_len -= pfsync_qs[q].len; TAILQ_REMOVE(&b->b_qs[q], st, sync_list); st->sync_state = PFSYNC_S_NONE; if (unref) pf_release_state(st); if (TAILQ_EMPTY(&b->b_qs[q])) b->b_len -= sizeof(struct pfsync_subheader); } static void pfsync_bulk_start(void) { struct pfsync_softc *sc = V_pfsyncif; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received bulk update request\n"); PFSYNC_BLOCK(sc); sc->sc_ureq_received = time_uptime; sc->sc_bulk_hashid = 0; sc->sc_bulk_stateid = 0; pfsync_bulk_status(PFSYNC_BUS_START); callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); PFSYNC_BUNLOCK(sc); } static void pfsync_bulk_update(void *arg) { struct pfsync_softc *sc = arg; struct pf_kstate *s; int i; PFSYNC_BLOCK_ASSERT(sc); CURVNET_SET(sc->sc_ifp->if_vnet); /* * Start with last state from previous invocation. * It may had gone, in this case start from the * hash slot. */ s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); if (s != NULL) i = PF_IDHASH(s); else i = sc->sc_bulk_hashid; for (; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; if (s != NULL) PF_HASHROW_ASSERT(ih); else { PF_HASHROW_LOCK(ih); s = LIST_FIRST(&ih->states); } for (; s; s = LIST_NEXT(s, entry)) { if (s->sync_state == PFSYNC_S_NONE && s->timeout < PFTM_MAX && s->pfsync_time <= sc->sc_ureq_received) { if (pfsync_update_state_req(s)) { /* We've filled a packet. */ sc->sc_bulk_hashid = i; sc->sc_bulk_stateid = s->id; sc->sc_bulk_creatorid = s->creatorid; PF_HASHROW_UNLOCK(ih); callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); goto full; } } } PF_HASHROW_UNLOCK(ih); } /* We're done. */ pfsync_bulk_status(PFSYNC_BUS_END); full: CURVNET_RESTORE(); } static void pfsync_bulk_status(u_int8_t status) { struct { struct pfsync_subheader subh; struct pfsync_bus bus; } __packed r; struct pfsync_softc *sc = V_pfsyncif; bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_BUS; r.subh.count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; r.bus.creatorid = V_pf_status.hostid; r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); r.bus.status = status; pfsync_send_plus(&r, sizeof(r)); } static void pfsync_bulk_fail(void *arg) { struct pfsync_softc *sc = arg; struct pfsync_bucket *b = &sc->sc_buckets[0]; CURVNET_SET(sc->sc_ifp->if_vnet); PFSYNC_BLOCK_ASSERT(sc); if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { /* Try again */ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, V_pfsyncif); PFSYNC_BUCKET_LOCK(b); pfsync_request_update(0, 0); PFSYNC_BUCKET_UNLOCK(b); } else { /* Pretend like the transfer was ok. */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; PFSYNC_LOCK(sc); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk fail"); sc->sc_flags |= PFSYNCF_OK; PFSYNC_UNLOCK(sc); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: failed to receive bulk update\n"); } CURVNET_RESTORE(); } static void pfsync_send_plus(void *plus, size_t pluslen) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = &sc->sc_buckets[0]; PFSYNC_BUCKET_LOCK(b); if (b->b_len + pluslen > sc->sc_ifp->if_mtu) pfsync_sendout(1, b->b_id); b->b_plus = plus; b->b_len += (b->b_pluslen = pluslen); pfsync_sendout(1, b->b_id); PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_timeout(void *arg) { struct pfsync_bucket *b = arg; CURVNET_SET(b->b_sc->sc_ifp->if_vnet); PFSYNC_BUCKET_LOCK(b); pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); CURVNET_RESTORE(); } static void pfsync_push(struct pfsync_bucket *b) { PFSYNC_BUCKET_LOCK_ASSERT(b); b->b_flags |= PFSYNCF_BUCKET_PUSH; swi_sched(V_pfsync_swi_cookie, 0); } static void pfsync_push_all(struct pfsync_softc *sc) { int c; struct pfsync_bucket *b; for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; PFSYNC_BUCKET_LOCK(b); pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); } } static void pfsync_tx(struct pfsync_softc *sc, struct mbuf *m) { struct ip *ip; int af, error = 0; ip = mtod(m, struct ip *); MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4)); af = ip->ip_v == IPVERSION ? AF_INET : AF_INET6; /* * We distinguish between a deferral packet and our * own pfsync packet based on M_SKIP_FIREWALL * flag. This is XXX. */ switch (af) { #ifdef INET case AF_INET: if (m->m_flags & M_SKIP_FIREWALL) { error = ip_output(m, NULL, NULL, 0, NULL, NULL); } else { error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL); } break; #endif #ifdef INET6 case AF_INET6: if (m->m_flags & M_SKIP_FIREWALL) { error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } else { MPASS(false); /* We don't support pfsync over IPv6. */ /*error = ip6_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo6, NULL);*/ } break; #endif } if (error == 0) V_pfsyncstats.pfsyncs_opackets++; else V_pfsyncstats.pfsyncs_oerrors++; } static void pfsyncintr(void *arg) { struct epoch_tracker et; struct pfsync_softc *sc = arg; struct pfsync_bucket *b; struct mbuf *m, *n; int c; NET_EPOCH_ENTER(et); CURVNET_SET(sc->sc_ifp->if_vnet); for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; PFSYNC_BUCKET_LOCK(b); if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { pfsync_sendout(0, b->b_id); b->b_flags &= ~PFSYNCF_BUCKET_PUSH; } _IF_DEQUEUE_ALL(&b->b_snd, m); PFSYNC_BUCKET_UNLOCK(b); for (; m != NULL; m = n) { n = m->m_nextpkt; m->m_nextpkt = NULL; pfsync_tx(sc, m); } } CURVNET_RESTORE(); NET_EPOCH_EXIT(et); } static int pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, struct in_mfilter *imf) { struct ip_moptions *imo = &sc->sc_imo; int error; if (!(ifp->if_flags & IFF_MULTICAST)) return (EADDRNOTAVAIL); switch (sc->sc_sync_peer.ss_family) { #ifdef INET case AF_INET: { ip_mfilter_init(&imo->imo_head); imo->imo_multicast_vif = -1; if ((error = in_joingroup(ifp, &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL, &imf->imf_inm)) != 0) return (error); ip_mfilter_insert(&imo->imo_head, imf); imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; break; } #endif } return (0); } static void pfsync_multicast_cleanup(struct pfsync_softc *sc) { struct ip_moptions *imo = &sc->sc_imo; struct in_mfilter *imf; while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { ip_mfilter_remove(&imo->imo_head, imf); in_leavegroup(imf->imf_inm, NULL); ip_mfilter_free(imf); } imo->imo_multicast_ifp = NULL; } void pfsync_detach_ifnet(struct ifnet *ifp) { struct pfsync_softc *sc = V_pfsyncif; if (sc == NULL) return; PFSYNC_LOCK(sc); if (sc->sc_sync_if == ifp) { /* We don't need mutlicast cleanup here, because the interface * is going away. We do need to ensure we don't try to do * cleanup later. */ ip_mfilter_init(&sc->sc_imo.imo_head); sc->sc_imo.imo_multicast_ifp = NULL; sc->sc_sync_if = NULL; } PFSYNC_UNLOCK(sc); } static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) { struct sockaddr_storage sa; status->maxupdates = pfsyncr->pfsyncr_maxupdates; status->flags = pfsyncr->pfsyncr_defer; strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); memset(&sa, 0, sizeof(sa)); if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { struct sockaddr_in *in = (struct sockaddr_in *)&sa; in->sin_family = AF_INET; in->sin_len = sizeof(*in); in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; } status->syncpeer = sa; return 0; } static int pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) { struct in_mfilter *imf = NULL; struct ifnet *sifp; struct ip *ip; int error; int c; if ((status->maxupdates < 0) || (status->maxupdates > 255)) return (EINVAL); if (status->syncdev[0] == '\0') sifp = NULL; else if ((sifp = ifunit_ref(status->syncdev)) == NULL) return (EINVAL); struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer); if (sifp != NULL && (status_sin->sin_addr.s_addr == 0 || status_sin->sin_addr.s_addr == htonl(INADDR_PFSYNC_GROUP))) imf = ip_mfilter_alloc(M_WAITOK, 0, 0); PFSYNC_LOCK(sc); switch (status->version) { case PFSYNC_MSG_VERSION_UNSPECIFIED: sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; break; case PFSYNC_MSG_VERSION_1301: case PFSYNC_MSG_VERSION_1400: sc->sc_version = status->version; break; default: PFSYNC_UNLOCK(sc); return (EINVAL); } struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; sc_sin->sin_family = AF_INET; sc_sin->sin_len = sizeof(*sc_sin); if (status_sin->sin_addr.s_addr == 0) { sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); } else { sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; } sc->sc_maxupdates = status->maxupdates; if (status->flags & PFSYNCF_DEFER) { sc->sc_flags |= PFSYNCF_DEFER; V_pfsync_defer_ptr = pfsync_defer; } else { sc->sc_flags &= ~PFSYNCF_DEFER; V_pfsync_defer_ptr = NULL; } if (sifp == NULL) { if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = NULL; pfsync_multicast_cleanup(sc); PFSYNC_UNLOCK(sc); return (0); } for (c = 0; c < pfsync_buckets; c++) { PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && (sifp->if_mtu < sc->sc_ifp->if_mtu || (sc->sc_sync_if != NULL && sifp->if_mtu < sc->sc_sync_if->if_mtu) || sifp->if_mtu < MCLBYTES - sizeof(struct ip))) pfsync_sendout(1, c); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); } pfsync_multicast_cleanup(sc); if (sc_sin->sin_addr.s_addr == htonl(INADDR_PFSYNC_GROUP)) { error = pfsync_multicast_setup(sc, sifp, imf); if (error) { if_rele(sifp); ip_mfilter_free(imf); PFSYNC_UNLOCK(sc); return (error); } } if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = sifp; ip = &sc->sc_template.ipv4; bzero(ip, sizeof(*ip)); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; ip->ip_tos = IPTOS_LOWDELAY; /* len and id are set later. */ ip->ip_off = htons(IP_DF); ip->ip_ttl = PFSYNC_DFLTTL; ip->ip_p = IPPROTO_PFSYNC; ip->ip_src.s_addr = INADDR_ANY; ip->ip_dst.s_addr = sc_sin->sin_addr.s_addr; /* Request a full state table update. */ if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(V_pfsync_carp_adj, "pfsync bulk start"); sc->sc_flags &= ~PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: requesting bulk update\n"); PFSYNC_UNLOCK(sc); PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); pfsync_request_update(0, 0); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); PFSYNC_BLOCK(sc); sc->sc_ureq_sent = time_uptime; callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); PFSYNC_BUNLOCK(sc); return (0); } static void pfsync_pointers_init(void) { PF_RULES_WLOCK(); V_pfsync_state_import_ptr = pfsync_state_import; V_pfsync_insert_state_ptr = pfsync_insert_state; V_pfsync_update_state_ptr = pfsync_update_state; V_pfsync_delete_state_ptr = pfsync_delete_state; V_pfsync_clear_states_ptr = pfsync_clear_states; V_pfsync_defer_ptr = pfsync_defer; PF_RULES_WUNLOCK(); } static void pfsync_pointers_uninit(void) { PF_RULES_WLOCK(); V_pfsync_state_import_ptr = NULL; V_pfsync_insert_state_ptr = NULL; V_pfsync_update_state_ptr = NULL; V_pfsync_delete_state_ptr = NULL; V_pfsync_clear_states_ptr = NULL; V_pfsync_defer_ptr = NULL; PF_RULES_WUNLOCK(); } static void vnet_pfsync_init(const void *unused __unused) { int error; V_pfsync_cloner = if_clone_simple(pfsyncname, pfsync_clone_create, pfsync_clone_destroy, 1); error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif, SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); if (error) { if_clone_detach(V_pfsync_cloner); log(LOG_INFO, "swi_add() failed in %s\n", __func__); } pfsync_pointers_init(); } VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, vnet_pfsync_init, NULL); static void vnet_pfsync_uninit(const void *unused __unused) { int ret __diagused; pfsync_pointers_uninit(); if_clone_detach(V_pfsync_cloner); ret = swi_remove(V_pfsync_swi_cookie); MPASS(ret == 0); ret = intr_event_destroy(V_pfsync_swi_ie); MPASS(ret == 0); } VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, vnet_pfsync_uninit, NULL); static int pfsync_init(void) { #ifdef INET int error; pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL); if (error) return (error); #endif return (0); } static void pfsync_uninit(void) { pfsync_detach_ifnet_ptr = NULL; #ifdef INET ipproto_unregister(IPPROTO_PFSYNC); #endif } static int pfsync_modevent(module_t mod, int type, void *data) { int error = 0; switch (type) { case MOD_LOAD: error = pfsync_init(); break; case MOD_UNLOAD: pfsync_uninit(); break; default: error = EINVAL; break; } return (error); } static moduledata_t pfsync_mod = { pfsyncname, pfsync_modevent, 0 }; #define PFSYNC_MODVER 1 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); MODULE_VERSION(pfsync, PFSYNC_MODVER); MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 7c41be4b25fe..2a1813d6aadd 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -1,8192 +1,8105 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002 - 2008 Henning Brauer * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. * * $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_bpf.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_pf.h" #include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* dummynet */ #include #include #include #include #include #ifdef INET6 #include #include #include #include #include #include #include #endif /* INET6 */ #if defined(SCTP) || defined(SCTP_SUPPORT) #include #endif #include #include #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x SDT_PROVIDER_DEFINE(pf); SDT_PROBE_DEFINE4(pf, ip, test, done, "int", "int", "struct pf_krule *", "struct pf_kstate *"); SDT_PROBE_DEFINE4(pf, ip, test6, done, "int", "int", "struct pf_krule *", "struct pf_kstate *"); SDT_PROBE_DEFINE5(pf, ip, state, lookup, "struct pfi_kkif *", "struct pf_state_key_cmp *", "int", "struct pf_pdesc *", "struct pf_kstate *"); SDT_PROBE_DEFINE3(pf, eth, test_rule, entry, "int", "struct ifnet *", "struct mbuf *"); SDT_PROBE_DEFINE2(pf, eth, test_rule, test, "int", "struct pf_keth_rule *"); SDT_PROBE_DEFINE3(pf, eth, test_rule, mismatch, "int", "struct pf_keth_rule *", "char *"); SDT_PROBE_DEFINE2(pf, eth, test_rule, match, "int", "struct pf_keth_rule *"); SDT_PROBE_DEFINE2(pf, eth, test_rule, final_match, "int", "struct pf_keth_rule *"); /* * Global variables */ /* state tables */ VNET_DEFINE(struct pf_altqqueue, pf_altqs[4]); VNET_DEFINE(struct pf_kpalist, pf_pabuf); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active); VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_active); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive); VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_inactive); VNET_DEFINE(struct pf_kstatus, pf_status); VNET_DEFINE(u_int32_t, ticket_altqs_active); VNET_DEFINE(u_int32_t, ticket_altqs_inactive); VNET_DEFINE(int, altqs_inactive_open); VNET_DEFINE(u_int32_t, ticket_pabuf); VNET_DEFINE(MD5_CTX, pf_tcp_secret_ctx); #define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx) VNET_DEFINE(u_char, pf_tcp_secret[16]); #define V_pf_tcp_secret VNET(pf_tcp_secret) VNET_DEFINE(int, pf_tcp_secret_init); #define V_pf_tcp_secret_init VNET(pf_tcp_secret_init) VNET_DEFINE(int, pf_tcp_iss_off); #define V_pf_tcp_iss_off VNET(pf_tcp_iss_off) VNET_DECLARE(int, pf_vnet_active); #define V_pf_vnet_active VNET(pf_vnet_active) VNET_DEFINE_STATIC(uint32_t, pf_purge_idx); #define V_pf_purge_idx VNET(pf_purge_idx) #ifdef PF_WANT_32_TO_64_COUNTER VNET_DEFINE_STATIC(uint32_t, pf_counter_periodic_iter); #define V_pf_counter_periodic_iter VNET(pf_counter_periodic_iter) VNET_DEFINE(struct allrulelist_head, pf_allrulelist); VNET_DEFINE(size_t, pf_allrulecount); VNET_DEFINE(struct pf_krule *, pf_rulemarker); #endif /* * Queue for pf_intr() sends. */ static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations"); struct pf_send_entry { STAILQ_ENTRY(pf_send_entry) pfse_next; struct mbuf *pfse_m; enum { PFSE_IP, PFSE_IP6, PFSE_ICMP, PFSE_ICMP6, } pfse_type; struct { int type; int code; int mtu; } icmpopts; }; STAILQ_HEAD(pf_send_head, pf_send_entry); VNET_DEFINE_STATIC(struct pf_send_head, pf_sendqueue); #define V_pf_sendqueue VNET(pf_sendqueue) static struct mtx_padalign pf_sendqueue_mtx; MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF); #define PF_SENDQ_LOCK() mtx_lock(&pf_sendqueue_mtx) #define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx) /* * Queue for pf_overload_task() tasks. */ struct pf_overload_entry { SLIST_ENTRY(pf_overload_entry) next; struct pf_addr addr; sa_family_t af; uint8_t dir; struct pf_krule *rule; }; SLIST_HEAD(pf_overload_head, pf_overload_entry); VNET_DEFINE_STATIC(struct pf_overload_head, pf_overloadqueue); #define V_pf_overloadqueue VNET(pf_overloadqueue) VNET_DEFINE_STATIC(struct task, pf_overloadtask); #define V_pf_overloadtask VNET(pf_overloadtask) static struct mtx_padalign pf_overloadqueue_mtx; MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx, "pf overload/flush queue", MTX_DEF); #define PF_OVERLOADQ_LOCK() mtx_lock(&pf_overloadqueue_mtx) #define PF_OVERLOADQ_UNLOCK() mtx_unlock(&pf_overloadqueue_mtx) VNET_DEFINE(struct pf_krulequeue, pf_unlinked_rules); struct mtx_padalign pf_unlnkdrules_mtx; MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules", MTX_DEF); struct sx pf_config_lock; SX_SYSINIT(pf_config_lock, &pf_config_lock, "pf config"); struct mtx_padalign pf_table_stats_lock; MTX_SYSINIT(pf_table_stats_lock, &pf_table_stats_lock, "pf table stats", MTX_DEF); VNET_DEFINE_STATIC(uma_zone_t, pf_sources_z); #define V_pf_sources_z VNET(pf_sources_z) uma_zone_t pf_mtag_z; VNET_DEFINE(uma_zone_t, pf_state_z); VNET_DEFINE(uma_zone_t, pf_state_key_z); VNET_DEFINE(struct unrhdr64, pf_stateid); static void pf_src_tree_remove_state(struct pf_kstate *); static void pf_init_threshold(struct pf_threshold *, u_int32_t, u_int32_t); static void pf_add_threshold(struct pf_threshold *); static int pf_check_threshold(struct pf_threshold *); static void pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); static int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *); static void pf_change_icmp(struct pf_addr *, u_int16_t *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t *, u_int16_t *, u_int16_t *, u_int16_t *, u_int8_t, sa_family_t); static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_krule *, int); static void pf_detach_state(struct pf_kstate *); static int pf_state_key_attach(struct pf_state_key *, struct pf_state_key *, struct pf_kstate *); static void pf_state_key_detach(struct pf_kstate *, int); static int pf_state_key_ctor(void *, int, void *, int); static u_int32_t pf_tcp_iss(struct pf_pdesc *); -void pf_rule_to_actions(struct pf_krule *, - struct pf_rule_actions *); static int pf_dummynet(struct pf_pdesc *, struct pf_kstate *, struct pf_krule *, struct mbuf **); static int pf_dummynet_route(struct pf_pdesc *, struct pf_kstate *, struct pf_krule *, struct ifnet *, struct sockaddr *, struct mbuf **); static int pf_test_eth_rule(int, struct pfi_kkif *, struct mbuf **); static int pf_test_rule(struct pf_krule **, struct pf_kstate **, struct pfi_kkif *, struct mbuf *, int, struct pf_pdesc *, struct pf_krule **, struct pf_kruleset **, struct inpcb *); static int pf_create_state(struct pf_krule *, struct pf_krule *, struct pf_krule *, struct pf_pdesc *, struct pf_ksrc_node *, struct pf_state_key *, struct pf_state_key *, struct mbuf *, int, u_int16_t, u_int16_t, int *, struct pfi_kkif *, struct pf_kstate **, int, u_int16_t, u_int16_t, int, struct pf_krule_slist *); static int pf_test_fragment(struct pf_krule **, struct pfi_kkif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_krule **, struct pf_kruleset **); static int pf_tcp_track_full(struct pf_kstate **, struct pfi_kkif *, struct mbuf *, int, struct pf_pdesc *, u_short *, int *); static int pf_tcp_track_sloppy(struct pf_kstate **, struct pf_pdesc *, u_short *); static int pf_test_state_tcp(struct pf_kstate **, struct pfi_kkif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); static int pf_test_state_udp(struct pf_kstate **, struct pfi_kkif *, struct mbuf *, int, void *, struct pf_pdesc *); static int pf_test_state_icmp(struct pf_kstate **, struct pfi_kkif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); static int pf_test_state_other(struct pf_kstate **, struct pfi_kkif *, struct mbuf *, struct pf_pdesc *); static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, u_int16_t); static int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); static void pf_print_state_parts(struct pf_kstate *, struct pf_state_key *, struct pf_state_key *); static void pf_patch_8(struct mbuf *, u_int16_t *, u_int8_t *, u_int8_t, bool, u_int8_t); static struct pf_kstate *pf_find_state(struct pfi_kkif *, struct pf_state_key_cmp *, u_int); static int pf_src_connlimit(struct pf_kstate **); static void pf_overload_task(void *v, int pending); static u_short pf_insert_src_node(struct pf_ksrc_node **, struct pf_krule *, struct pf_addr *, sa_family_t); static u_int pf_purge_expired_states(u_int, int); static void pf_purge_unlinked_rules(void); static int pf_mtag_uminit(void *, int, int); static void pf_mtag_free(struct m_tag *); static void pf_packet_rework_nat(struct mbuf *, struct pf_pdesc *, int, struct pf_state_key *); #ifdef INET static void pf_route(struct mbuf **, struct pf_krule *, struct ifnet *, struct pf_kstate *, struct pf_pdesc *, struct inpcb *); #endif /* INET */ #ifdef INET6 static void pf_change_a6(struct pf_addr *, u_int16_t *, struct pf_addr *, u_int8_t); static void pf_route6(struct mbuf **, struct pf_krule *, struct ifnet *, struct pf_kstate *, struct pf_pdesc *, struct inpcb *); #endif /* INET6 */ static __inline void pf_set_protostate(struct pf_kstate *, int, u_int8_t); int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); extern int pf_end_threads; extern struct proc *pf_purge_proc; VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); #define PACKET_UNDO_NAT(_m, _pd, _off, _s) \ do { \ struct pf_state_key *nk; \ if ((pd->dir) == PF_OUT) \ nk = (_s)->key[PF_SK_STACK]; \ else \ nk = (_s)->key[PF_SK_WIRE]; \ pf_packet_rework_nat(_m, _pd, _off, nk); \ } while (0) #define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ (pd)->pf_mtag->flags & PF_MTAG_FLAG_PACKET_LOOPED) #define STATE_LOOKUP(i, k, s, pd) \ do { \ (s) = pf_find_state((i), (k), (pd->dir)); \ SDT_PROBE5(pf, ip, state, lookup, i, k, (pd->dir), pd, (s)); \ if ((s) == NULL) \ return (PF_DROP); \ if (PACKET_LOOPED(pd)) \ return (PF_PASS); \ } while (0) #define BOUND_IFACE(r, k) \ ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all #define STATE_INC_COUNTERS(s) \ do { \ struct pf_krule_item *mrm; \ counter_u64_add(s->rule.ptr->states_cur, 1); \ counter_u64_add(s->rule.ptr->states_tot, 1); \ if (s->anchor.ptr != NULL) { \ counter_u64_add(s->anchor.ptr->states_cur, 1); \ counter_u64_add(s->anchor.ptr->states_tot, 1); \ } \ if (s->nat_rule.ptr != NULL) { \ counter_u64_add(s->nat_rule.ptr->states_cur, 1);\ counter_u64_add(s->nat_rule.ptr->states_tot, 1);\ } \ SLIST_FOREACH(mrm, &s->match_rules, entry) { \ counter_u64_add(mrm->r->states_cur, 1); \ counter_u64_add(mrm->r->states_tot, 1); \ } \ } while (0) #define STATE_DEC_COUNTERS(s) \ do { \ struct pf_krule_item *mrm; \ if (s->nat_rule.ptr != NULL) \ counter_u64_add(s->nat_rule.ptr->states_cur, -1);\ if (s->anchor.ptr != NULL) \ counter_u64_add(s->anchor.ptr->states_cur, -1); \ counter_u64_add(s->rule.ptr->states_cur, -1); \ SLIST_FOREACH(mrm, &s->match_rules, entry) \ counter_u64_add(mrm->r->states_cur, -1); \ } while (0) MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); MALLOC_DEFINE(M_PF_RULE_ITEM, "pf_krule_item", "pf(4) rule items"); VNET_DEFINE(struct pf_keyhash *, pf_keyhash); VNET_DEFINE(struct pf_idhash *, pf_idhash); VNET_DEFINE(struct pf_srchash *, pf_srchash); SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "pf(4)"); u_long pf_hashmask; u_long pf_srchashmask; static u_long pf_hashsize; static u_long pf_srchashsize; u_long pf_ioctl_maxcount = 65535; SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN, &pf_hashsize, 0, "Size of pf(4) states hashtable"); SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN, &pf_srchashsize, 0, "Size of pf(4) source nodes hashtable"); SYSCTL_ULONG(_net_pf, OID_AUTO, request_maxcount, CTLFLAG_RWTUN, &pf_ioctl_maxcount, 0, "Maximum number of tables, addresses, ... in a single ioctl() call"); VNET_DEFINE(void *, pf_swi_cookie); VNET_DEFINE(struct intr_event *, pf_swi_ie); VNET_DEFINE(uint32_t, pf_hashseed); #define V_pf_hashseed VNET(pf_hashseed) int pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: if (a->addr32[0] > b->addr32[0]) return (1); if (a->addr32[0] < b->addr32[0]) return (-1); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (a->addr32[3] > b->addr32[3]) return (1); if (a->addr32[3] < b->addr32[3]) return (-1); if (a->addr32[2] > b->addr32[2]) return (1); if (a->addr32[2] < b->addr32[2]) return (-1); if (a->addr32[1] > b->addr32[1]) return (1); if (a->addr32[1] < b->addr32[1]) return (-1); if (a->addr32[0] > b->addr32[0]) return (1); if (a->addr32[0] < b->addr32[0]) return (-1); break; #endif /* INET6 */ default: panic("%s: unknown address family %u", __func__, af); } return (0); } static void pf_packet_rework_nat(struct mbuf *m, struct pf_pdesc *pd, int off, struct pf_state_key *nk) { switch (pd->proto) { case IPPROTO_TCP: { struct tcphdr *th = &pd->hdr.tcp; if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx], nk->port[pd->sidx], 0, pd->af); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum, &th->th_sum, &nk->addr[pd->didx], nk->port[pd->didx], 0, pd->af); m_copyback(m, off, sizeof(*th), (caddr_t)th); break; } case IPPROTO_UDP: { struct udphdr *uh = &pd->hdr.udp; if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum, &uh->uh_sum, &nk->addr[pd->sidx], nk->port[pd->sidx], 1, pd->af); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum, &uh->uh_sum, &nk->addr[pd->didx], nk->port[pd->didx], 1, pd->af); m_copyback(m, off, sizeof(*uh), (caddr_t)uh); break; } case IPPROTO_ICMP: { struct icmp *ih = &pd->hdr.icmp; if (nk->port[pd->sidx] != ih->icmp_id) { pd->hdr.icmp.icmp_cksum = pf_cksum_fixup( ih->icmp_cksum, ih->icmp_id, nk->port[pd->sidx], 0); ih->icmp_id = nk->port[pd->sidx]; pd->sport = &ih->icmp_id; m_copyback(m, off, ICMP_MINLEN, (caddr_t)ih); } /* FALLTHROUGH */ } default: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) { switch (pd->af) { case AF_INET: pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); break; case AF_INET6: PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); break; } } if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) { switch (pd->af) { case AF_INET: pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); break; case AF_INET6: PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); break; } } break; } } static __inline uint32_t pf_hashkey(struct pf_state_key *sk) { uint32_t h; h = murmur3_32_hash32((uint32_t *)sk, sizeof(struct pf_state_key_cmp)/sizeof(uint32_t), V_pf_hashseed); return (h & pf_hashmask); } static __inline uint32_t pf_hashsrc(struct pf_addr *addr, sa_family_t af) { uint32_t h; switch (af) { case AF_INET: h = murmur3_32_hash32((uint32_t *)&addr->v4, sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed); break; case AF_INET6: h = murmur3_32_hash32((uint32_t *)&addr->v6, sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed); break; default: panic("%s: unknown address family %u", __func__, af); } return (h & pf_srchashmask); } #ifdef ALTQ static int pf_state_hash(struct pf_kstate *s) { u_int32_t hv = (intptr_t)s / sizeof(*s); hv ^= crc32(&s->src, sizeof(s->src)); hv ^= crc32(&s->dst, sizeof(s->dst)); if (hv == 0) hv = 1; return (hv); } #endif static __inline void pf_set_protostate(struct pf_kstate *s, int which, u_int8_t newstate) { if (which == PF_PEER_DST || which == PF_PEER_BOTH) s->dst.state = newstate; if (which == PF_PEER_DST) return; if (s->src.state == newstate) return; if (s->creatorid == V_pf_status.hostid && s->key[PF_SK_STACK] != NULL && s->key[PF_SK_STACK]->proto == IPPROTO_TCP && !(TCPS_HAVEESTABLISHED(s->src.state) || s->src.state == TCPS_CLOSED) && (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED)) atomic_add_32(&V_pf_status.states_halfopen, -1); s->src.state = newstate; } #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: dst->addr32[0] = src->addr32[0]; break; #endif /* INET */ case AF_INET6: dst->addr32[0] = src->addr32[0]; dst->addr32[1] = src->addr32[1]; dst->addr32[2] = src->addr32[2]; dst->addr32[3] = src->addr32[3]; break; } } #endif /* INET6 */ static void pf_init_threshold(struct pf_threshold *threshold, u_int32_t limit, u_int32_t seconds) { threshold->limit = limit * PF_THRESHOLD_MULT; threshold->seconds = seconds; threshold->count = 0; threshold->last = time_uptime; } static void pf_add_threshold(struct pf_threshold *threshold) { u_int32_t t = time_uptime, diff = t - threshold->last; if (diff >= threshold->seconds) threshold->count = 0; else threshold->count -= threshold->count * diff / threshold->seconds; threshold->count += PF_THRESHOLD_MULT; threshold->last = t; } static int pf_check_threshold(struct pf_threshold *threshold) { return (threshold->count > threshold->limit); } static int pf_src_connlimit(struct pf_kstate **state) { struct pf_overload_entry *pfoe; int bad = 0; PF_STATE_LOCK_ASSERT(*state); /* * XXXKS: The src node is accessed unlocked! * PF_SRC_NODE_LOCK_ASSERT((*state)->src_node); */ (*state)->src_node->conn++; (*state)->src.tcp_est = 1; pf_add_threshold(&(*state)->src_node->conn_rate); if ((*state)->rule.ptr->max_src_conn && (*state)->rule.ptr->max_src_conn < (*state)->src_node->conn) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1); bad++; } if ((*state)->rule.ptr->max_src_conn_rate.limit && pf_check_threshold(&(*state)->src_node->conn_rate)) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1); bad++; } if (!bad) return (0); /* Kill this state. */ (*state)->timeout = PFTM_PURGE; pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED); if ((*state)->rule.ptr->overload_tbl == NULL) return (1); /* Schedule overloading and flushing task. */ pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT); if (pfoe == NULL) return (1); /* too bad :( */ bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr)); pfoe->af = (*state)->key[PF_SK_WIRE]->af; pfoe->rule = (*state)->rule.ptr; pfoe->dir = (*state)->direction; PF_OVERLOADQ_LOCK(); SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next); PF_OVERLOADQ_UNLOCK(); taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask); return (1); } static void pf_overload_task(void *v, int pending) { struct pf_overload_head queue; struct pfr_addr p; struct pf_overload_entry *pfoe, *pfoe1; uint32_t killed = 0; CURVNET_SET((struct vnet *)v); PF_OVERLOADQ_LOCK(); queue = V_pf_overloadqueue; SLIST_INIT(&V_pf_overloadqueue); PF_OVERLOADQ_UNLOCK(); bzero(&p, sizeof(p)); SLIST_FOREACH(pfoe, &queue, next) { counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1); if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("%s: blocking address ", __func__); pf_print_host(&pfoe->addr, 0, pfoe->af); printf("\n"); } p.pfra_af = pfoe->af; switch (pfoe->af) { #ifdef INET case AF_INET: p.pfra_net = 32; p.pfra_ip4addr = pfoe->addr.v4; break; #endif #ifdef INET6 case AF_INET6: p.pfra_net = 128; p.pfra_ip6addr = pfoe->addr.v6; break; #endif } PF_RULES_WLOCK(); pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second); PF_RULES_WUNLOCK(); } /* * Remove those entries, that don't need flushing. */ SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) if (pfoe->rule->flush == 0) { SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next); free(pfoe, M_PFTEMP); } else counter_u64_add( V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1); /* If nothing to flush, return. */ if (SLIST_EMPTY(&queue)) { CURVNET_RESTORE(); return; } for (int i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_state_key *sk; struct pf_kstate *s; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { sk = s->key[PF_SK_WIRE]; SLIST_FOREACH(pfoe, &queue, next) if (sk->af == pfoe->af && ((pfoe->rule->flush & PF_FLUSH_GLOBAL) || pfoe->rule == s->rule.ptr) && ((pfoe->dir == PF_OUT && PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) || (pfoe->dir == PF_IN && PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) { s->timeout = PFTM_PURGE; pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED); killed++; } } PF_HASHROW_UNLOCK(ih); } SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) free(pfoe, M_PFTEMP); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: %u states killed", __func__, killed); CURVNET_RESTORE(); } /* * Can return locked on failure, so that we can consistently * allocate and insert a new one. */ struct pf_ksrc_node * pf_find_src_node(struct pf_addr *src, struct pf_krule *rule, sa_family_t af, struct pf_srchash **sh, bool returnlocked) { struct pf_ksrc_node *n; counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1); *sh = &V_pf_srchash[pf_hashsrc(src, af)]; PF_HASHROW_LOCK(*sh); LIST_FOREACH(n, &(*sh)->nodes, entry) if (n->rule.ptr == rule && n->af == af && ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) || (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0))) break; if (n != NULL) { n->states++; PF_HASHROW_UNLOCK(*sh); } else if (returnlocked == false) PF_HASHROW_UNLOCK(*sh); return (n); } static void pf_free_src_node(struct pf_ksrc_node *sn) { for (int i = 0; i < 2; i++) { counter_u64_free(sn->bytes[i]); counter_u64_free(sn->packets[i]); } uma_zfree(V_pf_sources_z, sn); } static u_short pf_insert_src_node(struct pf_ksrc_node **sn, struct pf_krule *rule, struct pf_addr *src, sa_family_t af) { u_short reason = 0; struct pf_srchash *sh = NULL; KASSERT((rule->rule_flag & PFRULE_SRCTRACK || rule->rpool.opts & PF_POOL_STICKYADDR), ("%s for non-tracking rule %p", __func__, rule)); if (*sn == NULL) *sn = pf_find_src_node(src, rule, af, &sh, true); if (*sn == NULL) { PF_HASHROW_ASSERT(sh); if (rule->max_src_nodes && counter_u64_fetch(rule->src_nodes) >= rule->max_src_nodes) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1); PF_HASHROW_UNLOCK(sh); reason = PFRES_SRCLIMIT; goto done; } (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); if ((*sn) == NULL) { PF_HASHROW_UNLOCK(sh); reason = PFRES_MEMORY; goto done; } for (int i = 0; i < 2; i++) { (*sn)->bytes[i] = counter_u64_alloc(M_NOWAIT); (*sn)->packets[i] = counter_u64_alloc(M_NOWAIT); if ((*sn)->bytes[i] == NULL || (*sn)->packets[i] == NULL) { pf_free_src_node(*sn); PF_HASHROW_UNLOCK(sh); reason = PFRES_MEMORY; goto done; } } pf_init_threshold(&(*sn)->conn_rate, rule->max_src_conn_rate.limit, rule->max_src_conn_rate.seconds); MPASS((*sn)->lock == NULL); (*sn)->lock = &sh->lock; (*sn)->af = af; (*sn)->rule.ptr = rule; PF_ACPY(&(*sn)->addr, src, af); LIST_INSERT_HEAD(&sh->nodes, *sn, entry); (*sn)->creation = time_uptime; (*sn)->ruletype = rule->action; (*sn)->states = 1; if ((*sn)->rule.ptr != NULL) counter_u64_add((*sn)->rule.ptr->src_nodes, 1); PF_HASHROW_UNLOCK(sh); counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1); } else { if (rule->max_src_states && (*sn)->states >= rule->max_src_states) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES], 1); reason = PFRES_SRCLIMIT; goto done; } } done: return (reason); } void pf_unlink_src_node(struct pf_ksrc_node *src) { PF_SRC_NODE_LOCK_ASSERT(src); LIST_REMOVE(src, entry); if (src->rule.ptr) counter_u64_add(src->rule.ptr->src_nodes, -1); } u_int pf_free_src_nodes(struct pf_ksrc_node_list *head) { struct pf_ksrc_node *sn, *tmp; u_int count = 0; LIST_FOREACH_SAFE(sn, head, entry, tmp) { pf_free_src_node(sn); count++; } counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count); return (count); } void pf_mtag_initialize(void) { pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) + sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL, UMA_ALIGN_PTR, 0); } /* Per-vnet data storage structures initialization. */ void pf_initialize(void) { struct pf_keyhash *kh; struct pf_idhash *ih; struct pf_srchash *sh; u_int i; if (pf_hashsize == 0 || !powerof2(pf_hashsize)) pf_hashsize = PF_HASHSIZ; if (pf_srchashsize == 0 || !powerof2(pf_srchashsize)) pf_srchashsize = PF_SRCHASHSIZ; V_pf_hashseed = arc4random(); /* States and state keys storage. */ V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_kstate), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z; uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT); uma_zone_set_warning(V_pf_state_z, "PF states limit reached"); V_pf_state_key_z = uma_zcreate("pf state keys", sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_keyhash = mallocarray(pf_hashsize, sizeof(struct pf_keyhash), M_PFHASH, M_NOWAIT | M_ZERO); V_pf_idhash = mallocarray(pf_hashsize, sizeof(struct pf_idhash), M_PFHASH, M_NOWAIT | M_ZERO); if (V_pf_keyhash == NULL || V_pf_idhash == NULL) { printf("pf: Unable to allocate memory for " "state_hashsize %lu.\n", pf_hashsize); free(V_pf_keyhash, M_PFHASH); free(V_pf_idhash, M_PFHASH); pf_hashsize = PF_HASHSIZ; V_pf_keyhash = mallocarray(pf_hashsize, sizeof(struct pf_keyhash), M_PFHASH, M_WAITOK | M_ZERO); V_pf_idhash = mallocarray(pf_hashsize, sizeof(struct pf_idhash), M_PFHASH, M_WAITOK | M_ZERO); } pf_hashmask = pf_hashsize - 1; for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask; i++, kh++, ih++) { mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF); } /* Source nodes. */ V_pf_sources_z = uma_zcreate("pf source nodes", sizeof(struct pf_ksrc_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z; uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT); uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached"); V_pf_srchash = mallocarray(pf_srchashsize, sizeof(struct pf_srchash), M_PFHASH, M_NOWAIT | M_ZERO); if (V_pf_srchash == NULL) { printf("pf: Unable to allocate memory for " "source_hashsize %lu.\n", pf_srchashsize); pf_srchashsize = PF_SRCHASHSIZ; V_pf_srchash = mallocarray(pf_srchashsize, sizeof(struct pf_srchash), M_PFHASH, M_WAITOK | M_ZERO); } pf_srchashmask = pf_srchashsize - 1; for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF); /* ALTQ */ TAILQ_INIT(&V_pf_altqs[0]); TAILQ_INIT(&V_pf_altqs[1]); TAILQ_INIT(&V_pf_altqs[2]); TAILQ_INIT(&V_pf_altqs[3]); TAILQ_INIT(&V_pf_pabuf); V_pf_altqs_active = &V_pf_altqs[0]; V_pf_altq_ifs_active = &V_pf_altqs[1]; V_pf_altqs_inactive = &V_pf_altqs[2]; V_pf_altq_ifs_inactive = &V_pf_altqs[3]; /* Send & overload+flush queues. */ STAILQ_INIT(&V_pf_sendqueue); SLIST_INIT(&V_pf_overloadqueue); TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet); /* Unlinked, but may be referenced rules. */ TAILQ_INIT(&V_pf_unlinked_rules); } void pf_mtag_cleanup(void) { uma_zdestroy(pf_mtag_z); } void pf_cleanup(void) { struct pf_keyhash *kh; struct pf_idhash *ih; struct pf_srchash *sh; struct pf_send_entry *pfse, *next; u_int i; for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask; i++, kh++, ih++) { KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty", __func__)); KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty", __func__)); mtx_destroy(&kh->lock); mtx_destroy(&ih->lock); } free(V_pf_keyhash, M_PFHASH); free(V_pf_idhash, M_PFHASH); for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { KASSERT(LIST_EMPTY(&sh->nodes), ("%s: source node hash not empty", __func__)); mtx_destroy(&sh->lock); } free(V_pf_srchash, M_PFHASH); STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) { m_freem(pfse->pfse_m); free(pfse, M_PFTEMP); } uma_zdestroy(V_pf_sources_z); uma_zdestroy(V_pf_state_z); uma_zdestroy(V_pf_state_key_z); } static int pf_mtag_uminit(void *mem, int size, int how) { struct m_tag *t; t = (struct m_tag *)mem; t->m_tag_cookie = MTAG_ABI_COMPAT; t->m_tag_id = PACKET_TAG_PF; t->m_tag_len = sizeof(struct pf_mtag); t->m_tag_free = pf_mtag_free; return (0); } static void pf_mtag_free(struct m_tag *t) { uma_zfree(pf_mtag_z, t); } struct pf_mtag * pf_get_mtag(struct mbuf *m) { struct m_tag *mtag; if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL) return ((struct pf_mtag *)(mtag + 1)); mtag = uma_zalloc(pf_mtag_z, M_NOWAIT); if (mtag == NULL) return (NULL); bzero(mtag + 1, sizeof(struct pf_mtag)); m_tag_prepend(m, mtag); return ((struct pf_mtag *)(mtag + 1)); } static int pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks, struct pf_kstate *s) { struct pf_keyhash *khs, *khw, *kh; struct pf_state_key *sk, *cur; struct pf_kstate *si, *olds = NULL; int idx; KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__)); KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__)); /* * We need to lock hash slots of both keys. To avoid deadlock * we always lock the slot with lower address first. Unlock order * isn't important. * * We also need to lock ID hash slot before dropping key * locks. On success we return with ID hash slot locked. */ if (skw == sks) { khs = khw = &V_pf_keyhash[pf_hashkey(skw)]; PF_HASHROW_LOCK(khs); } else { khs = &V_pf_keyhash[pf_hashkey(sks)]; khw = &V_pf_keyhash[pf_hashkey(skw)]; if (khs == khw) { PF_HASHROW_LOCK(khs); } else if (khs < khw) { PF_HASHROW_LOCK(khs); PF_HASHROW_LOCK(khw); } else { PF_HASHROW_LOCK(khw); PF_HASHROW_LOCK(khs); } } #define KEYS_UNLOCK() do { \ if (khs != khw) { \ PF_HASHROW_UNLOCK(khs); \ PF_HASHROW_UNLOCK(khw); \ } else \ PF_HASHROW_UNLOCK(khs); \ } while (0) /* * First run: start with wire key. */ sk = skw; kh = khw; idx = PF_SK_WIRE; MPASS(s->lock == NULL); s->lock = &V_pf_idhash[PF_IDHASH(s)].lock; keyattach: LIST_FOREACH(cur, &kh->keys, entry) if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0) break; if (cur != NULL) { /* Key exists. Check for same kif, if none, add to key. */ TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) { struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)]; PF_HASHROW_LOCK(ih); if (si->kif == s->kif && si->direction == s->direction) { if (sk->proto == IPPROTO_TCP && si->src.state >= TCPS_FIN_WAIT_2 && si->dst.state >= TCPS_FIN_WAIT_2) { /* * New state matches an old >FIN_WAIT_2 * state. We can't drop key hash locks, * thus we can't unlink it properly. * * As a workaround we drop it into * TCPS_CLOSED state, schedule purge * ASAP and push it into the very end * of the slot TAILQ, so that it won't * conflict with our new state. */ pf_set_protostate(si, PF_PEER_BOTH, TCPS_CLOSED); si->timeout = PFTM_PURGE; olds = si; } else { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: %s key attach " "failed on %s: ", (idx == PF_SK_WIRE) ? "wire" : "stack", s->kif->pfik_name); pf_print_state_parts(s, (idx == PF_SK_WIRE) ? sk : NULL, (idx == PF_SK_STACK) ? sk : NULL); printf(", existing: "); pf_print_state_parts(si, (idx == PF_SK_WIRE) ? sk : NULL, (idx == PF_SK_STACK) ? sk : NULL); printf("\n"); } PF_HASHROW_UNLOCK(ih); KEYS_UNLOCK(); uma_zfree(V_pf_state_key_z, sk); if (idx == PF_SK_STACK) pf_detach_state(s); return (EEXIST); /* collision! */ } } PF_HASHROW_UNLOCK(ih); } uma_zfree(V_pf_state_key_z, sk); s->key[idx] = cur; } else { LIST_INSERT_HEAD(&kh->keys, sk, entry); s->key[idx] = sk; } stateattach: /* List is sorted, if-bound states before floating. */ if (s->kif == V_pfi_all) TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]); else TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]); if (olds) { TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]); TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds, key_list[idx]); olds = NULL; } /* * Attach done. See how should we (or should not?) * attach a second key. */ if (sks == skw) { s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; idx = PF_SK_STACK; sks = NULL; goto stateattach; } else if (sks != NULL) { /* * Continue attaching with stack key. */ sk = sks; kh = khs; idx = PF_SK_STACK; sks = NULL; goto keyattach; } PF_STATE_LOCK(s); KEYS_UNLOCK(); KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL, ("%s failure", __func__)); return (0); #undef KEYS_UNLOCK } static void pf_detach_state(struct pf_kstate *s) { struct pf_state_key *sks = s->key[PF_SK_STACK]; struct pf_keyhash *kh; if (sks != NULL) { kh = &V_pf_keyhash[pf_hashkey(sks)]; PF_HASHROW_LOCK(kh); if (s->key[PF_SK_STACK] != NULL) pf_state_key_detach(s, PF_SK_STACK); /* * If both point to same key, then we are done. */ if (sks == s->key[PF_SK_WIRE]) { pf_state_key_detach(s, PF_SK_WIRE); PF_HASHROW_UNLOCK(kh); return; } PF_HASHROW_UNLOCK(kh); } if (s->key[PF_SK_WIRE] != NULL) { kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])]; PF_HASHROW_LOCK(kh); if (s->key[PF_SK_WIRE] != NULL) pf_state_key_detach(s, PF_SK_WIRE); PF_HASHROW_UNLOCK(kh); } } static void pf_state_key_detach(struct pf_kstate *s, int idx) { struct pf_state_key *sk = s->key[idx]; #ifdef INVARIANTS struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)]; PF_HASHROW_ASSERT(kh); #endif TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]); s->key[idx] = NULL; if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) { LIST_REMOVE(sk, entry); uma_zfree(V_pf_state_key_z, sk); } } static int pf_state_key_ctor(void *mem, int size, void *arg, int flags) { struct pf_state_key *sk = mem; bzero(sk, sizeof(struct pf_state_key_cmp)); TAILQ_INIT(&sk->states[PF_SK_WIRE]); TAILQ_INIT(&sk->states[PF_SK_STACK]); return (0); } struct pf_state_key * pf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t sport, u_int16_t dport) { struct pf_state_key *sk; sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); if (sk == NULL) return (NULL); PF_ACPY(&sk->addr[pd->sidx], saddr, pd->af); PF_ACPY(&sk->addr[pd->didx], daddr, pd->af); sk->port[pd->sidx] = sport; sk->port[pd->didx] = dport; sk->proto = pd->proto; sk->af = pd->af; return (sk); } struct pf_state_key * pf_state_key_clone(struct pf_state_key *orig) { struct pf_state_key *sk; sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); if (sk == NULL) return (NULL); bcopy(orig, sk, sizeof(struct pf_state_key_cmp)); return (sk); } int pf_state_insert(struct pfi_kkif *kif, struct pfi_kkif *orig_kif, struct pf_state_key *skw, struct pf_state_key *sks, struct pf_kstate *s) { struct pf_idhash *ih; struct pf_kstate *cur; int error; KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]), ("%s: sks not pristine", __func__)); KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]), ("%s: skw not pristine", __func__)); KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); s->kif = kif; s->orig_kif = orig_kif; if (s->id == 0 && s->creatorid == 0) { s->id = alloc_unr64(&V_pf_stateid); s->id = htobe64(s->id); s->creatorid = V_pf_status.hostid; } /* Returns with ID locked on success. */ if ((error = pf_state_key_attach(skw, sks, s)) != 0) return (error); ih = &V_pf_idhash[PF_IDHASH(s)]; PF_HASHROW_ASSERT(ih); LIST_FOREACH(cur, &ih->states, entry) if (cur->id == s->id && cur->creatorid == s->creatorid) break; if (cur != NULL) { PF_HASHROW_UNLOCK(ih); if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: state ID collision: " "id: %016llx creatorid: %08x\n", (unsigned long long)be64toh(s->id), ntohl(s->creatorid)); } pf_detach_state(s); return (EEXIST); } LIST_INSERT_HEAD(&ih->states, s, entry); /* One for keys, one for ID hash. */ refcount_init(&s->refs, 2); pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_INSERT], 1); if (V_pfsync_insert_state_ptr != NULL) V_pfsync_insert_state_ptr(s); /* Returns locked. */ return (0); } /* * Find state by ID: returns with locked row on success. */ struct pf_kstate * pf_find_state_byid(uint64_t id, uint32_t creatorid) { struct pf_idhash *ih; struct pf_kstate *s; pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); ih = &V_pf_idhash[(be64toh(id) % (pf_hashmask + 1))]; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) if (s->id == id && s->creatorid == creatorid) break; if (s == NULL) PF_HASHROW_UNLOCK(ih); return (s); } /* * Find state by key. * Returns with ID hash slot locked on success. */ static struct pf_kstate * pf_find_state(struct pfi_kkif *kif, struct pf_state_key_cmp *key, u_int dir) { struct pf_keyhash *kh; struct pf_state_key *sk; struct pf_kstate *s; int idx; pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; PF_HASHROW_LOCK(kh); LIST_FOREACH(sk, &kh->keys, entry) if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) break; if (sk == NULL) { PF_HASHROW_UNLOCK(kh); return (NULL); } idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK); /* List is sorted, if-bound states before floating ones. */ TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) if (s->kif == V_pfi_all || s->kif == kif) { PF_STATE_LOCK(s); PF_HASHROW_UNLOCK(kh); if (__predict_false(s->timeout >= PFTM_MAX)) { /* * State is either being processed by * pf_unlink_state() in an other thread, or * is scheduled for immediate expiry. */ PF_STATE_UNLOCK(s); return (NULL); } return (s); } PF_HASHROW_UNLOCK(kh); return (NULL); } /* * Returns with ID hash slot locked on success. */ struct pf_kstate * pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) { struct pf_keyhash *kh; struct pf_state_key *sk; struct pf_kstate *s, *ret = NULL; int idx, inout = 0; pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; PF_HASHROW_LOCK(kh); LIST_FOREACH(sk, &kh->keys, entry) if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) break; if (sk == NULL) { PF_HASHROW_UNLOCK(kh); return (NULL); } switch (dir) { case PF_IN: idx = PF_SK_WIRE; break; case PF_OUT: idx = PF_SK_STACK; break; case PF_INOUT: idx = PF_SK_WIRE; inout = 1; break; default: panic("%s: dir %u", __func__, dir); } second_run: TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) { if (more == NULL) { PF_STATE_LOCK(s); PF_HASHROW_UNLOCK(kh); return (s); } if (ret) (*more)++; else { ret = s; PF_STATE_LOCK(s); } } if (inout == 1) { inout = 0; idx = PF_SK_STACK; goto second_run; } PF_HASHROW_UNLOCK(kh); return (ret); } /* * FIXME * This routine is inefficient -- locks the state only to unlock immediately on * return. * It is racy -- after the state is unlocked nothing stops other threads from * removing it. */ bool pf_find_state_all_exists(struct pf_state_key_cmp *key, u_int dir) { struct pf_kstate *s; s = pf_find_state_all(key, dir, NULL); if (s != NULL) { PF_STATE_UNLOCK(s); return (true); } return (false); } /* END state table stuff */ static void pf_send(struct pf_send_entry *pfse) { PF_SENDQ_LOCK(); STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next); PF_SENDQ_UNLOCK(); swi_sched(V_pf_swi_cookie, 0); } static bool pf_isforlocal(struct mbuf *m, int af) { switch (af) { #ifdef INET case AF_INET: { struct ip *ip = mtod(m, struct ip *); return (in_localip(ip->ip_dst)); } #endif #ifdef INET6 case AF_INET6: { struct ip6_hdr *ip6; struct in6_ifaddr *ia; ip6 = mtod(m, struct ip6_hdr *); ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false); if (ia == NULL) return (false); return (! (ia->ia6_flags & IN6_IFF_NOTREADY)); } #endif default: panic("Unsupported af %d", af); } return (false); } void pf_intr(void *v) { struct epoch_tracker et; struct pf_send_head queue; struct pf_send_entry *pfse, *next; CURVNET_SET((struct vnet *)v); PF_SENDQ_LOCK(); queue = V_pf_sendqueue; STAILQ_INIT(&V_pf_sendqueue); PF_SENDQ_UNLOCK(); NET_EPOCH_ENTER(et); STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) { switch (pfse->pfse_type) { #ifdef INET case PFSE_IP: { if (pf_isforlocal(pfse->pfse_m, AF_INET)) { pfse->pfse_m->m_flags |= M_SKIP_FIREWALL; pfse->pfse_m->m_pkthdr.csum_flags |= CSUM_IP_VALID | CSUM_IP_CHECKED; ip_input(pfse->pfse_m); } else { ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL); } break; } case PFSE_ICMP: icmp_error(pfse->pfse_m, pfse->icmpopts.type, pfse->icmpopts.code, 0, pfse->icmpopts.mtu); break; #endif /* INET */ #ifdef INET6 case PFSE_IP6: if (pf_isforlocal(pfse->pfse_m, AF_INET6)) { pfse->pfse_m->m_flags |= M_SKIP_FIREWALL; ip6_input(pfse->pfse_m); } else { ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL, NULL); } break; case PFSE_ICMP6: icmp6_error(pfse->pfse_m, pfse->icmpopts.type, pfse->icmpopts.code, pfse->icmpopts.mtu); break; #endif /* INET6 */ default: panic("%s: unknown type", __func__); } free(pfse, M_PFTEMP); } NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } #define pf_purge_thread_period (hz / 10) #ifdef PF_WANT_32_TO_64_COUNTER static void pf_status_counter_u64_periodic(void) { PF_RULES_RASSERT(); if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 60)) != 0) { return; } for (int i = 0; i < FCNT_MAX; i++) { pf_counter_u64_periodic(&V_pf_status.fcounters[i]); } } static void pf_kif_counter_u64_periodic(void) { struct pfi_kkif *kif; size_t r, run; PF_RULES_RASSERT(); if (__predict_false(V_pf_allkifcount == 0)) { return; } if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) { return; } run = V_pf_allkifcount / 10; if (run < 5) run = 5; for (r = 0; r < run; r++) { kif = LIST_NEXT(V_pf_kifmarker, pfik_allkiflist); if (kif == NULL) { LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist); LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist); break; } LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist); LIST_INSERT_AFTER(kif, V_pf_kifmarker, pfik_allkiflist); for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { for (int k = 0; k < 2; k++) { pf_counter_u64_periodic(&kif->pfik_packets[i][j][k]); pf_counter_u64_periodic(&kif->pfik_bytes[i][j][k]); } } } } } static void pf_rule_counter_u64_periodic(void) { struct pf_krule *rule; size_t r, run; PF_RULES_RASSERT(); if (__predict_false(V_pf_allrulecount == 0)) { return; } if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) { return; } run = V_pf_allrulecount / 10; if (run < 5) run = 5; for (r = 0; r < run; r++) { rule = LIST_NEXT(V_pf_rulemarker, allrulelist); if (rule == NULL) { LIST_REMOVE(V_pf_rulemarker, allrulelist); LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist); break; } LIST_REMOVE(V_pf_rulemarker, allrulelist); LIST_INSERT_AFTER(rule, V_pf_rulemarker, allrulelist); pf_counter_u64_periodic(&rule->evaluations); for (int i = 0; i < 2; i++) { pf_counter_u64_periodic(&rule->packets[i]); pf_counter_u64_periodic(&rule->bytes[i]); } } } static void pf_counter_u64_periodic_main(void) { PF_RULES_RLOCK_TRACKER; V_pf_counter_periodic_iter++; PF_RULES_RLOCK(); pf_counter_u64_critical_enter(); pf_status_counter_u64_periodic(); pf_kif_counter_u64_periodic(); pf_rule_counter_u64_periodic(); pf_counter_u64_critical_exit(); PF_RULES_RUNLOCK(); } #else #define pf_counter_u64_periodic_main() do { } while (0) #endif void pf_purge_thread(void *unused __unused) { VNET_ITERATOR_DECL(vnet_iter); sx_xlock(&pf_end_lock); while (pf_end_threads == 0) { sx_sleep(pf_purge_thread, &pf_end_lock, 0, "pftm", pf_purge_thread_period); VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); /* Wait until V_pf_default_rule is initialized. */ if (V_pf_vnet_active == 0) { CURVNET_RESTORE(); continue; } pf_counter_u64_periodic_main(); /* * Process 1/interval fraction of the state * table every run. */ V_pf_purge_idx = pf_purge_expired_states(V_pf_purge_idx, pf_hashmask / (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10)); /* * Purge other expired types every * PFTM_INTERVAL seconds. */ if (V_pf_purge_idx == 0) { /* * Order is important: * - states and src nodes reference rules * - states and rules reference kifs */ pf_purge_expired_fragments(); pf_purge_expired_src_nodes(); pf_purge_unlinked_rules(); pfi_kkif_purge(); } CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); } pf_end_threads++; sx_xunlock(&pf_end_lock); kproc_exit(0); } void pf_unload_vnet_purge(void) { /* * To cleanse up all kifs and rules we need * two runs: first one clears reference flags, * then pf_purge_expired_states() doesn't * raise them, and then second run frees. */ pf_purge_unlinked_rules(); pfi_kkif_purge(); /* * Now purge everything. */ pf_purge_expired_states(0, pf_hashmask); pf_purge_fragments(UINT_MAX); pf_purge_expired_src_nodes(); /* * Now all kifs & rules should be unreferenced, * thus should be successfully freed. */ pf_purge_unlinked_rules(); pfi_kkif_purge(); } u_int32_t pf_state_expires(const struct pf_kstate *state) { u_int32_t timeout; u_int32_t start; u_int32_t end; u_int32_t states; /* handle all PFTM_* > PFTM_MAX here */ if (state->timeout == PFTM_PURGE) return (time_uptime); KASSERT(state->timeout != PFTM_UNLINKED, ("pf_state_expires: timeout == PFTM_UNLINKED")); KASSERT((state->timeout < PFTM_MAX), ("pf_state_expires: timeout > PFTM_MAX")); timeout = state->rule.ptr->timeout[state->timeout]; if (!timeout) timeout = V_pf_default_rule.timeout[state->timeout]; start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; if (start && state->rule.ptr != &V_pf_default_rule) { end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; states = counter_u64_fetch(state->rule.ptr->states_cur); } else { start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START]; end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END]; states = V_pf_status.states; } if (end && states > start && start < end) { if (states < end) { timeout = (u_int64_t)timeout * (end - states) / (end - start); return (state->expire + timeout); } else return (time_uptime); } return (state->expire + timeout); } void pf_purge_expired_src_nodes(void) { struct pf_ksrc_node_list freelist; struct pf_srchash *sh; struct pf_ksrc_node *cur, *next; int i; LIST_INIT(&freelist); for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next) if (cur->states == 0 && cur->expire <= time_uptime) { pf_unlink_src_node(cur); LIST_INSERT_HEAD(&freelist, cur, entry); } else if (cur->rule.ptr != NULL) cur->rule.ptr->rule_ref |= PFRULE_REFS; PF_HASHROW_UNLOCK(sh); } pf_free_src_nodes(&freelist); V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z); } static void pf_src_tree_remove_state(struct pf_kstate *s) { struct pf_ksrc_node *sn; uint32_t timeout; timeout = s->rule.ptr->timeout[PFTM_SRC_NODE] ? s->rule.ptr->timeout[PFTM_SRC_NODE] : V_pf_default_rule.timeout[PFTM_SRC_NODE]; if (s->src_node != NULL) { sn = s->src_node; PF_SRC_NODE_LOCK(sn); if (s->src.tcp_est) --sn->conn; if (--sn->states == 0) sn->expire = time_uptime + timeout; PF_SRC_NODE_UNLOCK(sn); } if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { sn = s->nat_src_node; PF_SRC_NODE_LOCK(sn); if (--sn->states == 0) sn->expire = time_uptime + timeout; PF_SRC_NODE_UNLOCK(sn); } s->src_node = s->nat_src_node = NULL; } /* * Unlink and potentilly free a state. Function may be * called with ID hash row locked, but always returns * unlocked, since it needs to go through key hash locking. */ int pf_unlink_state(struct pf_kstate *s) { struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)]; PF_HASHROW_ASSERT(ih); if (s->timeout == PFTM_UNLINKED) { /* * State is being processed * by pf_unlink_state() in * an other thread. */ PF_HASHROW_UNLOCK(ih); return (0); /* XXXGL: undefined actually */ } if (s->src.state == PF_TCPS_PROXY_DST) { /* XXX wire key the right one? */ pf_send_tcp(s->rule.ptr, s->key[PF_SK_WIRE]->af, &s->key[PF_SK_WIRE]->addr[1], &s->key[PF_SK_WIRE]->addr[0], s->key[PF_SK_WIRE]->port[1], s->key[PF_SK_WIRE]->port[0], s->src.seqhi, s->src.seqlo + 1, - TH_RST|TH_ACK, 0, 0, 0, true, s->tag, 0, s->rtableid); + TH_RST|TH_ACK, 0, 0, 0, true, s->tag, 0, s->act.rtableid); } LIST_REMOVE(s, entry); pf_src_tree_remove_state(s); if (V_pfsync_delete_state_ptr != NULL) V_pfsync_delete_state_ptr(s); STATE_DEC_COUNTERS(s); s->timeout = PFTM_UNLINKED; /* Ensure we remove it from the list of halfopen states, if needed. */ if (s->key[PF_SK_STACK] != NULL && s->key[PF_SK_STACK]->proto == IPPROTO_TCP) pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED); PF_HASHROW_UNLOCK(ih); pf_detach_state(s); /* pf_state_insert() initialises refs to 2 */ return (pf_release_staten(s, 2)); } struct pf_kstate * pf_alloc_state(int flags) { return (uma_zalloc(V_pf_state_z, flags | M_ZERO)); } void pf_free_state(struct pf_kstate *cur) { struct pf_krule_item *ri; KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur)); KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__, cur->timeout)); while ((ri = SLIST_FIRST(&cur->match_rules))) { SLIST_REMOVE_HEAD(&cur->match_rules, entry); free(ri, M_PF_RULE_ITEM); } pf_normalize_tcp_cleanup(cur); uma_zfree(V_pf_state_z, cur); pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1); } /* * Called only from pf_purge_thread(), thus serialized. */ static u_int pf_purge_expired_states(u_int i, int maxcheck) { struct pf_idhash *ih; struct pf_kstate *s; struct pf_krule_item *mrm; V_pf_status.states = uma_zone_get_cur(V_pf_state_z); /* * Go through hash and unlink states that expire now. */ while (maxcheck > 0) { ih = &V_pf_idhash[i]; /* only take the lock if we expect to do work */ if (!LIST_EMPTY(&ih->states)) { relock: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { if (pf_state_expires(s) <= time_uptime) { V_pf_status.states -= pf_unlink_state(s); goto relock; } s->rule.ptr->rule_ref |= PFRULE_REFS; if (s->nat_rule.ptr != NULL) s->nat_rule.ptr->rule_ref |= PFRULE_REFS; if (s->anchor.ptr != NULL) s->anchor.ptr->rule_ref |= PFRULE_REFS; s->kif->pfik_flags |= PFI_IFLAG_REFS; SLIST_FOREACH(mrm, &s->match_rules, entry) mrm->r->rule_ref |= PFRULE_REFS; if (s->rt_kif) s->rt_kif->pfik_flags |= PFI_IFLAG_REFS; } PF_HASHROW_UNLOCK(ih); } /* Return when we hit end of hash. */ if (++i > pf_hashmask) { V_pf_status.states = uma_zone_get_cur(V_pf_state_z); return (0); } maxcheck--; } V_pf_status.states = uma_zone_get_cur(V_pf_state_z); return (i); } static void pf_purge_unlinked_rules(void) { struct pf_krulequeue tmpq; struct pf_krule *r, *r1; /* * If we have overloading task pending, then we'd * better skip purging this time. There is a tiny * probability that overloading task references * an already unlinked rule. */ PF_OVERLOADQ_LOCK(); if (!SLIST_EMPTY(&V_pf_overloadqueue)) { PF_OVERLOADQ_UNLOCK(); return; } PF_OVERLOADQ_UNLOCK(); /* * Do naive mark-and-sweep garbage collecting of old rules. * Reference flag is raised by pf_purge_expired_states() * and pf_purge_expired_src_nodes(). * * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK, * use a temporary queue. */ TAILQ_INIT(&tmpq); PF_UNLNKDRULES_LOCK(); TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) { if (!(r->rule_ref & PFRULE_REFS)) { TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries); TAILQ_INSERT_TAIL(&tmpq, r, entries); } else r->rule_ref &= ~PFRULE_REFS; } PF_UNLNKDRULES_UNLOCK(); if (!TAILQ_EMPTY(&tmpq)) { PF_CONFIG_LOCK(); PF_RULES_WLOCK(); TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) { TAILQ_REMOVE(&tmpq, r, entries); pf_free_rule(r); } PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); } } void pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: { u_int32_t a = ntohl(addr->addr32[0]); printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, (a>>8)&255, a&255); if (p) { p = ntohs(p); printf(":%u", p); } break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { u_int16_t b; u_int8_t i, curstart, curend, maxstart, maxend; curstart = curend = maxstart = maxend = 255; for (i = 0; i < 8; i++) { if (!addr->addr16[i]) { if (curstart == 255) curstart = i; curend = i; } else { if ((curend - curstart) > (maxend - maxstart)) { maxstart = curstart; maxend = curend; } curstart = curend = 255; } } if ((curend - curstart) > (maxend - maxstart)) { maxstart = curstart; maxend = curend; } for (i = 0; i < 8; i++) { if (i >= maxstart && i <= maxend) { if (i == 0) printf(":"); if (i == maxend) printf(":"); } else { b = ntohs(addr->addr16[i]); printf("%x", b); if (i < 7) printf(":"); } } if (p) { p = ntohs(p); printf("[%u]", p); } break; } #endif /* INET6 */ } } void pf_print_state(struct pf_kstate *s) { pf_print_state_parts(s, NULL, NULL); } static void pf_print_state_parts(struct pf_kstate *s, struct pf_state_key *skwp, struct pf_state_key *sksp) { struct pf_state_key *skw, *sks; u_int8_t proto, dir; /* Do our best to fill these, but they're skipped if NULL */ skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); proto = skw ? skw->proto : (sks ? sks->proto : 0); dir = s ? s->direction : 0; switch (proto) { case IPPROTO_IPV4: printf("IPv4"); break; case IPPROTO_IPV6: printf("IPv6"); break; case IPPROTO_TCP: printf("TCP"); break; case IPPROTO_UDP: printf("UDP"); break; case IPPROTO_ICMP: printf("ICMP"); break; case IPPROTO_ICMPV6: printf("ICMPv6"); break; default: printf("%u", proto); break; } switch (dir) { case PF_IN: printf(" in"); break; case PF_OUT: printf(" out"); break; } if (skw) { printf(" wire: "); pf_print_host(&skw->addr[0], skw->port[0], skw->af); printf(" "); pf_print_host(&skw->addr[1], skw->port[1], skw->af); } if (sks) { printf(" stack: "); if (sks != skw) { pf_print_host(&sks->addr[0], sks->port[0], sks->af); printf(" "); pf_print_host(&sks->addr[1], sks->port[1], sks->af); } else printf("-"); } if (s) { if (proto == IPPROTO_TCP) { printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo, s->src.seqhi, s->src.max_win, s->src.seqdiff); if (s->src.wscale && s->dst.wscale) printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK); printf("]"); printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo, s->dst.seqhi, s->dst.max_win, s->dst.seqdiff); if (s->src.wscale && s->dst.wscale) printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK); printf("]"); } printf(" %u:%u", s->src.state, s->dst.state); } } void pf_print_flags(u_int8_t f) { if (f) printf(" "); if (f & TH_FIN) printf("F"); if (f & TH_SYN) printf("S"); if (f & TH_RST) printf("R"); if (f & TH_PUSH) printf("P"); if (f & TH_ACK) printf("A"); if (f & TH_URG) printf("U"); if (f & TH_ECE) printf("E"); if (f & TH_CWR) printf("W"); } #define PF_SET_SKIP_STEPS(i) \ do { \ while (head[i] != cur) { \ head[i]->skip[i].ptr = cur; \ head[i] = TAILQ_NEXT(head[i], entries); \ } \ } while (0) void pf_calc_skip_steps(struct pf_krulequeue *rules) { struct pf_krule *cur, *prev, *head[PF_SKIP_COUNT]; int i; cur = TAILQ_FIRST(rules); prev = cur; for (i = 0; i < PF_SKIP_COUNT; ++i) head[i] = cur; while (cur != NULL) { if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) PF_SET_SKIP_STEPS(PF_SKIP_IFP); if (cur->direction != prev->direction) PF_SET_SKIP_STEPS(PF_SKIP_DIR); if (cur->af != prev->af) PF_SET_SKIP_STEPS(PF_SKIP_AF); if (cur->proto != prev->proto) PF_SET_SKIP_STEPS(PF_SKIP_PROTO); if (cur->src.neg != prev->src.neg || pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); if (cur->src.port[0] != prev->src.port[0] || cur->src.port[1] != prev->src.port[1] || cur->src.port_op != prev->src.port_op) PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); if (cur->dst.neg != prev->dst.neg || pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); if (cur->dst.port[0] != prev->dst.port[0] || cur->dst.port[1] != prev->dst.port[1] || cur->dst.port_op != prev->dst.port_op) PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); prev = cur; cur = TAILQ_NEXT(cur, entries); } for (i = 0; i < PF_SKIP_COUNT; ++i) PF_SET_SKIP_STEPS(i); } int pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) { if (aw1->type != aw2->type) return (1); switch (aw1->type) { case PF_ADDR_ADDRMASK: case PF_ADDR_RANGE: if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) return (1); if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) return (1); return (0); case PF_ADDR_DYNIFTL: return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); case PF_ADDR_NOROUTE: case PF_ADDR_URPFFAILED: return (0); case PF_ADDR_TABLE: return (aw1->p.tbl != aw2->p.tbl); default: printf("invalid address type: %d\n", aw1->type); return (1); } } /** * Checksum updates are a little complicated because the checksum in the TCP/UDP * header isn't always a full checksum. In some cases (i.e. output) it's a * pseudo-header checksum, which is a partial checksum over src/dst IP * addresses, protocol number and length. * * That means we have the following cases: * * Input or forwarding: we don't have TSO, the checksum fields are full * checksums, we need to update the checksum whenever we change anything. * * Output (i.e. the checksum is a pseudo-header checksum): * x The field being updated is src/dst address or affects the length of * the packet. We need to update the pseudo-header checksum (note that this * checksum is not ones' complement). * x Some other field is being modified (e.g. src/dst port numbers): We * don't have to update anything. **/ u_int16_t pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) { u_int32_t x; x = cksum + old - new; x = (x + (x >> 16)) & 0xffff; /* optimise: eliminate a branch when not udp */ if (udp && cksum == 0x0000) return cksum; if (udp && x == 0x0000) x = 0xffff; return (u_int16_t)(x); } static void pf_patch_8(struct mbuf *m, u_int16_t *cksum, u_int8_t *f, u_int8_t v, bool hi, u_int8_t udp) { u_int16_t old = htons(hi ? (*f << 8) : *f); u_int16_t new = htons(hi ? ( v << 8) : v); if (*f == v) return; *f = v; if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) return; *cksum = pf_cksum_fixup(*cksum, old, new, udp); } void pf_patch_16_unaligned(struct mbuf *m, u_int16_t *cksum, void *f, u_int16_t v, bool hi, u_int8_t udp) { u_int8_t *fb = (u_int8_t *)f; u_int8_t *vb = (u_int8_t *)&v; pf_patch_8(m, cksum, fb++, *vb++, hi, udp); pf_patch_8(m, cksum, fb++, *vb++, !hi, udp); } void pf_patch_32_unaligned(struct mbuf *m, u_int16_t *cksum, void *f, u_int32_t v, bool hi, u_int8_t udp) { u_int8_t *fb = (u_int8_t *)f; u_int8_t *vb = (u_int8_t *)&v; pf_patch_8(m, cksum, fb++, *vb++, hi, udp); pf_patch_8(m, cksum, fb++, *vb++, !hi, udp); pf_patch_8(m, cksum, fb++, *vb++, hi, udp); pf_patch_8(m, cksum, fb++, *vb++, !hi, udp); } u_int16_t pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) { if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) return (cksum); return (pf_cksum_fixup(cksum, old, new, udp)); } static void pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) { struct pf_addr ao; u_int16_t po = *p; PF_ACPY(&ao, a, af); PF_ACPY(a, an, af); if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) *pc = ~*pc; *p = pn; switch (af) { #ifdef INET case AF_INET: *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, ao.addr16[0], an->addr16[0], 0), ao.addr16[1], an->addr16[1], 0); *p = pn; *pc = pf_cksum_fixup(pf_cksum_fixup(*pc, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u); *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u); break; #endif /* INET */ #ifdef INET6 case AF_INET6: *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*pc, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u), ao.addr16[2], an->addr16[2], u), ao.addr16[3], an->addr16[3], u), ao.addr16[4], an->addr16[4], u), ao.addr16[5], an->addr16[5], u), ao.addr16[6], an->addr16[6], u), ao.addr16[7], an->addr16[7], u); *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u); break; #endif /* INET6 */ } if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { *pc = ~*pc; if (! *pc) *pc = 0xffff; } } /* Changes a u_int32_t. Uses a void * so there are no align restrictions */ void pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) { u_int32_t ao; memcpy(&ao, a, sizeof(ao)); memcpy(a, &an, sizeof(u_int32_t)); *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), ao % 65536, an % 65536, u); } void pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp) { u_int32_t ao; memcpy(&ao, a, sizeof(ao)); memcpy(a, &an, sizeof(u_int32_t)); *c = pf_proto_cksum_fixup(m, pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp), ao % 65536, an % 65536, udp); } #ifdef INET6 static void pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) { struct pf_addr ao; PF_ACPY(&ao, a, AF_INET6); PF_ACPY(a, an, AF_INET6); *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*c, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u), ao.addr16[2], an->addr16[2], u), ao.addr16[3], an->addr16[3], u), ao.addr16[4], an->addr16[4], u), ao.addr16[5], an->addr16[5], u), ao.addr16[6], an->addr16[6], u), ao.addr16[7], an->addr16[7], u); } #endif /* INET6 */ static void pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) { struct pf_addr oia, ooa; PF_ACPY(&oia, ia, af); if (oa) PF_ACPY(&ooa, oa, af); /* Change inner protocol port, fix inner protocol checksum. */ if (ip != NULL) { u_int16_t oip = *ip; u_int32_t opc; if (pc != NULL) opc = *pc; *ip = np; if (pc != NULL) *pc = pf_cksum_fixup(*pc, oip, *ip, u); *ic = pf_cksum_fixup(*ic, oip, *ip, 0); if (pc != NULL) *ic = pf_cksum_fixup(*ic, opc, *pc, 0); } /* Change inner ip address, fix inner ip and icmp checksums. */ PF_ACPY(ia, na, af); switch (af) { #ifdef INET case AF_INET: { u_int32_t oh2c = *h2c; *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, oia.addr16[0], ia->addr16[0], 0), oia.addr16[1], ia->addr16[1], 0); *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, oia.addr16[0], ia->addr16[0], 0), oia.addr16[1], ia->addr16[1], 0); *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); break; } #endif /* INET */ #ifdef INET6 case AF_INET6: *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*ic, oia.addr16[0], ia->addr16[0], u), oia.addr16[1], ia->addr16[1], u), oia.addr16[2], ia->addr16[2], u), oia.addr16[3], ia->addr16[3], u), oia.addr16[4], ia->addr16[4], u), oia.addr16[5], ia->addr16[5], u), oia.addr16[6], ia->addr16[6], u), oia.addr16[7], ia->addr16[7], u); break; #endif /* INET6 */ } /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */ if (oa) { PF_ACPY(oa, na, af); switch (af) { #ifdef INET case AF_INET: *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, ooa.addr16[0], oa->addr16[0], 0), ooa.addr16[1], oa->addr16[1], 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*ic, ooa.addr16[0], oa->addr16[0], u), ooa.addr16[1], oa->addr16[1], u), ooa.addr16[2], oa->addr16[2], u), ooa.addr16[3], oa->addr16[3], u), ooa.addr16[4], oa->addr16[4], u), ooa.addr16[5], oa->addr16[5], u), ooa.addr16[6], oa->addr16[6], u), ooa.addr16[7], oa->addr16[7], u); break; #endif /* INET6 */ } } } /* * Need to modulate the sequence numbers in the TCP SACK option * (credits to Krzysztof Pfaff for report and patch) */ static int pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, struct tcphdr *th, struct pf_state_peer *dst) { int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; u_int8_t opts[TCP_MAXOLEN], *opt = opts; int copyback = 0, i, olen; struct sackblk sack; #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) if (hlen < TCPOLEN_SACKLEN || !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) return 0; while (hlen >= TCPOLEN_SACKLEN) { size_t startoff = opt - opts; olen = opt[1]; switch (*opt) { case TCPOPT_EOL: /* FALLTHROUGH */ case TCPOPT_NOP: opt++; hlen--; break; case TCPOPT_SACK: if (olen > hlen) olen = hlen; if (olen >= TCPOLEN_SACKLEN) { for (i = 2; i + TCPOLEN_SACK <= olen; i += TCPOLEN_SACK) { memcpy(&sack, &opt[i], sizeof(sack)); pf_patch_32_unaligned(m, &th->th_sum, &sack.start, htonl(ntohl(sack.start) - dst->seqdiff), PF_ALGNMNT(startoff), 0); pf_patch_32_unaligned(m, &th->th_sum, &sack.end, htonl(ntohl(sack.end) - dst->seqdiff), PF_ALGNMNT(startoff), 0); memcpy(&opt[i], &sack, sizeof(sack)); } copyback = 1; } /* FALLTHROUGH */ default: if (olen < 2) olen = 2; hlen -= olen; opt += olen; } } if (copyback) m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts); return (copyback); } struct mbuf * pf_build_tcp(const struct pf_krule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, bool skip_firewall, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid) { struct mbuf *m; int len, tlen; #ifdef INET struct ip *h = NULL; #endif /* INET */ #ifdef INET6 struct ip6_hdr *h6 = NULL; #endif /* INET6 */ struct tcphdr *th; char *opt; struct pf_mtag *pf_mtag; len = 0; th = NULL; /* maximum segment size tcp option */ tlen = sizeof(struct tcphdr); if (mss) tlen += 4; switch (af) { #ifdef INET case AF_INET: len = sizeof(struct ip) + tlen; break; #endif /* INET */ #ifdef INET6 case AF_INET6: len = sizeof(struct ip6_hdr) + tlen; break; #endif /* INET6 */ default: panic("%s: unsupported af %d", __func__, af); } m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); #ifdef MAC mac_netinet_firewall_send(m); #endif if ((pf_mtag = pf_get_mtag(m)) == NULL) { m_freem(m); return (NULL); } if (skip_firewall) m->m_flags |= M_SKIP_FIREWALL; pf_mtag->tag = mtag_tag; pf_mtag->flags = mtag_flags; if (rtableid >= 0) M_SETFIB(m, rtableid); #ifdef ALTQ if (r != NULL && r->qid) { pf_mtag->qid = r->qid; /* add hints for ecn */ pf_mtag->hdr = mtod(m, struct ip *); } #endif /* ALTQ */ m->m_data += max_linkhdr; m->m_pkthdr.len = m->m_len = len; /* The rest of the stack assumes a rcvif, so provide one. * This is a locally generated packet, so .. close enough. */ m->m_pkthdr.rcvif = V_loif; bzero(m->m_data, len); switch (af) { #ifdef INET case AF_INET: h = mtod(m, struct ip *); /* IP header fields included in the TCP checksum */ h->ip_p = IPPROTO_TCP; h->ip_len = htons(tlen); h->ip_src.s_addr = saddr->v4.s_addr; h->ip_dst.s_addr = daddr->v4.s_addr; th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); break; #endif /* INET */ #ifdef INET6 case AF_INET6: h6 = mtod(m, struct ip6_hdr *); /* IP header fields included in the TCP checksum */ h6->ip6_nxt = IPPROTO_TCP; h6->ip6_plen = htons(tlen); memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); break; #endif /* INET6 */ } /* TCP header */ th->th_sport = sport; th->th_dport = dport; th->th_seq = htonl(seq); th->th_ack = htonl(ack); th->th_off = tlen >> 2; th->th_flags = tcp_flags; th->th_win = htons(win); if (mss) { opt = (char *)(th + 1); opt[0] = TCPOPT_MAXSEG; opt[1] = 4; HTONS(mss); bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); } switch (af) { #ifdef INET case AF_INET: /* TCP checksum */ th->th_sum = in_cksum(m, len); /* Finish the IP header */ h->ip_v = 4; h->ip_hl = sizeof(*h) >> 2; h->ip_tos = IPTOS_LOWDELAY; h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0); h->ip_len = htons(len); h->ip_ttl = ttl ? ttl : V_ip_defttl; h->ip_sum = 0; break; #endif /* INET */ #ifdef INET6 case AF_INET6: /* TCP checksum */ th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), tlen); h6->ip6_vfc |= IPV6_VERSION; h6->ip6_hlim = IPV6_DEFHLIM; break; #endif /* INET6 */ } return (m); } void pf_send_tcp(const struct pf_krule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, bool skip_firewall, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid) { struct pf_send_entry *pfse; struct mbuf *m; m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, tcp_flags, win, mss, ttl, skip_firewall, mtag_tag, mtag_flags, rtableid); if (m == NULL) return; /* Allocate outgoing queue entry, mbuf and mbuf tag. */ pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); if (pfse == NULL) { m_freem(m); return; } switch (af) { #ifdef INET case AF_INET: pfse->pfse_type = PFSE_IP; break; #endif /* INET */ #ifdef INET6 case AF_INET6: pfse->pfse_type = PFSE_IP6; break; #endif /* INET6 */ } pfse->pfse_m = m; pf_send(pfse); } static void pf_return(struct pf_krule *r, struct pf_krule *nr, struct pf_pdesc *pd, struct pf_state_key *sk, int off, struct mbuf *m, struct tcphdr *th, struct pfi_kkif *kif, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen, u_short *reason, int rtableid) { struct pf_addr * const saddr = pd->src; struct pf_addr * const daddr = pd->dst; sa_family_t af = pd->af; /* undo NAT changes, if they have taken place */ if (nr != NULL) { PF_ACPY(saddr, &sk->addr[pd->sidx], af); PF_ACPY(daddr, &sk->addr[pd->didx], af); if (pd->sport) *pd->sport = sk->port[pd->sidx]; if (pd->dport) *pd->dport = sk->port[pd->didx]; if (pd->proto_sum) *pd->proto_sum = bproto_sum; if (pd->ip_sum) *pd->ip_sum = bip_sum; m_copyback(m, off, hdrlen, pd->hdr.any); } if (pd->proto == IPPROTO_TCP && ((r->rule_flag & PFRULE_RETURNRST) || (r->rule_flag & PFRULE_RETURN)) && !(th->th_flags & TH_RST)) { u_int32_t ack = ntohl(th->th_seq) + pd->p_len; int len = 0; #ifdef INET struct ip *h4; #endif #ifdef INET6 struct ip6_hdr *h6; #endif switch (af) { #ifdef INET case AF_INET: h4 = mtod(m, struct ip *); len = ntohs(h4->ip_len) - off; break; #endif #ifdef INET6 case AF_INET6: h6 = mtod(m, struct ip6_hdr *); len = ntohs(h6->ip6_plen) - (off - sizeof(*h6)); break; #endif } if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af)) REASON_SET(reason, PFRES_PROTCKSUM); else { if (th->th_flags & TH_SYN) ack++; if (th->th_flags & TH_FIN) ack++; pf_send_tcp(r, af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, r->return_ttl, true, 0, 0, rtableid); } } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && r->return_icmp) pf_send_icmp(m, r->return_icmp >> 8, r->return_icmp & 255, af, r, rtableid); else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 && r->return_icmp6) pf_send_icmp(m, r->return_icmp6 >> 8, r->return_icmp6 & 255, af, r, rtableid); } static int pf_match_ieee8021q_pcp(u_int8_t prio, struct mbuf *m) { struct m_tag *mtag; u_int8_t mpcp; mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); if (mtag == NULL) return (0); if (prio == PF_PRIO_ZERO) prio = 0; mpcp = *(uint8_t *)(mtag + 1); return (mpcp == prio); } static int pf_icmp_to_bandlim(uint8_t type) { switch (type) { case ICMP_ECHO: case ICMP_ECHOREPLY: return (BANDLIM_ICMP_ECHO); case ICMP_TSTAMP: case ICMP_TSTAMPREPLY: return (BANDLIM_ICMP_TSTAMP); case ICMP_UNREACH: default: return (BANDLIM_ICMP_UNREACH); } } static void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_krule *r, int rtableid) { struct pf_send_entry *pfse; struct mbuf *m0; struct pf_mtag *pf_mtag; /* ICMP packet rate limitation. */ #ifdef INET6 if (af == AF_INET6) { if (icmp6_ratelimit(NULL, type, code)) return; } #endif #ifdef INET if (af == AF_INET) { if (badport_bandlim(pf_icmp_to_bandlim(type)) != 0) return; } #endif /* Allocate outgoing queue entry, mbuf and mbuf tag. */ pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); if (pfse == NULL) return; if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) { free(pfse, M_PFTEMP); return; } if ((pf_mtag = pf_get_mtag(m0)) == NULL) { free(pfse, M_PFTEMP); return; } /* XXX: revisit */ m0->m_flags |= M_SKIP_FIREWALL; if (rtableid >= 0) M_SETFIB(m0, rtableid); #ifdef ALTQ if (r->qid) { pf_mtag->qid = r->qid; /* add hints for ecn */ pf_mtag->hdr = mtod(m0, struct ip *); } #endif /* ALTQ */ switch (af) { #ifdef INET case AF_INET: pfse->pfse_type = PFSE_ICMP; break; #endif /* INET */ #ifdef INET6 case AF_INET6: pfse->pfse_type = PFSE_ICMP6; break; #endif /* INET6 */ } pfse->pfse_m = m0; pfse->icmpopts.type = type; pfse->icmpopts.code = code; pf_send(pfse); } /* * Return 1 if the addresses a and b match (with mask m), otherwise return 0. * If n is 0, they match if they are equal. If n is != 0, they match if they * are different. */ int pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, struct pf_addr *b, sa_family_t af) { int match = 0; switch (af) { #ifdef INET case AF_INET: if ((a->addr32[0] & m->addr32[0]) == (b->addr32[0] & m->addr32[0])) match++; break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (((a->addr32[0] & m->addr32[0]) == (b->addr32[0] & m->addr32[0])) && ((a->addr32[1] & m->addr32[1]) == (b->addr32[1] & m->addr32[1])) && ((a->addr32[2] & m->addr32[2]) == (b->addr32[2] & m->addr32[2])) && ((a->addr32[3] & m->addr32[3]) == (b->addr32[3] & m->addr32[3]))) match++; break; #endif /* INET6 */ } if (match) { if (n) return (0); else return (1); } else { if (n) return (1); else return (0); } } /* * Return 1 if b <= a <= e, otherwise return 0. */ int pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, struct pf_addr *a, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) return (0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: { int i; /* check a >= b */ for (i = 0; i < 4; ++i) if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) break; else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) return (0); /* check a <= e */ for (i = 0; i < 4; ++i) if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) break; else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) return (0); break; } #endif /* INET6 */ } return (1); } static int pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) { switch (op) { case PF_OP_IRG: return ((p > a1) && (p < a2)); case PF_OP_XRG: return ((p < a1) || (p > a2)); case PF_OP_RRG: return ((p >= a1) && (p <= a2)); case PF_OP_EQ: return (p == a1); case PF_OP_NE: return (p != a1); case PF_OP_LT: return (p < a1); case PF_OP_LE: return (p <= a1); case PF_OP_GT: return (p > a1); case PF_OP_GE: return (p >= a1); } return (0); /* never reached */ } int pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) { NTOHS(a1); NTOHS(a2); NTOHS(p); return (pf_match(op, a1, a2, p)); } static int pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) { if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) return (0); return (pf_match(op, a1, a2, u)); } static int pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) { if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) return (0); return (pf_match(op, a1, a2, g)); } int pf_match_tag(struct mbuf *m, struct pf_krule *r, int *tag, int mtag) { if (*tag == -1) *tag = mtag; return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } int pf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag) { KASSERT(tag > 0, ("%s: tag %d", __func__, tag)); if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(m)) == NULL)) return (ENOMEM); pd->pf_mtag->tag = tag; return (0); } #define PF_ANCHOR_STACKSIZE 32 struct pf_kanchor_stackframe { struct pf_kruleset *rs; struct pf_krule *r; /* XXX: + match bit */ struct pf_kanchor *child; }; /* * XXX: We rely on malloc(9) returning pointer aligned addresses. */ #define PF_ANCHORSTACK_MATCH 0x00000001 #define PF_ANCHORSTACK_MASK (PF_ANCHORSTACK_MATCH) #define PF_ANCHOR_MATCH(f) ((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH) #define PF_ANCHOR_RULE(f) (struct pf_krule *) \ ((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK) #define PF_ANCHOR_SET_MATCH(f) do { (f)->r = (void *) \ ((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \ } while (0) void pf_step_into_anchor(struct pf_kanchor_stackframe *stack, int *depth, struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a, int *match) { struct pf_kanchor_stackframe *f; PF_RULES_RASSERT(); if (match) *match = 0; if (*depth >= PF_ANCHOR_STACKSIZE) { printf("%s: anchor stack overflow on %s\n", __func__, (*r)->anchor->name); *r = TAILQ_NEXT(*r, entries); return; } else if (*depth == 0 && a != NULL) *a = *r; f = stack + (*depth)++; f->rs = *rs; f->r = *r; if ((*r)->anchor_wildcard) { struct pf_kanchor_node *parent = &(*r)->anchor->children; if ((f->child = RB_MIN(pf_kanchor_node, parent)) == NULL) { *r = NULL; return; } *rs = &f->child->ruleset; } else { f->child = NULL; *rs = &(*r)->anchor->ruleset; } *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); } int pf_step_out_of_anchor(struct pf_kanchor_stackframe *stack, int *depth, struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a, int *match) { struct pf_kanchor_stackframe *f; struct pf_krule *fr; int quick = 0; PF_RULES_RASSERT(); do { if (*depth <= 0) break; f = stack + *depth - 1; fr = PF_ANCHOR_RULE(f); if (f->child != NULL) { /* * This block traverses through * a wildcard anchor. */ if (match != NULL && *match) { /* * If any of "*" matched, then * "foo/ *" matched, mark frame * appropriately. */ PF_ANCHOR_SET_MATCH(f); *match = 0; } f->child = RB_NEXT(pf_kanchor_node, &fr->anchor->children, f->child); if (f->child != NULL) { *rs = &f->child->ruleset; *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); if (*r == NULL) continue; else break; } } (*depth)--; if (*depth == 0 && a != NULL) *a = NULL; *rs = f->rs; if (PF_ANCHOR_MATCH(f) || (match != NULL && *match)) quick = fr->quick; *r = TAILQ_NEXT(fr, entries); } while (*r == NULL); return (quick); } struct pf_keth_anchor_stackframe { struct pf_keth_ruleset *rs; struct pf_keth_rule *r; /* XXX: + match bit */ struct pf_keth_anchor *child; }; #define PF_ETH_ANCHOR_MATCH(f) ((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH) #define PF_ETH_ANCHOR_RULE(f) (struct pf_keth_rule *) \ ((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK) #define PF_ETH_ANCHOR_SET_MATCH(f) do { (f)->r = (void *) \ ((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \ } while (0) void pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth, struct pf_keth_ruleset **rs, struct pf_keth_rule **r, struct pf_keth_rule **a, int *match) { struct pf_keth_anchor_stackframe *f; NET_EPOCH_ASSERT(); if (match) *match = 0; if (*depth >= PF_ANCHOR_STACKSIZE) { printf("%s: anchor stack overflow on %s\n", __func__, (*r)->anchor->name); *r = TAILQ_NEXT(*r, entries); return; } else if (*depth == 0 && a != NULL) *a = *r; f = stack + (*depth)++; f->rs = *rs; f->r = *r; if ((*r)->anchor_wildcard) { struct pf_keth_anchor_node *parent = &(*r)->anchor->children; if ((f->child = RB_MIN(pf_keth_anchor_node, parent)) == NULL) { *r = NULL; return; } *rs = &f->child->ruleset; } else { f->child = NULL; *rs = &(*r)->anchor->ruleset; } *r = TAILQ_FIRST((*rs)->active.rules); } int pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth, struct pf_keth_ruleset **rs, struct pf_keth_rule **r, struct pf_keth_rule **a, int *match) { struct pf_keth_anchor_stackframe *f; struct pf_keth_rule *fr; int quick = 0; NET_EPOCH_ASSERT(); do { if (*depth <= 0) break; f = stack + *depth - 1; fr = PF_ETH_ANCHOR_RULE(f); if (f->child != NULL) { /* * This block traverses through * a wildcard anchor. */ if (match != NULL && *match) { /* * If any of "*" matched, then * "foo/ *" matched, mark frame * appropriately. */ PF_ETH_ANCHOR_SET_MATCH(f); *match = 0; } f->child = RB_NEXT(pf_keth_anchor_node, &fr->anchor->children, f->child); if (f->child != NULL) { *rs = &f->child->ruleset; *r = TAILQ_FIRST((*rs)->active.rules); if (*r == NULL) continue; else break; } } (*depth)--; if (*depth == 0 && a != NULL) *a = NULL; *rs = f->rs; if (PF_ETH_ANCHOR_MATCH(f) || (match != NULL && *match)) quick = fr->quick; *r = TAILQ_NEXT(fr, entries); } while (*r == NULL); return (quick); } #ifdef INET6 void pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); break; #endif /* INET */ case AF_INET6: naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); break; } } void pf_addr_inc(struct pf_addr *addr, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); break; #endif /* INET */ case AF_INET6: if (addr->addr32[3] == 0xffffffff) { addr->addr32[3] = 0; if (addr->addr32[2] == 0xffffffff) { addr->addr32[2] = 0; if (addr->addr32[1] == 0xffffffff) { addr->addr32[1] = 0; addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); } else addr->addr32[1] = htonl(ntohl(addr->addr32[1]) + 1); } else addr->addr32[2] = htonl(ntohl(addr->addr32[2]) + 1); } else addr->addr32[3] = htonl(ntohl(addr->addr32[3]) + 1); break; } } #endif /* INET6 */ void pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a) { + /* + * Modern rules use the same flags in rules as they do in states. + */ a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| - PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); + PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); + + /* + * Old-style scrub rules have different flags which need to be translated. + */ + if (r->rule_flag & PFRULE_RANDOMID) + a->flags |= PFSTATE_RANDOMID; + if (r->scrub_flags & PFSTATE_SETTOS || r->rule_flag & PFRULE_SET_TOS ) { + a->flags |= PFSTATE_SETTOS; + a->set_tos = r->set_tos; + } + if (r->qid) a->qid = r->qid; if (r->pqid) a->pqid = r->pqid; if (r->rtableid >= 0) a->rtableid = r->rtableid; a->log |= r->log; - if (a->flags & PFSTATE_SETTOS) - a->set_tos = r->set_tos; if (r->min_ttl) a->min_ttl = r->min_ttl; if (r->max_mss) a->max_mss = r->max_mss; if (r->dnpipe) a->dnpipe = r->dnpipe; if (r->dnrpipe) a->dnrpipe = r->dnrpipe; if (r->dnpipe || r->dnrpipe) { if (r->free_flags & PFRULE_DN_IS_PIPE) a->flags |= PFSTATE_DN_IS_PIPE; else a->flags &= ~PFSTATE_DN_IS_PIPE; } - if (a->flags & PFSTATE_SETPRIO) { + if (r->scrub_flags & PFSTATE_SETPRIO) { a->set_prio[0] = r->set_prio[0]; a->set_prio[1] = r->set_prio[1]; } } int pf_socket_lookup(struct pf_pdesc *pd, struct mbuf *m) { struct pf_addr *saddr, *daddr; u_int16_t sport, dport; struct inpcbinfo *pi; struct inpcb *inp; pd->lookup.uid = UID_MAX; pd->lookup.gid = GID_MAX; switch (pd->proto) { case IPPROTO_TCP: sport = pd->hdr.tcp.th_sport; dport = pd->hdr.tcp.th_dport; pi = &V_tcbinfo; break; case IPPROTO_UDP: sport = pd->hdr.udp.uh_sport; dport = pd->hdr.udp.uh_dport; pi = &V_udbinfo; break; default: return (-1); } if (pd->dir == PF_IN) { saddr = pd->src; daddr = pd->dst; } else { u_int16_t p; p = sport; sport = dport; dport = p; saddr = pd->dst; daddr = pd->src; } switch (pd->af) { #ifdef INET case AF_INET: inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, dport, INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) { inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) return (-1); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) { inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) return (-1); } break; #endif /* INET6 */ default: return (-1); } INP_RLOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; INP_RUNLOCK(inp); return (1); } u_int8_t pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; u_int8_t wscale = 0; hlen = th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) return (0); if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) return (0); opt = hdr + sizeof(struct tcphdr); hlen -= sizeof(struct tcphdr); while (hlen >= 3) { switch (*opt) { case TCPOPT_EOL: case TCPOPT_NOP: ++opt; --hlen; break; case TCPOPT_WINDOW: wscale = opt[2]; if (wscale > TCP_MAX_WINSHIFT) wscale = TCP_MAX_WINSHIFT; wscale |= PF_WSCALE_FLAG; /* FALLTHROUGH */ default: optlen = opt[1]; if (optlen < 2) optlen = 2; hlen -= optlen; opt += optlen; break; } } return (wscale); } u_int16_t pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; u_int16_t mss = V_tcp_mssdflt; hlen = th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) return (0); if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) return (0); opt = hdr + sizeof(struct tcphdr); hlen -= sizeof(struct tcphdr); while (hlen >= TCPOLEN_MAXSEG) { switch (*opt) { case TCPOPT_EOL: case TCPOPT_NOP: ++opt; --hlen; break; case TCPOPT_MAXSEG: bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); NTOHS(mss); /* FALLTHROUGH */ default: optlen = opt[1]; if (optlen < 2) optlen = 2; hlen -= optlen; opt += optlen; break; } } return (mss); } static u_int16_t pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) { struct nhop_object *nh; #ifdef INET6 struct in6_addr dst6; uint32_t scopeid; #endif /* INET6 */ int hlen = 0; uint16_t mss = 0; NET_EPOCH_ASSERT(); switch (af) { #ifdef INET case AF_INET: hlen = sizeof(struct ip); nh = fib4_lookup(rtableid, addr->v4, 0, 0, 0); if (nh != NULL) mss = nh->nh_mtu - hlen - sizeof(struct tcphdr); break; #endif /* INET */ #ifdef INET6 case AF_INET6: hlen = sizeof(struct ip6_hdr); in6_splitscope(&addr->v6, &dst6, &scopeid); nh = fib6_lookup(rtableid, &dst6, scopeid, 0, 0); if (nh != NULL) mss = nh->nh_mtu - hlen - sizeof(struct tcphdr); break; #endif /* INET6 */ } mss = max(V_tcp_mssdflt, mss); mss = min(mss, offer); mss = max(mss, 64); /* sanity - at least max opt space */ return (mss); } static u_int32_t pf_tcp_iss(struct pf_pdesc *pd) { MD5_CTX ctx; u_int32_t digest[4]; if (V_pf_tcp_secret_init == 0) { arc4random_buf(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); MD5Init(&V_pf_tcp_secret_ctx); MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); V_pf_tcp_secret_init = 1; } ctx = V_pf_tcp_secret_ctx; MD5Update(&ctx, (char *)&pd->hdr.tcp.th_sport, sizeof(u_short)); MD5Update(&ctx, (char *)&pd->hdr.tcp.th_dport, sizeof(u_short)); if (pd->af == AF_INET6) { MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); } else { MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); } MD5Final((u_char *)digest, &ctx); V_pf_tcp_iss_off += 4096; #define ISN_RANDOM_INCREMENT (4096 - 1) return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) + V_pf_tcp_iss_off); #undef ISN_RANDOM_INCREMENT } static bool pf_match_eth_addr(const uint8_t *a, const struct pf_keth_rule_addr *r) { bool match = true; /* Always matches if not set */ if (! r->isset) return (!r->neg); for (int i = 0; i < ETHER_ADDR_LEN; i++) { if ((a[i] & r->mask[i]) != (r->addr[i] & r->mask[i])) { match = false; break; } } return (match ^ r->neg); } static int pf_match_eth_tag(struct mbuf *m, struct pf_keth_rule *r, int *tag, int mtag) { if (*tag == -1) *tag = mtag; return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } static void pf_bridge_to(struct ifnet *ifp, struct mbuf *m) { /* If we don't have the interface drop the packet. */ if (ifp == NULL) { m_freem(m); return; } switch (ifp->if_type) { case IFT_ETHER: case IFT_XETHER: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_IEEE8023ADLAG: break; default: m_freem(m); return; } ifp->if_transmit(ifp, m); } static int pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0) { #ifdef INET struct ip ip; #endif #ifdef INET6 struct ip6_hdr ip6; #endif struct mbuf *m = *m0; struct ether_header *e; struct pf_keth_rule *r, *rm, *a = NULL; struct pf_keth_ruleset *ruleset = NULL; struct pf_mtag *mtag; struct pf_keth_ruleq *rules; struct pf_addr *src = NULL, *dst = NULL; struct pfi_kkif *bridge_to; sa_family_t af = 0; uint16_t proto; int asd = 0, match = 0; int tag = -1; uint8_t action; struct pf_keth_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; MPASS(kif->pfik_ifp->if_vnet == curvnet); NET_EPOCH_ASSERT(); PF_RULES_RLOCK_TRACKER; SDT_PROBE3(pf, eth, test_rule, entry, dir, kif->pfik_ifp, m); mtag = pf_find_mtag(m); if (mtag != NULL && mtag->flags & PF_MTAG_FLAG_DUMMYNET) { /* Dummynet re-injects packets after they've * completed their delay. We've already * processed them, so pass unconditionally. */ /* But only once. We may see the packet multiple times (e.g. * PFIL_IN/PFIL_OUT). */ mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; return (PF_PASS); } ruleset = V_pf_keth; rules = ck_pr_load_ptr(&ruleset->active.rules); r = TAILQ_FIRST(rules); rm = NULL; e = mtod(m, struct ether_header *); proto = ntohs(e->ether_type); switch (proto) { #ifdef INET case ETHERTYPE_IP: { if (m_length(m, NULL) < (sizeof(struct ether_header) + sizeof(ip))) return (PF_DROP); af = AF_INET; m_copydata(m, sizeof(struct ether_header), sizeof(ip), (caddr_t)&ip); src = (struct pf_addr *)&ip.ip_src; dst = (struct pf_addr *)&ip.ip_dst; break; } #endif /* INET */ #ifdef INET6 case ETHERTYPE_IPV6: { if (m_length(m, NULL) < (sizeof(struct ether_header) + sizeof(ip6))) return (PF_DROP); af = AF_INET6; m_copydata(m, sizeof(struct ether_header), sizeof(ip6), (caddr_t)&ip6); src = (struct pf_addr *)&ip6.ip6_src; dst = (struct pf_addr *)&ip6.ip6_dst; break; } #endif /* INET6 */ } PF_RULES_RLOCK(); while (r != NULL) { counter_u64_add(r->evaluations, 1); SDT_PROBE2(pf, eth, test_rule, test, r->nr, r); if (pfi_kkif_match(r->kif, kif) == r->ifnot) { SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, "kif"); r = r->skip[PFE_SKIP_IFP].ptr; } else if (r->direction && r->direction != dir) { SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, "dir"); r = r->skip[PFE_SKIP_DIR].ptr; } else if (r->proto && r->proto != proto) { SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, "proto"); r = r->skip[PFE_SKIP_PROTO].ptr; } else if (! pf_match_eth_addr(e->ether_shost, &r->src)) { SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, "src"); r = r->skip[PFE_SKIP_SRC_ADDR].ptr; } else if (! pf_match_eth_addr(e->ether_dhost, &r->dst)) { SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, "dst"); r = r->skip[PFE_SKIP_DST_ADDR].ptr; } else if (src != NULL && PF_MISMATCHAW(&r->ipsrc.addr, src, af, r->ipsrc.neg, kif, M_GETFIB(m))) { SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, "ip_src"); r = r->skip[PFE_SKIP_SRC_IP_ADDR].ptr; } else if (dst != NULL && PF_MISMATCHAW(&r->ipdst.addr, dst, af, r->ipdst.neg, kif, M_GETFIB(m))) { SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, "ip_dst"); r = r->skip[PFE_SKIP_DST_IP_ADDR].ptr; } else if (r->match_tag && !pf_match_eth_tag(m, r, &tag, mtag ? mtag->tag : 0)) { SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, "match_tag"); r = TAILQ_NEXT(r, entries); } else { if (r->tag) tag = r->tag; if (r->anchor == NULL) { /* Rule matches */ rm = r; SDT_PROBE2(pf, eth, test_rule, match, r->nr, r); if (r->quick) break; r = TAILQ_NEXT(r, entries); } else { pf_step_into_keth_anchor(anchor_stack, &asd, &ruleset, &r, &a, &match); } } if (r == NULL && pf_step_out_of_keth_anchor(anchor_stack, &asd, &ruleset, &r, &a, &match)) break; } r = rm; SDT_PROBE2(pf, eth, test_rule, final_match, (r != NULL ? r->nr : -1), r); /* Default to pass. */ if (r == NULL) { PF_RULES_RUNLOCK(); return (PF_PASS); } /* Execute action. */ counter_u64_add(r->packets[dir == PF_OUT], 1); counter_u64_add(r->bytes[dir == PF_OUT], m_length(m, NULL)); pf_update_timestamp(r); /* Shortcut. Don't tag if we're just going to drop anyway. */ if (r->action == PF_DROP) { PF_RULES_RUNLOCK(); return (PF_DROP); } if (tag > 0) { if (mtag == NULL) mtag = pf_get_mtag(m); if (mtag == NULL) { PF_RULES_RUNLOCK(); counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1); return (PF_DROP); } mtag->tag = tag; } if (r->qid != 0) { if (mtag == NULL) mtag = pf_get_mtag(m); if (mtag == NULL) { PF_RULES_RUNLOCK(); counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1); return (PF_DROP); } mtag->qid = r->qid; } action = r->action; bridge_to = r->bridge_to; /* Dummynet */ if (r->dnpipe) { struct ip_fw_args dnflow; /* Drop packet if dummynet is not loaded. */ if (ip_dn_io_ptr == NULL) { PF_RULES_RUNLOCK(); m_freem(m); counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1); return (PF_DROP); } if (mtag == NULL) mtag = pf_get_mtag(m); if (mtag == NULL) { PF_RULES_RUNLOCK(); counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1); return (PF_DROP); } bzero(&dnflow, sizeof(dnflow)); /* We don't have port numbers here, so we set 0. That means * that we'll be somewhat limited in distinguishing flows (i.e. * only based on IP addresses, not based on port numbers), but * it's better than nothing. */ dnflow.f_id.dst_port = 0; dnflow.f_id.src_port = 0; dnflow.f_id.proto = 0; dnflow.rule.info = r->dnpipe; dnflow.rule.info |= IPFW_IS_DUMMYNET; if (r->dnflags & PFRULE_DN_IS_PIPE) dnflow.rule.info |= IPFW_IS_PIPE; dnflow.f_id.extra = dnflow.rule.info; dnflow.flags = dir == PF_IN ? IPFW_ARGS_IN : IPFW_ARGS_OUT; dnflow.flags |= IPFW_ARGS_ETHER; dnflow.ifp = kif->pfik_ifp; switch (af) { case AF_INET: dnflow.f_id.addr_type = 4; dnflow.f_id.src_ip = src->v4.s_addr; dnflow.f_id.dst_ip = dst->v4.s_addr; break; case AF_INET6: dnflow.flags |= IPFW_ARGS_IP6; dnflow.f_id.addr_type = 6; dnflow.f_id.src_ip6 = src->v6; dnflow.f_id.dst_ip6 = dst->v6; break; } PF_RULES_RUNLOCK(); mtag->flags |= PF_MTAG_FLAG_DUMMYNET; ip_dn_io_ptr(m0, &dnflow); if (*m0 != NULL) mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; } else { PF_RULES_RUNLOCK(); } if (action == PF_PASS && bridge_to) { pf_bridge_to(bridge_to->pfik_ifp, *m0); *m0 = NULL; /* We've eaten the packet. */ } return (action); } static int pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pfi_kkif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, struct pf_krule **am, struct pf_kruleset **rsm, struct inpcb *inp) { struct pf_krule *nr = NULL; struct pf_addr * const saddr = pd->src; struct pf_addr * const daddr = pd->dst; sa_family_t af = pd->af; struct pf_krule *r, *a = NULL; struct pf_kruleset *ruleset = NULL; struct pf_krule_slist match_rules; struct pf_krule_item *ri; struct pf_ksrc_node *nsn = NULL; struct tcphdr *th = &pd->hdr.tcp; struct pf_state_key *sk = NULL, *nk = NULL; u_short reason; int rewrite = 0, hdrlen = 0; int tag = -1; int asd = 0; int match = 0; int state_icmp = 0; u_int16_t sport = 0, dport = 0; u_int16_t bproto_sum = 0, bip_sum = 0; u_int8_t icmptype = 0, icmpcode = 0; struct pf_kanchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; PF_RULES_RASSERT(); if (inp != NULL) { INP_LOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; pd->lookup.done = 1; } switch (pd->proto) { case IPPROTO_TCP: sport = th->th_sport; dport = th->th_dport; hdrlen = sizeof(*th); break; case IPPROTO_UDP: sport = pd->hdr.udp.uh_sport; dport = pd->hdr.udp.uh_dport; hdrlen = sizeof(pd->hdr.udp); break; #ifdef INET case IPPROTO_ICMP: if (pd->af != AF_INET) break; sport = dport = pd->hdr.icmp.icmp_id; hdrlen = sizeof(pd->hdr.icmp); icmptype = pd->hdr.icmp.icmp_type; icmpcode = pd->hdr.icmp.icmp_code; if (icmptype == ICMP_UNREACH || icmptype == ICMP_SOURCEQUENCH || icmptype == ICMP_REDIRECT || icmptype == ICMP_TIMXCEED || icmptype == ICMP_PARAMPROB) state_icmp++; break; #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: if (af != AF_INET6) break; sport = dport = pd->hdr.icmp6.icmp6_id; hdrlen = sizeof(pd->hdr.icmp6); icmptype = pd->hdr.icmp6.icmp6_type; icmpcode = pd->hdr.icmp6.icmp6_code; if (icmptype == ICMP6_DST_UNREACH || icmptype == ICMP6_PACKET_TOO_BIG || icmptype == ICMP6_TIME_EXCEEDED || icmptype == ICMP6_PARAM_PROB) state_icmp++; break; #endif /* INET6 */ default: sport = dport = hdrlen = 0; break; } r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); /* check packet for BINAT/NAT/RDR */ if ((nr = pf_get_translation(pd, m, off, kif, &nsn, &sk, &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) { KASSERT(sk != NULL, ("%s: null sk", __func__)); KASSERT(nk != NULL, ("%s: null nk", __func__)); if (nr->log) { PFLOG_PACKET(kif, m, af, PFRES_MATCH, nr, a, ruleset, pd, 1); } if (pd->ip_sum) bip_sum = *pd->ip_sum; switch (pd->proto) { case IPPROTO_TCP: bproto_sum = th->th_sum; pd->proto_sum = &th->th_sum; if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || nk->port[pd->sidx] != sport) { pf_change_ap(m, saddr, &th->th_sport, pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx], nk->port[pd->sidx], 0, af); pd->sport = &th->th_sport; sport = th->th_sport; } if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || nk->port[pd->didx] != dport) { pf_change_ap(m, daddr, &th->th_dport, pd->ip_sum, &th->th_sum, &nk->addr[pd->didx], nk->port[pd->didx], 0, af); dport = th->th_dport; pd->dport = &th->th_dport; } rewrite++; break; case IPPROTO_UDP: bproto_sum = pd->hdr.udp.uh_sum; pd->proto_sum = &pd->hdr.udp.uh_sum; if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || nk->port[pd->sidx] != sport) { pf_change_ap(m, saddr, &pd->hdr.udp.uh_sport, pd->ip_sum, &pd->hdr.udp.uh_sum, &nk->addr[pd->sidx], nk->port[pd->sidx], 1, af); sport = pd->hdr.udp.uh_sport; pd->sport = &pd->hdr.udp.uh_sport; } if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || nk->port[pd->didx] != dport) { pf_change_ap(m, daddr, &pd->hdr.udp.uh_dport, pd->ip_sum, &pd->hdr.udp.uh_sum, &nk->addr[pd->didx], nk->port[pd->didx], 1, af); dport = pd->hdr.udp.uh_dport; pd->dport = &pd->hdr.udp.uh_dport; } rewrite++; break; #ifdef INET case IPPROTO_ICMP: nk->port[0] = nk->port[1]; if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&saddr->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET)) pf_change_a(&daddr->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); if (nk->port[1] != pd->hdr.icmp.icmp_id) { pd->hdr.icmp.icmp_cksum = pf_cksum_fixup( pd->hdr.icmp.icmp_cksum, sport, nk->port[1], 0); pd->hdr.icmp.icmp_id = nk->port[1]; pd->sport = &pd->hdr.icmp.icmp_id; } m_copyback(m, off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: nk->port[0] = nk->port[1]; if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6)) pf_change_a6(saddr, &pd->hdr.icmp6.icmp6_cksum, &nk->addr[pd->sidx], 0); if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6)) pf_change_a6(daddr, &pd->hdr.icmp6.icmp6_cksum, &nk->addr[pd->didx], 0); rewrite++; break; #endif /* INET */ default: switch (af) { #ifdef INET case AF_INET: if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&saddr->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET)) pf_change_a(&daddr->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6)) PF_ACPY(saddr, &nk->addr[pd->sidx], af); if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6)) PF_ACPY(daddr, &nk->addr[pd->didx], af); break; #endif /* INET */ } break; } if (nr->natpass) r = NULL; pd->nat_rule = nr; } SLIST_INIT(&match_rules); while (r != NULL) { pf_counter_u64_add(&r->evaluations, 1); if (pfi_kkif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != pd->dir) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; /* tcp/udp only. port_op always 0 in other cases */ else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; /* tcp/udp only. port_op always 0 in other cases */ else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; /* icmp only. type always 0 in other cases */ else if (r->type && r->type != icmptype + 1) r = TAILQ_NEXT(r, entries); /* icmp only. type always 0 in other cases */ else if (r->code && r->code != icmpcode + 1) r = TAILQ_NEXT(r, entries); else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_TCP && (r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); /* tcp/udp only. uid.op always 0 in other cases */ else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = pf_socket_lookup(pd, m), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], pd->lookup.uid)) r = TAILQ_NEXT(r, entries); /* tcp/udp only. gid.op always 0 in other cases */ else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = pf_socket_lookup(pd, m), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prio && !pf_match_ieee8021q_pcp(r->prio, m)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match( pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; if (r->anchor == NULL) { if (r->action == PF_MATCH) { ri = malloc(sizeof(struct pf_krule_item), M_PF_RULE_ITEM, M_NOWAIT | M_ZERO); if (ri == NULL) { REASON_SET(&reason, PFRES_MEMORY); goto cleanup; } ri->r = r; SLIST_INSERT_HEAD(&match_rules, ri, entry); pf_counter_u64_critical_enter(); pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1); pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len); pf_counter_u64_critical_exit(); pf_rule_to_actions(r, &pd->act); if (r->log) PFLOG_PACKET(kif, m, af, PFRES_MATCH, r, a, ruleset, pd, 1); } else { match = 1; *rm = r; *am = a; *rsm = ruleset; } if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match); } if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match)) break; } r = *rm; a = *am; ruleset = *rsm; REASON_SET(&reason, PFRES_MATCH); /* apply actions for last matching pass/block rule */ pf_rule_to_actions(r, &pd->act); if (r->log) { if (rewrite) m_copyback(m, off, hdrlen, pd->hdr.any); PFLOG_PACKET(kif, m, af, reason, r, a, ruleset, pd, 1); } if ((r->action == PF_DROP) && ((r->rule_flag & PFRULE_RETURNRST) || (r->rule_flag & PFRULE_RETURNICMP) || (r->rule_flag & PFRULE_RETURN))) { pf_return(r, nr, pd, sk, off, m, th, kif, bproto_sum, bip_sum, hdrlen, &reason, r->rtableid); } if (r->action == PF_DROP) goto cleanup; if (tag > 0 && pf_tag_packet(m, pd, tag)) { REASON_SET(&reason, PFRES_MEMORY); goto cleanup; } if (pd->act.rtableid >= 0) M_SETFIB(m, pd->act.rtableid); if (!state_icmp && (r->keep_state || nr != NULL || (pd->flags & PFDESC_TCP_NORM))) { int action; action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off, sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum, hdrlen, &match_rules); if (action != PF_PASS) { if (action == PF_DROP && (r->rule_flag & PFRULE_RETURN)) pf_return(r, nr, pd, sk, off, m, th, kif, bproto_sum, bip_sum, hdrlen, &reason, pd->act.rtableid); return (action); } } else { uma_zfree(V_pf_state_key_z, sk); uma_zfree(V_pf_state_key_z, nk); } /* copy back packet headers if we performed NAT operations */ if (rewrite) m_copyback(m, off, hdrlen, pd->hdr.any); if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) && pd->dir == PF_OUT && V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, m)) /* * We want the state created, but we dont * want to send this in case a partner * firewall has to know about it to allow * replies through it. */ return (PF_DEFER); return (PF_PASS); cleanup: while ((ri = SLIST_FIRST(&match_rules))) { SLIST_REMOVE_HEAD(&match_rules, entry); free(ri, M_PF_RULE_ITEM); } uma_zfree(V_pf_state_key_z, sk); uma_zfree(V_pf_state_key_z, nk); return (PF_DROP); } static int pf_create_state(struct pf_krule *r, struct pf_krule *nr, struct pf_krule *a, struct pf_pdesc *pd, struct pf_ksrc_node *nsn, struct pf_state_key *nk, struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, struct pfi_kkif *kif, struct pf_kstate **sm, int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen, struct pf_krule_slist *match_rules) { struct pf_kstate *s = NULL; struct pf_ksrc_node *sn = NULL; struct tcphdr *th = &pd->hdr.tcp; u_int16_t mss = V_tcp_mssdflt; u_short reason, sn_reason; /* check maximums */ if (r->max_states && (counter_u64_fetch(r->states_cur) >= r->max_states)) { counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1); REASON_SET(&reason, PFRES_MAXSTATES); goto csfailed; } /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && (sn_reason = pf_insert_src_node(&sn, r, pd->src, pd->af)) != 0) { REASON_SET(&reason, sn_reason); goto csfailed; } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && (sn_reason = pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) != 0 ) { REASON_SET(&reason, sn_reason); goto csfailed; } s = pf_alloc_state(M_NOWAIT); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); goto csfailed; } s->rule.ptr = r; s->nat_rule.ptr = nr; s->anchor.ptr = a; bcopy(match_rules, &s->match_rules, sizeof(s->match_rules)); + memcpy(&s->act, &pd->act, sizeof(struct pf_rule_actions)); + STATE_INC_COUNTERS(s); if (r->allow_opts) s->state_flags |= PFSTATE_ALLOWOPTS; if (r->rule_flag & PFRULE_STATESLOPPY) s->state_flags |= PFSTATE_SLOPPY; if (pd->flags & PFDESC_TCP_NORM) /* Set by old-style scrub rules */ s->state_flags |= PFSTATE_SCRUB_TCP; - s->log = pd->act.log & PF_LOG_ALL; - s->qid = pd->act.qid; - s->pqid = pd->act.pqid; - s->rtableid = pd->act.rtableid; - s->min_ttl = pd->act.min_ttl; - s->set_tos = pd->act.set_tos; - s->max_mss = pd->act.max_mss; + + s->act.log = pd->act.log & PF_LOG_ALL; s->sync_state = PFSYNC_S_NONE; - s->qid = pd->act.qid; - s->pqid = pd->act.pqid; - s->dnpipe = pd->act.dnpipe; - s->dnrpipe = pd->act.dnrpipe; - s->set_prio[0] = pd->act.set_prio[0]; - s->set_prio[1] = pd->act.set_prio[1]; - s->state_flags |= pd->act.flags; + s->state_flags |= pd->act.flags; /* Only needed for pfsync and state export */ + if (nr != NULL) - s->log |= nr->log & PF_LOG_ALL; + s->act.log |= nr->log & PF_LOG_ALL; switch (pd->proto) { case IPPROTO_TCP: s->src.seqlo = ntohl(th->th_seq); s->src.seqhi = s->src.seqlo + pd->p_len + 1; if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_MODULATE) { /* Generate sequence number modulator */ if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 0) s->src.seqdiff = 1; pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(s->src.seqlo + s->src.seqdiff), 0); *rewrite = 1; } else s->src.seqdiff = 0; if (th->th_flags & TH_SYN) { s->src.seqhi++; s->src.wscale = pf_get_wscale(m, off, th->th_off, pd->af); } s->src.max_win = MAX(ntohs(th->th_win), 1); if (s->src.wscale & PF_WSCALE_MASK) { /* Remove scale factor from initial window */ int win = s->src.max_win; win += 1 << (s->src.wscale & PF_WSCALE_MASK); s->src.max_win = (win - 1) >> (s->src.wscale & PF_WSCALE_MASK); } if (th->th_flags & TH_FIN) s->src.seqhi++; s->dst.seqhi = 1; s->dst.max_win = 1; pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT); pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED); s->timeout = PFTM_TCP_FIRST_PACKET; atomic_add_32(&V_pf_status.states_halfopen, 1); break; case IPPROTO_UDP: pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE); pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC); s->timeout = PFTM_UDP_FIRST_PACKET; break; case IPPROTO_ICMP: #ifdef INET6 case IPPROTO_ICMPV6: #endif s->timeout = PFTM_ICMP_FIRST_PACKET; break; default: pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE); pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC); s->timeout = PFTM_OTHER_FIRST_PACKET; } if (r->rt) { /* pf_map_addr increases the reason counters */ if ((reason = pf_map_addr(pd->af, r, pd->src, &s->rt_addr, NULL, &sn)) != 0) { pf_src_tree_remove_state(s); s->timeout = PFTM_UNLINKED; STATE_DEC_COUNTERS(s); pf_free_state(s); goto csfailed; } s->rt_kif = r->rpool.cur->kif; s->rt = r->rt; } s->creation = time_uptime; s->expire = time_uptime; if (sn != NULL) s->src_node = sn; if (nsn != NULL) { /* XXX We only modify one side for now. */ PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); s->nat_src_node = nsn; } if (pd->proto == IPPROTO_TCP) { if (s->state_flags & PFSTATE_SCRUB_TCP && pf_normalize_tcp_init(m, off, pd, th, &s->src, &s->dst)) { REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); s->timeout = PFTM_UNLINKED; STATE_DEC_COUNTERS(s); pf_free_state(s); return (PF_DROP); } if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub && pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, &s->src, &s->dst, rewrite)) { /* This really shouldn't happen!!! */ DPFPRINTF(PF_DEBUG_URGENT, ("pf_normalize_tcp_stateful failed on first " "pkt\n")); pf_src_tree_remove_state(s); s->timeout = PFTM_UNLINKED; STATE_DEC_COUNTERS(s); pf_free_state(s); return (PF_DROP); } } s->direction = pd->dir; /* * sk/nk could already been setup by pf_get_translation(). */ if (nr == NULL) { KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p", __func__, nr, sk, nk)); sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport); if (sk == NULL) goto csfailed; nk = sk; } else KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p", __func__, nr, sk, nk)); /* Swap sk/nk for PF_OUT. */ if (pf_state_insert(BOUND_IFACE(r, kif), kif, (pd->dir == PF_IN) ? sk : nk, (pd->dir == PF_IN) ? nk : sk, s)) { REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); s->timeout = PFTM_UNLINKED; STATE_DEC_COUNTERS(s); pf_free_state(s); return (PF_DROP); } else *sm = s; if (tag > 0) s->tag = tag; if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); /* undo NAT changes, if they have taken place */ if (nr != NULL) { struct pf_state_key *skt = s->key[PF_SK_WIRE]; if (pd->dir == PF_OUT) skt = s->key[PF_SK_STACK]; PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af); PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af); if (pd->sport) *pd->sport = skt->port[pd->sidx]; if (pd->dport) *pd->dport = skt->port[pd->didx]; if (pd->proto_sum) *pd->proto_sum = bproto_sum; if (pd->ip_sum) *pd->ip_sum = bip_sum; m_copyback(m, off, hdrlen, pd->hdr.any); } s->src.seqhi = htonl(arc4random()); /* Find mss option */ int rtid = M_GETFIB(m); mss = pf_get_mss(m, off, th->th_off, pd->af); mss = pf_calc_mss(pd->src, pd->af, rtid, mss); mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); s->src.mss = mss; pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, s->src.mss, 0, true, 0, 0, pd->act.rtableid); REASON_SET(&reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } return (PF_PASS); csfailed: uma_zfree(V_pf_state_key_z, sk); uma_zfree(V_pf_state_key_z, nk); if (sn != NULL) { PF_SRC_NODE_LOCK(sn); if (--sn->states == 0 && sn->expire == 0) { pf_unlink_src_node(sn); uma_zfree(V_pf_sources_z, sn); counter_u64_add( V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); } PF_SRC_NODE_UNLOCK(sn); } if (nsn != sn && nsn != NULL) { PF_SRC_NODE_LOCK(nsn); if (--nsn->states == 0 && nsn->expire == 0) { pf_unlink_src_node(nsn); uma_zfree(V_pf_sources_z, nsn); counter_u64_add( V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); } PF_SRC_NODE_UNLOCK(nsn); } return (PF_DROP); } static int pf_test_fragment(struct pf_krule **rm, struct pfi_kkif *kif, struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_krule **am, struct pf_kruleset **rsm) { struct pf_krule *r, *a = NULL; struct pf_kruleset *ruleset = NULL; struct pf_krule_slist match_rules; struct pf_krule_item *ri; sa_family_t af = pd->af; u_short reason; int tag = -1; int asd = 0; int match = 0; struct pf_kanchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; PF_RULES_RASSERT(); r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); SLIST_INIT(&match_rules); while (r != NULL) { pf_counter_u64_add(&r->evaluations, 1); if (pfi_kkif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != pd->dir) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_UDP && (r->src.port_op || r->dst.port_op)) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_TCP && (r->src.port_op || r->dst.port_op || r->flagset)) r = TAILQ_NEXT(r, entries); else if ((pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) && (r->type || r->code)) r = TAILQ_NEXT(r, entries); else if (r->prio && !pf_match_ieee8021q_pcp(r->prio, m)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= (arc4random() % (UINT_MAX - 1) + 1)) r = TAILQ_NEXT(r, entries); else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { if (r->action == PF_MATCH) { ri = malloc(sizeof(struct pf_krule_item), M_PF_RULE_ITEM, M_NOWAIT | M_ZERO); if (ri == NULL) { REASON_SET(&reason, PFRES_MEMORY); goto cleanup; } ri->r = r; SLIST_INSERT_HEAD(&match_rules, ri, entry); pf_counter_u64_critical_enter(); pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1); pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len); pf_counter_u64_critical_exit(); pf_rule_to_actions(r, &pd->act); if (r->log) PFLOG_PACKET(kif, m, af, PFRES_MATCH, r, a, ruleset, pd, 1); } else { match = 1; *rm = r; *am = a; *rsm = ruleset; } if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match); } if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match)) break; } r = *rm; a = *am; ruleset = *rsm; REASON_SET(&reason, PFRES_MATCH); /* apply actions for last matching pass/block rule */ pf_rule_to_actions(r, &pd->act); if (r->log) PFLOG_PACKET(kif, m, af, reason, r, a, ruleset, pd, 1); if (r->action != PF_PASS) return (PF_DROP); if (tag > 0 && pf_tag_packet(m, pd, tag)) { REASON_SET(&reason, PFRES_MEMORY); goto cleanup; } return (PF_PASS); cleanup: while ((ri = SLIST_FIRST(&match_rules))) { SLIST_REMOVE_HEAD(&match_rules, entry); free(ri, M_PF_RULE_ITEM); } return (PF_DROP); } static int pf_tcp_track_full(struct pf_kstate **state, struct pfi_kkif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, u_short *reason, int *copyback) { struct tcphdr *th = &pd->hdr.tcp; struct pf_state_peer *src, *dst; u_int16_t win = ntohs(th->th_win); u_int32_t ack, end, seq, orig_seq; u_int8_t sws, dws, psrc, pdst; int ackskew; if (pd->dir == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; psrc = PF_PEER_SRC; pdst = PF_PEER_DST; } else { src = &(*state)->dst; dst = &(*state)->src; psrc = PF_PEER_DST; pdst = PF_PEER_SRC; } if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { sws = src->wscale & PF_WSCALE_MASK; dws = dst->wscale & PF_WSCALE_MASK; } else sws = dws = 0; /* * Sequence tracking algorithm from Guido van Rooij's paper: * http://www.madison-gurkha.com/publications/tcp_filtering/ * tcp_filtering.ps */ orig_seq = seq = ntohl(th->th_seq); if (src->seqlo == 0) { /* First packet from this end. Set its state */ if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && src->scrub == NULL) { if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { REASON_SET(reason, PFRES_MEMORY); return (PF_DROP); } } /* Deferred generation of sequence number modulator */ if (dst->seqdiff && !src->seqdiff) { /* use random iss for the TCP server */ while ((src->seqdiff = arc4random() - seq) == 0) ; ack = ntohl(th->th_ack) - dst->seqdiff; pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq + src->seqdiff), 0); pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0); *copyback = 1; } else { ack = ntohl(th->th_ack); } end = seq + pd->p_len; if (th->th_flags & TH_SYN) { end++; if (dst->wscale & PF_WSCALE_FLAG) { src->wscale = pf_get_wscale(m, off, th->th_off, pd->af); if (src->wscale & PF_WSCALE_FLAG) { /* Remove scale factor from initial * window */ sws = src->wscale & PF_WSCALE_MASK; win = ((u_int32_t)win + (1 << sws) - 1) >> sws; dws = dst->wscale & PF_WSCALE_MASK; } else { /* fixup other window */ dst->max_win <<= dst->wscale & PF_WSCALE_MASK; /* in case of a retrans SYN|ACK */ dst->wscale = 0; } } } if (th->th_flags & TH_FIN) end++; src->seqlo = seq; if (src->state < TCPS_SYN_SENT) pf_set_protostate(*state, psrc, TCPS_SYN_SENT); /* * May need to slide the window (seqhi may have been set by * the crappy stack check or if we picked up the connection * after establishment) */ if (src->seqhi == 1 || SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) src->seqhi = end + MAX(1, dst->max_win << dws); if (win > src->max_win) src->max_win = win; } else { ack = ntohl(th->th_ack) - dst->seqdiff; if (src->seqdiff) { /* Modulate sequence numbers */ pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq + src->seqdiff), 0); pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0); *copyback = 1; } end = seq + pd->p_len; if (th->th_flags & TH_SYN) end++; if (th->th_flags & TH_FIN) end++; } if ((th->th_flags & TH_ACK) == 0) { /* Let it pass through the ack skew check */ ack = dst->seqlo; } else if ((ack == 0 && (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || /* broken tcp stacks do not set ack */ (dst->state < TCPS_SYN_SENT)) { /* * Many stacks (ours included) will set the ACK number in an * FIN|ACK if the SYN times out -- no sequence to ACK. */ ack = dst->seqlo; } if (seq == end) { /* Ease sequencing restrictions on no data packets */ seq = src->seqlo; end = seq; } ackskew = dst->seqlo - ack; /* * Need to demodulate the sequence numbers in any TCP SACK options * (Selective ACK). We could optionally validate the SACK values * against the current ACK window, either forwards or backwards, but * I'm not confident that SACK has been implemented properly * everywhere. It wouldn't surprise me if several stacks accidentally * SACK too far backwards of previously ACKed data. There really aren't * any security implications of bad SACKing unless the target stack * doesn't validate the option length correctly. Someone trying to * spoof into a TCP connection won't bother blindly sending SACK * options anyway. */ if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { if (pf_modulate_sack(m, off, pd, th, dst)) *copyback = 1; } #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ if (SEQ_GEQ(src->seqhi, end) && /* Last octet inside other's window space */ SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && /* Retrans: not more than one window back */ (ackskew >= -MAXACKWINDOW) && /* Acking not more than one reassembled fragment backwards */ (ackskew <= (MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || (pd->flags & PFDESC_IP_REAS) == 0)) { /* Require an exact/+1 sequence match on resets when possible */ if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, *state, src, dst, copyback)) return (PF_DROP); } /* update max window */ if (src->max_win < win) src->max_win = win; /* synchronize sequencing */ if (SEQ_GT(end, src->seqlo)) src->seqlo = end; /* slide the window of what the other end can send */ if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) dst->seqhi = ack + MAX((win << sws), 1); /* update states */ if (th->th_flags & TH_SYN) if (src->state < TCPS_SYN_SENT) pf_set_protostate(*state, psrc, TCPS_SYN_SENT); if (th->th_flags & TH_FIN) if (src->state < TCPS_CLOSING) pf_set_protostate(*state, psrc, TCPS_CLOSING); if (th->th_flags & TH_ACK) { if (dst->state == TCPS_SYN_SENT) { pf_set_protostate(*state, pdst, TCPS_ESTABLISHED); if (src->state == TCPS_ESTABLISHED && (*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } } else if (dst->state == TCPS_CLOSING) pf_set_protostate(*state, pdst, TCPS_FIN_WAIT_2); } if (th->th_flags & TH_RST) pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); /* update expire time */ (*state)->expire = time_uptime; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; else if (src->state >= TCPS_CLOSING && dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_FIN_WAIT; else if (src->state < TCPS_ESTABLISHED || dst->state < TCPS_ESTABLISHED) (*state)->timeout = PFTM_TCP_OPENING; else if (src->state >= TCPS_CLOSING || dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_CLOSING; else (*state)->timeout = PFTM_TCP_ESTABLISHED; /* Fall through to PASS packet */ } else if ((dst->state < TCPS_SYN_SENT || dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) && SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && /* Within a window forward of the originating packet */ SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { /* Within a window backward of the originating packet */ /* * This currently handles three situations: * 1) Stupid stacks will shotgun SYNs before their peer * replies. * 2) When PF catches an already established stream (the * firewall rebooted, the state table was flushed, routes * changed...) * 3) Packets get funky immediately after the connection * closes (this should catch Solaris spurious ACK|FINs * that web servers like to spew after a close) * * This must be a little more careful than the above code * since packet floods will also be caught here. We don't * update the TTL here to mitigate the damage of a packet * flood and so the same code can handle awkward establishment * and a loosened connection close. * In the establishment case, a correct peer response will * validate the connection, go through the normal state code * and keep updating the state TTL. */ if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: loose state match: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], (unsigned long long)(*state)->packets[1], pd->dir == PF_IN ? "in" : "out", pd->dir == (*state)->direction ? "fwd" : "rev"); } if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, *state, src, dst, copyback)) return (PF_DROP); } /* update max window */ if (src->max_win < win) src->max_win = win; /* synchronize sequencing */ if (SEQ_GT(end, src->seqlo)) src->seqlo = end; /* slide the window of what the other end can send */ if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) dst->seqhi = ack + MAX((win << sws), 1); /* * Cannot set dst->seqhi here since this could be a shotgunned * SYN and not an already established connection. */ if (th->th_flags & TH_FIN) if (src->state < TCPS_CLOSING) pf_set_protostate(*state, psrc, TCPS_CLOSING); if (th->th_flags & TH_RST) pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); /* Fall through to PASS packet */ } else { if ((*state)->dst.state == TCPS_SYN_SENT && (*state)->src.state == TCPS_SYN_SENT) { /* Send RST for state mismatches during handshake */ if (!(th->th_flags & TH_RST)) pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, (*state)->rule.ptr->return_ttl, true, 0, 0, - (*state)->rtableid); + (*state)->act.rtableid); src->seqlo = 0; src->seqhi = 1; src->max_win = 1; } else if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], (unsigned long long)(*state)->packets[1], pd->dir == PF_IN ? "in" : "out", pd->dir == (*state)->direction ? "fwd" : "rev"); printf("pf: State failure on: %c %c %c %c | %c %c\n", SEQ_GEQ(src->seqhi, end) ? ' ' : '1', SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? ' ': '2', (ackskew >= -MAXACKWINDOW) ? ' ' : '3', (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6'); } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } return (PF_PASS); } static int pf_tcp_track_sloppy(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason) { struct tcphdr *th = &pd->hdr.tcp; struct pf_state_peer *src, *dst; u_int8_t psrc, pdst; if (pd->dir == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; psrc = PF_PEER_SRC; pdst = PF_PEER_DST; } else { src = &(*state)->dst; dst = &(*state)->src; psrc = PF_PEER_DST; pdst = PF_PEER_SRC; } if (th->th_flags & TH_SYN) if (src->state < TCPS_SYN_SENT) pf_set_protostate(*state, psrc, TCPS_SYN_SENT); if (th->th_flags & TH_FIN) if (src->state < TCPS_CLOSING) pf_set_protostate(*state, psrc, TCPS_CLOSING); if (th->th_flags & TH_ACK) { if (dst->state == TCPS_SYN_SENT) { pf_set_protostate(*state, pdst, TCPS_ESTABLISHED); if (src->state == TCPS_ESTABLISHED && (*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } } else if (dst->state == TCPS_CLOSING) { pf_set_protostate(*state, pdst, TCPS_FIN_WAIT_2); } else if (src->state == TCPS_SYN_SENT && dst->state < TCPS_SYN_SENT) { /* * Handle a special sloppy case where we only see one * half of the connection. If there is a ACK after * the initial SYN without ever seeing a packet from * the destination, set the connection to established. */ pf_set_protostate(*state, PF_PEER_BOTH, TCPS_ESTABLISHED); dst->state = src->state = TCPS_ESTABLISHED; if ((*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } } else if (src->state == TCPS_CLOSING && dst->state == TCPS_ESTABLISHED && dst->seqlo == 0) { /* * Handle the closing of half connections where we * don't see the full bidirectional FIN/ACK+ACK * handshake. */ pf_set_protostate(*state, pdst, TCPS_CLOSING); } } if (th->th_flags & TH_RST) pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); /* update expire time */ (*state)->expire = time_uptime; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; else if (src->state >= TCPS_CLOSING && dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_FIN_WAIT; else if (src->state < TCPS_ESTABLISHED || dst->state < TCPS_ESTABLISHED) (*state)->timeout = PFTM_TCP_OPENING; else if (src->state >= TCPS_CLOSING || dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_CLOSING; else (*state)->timeout = PFTM_TCP_ESTABLISHED; return (PF_PASS); } static int pf_synproxy(struct pf_pdesc *pd, struct pf_kstate **state, u_short *reason) { struct pf_state_key *sk = (*state)->key[pd->didx]; struct tcphdr *th = &pd->hdr.tcp; if ((*state)->src.state == PF_TCPS_PROXY_SRC) { if (pd->dir != (*state)->direction) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } if (th->th_flags & TH_SYN) { if (ntohl(th->th_seq) != (*state)->src.seqlo) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, (*state)->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, true, 0, 0, - (*state)->rtableid); + (*state)->act.rtableid); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } else if ((*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } else pf_set_protostate(*state, PF_PEER_SRC, PF_TCPS_PROXY_DST); } if ((*state)->src.state == PF_TCPS_PROXY_DST) { if (pd->dir == (*state)->direction) { if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } (*state)->src.max_win = MAX(ntohs(th->th_win), 1); if ((*state)->dst.seqhi == 1) (*state)->dst.seqhi = htonl(arc4random()); pf_send_tcp((*state)->rule.ptr, pd->af, &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->dst.seqhi, 0, TH_SYN, 0, (*state)->src.mss, 0, false, (*state)->tag, 0, - (*state)->rtableid); + (*state)->act.rtableid); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((th->th_flags & (TH_SYN|TH_ACK)) != (TH_SYN|TH_ACK)) || (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } else { (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); (*state)->dst.seqlo = ntohl(th->th_seq); pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, TH_ACK, (*state)->src.max_win, 0, 0, false, - (*state)->tag, 0, (*state)->rtableid); + (*state)->tag, 0, (*state)->act.rtableid); pf_send_tcp((*state)->rule.ptr, pd->af, &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, TH_ACK, (*state)->dst.max_win, 0, 0, true, 0, 0, - (*state)->rtableid); + (*state)->act.rtableid); (*state)->src.seqdiff = (*state)->dst.seqhi - (*state)->src.seqlo; (*state)->dst.seqdiff = (*state)->src.seqhi - (*state)->dst.seqlo; (*state)->src.seqhi = (*state)->src.seqlo + (*state)->dst.max_win; (*state)->dst.seqhi = (*state)->dst.seqlo + (*state)->src.max_win; (*state)->src.wscale = (*state)->dst.wscale = 0; pf_set_protostate(*state, PF_PEER_BOTH, TCPS_ESTABLISHED); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } } return (PF_PASS); } static int pf_test_state_tcp(struct pf_kstate **state, struct pfi_kkif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { struct pf_state_key_cmp key; struct tcphdr *th = &pd->hdr.tcp; int copyback = 0; int action; struct pf_state_peer *src, *dst; bzero(&key, sizeof(key)); key.af = pd->af; key.proto = IPPROTO_TCP; if (pd->dir == PF_IN) { /* wire side, straight */ PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); key.port[0] = th->th_sport; key.port[1] = th->th_dport; } else { /* stack side, reverse */ PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); key.port[1] = th->th_sport; key.port[0] = th->th_dport; } STATE_LOOKUP(kif, &key, *state, pd); if (pd->dir == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { src = &(*state)->dst; dst = &(*state)->src; } if ((action = pf_synproxy(pd, state, reason)) != PF_PASS) return (action); if (dst->state >= TCPS_FIN_WAIT_2 && src->state >= TCPS_FIN_WAIT_2 && (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) || ((th->th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && pf_syncookie_check(pd) && pd->dir == PF_IN))) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: state reuse "); pf_print_state(*state); pf_print_flags(th->th_flags); printf("\n"); } /* XXX make sure it's the same direction ?? */ pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED); pf_unlink_state(*state); *state = NULL; return (PF_DROP); } if ((*state)->state_flags & PFSTATE_SLOPPY) { if (pf_tcp_track_sloppy(state, pd, reason) == PF_DROP) return (PF_DROP); } else { if (pf_tcp_track_full(state, kif, m, off, pd, reason, ©back) == PF_DROP) return (PF_DROP); } /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || nk->port[pd->sidx] != th->th_sport) pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx], nk->port[pd->sidx], 0, pd->af); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || nk->port[pd->didx] != th->th_dport) pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum, &th->th_sum, &nk->addr[pd->didx], nk->port[pd->didx], 0, pd->af); copyback = 1; } /* Copyback sequence modulation or stateful scrub changes if needed */ if (copyback) m_copyback(m, off, sizeof(*th), (caddr_t)th); return (PF_PASS); } static int pf_test_state_udp(struct pf_kstate **state, struct pfi_kkif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; struct udphdr *uh = &pd->hdr.udp; uint8_t psrc, pdst; bzero(&key, sizeof(key)); key.af = pd->af; key.proto = IPPROTO_UDP; if (pd->dir == PF_IN) { /* wire side, straight */ PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); key.port[0] = uh->uh_sport; key.port[1] = uh->uh_dport; } else { /* stack side, reverse */ PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); key.port[1] = uh->uh_sport; key.port[0] = uh->uh_dport; } STATE_LOOKUP(kif, &key, *state, pd); if (pd->dir == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; psrc = PF_PEER_SRC; pdst = PF_PEER_DST; } else { src = &(*state)->dst; dst = &(*state)->src; psrc = PF_PEER_DST; pdst = PF_PEER_SRC; } /* update states */ if (src->state < PFUDPS_SINGLE) pf_set_protostate(*state, psrc, PFUDPS_SINGLE); if (dst->state == PFUDPS_SINGLE) pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE); /* update expire time */ (*state)->expire = time_uptime; if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) (*state)->timeout = PFTM_UDP_MULTIPLE; else (*state)->timeout = PFTM_UDP_SINGLE; /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || nk->port[pd->sidx] != uh->uh_sport) pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum, &uh->uh_sum, &nk->addr[pd->sidx], nk->port[pd->sidx], 1, pd->af); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || nk->port[pd->didx] != uh->uh_dport) pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum, &uh->uh_sum, &nk->addr[pd->didx], nk->port[pd->didx], 1, pd->af); m_copyback(m, off, sizeof(*uh), (caddr_t)uh); } return (PF_PASS); } static int pf_test_state_icmp(struct pf_kstate **state, struct pfi_kkif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { struct pf_addr *saddr = pd->src, *daddr = pd->dst; u_int16_t icmpid = 0, *icmpsum; u_int8_t icmptype, icmpcode; int state_icmp = 0; struct pf_state_key_cmp key; bzero(&key, sizeof(key)); switch (pd->proto) { #ifdef INET case IPPROTO_ICMP: icmptype = pd->hdr.icmp.icmp_type; icmpcode = pd->hdr.icmp.icmp_code; icmpid = pd->hdr.icmp.icmp_id; icmpsum = &pd->hdr.icmp.icmp_cksum; if (icmptype == ICMP_UNREACH || icmptype == ICMP_SOURCEQUENCH || icmptype == ICMP_REDIRECT || icmptype == ICMP_TIMXCEED || icmptype == ICMP_PARAMPROB) state_icmp++; break; #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: icmptype = pd->hdr.icmp6.icmp6_type; icmpcode = pd->hdr.icmp6.icmp6_code; icmpid = pd->hdr.icmp6.icmp6_id; icmpsum = &pd->hdr.icmp6.icmp6_cksum; if (icmptype == ICMP6_DST_UNREACH || icmptype == ICMP6_PACKET_TOO_BIG || icmptype == ICMP6_TIME_EXCEEDED || icmptype == ICMP6_PARAM_PROB) state_icmp++; break; #endif /* INET6 */ } if (!state_icmp) { /* * ICMP query/reply message not related to a TCP/UDP packet. * Search for an ICMP state. */ key.af = pd->af; key.proto = pd->proto; key.port[0] = key.port[1] = icmpid; if (pd->dir == PF_IN) { /* wire side, straight */ PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); } else { /* stack side, reverse */ PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); } STATE_LOOKUP(kif, &key, *state, pd); (*state)->expire = time_uptime; (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; switch (pd->af) { #ifdef INET case AF_INET: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&saddr->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) pf_change_a(&daddr->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); if (nk->port[0] != pd->hdr.icmp.icmp_id) { pd->hdr.icmp.icmp_cksum = pf_cksum_fixup( pd->hdr.icmp.icmp_cksum, icmpid, nk->port[pd->sidx], 0); pd->hdr.icmp.icmp_id = nk->port[pd->sidx]; } m_copyback(m, off, ICMP_MINLEN, (caddr_t )&pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6)) pf_change_a6(saddr, &pd->hdr.icmp6.icmp6_cksum, &nk->addr[pd->sidx], 0); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6)) pf_change_a6(daddr, &pd->hdr.icmp6.icmp6_cksum, &nk->addr[pd->didx], 0); m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )&pd->hdr.icmp6); break; #endif /* INET6 */ } } return (PF_PASS); } else { /* * ICMP error message in response to a TCP/UDP packet. * Extract the inner TCP/UDP header and search for that state. */ struct pf_pdesc pd2; bzero(&pd2, sizeof pd2); #ifdef INET struct ip h2; #endif /* INET */ #ifdef INET6 struct ip6_hdr h2_6; int terminal = 0; #endif /* INET6 */ int ipoff2 = 0; int off2 = 0; pd2.af = pd->af; /* Payload packet is from the opposite direction. */ pd2.sidx = (pd->dir == PF_IN) ? 1 : 0; pd2.didx = (pd->dir == PF_IN) ? 0 : 1; switch (pd->af) { #ifdef INET case AF_INET: /* offset of h2 in mbuf chain */ ipoff2 = off + ICMP_MINLEN; if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip)\n")); return (PF_DROP); } /* * ICMP error messages don't refer to non-first * fragments */ if (h2.ip_off & htons(IP_OFFMASK)) { REASON_SET(reason, PFRES_FRAG); return (PF_DROP); } /* offset of protocol header that follows h2 */ off2 = ipoff2 + (h2.ip_hl << 2); pd2.proto = h2.ip_p; pd2.src = (struct pf_addr *)&h2.ip_src; pd2.dst = (struct pf_addr *)&h2.ip_dst; pd2.ip_sum = &h2.ip_sum; break; #endif /* INET */ #ifdef INET6 case AF_INET6: ipoff2 = off + sizeof(struct icmp6_hdr); if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip6)\n")); return (PF_DROP); } pd2.proto = h2_6.ip6_nxt; pd2.src = (struct pf_addr *)&h2_6.ip6_src; pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; pd2.ip_sum = NULL; off2 = ipoff2 + sizeof(h2_6); do { switch (pd2.proto) { case IPPROTO_FRAGMENT: /* * ICMPv6 error messages for * non-first fragments */ REASON_SET(reason, PFRES_FRAG); return (PF_DROP); case IPPROTO_AH: case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct ip6_ext opt6; if (!pf_pull_hdr(m, off2, &opt6, sizeof(opt6), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMPv6 short opt\n")); return (PF_DROP); } if (pd2.proto == IPPROTO_AH) off2 += (opt6.ip6e_len + 2) * 4; else off2 += (opt6.ip6e_len + 1) * 8; pd2.proto = opt6.ip6e_nxt; /* goto the next header */ break; } default: terminal++; break; } } while (!terminal); break; #endif /* INET6 */ } if (PF_ANEQ(pd->dst, pd2.src, pd->af)) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD ICMP %d:%d outer dst: ", icmptype, icmpcode); pf_print_host(pd->src, 0, pd->af); printf(" -> "); pf_print_host(pd->dst, 0, pd->af); printf(" inner src: "); pf_print_host(pd2.src, 0, pd2.af); printf(" -> "); pf_print_host(pd2.dst, 0, pd2.af); printf("\n"); } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } switch (pd2.proto) { case IPPROTO_TCP: { struct tcphdr th; u_int32_t seq; struct pf_state_peer *src, *dst; u_int8_t dws; int copyback = 0; /* * Only the first 8 bytes of the TCP header can be * expected. Don't access any TCP header fields after * th_seq, an ackskew test is not possible. */ if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(tcp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_TCP; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = th.th_sport; key.port[pd2.didx] = th.th_dport; STATE_LOOKUP(kif, &key, *state, pd); if (pd->dir == (*state)->direction) { src = &(*state)->dst; dst = &(*state)->src; } else { src = &(*state)->src; dst = &(*state)->dst; } if (src->wscale && dst->wscale) dws = dst->wscale & PF_WSCALE_MASK; else dws = 0; /* Demodulate sequence number */ seq = ntohl(th.th_seq) - src->seqdiff; if (src->seqdiff) { pf_change_a(&th.th_seq, icmpsum, htonl(seq), 0); copyback = 1; } if (!((*state)->state_flags & PFSTATE_SLOPPY) && (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD ICMP %d:%d ", icmptype, icmpcode); pf_print_host(pd->src, 0, pd->af); printf(" -> "); pf_print_host(pd->dst, 0, pd->af); printf(" state: "); pf_print_state(*state); printf(" seq=%u\n", seq); } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } else { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: OK ICMP %d:%d ", icmptype, icmpcode); pf_print_host(pd->src, 0, pd->af); printf(" -> "); pf_print_host(pd->dst, 0, pd->af); printf(" state: "); pf_print_state(*state); printf(" seq=%u\n", seq); } } /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != th.th_sport) pf_change_icmp(pd2.src, &th.th_sport, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != th.th_dport) pf_change_icmp(pd2.dst, &th.th_dport, saddr, &nk->addr[pd2.didx], nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); copyback = 1; } if (copyback) { switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, (caddr_t )&pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t )&h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )&pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t )&h2_6); break; #endif /* INET6 */ } m_copyback(m, off2, 8, (caddr_t)&th); } return (PF_PASS); break; } case IPPROTO_UDP: { struct udphdr uh; if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(udp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_UDP; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = uh.uh_sport; key.port[pd2.didx] = uh.uh_dport; STATE_LOOKUP(kif, &key, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != uh.uh_sport) pf_change_icmp(pd2.src, &uh.uh_sport, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != uh.uh_dport) pf_change_icmp(pd2.dst, &uh.uh_dport, saddr, &nk->addr[pd2.didx], nk->port[pd2.didx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, (caddr_t )&pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )&pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t )&h2_6); break; #endif /* INET6 */ } m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); } return (PF_PASS); break; } #ifdef INET case IPPROTO_ICMP: { struct icmp iih; if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short i" "(icmp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_ICMP; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = iih.icmp_id; STATE_LOOKUP(kif, &key, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != iih.icmp_id) pf_change_icmp(pd2.src, &iih.icmp_id, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != iih.icmp_id) pf_change_icmp(pd2.dst, &iih.icmp_id, saddr, &nk->addr[pd2.didx], nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); m_copyback(m, off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); } return (PF_PASS); break; } #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: { struct icmp6_hdr iih; if (!pf_pull_hdr(m, off2, &iih, sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(icmp6)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_ICMPV6; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = iih.icmp6_id; STATE_LOOKUP(kif, &key, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != iih.icmp6_id) pf_change_icmp(pd2.src, &iih.icmp6_id, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != iih.icmp6_id) pf_change_icmp(pd2.dst, &iih.icmp6_id, saddr, &nk->addr[pd2.didx], nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t)&pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); m_copyback(m, off2, sizeof(struct icmp6_hdr), (caddr_t)&iih); } return (PF_PASS); break; } #endif /* INET6 */ default: { key.af = pd2.af; key.proto = pd2.proto; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = 0; STATE_LOOKUP(kif, &key, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af)) pf_change_icmp(pd2.src, NULL, daddr, &nk->addr[pd2.sidx], 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af)) pf_change_icmp(pd2.dst, NULL, saddr, &nk->addr[pd2.didx], 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )&pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t )&h2_6); break; #endif /* INET6 */ } } return (PF_PASS); break; } } } } static int pf_test_state_other(struct pf_kstate **state, struct pfi_kkif *kif, struct mbuf *m, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; uint8_t psrc, pdst; bzero(&key, sizeof(key)); key.af = pd->af; key.proto = pd->proto; if (pd->dir == PF_IN) { PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); key.port[0] = key.port[1] = 0; } else { PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); key.port[1] = key.port[0] = 0; } STATE_LOOKUP(kif, &key, *state, pd); if (pd->dir == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; psrc = PF_PEER_SRC; pdst = PF_PEER_DST; } else { src = &(*state)->dst; dst = &(*state)->src; psrc = PF_PEER_DST; pdst = PF_PEER_SRC; } /* update states */ if (src->state < PFOTHERS_SINGLE) pf_set_protostate(*state, psrc, PFOTHERS_SINGLE); if (dst->state == PFOTHERS_SINGLE) pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE); /* update expire time */ (*state)->expire = time_uptime; if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) (*state)->timeout = PFTM_OTHER_MULTIPLE; else (*state)->timeout = PFTM_OTHER_SINGLE; /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; KASSERT(nk, ("%s: nk is null", __func__)); KASSERT(pd, ("%s: pd is null", __func__)); KASSERT(pd->src, ("%s: pd->src is null", __func__)); KASSERT(pd->dst, ("%s: pd->dst is null", __func__)); switch (pd->af) { #ifdef INET case AF_INET: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); #endif /* INET6 */ } } return (PF_PASS); } /* * ipoff and off are measured from the start of the mbuf chain. * h must be at "ipoff" on the mbuf chain. */ void * pf_pull_hdr(struct mbuf *m, int off, void *p, int len, u_short *actionp, u_short *reasonp, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: { struct ip *h = mtod(m, struct ip *); u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; if (fragoff) { if (fragoff >= len) ACTION_SET(actionp, PF_PASS); else { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_FRAG); } return (NULL); } if (m->m_pkthdr.len < off + len || ntohs(h->ip_len) < off + len) { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_SHORT); return (NULL); } break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { struct ip6_hdr *h = mtod(m, struct ip6_hdr *); if (m->m_pkthdr.len < off + len || (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < (unsigned)(off + len)) { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_SHORT); return (NULL); } break; } #endif /* INET6 */ } m_copydata(m, off, len, p); return (p); } int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kkif *kif, int rtableid) { struct ifnet *ifp; /* * Skip check for addresses with embedded interface scope, * as they would always match anyway. */ if (af == AF_INET6 && IN6_IS_SCOPE_EMBED(&addr->v6)) return (1); if (af != AF_INET && af != AF_INET6) return (0); /* Skip checks for ipsec interfaces */ if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) return (1); ifp = (kif != NULL) ? kif->pfik_ifp : NULL; switch (af) { #ifdef INET6 case AF_INET6: return (fib6_check_urpf(rtableid, &addr->v6, 0, NHR_NONE, ifp)); #endif #ifdef INET case AF_INET: return (fib4_check_urpf(rtableid, addr->v4, 0, NHR_NONE, ifp)); #endif } return (0); } #ifdef INET static void pf_route(struct mbuf **m, struct pf_krule *r, struct ifnet *oifp, struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp) { struct mbuf *m0, *m1, *md; struct sockaddr_in dst; struct ip *ip; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_ksrc_node *sn = NULL; int error = 0; uint16_t ip_len, ip_off; int r_rt, r_dir; KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); if (s) { r_rt = s->rt; r_dir = s->direction; } else { r_rt = r->rt; r_dir = r->direction; } KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT || r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction", __func__)); if ((pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; goto bad_locked; } if (r_rt == PF_DUPTO) { if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) { if (s == NULL) { ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; /* If pfsync'd */ if (ifp == NULL) ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; PF_STATE_UNLOCK(s); } if (ifp == oifp) { /* When the 2nd interface is not skipped */ return; } else { m0 = *m; *m = NULL; goto bad; } } else { pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED; if (((m0 = m_dup(*m, M_NOWAIT)) == NULL)) { if (s) PF_STATE_UNLOCK(s); return; } } } else { if ((r_rt == PF_REPLYTO) == (r_dir == pd->dir)) { pf_dummynet(pd, s, r, m); if (s) PF_STATE_UNLOCK(s); return; } m0 = *m; } ip = mtod(m0, struct ip *); bzero(&dst, sizeof(dst)); dst.sin_family = AF_INET; dst.sin_len = sizeof(dst); dst.sin_addr = ip->ip_dst; bzero(&naddr, sizeof(naddr)); if (s == NULL) { if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); goto bad_locked; } pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET)) dst.sin_addr.s_addr = naddr.v4.s_addr; ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET)) dst.sin_addr.s_addr = s->rt_addr.v4.s_addr; ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; PF_STATE_UNLOCK(s); } /* If pfsync'd */ if (ifp == NULL) ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; if (ifp == NULL) goto bad; if (pd->dir == PF_IN) { if (pf_test(PF_OUT, 0, ifp, &m0, inp, &pd->act) != PF_PASS) goto bad; else if (m0 == NULL) goto done; if (m0->m_len < sizeof(struct ip)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: m0->m_len < sizeof(struct ip)\n", __func__)); goto bad; } ip = mtod(m0, struct ip *); } if (ifp->if_flags & IFF_LOOPBACK) m0->m_flags |= M_SKIP_FIREWALL; ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); /* Copied from FreeBSD 10.0-CURRENT ip_output. */ m0->m_pkthdr.csum_flags |= CSUM_IP; if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { in_delayed_cksum(m0); m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #if defined(SCTP) || defined(SCTP_SUPPORT) if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { sctp_delayed_cksum(m0, (uint32_t)(ip->ip_hl << 2)); m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; } #endif /* * If small enough for interface, or the interface will take * care of the fragmentation for us, we can just send directly. */ if (ip_len <= ifp->if_mtu || (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) { ip->ip_sum = 0; if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); m0->m_pkthdr.csum_flags &= ~CSUM_IP; } m_clrprotoflags(m0); /* Avoid confusing lower layers. */ md = m0; error = pf_dummynet_route(pd, s, r, ifp, sintosa(&dst), &md); if (md != NULL) error = (*ifp->if_output)(ifp, md, sintosa(&dst), NULL); goto done; } /* Balk when DF bit is set or the interface didn't support TSO. */ if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { error = EMSGSIZE; KMOD_IPSTAT_INC(ips_cantfrag); if (r_rt != PF_DUPTO) { if (s && pd->nat_rule != NULL) PACKET_UNDO_NAT(m0, pd, (ip->ip_hl << 2) + (ip_off & IP_OFFMASK), s); icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, ifp->if_mtu); goto done; } else goto bad; } error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist); if (error) goto bad; for (; m0; m0 = m1) { m1 = m0->m_nextpkt; m0->m_nextpkt = NULL; if (error == 0) { m_clrprotoflags(m0); md = m0; error = pf_dummynet_route(pd, s, r, ifp, sintosa(&dst), &md); if (md != NULL) error = (*ifp->if_output)(ifp, md, sintosa(&dst), NULL); } else m_freem(m0); } if (error == 0) KMOD_IPSTAT_INC(ips_fragmented); done: if (r_rt != PF_DUPTO) *m = NULL; return; bad_locked: if (s) PF_STATE_UNLOCK(s); bad: m_freem(m0); goto done; } #endif /* INET */ #ifdef INET6 static void pf_route6(struct mbuf **m, struct pf_krule *r, struct ifnet *oifp, struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp) { struct mbuf *m0, *md; struct sockaddr_in6 dst; struct ip6_hdr *ip6; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_ksrc_node *sn = NULL; int r_rt, r_dir; KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); if (s) { r_rt = s->rt; r_dir = s->direction; } else { r_rt = r->rt; r_dir = r->direction; } KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT || r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction", __func__)); if ((pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; goto bad_locked; } if (r_rt == PF_DUPTO) { if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) { if (s == NULL) { ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; /* If pfsync'd */ if (ifp == NULL) ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; PF_STATE_UNLOCK(s); } if (ifp == oifp) { /* When the 2nd interface is not skipped */ return; } else { m0 = *m; *m = NULL; goto bad; } } else { pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED; if (((m0 = m_dup(*m, M_NOWAIT)) == NULL)) { if (s) PF_STATE_UNLOCK(s); return; } } } else { if ((r_rt == PF_REPLYTO) == (r_dir == pd->dir)) { pf_dummynet(pd, s, r, m); if (s) PF_STATE_UNLOCK(s); return; } m0 = *m; } ip6 = mtod(m0, struct ip6_hdr *); bzero(&dst, sizeof(dst)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(dst); dst.sin6_addr = ip6->ip6_dst; bzero(&naddr, sizeof(naddr)); if (s == NULL) { if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); goto bad_locked; } pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst.sin6_addr, &naddr, AF_INET6); ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst.sin6_addr, &s->rt_addr, AF_INET6); ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; } if (s) PF_STATE_UNLOCK(s); /* If pfsync'd */ if (ifp == NULL) ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; if (ifp == NULL) goto bad; if (pd->dir == PF_IN) { if (pf_test6(PF_OUT, 0, ifp, &m0, inp, &pd->act) != PF_PASS) goto bad; else if (m0 == NULL) goto done; if (m0->m_len < sizeof(struct ip6_hdr)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: m0->m_len < sizeof(struct ip6_hdr)\n", __func__)); goto bad; } ip6 = mtod(m0, struct ip6_hdr *); } if (ifp->if_flags & IFF_LOOPBACK) m0->m_flags |= M_SKIP_FIREWALL; if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & ~ifp->if_hwassist) { uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6); in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr)); m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } /* * If the packet is too large for the outgoing interface, * send back an icmp6 error. */ if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr)) dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index); if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { md = m0; pf_dummynet_route(pd, s, r, ifp, sintosa(&dst), &md); if (md != NULL) nd6_output_ifp(ifp, ifp, md, &dst, NULL); } else { in6_ifstat_inc(ifp, ifs6_in_toobig); if (r_rt != PF_DUPTO) { if (s && pd->nat_rule != NULL) PACKET_UNDO_NAT(m0, pd, ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr), s); icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); } else goto bad; } done: if (r_rt != PF_DUPTO) *m = NULL; return; bad_locked: if (s) PF_STATE_UNLOCK(s); bad: m_freem(m0); goto done; } #endif /* INET6 */ /* * FreeBSD supports cksum offloads for the following drivers. * em(4), fxp(4), lge(4), nge(4), re(4), ti(4), txp(4), xl(4) * * CSUM_DATA_VALID | CSUM_PSEUDO_HDR : * network driver performed cksum including pseudo header, need to verify * csum_data * CSUM_DATA_VALID : * network driver performed cksum, needs to additional pseudo header * cksum computation with partial csum_data(i.e. lack of H/W support for * pseudo header, for instance sk(4) and possibly gem(4)) * * After validating the cksum of packet, set both flag CSUM_DATA_VALID and * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper * TCP/UDP layer. * Also, set csum_data to 0xffff to force cksum validation. */ static int pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af) { u_int16_t sum = 0; int hw_assist = 0; struct ip *ip; if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) return (1); if (m->m_pkthdr.len < off + len) return (1); switch (p) { case IPPROTO_TCP: if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { sum = m->m_pkthdr.csum_data; } else { ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + IPPROTO_TCP)); } sum ^= 0xffff; ++hw_assist; } break; case IPPROTO_UDP: if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { sum = m->m_pkthdr.csum_data; } else { ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + IPPROTO_UDP)); } sum ^= 0xffff; ++hw_assist; } break; case IPPROTO_ICMP: #ifdef INET6 case IPPROTO_ICMPV6: #endif /* INET6 */ break; default: return (1); } if (!hw_assist) { switch (af) { case AF_INET: if (p == IPPROTO_ICMP) { if (m->m_len < off) return (1); m->m_data += off; m->m_len -= off; sum = in_cksum(m, len); m->m_data -= off; m->m_len += off; } else { if (m->m_len < sizeof(struct ip)) return (1); sum = in4_cksum(m, p, off, len); } break; #ifdef INET6 case AF_INET6: if (m->m_len < sizeof(struct ip6_hdr)) return (1); sum = in6_cksum(m, p, off, len); break; #endif /* INET6 */ default: return (1); } } if (sum) { switch (p) { case IPPROTO_TCP: { KMOD_TCPSTAT_INC(tcps_rcvbadsum); break; } case IPPROTO_UDP: { KMOD_UDPSTAT_INC(udps_badsum); break; } #ifdef INET case IPPROTO_ICMP: { KMOD_ICMPSTAT_INC(icps_checksum); break; } #endif #ifdef INET6 case IPPROTO_ICMPV6: { KMOD_ICMP6STAT_INC(icp6s_checksum); break; } #endif /* INET6 */ } return (1); } else { if (p == IPPROTO_TCP || p == IPPROTO_UDP) { m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } } return (0); } static bool pf_pdesc_to_dnflow(const struct pf_pdesc *pd, const struct pf_krule *r, const struct pf_kstate *s, struct ip_fw_args *dnflow) { int dndir = r->direction; if (s && dndir == PF_INOUT) { dndir = s->direction; } else if (dndir == PF_INOUT) { /* Assume primary direction. Happens when we've set dnpipe in * the ethernet level code. */ dndir = pd->dir; } memset(dnflow, 0, sizeof(*dnflow)); if (pd->dport != NULL) dnflow->f_id.dst_port = ntohs(*pd->dport); if (pd->sport != NULL) dnflow->f_id.src_port = ntohs(*pd->sport); if (pd->dir == PF_IN) dnflow->flags |= IPFW_ARGS_IN; else dnflow->flags |= IPFW_ARGS_OUT; if (pd->dir != dndir && pd->act.dnrpipe) { dnflow->rule.info = pd->act.dnrpipe; } else if (pd->dir == dndir && pd->act.dnpipe) { dnflow->rule.info = pd->act.dnpipe; } else { return (false); } dnflow->rule.info |= IPFW_IS_DUMMYNET; if (r->free_flags & PFRULE_DN_IS_PIPE || pd->act.flags & PFSTATE_DN_IS_PIPE) dnflow->rule.info |= IPFW_IS_PIPE; dnflow->f_id.proto = pd->proto; dnflow->f_id.extra = dnflow->rule.info; switch (pd->af) { case AF_INET: dnflow->f_id.addr_type = 4; dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr); dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr); break; case AF_INET6: dnflow->flags |= IPFW_ARGS_IP6; dnflow->f_id.addr_type = 6; dnflow->f_id.src_ip6 = pd->src->v6; dnflow->f_id.dst_ip6 = pd->dst->v6; break; default: panic("Invalid AF"); break; } return (true); } int pf_test_eth(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) { struct pfi_kkif *kif; struct mbuf *m = *m0; M_ASSERTPKTHDR(m); MPASS(ifp->if_vnet == curvnet); NET_EPOCH_ASSERT(); if (!V_pf_status.running) return (PF_PASS); kif = (struct pfi_kkif *)ifp->if_pf_kif; if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: kif == NULL, if_xname %s\n", __func__, ifp->if_xname)); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) return (PF_PASS); if (m->m_flags & M_SKIP_FIREWALL) return (PF_PASS); /* Stateless! */ return (pf_test_eth_rule(dir, kif, m0)); } static int pf_dummynet(struct pf_pdesc *pd, struct pf_kstate *s, struct pf_krule *r, struct mbuf **m0) { return (pf_dummynet_route(pd, s, r, NULL, NULL, m0)); } static int pf_dummynet_route(struct pf_pdesc *pd, struct pf_kstate *s, struct pf_krule *r, struct ifnet *ifp, struct sockaddr *sa, struct mbuf **m0) { NET_EPOCH_ASSERT(); - if (s && (s->dnpipe || s->dnrpipe)) { - pd->act.dnpipe = s->dnpipe; - pd->act.dnrpipe = s->dnrpipe; - pd->act.flags = s->state_flags; - } else if (r->dnpipe || r->dnrpipe) { - pd->act.dnpipe = r->dnpipe; - pd->act.dnrpipe = r->dnrpipe; - pd->act.flags = r->free_flags; - } if (pd->act.dnpipe || pd->act.dnrpipe) { struct ip_fw_args dnflow; if (ip_dn_io_ptr == NULL) { m_freem(*m0); *m0 = NULL; return (ENOMEM); } if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(*m0)) == NULL)) { m_freem(*m0); *m0 = NULL; return (ENOMEM); } if (ifp != NULL) { pd->pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO; pd->pf_mtag->if_index = ifp->if_index; pd->pf_mtag->if_idxgen = ifp->if_idxgen; MPASS(sa != NULL); if (pd->af == AF_INET) memcpy(&pd->pf_mtag->dst, sa, sizeof(struct sockaddr_in)); else memcpy(&pd->pf_mtag->dst, sa, sizeof(struct sockaddr_in6)); } if (pf_pdesc_to_dnflow(pd, r, s, &dnflow)) { pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNET; ip_dn_io_ptr(m0, &dnflow); if (*m0 != NULL) { pd->pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO; pd->pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; } } } return (0); } #ifdef INET int pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp, struct pf_rule_actions *default_actions) { struct pfi_kkif *kif; - u_short action, reason = 0, log = 0; + u_short action, reason = 0; struct mbuf *m = *m0; struct ip *h = NULL; struct m_tag *ipfwtag; struct pf_krule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; struct pf_kstate *s = NULL; struct pf_kruleset *ruleset = NULL; struct pf_pdesc pd; - int off, dirndx; - uint16_t scrub_flags; -#ifdef ALTQ - uint16_t qid; -#endif - uint16_t pqid; + int off, dirndx, use_2nd_queue = 0; uint16_t tag; - int32_t rtableid; - uint8_t min_ttl; - uint8_t set_tos; uint8_t rt; - uint8_t set_prio[2]; PF_RULES_RLOCK_TRACKER; KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir)); M_ASSERTPKTHDR(m); if (!V_pf_status.running) return (PF_PASS); PF_RULES_RLOCK(); kif = (struct pfi_kkif *)ifp->if_pf_kif; if (__predict_false(kif == NULL)) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); PF_RULES_RUNLOCK(); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) { PF_RULES_RUNLOCK(); return (PF_PASS); } if (m->m_flags & M_SKIP_FIREWALL) { PF_RULES_RUNLOCK(); return (PF_PASS); } memset(&pd, 0, sizeof(pd)); if (default_actions != NULL) memcpy(&pd.act, default_actions, sizeof(pd.act)); pd.pf_mtag = pf_find_mtag(m); if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) { pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO; ifp = ifnet_byindexgen(pd.pf_mtag->if_index, pd.pf_mtag->if_idxgen); if (ifp == NULL || ifp->if_flags & IFF_DYING) { PF_RULES_RUNLOCK(); m_freem(*m0); *m0 = NULL; return (PF_PASS); } PF_RULES_RUNLOCK(); (ifp->if_output)(ifp, m, sintosa(&pd.pf_mtag->dst), NULL); *m0 = NULL; return (PF_PASS); } if (pd.pf_mtag && pd.pf_mtag->dnpipe) { pd.act.dnpipe = pd.pf_mtag->dnpipe; pd.act.flags = pd.pf_mtag->dnflags; } if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL && pd.pf_mtag->flags & PF_MTAG_FLAG_DUMMYNET) { /* Dummynet re-injects packets after they've * completed their delay. We've already * processed them, so pass unconditionally. */ /* But only once. We may see the packet multiple times (e.g. * PFIL_IN/PFIL_OUT). */ pd.pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; PF_RULES_RUNLOCK(); return (PF_PASS); } pd.sport = pd.dport = NULL; pd.proto_sum = NULL; pd.dir = dir; pd.sidx = (dir == PF_IN) ? 0 : 1; pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET; pd.act.rtableid = -1; if (__predict_false(ip_divert_ptr != NULL) && ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; goto done; } pd.pf_mtag->flags |= PF_MTAG_FLAG_PACKET_LOOPED; m_tag_delete(m, ipfwtag); } if (pd.pf_mtag && pd.pf_mtag->flags & PF_MTAG_FLAG_FASTFWD_OURS_PRESENT) { m->m_flags |= M_FASTFWD_OURS; pd.pf_mtag->flags &= ~PF_MTAG_FLAG_FASTFWD_OURS_PRESENT; } } else if (pf_normalize_ip(m0, kif, &reason, &pd) != PF_PASS) { /* We do IP header normalization and packet reassembly here */ action = PF_DROP; goto done; } m = *m0; /* pf_normalize messes with m0 */ h = mtod(m, struct ip *); off = h->ip_hl << 2; if (off < (int)sizeof(struct ip)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; goto done; } pd.src = (struct pf_addr *)&h->ip_src; pd.dst = (struct pf_addr *)&h->ip_dst; pd.ip_sum = &h->ip_sum; pd.proto = h->ip_p; pd.tos = h->ip_tos & ~IPTOS_ECN_MASK; pd.tot_len = ntohs(h->ip_len); /* handle fragments that didn't get reassembled by normalization */ if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { action = pf_test_fragment(&r, kif, m, h, &pd, &a, &ruleset); goto done; } switch (h->ip_p) { case IPPROTO_TCP: { if (!pf_pull_hdr(m, off, &pd.hdr.tcp, sizeof(pd.hdr.tcp), &action, &reason, AF_INET)) { if (action != PF_PASS) - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; goto done; } pd.p_len = pd.tot_len - off - (pd.hdr.tcp.th_off << 2); pd.sport = &pd.hdr.tcp.th_sport; pd.dport = &pd.hdr.tcp.th_dport; /* Respond to SYN with a syncookie. */ if ((pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_SYN && pd.dir == PF_IN && pf_synflood_check(&pd)) { pf_syncookie_send(m, off, &pd); action = PF_DROP; break; } if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0) - pqid = 1; + use_2nd_queue = 1; action = pf_normalize_tcp(kif, m, 0, off, h, &pd); if (action == PF_DROP) goto done; action = pf_test_state_tcp(&s, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (V_pfsync_update_state_ptr != NULL) V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; - log = s->log; } else if (s == NULL) { /* Validate remote SYN|ACK, re-create original SYN if * valid. */ if ((pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && pf_syncookie_validate(&pd) && pd.dir == PF_IN) { struct mbuf *msyn; msyn = pf_syncookie_recreate_syn(h->ip_ttl, off,&pd); if (msyn == NULL) { action = PF_DROP; break; } action = pf_test(dir, pflags, ifp, &msyn, inp, &pd.act); m_freem(msyn); if (action == PF_PASS) { action = pf_test_state_tcp(&s, kif, m, off, h, &pd, &reason); if (action != PF_PASS || s == NULL) { action = PF_DROP; break; } s->src.seqhi = ntohl(pd.hdr.tcp.th_ack) - 1; s->src.seqlo = ntohl(pd.hdr.tcp.th_seq) - 1; pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_DST); action = pf_synproxy(&pd, &s, &reason); if (action != PF_PASS) break; } break; } else { action = pf_test_rule(&r, &s, kif, m, off, &pd, &a, &ruleset, inp); } } - if (s) { - if (s->max_mss) - pf_normalize_mss(m, off, &pd, s->max_mss); - } else if (r->max_mss) - pf_normalize_mss(m, off, &pd, r->max_mss); break; } case IPPROTO_UDP: { if (!pf_pull_hdr(m, off, &pd.hdr.udp, sizeof(pd.hdr.udp), &action, &reason, AF_INET)) { if (action != PF_PASS) - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; goto done; } pd.sport = &pd.hdr.udp.uh_sport; pd.dport = &pd.hdr.udp.uh_dport; if (pd.hdr.udp.uh_dport == 0 || ntohs(pd.hdr.udp.uh_ulen) > m->m_pkthdr.len - off || ntohs(pd.hdr.udp.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, kif, m, off, h, &pd); if (action == PF_PASS) { if (V_pfsync_update_state_ptr != NULL) V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; - log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, kif, m, off, &pd, &a, &ruleset, inp); break; } case IPPROTO_ICMP: { if (!pf_pull_hdr(m, off, &pd.hdr.icmp, ICMP_MINLEN, &action, &reason, AF_INET)) { if (action != PF_PASS) - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; goto done; } action = pf_test_state_icmp(&s, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (V_pfsync_update_state_ptr != NULL) V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; - log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, kif, m, off, &pd, &a, &ruleset, inp); break; } #ifdef INET6 case IPPROTO_ICMPV6: { action = PF_DROP; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping IPv4 packet with ICMPv6 payload\n")); goto done; } #endif default: action = pf_test_state_other(&s, kif, m, &pd); if (action == PF_PASS) { if (V_pfsync_update_state_ptr != NULL) V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; - log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, kif, m, off, &pd, &a, &ruleset, inp); break; } done: PF_RULES_RUNLOCK(); if (action == PF_PASS && h->ip_hl > 5 && !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping packet with ip options\n")); } if (s) { - scrub_flags = s->state_flags; - min_ttl = s->min_ttl; - set_tos = s->set_tos; - rtableid = s->rtableid; - pqid = s->pqid; -#ifdef ALTQ - qid = s->qid; -#endif + memcpy(&pd.act, &s->act, sizeof(struct pf_rule_actions)); tag = s->tag; rt = s->rt; - set_prio[0] = s->set_prio[0]; - set_prio[1] = s->set_prio[1]; } else { - scrub_flags = r->scrub_flags; - min_ttl = r->min_ttl; - set_tos = r->set_tos; - rtableid = r->rtableid; - pqid = r->pqid; -#ifdef ALTQ - qid = r->qid; -#endif tag = r->tag; rt = r->rt; - set_prio[0] = r->set_prio[0]; - set_prio[1] = r->set_prio[1]; } if (tag > 0 && pf_tag_packet(m, &pd, tag)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } - pf_scrub_ip(&m, scrub_flags, min_ttl, set_tos); + pf_scrub_ip(&m, &pd); + if (pd.proto == IPPROTO_TCP && pd.act.max_mss) + pf_normalize_mss(m, off, &pd); - if (rtableid >= 0) - M_SETFIB(m, rtableid); + if (pd.act.rtableid >= 0) + M_SETFIB(m, pd.act.rtableid); - if (scrub_flags & PFSTATE_SETPRIO) { + if (pd.act.flags & PFSTATE_SETPRIO) { if (pd.tos & IPTOS_LOWDELAY) - pqid = 1; - if (vlan_set_pcp(m, set_prio[pqid])) { + use_2nd_queue = 1; + if (vlan_set_pcp(m, pd.act.set_prio[use_2nd_queue])) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, ("pf: failed to allocate 802.1q mtag\n")); } } #ifdef ALTQ - if (qid) { - pd.act.pqid = pqid; - pd.act.qid = qid; - } - if (action == PF_PASS && pd.act.qid) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } else { if (s != NULL) pd.pf_mtag->qid_hash = pf_state_hash(s); - if (pqid || (pd.tos & IPTOS_LOWDELAY)) + if (use_2nd_queue || (pd.tos & IPTOS_LOWDELAY)) pd.pf_mtag->qid = pd.act.pqid; else pd.pf_mtag->qid = pd.act.qid; /* Add hints for ecn. */ pd.pf_mtag->hdr = h; } } #endif /* ALTQ */ /* * connections redirected to loopback should not match sockets * bound specifically to loopback due to security implications, * see tcp_input() and in_pcblookup_listen(). */ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && IN_LOOPBACK(ntohl(pd.dst->v4.s_addr))) m->m_flags |= M_SKIP_FIREWALL; if (__predict_false(ip_divert_ptr != NULL) && action == PF_PASS && r->divert.port && !PACKET_LOOPED(&pd)) { ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); if (ipfwtag != NULL) { ((struct ipfw_rule_ref *)(ipfwtag+1))->info = ntohs(r->divert.port); ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; if (s) PF_STATE_UNLOCK(s); m_tag_prepend(m, ipfwtag); if (m->m_flags & M_FASTFWD_OURS) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, ("pf: failed to allocate tag\n")); } else { pd.pf_mtag->flags |= PF_MTAG_FLAG_FASTFWD_OURS_PRESENT; m->m_flags &= ~M_FASTFWD_OURS; } } ip_divert_ptr(*m0, dir == PF_IN); *m0 = NULL; return (action); } else { /* XXX: ipfw has the same behaviour! */ action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, ("pf: failed to allocate divert tag\n")); } } - if (log) { + if (pd.act.log) { struct pf_krule *lr; struct pf_krule_item *ri; if (s != NULL && s->nat_rule.ptr != NULL && s->nat_rule.ptr->log & PF_LOG_ALL) lr = s->nat_rule.ptr; else lr = r; - if (log & PF_LOG_FORCE || lr->log & PF_LOG_ALL) + if (pd.act.log & PF_LOG_FORCE || lr->log & PF_LOG_ALL) PFLOG_PACKET(kif, m, AF_INET, reason, lr, a, ruleset, &pd, (s == NULL)); if (s) { SLIST_FOREACH(ri, &s->match_rules, entry) if (ri->r->log & PF_LOG_ALL) PFLOG_PACKET(kif, m, AF_INET, reason, ri->r, a, ruleset, &pd, 0); } } pf_counter_u64_critical_enter(); pf_counter_u64_add_protected(&kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS], pd.tot_len); pf_counter_u64_add_protected(&kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS], 1); if (action == PF_PASS || r->action == PF_DROP) { dirndx = (dir == PF_OUT); pf_counter_u64_add_protected(&r->packets[dirndx], 1); pf_counter_u64_add_protected(&r->bytes[dirndx], pd.tot_len); pf_update_timestamp(r); if (a != NULL) { pf_counter_u64_add_protected(&a->packets[dirndx], 1); pf_counter_u64_add_protected(&a->bytes[dirndx], pd.tot_len); } if (s != NULL) { struct pf_krule_item *ri; if (s->nat_rule.ptr != NULL) { pf_counter_u64_add_protected(&s->nat_rule.ptr->packets[dirndx], 1); pf_counter_u64_add_protected(&s->nat_rule.ptr->bytes[dirndx], pd.tot_len); } if (s->src_node != NULL) { counter_u64_add(s->src_node->packets[dirndx], 1); counter_u64_add(s->src_node->bytes[dirndx], pd.tot_len); } if (s->nat_src_node != NULL) { counter_u64_add(s->nat_src_node->packets[dirndx], 1); counter_u64_add(s->nat_src_node->bytes[dirndx], pd.tot_len); } dirndx = (dir == s->direction) ? 0 : 1; s->packets[dirndx]++; s->bytes[dirndx] += pd.tot_len; SLIST_FOREACH(ri, &s->match_rules, entry) { pf_counter_u64_add_protected(&ri->r->packets[dirndx], 1); pf_counter_u64_add_protected(&ri->r->bytes[dirndx], pd.tot_len); } } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; if (nr != NULL && r == &V_pf_default_rule) tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->src.addr.p.tbl, (s == NULL) ? pd.src : &s->key[(s->direction == PF_IN)]-> addr[(s->direction == PF_OUT)], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL) ? pd.dst : &s->key[(s->direction == PF_IN)]-> addr[(s->direction == PF_IN)], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->dst.neg); } pf_counter_u64_critical_exit(); switch (action) { case PF_SYNPROXY_DROP: m_freem(*m0); case PF_DEFER: *m0 = NULL; action = PF_PASS; break; case PF_DROP: m_freem(*m0); *m0 = NULL; break; default: /* pf_route() returns unlocked. */ if (rt) { pf_route(m0, r, kif->pfik_ifp, s, &pd, inp); return (action); } if (pf_dummynet(&pd, s, r, m0) != 0) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } break; } SDT_PROBE4(pf, ip, test, done, action, reason, r, s); if (s) PF_STATE_UNLOCK(s); return (action); } #endif /* INET */ #ifdef INET6 int pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp, struct pf_rule_actions *default_actions) { struct pfi_kkif *kif; - u_short action, reason = 0, log = 0; + u_short action, reason = 0; struct mbuf *m = *m0, *n = NULL; struct m_tag *mtag; struct ip6_hdr *h = NULL; struct pf_krule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; struct pf_kstate *s = NULL; struct pf_kruleset *ruleset = NULL; struct pf_pdesc pd; - int off, terminal = 0, dirndx, rh_cnt = 0; - uint16_t scrub_flags; -#ifdef ALTQ - uint16_t qid; -#endif - uint16_t pqid; + int off, terminal = 0, dirndx, rh_cnt = 0, use_2nd_queue = 0; uint16_t tag; - int32_t rtableid; - uint8_t min_ttl; - uint8_t set_tos; uint8_t rt; - uint8_t set_prio[2]; PF_RULES_RLOCK_TRACKER; KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir)); M_ASSERTPKTHDR(m); if (!V_pf_status.running) return (PF_PASS); PF_RULES_RLOCK(); kif = (struct pfi_kkif *)ifp->if_pf_kif; if (__predict_false(kif == NULL)) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); PF_RULES_RUNLOCK(); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) { PF_RULES_RUNLOCK(); return (PF_PASS); } if (m->m_flags & M_SKIP_FIREWALL) { PF_RULES_RUNLOCK(); return (PF_PASS); } memset(&pd, 0, sizeof(pd)); if (default_actions != NULL) memcpy(&pd.act, default_actions, sizeof(pd.act)); pd.pf_mtag = pf_find_mtag(m); if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) { pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO; ifp = ifnet_byindexgen(pd.pf_mtag->if_index, pd.pf_mtag->if_idxgen); if (ifp == NULL || ifp->if_flags & IFF_DYING) { PF_RULES_RUNLOCK(); m_freem(*m0); *m0 = NULL; return (PF_PASS); } PF_RULES_RUNLOCK(); nd6_output_ifp(ifp, ifp, m, (struct sockaddr_in6 *)&pd.pf_mtag->dst, NULL); *m0 = NULL; return (PF_PASS); } if (pd.pf_mtag && pd.pf_mtag->dnpipe) { pd.act.dnpipe = pd.pf_mtag->dnpipe; pd.act.flags = pd.pf_mtag->dnflags; } if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL && pd.pf_mtag->flags & PF_MTAG_FLAG_DUMMYNET) { pd.pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; /* Dummynet re-injects packets after they've * completed their delay. We've already * processed them, so pass unconditionally. */ PF_RULES_RUNLOCK(); return (PF_PASS); } pd.sport = pd.dport = NULL; pd.ip_sum = NULL; pd.proto_sum = NULL; pd.dir = dir; pd.sidx = (dir == PF_IN) ? 0 : 1; pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET6; pd.act.rtableid = -1; /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip6(m0, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } m = *m0; /* pf_normalize messes with m0 */ h = mtod(m, struct ip6_hdr *); /* * we do not support jumbogram. if we keep going, zero ip6_plen * will do something bad, so drop the packet for now. */ if (htons(h->ip6_plen) == 0) { action = PF_DROP; REASON_SET(&reason, PFRES_NORM); /*XXX*/ goto done; } pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; pd.tos = IPV6_DSCP(h); pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); pd.proto = h->ip6_nxt; do { switch (pd.proto) { case IPPROTO_FRAGMENT: action = pf_test_fragment(&r, kif, m, h, &pd, &a, &ruleset); if (action == PF_DROP) REASON_SET(&reason, PFRES_FRAG); goto done; case IPPROTO_ROUTING: { struct ip6_rthdr rthdr; if (rh_cnt++) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 more than one rthdr\n")); action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; goto done; } if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, &reason, pd.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 short rthdr\n")); action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; goto done; } if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 rthdr0\n")); action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; goto done; } /* FALLTHROUGH */ } case IPPROTO_AH: case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct ip6_ext opt6; if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), NULL, &reason, pd.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 short opt\n")); action = PF_DROP; - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; goto done; } if (pd.proto == IPPROTO_AH) off += (opt6.ip6e_len + 2) * 4; else off += (opt6.ip6e_len + 1) * 8; pd.proto = opt6.ip6e_nxt; /* goto the next header */ break; } default: terminal++; break; } } while (!terminal); /* if there's no routing header, use unmodified mbuf for checksumming */ if (!n) n = m; switch (pd.proto) { case IPPROTO_TCP: { if (!pf_pull_hdr(m, off, &pd.hdr.tcp, sizeof(pd.hdr.tcp), &action, &reason, AF_INET6)) { if (action != PF_PASS) - log |= PF_LOG_FORCE; + pd.act.log |= PF_LOG_FORCE; goto done; } pd.p_len = pd.tot_len - off - (pd.hdr.tcp.th_off << 2); pd.sport = &pd.hdr.tcp.th_sport; pd.dport = &pd.hdr.tcp.th_dport; action = pf_normalize_tcp(kif, m, 0, off, h, &pd); if (action == PF_DROP) goto done; action = pf_test_state_tcp(&s, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (V_pfsync_update_state_ptr != NULL) V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; - log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, kif, m, off, &pd, &a, &ruleset, inp); - if (s) { - if (s->max_mss) - pf_normalize_mss(m, off, &pd, s->max_mss); - } else if (r->max_mss) - pf_normalize_mss(m, off, &pd, r->max_mss); break; } case IPPROTO_UDP: { if (!pf_pull_hdr(m, off, &pd.hdr.udp, sizeof(pd.hdr.udp), &action, &reason, AF_INET6)) { if (action != PF_PASS) - log |= PF_LOG_FORCE; + pd.act.log |= PF_LOG_FORCE; goto done; } pd.sport = &pd.hdr.udp.uh_sport; pd.dport = &pd.hdr.udp.uh_dport; if (pd.hdr.udp.uh_dport == 0 || ntohs(pd.hdr.udp.uh_ulen) > m->m_pkthdr.len - off || ntohs(pd.hdr.udp.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, kif, m, off, h, &pd); if (action == PF_PASS) { if (V_pfsync_update_state_ptr != NULL) V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; - log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, kif, m, off, &pd, &a, &ruleset, inp); break; } case IPPROTO_ICMP: { action = PF_DROP; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping IPv6 packet with ICMPv4 payload\n")); goto done; } case IPPROTO_ICMPV6: { if (!pf_pull_hdr(m, off, &pd.hdr.icmp6, sizeof(pd.hdr.icmp6), &action, &reason, AF_INET6)) { if (action != PF_PASS) - log |= PF_LOG_FORCE; + pd.act.log |= PF_LOG_FORCE; goto done; } action = pf_test_state_icmp(&s, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (V_pfsync_update_state_ptr != NULL) V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; - log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, kif, m, off, &pd, &a, &ruleset, inp); break; } default: action = pf_test_state_other(&s, kif, m, &pd); if (action == PF_PASS) { if (V_pfsync_update_state_ptr != NULL) V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; - log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, kif, m, off, &pd, &a, &ruleset, inp); break; } done: PF_RULES_RUNLOCK(); if (n != m) { m_freem(n); n = NULL; } /* handle dangerous IPv6 extension headers. */ if (action == PF_PASS && rh_cnt && !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); - log = r->log; + pd.act.log = r->log; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping packet with dangerous v6 headers\n")); } if (s) { - scrub_flags = s->state_flags; - min_ttl = s->min_ttl; - set_tos = s->set_tos; - rtableid = s->rtableid; - pqid = s->pqid; -#ifdef ALTQ - qid = s->qid; -#endif + memcpy(&pd.act, &s->act, sizeof(struct pf_rule_actions)); tag = s->tag; rt = s->rt; - set_prio[0] = s->set_prio[0]; - set_prio[1] = s->set_prio[1]; } else { - scrub_flags = r->scrub_flags; - min_ttl = r->min_ttl; - set_tos = r->set_tos; - rtableid = r->rtableid; - pqid = r->pqid; -#ifdef ALTQ - qid = r->qid; -#endif tag = r->tag; rt = r->rt; - set_prio[0] = r->set_prio[0]; - set_prio[1] = r->set_prio[1]; } if (tag > 0 && pf_tag_packet(m, &pd, tag)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } - pf_scrub_ip6(&m, scrub_flags, min_ttl, set_tos); + pf_scrub_ip6(&m, &pd); + if (pd.proto == IPPROTO_TCP && pd.act.max_mss) + pf_normalize_mss(m, off, &pd); - if (rtableid >= 0) - M_SETFIB(m, rtableid); + if (pd.act.rtableid >= 0) + M_SETFIB(m, pd.act.rtableid); - if (scrub_flags & PFSTATE_SETPRIO) { + if (pd.act.flags & PFSTATE_SETPRIO) { if (pd.tos & IPTOS_LOWDELAY) - pqid = 1; - if (vlan_set_pcp(m, set_prio[pqid])) { + use_2nd_queue = 1; + if (vlan_set_pcp(m, pd.act.set_prio[use_2nd_queue])) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); - log = PF_LOG_FORCE; + pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, ("pf: failed to allocate 802.1q mtag\n")); } } #ifdef ALTQ - if (qid) { - pd.act.pqid = pqid; - pd.act.qid = qid; - } - if (action == PF_PASS && pd.act.qid) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } else { if (s != NULL) pd.pf_mtag->qid_hash = pf_state_hash(s); if (pd.tos & IPTOS_LOWDELAY) pd.pf_mtag->qid = pd.act.pqid; else pd.pf_mtag->qid = pd.act.qid; /* Add hints for ecn. */ pd.pf_mtag->hdr = h; } } #endif /* ALTQ */ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) m->m_flags |= M_SKIP_FIREWALL; /* XXX: Anybody working on it?! */ if (r->divert.port) printf("pf: divert(9) is not supported for IPv6\n"); - if (log) { + if (pd.act.log) { struct pf_krule *lr; struct pf_krule_item *ri; if (s != NULL && s->nat_rule.ptr != NULL && s->nat_rule.ptr->log & PF_LOG_ALL) lr = s->nat_rule.ptr; else lr = r; - if (log & PF_LOG_FORCE || lr->log & PF_LOG_ALL) + if (pd.act.log & PF_LOG_FORCE || lr->log & PF_LOG_ALL) PFLOG_PACKET(kif, m, AF_INET6, reason, lr, a, ruleset, &pd, (s == NULL)); if (s) { SLIST_FOREACH(ri, &s->match_rules, entry) if (ri->r->log & PF_LOG_ALL) PFLOG_PACKET(kif, m, AF_INET6, reason, ri->r, a, ruleset, &pd, 0); } } pf_counter_u64_critical_enter(); pf_counter_u64_add_protected(&kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS], pd.tot_len); pf_counter_u64_add_protected(&kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS], 1); if (action == PF_PASS || r->action == PF_DROP) { dirndx = (dir == PF_OUT); pf_counter_u64_add_protected(&r->packets[dirndx], 1); pf_counter_u64_add_protected(&r->bytes[dirndx], pd.tot_len); if (a != NULL) { pf_counter_u64_add_protected(&a->packets[dirndx], 1); pf_counter_u64_add_protected(&a->bytes[dirndx], pd.tot_len); } if (s != NULL) { if (s->nat_rule.ptr != NULL) { pf_counter_u64_add_protected(&s->nat_rule.ptr->packets[dirndx], 1); pf_counter_u64_add_protected(&s->nat_rule.ptr->bytes[dirndx], pd.tot_len); } if (s->src_node != NULL) { counter_u64_add(s->src_node->packets[dirndx], 1); counter_u64_add(s->src_node->bytes[dirndx], pd.tot_len); } if (s->nat_src_node != NULL) { counter_u64_add(s->nat_src_node->packets[dirndx], 1); counter_u64_add(s->nat_src_node->bytes[dirndx], pd.tot_len); } dirndx = (dir == s->direction) ? 0 : 1; s->packets[dirndx]++; s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; if (nr != NULL && r == &V_pf_default_rule) tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->src.addr.p.tbl, (s == NULL) ? pd.src : &s->key[(s->direction == PF_IN)]->addr[0], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL) ? pd.dst : &s->key[(s->direction == PF_IN)]->addr[1], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->dst.neg); } pf_counter_u64_critical_exit(); switch (action) { case PF_SYNPROXY_DROP: m_freem(*m0); case PF_DEFER: *m0 = NULL; action = PF_PASS; break; case PF_DROP: m_freem(*m0); *m0 = NULL; break; default: /* pf_route6() returns unlocked. */ if (rt) { pf_route6(m0, r, kif->pfik_ifp, s, &pd, inp); return (action); } if (pf_dummynet(&pd, s, r, m0) != 0) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } break; } if (s) PF_STATE_UNLOCK(s); /* If reassembled packet passed, create new fragments. */ if (action == PF_PASS && *m0 && dir == PF_OUT && (mtag = m_tag_find(m, PACKET_TAG_PF_REASSEMBLED, NULL)) != NULL) action = pf_refragment6(ifp, m0, mtag, pflags & PFIL_FWD); SDT_PROBE4(pf, ip, test6, done, action, reason, r, s); return (action); } #endif /* INET6 */ diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index b78c30aa4b8c..78727566cde6 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -1,6965 +1,6965 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002,2003 Henning Brauer * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. * * $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_bpf.h" #include "opt_pf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #ifdef ALTQ #include #endif SDT_PROBE_DEFINE3(pf, ioctl, ioctl, error, "int", "int", "int"); SDT_PROBE_DEFINE3(pf, ioctl, function, error, "char *", "int", "int"); SDT_PROBE_DEFINE2(pf, ioctl, addrule, error, "int", "int"); SDT_PROBE_DEFINE2(pf, ioctl, nvchk, error, "int", "int"); static struct pf_kpool *pf_get_kpool(const char *, u_int32_t, u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t); static void pf_mv_kpool(struct pf_kpalist *, struct pf_kpalist *); static void pf_empty_kpool(struct pf_kpalist *); static int pfioctl(struct cdev *, u_long, caddr_t, int, struct thread *); static int pf_begin_eth(uint32_t *, const char *); static void pf_rollback_eth_cb(struct epoch_context *); static int pf_rollback_eth(uint32_t, const char *); static int pf_commit_eth(uint32_t, const char *); static void pf_free_eth_rule(struct pf_keth_rule *); #ifdef ALTQ static int pf_begin_altq(u_int32_t *); static int pf_rollback_altq(u_int32_t); static int pf_commit_altq(u_int32_t); static int pf_enable_altq(struct pf_altq *); static int pf_disable_altq(struct pf_altq *); static uint16_t pf_qname2qid(const char *); static void pf_qid_unref(uint16_t); #endif /* ALTQ */ static int pf_begin_rules(u_int32_t *, int, const char *); static int pf_rollback_rules(u_int32_t, int, char *); static int pf_setup_pfsync_matching(struct pf_kruleset *); static void pf_hash_rule_rolling(MD5_CTX *, struct pf_krule *); static void pf_hash_rule(struct pf_krule *); static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); static int pf_commit_rules(u_int32_t, int, char *); static int pf_addr_setup(struct pf_kruleset *, struct pf_addr_wrap *, sa_family_t); static void pf_addr_copyout(struct pf_addr_wrap *); static void pf_src_node_copy(const struct pf_ksrc_node *, struct pf_src_node *); #ifdef ALTQ static int pf_export_kaltq(struct pf_altq *, struct pfioc_altq_v1 *, size_t); static int pf_import_kaltq(struct pfioc_altq_v1 *, struct pf_altq *, size_t); #endif /* ALTQ */ VNET_DEFINE(struct pf_krule, pf_default_rule); static __inline int pf_krule_compare(struct pf_krule *, struct pf_krule *); RB_GENERATE(pf_krule_global, pf_krule, entry_global, pf_krule_compare); #ifdef ALTQ VNET_DEFINE_STATIC(int, pf_altq_running); #define V_pf_altq_running VNET(pf_altq_running) #endif #define TAGID_MAX 50000 struct pf_tagname { TAILQ_ENTRY(pf_tagname) namehash_entries; TAILQ_ENTRY(pf_tagname) taghash_entries; char name[PF_TAG_NAME_SIZE]; uint16_t tag; int ref; }; struct pf_tagset { TAILQ_HEAD(, pf_tagname) *namehash; TAILQ_HEAD(, pf_tagname) *taghash; unsigned int mask; uint32_t seed; BITSET_DEFINE(, TAGID_MAX) avail; }; VNET_DEFINE(struct pf_tagset, pf_tags); #define V_pf_tags VNET(pf_tags) static unsigned int pf_rule_tag_hashsize; #define PF_RULE_TAG_HASH_SIZE_DEFAULT 128 SYSCTL_UINT(_net_pf, OID_AUTO, rule_tag_hashsize, CTLFLAG_RDTUN, &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT, "Size of pf(4) rule tag hashtable"); #ifdef ALTQ VNET_DEFINE(struct pf_tagset, pf_qids); #define V_pf_qids VNET(pf_qids) static unsigned int pf_queue_tag_hashsize; #define PF_QUEUE_TAG_HASH_SIZE_DEFAULT 128 SYSCTL_UINT(_net_pf, OID_AUTO, queue_tag_hashsize, CTLFLAG_RDTUN, &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT, "Size of pf(4) queue tag hashtable"); #endif VNET_DEFINE(uma_zone_t, pf_tag_z); #define V_pf_tag_z VNET(pf_tag_z) static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db"); static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif VNET_DEFINE_STATIC(bool, pf_filter_local) = false; #define V_pf_filter_local VNET(pf_filter_local) SYSCTL_BOOL(_net_pf, OID_AUTO, filter_local, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pf_filter_local), false, "Enable filtering for packets delivered to local network stack"); static void pf_init_tagset(struct pf_tagset *, unsigned int *, unsigned int); static void pf_cleanup_tagset(struct pf_tagset *); static uint16_t tagname2hashindex(const struct pf_tagset *, const char *); static uint16_t tag2hashindex(const struct pf_tagset *, uint16_t); static u_int16_t tagname2tag(struct pf_tagset *, const char *); static u_int16_t pf_tagname2tag(const char *); static void tag_unref(struct pf_tagset *, u_int16_t); #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x struct cdev *pf_dev; /* * XXX - These are new and need to be checked when moveing to a new version */ static void pf_clear_all_states(void); static unsigned int pf_clear_states(const struct pf_kstate_kill *); static void pf_killstates(struct pf_kstate_kill *, unsigned int *); static int pf_killstates_row(struct pf_kstate_kill *, struct pf_idhash *); static int pf_killstates_nv(struct pfioc_nv *); static int pf_clearstates_nv(struct pfioc_nv *); static int pf_getstate(struct pfioc_nv *); static int pf_getstatus(struct pfioc_nv *); static int pf_clear_tables(void); static void pf_clear_srcnodes(struct pf_ksrc_node *); static void pf_kill_srcnodes(struct pfioc_src_node_kill *); static int pf_keepcounters(struct pfioc_nv *); static void pf_tbladdr_copyout(struct pf_addr_wrap *); /* * Wrapper functions for pfil(9) hooks */ static pfil_return_t pf_eth_check_in(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp); static pfil_return_t pf_eth_check_out(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp); #ifdef INET static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp); static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp); #endif #ifdef INET6 static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp); static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp); #endif static void hook_pf_eth(void); static void hook_pf(void); static void dehook_pf_eth(void); static void dehook_pf(void); static int shutdown_pf(void); static int pf_load(void); static void pf_unload(void); static struct cdevsw pf_cdevsw = { .d_ioctl = pfioctl, .d_name = PF_NAME, .d_version = D_VERSION, }; VNET_DEFINE_STATIC(bool, pf_pfil_hooked); #define V_pf_pfil_hooked VNET(pf_pfil_hooked) VNET_DEFINE_STATIC(bool, pf_pfil_eth_hooked); #define V_pf_pfil_eth_hooked VNET(pf_pfil_eth_hooked) /* * We need a flag that is neither hooked nor running to know when * the VNET is "valid". We primarily need this to control (global) * external event, e.g., eventhandlers. */ VNET_DEFINE(int, pf_vnet_active); #define V_pf_vnet_active VNET(pf_vnet_active) int pf_end_threads; struct proc *pf_purge_proc; VNET_DEFINE(struct rmlock, pf_rules_lock); VNET_DEFINE_STATIC(struct sx, pf_ioctl_lock); #define V_pf_ioctl_lock VNET(pf_ioctl_lock) struct sx pf_end_lock; /* pfsync */ VNET_DEFINE(pfsync_state_import_t *, pfsync_state_import_ptr); VNET_DEFINE(pfsync_insert_state_t *, pfsync_insert_state_ptr); VNET_DEFINE(pfsync_update_state_t *, pfsync_update_state_ptr); VNET_DEFINE(pfsync_delete_state_t *, pfsync_delete_state_ptr); VNET_DEFINE(pfsync_clear_states_t *, pfsync_clear_states_ptr); VNET_DEFINE(pfsync_defer_t *, pfsync_defer_ptr); pfsync_detach_ifnet_t *pfsync_detach_ifnet_ptr; /* pflog */ pflog_packet_t *pflog_packet_ptr = NULL; /* * Copy a user-provided string, returning an error if truncation would occur. * Avoid scanning past "sz" bytes in the source string since there's no * guarantee that it's nul-terminated. */ static int pf_user_strcpy(char *dst, const char *src, size_t sz) { if (strnlen(src, sz) == sz) return (EINVAL); (void)strlcpy(dst, src, sz); return (0); } static void pfattach_vnet(void) { u_int32_t *my_timeout = V_pf_default_rule.timeout; bzero(&V_pf_status, sizeof(V_pf_status)); pf_initialize(); pfr_initialize(); pfi_initialize_vnet(); pf_normalize_init(); pf_syncookies_init(); V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; RB_INIT(&V_pf_anchors); pf_init_kruleset(&pf_main_ruleset); pf_init_keth(V_pf_keth); /* default rule should never be garbage collected */ V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next; #ifdef PF_DEFAULT_TO_DROP V_pf_default_rule.action = PF_DROP; #else V_pf_default_rule.action = PF_PASS; #endif V_pf_default_rule.nr = -1; V_pf_default_rule.rtableid = -1; pf_counter_u64_init(&V_pf_default_rule.evaluations, M_WAITOK); for (int i = 0; i < 2; i++) { pf_counter_u64_init(&V_pf_default_rule.packets[i], M_WAITOK); pf_counter_u64_init(&V_pf_default_rule.bytes[i], M_WAITOK); } V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK); V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK); V_pf_default_rule.src_nodes = counter_u64_alloc(M_WAITOK); V_pf_default_rule.timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone, M_WAITOK | M_ZERO); #ifdef PF_WANT_32_TO_64_COUNTER V_pf_kifmarker = malloc(sizeof(*V_pf_kifmarker), PFI_MTYPE, M_WAITOK | M_ZERO); V_pf_rulemarker = malloc(sizeof(*V_pf_rulemarker), M_PFRULE, M_WAITOK | M_ZERO); PF_RULES_WLOCK(); LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist); LIST_INSERT_HEAD(&V_pf_allrulelist, &V_pf_default_rule, allrulelist); V_pf_allrulecount++; LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist); PF_RULES_WUNLOCK(); #endif /* initialize default timeouts */ my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL; my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL; my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL; my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL; my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL; my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL; my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL; my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL; my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL; my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL; my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL; my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL; my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; V_pf_status.debug = PF_DEBUG_URGENT; /* * XXX This is different than in OpenBSD where reassembly is enabled by * defult. In FreeBSD we expect people to still use scrub rules and * switch to the new syntax later. Only when they switch they must * explicitly enable reassemle. We could change the default once the * scrub rule functionality is hopefully removed some day in future. */ V_pf_status.reass = 0; V_pf_pfil_hooked = false; V_pf_pfil_eth_hooked = false; /* XXX do our best to avoid a conflict */ V_pf_status.hostid = arc4random(); for (int i = 0; i < PFRES_MAX; i++) V_pf_status.counters[i] = counter_u64_alloc(M_WAITOK); for (int i = 0; i < KLCNT_MAX; i++) V_pf_status.lcounters[i] = counter_u64_alloc(M_WAITOK); for (int i = 0; i < FCNT_MAX; i++) pf_counter_u64_init(&V_pf_status.fcounters[i], M_WAITOK); for (int i = 0; i < SCNT_MAX; i++) V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK); if (swi_add(&V_pf_swi_ie, "pf send", pf_intr, curvnet, SWI_NET, INTR_MPSAFE, &V_pf_swi_cookie) != 0) /* XXXGL: leaked all above. */ return; } static struct pf_kpool * pf_get_kpool(const char *anchor, u_int32_t ticket, u_int8_t rule_action, u_int32_t rule_number, u_int8_t r_last, u_int8_t active, u_int8_t check_ticket) { struct pf_kruleset *ruleset; struct pf_krule *rule; int rs_num; ruleset = pf_find_kruleset(anchor); if (ruleset == NULL) return (NULL); rs_num = pf_get_ruleset_number(rule_action); if (rs_num >= PF_RULESET_MAX) return (NULL); if (active) { if (check_ticket && ticket != ruleset->rules[rs_num].active.ticket) return (NULL); if (r_last) rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, pf_krulequeue); else rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); } else { if (check_ticket && ticket != ruleset->rules[rs_num].inactive.ticket) return (NULL); if (r_last) rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_krulequeue); else rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr); } if (!r_last) { while ((rule != NULL) && (rule->nr != rule_number)) rule = TAILQ_NEXT(rule, entries); } if (rule == NULL) return (NULL); return (&rule->rpool); } static void pf_mv_kpool(struct pf_kpalist *poola, struct pf_kpalist *poolb) { struct pf_kpooladdr *mv_pool_pa; while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) { TAILQ_REMOVE(poola, mv_pool_pa, entries); TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries); } } static void pf_empty_kpool(struct pf_kpalist *poola) { struct pf_kpooladdr *pa; while ((pa = TAILQ_FIRST(poola)) != NULL) { switch (pa->addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(pa->addr.p.dyn); break; case PF_ADDR_TABLE: /* XXX: this could be unfinished pooladdr on pabuf */ if (pa->addr.p.tbl != NULL) pfr_detach_table(pa->addr.p.tbl); break; } if (pa->kif) pfi_kkif_unref(pa->kif); TAILQ_REMOVE(poola, pa, entries); free(pa, M_PFRULE); } } static void pf_unlink_rule_locked(struct pf_krulequeue *rulequeue, struct pf_krule *rule) { PF_RULES_WASSERT(); PF_UNLNKDRULES_ASSERT(); TAILQ_REMOVE(rulequeue, rule, entries); rule->rule_ref |= PFRULE_REFS; TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries); } static void pf_unlink_rule(struct pf_krulequeue *rulequeue, struct pf_krule *rule) { PF_RULES_WASSERT(); PF_UNLNKDRULES_LOCK(); pf_unlink_rule_locked(rulequeue, rule); PF_UNLNKDRULES_UNLOCK(); } static void pf_free_eth_rule(struct pf_keth_rule *rule) { PF_RULES_WASSERT(); if (rule == NULL) return; if (rule->tag) tag_unref(&V_pf_tags, rule->tag); if (rule->match_tag) tag_unref(&V_pf_tags, rule->match_tag); #ifdef ALTQ pf_qid_unref(rule->qid); #endif if (rule->bridge_to) pfi_kkif_unref(rule->bridge_to); if (rule->kif) pfi_kkif_unref(rule->kif); if (rule->ipsrc.addr.type == PF_ADDR_TABLE) pfr_detach_table(rule->ipsrc.addr.p.tbl); if (rule->ipdst.addr.type == PF_ADDR_TABLE) pfr_detach_table(rule->ipdst.addr.p.tbl); counter_u64_free(rule->evaluations); for (int i = 0; i < 2; i++) { counter_u64_free(rule->packets[i]); counter_u64_free(rule->bytes[i]); } uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp); pf_keth_anchor_remove(rule); free(rule, M_PFRULE); } void pf_free_rule(struct pf_krule *rule) { PF_RULES_WASSERT(); PF_CONFIG_ASSERT(); if (rule->tag) tag_unref(&V_pf_tags, rule->tag); if (rule->match_tag) tag_unref(&V_pf_tags, rule->match_tag); #ifdef ALTQ if (rule->pqid != rule->qid) pf_qid_unref(rule->pqid); pf_qid_unref(rule->qid); #endif switch (rule->src.addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(rule->src.addr.p.dyn); break; case PF_ADDR_TABLE: pfr_detach_table(rule->src.addr.p.tbl); break; } switch (rule->dst.addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(rule->dst.addr.p.dyn); break; case PF_ADDR_TABLE: pfr_detach_table(rule->dst.addr.p.tbl); break; } if (rule->overload_tbl) pfr_detach_table(rule->overload_tbl); if (rule->kif) pfi_kkif_unref(rule->kif); pf_kanchor_remove(rule); pf_empty_kpool(&rule->rpool.list); pf_krule_free(rule); } static void pf_init_tagset(struct pf_tagset *ts, unsigned int *tunable_size, unsigned int default_size) { unsigned int i; unsigned int hashsize; if (*tunable_size == 0 || !powerof2(*tunable_size)) *tunable_size = default_size; hashsize = *tunable_size; ts->namehash = mallocarray(hashsize, sizeof(*ts->namehash), M_PFHASH, M_WAITOK); ts->taghash = mallocarray(hashsize, sizeof(*ts->taghash), M_PFHASH, M_WAITOK); ts->mask = hashsize - 1; ts->seed = arc4random(); for (i = 0; i < hashsize; i++) { TAILQ_INIT(&ts->namehash[i]); TAILQ_INIT(&ts->taghash[i]); } BIT_FILL(TAGID_MAX, &ts->avail); } static void pf_cleanup_tagset(struct pf_tagset *ts) { unsigned int i; unsigned int hashsize; struct pf_tagname *t, *tmp; /* * Only need to clean up one of the hashes as each tag is hashed * into each table. */ hashsize = ts->mask + 1; for (i = 0; i < hashsize; i++) TAILQ_FOREACH_SAFE(t, &ts->namehash[i], namehash_entries, tmp) uma_zfree(V_pf_tag_z, t); free(ts->namehash, M_PFHASH); free(ts->taghash, M_PFHASH); } static uint16_t tagname2hashindex(const struct pf_tagset *ts, const char *tagname) { size_t len; len = strnlen(tagname, PF_TAG_NAME_SIZE - 1); return (murmur3_32_hash(tagname, len, ts->seed) & ts->mask); } static uint16_t tag2hashindex(const struct pf_tagset *ts, uint16_t tag) { return (tag & ts->mask); } static u_int16_t tagname2tag(struct pf_tagset *ts, const char *tagname) { struct pf_tagname *tag; u_int32_t index; u_int16_t new_tagid; PF_RULES_WASSERT(); index = tagname2hashindex(ts, tagname); TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries) if (strcmp(tagname, tag->name) == 0) { tag->ref++; return (tag->tag); } /* * new entry * * to avoid fragmentation, we do a linear search from the beginning * and take the first free slot we find. */ new_tagid = BIT_FFS(TAGID_MAX, &ts->avail); /* * Tags are 1-based, with valid tags in the range [1..TAGID_MAX]. * BIT_FFS() returns a 1-based bit number, with 0 indicating no bits * set. It may also return a bit number greater than TAGID_MAX due * to rounding of the number of bits in the vector up to a multiple * of the vector word size at declaration/allocation time. */ if ((new_tagid == 0) || (new_tagid > TAGID_MAX)) return (0); /* Mark the tag as in use. Bits are 0-based for BIT_CLR() */ BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail); /* allocate and fill new struct pf_tagname */ tag = uma_zalloc(V_pf_tag_z, M_NOWAIT); if (tag == NULL) return (0); strlcpy(tag->name, tagname, sizeof(tag->name)); tag->tag = new_tagid; tag->ref = 1; /* Insert into namehash */ TAILQ_INSERT_TAIL(&ts->namehash[index], tag, namehash_entries); /* Insert into taghash */ index = tag2hashindex(ts, new_tagid); TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries); return (tag->tag); } static void tag_unref(struct pf_tagset *ts, u_int16_t tag) { struct pf_tagname *t; uint16_t index; PF_RULES_WASSERT(); index = tag2hashindex(ts, tag); TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries) if (tag == t->tag) { if (--t->ref == 0) { TAILQ_REMOVE(&ts->taghash[index], t, taghash_entries); index = tagname2hashindex(ts, t->name); TAILQ_REMOVE(&ts->namehash[index], t, namehash_entries); /* Bits are 0-based for BIT_SET() */ BIT_SET(TAGID_MAX, tag - 1, &ts->avail); uma_zfree(V_pf_tag_z, t); } break; } } static uint16_t pf_tagname2tag(const char *tagname) { return (tagname2tag(&V_pf_tags, tagname)); } static int pf_begin_eth(uint32_t *ticket, const char *anchor) { struct pf_keth_rule *rule, *tmp; struct pf_keth_ruleset *rs; PF_RULES_WASSERT(); rs = pf_find_or_create_keth_ruleset(anchor); if (rs == NULL) return (EINVAL); /* Purge old inactive rules. */ TAILQ_FOREACH_SAFE(rule, rs->inactive.rules, entries, tmp) { TAILQ_REMOVE(rs->inactive.rules, rule, entries); pf_free_eth_rule(rule); } *ticket = ++rs->inactive.ticket; rs->inactive.open = 1; return (0); } static void pf_rollback_eth_cb(struct epoch_context *ctx) { struct pf_keth_ruleset *rs; rs = __containerof(ctx, struct pf_keth_ruleset, epoch_ctx); CURVNET_SET(rs->vnet); PF_RULES_WLOCK(); pf_rollback_eth(rs->inactive.ticket, rs->anchor ? rs->anchor->path : ""); PF_RULES_WUNLOCK(); CURVNET_RESTORE(); } static int pf_rollback_eth(uint32_t ticket, const char *anchor) { struct pf_keth_rule *rule, *tmp; struct pf_keth_ruleset *rs; PF_RULES_WASSERT(); rs = pf_find_keth_ruleset(anchor); if (rs == NULL) return (EINVAL); if (!rs->inactive.open || ticket != rs->inactive.ticket) return (0); /* Purge old inactive rules. */ TAILQ_FOREACH_SAFE(rule, rs->inactive.rules, entries, tmp) { TAILQ_REMOVE(rs->inactive.rules, rule, entries); pf_free_eth_rule(rule); } rs->inactive.open = 0; pf_remove_if_empty_keth_ruleset(rs); return (0); } #define PF_SET_SKIP_STEPS(i) \ do { \ while (head[i] != cur) { \ head[i]->skip[i].ptr = cur; \ head[i] = TAILQ_NEXT(head[i], entries); \ } \ } while (0) static void pf_eth_calc_skip_steps(struct pf_keth_ruleq *rules) { struct pf_keth_rule *cur, *prev, *head[PFE_SKIP_COUNT]; int i; cur = TAILQ_FIRST(rules); prev = cur; for (i = 0; i < PFE_SKIP_COUNT; ++i) head[i] = cur; while (cur != NULL) { if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) PF_SET_SKIP_STEPS(PFE_SKIP_IFP); if (cur->direction != prev->direction) PF_SET_SKIP_STEPS(PFE_SKIP_DIR); if (cur->proto != prev->proto) PF_SET_SKIP_STEPS(PFE_SKIP_PROTO); if (memcmp(&cur->src, &prev->src, sizeof(cur->src)) != 0) PF_SET_SKIP_STEPS(PFE_SKIP_SRC_ADDR); if (memcmp(&cur->dst, &prev->dst, sizeof(cur->dst)) != 0) PF_SET_SKIP_STEPS(PFE_SKIP_DST_ADDR); if (cur->ipsrc.neg != prev->ipsrc.neg || pf_addr_wrap_neq(&cur->ipsrc.addr, &prev->ipsrc.addr)) PF_SET_SKIP_STEPS(PFE_SKIP_SRC_IP_ADDR); if (cur->ipdst.neg != prev->ipdst.neg || pf_addr_wrap_neq(&cur->ipdst.addr, &prev->ipdst.addr)) PF_SET_SKIP_STEPS(PFE_SKIP_DST_IP_ADDR); prev = cur; cur = TAILQ_NEXT(cur, entries); } for (i = 0; i < PFE_SKIP_COUNT; ++i) PF_SET_SKIP_STEPS(i); } static int pf_commit_eth(uint32_t ticket, const char *anchor) { struct pf_keth_ruleq *rules; struct pf_keth_ruleset *rs; rs = pf_find_keth_ruleset(anchor); if (rs == NULL) { return (EINVAL); } if (!rs->inactive.open || ticket != rs->inactive.ticket) return (EBUSY); PF_RULES_WASSERT(); pf_eth_calc_skip_steps(rs->inactive.rules); rules = rs->active.rules; ck_pr_store_ptr(&rs->active.rules, rs->inactive.rules); rs->inactive.rules = rules; rs->inactive.ticket = rs->active.ticket; /* Clean up inactive rules (i.e. previously active rules), only when * we're sure they're no longer used. */ NET_EPOCH_CALL(pf_rollback_eth_cb, &rs->epoch_ctx); return (0); } #ifdef ALTQ static uint16_t pf_qname2qid(const char *qname) { return (tagname2tag(&V_pf_qids, qname)); } static void pf_qid_unref(uint16_t qid) { tag_unref(&V_pf_qids, qid); } static int pf_begin_altq(u_int32_t *ticket) { struct pf_altq *altq, *tmp; int error = 0; PF_RULES_WASSERT(); /* Purge the old altq lists */ TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); } free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altq_ifs_inactive); TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altqs_inactive); if (error) return (error); *ticket = ++V_ticket_altqs_inactive; V_altqs_inactive_open = 1; return (0); } static int pf_rollback_altq(u_int32_t ticket) { struct pf_altq *altq, *tmp; int error = 0; PF_RULES_WASSERT(); if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) return (0); /* Purge the old altq lists */ TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); } free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altq_ifs_inactive); TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altqs_inactive); V_altqs_inactive_open = 0; return (error); } static int pf_commit_altq(u_int32_t ticket) { struct pf_altqqueue *old_altqs, *old_altq_ifs; struct pf_altq *altq, *tmp; int err, error = 0; PF_RULES_WASSERT(); if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) return (EBUSY); /* swap altqs, keep the old. */ old_altqs = V_pf_altqs_active; old_altq_ifs = V_pf_altq_ifs_active; V_pf_altqs_active = V_pf_altqs_inactive; V_pf_altq_ifs_active = V_pf_altq_ifs_inactive; V_pf_altqs_inactive = old_altqs; V_pf_altq_ifs_inactive = old_altq_ifs; V_ticket_altqs_active = V_ticket_altqs_inactive; /* Attach new disciplines */ TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* attach the discipline */ error = altq_pfattach(altq); if (error == 0 && V_pf_altq_running) error = pf_enable_altq(altq); if (error != 0) return (error); } } /* Purge the old altq lists */ TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ if (V_pf_altq_running) error = pf_disable_altq(altq); err = altq_pfdetach(altq); if (err != 0 && error == 0) error = err; err = altq_remove(altq); if (err != 0 && error == 0) error = err; } free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altq_ifs_inactive); TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altqs_inactive); V_altqs_inactive_open = 0; return (error); } static int pf_enable_altq(struct pf_altq *altq) { struct ifnet *ifp; struct tb_profile tb; int error = 0; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); if (ifp->if_snd.altq_type != ALTQT_NONE) error = altq_enable(&ifp->if_snd); /* set tokenbucket regulator */ if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { tb.rate = altq->ifbandwidth; tb.depth = altq->tbrsize; error = tbr_set(&ifp->if_snd, &tb); } return (error); } static int pf_disable_altq(struct pf_altq *altq) { struct ifnet *ifp; struct tb_profile tb; int error; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); /* * when the discipline is no longer referenced, it was overridden * by a new one. if so, just return. */ if (altq->altq_disc != ifp->if_snd.altq_disc) return (0); error = altq_disable(&ifp->if_snd); if (error == 0) { /* clear tokenbucket regulator */ tb.rate = 0; error = tbr_set(&ifp->if_snd, &tb); } return (error); } static int pf_altq_ifnet_event_add(struct ifnet *ifp, int remove, u_int32_t ticket, struct pf_altq *altq) { struct ifnet *ifp1; int error = 0; /* Deactivate the interface in question */ altq->local_flags &= ~PFALTQ_FLAG_IF_REMOVED; if ((ifp1 = ifunit(altq->ifname)) == NULL || (remove && ifp1 == ifp)) { altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; } else { error = altq_add(ifp1, altq); if (ticket != V_ticket_altqs_inactive) error = EBUSY; if (error) free(altq, M_PFALTQ); } return (error); } void pf_altq_ifnet_event(struct ifnet *ifp, int remove) { struct pf_altq *a1, *a2, *a3; u_int32_t ticket; int error = 0; /* * No need to re-evaluate the configuration for events on interfaces * that do not support ALTQ, as it's not possible for such * interfaces to be part of the configuration. */ if (!ALTQ_IS_READY(&ifp->if_snd)) return; /* Interrupt userland queue modifications */ if (V_altqs_inactive_open) pf_rollback_altq(V_ticket_altqs_inactive); /* Start new altq ruleset */ if (pf_begin_altq(&ticket)) return; /* Copy the current active set */ TAILQ_FOREACH(a1, V_pf_altq_ifs_active, entries) { a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); if (a2 == NULL) { error = ENOMEM; break; } bcopy(a1, a2, sizeof(struct pf_altq)); error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2); if (error) break; TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, a2, entries); } if (error) goto out; TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); if (a2 == NULL) { error = ENOMEM; break; } bcopy(a1, a2, sizeof(struct pf_altq)); if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { error = EBUSY; free(a2, M_PFALTQ); break; } a2->altq_disc = NULL; TAILQ_FOREACH(a3, V_pf_altq_ifs_inactive, entries) { if (strncmp(a3->ifname, a2->ifname, IFNAMSIZ) == 0) { a2->altq_disc = a3->altq_disc; break; } } error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2); if (error) break; TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries); } out: if (error != 0) pf_rollback_altq(ticket); else pf_commit_altq(ticket); } #endif /* ALTQ */ static struct pf_krule_global * pf_rule_tree_alloc(int flags) { struct pf_krule_global *tree; tree = malloc(sizeof(struct pf_krule_global), M_TEMP, flags); if (tree == NULL) return (NULL); RB_INIT(tree); return (tree); } static void pf_rule_tree_free(struct pf_krule_global *tree) { free(tree, M_TEMP); } static int pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) { struct pf_krule_global *tree; struct pf_kruleset *rs; struct pf_krule *rule; PF_RULES_WASSERT(); if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); tree = pf_rule_tree_alloc(M_NOWAIT); if (tree == NULL) return (ENOMEM); rs = pf_find_or_create_kruleset(anchor); if (rs == NULL) { free(tree, M_TEMP); return (EINVAL); } pf_rule_tree_free(rs->rules[rs_num].inactive.tree); rs->rules[rs_num].inactive.tree = tree; while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); rs->rules[rs_num].inactive.rcount--; } *ticket = ++rs->rules[rs_num].inactive.ticket; rs->rules[rs_num].inactive.open = 1; return (0); } static int pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_kruleset *rs; struct pf_krule *rule; PF_RULES_WASSERT(); if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_kruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || rs->rules[rs_num].inactive.ticket != ticket) return (0); while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); rs->rules[rs_num].inactive.rcount--; } rs->rules[rs_num].inactive.open = 0; return (0); } #define PF_MD5_UPD(st, elm) \ MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm)) #define PF_MD5_UPD_STR(st, elm) \ MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm)) #define PF_MD5_UPD_HTONL(st, elm, stor) do { \ (stor) = htonl((st)->elm); \ MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\ } while (0) #define PF_MD5_UPD_HTONS(st, elm, stor) do { \ (stor) = htons((st)->elm); \ MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\ } while (0) static void pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) { PF_MD5_UPD(pfr, addr.type); switch (pfr->addr.type) { case PF_ADDR_DYNIFTL: PF_MD5_UPD(pfr, addr.v.ifname); PF_MD5_UPD(pfr, addr.iflags); break; case PF_ADDR_TABLE: PF_MD5_UPD(pfr, addr.v.tblname); break; case PF_ADDR_ADDRMASK: /* XXX ignore af? */ PF_MD5_UPD(pfr, addr.v.a.addr.addr32); PF_MD5_UPD(pfr, addr.v.a.mask.addr32); break; } PF_MD5_UPD(pfr, port[0]); PF_MD5_UPD(pfr, port[1]); PF_MD5_UPD(pfr, neg); PF_MD5_UPD(pfr, port_op); } static void pf_hash_rule_rolling(MD5_CTX *ctx, struct pf_krule *rule) { u_int16_t x; u_int32_t y; pf_hash_rule_addr(ctx, &rule->src); pf_hash_rule_addr(ctx, &rule->dst); for (int i = 0; i < PF_RULE_MAX_LABEL_COUNT; i++) PF_MD5_UPD_STR(rule, label[i]); PF_MD5_UPD_STR(rule, ifname); PF_MD5_UPD_STR(rule, match_tagname); PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */ PF_MD5_UPD_HTONL(rule, os_fingerprint, y); PF_MD5_UPD_HTONL(rule, prob, y); PF_MD5_UPD_HTONL(rule, uid.uid[0], y); PF_MD5_UPD_HTONL(rule, uid.uid[1], y); PF_MD5_UPD(rule, uid.op); PF_MD5_UPD_HTONL(rule, gid.gid[0], y); PF_MD5_UPD_HTONL(rule, gid.gid[1], y); PF_MD5_UPD(rule, gid.op); PF_MD5_UPD_HTONL(rule, rule_flag, y); PF_MD5_UPD(rule, action); PF_MD5_UPD(rule, direction); PF_MD5_UPD(rule, af); PF_MD5_UPD(rule, quick); PF_MD5_UPD(rule, ifnot); PF_MD5_UPD(rule, match_tag_not); PF_MD5_UPD(rule, natpass); PF_MD5_UPD(rule, keep_state); PF_MD5_UPD(rule, proto); PF_MD5_UPD(rule, type); PF_MD5_UPD(rule, code); PF_MD5_UPD(rule, flags); PF_MD5_UPD(rule, flagset); PF_MD5_UPD(rule, allow_opts); PF_MD5_UPD(rule, rt); PF_MD5_UPD(rule, tos); PF_MD5_UPD(rule, scrub_flags); PF_MD5_UPD(rule, min_ttl); PF_MD5_UPD(rule, set_tos); if (rule->anchor != NULL) PF_MD5_UPD_STR(rule, anchor->path); } static void pf_hash_rule(struct pf_krule *rule) { MD5_CTX ctx; MD5Init(&ctx); pf_hash_rule_rolling(&ctx, rule); MD5Final(rule->md5sum, &ctx); } static int pf_krule_compare(struct pf_krule *a, struct pf_krule *b) { return (memcmp(a->md5sum, b->md5sum, PF_MD5_DIGEST_LENGTH)); } static int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_kruleset *rs; struct pf_krule *rule, **old_array, *old_rule; struct pf_krulequeue *old_rules; struct pf_krule_global *old_tree; int error; u_int32_t old_rcount; PF_RULES_WASSERT(); if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_kruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || ticket != rs->rules[rs_num].inactive.ticket) return (EBUSY); /* Calculate checksum for the main ruleset */ if (rs == &pf_main_ruleset) { error = pf_setup_pfsync_matching(rs); if (error != 0) return (error); } /* Swap rules, keep the old. */ old_rules = rs->rules[rs_num].active.ptr; old_rcount = rs->rules[rs_num].active.rcount; old_array = rs->rules[rs_num].active.ptr_array; old_tree = rs->rules[rs_num].active.tree; rs->rules[rs_num].active.ptr = rs->rules[rs_num].inactive.ptr; rs->rules[rs_num].active.ptr_array = rs->rules[rs_num].inactive.ptr_array; rs->rules[rs_num].active.tree = rs->rules[rs_num].inactive.tree; rs->rules[rs_num].active.rcount = rs->rules[rs_num].inactive.rcount; /* Attempt to preserve counter information. */ if (V_pf_status.keep_counters && old_tree != NULL) { TAILQ_FOREACH(rule, rs->rules[rs_num].active.ptr, entries) { old_rule = RB_FIND(pf_krule_global, old_tree, rule); if (old_rule == NULL) { continue; } pf_counter_u64_critical_enter(); pf_counter_u64_add_protected(&rule->evaluations, pf_counter_u64_fetch(&old_rule->evaluations)); pf_counter_u64_add_protected(&rule->packets[0], pf_counter_u64_fetch(&old_rule->packets[0])); pf_counter_u64_add_protected(&rule->packets[1], pf_counter_u64_fetch(&old_rule->packets[1])); pf_counter_u64_add_protected(&rule->bytes[0], pf_counter_u64_fetch(&old_rule->bytes[0])); pf_counter_u64_add_protected(&rule->bytes[1], pf_counter_u64_fetch(&old_rule->bytes[1])); pf_counter_u64_critical_exit(); } } rs->rules[rs_num].inactive.ptr = old_rules; rs->rules[rs_num].inactive.ptr_array = old_array; rs->rules[rs_num].inactive.tree = NULL; /* important for pf_ioctl_addrule */ rs->rules[rs_num].inactive.rcount = old_rcount; rs->rules[rs_num].active.ticket = rs->rules[rs_num].inactive.ticket; pf_calc_skip_steps(rs->rules[rs_num].active.ptr); /* Purge the old rule list. */ PF_UNLNKDRULES_LOCK(); while ((rule = TAILQ_FIRST(old_rules)) != NULL) pf_unlink_rule_locked(old_rules, rule); PF_UNLNKDRULES_UNLOCK(); if (rs->rules[rs_num].inactive.ptr_array) free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); rs->rules[rs_num].inactive.ptr_array = NULL; rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_kruleset(rs); free(old_tree, M_TEMP); return (0); } static int pf_setup_pfsync_matching(struct pf_kruleset *rs) { MD5_CTX ctx; struct pf_krule *rule; int rs_cnt; u_int8_t digest[PF_MD5_DIGEST_LENGTH]; MD5Init(&ctx); for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) { /* XXX PF_RULESET_SCRUB as well? */ if (rs_cnt == PF_RULESET_SCRUB) continue; if (rs->rules[rs_cnt].inactive.ptr_array) free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP); rs->rules[rs_cnt].inactive.ptr_array = NULL; if (rs->rules[rs_cnt].inactive.rcount) { rs->rules[rs_cnt].inactive.ptr_array = mallocarray(rs->rules[rs_cnt].inactive.rcount, sizeof(struct pf_rule **), M_TEMP, M_NOWAIT); if (!rs->rules[rs_cnt].inactive.ptr_array) return (ENOMEM); } TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, entries) { pf_hash_rule_rolling(&ctx, rule); (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule; } } MD5Final(digest, &ctx); memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum)); return (0); } static int pf_eth_addr_setup(struct pf_keth_ruleset *ruleset, struct pf_addr_wrap *addr) { int error = 0; switch (addr->type) { case PF_ADDR_TABLE: addr->p.tbl = pfr_eth_attach_table(ruleset, addr->v.tblname); if (addr->p.tbl == NULL) error = ENOMEM; break; default: error = EINVAL; } return (error); } static int pf_addr_setup(struct pf_kruleset *ruleset, struct pf_addr_wrap *addr, sa_family_t af) { int error = 0; switch (addr->type) { case PF_ADDR_TABLE: addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname); if (addr->p.tbl == NULL) error = ENOMEM; break; case PF_ADDR_DYNIFTL: error = pfi_dynaddr_setup(addr, af); break; } return (error); } static void pf_addr_copyout(struct pf_addr_wrap *addr) { switch (addr->type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_copyout(addr); break; case PF_ADDR_TABLE: pf_tbladdr_copyout(addr); break; } } static void pf_src_node_copy(const struct pf_ksrc_node *in, struct pf_src_node *out) { int secs = time_uptime, diff; bzero(out, sizeof(struct pf_src_node)); bcopy(&in->addr, &out->addr, sizeof(struct pf_addr)); bcopy(&in->raddr, &out->raddr, sizeof(struct pf_addr)); if (in->rule.ptr != NULL) out->rule.nr = in->rule.ptr->nr; for (int i = 0; i < 2; i++) { out->bytes[i] = counter_u64_fetch(in->bytes[i]); out->packets[i] = counter_u64_fetch(in->packets[i]); } out->states = in->states; out->conn = in->conn; out->af = in->af; out->ruletype = in->ruletype; out->creation = secs - in->creation; if (out->expire > secs) out->expire -= secs; else out->expire = 0; /* Adjust the connection rate estimate. */ diff = secs - in->conn_rate.last; if (diff >= in->conn_rate.seconds) out->conn_rate.count = 0; else out->conn_rate.count -= in->conn_rate.count * diff / in->conn_rate.seconds; } #ifdef ALTQ /* * Handle export of struct pf_kaltq to user binaries that may be using any * version of struct pf_altq. */ static int pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size) { u_int32_t version; if (ioc_size == sizeof(struct pfioc_altq_v0)) version = 0; else version = pa->version; if (version > PFIOC_ALTQ_VERSION) return (EINVAL); #define ASSIGN(x) exported_q->x = q->x #define COPY(x) \ bcopy(&q->x, &exported_q->x, min(sizeof(q->x), sizeof(exported_q->x))) #define SATU16(x) (u_int32_t)uqmin((x), USHRT_MAX) #define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX) switch (version) { case 0: { struct pf_altq_v0 *exported_q = &((struct pfioc_altq_v0 *)pa)->altq; COPY(ifname); ASSIGN(scheduler); ASSIGN(tbrsize); exported_q->tbrsize = SATU16(q->tbrsize); exported_q->ifbandwidth = SATU32(q->ifbandwidth); COPY(qname); COPY(parent); ASSIGN(parent_qid); exported_q->bandwidth = SATU32(q->bandwidth); ASSIGN(priority); ASSIGN(local_flags); ASSIGN(qlimit); ASSIGN(flags); if (q->scheduler == ALTQT_HFSC) { #define ASSIGN_OPT(x) exported_q->pq_u.hfsc_opts.x = q->pq_u.hfsc_opts.x #define ASSIGN_OPT_SATU32(x) exported_q->pq_u.hfsc_opts.x = \ SATU32(q->pq_u.hfsc_opts.x) ASSIGN_OPT_SATU32(rtsc_m1); ASSIGN_OPT(rtsc_d); ASSIGN_OPT_SATU32(rtsc_m2); ASSIGN_OPT_SATU32(lssc_m1); ASSIGN_OPT(lssc_d); ASSIGN_OPT_SATU32(lssc_m2); ASSIGN_OPT_SATU32(ulsc_m1); ASSIGN_OPT(ulsc_d); ASSIGN_OPT_SATU32(ulsc_m2); ASSIGN_OPT(flags); #undef ASSIGN_OPT #undef ASSIGN_OPT_SATU32 } else COPY(pq_u); ASSIGN(qid); break; } case 1: { struct pf_altq_v1 *exported_q = &((struct pfioc_altq_v1 *)pa)->altq; COPY(ifname); ASSIGN(scheduler); ASSIGN(tbrsize); ASSIGN(ifbandwidth); COPY(qname); COPY(parent); ASSIGN(parent_qid); ASSIGN(bandwidth); ASSIGN(priority); ASSIGN(local_flags); ASSIGN(qlimit); ASSIGN(flags); COPY(pq_u); ASSIGN(qid); break; } default: panic("%s: unhandled struct pfioc_altq version", __func__); break; } #undef ASSIGN #undef COPY #undef SATU16 #undef SATU32 return (0); } /* * Handle import to struct pf_kaltq of struct pf_altq from user binaries * that may be using any version of it. */ static int pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size) { u_int32_t version; if (ioc_size == sizeof(struct pfioc_altq_v0)) version = 0; else version = pa->version; if (version > PFIOC_ALTQ_VERSION) return (EINVAL); #define ASSIGN(x) q->x = imported_q->x #define COPY(x) \ bcopy(&imported_q->x, &q->x, min(sizeof(imported_q->x), sizeof(q->x))) switch (version) { case 0: { struct pf_altq_v0 *imported_q = &((struct pfioc_altq_v0 *)pa)->altq; COPY(ifname); ASSIGN(scheduler); ASSIGN(tbrsize); /* 16-bit -> 32-bit */ ASSIGN(ifbandwidth); /* 32-bit -> 64-bit */ COPY(qname); COPY(parent); ASSIGN(parent_qid); ASSIGN(bandwidth); /* 32-bit -> 64-bit */ ASSIGN(priority); ASSIGN(local_flags); ASSIGN(qlimit); ASSIGN(flags); if (imported_q->scheduler == ALTQT_HFSC) { #define ASSIGN_OPT(x) q->pq_u.hfsc_opts.x = imported_q->pq_u.hfsc_opts.x /* * The m1 and m2 parameters are being copied from * 32-bit to 64-bit. */ ASSIGN_OPT(rtsc_m1); ASSIGN_OPT(rtsc_d); ASSIGN_OPT(rtsc_m2); ASSIGN_OPT(lssc_m1); ASSIGN_OPT(lssc_d); ASSIGN_OPT(lssc_m2); ASSIGN_OPT(ulsc_m1); ASSIGN_OPT(ulsc_d); ASSIGN_OPT(ulsc_m2); ASSIGN_OPT(flags); #undef ASSIGN_OPT } else COPY(pq_u); ASSIGN(qid); break; } case 1: { struct pf_altq_v1 *imported_q = &((struct pfioc_altq_v1 *)pa)->altq; COPY(ifname); ASSIGN(scheduler); ASSIGN(tbrsize); ASSIGN(ifbandwidth); COPY(qname); COPY(parent); ASSIGN(parent_qid); ASSIGN(bandwidth); ASSIGN(priority); ASSIGN(local_flags); ASSIGN(qlimit); ASSIGN(flags); COPY(pq_u); ASSIGN(qid); break; } default: panic("%s: unhandled struct pfioc_altq version", __func__); break; } #undef ASSIGN #undef COPY return (0); } static struct pf_altq * pf_altq_get_nth_active(u_int32_t n) { struct pf_altq *altq; u_int32_t nr; nr = 0; TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { if (nr == n) return (altq); nr++; } TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (nr == n) return (altq); nr++; } return (NULL); } #endif /* ALTQ */ struct pf_krule * pf_krule_alloc(void) { struct pf_krule *rule; rule = malloc(sizeof(struct pf_krule), M_PFRULE, M_WAITOK | M_ZERO); mtx_init(&rule->rpool.mtx, "pf_krule_pool", NULL, MTX_DEF); rule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone, M_WAITOK | M_ZERO); return (rule); } void pf_krule_free(struct pf_krule *rule) { #ifdef PF_WANT_32_TO_64_COUNTER bool wowned; #endif if (rule == NULL) return; #ifdef PF_WANT_32_TO_64_COUNTER if (rule->allrulelinked) { wowned = PF_RULES_WOWNED(); if (!wowned) PF_RULES_WLOCK(); LIST_REMOVE(rule, allrulelist); V_pf_allrulecount--; if (!wowned) PF_RULES_WUNLOCK(); } #endif pf_counter_u64_deinit(&rule->evaluations); for (int i = 0; i < 2; i++) { pf_counter_u64_deinit(&rule->packets[i]); pf_counter_u64_deinit(&rule->bytes[i]); } counter_u64_free(rule->states_cur); counter_u64_free(rule->states_tot); counter_u64_free(rule->src_nodes); uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp); mtx_destroy(&rule->rpool.mtx); free(rule, M_PFRULE); } static void pf_kpooladdr_to_pooladdr(const struct pf_kpooladdr *kpool, struct pf_pooladdr *pool) { bzero(pool, sizeof(*pool)); bcopy(&kpool->addr, &pool->addr, sizeof(pool->addr)); strlcpy(pool->ifname, kpool->ifname, sizeof(pool->ifname)); } static int pf_pooladdr_to_kpooladdr(const struct pf_pooladdr *pool, struct pf_kpooladdr *kpool) { int ret; bzero(kpool, sizeof(*kpool)); bcopy(&pool->addr, &kpool->addr, sizeof(kpool->addr)); ret = pf_user_strcpy(kpool->ifname, pool->ifname, sizeof(kpool->ifname)); return (ret); } static void pf_kpool_to_pool(const struct pf_kpool *kpool, struct pf_pool *pool) { bzero(pool, sizeof(*pool)); bcopy(&kpool->key, &pool->key, sizeof(pool->key)); bcopy(&kpool->counter, &pool->counter, sizeof(pool->counter)); pool->tblidx = kpool->tblidx; pool->proxy_port[0] = kpool->proxy_port[0]; pool->proxy_port[1] = kpool->proxy_port[1]; pool->opts = kpool->opts; } static void pf_pool_to_kpool(const struct pf_pool *pool, struct pf_kpool *kpool) { _Static_assert(sizeof(pool->key) == sizeof(kpool->key), ""); _Static_assert(sizeof(pool->counter) == sizeof(kpool->counter), ""); bcopy(&pool->key, &kpool->key, sizeof(kpool->key)); bcopy(&pool->counter, &kpool->counter, sizeof(kpool->counter)); kpool->tblidx = pool->tblidx; kpool->proxy_port[0] = pool->proxy_port[0]; kpool->proxy_port[1] = pool->proxy_port[1]; kpool->opts = pool->opts; } static void pf_krule_to_rule(const struct pf_krule *krule, struct pf_rule *rule) { bzero(rule, sizeof(*rule)); bcopy(&krule->src, &rule->src, sizeof(rule->src)); bcopy(&krule->dst, &rule->dst, sizeof(rule->dst)); for (int i = 0; i < PF_SKIP_COUNT; ++i) { if (rule->skip[i].ptr == NULL) rule->skip[i].nr = -1; else rule->skip[i].nr = krule->skip[i].ptr->nr; } strlcpy(rule->label, krule->label[0], sizeof(rule->label)); strlcpy(rule->ifname, krule->ifname, sizeof(rule->ifname)); strlcpy(rule->qname, krule->qname, sizeof(rule->qname)); strlcpy(rule->pqname, krule->pqname, sizeof(rule->pqname)); strlcpy(rule->tagname, krule->tagname, sizeof(rule->tagname)); strlcpy(rule->match_tagname, krule->match_tagname, sizeof(rule->match_tagname)); strlcpy(rule->overload_tblname, krule->overload_tblname, sizeof(rule->overload_tblname)); pf_kpool_to_pool(&krule->rpool, &rule->rpool); rule->evaluations = pf_counter_u64_fetch(&krule->evaluations); for (int i = 0; i < 2; i++) { rule->packets[i] = pf_counter_u64_fetch(&krule->packets[i]); rule->bytes[i] = pf_counter_u64_fetch(&krule->bytes[i]); } /* kif, anchor, overload_tbl are not copied over. */ rule->os_fingerprint = krule->os_fingerprint; rule->rtableid = krule->rtableid; bcopy(krule->timeout, rule->timeout, sizeof(krule->timeout)); rule->max_states = krule->max_states; rule->max_src_nodes = krule->max_src_nodes; rule->max_src_states = krule->max_src_states; rule->max_src_conn = krule->max_src_conn; rule->max_src_conn_rate.limit = krule->max_src_conn_rate.limit; rule->max_src_conn_rate.seconds = krule->max_src_conn_rate.seconds; rule->qid = krule->qid; rule->pqid = krule->pqid; rule->nr = krule->nr; rule->prob = krule->prob; rule->cuid = krule->cuid; rule->cpid = krule->cpid; rule->return_icmp = krule->return_icmp; rule->return_icmp6 = krule->return_icmp6; rule->max_mss = krule->max_mss; rule->tag = krule->tag; rule->match_tag = krule->match_tag; rule->scrub_flags = krule->scrub_flags; bcopy(&krule->uid, &rule->uid, sizeof(krule->uid)); bcopy(&krule->gid, &rule->gid, sizeof(krule->gid)); rule->rule_flag = krule->rule_flag; rule->action = krule->action; rule->direction = krule->direction; rule->log = krule->log; rule->logif = krule->logif; rule->quick = krule->quick; rule->ifnot = krule->ifnot; rule->match_tag_not = krule->match_tag_not; rule->natpass = krule->natpass; rule->keep_state = krule->keep_state; rule->af = krule->af; rule->proto = krule->proto; rule->type = krule->type; rule->code = krule->code; rule->flags = krule->flags; rule->flagset = krule->flagset; rule->min_ttl = krule->min_ttl; rule->allow_opts = krule->allow_opts; rule->rt = krule->rt; rule->return_ttl = krule->return_ttl; rule->tos = krule->tos; rule->set_tos = krule->set_tos; rule->anchor_relative = krule->anchor_relative; rule->anchor_wildcard = krule->anchor_wildcard; rule->flush = krule->flush; rule->prio = krule->prio; rule->set_prio[0] = krule->set_prio[0]; rule->set_prio[1] = krule->set_prio[1]; bcopy(&krule->divert, &rule->divert, sizeof(krule->divert)); rule->u_states_cur = counter_u64_fetch(krule->states_cur); rule->u_states_tot = counter_u64_fetch(krule->states_tot); rule->u_src_nodes = counter_u64_fetch(krule->src_nodes); } static int pf_rule_to_krule(const struct pf_rule *rule, struct pf_krule *krule) { int ret; #ifndef INET if (rule->af == AF_INET) { return (EAFNOSUPPORT); } #endif /* INET */ #ifndef INET6 if (rule->af == AF_INET6) { return (EAFNOSUPPORT); } #endif /* INET6 */ ret = pf_check_rule_addr(&rule->src); if (ret != 0) return (ret); ret = pf_check_rule_addr(&rule->dst); if (ret != 0) return (ret); bcopy(&rule->src, &krule->src, sizeof(rule->src)); bcopy(&rule->dst, &krule->dst, sizeof(rule->dst)); ret = pf_user_strcpy(krule->label[0], rule->label, sizeof(rule->label)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->ifname, rule->ifname, sizeof(rule->ifname)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->qname, rule->qname, sizeof(rule->qname)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->pqname, rule->pqname, sizeof(rule->pqname)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->tagname, rule->tagname, sizeof(rule->tagname)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->match_tagname, rule->match_tagname, sizeof(rule->match_tagname)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->overload_tblname, rule->overload_tblname, sizeof(rule->overload_tblname)); if (ret != 0) return (ret); pf_pool_to_kpool(&rule->rpool, &krule->rpool); /* Don't allow userspace to set evaluations, packets or bytes. */ /* kif, anchor, overload_tbl are not copied over. */ krule->os_fingerprint = rule->os_fingerprint; krule->rtableid = rule->rtableid; bcopy(rule->timeout, krule->timeout, sizeof(krule->timeout)); krule->max_states = rule->max_states; krule->max_src_nodes = rule->max_src_nodes; krule->max_src_states = rule->max_src_states; krule->max_src_conn = rule->max_src_conn; krule->max_src_conn_rate.limit = rule->max_src_conn_rate.limit; krule->max_src_conn_rate.seconds = rule->max_src_conn_rate.seconds; krule->qid = rule->qid; krule->pqid = rule->pqid; krule->nr = rule->nr; krule->prob = rule->prob; krule->cuid = rule->cuid; krule->cpid = rule->cpid; krule->return_icmp = rule->return_icmp; krule->return_icmp6 = rule->return_icmp6; krule->max_mss = rule->max_mss; krule->tag = rule->tag; krule->match_tag = rule->match_tag; krule->scrub_flags = rule->scrub_flags; bcopy(&rule->uid, &krule->uid, sizeof(krule->uid)); bcopy(&rule->gid, &krule->gid, sizeof(krule->gid)); krule->rule_flag = rule->rule_flag; krule->action = rule->action; krule->direction = rule->direction; krule->log = rule->log; krule->logif = rule->logif; krule->quick = rule->quick; krule->ifnot = rule->ifnot; krule->match_tag_not = rule->match_tag_not; krule->natpass = rule->natpass; krule->keep_state = rule->keep_state; krule->af = rule->af; krule->proto = rule->proto; krule->type = rule->type; krule->code = rule->code; krule->flags = rule->flags; krule->flagset = rule->flagset; krule->min_ttl = rule->min_ttl; krule->allow_opts = rule->allow_opts; krule->rt = rule->rt; krule->return_ttl = rule->return_ttl; krule->tos = rule->tos; krule->set_tos = rule->set_tos; krule->flush = rule->flush; krule->prio = rule->prio; krule->set_prio[0] = rule->set_prio[0]; krule->set_prio[1] = rule->set_prio[1]; bcopy(&rule->divert, &krule->divert, sizeof(krule->divert)); return (0); } static int pf_state_kill_to_kstate_kill(const struct pfioc_state_kill *psk, struct pf_kstate_kill *kill) { int ret; bzero(kill, sizeof(*kill)); bcopy(&psk->psk_pfcmp, &kill->psk_pfcmp, sizeof(kill->psk_pfcmp)); kill->psk_af = psk->psk_af; kill->psk_proto = psk->psk_proto; bcopy(&psk->psk_src, &kill->psk_src, sizeof(kill->psk_src)); bcopy(&psk->psk_dst, &kill->psk_dst, sizeof(kill->psk_dst)); ret = pf_user_strcpy(kill->psk_ifname, psk->psk_ifname, sizeof(kill->psk_ifname)); if (ret != 0) return (ret); ret = pf_user_strcpy(kill->psk_label, psk->psk_label, sizeof(kill->psk_label)); if (ret != 0) return (ret); return (0); } static int pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, uint32_t pool_ticket, const char *anchor, const char *anchor_call, struct thread *td) { struct pf_kruleset *ruleset; struct pf_krule *tail; struct pf_kpooladdr *pa; struct pfi_kkif *kif = NULL; int rs_num; int error = 0; if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) { error = EINVAL; goto errout_unlocked; } #define ERROUT(x) ERROUT_FUNCTION(errout, x) if (rule->ifname[0]) kif = pf_kkif_create(M_WAITOK); pf_counter_u64_init(&rule->evaluations, M_WAITOK); for (int i = 0; i < 2; i++) { pf_counter_u64_init(&rule->packets[i], M_WAITOK); pf_counter_u64_init(&rule->bytes[i], M_WAITOK); } rule->states_cur = counter_u64_alloc(M_WAITOK); rule->states_tot = counter_u64_alloc(M_WAITOK); rule->src_nodes = counter_u64_alloc(M_WAITOK); rule->cuid = td->td_ucred->cr_ruid; rule->cpid = td->td_proc ? td->td_proc->p_pid : 0; TAILQ_INIT(&rule->rpool.list); PF_CONFIG_LOCK(); PF_RULES_WLOCK(); #ifdef PF_WANT_32_TO_64_COUNTER LIST_INSERT_HEAD(&V_pf_allrulelist, rule, allrulelist); MPASS(!rule->allrulelinked); rule->allrulelinked = true; V_pf_allrulecount++; #endif ruleset = pf_find_kruleset(anchor); if (ruleset == NULL) ERROUT(EINVAL); rs_num = pf_get_ruleset_number(rule->action); if (rs_num >= PF_RULESET_MAX) ERROUT(EINVAL); if (ticket != ruleset->rules[rs_num].inactive.ticket) { DPFPRINTF(PF_DEBUG_MISC, ("ticket: %d != [%d]%d\n", ticket, rs_num, ruleset->rules[rs_num].inactive.ticket)); ERROUT(EBUSY); } if (pool_ticket != V_ticket_pabuf) { DPFPRINTF(PF_DEBUG_MISC, ("pool_ticket: %d != %d\n", pool_ticket, V_ticket_pabuf)); ERROUT(EBUSY); } /* * XXXMJG hack: there is no mechanism to ensure they started the * transaction. Ticket checked above may happen to match by accident, * even if nobody called DIOCXBEGIN, let alone this process. * Partially work around it by checking if the RB tree got allocated, * see pf_begin_rules. */ if (ruleset->rules[rs_num].inactive.tree == NULL) { ERROUT(EINVAL); } tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_krulequeue); if (tail) rule->nr = tail->nr + 1; else rule->nr = 0; if (rule->ifname[0]) { rule->kif = pfi_kkif_attach(kif, rule->ifname); kif = NULL; pfi_kkif_ref(rule->kif); } else rule->kif = NULL; if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) error = EBUSY; #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { if ((rule->qid = pf_qname2qid(rule->qname)) == 0) error = EBUSY; else if (rule->pqname[0] != 0) { if ((rule->pqid = pf_qname2qid(rule->pqname)) == 0) error = EBUSY; } else rule->pqid = rule->qid; } #endif if (rule->tagname[0]) if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) error = EBUSY; if (rule->match_tagname[0]) if ((rule->match_tag = pf_tagname2tag(rule->match_tagname)) == 0) error = EBUSY; if (rule->rt && !rule->direction) error = EINVAL; if (!rule->log) rule->logif = 0; if (rule->logif >= PFLOGIFS_MAX) error = EINVAL; if (pf_addr_setup(ruleset, &rule->src.addr, rule->af)) error = ENOMEM; if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af)) error = ENOMEM; if (pf_kanchor_setup(rule, ruleset, anchor_call)) error = EINVAL; if (rule->scrub_flags & PFSTATE_SETPRIO && (rule->set_prio[0] > PF_PRIO_MAX || rule->set_prio[1] > PF_PRIO_MAX)) error = EINVAL; TAILQ_FOREACH(pa, &V_pf_pabuf, entries) if (pa->addr.type == PF_ADDR_TABLE) { pa->addr.p.tbl = pfr_attach_table(ruleset, pa->addr.v.tblname); if (pa->addr.p.tbl == NULL) error = ENOMEM; } rule->overload_tbl = NULL; if (rule->overload_tblname[0]) { if ((rule->overload_tbl = pfr_attach_table(ruleset, rule->overload_tblname)) == NULL) error = EINVAL; else rule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } pf_mv_kpool(&V_pf_pabuf, &rule->rpool.list); if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || (rule->action == PF_BINAT)) && rule->anchor == NULL) || (rule->rt > PF_NOPFROUTE)) && (TAILQ_FIRST(&rule->rpool.list) == NULL)) error = EINVAL; if (error) { pf_free_rule(rule); rule = NULL; ERROUT(error); } rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; PF_RULES_WUNLOCK(); pf_hash_rule(rule); if (RB_INSERT(pf_krule_global, ruleset->rules[rs_num].inactive.tree, rule) != NULL) { PF_RULES_WLOCK(); TAILQ_REMOVE(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount--; pf_free_rule(rule); rule = NULL; ERROUT(EEXIST); } PF_CONFIG_UNLOCK(); return (0); #undef ERROUT errout: PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); errout_unlocked: pf_kkif_free(kif); pf_krule_free(rule); return (error); } static bool pf_label_match(const struct pf_krule *rule, const char *label) { int i = 0; while (*rule->label[i]) { if (strcmp(rule->label[i], label) == 0) return (true); i++; } return (false); } static unsigned int pf_kill_matching_state(struct pf_state_key_cmp *key, int dir) { struct pf_kstate *s; int more = 0; s = pf_find_state_all(key, dir, &more); if (s == NULL) return (0); if (more) { PF_STATE_UNLOCK(s); return (0); } pf_unlink_state(s); return (1); } static int pf_killstates_row(struct pf_kstate_kill *psk, struct pf_idhash *ih) { struct pf_kstate *s; struct pf_state_key *sk; struct pf_addr *srcaddr, *dstaddr; struct pf_state_key_cmp match_key; int idx, killed = 0; unsigned int dir; u_int16_t srcport, dstport; struct pfi_kkif *kif; relock_DIOCKILLSTATES: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { /* For floating states look at the original kif. */ kif = s->kif == V_pfi_all ? s->orig_kif : s->kif; sk = s->key[PF_SK_WIRE]; if (s->direction == PF_OUT) { srcaddr = &sk->addr[1]; dstaddr = &sk->addr[0]; srcport = sk->port[1]; dstport = sk->port[0]; } else { srcaddr = &sk->addr[0]; dstaddr = &sk->addr[1]; srcport = sk->port[0]; dstport = sk->port[1]; } if (psk->psk_af && sk->af != psk->psk_af) continue; if (psk->psk_proto && psk->psk_proto != sk->proto) continue; if (! PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, srcaddr, sk->af)) continue; if (! PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, dstaddr, sk->af)) continue; if (! PF_MATCHA(psk->psk_rt_addr.neg, &psk->psk_rt_addr.addr.v.a.addr, &psk->psk_rt_addr.addr.v.a.mask, &s->rt_addr, sk->af)) continue; if (psk->psk_src.port_op != 0 && ! pf_match_port(psk->psk_src.port_op, psk->psk_src.port[0], psk->psk_src.port[1], srcport)) continue; if (psk->psk_dst.port_op != 0 && ! pf_match_port(psk->psk_dst.port_op, psk->psk_dst.port[0], psk->psk_dst.port[1], dstport)) continue; if (psk->psk_label[0] && ! pf_label_match(s->rule.ptr, psk->psk_label)) continue; if (psk->psk_ifname[0] && strcmp(psk->psk_ifname, kif->pfik_name)) continue; if (psk->psk_kill_match) { /* Create the key to find matching states, with lock * held. */ bzero(&match_key, sizeof(match_key)); if (s->direction == PF_OUT) { dir = PF_IN; idx = PF_SK_STACK; } else { dir = PF_OUT; idx = PF_SK_WIRE; } match_key.af = s->key[idx]->af; match_key.proto = s->key[idx]->proto; PF_ACPY(&match_key.addr[0], &s->key[idx]->addr[1], match_key.af); match_key.port[0] = s->key[idx]->port[1]; PF_ACPY(&match_key.addr[1], &s->key[idx]->addr[0], match_key.af); match_key.port[1] = s->key[idx]->port[0]; } pf_unlink_state(s); killed++; if (psk->psk_kill_match) killed += pf_kill_matching_state(&match_key, dir); goto relock_DIOCKILLSTATES; } PF_HASHROW_UNLOCK(ih); return (killed); } static int pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { int error = 0; PF_RULES_RLOCK_TRACKER; #define ERROUT_IOCTL(target, x) \ do { \ error = (x); \ SDT_PROBE3(pf, ioctl, ioctl, error, cmd, error, __LINE__); \ goto target; \ } while (0) /* XXX keep in sync with switch() below */ if (securelevel_gt(td->td_ucred, 2)) switch (cmd) { case DIOCGETRULES: case DIOCGETRULE: case DIOCGETRULENV: case DIOCGETADDRS: case DIOCGETADDR: case DIOCGETSTATE: case DIOCGETSTATENV: case DIOCSETSTATUSIF: case DIOCGETSTATUS: case DIOCGETSTATUSNV: case DIOCCLRSTATUS: case DIOCNATLOOK: case DIOCSETDEBUG: case DIOCGETSTATES: case DIOCGETSTATESV2: case DIOCGETTIMEOUT: case DIOCCLRRULECTRS: case DIOCGETLIMIT: case DIOCGETALTQSV0: case DIOCGETALTQSV1: case DIOCGETALTQV0: case DIOCGETALTQV1: case DIOCGETQSTATSV0: case DIOCGETQSTATSV1: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCRGETTABLES: case DIOCRGETTSTATS: case DIOCRCLRTSTATS: case DIOCRCLRADDRS: case DIOCRADDADDRS: case DIOCRDELADDRS: case DIOCRSETADDRS: case DIOCRGETADDRS: case DIOCRGETASTATS: case DIOCRCLRASTATS: case DIOCRTSTADDRS: case DIOCOSFPGET: case DIOCGETSRCNODES: case DIOCCLRSRCNODES: case DIOCGETSYNCOOKIES: case DIOCIGETIFACES: case DIOCGIFSPEEDV0: case DIOCGIFSPEEDV1: case DIOCSETIFFLAG: case DIOCCLRIFFLAG: case DIOCGETETHRULES: case DIOCGETETHRULE: case DIOCGETETHRULESETS: case DIOCGETETHRULESET: break; case DIOCRCLRTABLES: case DIOCRADDTABLES: case DIOCRDELTABLES: case DIOCRSETTFLAGS: if (((struct pfioc_table *)addr)->pfrio_flags & PFR_FLAG_DUMMY) break; /* dummy operation ok */ return (EPERM); default: return (EPERM); } if (!(flags & FWRITE)) switch (cmd) { case DIOCGETRULES: case DIOCGETADDRS: case DIOCGETADDR: case DIOCGETSTATE: case DIOCGETSTATENV: case DIOCGETSTATUS: case DIOCGETSTATUSNV: case DIOCGETSTATES: case DIOCGETSTATESV2: case DIOCGETTIMEOUT: case DIOCGETLIMIT: case DIOCGETALTQSV0: case DIOCGETALTQSV1: case DIOCGETALTQV0: case DIOCGETALTQV1: case DIOCGETQSTATSV0: case DIOCGETQSTATSV1: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCNATLOOK: case DIOCRGETTABLES: case DIOCRGETTSTATS: case DIOCRGETADDRS: case DIOCRGETASTATS: case DIOCRTSTADDRS: case DIOCOSFPGET: case DIOCGETSRCNODES: case DIOCGETSYNCOOKIES: case DIOCIGETIFACES: case DIOCGIFSPEEDV1: case DIOCGIFSPEEDV0: case DIOCGETRULENV: case DIOCGETETHRULES: case DIOCGETETHRULE: case DIOCGETETHRULESETS: case DIOCGETETHRULESET: break; case DIOCRCLRTABLES: case DIOCRADDTABLES: case DIOCRDELTABLES: case DIOCRCLRTSTATS: case DIOCRCLRADDRS: case DIOCRADDADDRS: case DIOCRDELADDRS: case DIOCRSETADDRS: case DIOCRSETTFLAGS: if (((struct pfioc_table *)addr)->pfrio_flags & PFR_FLAG_DUMMY) { flags |= FWRITE; /* need write lock for dummy */ break; /* dummy operation ok */ } return (EACCES); case DIOCGETRULE: if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR) return (EACCES); break; default: return (EACCES); } CURVNET_SET(TD_TO_VNET(td)); switch (cmd) { case DIOCSTART: sx_xlock(&V_pf_ioctl_lock); if (V_pf_status.running) error = EEXIST; else { hook_pf(); if (! TAILQ_EMPTY(V_pf_keth->active.rules)) hook_pf_eth(); V_pf_status.running = 1; V_pf_status.since = time_second; new_unrhdr64(&V_pf_stateid, time_second); DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); } break; case DIOCSTOP: sx_xlock(&V_pf_ioctl_lock); if (!V_pf_status.running) error = ENOENT; else { V_pf_status.running = 0; dehook_pf(); dehook_pf_eth(); V_pf_status.since = time_second; DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); } break; case DIOCGETETHRULES: { struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl; void *packed; struct pf_keth_rule *tail; struct pf_keth_ruleset *rs; u_int32_t ticket, nr; const char *anchor = ""; nvl = NULL; packed = NULL; #define ERROUT(x) ERROUT_IOCTL(DIOCGETETHRULES_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); /* Copy the request in */ packed = malloc(nv->len, M_NVLIST, M_WAITOK); if (packed == NULL) ERROUT(ENOMEM); error = copyin(nv->data, packed, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(packed, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_string(nvl, "anchor")) ERROUT(EBADMSG); anchor = nvlist_get_string(nvl, "anchor"); rs = pf_find_keth_ruleset(anchor); nvlist_destroy(nvl); nvl = NULL; free(packed, M_NVLIST); packed = NULL; if (rs == NULL) ERROUT(ENOENT); /* Reply */ nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); PF_RULES_RLOCK(); ticket = rs->active.ticket; tail = TAILQ_LAST(rs->active.rules, pf_keth_ruleq); if (tail) nr = tail->nr + 1; else nr = 0; PF_RULES_RUNLOCK(); nvlist_add_number(nvl, "ticket", ticket); nvlist_add_number(nvl, "nr", nr); packed = nvlist_pack(nvl, &nv->len); if (packed == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(packed, nv->data, nv->len); #undef ERROUT DIOCGETETHRULES_error: free(packed, M_NVLIST); nvlist_destroy(nvl); break; } case DIOCGETETHRULE: { struct epoch_tracker et; struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl = NULL; void *nvlpacked = NULL; struct pf_keth_rule *rule = NULL; struct pf_keth_ruleset *rs; u_int32_t ticket, nr; bool clear = false; const char *anchor; #define ERROUT(x) ERROUT_IOCTL(DIOCGETETHRULE_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); if (nvlpacked == NULL) ERROUT(ENOMEM); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "ticket")) ERROUT(EBADMSG); ticket = nvlist_get_number(nvl, "ticket"); if (! nvlist_exists_string(nvl, "anchor")) ERROUT(EBADMSG); anchor = nvlist_get_string(nvl, "anchor"); if (nvlist_exists_bool(nvl, "clear")) clear = nvlist_get_bool(nvl, "clear"); if (clear && !(flags & FWRITE)) ERROUT(EACCES); if (! nvlist_exists_number(nvl, "nr")) ERROUT(EBADMSG); nr = nvlist_get_number(nvl, "nr"); PF_RULES_RLOCK(); rs = pf_find_keth_ruleset(anchor); if (rs == NULL) { PF_RULES_RUNLOCK(); ERROUT(ENOENT); } if (ticket != rs->active.ticket) { PF_RULES_RUNLOCK(); ERROUT(EBUSY); } nvlist_destroy(nvl); nvl = NULL; free(nvlpacked, M_NVLIST); nvlpacked = NULL; rule = TAILQ_FIRST(rs->active.rules); while ((rule != NULL) && (rule->nr != nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { PF_RULES_RUNLOCK(); ERROUT(ENOENT); } /* Make sure rule can't go away. */ NET_EPOCH_ENTER(et); PF_RULES_RUNLOCK(); nvl = pf_keth_rule_to_nveth_rule(rule); if (pf_keth_anchor_nvcopyout(rs, rule, nvl)) ERROUT(EBUSY); NET_EPOCH_EXIT(et); if (nvl == NULL) ERROUT(ENOMEM); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); if (error == 0 && clear) { counter_u64_zero(rule->evaluations); for (int i = 0; i < 2; i++) { counter_u64_zero(rule->packets[i]); counter_u64_zero(rule->bytes[i]); } } #undef ERROUT DIOCGETETHRULE_error: free(nvlpacked, M_NVLIST); nvlist_destroy(nvl); break; } case DIOCADDETHRULE: { struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl = NULL; void *nvlpacked = NULL; struct pf_keth_rule *rule = NULL, *tail = NULL; struct pf_keth_ruleset *ruleset = NULL; struct pfi_kkif *kif = NULL, *bridge_to_kif = NULL; const char *anchor = "", *anchor_call = ""; #define ERROUT(x) ERROUT_IOCTL(DIOCADDETHRULE_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); if (nvlpacked == NULL) ERROUT(ENOMEM); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "ticket")) ERROUT(EBADMSG); if (nvlist_exists_string(nvl, "anchor")) anchor = nvlist_get_string(nvl, "anchor"); if (nvlist_exists_string(nvl, "anchor_call")) anchor_call = nvlist_get_string(nvl, "anchor_call"); ruleset = pf_find_keth_ruleset(anchor); if (ruleset == NULL) ERROUT(EINVAL); if (nvlist_get_number(nvl, "ticket") != ruleset->inactive.ticket) { DPFPRINTF(PF_DEBUG_MISC, ("ticket: %d != %d\n", (u_int32_t)nvlist_get_number(nvl, "ticket"), ruleset->inactive.ticket)); ERROUT(EBUSY); } rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK); if (rule == NULL) ERROUT(ENOMEM); rule->timestamp = NULL; error = pf_nveth_rule_to_keth_rule(nvl, rule); if (error != 0) ERROUT(error); if (rule->ifname[0]) kif = pf_kkif_create(M_WAITOK); if (rule->bridge_to_name[0]) bridge_to_kif = pf_kkif_create(M_WAITOK); rule->evaluations = counter_u64_alloc(M_WAITOK); for (int i = 0; i < 2; i++) { rule->packets[i] = counter_u64_alloc(M_WAITOK); rule->bytes[i] = counter_u64_alloc(M_WAITOK); } rule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone, M_WAITOK | M_ZERO); PF_RULES_WLOCK(); if (rule->ifname[0]) { rule->kif = pfi_kkif_attach(kif, rule->ifname); pfi_kkif_ref(rule->kif); } else rule->kif = NULL; if (rule->bridge_to_name[0]) { rule->bridge_to = pfi_kkif_attach(bridge_to_kif, rule->bridge_to_name); pfi_kkif_ref(rule->bridge_to); } else rule->bridge_to = NULL; #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { if ((rule->qid = pf_qname2qid(rule->qname)) == 0) error = EBUSY; else rule->qid = rule->qid; } #endif if (rule->tagname[0]) if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) error = EBUSY; if (rule->match_tagname[0]) if ((rule->match_tag = pf_tagname2tag( rule->match_tagname)) == 0) error = EBUSY; if (error == 0 && rule->ipdst.addr.type == PF_ADDR_TABLE) error = pf_eth_addr_setup(ruleset, &rule->ipdst.addr); if (error == 0 && rule->ipsrc.addr.type == PF_ADDR_TABLE) error = pf_eth_addr_setup(ruleset, &rule->ipsrc.addr); if (error) { pf_free_eth_rule(rule); PF_RULES_WUNLOCK(); ERROUT(error); } if (pf_keth_anchor_setup(rule, ruleset, anchor_call)) { pf_free_eth_rule(rule); PF_RULES_WUNLOCK(); ERROUT(EINVAL); } tail = TAILQ_LAST(ruleset->inactive.rules, pf_keth_ruleq); if (tail) rule->nr = tail->nr + 1; else rule->nr = 0; TAILQ_INSERT_TAIL(ruleset->inactive.rules, rule, entries); PF_RULES_WUNLOCK(); #undef ERROUT DIOCADDETHRULE_error: nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); break; } case DIOCGETETHRULESETS: { struct epoch_tracker et; struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl = NULL; void *nvlpacked = NULL; struct pf_keth_ruleset *ruleset; struct pf_keth_anchor *anchor; int nr = 0; #define ERROUT(x) ERROUT_IOCTL(DIOCGETETHRULESETS_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); if (nvlpacked == NULL) ERROUT(ENOMEM); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_string(nvl, "path")) ERROUT(EBADMSG); NET_EPOCH_ENTER(et); if ((ruleset = pf_find_keth_ruleset( nvlist_get_string(nvl, "path"))) == NULL) { NET_EPOCH_EXIT(et); ERROUT(ENOENT); } if (ruleset->anchor == NULL) { RB_FOREACH(anchor, pf_keth_anchor_global, &V_pf_keth_anchors) if (anchor->parent == NULL) nr++; } else { RB_FOREACH(anchor, pf_keth_anchor_node, &ruleset->anchor->children) nr++; } NET_EPOCH_EXIT(et); nvlist_destroy(nvl); nvl = NULL; free(nvlpacked, M_NVLIST); nvlpacked = NULL; nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); nvlist_add_number(nvl, "nr", nr); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); #undef ERROUT DIOCGETETHRULESETS_error: free(nvlpacked, M_NVLIST); nvlist_destroy(nvl); break; } case DIOCGETETHRULESET: { struct epoch_tracker et; struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl = NULL; void *nvlpacked = NULL; struct pf_keth_ruleset *ruleset; struct pf_keth_anchor *anchor; int nr = 0, req_nr = 0; bool found = false; #define ERROUT(x) ERROUT_IOCTL(DIOCGETETHRULESET_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); if (nvlpacked == NULL) ERROUT(ENOMEM); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_string(nvl, "path")) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "nr")) ERROUT(EBADMSG); req_nr = nvlist_get_number(nvl, "nr"); NET_EPOCH_ENTER(et); if ((ruleset = pf_find_keth_ruleset( nvlist_get_string(nvl, "path"))) == NULL) { NET_EPOCH_EXIT(et); ERROUT(ENOENT); } nvlist_destroy(nvl); nvl = NULL; free(nvlpacked, M_NVLIST); nvlpacked = NULL; nvl = nvlist_create(0); if (nvl == NULL) { NET_EPOCH_EXIT(et); ERROUT(ENOMEM); } if (ruleset->anchor == NULL) { RB_FOREACH(anchor, pf_keth_anchor_global, &V_pf_keth_anchors) { if (anchor->parent == NULL && nr++ == req_nr) { found = true; break; } } } else { RB_FOREACH(anchor, pf_keth_anchor_node, &ruleset->anchor->children) { if (nr++ == req_nr) { found = true; break; } } } NET_EPOCH_EXIT(et); if (found) { nvlist_add_number(nvl, "nr", nr); nvlist_add_string(nvl, "name", anchor->name); if (ruleset->anchor) nvlist_add_string(nvl, "path", ruleset->anchor->path); else nvlist_add_string(nvl, "path", ""); } else { ERROUT(EBUSY); } nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); #undef ERROUT DIOCGETETHRULESET_error: free(nvlpacked, M_NVLIST); nvlist_destroy(nvl); break; } case DIOCADDRULENV: { struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl = NULL; void *nvlpacked = NULL; struct pf_krule *rule = NULL; const char *anchor = "", *anchor_call = ""; uint32_t ticket = 0, pool_ticket = 0; #define ERROUT(x) ERROUT_IOCTL(DIOCADDRULENV_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "ticket")) ERROUT(EINVAL); ticket = nvlist_get_number(nvl, "ticket"); if (! nvlist_exists_number(nvl, "pool_ticket")) ERROUT(EINVAL); pool_ticket = nvlist_get_number(nvl, "pool_ticket"); if (! nvlist_exists_nvlist(nvl, "rule")) ERROUT(EINVAL); rule = pf_krule_alloc(); error = pf_nvrule_to_krule(nvlist_get_nvlist(nvl, "rule"), rule); if (error) ERROUT(error); if (nvlist_exists_string(nvl, "anchor")) anchor = nvlist_get_string(nvl, "anchor"); if (nvlist_exists_string(nvl, "anchor_call")) anchor_call = nvlist_get_string(nvl, "anchor_call"); if ((error = nvlist_error(nvl))) ERROUT(error); /* Frees rule on error */ error = pf_ioctl_addrule(rule, ticket, pool_ticket, anchor, anchor_call, td); nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); break; #undef ERROUT DIOCADDRULENV_error: pf_krule_free(rule); nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); break; } case DIOCADDRULE: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_krule *rule; rule = pf_krule_alloc(); error = pf_rule_to_krule(&pr->rule, rule); if (error != 0) { pf_krule_free(rule); break; } pr->anchor[sizeof(pr->anchor) - 1] = 0; /* Frees rule on error */ error = pf_ioctl_addrule(rule, pr->ticket, pr->pool_ticket, pr->anchor, pr->anchor_call, td); break; } case DIOCGETRULES: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_kruleset *ruleset; struct pf_krule *tail; int rs_num; pr->anchor[sizeof(pr->anchor) - 1] = 0; PF_RULES_WLOCK(); ruleset = pf_find_kruleset(pr->anchor); if (ruleset == NULL) { PF_RULES_WUNLOCK(); error = EINVAL; break; } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); error = EINVAL; break; } tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, pf_krulequeue); if (tail) pr->nr = tail->nr + 1; else pr->nr = 0; pr->ticket = ruleset->rules[rs_num].active.ticket; PF_RULES_WUNLOCK(); break; } case DIOCGETRULE: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_kruleset *ruleset; struct pf_krule *rule; int rs_num; pr->anchor[sizeof(pr->anchor) - 1] = 0; PF_RULES_WLOCK(); ruleset = pf_find_kruleset(pr->anchor); if (ruleset == NULL) { PF_RULES_WUNLOCK(); error = EINVAL; break; } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); error = EINVAL; break; } if (pr->ticket != ruleset->rules[rs_num].active.ticket) { PF_RULES_WUNLOCK(); error = EBUSY; break; } rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); while ((rule != NULL) && (rule->nr != pr->nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { PF_RULES_WUNLOCK(); error = EBUSY; break; } pf_krule_to_rule(rule, &pr->rule); if (pf_kanchor_copyout(ruleset, rule, pr)) { PF_RULES_WUNLOCK(); error = EBUSY; break; } pf_addr_copyout(&pr->rule.src.addr); pf_addr_copyout(&pr->rule.dst.addr); if (pr->action == PF_GET_CLR_CNTR) { pf_counter_u64_zero(&rule->evaluations); for (int i = 0; i < 2; i++) { pf_counter_u64_zero(&rule->packets[i]); pf_counter_u64_zero(&rule->bytes[i]); } counter_u64_zero(rule->states_tot); } PF_RULES_WUNLOCK(); break; } case DIOCGETRULENV: { struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvrule = NULL; nvlist_t *nvl = NULL; struct pf_kruleset *ruleset; struct pf_krule *rule; void *nvlpacked = NULL; int rs_num, nr; bool clear_counter = false; #define ERROUT(x) ERROUT_IOCTL(DIOCGETRULENV_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); /* Copy the request in */ nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); if (nvlpacked == NULL) ERROUT(ENOMEM); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_string(nvl, "anchor")) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "ruleset")) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "ticket")) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "nr")) ERROUT(EBADMSG); if (nvlist_exists_bool(nvl, "clear_counter")) clear_counter = nvlist_get_bool(nvl, "clear_counter"); if (clear_counter && !(flags & FWRITE)) ERROUT(EACCES); nr = nvlist_get_number(nvl, "nr"); PF_RULES_WLOCK(); ruleset = pf_find_kruleset(nvlist_get_string(nvl, "anchor")); if (ruleset == NULL) { PF_RULES_WUNLOCK(); ERROUT(ENOENT); } rs_num = pf_get_ruleset_number(nvlist_get_number(nvl, "ruleset")); if (rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); ERROUT(EINVAL); } if (nvlist_get_number(nvl, "ticket") != ruleset->rules[rs_num].active.ticket) { PF_RULES_WUNLOCK(); ERROUT(EBUSY); } if ((error = nvlist_error(nvl))) { PF_RULES_WUNLOCK(); ERROUT(error); } rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); while ((rule != NULL) && (rule->nr != nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { PF_RULES_WUNLOCK(); ERROUT(EBUSY); } nvrule = pf_krule_to_nvrule(rule); nvlist_destroy(nvl); nvl = nvlist_create(0); if (nvl == NULL) { PF_RULES_WUNLOCK(); ERROUT(ENOMEM); } nvlist_add_number(nvl, "nr", nr); nvlist_add_nvlist(nvl, "rule", nvrule); nvlist_destroy(nvrule); nvrule = NULL; if (pf_kanchor_nvcopyout(ruleset, rule, nvl)) { PF_RULES_WUNLOCK(); ERROUT(EBUSY); } free(nvlpacked, M_NVLIST); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) { PF_RULES_WUNLOCK(); ERROUT(ENOMEM); } if (nv->size == 0) { PF_RULES_WUNLOCK(); ERROUT(0); } else if (nv->size < nv->len) { PF_RULES_WUNLOCK(); ERROUT(ENOSPC); } if (clear_counter) { pf_counter_u64_zero(&rule->evaluations); for (int i = 0; i < 2; i++) { pf_counter_u64_zero(&rule->packets[i]); pf_counter_u64_zero(&rule->bytes[i]); } counter_u64_zero(rule->states_tot); } PF_RULES_WUNLOCK(); error = copyout(nvlpacked, nv->data, nv->len); #undef ERROUT DIOCGETRULENV_error: free(nvlpacked, M_NVLIST); nvlist_destroy(nvrule); nvlist_destroy(nvl); break; } case DIOCCHANGERULE: { struct pfioc_rule *pcr = (struct pfioc_rule *)addr; struct pf_kruleset *ruleset; struct pf_krule *oldrule = NULL, *newrule = NULL; struct pfi_kkif *kif = NULL; struct pf_kpooladdr *pa; u_int32_t nr = 0; int rs_num; pcr->anchor[sizeof(pcr->anchor) - 1] = 0; if (pcr->action < PF_CHANGE_ADD_HEAD || pcr->action > PF_CHANGE_GET_TICKET) { error = EINVAL; break; } if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; break; } if (pcr->action != PF_CHANGE_REMOVE) { newrule = pf_krule_alloc(); error = pf_rule_to_krule(&pcr->rule, newrule); if (error != 0) { pf_krule_free(newrule); break; } if (newrule->ifname[0]) kif = pf_kkif_create(M_WAITOK); pf_counter_u64_init(&newrule->evaluations, M_WAITOK); for (int i = 0; i < 2; i++) { pf_counter_u64_init(&newrule->packets[i], M_WAITOK); pf_counter_u64_init(&newrule->bytes[i], M_WAITOK); } newrule->states_cur = counter_u64_alloc(M_WAITOK); newrule->states_tot = counter_u64_alloc(M_WAITOK); newrule->src_nodes = counter_u64_alloc(M_WAITOK); newrule->cuid = td->td_ucred->cr_ruid; newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0; TAILQ_INIT(&newrule->rpool.list); } #define ERROUT(x) ERROUT_IOCTL(DIOCCHANGERULE_error, x) PF_CONFIG_LOCK(); PF_RULES_WLOCK(); #ifdef PF_WANT_32_TO_64_COUNTER if (newrule != NULL) { LIST_INSERT_HEAD(&V_pf_allrulelist, newrule, allrulelist); newrule->allrulelinked = true; V_pf_allrulecount++; } #endif if (!(pcr->action == PF_CHANGE_REMOVE || pcr->action == PF_CHANGE_GET_TICKET) && pcr->pool_ticket != V_ticket_pabuf) ERROUT(EBUSY); ruleset = pf_find_kruleset(pcr->anchor); if (ruleset == NULL) ERROUT(EINVAL); rs_num = pf_get_ruleset_number(pcr->rule.action); if (rs_num >= PF_RULESET_MAX) ERROUT(EINVAL); /* * XXXMJG: there is no guarantee that the ruleset was * created by the usual route of calling DIOCXBEGIN. * As a result it is possible the rule tree will not * be allocated yet. Hack around it by doing it here. * Note it is fine to let the tree persist in case of * error as it will be freed down the road on future * updates (if need be). */ if (ruleset->rules[rs_num].active.tree == NULL) { ruleset->rules[rs_num].active.tree = pf_rule_tree_alloc(M_NOWAIT); if (ruleset->rules[rs_num].active.tree == NULL) { ERROUT(ENOMEM); } } if (pcr->action == PF_CHANGE_GET_TICKET) { pcr->ticket = ++ruleset->rules[rs_num].active.ticket; ERROUT(0); } else if (pcr->ticket != ruleset->rules[rs_num].active.ticket) ERROUT(EINVAL); if (pcr->action != PF_CHANGE_REMOVE) { if (newrule->ifname[0]) { newrule->kif = pfi_kkif_attach(kif, newrule->ifname); kif = NULL; pfi_kkif_ref(newrule->kif); } else newrule->kif = NULL; if (newrule->rtableid > 0 && newrule->rtableid >= rt_numfibs) error = EBUSY; #ifdef ALTQ /* set queue IDs */ if (newrule->qname[0] != 0) { if ((newrule->qid = pf_qname2qid(newrule->qname)) == 0) error = EBUSY; else if (newrule->pqname[0] != 0) { if ((newrule->pqid = pf_qname2qid(newrule->pqname)) == 0) error = EBUSY; } else newrule->pqid = newrule->qid; } #endif /* ALTQ */ if (newrule->tagname[0]) if ((newrule->tag = pf_tagname2tag(newrule->tagname)) == 0) error = EBUSY; if (newrule->match_tagname[0]) if ((newrule->match_tag = pf_tagname2tag( newrule->match_tagname)) == 0) error = EBUSY; if (newrule->rt && !newrule->direction) error = EINVAL; if (!newrule->log) newrule->logif = 0; if (newrule->logif >= PFLOGIFS_MAX) error = EINVAL; if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af)) error = ENOMEM; if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af)) error = ENOMEM; if (pf_kanchor_setup(newrule, ruleset, pcr->anchor_call)) error = EINVAL; TAILQ_FOREACH(pa, &V_pf_pabuf, entries) if (pa->addr.type == PF_ADDR_TABLE) { pa->addr.p.tbl = pfr_attach_table(ruleset, pa->addr.v.tblname); if (pa->addr.p.tbl == NULL) error = ENOMEM; } newrule->overload_tbl = NULL; if (newrule->overload_tblname[0]) { if ((newrule->overload_tbl = pfr_attach_table( ruleset, newrule->overload_tblname)) == NULL) error = EINVAL; else newrule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } pf_mv_kpool(&V_pf_pabuf, &newrule->rpool.list); if (((((newrule->action == PF_NAT) || (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || (newrule->rt > PF_NOPFROUTE)) && !newrule->anchor)) && (TAILQ_FIRST(&newrule->rpool.list) == NULL)) error = EINVAL; if (error) { pf_free_rule(newrule); PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); break; } newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); } pf_empty_kpool(&V_pf_pabuf); if (pcr->action == PF_CHANGE_ADD_HEAD) oldrule = TAILQ_FIRST( ruleset->rules[rs_num].active.ptr); else if (pcr->action == PF_CHANGE_ADD_TAIL) oldrule = TAILQ_LAST( ruleset->rules[rs_num].active.ptr, pf_krulequeue); else { oldrule = TAILQ_FIRST( ruleset->rules[rs_num].active.ptr); while ((oldrule != NULL) && (oldrule->nr != pcr->nr)) oldrule = TAILQ_NEXT(oldrule, entries); if (oldrule == NULL) { if (newrule != NULL) pf_free_rule(newrule); PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); error = EINVAL; break; } } if (pcr->action == PF_CHANGE_REMOVE) { pf_unlink_rule(ruleset->rules[rs_num].active.ptr, oldrule); RB_REMOVE(pf_krule_global, ruleset->rules[rs_num].active.tree, oldrule); ruleset->rules[rs_num].active.rcount--; } else { pf_hash_rule(newrule); if (RB_INSERT(pf_krule_global, ruleset->rules[rs_num].active.tree, newrule) != NULL) { pf_free_rule(newrule); PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); error = EEXIST; break; } if (oldrule == NULL) TAILQ_INSERT_TAIL( ruleset->rules[rs_num].active.ptr, newrule, entries); else if (pcr->action == PF_CHANGE_ADD_HEAD || pcr->action == PF_CHANGE_ADD_BEFORE) TAILQ_INSERT_BEFORE(oldrule, newrule, entries); else TAILQ_INSERT_AFTER( ruleset->rules[rs_num].active.ptr, oldrule, newrule, entries); ruleset->rules[rs_num].active.rcount++; } nr = 0; TAILQ_FOREACH(oldrule, ruleset->rules[rs_num].active.ptr, entries) oldrule->nr = nr++; ruleset->rules[rs_num].active.ticket++; pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); pf_remove_if_empty_kruleset(ruleset); PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); break; #undef ERROUT DIOCCHANGERULE_error: PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); pf_krule_free(newrule); pf_kkif_free(kif); break; } case DIOCCLRSTATES: { struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; struct pf_kstate_kill kill; error = pf_state_kill_to_kstate_kill(psk, &kill); if (error) break; psk->psk_killed = pf_clear_states(&kill); break; } case DIOCCLRSTATESNV: { error = pf_clearstates_nv((struct pfioc_nv *)addr); break; } case DIOCKILLSTATES: { struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; struct pf_kstate_kill kill; error = pf_state_kill_to_kstate_kill(psk, &kill); if (error) break; psk->psk_killed = 0; pf_killstates(&kill, &psk->psk_killed); break; } case DIOCKILLSTATESNV: { error = pf_killstates_nv((struct pfioc_nv *)addr); break; } case DIOCADDSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pfsync_state_1301 *sp = &ps->state; if (sp->timeout >= PFTM_MAX) { error = EINVAL; break; } if (V_pfsync_state_import_ptr != NULL) { PF_RULES_RLOCK(); error = V_pfsync_state_import_ptr( (union pfsync_state_union *)sp, PFSYNC_SI_IOCTL, PFSYNC_MSG_VERSION_1301); PF_RULES_RUNLOCK(); } else error = EOPNOTSUPP; break; } case DIOCGETSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pf_kstate *s; s = pf_find_state_byid(ps->state.id, ps->state.creatorid); if (s == NULL) { error = ENOENT; break; } pfsync_state_export((union pfsync_state_union*)&ps->state, s, PFSYNC_MSG_VERSION_1301); PF_STATE_UNLOCK(s); break; } case DIOCGETSTATENV: { error = pf_getstate((struct pfioc_nv *)addr); break; } case DIOCGETSTATES: { struct pfioc_states *ps = (struct pfioc_states *)addr; struct pf_kstate *s; struct pfsync_state_1301 *pstore, *p; int i, nr; size_t slice_count = 16, count; void *out; if (ps->ps_len <= 0) { nr = uma_zone_get_cur(V_pf_state_z); ps->ps_len = sizeof(struct pfsync_state_1301) * nr; break; } out = ps->ps_states; pstore = mallocarray(slice_count, sizeof(struct pfsync_state_1301), M_TEMP, M_WAITOK | M_ZERO); nr = 0; for (i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; DIOCGETSTATES_retry: p = pstore; if (LIST_EMPTY(&ih->states)) continue; PF_HASHROW_LOCK(ih); count = 0; LIST_FOREACH(s, &ih->states, entry) { if (s->timeout == PFTM_UNLINKED) continue; count++; } if (count > slice_count) { PF_HASHROW_UNLOCK(ih); free(pstore, M_TEMP); slice_count = count * 2; pstore = mallocarray(slice_count, sizeof(struct pfsync_state_1301), M_TEMP, M_WAITOK | M_ZERO); goto DIOCGETSTATES_retry; } if ((nr+count) * sizeof(*p) > ps->ps_len) { PF_HASHROW_UNLOCK(ih); goto DIOCGETSTATES_full; } LIST_FOREACH(s, &ih->states, entry) { if (s->timeout == PFTM_UNLINKED) continue; pfsync_state_export((union pfsync_state_union*)p, s, PFSYNC_MSG_VERSION_1301); p++; nr++; } PF_HASHROW_UNLOCK(ih); error = copyout(pstore, out, sizeof(struct pfsync_state_1301) * count); if (error) break; out = ps->ps_states + nr; } DIOCGETSTATES_full: ps->ps_len = sizeof(struct pfsync_state_1301) * nr; free(pstore, M_TEMP); break; } case DIOCGETSTATESV2: { struct pfioc_states_v2 *ps = (struct pfioc_states_v2 *)addr; struct pf_kstate *s; struct pf_state_export *pstore, *p; int i, nr; size_t slice_count = 16, count; void *out; if (ps->ps_req_version > PF_STATE_VERSION) { error = ENOTSUP; break; } if (ps->ps_len <= 0) { nr = uma_zone_get_cur(V_pf_state_z); ps->ps_len = sizeof(struct pf_state_export) * nr; break; } out = ps->ps_states; pstore = mallocarray(slice_count, sizeof(struct pf_state_export), M_TEMP, M_WAITOK | M_ZERO); nr = 0; for (i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; DIOCGETSTATESV2_retry: p = pstore; if (LIST_EMPTY(&ih->states)) continue; PF_HASHROW_LOCK(ih); count = 0; LIST_FOREACH(s, &ih->states, entry) { if (s->timeout == PFTM_UNLINKED) continue; count++; } if (count > slice_count) { PF_HASHROW_UNLOCK(ih); free(pstore, M_TEMP); slice_count = count * 2; pstore = mallocarray(slice_count, sizeof(struct pf_state_export), M_TEMP, M_WAITOK | M_ZERO); goto DIOCGETSTATESV2_retry; } if ((nr+count) * sizeof(*p) > ps->ps_len) { PF_HASHROW_UNLOCK(ih); goto DIOCGETSTATESV2_full; } LIST_FOREACH(s, &ih->states, entry) { if (s->timeout == PFTM_UNLINKED) continue; pf_state_export(p, s); p++; nr++; } PF_HASHROW_UNLOCK(ih); error = copyout(pstore, out, sizeof(struct pf_state_export) * count); if (error) break; out = ps->ps_states + nr; } DIOCGETSTATESV2_full: ps->ps_len = nr * sizeof(struct pf_state_export); free(pstore, M_TEMP); break; } case DIOCGETSTATUS: { struct pf_status *s = (struct pf_status *)addr; PF_RULES_RLOCK(); s->running = V_pf_status.running; s->since = V_pf_status.since; s->debug = V_pf_status.debug; s->hostid = V_pf_status.hostid; s->states = V_pf_status.states; s->src_nodes = V_pf_status.src_nodes; for (int i = 0; i < PFRES_MAX; i++) s->counters[i] = counter_u64_fetch(V_pf_status.counters[i]); for (int i = 0; i < LCNT_MAX; i++) s->lcounters[i] = counter_u64_fetch(V_pf_status.lcounters[i]); for (int i = 0; i < FCNT_MAX; i++) s->fcounters[i] = pf_counter_u64_fetch(&V_pf_status.fcounters[i]); for (int i = 0; i < SCNT_MAX; i++) s->scounters[i] = counter_u64_fetch(V_pf_status.scounters[i]); bcopy(V_pf_status.ifname, s->ifname, IFNAMSIZ); bcopy(V_pf_status.pf_chksum, s->pf_chksum, PF_MD5_DIGEST_LENGTH); pfi_update_status(s->ifname, s); PF_RULES_RUNLOCK(); break; } case DIOCGETSTATUSNV: { error = pf_getstatus((struct pfioc_nv *)addr); break; } case DIOCSETSTATUSIF: { struct pfioc_if *pi = (struct pfioc_if *)addr; if (pi->ifname[0] == 0) { bzero(V_pf_status.ifname, IFNAMSIZ); break; } PF_RULES_WLOCK(); error = pf_user_strcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ); PF_RULES_WUNLOCK(); break; } case DIOCCLRSTATUS: { PF_RULES_WLOCK(); for (int i = 0; i < PFRES_MAX; i++) counter_u64_zero(V_pf_status.counters[i]); for (int i = 0; i < FCNT_MAX; i++) pf_counter_u64_zero(&V_pf_status.fcounters[i]); for (int i = 0; i < SCNT_MAX; i++) counter_u64_zero(V_pf_status.scounters[i]); for (int i = 0; i < KLCNT_MAX; i++) counter_u64_zero(V_pf_status.lcounters[i]); V_pf_status.since = time_second; if (*V_pf_status.ifname) pfi_update_status(V_pf_status.ifname, NULL); PF_RULES_WUNLOCK(); break; } case DIOCNATLOOK: { struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; struct pf_state_key *sk; struct pf_kstate *state; struct pf_state_key_cmp key; int m = 0, direction = pnl->direction; int sidx, didx; /* NATLOOK src and dst are reversed, so reverse sidx/didx */ sidx = (direction == PF_IN) ? 1 : 0; didx = (direction == PF_IN) ? 0 : 1; if (!pnl->proto || PF_AZERO(&pnl->saddr, pnl->af) || PF_AZERO(&pnl->daddr, pnl->af) || ((pnl->proto == IPPROTO_TCP || pnl->proto == IPPROTO_UDP) && (!pnl->dport || !pnl->sport))) error = EINVAL; else { bzero(&key, sizeof(key)); key.af = pnl->af; key.proto = pnl->proto; PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af); key.port[sidx] = pnl->sport; PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af); key.port[didx] = pnl->dport; state = pf_find_state_all(&key, direction, &m); if (state == NULL) { error = ENOENT; } else { if (m > 1) { PF_STATE_UNLOCK(state); error = E2BIG; /* more than one state */ } else { sk = state->key[sidx]; PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); pnl->rsport = sk->port[sidx]; PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af); pnl->rdport = sk->port[didx]; PF_STATE_UNLOCK(state); } } } break; } case DIOCSETTIMEOUT: { struct pfioc_tm *pt = (struct pfioc_tm *)addr; int old; if (pt->timeout < 0 || pt->timeout >= PFTM_MAX || pt->seconds < 0) { error = EINVAL; break; } PF_RULES_WLOCK(); old = V_pf_default_rule.timeout[pt->timeout]; if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) pt->seconds = 1; V_pf_default_rule.timeout[pt->timeout] = pt->seconds; if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) wakeup(pf_purge_thread); pt->seconds = old; PF_RULES_WUNLOCK(); break; } case DIOCGETTIMEOUT: { struct pfioc_tm *pt = (struct pfioc_tm *)addr; if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) { error = EINVAL; break; } PF_RULES_RLOCK(); pt->seconds = V_pf_default_rule.timeout[pt->timeout]; PF_RULES_RUNLOCK(); break; } case DIOCGETLIMIT: { struct pfioc_limit *pl = (struct pfioc_limit *)addr; if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) { error = EINVAL; break; } PF_RULES_RLOCK(); pl->limit = V_pf_limits[pl->index].limit; PF_RULES_RUNLOCK(); break; } case DIOCSETLIMIT: { struct pfioc_limit *pl = (struct pfioc_limit *)addr; int old_limit; PF_RULES_WLOCK(); if (pl->index < 0 || pl->index >= PF_LIMIT_MAX || V_pf_limits[pl->index].zone == NULL) { PF_RULES_WUNLOCK(); error = EINVAL; break; } uma_zone_set_max(V_pf_limits[pl->index].zone, pl->limit); old_limit = V_pf_limits[pl->index].limit; V_pf_limits[pl->index].limit = pl->limit; pl->limit = old_limit; PF_RULES_WUNLOCK(); break; } case DIOCSETDEBUG: { u_int32_t *level = (u_int32_t *)addr; PF_RULES_WLOCK(); V_pf_status.debug = *level; PF_RULES_WUNLOCK(); break; } case DIOCCLRRULECTRS: { /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */ struct pf_kruleset *ruleset = &pf_main_ruleset; struct pf_krule *rule; PF_RULES_WLOCK(); TAILQ_FOREACH(rule, ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { pf_counter_u64_zero(&rule->evaluations); for (int i = 0; i < 2; i++) { pf_counter_u64_zero(&rule->packets[i]); pf_counter_u64_zero(&rule->bytes[i]); } } PF_RULES_WUNLOCK(); break; } case DIOCGIFSPEEDV0: case DIOCGIFSPEEDV1: { struct pf_ifspeed_v1 *psp = (struct pf_ifspeed_v1 *)addr; struct pf_ifspeed_v1 ps; struct ifnet *ifp; if (psp->ifname[0] == '\0') { error = EINVAL; break; } error = pf_user_strcpy(ps.ifname, psp->ifname, IFNAMSIZ); if (error != 0) break; ifp = ifunit(ps.ifname); if (ifp != NULL) { psp->baudrate32 = (u_int32_t)uqmin(ifp->if_baudrate, UINT_MAX); if (cmd == DIOCGIFSPEEDV1) psp->baudrate = ifp->if_baudrate; } else { error = EINVAL; } break; } #ifdef ALTQ case DIOCSTARTALTQ: { struct pf_altq *altq; PF_RULES_WLOCK(); /* enable all altq interfaces on active list */ TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { error = pf_enable_altq(altq); if (error != 0) break; } } if (error == 0) V_pf_altq_running = 1; PF_RULES_WUNLOCK(); DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); break; } case DIOCSTOPALTQ: { struct pf_altq *altq; PF_RULES_WLOCK(); /* disable all altq interfaces on active list */ TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { error = pf_disable_altq(altq); if (error != 0) break; } } if (error == 0) V_pf_altq_running = 0; PF_RULES_WUNLOCK(); DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); break; } case DIOCADDALTQV0: case DIOCADDALTQV1: { struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr; struct pf_altq *altq, *a; struct ifnet *ifp; altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK | M_ZERO); error = pf_import_kaltq(pa, altq, IOCPARM_LEN(cmd)); if (error) break; altq->local_flags = 0; PF_RULES_WLOCK(); if (pa->ticket != V_ticket_altqs_inactive) { PF_RULES_WUNLOCK(); free(altq, M_PFALTQ); error = EBUSY; break; } /* * if this is for a queue, find the discipline and * copy the necessary fields */ if (altq->qname[0] != 0) { if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { PF_RULES_WUNLOCK(); error = EBUSY; free(altq, M_PFALTQ); break; } altq->altq_disc = NULL; TAILQ_FOREACH(a, V_pf_altq_ifs_inactive, entries) { if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0) { altq->altq_disc = a->altq_disc; break; } } } if ((ifp = ifunit(altq->ifname)) == NULL) altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; else error = altq_add(ifp, altq); if (error) { PF_RULES_WUNLOCK(); free(altq, M_PFALTQ); break; } if (altq->qname[0] != 0) TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); else TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, altq, entries); /* version error check done on import above */ pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd)); PF_RULES_WUNLOCK(); break; } case DIOCGETALTQSV0: case DIOCGETALTQSV1: { struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr; struct pf_altq *altq; PF_RULES_RLOCK(); pa->nr = 0; TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) pa->nr++; TAILQ_FOREACH(altq, V_pf_altqs_active, entries) pa->nr++; pa->ticket = V_ticket_altqs_active; PF_RULES_RUNLOCK(); break; } case DIOCGETALTQV0: case DIOCGETALTQV1: { struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr; struct pf_altq *altq; PF_RULES_RLOCK(); if (pa->ticket != V_ticket_altqs_active) { PF_RULES_RUNLOCK(); error = EBUSY; break; } altq = pf_altq_get_nth_active(pa->nr); if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; break; } pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd)); PF_RULES_RUNLOCK(); break; } case DIOCCHANGEALTQV0: case DIOCCHANGEALTQV1: /* CHANGEALTQ not supported yet! */ error = ENODEV; break; case DIOCGETQSTATSV0: case DIOCGETQSTATSV1: { struct pfioc_qstats_v1 *pq = (struct pfioc_qstats_v1 *)addr; struct pf_altq *altq; int nbytes; u_int32_t version; PF_RULES_RLOCK(); if (pq->ticket != V_ticket_altqs_active) { PF_RULES_RUNLOCK(); error = EBUSY; break; } nbytes = pq->nbytes; altq = pf_altq_get_nth_active(pq->nr); if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; break; } if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) { PF_RULES_RUNLOCK(); error = ENXIO; break; } PF_RULES_RUNLOCK(); if (cmd == DIOCGETQSTATSV0) version = 0; /* DIOCGETQSTATSV0 means stats struct v0 */ else version = pq->version; error = altq_getqstats(altq, pq->buf, &nbytes, version); if (error == 0) { pq->scheduler = altq->scheduler; pq->nbytes = nbytes; } break; } #endif /* ALTQ */ case DIOCBEGINADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; PF_RULES_WLOCK(); pf_empty_kpool(&V_pf_pabuf); pp->ticket = ++V_ticket_pabuf; PF_RULES_WUNLOCK(); break; } case DIOCADDADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; struct pf_kpooladdr *pa; struct pfi_kkif *kif = NULL; #ifndef INET if (pp->af == AF_INET) { error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pp->af == AF_INET6) { error = EAFNOSUPPORT; break; } #endif /* INET6 */ if (pp->addr.addr.type != PF_ADDR_ADDRMASK && pp->addr.addr.type != PF_ADDR_DYNIFTL && pp->addr.addr.type != PF_ADDR_TABLE) { error = EINVAL; break; } if (pp->addr.addr.p.dyn != NULL) { error = EINVAL; break; } pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK); error = pf_pooladdr_to_kpooladdr(&pp->addr, pa); if (error != 0) break; if (pa->ifname[0]) kif = pf_kkif_create(M_WAITOK); PF_RULES_WLOCK(); if (pp->ticket != V_ticket_pabuf) { PF_RULES_WUNLOCK(); if (pa->ifname[0]) pf_kkif_free(kif); free(pa, M_PFRULE); error = EBUSY; break; } if (pa->ifname[0]) { pa->kif = pfi_kkif_attach(kif, pa->ifname); kif = NULL; pfi_kkif_ref(pa->kif); } else pa->kif = NULL; if (pa->addr.type == PF_ADDR_DYNIFTL && ((error = pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) { if (pa->ifname[0]) pfi_kkif_unref(pa->kif); PF_RULES_WUNLOCK(); free(pa, M_PFRULE); break; } TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries); PF_RULES_WUNLOCK(); break; } case DIOCGETADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; struct pf_kpool *pool; struct pf_kpooladdr *pa; pp->anchor[sizeof(pp->anchor) - 1] = 0; pp->nr = 0; PF_RULES_RLOCK(); pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 0); if (pool == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; break; } TAILQ_FOREACH(pa, &pool->list, entries) pp->nr++; PF_RULES_RUNLOCK(); break; } case DIOCGETADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; struct pf_kpool *pool; struct pf_kpooladdr *pa; u_int32_t nr = 0; pp->anchor[sizeof(pp->anchor) - 1] = 0; PF_RULES_RLOCK(); pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 1); if (pool == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; break; } pa = TAILQ_FIRST(&pool->list); while ((pa != NULL) && (nr < pp->nr)) { pa = TAILQ_NEXT(pa, entries); nr++; } if (pa == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; break; } pf_kpooladdr_to_pooladdr(pa, &pp->addr); pf_addr_copyout(&pp->addr.addr); PF_RULES_RUNLOCK(); break; } case DIOCCHANGEADDR: { struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr; struct pf_kpool *pool; struct pf_kpooladdr *oldpa = NULL, *newpa = NULL; struct pf_kruleset *ruleset; struct pfi_kkif *kif = NULL; pca->anchor[sizeof(pca->anchor) - 1] = 0; if (pca->action < PF_CHANGE_ADD_HEAD || pca->action > PF_CHANGE_REMOVE) { error = EINVAL; break; } if (pca->addr.addr.type != PF_ADDR_ADDRMASK && pca->addr.addr.type != PF_ADDR_DYNIFTL && pca->addr.addr.type != PF_ADDR_TABLE) { error = EINVAL; break; } if (pca->addr.addr.p.dyn != NULL) { error = EINVAL; break; } if (pca->action != PF_CHANGE_REMOVE) { #ifndef INET if (pca->af == AF_INET) { error = EAFNOSUPPORT; break; } #endif /* INET */ #ifndef INET6 if (pca->af == AF_INET6) { error = EAFNOSUPPORT; break; } #endif /* INET6 */ newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK); bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); if (newpa->ifname[0]) kif = pf_kkif_create(M_WAITOK); newpa->kif = NULL; } #define ERROUT(x) ERROUT_IOCTL(DIOCCHANGEADDR_error, x) PF_RULES_WLOCK(); ruleset = pf_find_kruleset(pca->anchor); if (ruleset == NULL) ERROUT(EBUSY); pool = pf_get_kpool(pca->anchor, pca->ticket, pca->r_action, pca->r_num, pca->r_last, 1, 1); if (pool == NULL) ERROUT(EBUSY); if (pca->action != PF_CHANGE_REMOVE) { if (newpa->ifname[0]) { newpa->kif = pfi_kkif_attach(kif, newpa->ifname); pfi_kkif_ref(newpa->kif); kif = NULL; } switch (newpa->addr.type) { case PF_ADDR_DYNIFTL: error = pfi_dynaddr_setup(&newpa->addr, pca->af); break; case PF_ADDR_TABLE: newpa->addr.p.tbl = pfr_attach_table(ruleset, newpa->addr.v.tblname); if (newpa->addr.p.tbl == NULL) error = ENOMEM; break; } if (error) goto DIOCCHANGEADDR_error; } switch (pca->action) { case PF_CHANGE_ADD_HEAD: oldpa = TAILQ_FIRST(&pool->list); break; case PF_CHANGE_ADD_TAIL: oldpa = TAILQ_LAST(&pool->list, pf_kpalist); break; default: oldpa = TAILQ_FIRST(&pool->list); for (int i = 0; oldpa && i < pca->nr; i++) oldpa = TAILQ_NEXT(oldpa, entries); if (oldpa == NULL) ERROUT(EINVAL); } if (pca->action == PF_CHANGE_REMOVE) { TAILQ_REMOVE(&pool->list, oldpa, entries); switch (oldpa->addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(oldpa->addr.p.dyn); break; case PF_ADDR_TABLE: pfr_detach_table(oldpa->addr.p.tbl); break; } if (oldpa->kif) pfi_kkif_unref(oldpa->kif); free(oldpa, M_PFRULE); } else { if (oldpa == NULL) TAILQ_INSERT_TAIL(&pool->list, newpa, entries); else if (pca->action == PF_CHANGE_ADD_HEAD || pca->action == PF_CHANGE_ADD_BEFORE) TAILQ_INSERT_BEFORE(oldpa, newpa, entries); else TAILQ_INSERT_AFTER(&pool->list, oldpa, newpa, entries); } pool->cur = TAILQ_FIRST(&pool->list); PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); PF_RULES_WUNLOCK(); break; #undef ERROUT DIOCCHANGEADDR_error: if (newpa != NULL) { if (newpa->kif) pfi_kkif_unref(newpa->kif); free(newpa, M_PFRULE); } PF_RULES_WUNLOCK(); pf_kkif_free(kif); break; } case DIOCGETRULESETS: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; struct pf_kruleset *ruleset; struct pf_kanchor *anchor; pr->path[sizeof(pr->path) - 1] = 0; PF_RULES_RLOCK(); if ((ruleset = pf_find_kruleset(pr->path)) == NULL) { PF_RULES_RUNLOCK(); error = ENOENT; break; } pr->nr = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) if (anchor->parent == NULL) pr->nr++; } else { RB_FOREACH(anchor, pf_kanchor_node, &ruleset->anchor->children) pr->nr++; } PF_RULES_RUNLOCK(); break; } case DIOCGETRULESET: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; struct pf_kruleset *ruleset; struct pf_kanchor *anchor; u_int32_t nr = 0; pr->path[sizeof(pr->path) - 1] = 0; PF_RULES_RLOCK(); if ((ruleset = pf_find_kruleset(pr->path)) == NULL) { PF_RULES_RUNLOCK(); error = ENOENT; break; } pr->name[0] = 0; if (ruleset->anchor == NULL) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) if (anchor->parent == NULL && nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); break; } } else { RB_FOREACH(anchor, pf_kanchor_node, &ruleset->anchor->children) if (nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); break; } } if (!pr->name[0]) error = EBUSY; PF_RULES_RUNLOCK(); break; } case DIOCRCLRTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != 0) { error = ENODEV; break; } PF_RULES_WLOCK(); error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); break; } case DIOCRADDTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) { error = ENOMEM; break; } totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_add_tables(pfrts, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfrts, M_TEMP); break; } case DIOCRDELTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) { error = ENOMEM; break; } totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_del_tables(pfrts, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfrts, M_TEMP); break; } case DIOCRGETTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; int n; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } PF_RULES_RLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); if (n < 0) { PF_RULES_RUNLOCK(); error = EINVAL; break; } io->pfrio_size = min(io->pfrio_size, n); totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_TEMP, M_NOWAIT | M_ZERO); if (pfrts == NULL) { error = ENOMEM; PF_RULES_RUNLOCK(); break; } error = pfr_get_tables(&io->pfrio_table, pfrts, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfrts, io->pfrio_buffer, totlen); free(pfrts, M_TEMP); break; } case DIOCRGETTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_tstats *pfrtstats; size_t totlen; int n; if (io->pfrio_esize != sizeof(struct pfr_tstats)) { error = ENODEV; break; } PF_TABLE_STATS_LOCK(); PF_RULES_RLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); if (n < 0) { PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); error = EINVAL; break; } io->pfrio_size = min(io->pfrio_size, n); totlen = io->pfrio_size * sizeof(struct pfr_tstats); pfrtstats = mallocarray(io->pfrio_size, sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO); if (pfrtstats == NULL) { error = ENOMEM; PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); break; } error = pfr_get_tstats(&io->pfrio_table, pfrtstats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); if (error == 0) error = copyout(pfrtstats, io->pfrio_buffer, totlen); free(pfrtstats, M_TEMP); break; } case DIOCRCLRTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) { /* We used to count tables and use the minimum required * size, so we didn't fail on overly large requests. * Keep doing so. */ io->pfrio_size = pf_ioctl_maxcount; break; } totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_TEMP); break; } PF_TABLE_STATS_LOCK(); PF_RULES_RLOCK(); error = pfr_clr_tstats(pfrts, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); free(pfrts, M_TEMP); break; } case DIOCRSETTFLAGS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; int n; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; break; } PF_RULES_RLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); if (n < 0) { PF_RULES_RUNLOCK(); error = EINVAL; break; } io->pfrio_size = min(io->pfrio_size, n); PF_RULES_RUNLOCK(); totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_set_tflags(pfrts, io->pfrio_size, io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfrts, M_TEMP); break; } case DIOCRCLRADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != 0) { error = ENODEV; break; } PF_RULES_WLOCK(); error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); break; } case DIOCRADDADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_add_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRDELADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_del_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRSETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen, count; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size2 < 0) { error = EINVAL; break; } count = max(io->pfrio_size, io->pfrio_size2); if (count > pf_ioctl_maxcount || WOULD_OVERFLOW(count, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = count * sizeof(struct pfr_addr); pfras = mallocarray(count, sizeof(struct pfr_addr), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_set_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | PFR_FLAG_USERIOCTL, 0); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRGETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); error = pfr_get_addrs(&io->pfrio_table, pfras, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRGETASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_astats *pfrastats; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_astats)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_astats))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_astats); pfrastats = mallocarray(io->pfrio_size, sizeof(struct pfr_astats), M_TEMP, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); error = pfr_get_astats(&io->pfrio_table, pfrastats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfrastats, io->pfrio_buffer, totlen); free(pfrastats, M_TEMP); break; } case DIOCRCLRASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_clr_astats(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRTSTADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_RLOCK(); error = pfr_tst_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_TEMP); break; } case DIOCRINADEFINE: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; break; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; break; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_TEMP, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_TEMP); break; } PF_RULES_WLOCK(); error = pfr_ina_define(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfras, M_TEMP); break; } case DIOCOSFPADD: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; PF_RULES_WLOCK(); error = pf_osfp_add(io); PF_RULES_WUNLOCK(); break; } case DIOCOSFPGET: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; PF_RULES_RLOCK(); error = pf_osfp_get(io); PF_RULES_RUNLOCK(); break; } case DIOCXBEGIN: { struct pfioc_trans *io = (struct pfioc_trans *)addr; struct pfioc_trans_e *ioes, *ioe; size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; break; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; break; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), M_TEMP, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { free(ioes, M_TEMP); break; } /* Ensure there's no more ethernet rules to clean up. */ NET_EPOCH_DRAIN_CALLBACKS(); PF_RULES_WLOCK(); for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { ioe->anchor[sizeof(ioe->anchor) - 1] = '\0'; switch (ioe->rs_num) { case PF_RULESET_ETH: if ((error = pf_begin_eth(&ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; } break; #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EINVAL; goto fail; } if ((error = pf_begin_altq(&ioe->ticket))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: { struct pfr_table table; bzero(&table, sizeof(table)); strlcpy(table.pfrt_anchor, ioe->anchor, sizeof(table.pfrt_anchor)); if ((error = pfr_ina_begin(&table, &ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; } break; } default: if ((error = pf_begin_rules(&ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; } break; } } PF_RULES_WUNLOCK(); error = copyout(ioes, io->array, totlen); free(ioes, M_TEMP); break; } case DIOCXROLLBACK: { struct pfioc_trans *io = (struct pfioc_trans *)addr; struct pfioc_trans_e *ioe, *ioes; size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; break; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; break; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), M_TEMP, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { free(ioes, M_TEMP); break; } PF_RULES_WLOCK(); for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { ioe->anchor[sizeof(ioe->anchor) - 1] = '\0'; switch (ioe->rs_num) { case PF_RULESET_ETH: if ((error = pf_rollback_eth(ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EINVAL; goto fail; } if ((error = pf_rollback_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; #endif /* ALTQ */ case PF_RULESET_TABLE: { struct pfr_table table; bzero(&table, sizeof(table)); strlcpy(table.pfrt_anchor, ioe->anchor, sizeof(table.pfrt_anchor)); if ((error = pfr_ina_rollback(&table, ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; } default: if ((error = pf_rollback_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; } } PF_RULES_WUNLOCK(); free(ioes, M_TEMP); break; } case DIOCXCOMMIT: { struct pfioc_trans *io = (struct pfioc_trans *)addr; struct pfioc_trans_e *ioe, *ioes; struct pf_kruleset *rs; struct pf_keth_ruleset *ers; size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; break; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; break; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), M_TEMP, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { free(ioes, M_TEMP); break; } PF_RULES_WLOCK(); /* First makes sure everything will succeed. */ for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { ioe->anchor[sizeof(ioe->anchor) - 1] = 0; switch (ioe->rs_num) { case PF_RULESET_ETH: ers = pf_find_keth_ruleset(ioe->anchor); if (ers == NULL || ioe->ticket == 0 || ioe->ticket != ers->inactive.ticket) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EINVAL; goto fail; } break; #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EINVAL; goto fail; } if (!V_altqs_inactive_open || ioe->ticket != V_ticket_altqs_inactive) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EBUSY; goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: rs = pf_find_kruleset(ioe->anchor); if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EBUSY; goto fail; } break; default: if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EINVAL; goto fail; } rs = pf_find_kruleset(ioe->anchor); if (rs == NULL || !rs->rules[ioe->rs_num].inactive.open || rs->rules[ioe->rs_num].inactive.ticket != ioe->ticket) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); error = EBUSY; goto fail; } break; } } /* Now do the commit - no errors should happen here. */ for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { case PF_RULESET_ETH: if ((error = pf_commit_eth(ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; #ifdef ALTQ case PF_RULESET_ALTQ: if ((error = pf_commit_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; #endif /* ALTQ */ case PF_RULESET_TABLE: { struct pfr_table table; bzero(&table, sizeof(table)); (void)strlcpy(table.pfrt_anchor, ioe->anchor, sizeof(table.pfrt_anchor)); if ((error = pfr_ina_commit(&table, ioe->ticket, NULL, NULL, 0))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; } default: if ((error = pf_commit_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_TEMP); goto fail; /* really bad */ } break; } } PF_RULES_WUNLOCK(); /* Only hook into EtherNet taffic if we've got rules for it. */ if (! TAILQ_EMPTY(V_pf_keth->active.rules)) hook_pf_eth(); else dehook_pf_eth(); free(ioes, M_TEMP); break; } case DIOCGETSRCNODES: { struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; struct pf_srchash *sh; struct pf_ksrc_node *n; struct pf_src_node *p, *pstore; uint32_t i, nr = 0; for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH(n, &sh->nodes, entry) nr++; PF_HASHROW_UNLOCK(sh); } psn->psn_len = min(psn->psn_len, sizeof(struct pf_src_node) * nr); if (psn->psn_len == 0) { psn->psn_len = sizeof(struct pf_src_node) * nr; break; } nr = 0; p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO); for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH(n, &sh->nodes, entry) { if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) break; pf_src_node_copy(n, p); p++; nr++; } PF_HASHROW_UNLOCK(sh); } error = copyout(pstore, psn->psn_src_nodes, sizeof(struct pf_src_node) * nr); if (error) { free(pstore, M_TEMP); break; } psn->psn_len = sizeof(struct pf_src_node) * nr; free(pstore, M_TEMP); break; } case DIOCCLRSRCNODES: { pf_clear_srcnodes(NULL); pf_purge_expired_src_nodes(); break; } case DIOCKILLSRCNODES: pf_kill_srcnodes((struct pfioc_src_node_kill *)addr); break; #ifdef COMPAT_FREEBSD13 case DIOCKEEPCOUNTERS_FREEBSD13: #endif case DIOCKEEPCOUNTERS: error = pf_keepcounters((struct pfioc_nv *)addr); break; case DIOCGETSYNCOOKIES: error = pf_get_syncookies((struct pfioc_nv *)addr); break; case DIOCSETSYNCOOKIES: error = pf_set_syncookies((struct pfioc_nv *)addr); break; case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; PF_RULES_WLOCK(); if (*hostid == 0) V_pf_status.hostid = arc4random(); else V_pf_status.hostid = *hostid; PF_RULES_WUNLOCK(); break; } case DIOCOSFPFLUSH: PF_RULES_WLOCK(); pf_osfp_flush(); PF_RULES_WUNLOCK(); break; case DIOCIGETIFACES: { struct pfioc_iface *io = (struct pfioc_iface *)addr; struct pfi_kif *ifstore; size_t bufsiz; if (io->pfiio_esize != sizeof(struct pfi_kif)) { error = ENODEV; break; } if (io->pfiio_size < 0 || io->pfiio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfiio_size, sizeof(struct pfi_kif))) { error = EINVAL; break; } io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0'; bufsiz = io->pfiio_size * sizeof(struct pfi_kif); ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif), M_TEMP, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size); PF_RULES_RUNLOCK(); error = copyout(ifstore, io->pfiio_buffer, bufsiz); free(ifstore, M_TEMP); break; } case DIOCSETIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0'; PF_RULES_WLOCK(); error = pfi_set_flags(io->pfiio_name, io->pfiio_flags); PF_RULES_WUNLOCK(); break; } case DIOCCLRIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0'; PF_RULES_WLOCK(); error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags); PF_RULES_WUNLOCK(); break; } case DIOCSETREASS: { u_int32_t *reass = (u_int32_t *)addr; V_pf_status.reass = *reass & (PF_REASS_ENABLED|PF_REASS_NODF); /* Removal of DF flag without reassembly enabled is not a * valid combination. Disable reassembly in such case. */ if (!(V_pf_status.reass & PF_REASS_ENABLED)) V_pf_status.reass = 0; break; } default: error = ENODEV; break; } fail: if (sx_xlocked(&V_pf_ioctl_lock)) sx_xunlock(&V_pf_ioctl_lock); CURVNET_RESTORE(); #undef ERROUT_IOCTL return (error); } void pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_version) { bzero(sp, sizeof(union pfsync_state_union)); /* copy from state key */ sp->pfs_1301.key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; sp->pfs_1301.key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; sp->pfs_1301.key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; sp->pfs_1301.key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; sp->pfs_1301.key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; sp->pfs_1301.key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; sp->pfs_1301.key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; sp->pfs_1301.key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; sp->pfs_1301.proto = st->key[PF_SK_WIRE]->proto; sp->pfs_1301.af = st->key[PF_SK_WIRE]->af; /* copy from state */ strlcpy(sp->pfs_1301.ifname, st->kif->pfik_name, sizeof(sp->pfs_1301.ifname)); bcopy(&st->rt_addr, &sp->pfs_1301.rt_addr, sizeof(sp->pfs_1301.rt_addr)); sp->pfs_1301.creation = htonl(time_uptime - st->creation); sp->pfs_1301.expire = pf_state_expires(st); if (sp->pfs_1301.expire <= time_uptime) sp->pfs_1301.expire = htonl(0); else sp->pfs_1301.expire = htonl(sp->pfs_1301.expire - time_uptime); sp->pfs_1301.direction = st->direction; - sp->pfs_1301.log = st->log; + sp->pfs_1301.log = st->act.log; sp->pfs_1301.timeout = st->timeout; switch (msg_version) { case PFSYNC_MSG_VERSION_1301: sp->pfs_1301.state_flags = st->state_flags; break; case PFSYNC_MSG_VERSION_1400: sp->pfs_1400.state_flags = htons(st->state_flags); - sp->pfs_1400.qid = htons(st->qid); - sp->pfs_1400.pqid = htons(st->pqid); - sp->pfs_1400.dnpipe = htons(st->dnpipe); - sp->pfs_1400.dnrpipe = htons(st->dnrpipe); - sp->pfs_1400.rtableid = htonl(st->rtableid); - sp->pfs_1400.min_ttl = st->min_ttl; - sp->pfs_1400.set_tos = st->set_tos; - sp->pfs_1400.max_mss = htons(st->max_mss); - sp->pfs_1400.set_prio[0] = st->set_prio[0]; - sp->pfs_1400.set_prio[1] = st->set_prio[1]; + sp->pfs_1400.qid = htons(st->act.qid); + sp->pfs_1400.pqid = htons(st->act.pqid); + sp->pfs_1400.dnpipe = htons(st->act.dnpipe); + sp->pfs_1400.dnrpipe = htons(st->act.dnrpipe); + sp->pfs_1400.rtableid = htonl(st->act.rtableid); + sp->pfs_1400.min_ttl = st->act.min_ttl; + sp->pfs_1400.set_tos = st->act.set_tos; + sp->pfs_1400.max_mss = htons(st->act.max_mss); + sp->pfs_1400.set_prio[0] = st->act.set_prio[0]; + sp->pfs_1400.set_prio[1] = st->act.set_prio[1]; sp->pfs_1400.rt = st->rt; if (st->rt_kif) strlcpy(sp->pfs_1400.rt_ifname, st->rt_kif->pfik_name, sizeof(sp->pfs_1400.rt_ifname)); break; default: panic("%s: Unsupported pfsync_msg_version %d", __func__, msg_version); } if (st->src_node) sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE; if (st->nat_src_node) sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE; sp->pfs_1301.id = st->id; sp->pfs_1301.creatorid = st->creatorid; pf_state_peer_hton(&st->src, &sp->pfs_1301.src); pf_state_peer_hton(&st->dst, &sp->pfs_1301.dst); if (st->rule.ptr == NULL) sp->pfs_1301.rule = htonl(-1); else sp->pfs_1301.rule = htonl(st->rule.ptr->nr); if (st->anchor.ptr == NULL) sp->pfs_1301.anchor = htonl(-1); else sp->pfs_1301.anchor = htonl(st->anchor.ptr->nr); if (st->nat_rule.ptr == NULL) sp->pfs_1301.nat_rule = htonl(-1); else sp->pfs_1301.nat_rule = htonl(st->nat_rule.ptr->nr); pf_state_counter_hton(st->packets[0], sp->pfs_1301.packets[0]); pf_state_counter_hton(st->packets[1], sp->pfs_1301.packets[1]); pf_state_counter_hton(st->bytes[0], sp->pfs_1301.bytes[0]); pf_state_counter_hton(st->bytes[1], sp->pfs_1301.bytes[1]); } void pf_state_export(struct pf_state_export *sp, struct pf_kstate *st) { bzero(sp, sizeof(*sp)); sp->version = PF_STATE_VERSION; /* copy from state key */ sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; sp->proto = st->key[PF_SK_WIRE]->proto; sp->af = st->key[PF_SK_WIRE]->af; /* copy from state */ strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); strlcpy(sp->orig_ifname, st->orig_kif->pfik_name, sizeof(sp->orig_ifname)); bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); sp->creation = htonl(time_uptime - st->creation); sp->expire = pf_state_expires(st); if (sp->expire <= time_uptime) sp->expire = htonl(0); else sp->expire = htonl(sp->expire - time_uptime); sp->direction = st->direction; - sp->log = st->log; + sp->log = st->act.log; sp->timeout = st->timeout; /* 8 bits for the old libpfctl, 16 bits for the new libpfctl */ sp->state_flags_compat = st->state_flags; sp->state_flags = htons(st->state_flags); if (st->src_node) sp->sync_flags |= PFSYNC_FLAG_SRCNODE; if (st->nat_src_node) sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; sp->id = st->id; sp->creatorid = st->creatorid; pf_state_peer_hton(&st->src, &sp->src); pf_state_peer_hton(&st->dst, &sp->dst); if (st->rule.ptr == NULL) sp->rule = htonl(-1); else sp->rule = htonl(st->rule.ptr->nr); if (st->anchor.ptr == NULL) sp->anchor = htonl(-1); else sp->anchor = htonl(st->anchor.ptr->nr); if (st->nat_rule.ptr == NULL) sp->nat_rule = htonl(-1); else sp->nat_rule = htonl(st->nat_rule.ptr->nr); sp->packets[0] = st->packets[0]; sp->packets[1] = st->packets[1]; sp->bytes[0] = st->bytes[0]; sp->bytes[1] = st->bytes[1]; - sp->qid = htons(st->qid); - sp->pqid = htons(st->pqid); - sp->dnpipe = htons(st->dnpipe); - sp->dnrpipe = htons(st->dnrpipe); - sp->rtableid = htonl(st->rtableid); - sp->min_ttl = st->min_ttl; - sp->set_tos = st->set_tos; - sp->max_mss = htons(st->max_mss); + sp->qid = htons(st->act.qid); + sp->pqid = htons(st->act.pqid); + sp->dnpipe = htons(st->act.dnpipe); + sp->dnrpipe = htons(st->act.dnrpipe); + sp->rtableid = htonl(st->act.rtableid); + sp->min_ttl = st->act.min_ttl; + sp->set_tos = st->act.set_tos; + sp->max_mss = htons(st->act.max_mss); sp->rt = st->rt; if (st->rt_kif) strlcpy(sp->rt_ifname, st->rt_kif->pfik_name, sizeof(sp->rt_ifname)); - sp->set_prio[0] = st->set_prio[0]; - sp->set_prio[1] = st->set_prio[1]; + sp->set_prio[0] = st->act.set_prio[0]; + sp->set_prio[1] = st->act.set_prio[1]; } static void pf_tbladdr_copyout(struct pf_addr_wrap *aw) { struct pfr_ktable *kt; KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type)); kt = aw->p.tbl; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; aw->p.tbl = NULL; aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? kt->pfrkt_cnt : -1; } static int pf_add_status_counters(nvlist_t *nvl, const char *name, counter_u64_t *counters, size_t number, char **names) { nvlist_t *nvc; nvc = nvlist_create(0); if (nvc == NULL) return (ENOMEM); for (int i = 0; i < number; i++) { nvlist_append_number_array(nvc, "counters", counter_u64_fetch(counters[i])); nvlist_append_string_array(nvc, "names", names[i]); nvlist_append_number_array(nvc, "ids", i); } nvlist_add_nvlist(nvl, name, nvc); nvlist_destroy(nvc); return (0); } static int pf_getstatus(struct pfioc_nv *nv) { nvlist_t *nvl = NULL, *nvc = NULL; void *nvlpacked = NULL; int error; struct pf_status s; char *pf_reasons[PFRES_MAX+1] = PFRES_NAMES; char *pf_lcounter[KLCNT_MAX+1] = KLCNT_NAMES; char *pf_fcounter[FCNT_MAX+1] = FCNT_NAMES; PF_RULES_RLOCK_TRACKER; #define ERROUT(x) ERROUT_FUNCTION(errout, x) PF_RULES_RLOCK(); nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); nvlist_add_bool(nvl, "running", V_pf_status.running); nvlist_add_number(nvl, "since", V_pf_status.since); nvlist_add_number(nvl, "debug", V_pf_status.debug); nvlist_add_number(nvl, "hostid", V_pf_status.hostid); nvlist_add_number(nvl, "states", V_pf_status.states); nvlist_add_number(nvl, "src_nodes", V_pf_status.src_nodes); nvlist_add_number(nvl, "reass", V_pf_status.reass); nvlist_add_bool(nvl, "syncookies_active", V_pf_status.syncookies_active); /* counters */ error = pf_add_status_counters(nvl, "counters", V_pf_status.counters, PFRES_MAX, pf_reasons); if (error != 0) ERROUT(error); /* lcounters */ error = pf_add_status_counters(nvl, "lcounters", V_pf_status.lcounters, KLCNT_MAX, pf_lcounter); if (error != 0) ERROUT(error); /* fcounters */ nvc = nvlist_create(0); if (nvc == NULL) ERROUT(ENOMEM); for (int i = 0; i < FCNT_MAX; i++) { nvlist_append_number_array(nvc, "counters", pf_counter_u64_fetch(&V_pf_status.fcounters[i])); nvlist_append_string_array(nvc, "names", pf_fcounter[i]); nvlist_append_number_array(nvc, "ids", i); } nvlist_add_nvlist(nvl, "fcounters", nvc); nvlist_destroy(nvc); nvc = NULL; /* scounters */ error = pf_add_status_counters(nvl, "scounters", V_pf_status.scounters, SCNT_MAX, pf_fcounter); if (error != 0) ERROUT(error); nvlist_add_string(nvl, "ifname", V_pf_status.ifname); nvlist_add_binary(nvl, "chksum", V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH); pfi_update_status(V_pf_status.ifname, &s); /* pcounters / bcounters */ for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { for (int k = 0; k < 2; k++) { nvlist_append_number_array(nvl, "pcounters", s.pcounters[i][j][k]); } nvlist_append_number_array(nvl, "bcounters", s.bcounters[i][j]); } } nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); PF_RULES_RUNLOCK(); error = copyout(nvlpacked, nv->data, nv->len); goto done; #undef ERROUT errout: PF_RULES_RUNLOCK(); done: free(nvlpacked, M_NVLIST); nvlist_destroy(nvc); nvlist_destroy(nvl); return (error); } /* * XXX - Check for version mismatch!!! */ static void pf_clear_all_states(void) { struct pf_kstate *s; u_int i; for (i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; relock: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { s->timeout = PFTM_PURGE; /* Don't send out individual delete messages. */ s->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(s); goto relock; } PF_HASHROW_UNLOCK(ih); } } static int pf_clear_tables(void) { struct pfioc_table io; int error; bzero(&io, sizeof(io)); error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel, io.pfrio_flags); return (error); } static void pf_clear_srcnodes(struct pf_ksrc_node *n) { struct pf_kstate *s; int i; for (i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { if (n == NULL || n == s->src_node) s->src_node = NULL; if (n == NULL || n == s->nat_src_node) s->nat_src_node = NULL; } PF_HASHROW_UNLOCK(ih); } if (n == NULL) { struct pf_srchash *sh; for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH(n, &sh->nodes, entry) { n->expire = 1; n->states = 0; } PF_HASHROW_UNLOCK(sh); } } else { /* XXX: hash slot should already be locked here. */ n->expire = 1; n->states = 0; } } static void pf_kill_srcnodes(struct pfioc_src_node_kill *psnk) { struct pf_ksrc_node_list kill; LIST_INIT(&kill); for (int i = 0; i <= pf_srchashmask; i++) { struct pf_srchash *sh = &V_pf_srchash[i]; struct pf_ksrc_node *sn, *tmp; PF_HASHROW_LOCK(sh); LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp) if (PF_MATCHA(psnk->psnk_src.neg, &psnk->psnk_src.addr.v.a.addr, &psnk->psnk_src.addr.v.a.mask, &sn->addr, sn->af) && PF_MATCHA(psnk->psnk_dst.neg, &psnk->psnk_dst.addr.v.a.addr, &psnk->psnk_dst.addr.v.a.mask, &sn->raddr, sn->af)) { pf_unlink_src_node(sn); LIST_INSERT_HEAD(&kill, sn, entry); sn->expire = 1; } PF_HASHROW_UNLOCK(sh); } for (int i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_kstate *s; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { if (s->src_node && s->src_node->expire == 1) s->src_node = NULL; if (s->nat_src_node && s->nat_src_node->expire == 1) s->nat_src_node = NULL; } PF_HASHROW_UNLOCK(ih); } psnk->psnk_killed = pf_free_src_nodes(&kill); } static int pf_keepcounters(struct pfioc_nv *nv) { nvlist_t *nvl = NULL; void *nvlpacked = NULL; int error = 0; #define ERROUT(x) ERROUT_FUNCTION(on_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); if (nvlpacked == NULL) ERROUT(ENOMEM); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_bool(nvl, "keep_counters")) ERROUT(EBADMSG); V_pf_status.keep_counters = nvlist_get_bool(nvl, "keep_counters"); on_error: nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); return (error); } static unsigned int pf_clear_states(const struct pf_kstate_kill *kill) { struct pf_state_key_cmp match_key; struct pf_kstate *s; struct pfi_kkif *kif; int idx; unsigned int killed = 0, dir; for (unsigned int i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; relock_DIOCCLRSTATES: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { /* For floating states look at the original kif. */ kif = s->kif == V_pfi_all ? s->orig_kif : s->kif; if (kill->psk_ifname[0] && strcmp(kill->psk_ifname, kif->pfik_name)) continue; if (kill->psk_kill_match) { bzero(&match_key, sizeof(match_key)); if (s->direction == PF_OUT) { dir = PF_IN; idx = PF_SK_STACK; } else { dir = PF_OUT; idx = PF_SK_WIRE; } match_key.af = s->key[idx]->af; match_key.proto = s->key[idx]->proto; PF_ACPY(&match_key.addr[0], &s->key[idx]->addr[1], match_key.af); match_key.port[0] = s->key[idx]->port[1]; PF_ACPY(&match_key.addr[1], &s->key[idx]->addr[0], match_key.af); match_key.port[1] = s->key[idx]->port[0]; } /* * Don't send out individual * delete messages. */ s->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(s); killed++; if (kill->psk_kill_match) killed += pf_kill_matching_state(&match_key, dir); goto relock_DIOCCLRSTATES; } PF_HASHROW_UNLOCK(ih); } if (V_pfsync_clear_states_ptr != NULL) V_pfsync_clear_states_ptr(V_pf_status.hostid, kill->psk_ifname); return (killed); } static void pf_killstates(struct pf_kstate_kill *kill, unsigned int *killed) { struct pf_kstate *s; if (kill->psk_pfcmp.id) { if (kill->psk_pfcmp.creatorid == 0) kill->psk_pfcmp.creatorid = V_pf_status.hostid; if ((s = pf_find_state_byid(kill->psk_pfcmp.id, kill->psk_pfcmp.creatorid))) { pf_unlink_state(s); *killed = 1; } return; } for (unsigned int i = 0; i <= pf_hashmask; i++) *killed += pf_killstates_row(kill, &V_pf_idhash[i]); return; } static int pf_killstates_nv(struct pfioc_nv *nv) { struct pf_kstate_kill kill; nvlist_t *nvl = NULL; void *nvlpacked = NULL; int error = 0; unsigned int killed = 0; #define ERROUT(x) ERROUT_FUNCTION(on_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); if (nvlpacked == NULL) ERROUT(ENOMEM); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); error = pf_nvstate_kill_to_kstate_kill(nvl, &kill); if (error) ERROUT(error); pf_killstates(&kill, &killed); free(nvlpacked, M_NVLIST); nvlpacked = NULL; nvlist_destroy(nvl); nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); nvlist_add_number(nvl, "killed", killed); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); on_error: nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); return (error); } static int pf_clearstates_nv(struct pfioc_nv *nv) { struct pf_kstate_kill kill; nvlist_t *nvl = NULL; void *nvlpacked = NULL; int error = 0; unsigned int killed; #define ERROUT(x) ERROUT_FUNCTION(on_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); if (nvlpacked == NULL) ERROUT(ENOMEM); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); error = pf_nvstate_kill_to_kstate_kill(nvl, &kill); if (error) ERROUT(error); killed = pf_clear_states(&kill); free(nvlpacked, M_NVLIST); nvlpacked = NULL; nvlist_destroy(nvl); nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); nvlist_add_number(nvl, "killed", killed); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); #undef ERROUT on_error: nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); return (error); } static int pf_getstate(struct pfioc_nv *nv) { nvlist_t *nvl = NULL, *nvls; void *nvlpacked = NULL; struct pf_kstate *s = NULL; int error = 0; uint64_t id, creatorid; #define ERROUT(x) ERROUT_FUNCTION(errout, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); if (nvlpacked == NULL) ERROUT(ENOMEM); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); PFNV_CHK(pf_nvuint64(nvl, "id", &id)); PFNV_CHK(pf_nvuint64(nvl, "creatorid", &creatorid)); s = pf_find_state_byid(id, creatorid); if (s == NULL) ERROUT(ENOENT); free(nvlpacked, M_NVLIST); nvlpacked = NULL; nvlist_destroy(nvl); nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); nvls = pf_state_to_nvstate(s); if (nvls == NULL) ERROUT(ENOMEM); nvlist_add_nvlist(nvl, "state", nvls); nvlist_destroy(nvls); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); #undef ERROUT errout: if (s != NULL) PF_STATE_UNLOCK(s); free(nvlpacked, M_NVLIST); nvlist_destroy(nvl); return (error); } /* * XXX - Check for version mismatch!!! */ /* * Duplicate pfctl -Fa operation to get rid of as much as we can. */ static int shutdown_pf(void) { int error = 0; u_int32_t t[5]; char nn = '\0'; do { if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n")); break; } if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n")); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n")); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n")); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n")); break; /* XXX: rollback? */ } /* XXX: these should always succeed here */ pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn); pf_commit_rules(t[1], PF_RULESET_FILTER, &nn); pf_commit_rules(t[2], PF_RULESET_NAT, &nn); pf_commit_rules(t[3], PF_RULESET_BINAT, &nn); pf_commit_rules(t[4], PF_RULESET_RDR, &nn); if ((error = pf_clear_tables()) != 0) break; if ((error = pf_begin_eth(&t[0], &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: eth\n")); break; } pf_commit_eth(t[0], &nn); #ifdef ALTQ if ((error = pf_begin_altq(&t[0])) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n")); break; } pf_commit_altq(t[0]); #endif pf_clear_all_states(); pf_clear_srcnodes(NULL); /* status does not use malloced mem so no need to cleanup */ /* fingerprints and interfaces have their own cleanup code */ } while(0); return (error); } static pfil_return_t pf_check_return(int chk, struct mbuf **m) { switch (chk) { case PF_PASS: if (*m == NULL) return (PFIL_CONSUMED); else return (PFIL_PASS); break; default: if (*m != NULL) { m_freem(*m); *m = NULL; } return (PFIL_DROPPED); } } static pfil_return_t pf_eth_check_in(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; chk = pf_test_eth(PF_IN, flags, ifp, m, inp); return (pf_check_return(chk, m)); } static pfil_return_t pf_eth_check_out(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; chk = pf_test_eth(PF_OUT, flags, ifp, m, inp); return (pf_check_return(chk, m)); } #ifdef INET static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; chk = pf_test(PF_IN, flags, ifp, m, inp, NULL); return (pf_check_return(chk, m)); } static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; chk = pf_test(PF_OUT, flags, ifp, m, inp, NULL); return (pf_check_return(chk, m)); } #endif #ifdef INET6 static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; /* * In case of loopback traffic IPv6 uses the real interface in * order to support scoped addresses. In order to support stateful * filtering we have change this to lo0 as it is the case in IPv4. */ CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_IN, flags, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, inp, NULL); CURVNET_RESTORE(); return (pf_check_return(chk, m)); } static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; CURVNET_SET(ifp->if_vnet); chk = pf_test6(PF_OUT, flags, ifp, m, inp, NULL); CURVNET_RESTORE(); return (pf_check_return(chk, m)); } #endif /* INET6 */ VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_in_hook); VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_out_hook); #define V_pf_eth_in_hook VNET(pf_eth_in_hook) #define V_pf_eth_out_hook VNET(pf_eth_out_hook) #ifdef INET VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook); VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook); #define V_pf_ip4_in_hook VNET(pf_ip4_in_hook) #define V_pf_ip4_out_hook VNET(pf_ip4_out_hook) #endif #ifdef INET6 VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_in_hook); VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_out_hook); #define V_pf_ip6_in_hook VNET(pf_ip6_in_hook) #define V_pf_ip6_out_hook VNET(pf_ip6_out_hook) #endif static void hook_pf_eth(void) { struct pfil_hook_args pha = { .pa_version = PFIL_VERSION, .pa_modname = "pf", .pa_type = PFIL_TYPE_ETHERNET, }; struct pfil_link_args pla = { .pa_version = PFIL_VERSION, }; int ret __diagused; if (atomic_load_bool(&V_pf_pfil_eth_hooked)) return; pha.pa_mbuf_chk = pf_eth_check_in; pha.pa_flags = PFIL_IN; pha.pa_rulname = "eth-in"; V_pf_eth_in_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_link_pfil_head; pla.pa_hook = V_pf_eth_in_hook; ret = pfil_link(&pla); MPASS(ret == 0); pha.pa_mbuf_chk = pf_eth_check_out; pha.pa_flags = PFIL_OUT; pha.pa_rulname = "eth-out"; V_pf_eth_out_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_link_pfil_head; pla.pa_hook = V_pf_eth_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); atomic_store_bool(&V_pf_pfil_eth_hooked, true); } static void hook_pf(void) { struct pfil_hook_args pha = { .pa_version = PFIL_VERSION, .pa_modname = "pf", }; struct pfil_link_args pla = { .pa_version = PFIL_VERSION, }; int ret __diagused; if (atomic_load_bool(&V_pf_pfil_hooked)) return; #ifdef INET pha.pa_type = PFIL_TYPE_IP4; pha.pa_mbuf_chk = pf_check_in; pha.pa_flags = PFIL_IN; pha.pa_rulname = "default-in"; V_pf_ip4_in_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet_pfil_head; pla.pa_hook = V_pf_ip4_in_hook; ret = pfil_link(&pla); MPASS(ret == 0); pha.pa_mbuf_chk = pf_check_out; pha.pa_flags = PFIL_OUT; pha.pa_rulname = "default-out"; V_pf_ip4_out_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet_pfil_head; pla.pa_hook = V_pf_ip4_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); if (V_pf_filter_local) { pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet_local_pfil_head; pla.pa_hook = V_pf_ip4_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); } #endif #ifdef INET6 pha.pa_type = PFIL_TYPE_IP6; pha.pa_mbuf_chk = pf_check6_in; pha.pa_flags = PFIL_IN; pha.pa_rulname = "default-in6"; V_pf_ip6_in_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet6_pfil_head; pla.pa_hook = V_pf_ip6_in_hook; ret = pfil_link(&pla); MPASS(ret == 0); pha.pa_mbuf_chk = pf_check6_out; pha.pa_rulname = "default-out6"; pha.pa_flags = PFIL_OUT; V_pf_ip6_out_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet6_pfil_head; pla.pa_hook = V_pf_ip6_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); if (V_pf_filter_local) { pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet6_local_pfil_head; pla.pa_hook = V_pf_ip6_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); } #endif atomic_store_bool(&V_pf_pfil_hooked, true); } static void dehook_pf_eth(void) { if (!atomic_load_bool(&V_pf_pfil_eth_hooked)) return; pfil_remove_hook(V_pf_eth_in_hook); pfil_remove_hook(V_pf_eth_out_hook); atomic_store_bool(&V_pf_pfil_eth_hooked, false); } static void dehook_pf(void) { if (!atomic_load_bool(&V_pf_pfil_hooked)) return; #ifdef INET pfil_remove_hook(V_pf_ip4_in_hook); pfil_remove_hook(V_pf_ip4_out_hook); #endif #ifdef INET6 pfil_remove_hook(V_pf_ip6_in_hook); pfil_remove_hook(V_pf_ip6_out_hook); #endif atomic_store_bool(&V_pf_pfil_hooked, false); } static void pf_load_vnet(void) { V_pf_tag_z = uma_zcreate("pf tags", sizeof(struct pf_tagname), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); rm_init_flags(&V_pf_rules_lock, "pf rulesets", RM_RECURSE); sx_init(&V_pf_ioctl_lock, "pf ioctl"); pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT); #ifdef ALTQ pf_init_tagset(&V_pf_qids, &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT); #endif V_pf_keth = &V_pf_main_keth_anchor.ruleset; pfattach_vnet(); V_pf_vnet_active = 1; } static int pf_load(void) { int error; sx_init(&pf_end_lock, "pf end thread"); pf_mtag_initialize(); pf_dev = make_dev(&pf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, PF_NAME); if (pf_dev == NULL) return (ENOMEM); pf_end_threads = 0; error = kproc_create(pf_purge_thread, NULL, &pf_purge_proc, 0, 0, "pf purge"); if (error != 0) return (error); pfi_initialize(); return (0); } static void pf_unload_vnet(void) { int ret __diagused; V_pf_vnet_active = 0; V_pf_status.running = 0; dehook_pf(); dehook_pf_eth(); PF_RULES_WLOCK(); pf_syncookies_cleanup(); shutdown_pf(); PF_RULES_WUNLOCK(); /* Make sure we've cleaned up ethernet rules before we continue. */ NET_EPOCH_DRAIN_CALLBACKS(); ret = swi_remove(V_pf_swi_cookie); MPASS(ret == 0); ret = intr_event_destroy(V_pf_swi_ie); MPASS(ret == 0); pf_unload_vnet_purge(); pf_normalize_cleanup(); PF_RULES_WLOCK(); pfi_cleanup_vnet(); PF_RULES_WUNLOCK(); pfr_cleanup(); pf_osfp_flush(); pf_cleanup(); if (IS_DEFAULT_VNET(curvnet)) pf_mtag_cleanup(); pf_cleanup_tagset(&V_pf_tags); #ifdef ALTQ pf_cleanup_tagset(&V_pf_qids); #endif uma_zdestroy(V_pf_tag_z); #ifdef PF_WANT_32_TO_64_COUNTER PF_RULES_WLOCK(); LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist); MPASS(LIST_EMPTY(&V_pf_allkiflist)); MPASS(V_pf_allkifcount == 0); LIST_REMOVE(&V_pf_default_rule, allrulelist); V_pf_allrulecount--; LIST_REMOVE(V_pf_rulemarker, allrulelist); /* * There are known pf rule leaks when running the test suite. */ #ifdef notyet MPASS(LIST_EMPTY(&V_pf_allrulelist)); MPASS(V_pf_allrulecount == 0); #endif PF_RULES_WUNLOCK(); free(V_pf_kifmarker, PFI_MTYPE); free(V_pf_rulemarker, M_PFRULE); #endif /* Free counters last as we updated them during shutdown. */ pf_counter_u64_deinit(&V_pf_default_rule.evaluations); for (int i = 0; i < 2; i++) { pf_counter_u64_deinit(&V_pf_default_rule.packets[i]); pf_counter_u64_deinit(&V_pf_default_rule.bytes[i]); } counter_u64_free(V_pf_default_rule.states_cur); counter_u64_free(V_pf_default_rule.states_tot); counter_u64_free(V_pf_default_rule.src_nodes); uma_zfree_pcpu(pf_timestamp_pcpu_zone, V_pf_default_rule.timestamp); for (int i = 0; i < PFRES_MAX; i++) counter_u64_free(V_pf_status.counters[i]); for (int i = 0; i < KLCNT_MAX; i++) counter_u64_free(V_pf_status.lcounters[i]); for (int i = 0; i < FCNT_MAX; i++) pf_counter_u64_deinit(&V_pf_status.fcounters[i]); for (int i = 0; i < SCNT_MAX; i++) counter_u64_free(V_pf_status.scounters[i]); rm_destroy(&V_pf_rules_lock); sx_destroy(&V_pf_ioctl_lock); } static void pf_unload(void) { sx_xlock(&pf_end_lock); pf_end_threads = 1; while (pf_end_threads < 2) { wakeup_one(pf_purge_thread); sx_sleep(pf_purge_proc, &pf_end_lock, 0, "pftmo", 0); } sx_xunlock(&pf_end_lock); if (pf_dev != NULL) destroy_dev(pf_dev); pfi_cleanup(); sx_destroy(&pf_end_lock); } static void vnet_pf_init(void *unused __unused) { pf_load_vnet(); } VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, vnet_pf_init, NULL); static void vnet_pf_uninit(const void *unused __unused) { pf_unload_vnet(); } SYSUNINIT(pf_unload, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND, pf_unload, NULL); VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, vnet_pf_uninit, NULL); static int pf_modevent(module_t mod, int type, void *data) { int error = 0; switch(type) { case MOD_LOAD: error = pf_load(); break; case MOD_UNLOAD: /* Handled in SYSUNINIT(pf_unload) to ensure it's done after * the vnet_pf_uninit()s */ break; default: error = EINVAL; break; } return (error); } static moduledata_t pf_mod = { "pf", pf_modevent, 0 }; DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND); MODULE_VERSION(pf, PF_MODVER); diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c index f62d0e7e51a4..5f6d9c1635cd 100644 --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -1,2110 +1,2080 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright 2001 Niels Provos * Copyright 2011-2018 Alexander Bluhm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_pf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ struct pf_frent { TAILQ_ENTRY(pf_frent) fr_next; struct mbuf *fe_m; uint16_t fe_hdrlen; /* ipv4 header length with ip options ipv6, extension, fragment header */ uint16_t fe_extoff; /* last extension header offset or 0 */ uint16_t fe_len; /* fragment length */ uint16_t fe_off; /* fragment offset */ uint16_t fe_mff; /* more fragment flag */ }; struct pf_fragment_cmp { struct pf_addr frc_src; struct pf_addr frc_dst; uint32_t frc_id; sa_family_t frc_af; uint8_t frc_proto; }; struct pf_fragment { struct pf_fragment_cmp fr_key; #define fr_src fr_key.frc_src #define fr_dst fr_key.frc_dst #define fr_id fr_key.frc_id #define fr_af fr_key.frc_af #define fr_proto fr_key.frc_proto /* pointers to queue element */ struct pf_frent *fr_firstoff[PF_FRAG_ENTRY_POINTS]; /* count entries between pointers */ uint8_t fr_entries[PF_FRAG_ENTRY_POINTS]; RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; uint32_t fr_timeout; uint16_t fr_maxlen; /* maximum length of single fragment */ u_int16_t fr_holes; /* number of holes in the queue */ TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; }; struct pf_fragment_tag { uint16_t ft_hdrlen; /* header length of reassembled pkt */ uint16_t ft_extoff; /* last extension header offset or 0 */ uint16_t ft_maxlen; /* maximum fragment payload length */ uint32_t ft_id; /* fragment id */ }; VNET_DEFINE_STATIC(struct mtx, pf_frag_mtx); #define V_pf_frag_mtx VNET(pf_frag_mtx) #define PF_FRAG_LOCK() mtx_lock(&V_pf_frag_mtx) #define PF_FRAG_UNLOCK() mtx_unlock(&V_pf_frag_mtx) #define PF_FRAG_ASSERT() mtx_assert(&V_pf_frag_mtx, MA_OWNED) VNET_DEFINE(uma_zone_t, pf_state_scrub_z); /* XXX: shared with pfsync */ VNET_DEFINE_STATIC(uma_zone_t, pf_frent_z); #define V_pf_frent_z VNET(pf_frent_z) VNET_DEFINE_STATIC(uma_zone_t, pf_frag_z); #define V_pf_frag_z VNET(pf_frag_z) TAILQ_HEAD(pf_fragqueue, pf_fragment); TAILQ_HEAD(pf_cachequeue, pf_fragment); VNET_DEFINE_STATIC(struct pf_fragqueue, pf_fragqueue); #define V_pf_fragqueue VNET(pf_fragqueue) RB_HEAD(pf_frag_tree, pf_fragment); VNET_DEFINE_STATIC(struct pf_frag_tree, pf_frag_tree); #define V_pf_frag_tree VNET(pf_frag_tree) static int pf_frag_compare(struct pf_fragment *, struct pf_fragment *); static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); static void pf_flush_fragments(void); static void pf_free_fragment(struct pf_fragment *); static void pf_remove_fragment(struct pf_fragment *); static struct pf_frent *pf_create_fragment(u_short *); static int pf_frent_holes(struct pf_frent *frent); static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree); static inline int pf_frent_index(struct pf_frent *); static int pf_frent_insert(struct pf_fragment *, struct pf_frent *, struct pf_frent *); void pf_frent_remove(struct pf_fragment *, struct pf_frent *); struct pf_frent *pf_frent_previous(struct pf_fragment *, struct pf_frent *); static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *, struct pf_frent *, u_short *); static struct mbuf *pf_join_fragment(struct pf_fragment *); #ifdef INET static int pf_reassemble(struct mbuf **, struct ip *, int, u_short *); #endif /* INET */ #ifdef INET6 static int pf_reassemble6(struct mbuf **, struct ip6_hdr *, struct ip6_frag *, uint16_t, uint16_t, u_short *); #endif /* INET6 */ #define DPFPRINTF(x) do { \ if (V_pf_status.debug >= PF_DEBUG_MISC) { \ printf("%s: ", __func__); \ printf x ; \ } \ } while(0) #ifdef INET static void pf_ip2key(struct ip *ip, int dir, struct pf_fragment_cmp *key) { key->frc_src.v4 = ip->ip_src; key->frc_dst.v4 = ip->ip_dst; key->frc_af = AF_INET; key->frc_proto = ip->ip_p; key->frc_id = ip->ip_id; } #endif /* INET */ void pf_normalize_init(void) { V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_state_scrub_z = uma_zcreate("pf state scrubs", sizeof(struct pf_state_scrub), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mtx_init(&V_pf_frag_mtx, "pf fragments", NULL, MTX_DEF); V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z; V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT); uma_zone_set_warning(V_pf_frent_z, "PF frag entries limit reached"); TAILQ_INIT(&V_pf_fragqueue); } void pf_normalize_cleanup(void) { uma_zdestroy(V_pf_state_scrub_z); uma_zdestroy(V_pf_frent_z); uma_zdestroy(V_pf_frag_z); mtx_destroy(&V_pf_frag_mtx); } static int pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) { int diff; if ((diff = a->fr_id - b->fr_id) != 0) return (diff); if ((diff = a->fr_proto - b->fr_proto) != 0) return (diff); if ((diff = a->fr_af - b->fr_af) != 0) return (diff); if ((diff = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af)) != 0) return (diff); if ((diff = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0) return (diff); return (0); } void pf_purge_expired_fragments(void) { u_int32_t expire = time_uptime - V_pf_default_rule.timeout[PFTM_FRAG]; pf_purge_fragments(expire); } void pf_purge_fragments(uint32_t expire) { struct pf_fragment *frag; PF_FRAG_LOCK(); while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) { if (frag->fr_timeout > expire) break; DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); pf_free_fragment(frag); } PF_FRAG_UNLOCK(); } /* * Try to flush old fragments to make space for new ones */ static void pf_flush_fragments(void) { struct pf_fragment *frag; int goal; PF_FRAG_ASSERT(); goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10; DPFPRINTF(("trying to free %d frag entriess\n", goal)); while (goal < uma_zone_get_cur(V_pf_frent_z)) { frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue); if (frag) pf_free_fragment(frag); else break; } } /* Frees the fragments and all associated entries */ static void pf_free_fragment(struct pf_fragment *frag) { struct pf_frent *frent; PF_FRAG_ASSERT(); /* Free all fragments */ for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = TAILQ_FIRST(&frag->fr_queue)) { TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); m_freem(frent->fe_m); uma_zfree(V_pf_frent_z, frent); } pf_remove_fragment(frag); } static struct pf_fragment * pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree) { struct pf_fragment *frag; PF_FRAG_ASSERT(); frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key); if (frag != NULL) { /* XXX Are we sure we want to update the timeout? */ frag->fr_timeout = time_uptime; TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); } return (frag); } /* Removes a fragment from the fragment queue and frees the fragment */ static void pf_remove_fragment(struct pf_fragment *frag) { PF_FRAG_ASSERT(); KASSERT(frag, ("frag != NULL")); RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag); TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); uma_zfree(V_pf_frag_z, frag); } static struct pf_frent * pf_create_fragment(u_short *reason) { struct pf_frent *frent; PF_FRAG_ASSERT(); frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (frent == NULL) { pf_flush_fragments(); frent = uma_zalloc(V_pf_frent_z, M_NOWAIT); if (frent == NULL) { REASON_SET(reason, PFRES_MEMORY); return (NULL); } } return (frent); } /* * Calculate the additional holes that were created in the fragment * queue by inserting this fragment. A fragment in the middle * creates one more hole by splitting. For each connected side, * it loses one hole. * Fragment entry must be in the queue when calling this function. */ static int pf_frent_holes(struct pf_frent *frent) { struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next); struct pf_frent *next = TAILQ_NEXT(frent, fr_next); int holes = 1; if (prev == NULL) { if (frent->fe_off == 0) holes--; } else { KASSERT(frent->fe_off != 0, ("frent->fe_off != 0")); if (frent->fe_off == prev->fe_off + prev->fe_len) holes--; } if (next == NULL) { if (!frent->fe_mff) holes--; } else { KASSERT(frent->fe_mff, ("frent->fe_mff")); if (next->fe_off == frent->fe_off + frent->fe_len) holes--; } return holes; } static inline int pf_frent_index(struct pf_frent *frent) { /* * We have an array of 16 entry points to the queue. A full size * 65535 octet IP packet can have 8192 fragments. So the queue * traversal length is at most 512 and at most 16 entry points are * checked. We need 128 additional bytes on a 64 bit architecture. */ CTASSERT(((u_int16_t)0xffff &~ 7) / (0x10000 / PF_FRAG_ENTRY_POINTS) == 16 - 1); CTASSERT(((u_int16_t)0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1); return frent->fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS); } static int pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent, struct pf_frent *prev) { int index; CTASSERT(PF_FRAG_ENTRY_LIMIT <= 0xff); /* * A packet has at most 65536 octets. With 16 entry points, each one * spawns 4096 octets. We limit these to 64 fragments each, which * means on average every fragment must have at least 64 octets. */ index = pf_frent_index(frent); if (frag->fr_entries[index] >= PF_FRAG_ENTRY_LIMIT) return ENOBUFS; frag->fr_entries[index]++; if (prev == NULL) { TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); } else { KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off, ("overlapping fragment")); TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); } if (frag->fr_firstoff[index] == NULL) { KASSERT(prev == NULL || pf_frent_index(prev) < index, ("prev == NULL || pf_frent_index(pref) < index")); frag->fr_firstoff[index] = frent; } else { if (frent->fe_off < frag->fr_firstoff[index]->fe_off) { KASSERT(prev == NULL || pf_frent_index(prev) < index, ("prev == NULL || pf_frent_index(pref) < index")); frag->fr_firstoff[index] = frent; } else { KASSERT(prev != NULL, ("prev != NULL")); KASSERT(pf_frent_index(prev) == index, ("pf_frent_index(prev) == index")); } } frag->fr_holes += pf_frent_holes(frent); return 0; } void pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent) { #ifdef INVARIANTS struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next); #endif struct pf_frent *next = TAILQ_NEXT(frent, fr_next); int index; frag->fr_holes -= pf_frent_holes(frent); index = pf_frent_index(frent); KASSERT(frag->fr_firstoff[index] != NULL, ("frent not found")); if (frag->fr_firstoff[index]->fe_off == frent->fe_off) { if (next == NULL) { frag->fr_firstoff[index] = NULL; } else { KASSERT(frent->fe_off + frent->fe_len <= next->fe_off, ("overlapping fragment")); if (pf_frent_index(next) == index) { frag->fr_firstoff[index] = next; } else { frag->fr_firstoff[index] = NULL; } } } else { KASSERT(frag->fr_firstoff[index]->fe_off < frent->fe_off, ("frag->fr_firstoff[index]->fe_off < frent->fe_off")); KASSERT(prev != NULL, ("prev != NULL")); KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off, ("overlapping fragment")); KASSERT(pf_frent_index(prev) == index, ("pf_frent_index(prev) == index")); } TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); KASSERT(frag->fr_entries[index] > 0, ("No fragments remaining")); frag->fr_entries[index]--; } struct pf_frent * pf_frent_previous(struct pf_fragment *frag, struct pf_frent *frent) { struct pf_frent *prev, *next; int index; /* * If there are no fragments after frag, take the final one. Assume * that the global queue is not empty. */ prev = TAILQ_LAST(&frag->fr_queue, pf_fragq); KASSERT(prev != NULL, ("prev != NULL")); if (prev->fe_off <= frent->fe_off) return prev; /* * We want to find a fragment entry that is before frag, but still * close to it. Find the first fragment entry that is in the same * entry point or in the first entry point after that. As we have * already checked that there are entries behind frag, this will * succeed. */ for (index = pf_frent_index(frent); index < PF_FRAG_ENTRY_POINTS; index++) { prev = frag->fr_firstoff[index]; if (prev != NULL) break; } KASSERT(prev != NULL, ("prev != NULL")); /* * In prev we may have a fragment from the same entry point that is * before frent, or one that is just one position behind frent. * In the latter case, we go back one step and have the predecessor. * There may be none if the new fragment will be the first one. */ if (prev->fe_off > frent->fe_off) { prev = TAILQ_PREV(prev, pf_fragq, fr_next); if (prev == NULL) return NULL; KASSERT(prev->fe_off <= frent->fe_off, ("prev->fe_off <= frent->fe_off")); return prev; } /* * In prev is the first fragment of the entry point. The offset * of frag is behind it. Find the closest previous fragment. */ for (next = TAILQ_NEXT(prev, fr_next); next != NULL; next = TAILQ_NEXT(next, fr_next)) { if (next->fe_off > frent->fe_off) break; prev = next; } return prev; } static struct pf_fragment * pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, u_short *reason) { struct pf_frent *after, *next, *prev; struct pf_fragment *frag; uint16_t total; int old_index, new_index; PF_FRAG_ASSERT(); /* No empty fragments. */ if (frent->fe_len == 0) { DPFPRINTF(("bad fragment: len 0\n")); goto bad_fragment; } /* All fragments are 8 byte aligned. */ if (frent->fe_mff && (frent->fe_len & 0x7)) { DPFPRINTF(("bad fragment: mff and len %d\n", frent->fe_len)); goto bad_fragment; } /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */ if (frent->fe_off + frent->fe_len > IP_MAXPACKET) { DPFPRINTF(("bad fragment: max packet %d\n", frent->fe_off + frent->fe_len)); goto bad_fragment; } DPFPRINTF((key->frc_af == AF_INET ? "reass frag %d @ %d-%d\n" : "reass frag %#08x @ %d-%d\n", key->frc_id, frent->fe_off, frent->fe_off + frent->fe_len)); /* Fully buffer all of the fragments in this fragment queue. */ frag = pf_find_fragment(key, &V_pf_frag_tree); /* Create a new reassembly queue for this packet. */ if (frag == NULL) { frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); if (frag == NULL) { pf_flush_fragments(); frag = uma_zalloc(V_pf_frag_z, M_NOWAIT); if (frag == NULL) { REASON_SET(reason, PFRES_MEMORY); goto drop_fragment; } } *(struct pf_fragment_cmp *)frag = *key; memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff)); memset(frag->fr_entries, 0, sizeof(frag->fr_entries)); frag->fr_timeout = time_uptime; frag->fr_maxlen = frent->fe_len; frag->fr_holes = 1; TAILQ_INIT(&frag->fr_queue); RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag); TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); /* We do not have a previous fragment, cannot fail. */ pf_frent_insert(frag, frent, NULL); return (frag); } KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue")); /* Remember maximum fragment len for refragmentation. */ if (frent->fe_len > frag->fr_maxlen) frag->fr_maxlen = frent->fe_len; /* Maximum data we have seen already. */ total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; /* Non terminal fragments must have more fragments flag. */ if (frent->fe_off + frent->fe_len < total && !frent->fe_mff) goto bad_fragment; /* Check if we saw the last fragment already. */ if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) { if (frent->fe_off + frent->fe_len > total || (frent->fe_off + frent->fe_len == total && frent->fe_mff)) goto bad_fragment; } else { if (frent->fe_off + frent->fe_len == total && !frent->fe_mff) goto bad_fragment; } /* Find neighbors for newly inserted fragment */ prev = pf_frent_previous(frag, frent); if (prev == NULL) { after = TAILQ_FIRST(&frag->fr_queue); KASSERT(after != NULL, ("after != NULL")); } else { after = TAILQ_NEXT(prev, fr_next); } if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { uint16_t precut; precut = prev->fe_off + prev->fe_len - frent->fe_off; if (precut >= frent->fe_len) goto bad_fragment; DPFPRINTF(("overlap -%d\n", precut)); m_adj(frent->fe_m, precut); frent->fe_off += precut; frent->fe_len -= precut; } for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off; after = next) { uint16_t aftercut; aftercut = frent->fe_off + frent->fe_len - after->fe_off; DPFPRINTF(("adjust overlap %d\n", aftercut)); if (aftercut < after->fe_len) { m_adj(after->fe_m, aftercut); old_index = pf_frent_index(after); after->fe_off += aftercut; after->fe_len -= aftercut; new_index = pf_frent_index(after); if (old_index != new_index) { DPFPRINTF(("frag index %d, new %d", old_index, new_index)); /* Fragment switched queue as fe_off changed */ after->fe_off -= aftercut; after->fe_len += aftercut; /* Remove restored fragment from old queue */ pf_frent_remove(frag, after); after->fe_off += aftercut; after->fe_len -= aftercut; /* Insert into correct queue */ if (pf_frent_insert(frag, after, prev)) { DPFPRINTF( ("fragment requeue limit exceeded")); m_freem(after->fe_m); uma_zfree(V_pf_frent_z, after); /* There is not way to recover */ goto bad_fragment; } } break; } /* This fragment is completely overlapped, lose it. */ next = TAILQ_NEXT(after, fr_next); pf_frent_remove(frag, after); m_freem(after->fe_m); uma_zfree(V_pf_frent_z, after); } /* If part of the queue gets too long, there is not way to recover. */ if (pf_frent_insert(frag, frent, prev)) { DPFPRINTF(("fragment queue limit exceeded\n")); goto bad_fragment; } return (frag); bad_fragment: REASON_SET(reason, PFRES_FRAG); drop_fragment: uma_zfree(V_pf_frent_z, frent); return (NULL); } static struct mbuf * pf_join_fragment(struct pf_fragment *frag) { struct mbuf *m, *m2; struct pf_frent *frent, *next; frent = TAILQ_FIRST(&frag->fr_queue); next = TAILQ_NEXT(frent, fr_next); m = frent->fe_m; m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len); uma_zfree(V_pf_frent_z, frent); for (frent = next; frent != NULL; frent = next) { next = TAILQ_NEXT(frent, fr_next); m2 = frent->fe_m; /* Strip off ip header. */ m_adj(m2, frent->fe_hdrlen); /* Strip off any trailing bytes. */ m_adj(m2, frent->fe_len - m2->m_pkthdr.len); uma_zfree(V_pf_frent_z, frent); m_cat(m, m2); } /* Remove from fragment queue. */ pf_remove_fragment(frag); return (m); } #ifdef INET static int pf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason) { struct mbuf *m = *m0; struct pf_frent *frent; struct pf_fragment *frag; struct pf_fragment_cmp key; uint16_t total, hdrlen; /* Get an entry for the fragment queue */ if ((frent = pf_create_fragment(reason)) == NULL) return (PF_DROP); frent->fe_m = m; frent->fe_hdrlen = ip->ip_hl << 2; frent->fe_extoff = 0; frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; frent->fe_mff = ntohs(ip->ip_off) & IP_MF; pf_ip2key(ip, dir, &key); if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) return (PF_DROP); /* The mbuf is part of the fragment entry, no direct free or access */ m = *m0 = NULL; if (frag->fr_holes) { DPFPRINTF(("frag %d, holes %d\n", frag->fr_id, frag->fr_holes)); return (PF_PASS); /* drop because *m0 is NULL, no error */ } /* We have all the data */ frent = TAILQ_FIRST(&frag->fr_queue); KASSERT(frent != NULL, ("frent != NULL")); total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; hdrlen = frent->fe_hdrlen; m = *m0 = pf_join_fragment(frag); frag = NULL; if (m->m_flags & M_PKTHDR) { int plen = 0; for (m = *m0; m; m = m->m_next) plen += m->m_len; m = *m0; m->m_pkthdr.len = plen; } ip = mtod(m, struct ip *); ip->ip_sum = pf_cksum_fixup(ip->ip_sum, ip->ip_len, htons(hdrlen + total), 0); ip->ip_len = htons(hdrlen + total); ip->ip_sum = pf_cksum_fixup(ip->ip_sum, ip->ip_off, ip->ip_off & ~(IP_MF|IP_OFFMASK), 0); ip->ip_off &= ~(IP_MF|IP_OFFMASK); if (hdrlen + total > IP_MAXPACKET) { DPFPRINTF(("drop: too big: %d\n", total)); ip->ip_len = 0; REASON_SET(reason, PFRES_SHORT); /* PF_DROP requires a valid mbuf *m0 in pf_test() */ return (PF_DROP); } DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); return (PF_PASS); } #endif /* INET */ #ifdef INET6 static int pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr, uint16_t hdrlen, uint16_t extoff, u_short *reason) { struct mbuf *m = *m0; struct pf_frent *frent; struct pf_fragment *frag; struct pf_fragment_cmp key; struct m_tag *mtag; struct pf_fragment_tag *ftag; int off; uint32_t frag_id; uint16_t total, maxlen; uint8_t proto; PF_FRAG_LOCK(); /* Get an entry for the fragment queue. */ if ((frent = pf_create_fragment(reason)) == NULL) { PF_FRAG_UNLOCK(); return (PF_DROP); } frent->fe_m = m; frent->fe_hdrlen = hdrlen; frent->fe_extoff = extoff; frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen; frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG; key.frc_src.v6 = ip6->ip6_src; key.frc_dst.v6 = ip6->ip6_dst; key.frc_af = AF_INET6; /* Only the first fragment's protocol is relevant. */ key.frc_proto = 0; key.frc_id = fraghdr->ip6f_ident; if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) { PF_FRAG_UNLOCK(); return (PF_DROP); } /* The mbuf is part of the fragment entry, no direct free or access. */ m = *m0 = NULL; if (frag->fr_holes) { DPFPRINTF(("frag %d, holes %d\n", frag->fr_id, frag->fr_holes)); PF_FRAG_UNLOCK(); return (PF_PASS); /* Drop because *m0 is NULL, no error. */ } /* We have all the data. */ frent = TAILQ_FIRST(&frag->fr_queue); KASSERT(frent != NULL, ("frent != NULL")); extoff = frent->fe_extoff; maxlen = frag->fr_maxlen; frag_id = frag->fr_id; total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag); m = *m0 = pf_join_fragment(frag); frag = NULL; PF_FRAG_UNLOCK(); /* Take protocol from first fragment header. */ m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), &off); KASSERT(m, ("%s: short mbuf chain", __func__)); proto = *(mtod(m, uint8_t *) + off); m = *m0; /* Delete frag6 header */ if (ip6_deletefraghdr(m, hdrlen, M_NOWAIT) != 0) goto fail; if (m->m_flags & M_PKTHDR) { int plen = 0; for (m = *m0; m; m = m->m_next) plen += m->m_len; m = *m0; m->m_pkthdr.len = plen; } if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(struct pf_fragment_tag), M_NOWAIT)) == NULL) goto fail; ftag = (struct pf_fragment_tag *)(mtag + 1); ftag->ft_hdrlen = hdrlen; ftag->ft_extoff = extoff; ftag->ft_maxlen = maxlen; ftag->ft_id = frag_id; m_tag_prepend(m, mtag); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total); if (extoff) { /* Write protocol into next field of last extension header. */ m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt), &off); KASSERT(m, ("%s: short mbuf chain", __func__)); *(mtod(m, char *) + off) = proto; m = *m0; } else ip6->ip6_nxt = proto; if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { DPFPRINTF(("drop: too big: %d\n", total)); ip6->ip6_plen = 0; REASON_SET(reason, PFRES_SHORT); /* PF_DROP requires a valid mbuf *m0 in pf_test6(). */ return (PF_DROP); } DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip6->ip6_plen))); return (PF_PASS); fail: REASON_SET(reason, PFRES_MEMORY); /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later. */ return (PF_DROP); } #endif /* INET6 */ #ifdef INET6 int pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag, bool forward) { struct mbuf *m = *m0, *t; struct ip6_hdr *hdr; struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); struct pf_pdesc pd; uint32_t frag_id; uint16_t hdrlen, extoff, maxlen; uint8_t proto; int error, action; hdrlen = ftag->ft_hdrlen; extoff = ftag->ft_extoff; maxlen = ftag->ft_maxlen; frag_id = ftag->ft_id; m_tag_delete(m, mtag); mtag = NULL; ftag = NULL; if (extoff) { int off; /* Use protocol from next field of last extension header */ m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt), &off); KASSERT((m != NULL), ("pf_refragment6: short mbuf chain")); proto = *(mtod(m, uint8_t *) + off); *(mtod(m, char *) + off) = IPPROTO_FRAGMENT; m = *m0; } else { hdr = mtod(m, struct ip6_hdr *); proto = hdr->ip6_nxt; hdr->ip6_nxt = IPPROTO_FRAGMENT; } /* In case of link-local traffic we'll need a scope set. */ hdr = mtod(m, struct ip6_hdr *); in6_setscope(&hdr->ip6_src, ifp, NULL); in6_setscope(&hdr->ip6_dst, ifp, NULL); /* The MTU must be a multiple of 8 bytes, or we risk doing the * fragmentation wrong. */ maxlen = maxlen & ~7; /* * Maxlen may be less than 8 if there was only a single * fragment. As it was fragmented before, add a fragment * header also for a single fragment. If total or maxlen * is less than 8, ip6_fragment() will return EMSGSIZE and * we drop the packet. */ error = ip6_fragment(ifp, m, hdrlen, proto, maxlen, frag_id); m = (*m0)->m_nextpkt; (*m0)->m_nextpkt = NULL; if (error == 0) { /* The first mbuf contains the unfragmented packet. */ m_freem(*m0); *m0 = NULL; action = PF_PASS; } else { /* Drop expects an mbuf to free. */ DPFPRINTF(("refragment error %d\n", error)); action = PF_DROP; } for (; m; m = t) { t = m->m_nextpkt; m->m_nextpkt = NULL; m->m_flags |= M_SKIP_FIREWALL; memset(&pd, 0, sizeof(pd)); pd.pf_mtag = pf_find_mtag(m); if (error == 0) if (forward) { MPASS(m->m_pkthdr.rcvif != NULL); ip6_forward(m, 0); } else { (void)ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } else m_freem(m); } return (action); } #endif /* INET6 */ #ifdef INET int pf_normalize_ip(struct mbuf **m0, struct pfi_kkif *kif, u_short *reason, struct pf_pdesc *pd) { struct mbuf *m = *m0; struct pf_krule *r; struct ip *h = mtod(m, struct ip *); int mff = (ntohs(h->ip_off) & IP_MF); int hlen = h->ip_hl << 2; u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; u_int16_t max; int ip_len; int tag = -1; int verdict; int srs; PF_RULES_RASSERT(); r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); /* Check if there any scrub rules. Lack of scrub rules means enforced * packet normalization operation just like in OpenBSD. */ srs = (r != NULL); while (r != NULL) { pf_counter_u64_add(&r->evaluations, 1); if (pfi_kkif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != pd->dir) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != AF_INET) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != h->ip_p) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else break; } if (srs) { /* With scrub rules present IPv4 normalization happens only * if one of rules has matched and it's not a "no scrub" rule */ if (r == NULL || r->action == PF_NOSCRUB) return (PF_PASS); pf_counter_u64_critical_enter(); pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1); pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len); pf_counter_u64_critical_exit(); + pf_rule_to_actions(r, &pd->act); } else if ((!V_pf_status.reass && (h->ip_off & htons(IP_MF | IP_OFFMASK)))) { /* With no scrub rules IPv4 fragment reassembly depends on the * global switch. Fragments can be dropped early if reassembly * is disabled. */ REASON_SET(reason, PFRES_NORM); goto drop; } /* Check for illegal packets */ if (hlen < (int)sizeof(struct ip)) { REASON_SET(reason, PFRES_NORM); goto drop; } if (hlen > ntohs(h->ip_len)) { REASON_SET(reason, PFRES_NORM); goto drop; } /* Clear IP_DF if the rule uses the no-df option or we're in no-df mode */ if ((((r && r->rule_flag & PFRULE_NODF) || (V_pf_status.reass & PF_REASS_NODF)) && h->ip_off & htons(IP_DF) )) { u_int16_t ip_off = h->ip_off; h->ip_off &= htons(~IP_DF); h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); } /* We will need other tests here */ if (!fragoff && !mff) goto no_fragment; /* We're dealing with a fragment now. Don't allow fragments * with IP_DF to enter the cache. If the flag was cleared by * no-df above, fine. Otherwise drop it. */ if (h->ip_off & htons(IP_DF)) { DPFPRINTF(("IP_DF\n")); goto bad; } ip_len = ntohs(h->ip_len) - hlen; /* All fragments are 8 byte aligned */ if (mff && (ip_len & 0x7)) { DPFPRINTF(("mff and %d\n", ip_len)); goto bad; } /* Respect maximum length */ if (fragoff + ip_len > IP_MAXPACKET) { DPFPRINTF(("max packet %d\n", fragoff + ip_len)); goto bad; } if (r==NULL || !(r->rule_flag & PFRULE_FRAGMENT_NOREASS)) { max = fragoff + ip_len; /* Fully buffer all of the fragments * Might return a completely reassembled mbuf, or NULL */ PF_FRAG_LOCK(); DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); verdict = pf_reassemble(m0, h, pd->dir, reason); PF_FRAG_UNLOCK(); if (verdict != PF_PASS) return (PF_DROP); m = *m0; if (m == NULL) return (PF_DROP); h = mtod(m, struct ip *); no_fragment: /* At this point, only IP_DF is allowed in ip_off */ if (h->ip_off & ~htons(IP_DF)) { u_int16_t ip_off = h->ip_off; h->ip_off &= htons(IP_DF); h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); } } - if (r != NULL) { - int scrub_flags = pf_rule_to_scrub_flags(r->rule_flag); - pf_scrub_ip(&m, scrub_flags, r->min_ttl, r->set_tos); - } return (PF_PASS); bad: DPFPRINTF(("dropping bad fragment\n")); REASON_SET(reason, PFRES_FRAG); drop: if (r != NULL && r->log) PFLOG_PACKET(kif, m, AF_INET, *reason, r, NULL, NULL, pd, 1); return (PF_DROP); } #endif #ifdef INET6 int pf_normalize_ip6(struct mbuf **m0, struct pfi_kkif *kif, u_short *reason, struct pf_pdesc *pd) { struct mbuf *m = *m0; struct pf_krule *r; struct ip6_hdr *h = mtod(m, struct ip6_hdr *); int extoff; int off; struct ip6_ext ext; struct ip6_opt opt; struct ip6_frag frag; u_int32_t plen; int optend; int ooff; u_int8_t proto; int terminal; int srs; PF_RULES_RASSERT(); r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); /* Check if there any scrub rules. Lack of scrub rules means enforced * packet normalization operation just like in OpenBSD. */ srs = (r != NULL); while (r != NULL) { pf_counter_u64_add(&r->evaluations, 1); if (pfi_kkif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != pd->dir) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != AF_INET6) r = r->skip[PF_SKIP_AF].ptr; #if 0 /* header chain! */ else if (r->proto && r->proto != h->ip6_nxt) r = r->skip[PF_SKIP_PROTO].ptr; #endif else if (PF_MISMATCHAW(&r->src.addr, (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; } if (srs) { /* With scrub rules present IPv6 normalization happens only * if one of rules has matched and it's not a "no scrub" rule */ if (r == NULL || r->action == PF_NOSCRUB) return (PF_PASS); pf_counter_u64_critical_enter(); pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1); pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len); pf_counter_u64_critical_exit(); + pf_rule_to_actions(r, &pd->act); } /* Check for illegal packets */ if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) goto drop; plen = ntohs(h->ip6_plen); /* jumbo payload option not supported */ if (plen == 0) goto drop; extoff = 0; off = sizeof(struct ip6_hdr); proto = h->ip6_nxt; terminal = 0; do { switch (proto) { case IPPROTO_FRAGMENT: goto fragment; break; case IPPROTO_AH: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, NULL, AF_INET6)) goto shortpkt; extoff = off; if (proto == IPPROTO_AH) off += (ext.ip6e_len + 2) * 4; else off += (ext.ip6e_len + 1) * 8; proto = ext.ip6e_nxt; break; case IPPROTO_HOPOPTS: if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, NULL, AF_INET6)) goto shortpkt; extoff = off; optend = off + (ext.ip6e_len + 1) * 8; ooff = off + sizeof(ext); do { if (!pf_pull_hdr(m, ooff, &opt.ip6o_type, sizeof(opt.ip6o_type), NULL, NULL, AF_INET6)) goto shortpkt; if (opt.ip6o_type == IP6OPT_PAD1) { ooff++; continue; } if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt), NULL, NULL, AF_INET6)) goto shortpkt; if (ooff + sizeof(opt) + opt.ip6o_len > optend) goto drop; if (opt.ip6o_type == IP6OPT_JUMBO) goto drop; ooff += sizeof(opt) + opt.ip6o_len; } while (ooff < optend); off = optend; proto = ext.ip6e_nxt; break; default: terminal = 1; break; } } while (!terminal); if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) goto shortpkt; - if (r != NULL) { - int scrub_flags = pf_rule_to_scrub_flags(r->rule_flag); - pf_scrub_ip6(&m, scrub_flags, r->min_ttl, r->set_tos); - } - return (PF_PASS); fragment: if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) goto shortpkt; if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) goto shortpkt; /* Offset now points to data portion. */ off += sizeof(frag); /* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf. */ if (pf_reassemble6(m0, h, &frag, off, extoff, reason) != PF_PASS) return (PF_DROP); m = *m0; if (m == NULL) return (PF_DROP); pd->flags |= PFDESC_IP_REAS; return (PF_PASS); shortpkt: REASON_SET(reason, PFRES_SHORT); if (r != NULL && r->log) PFLOG_PACKET(kif, m, AF_INET6, *reason, r, NULL, NULL, pd, 1); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) PFLOG_PACKET(kif, m, AF_INET6, *reason, r, NULL, NULL, pd, 1); return (PF_DROP); } #endif /* INET6 */ int pf_normalize_tcp(struct pfi_kkif *kif, struct mbuf *m, int ipoff, int off, void *h, struct pf_pdesc *pd) { struct pf_krule *r, *rm = NULL; struct tcphdr *th = &pd->hdr.tcp; int rewrite = 0; u_short reason; u_int8_t flags; sa_family_t af = pd->af; int srs; PF_RULES_RASSERT(); r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); /* Check if there any scrub rules. Lack of scrub rules means enforced * packet normalization operation just like in OpenBSD. */ srs = (r != NULL); while (r != NULL) { pf_counter_u64_add(&r->evaluations, 1); if (pfi_kkif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != pd->dir) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint)) r = TAILQ_NEXT(r, entries); else { rm = r; break; } } if (srs) { /* With scrub rules present TCP normalization happens only * if one of rules has matched and it's not a "no scrub" rule */ if (rm == NULL || rm->action == PF_NOSCRUB) return (PF_PASS); pf_counter_u64_critical_enter(); pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1); pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len); pf_counter_u64_critical_exit(); + pf_rule_to_actions(rm, &pd->act); } if (rm && rm->rule_flag & PFRULE_REASSEMBLE_TCP) pd->flags |= PFDESC_TCP_NORM; flags = th->th_flags; if (flags & TH_SYN) { /* Illegal packet */ if (flags & TH_RST) goto tcp_drop; if (flags & TH_FIN) goto tcp_drop; } else { /* Illegal packet */ if (!(flags & (TH_ACK|TH_RST))) goto tcp_drop; } if (!(flags & TH_ACK)) { /* These flags are only valid if ACK is set */ if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) goto tcp_drop; } /* Check for illegal header length */ if (th->th_off < (sizeof(struct tcphdr) >> 2)) goto tcp_drop; /* If flags changed, or reserved data set, then adjust */ if (flags != th->th_flags || th->th_x2 != 0) { u_int16_t ov, nv; ov = *(u_int16_t *)(&th->th_ack + 1); th->th_flags = flags; th->th_x2 = 0; nv = *(u_int16_t *)(&th->th_ack + 1); th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, ov, nv, 0); rewrite = 1; } /* Remove urgent pointer, if TH_URG is not set */ if (!(flags & TH_URG) && th->th_urp) { th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, th->th_urp, 0, 0); th->th_urp = 0; rewrite = 1; } - /* Set MSS for old-style scrub rules. - * The function performs its own copyback. */ - if (rm != NULL && rm->max_mss) - pf_normalize_mss(m, off, pd, rm->max_mss); - /* copy back packet headers if we sanitized */ if (rewrite) m_copyback(m, off, sizeof(*th), (caddr_t)th); return (PF_PASS); tcp_drop: REASON_SET(&reason, PFRES_NORM); if (rm != NULL && r->log) PFLOG_PACKET(kif, m, AF_INET, reason, r, NULL, NULL, pd, 1); return (PF_DROP); } int pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) { u_int32_t tsval, tsecr; u_int8_t hdr[60]; u_int8_t *opt; KASSERT((src->scrub == NULL), ("pf_normalize_tcp_init: src->scrub != NULL")); src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT); if (src->scrub == NULL) return (1); switch (pd->af) { #ifdef INET case AF_INET: { struct ip *h = mtod(m, struct ip *); src->scrub->pfss_ttl = h->ip_ttl; break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { struct ip6_hdr *h = mtod(m, struct ip6_hdr *); src->scrub->pfss_ttl = h->ip6_hlim; break; } #endif /* INET6 */ } /* * All normalizations below are only begun if we see the start of * the connections. They must all set an enabled bit in pfss_flags */ if ((th->th_flags & TH_SYN) == 0) return (0); if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { /* Diddle with TCP options */ int hlen; opt = hdr + sizeof(struct tcphdr); hlen = (th->th_off << 2) - sizeof(struct tcphdr); while (hlen >= TCPOLEN_TIMESTAMP) { switch (*opt) { case TCPOPT_EOL: /* FALLTHROUGH */ case TCPOPT_NOP: opt++; hlen--; break; case TCPOPT_TIMESTAMP: if (opt[1] >= TCPOLEN_TIMESTAMP) { src->scrub->pfss_flags |= PFSS_TIMESTAMP; src->scrub->pfss_ts_mod = htonl(arc4random()); /* note PFSS_PAWS not set yet */ memcpy(&tsval, &opt[2], sizeof(u_int32_t)); memcpy(&tsecr, &opt[6], sizeof(u_int32_t)); src->scrub->pfss_tsval0 = ntohl(tsval); src->scrub->pfss_tsval = ntohl(tsval); src->scrub->pfss_tsecr = ntohl(tsecr); getmicrouptime(&src->scrub->pfss_last); } /* FALLTHROUGH */ default: hlen -= MAX(opt[1], 2); opt += MAX(opt[1], 2); break; } } } return (0); } void pf_normalize_tcp_cleanup(struct pf_kstate *state) { uma_zfree(V_pf_state_scrub_z, state->src.scrub); uma_zfree(V_pf_state_scrub_z, state->dst.scrub); /* Someday... flush the TCP segment reassembly descriptors. */ } int pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, u_short *reason, struct tcphdr *th, struct pf_kstate *state, struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) { struct timeval uptime; u_int32_t tsval, tsecr; u_int tsval_from_last; u_int8_t hdr[60]; u_int8_t *opt; int copyback = 0; int got_ts = 0; size_t startoff; KASSERT((src->scrub || dst->scrub), ("%s: src->scrub && dst->scrub!", __func__)); /* * Enforce the minimum TTL seen for this connection. Negate a common * technique to evade an intrusion detection system and confuse * firewall state code. */ switch (pd->af) { #ifdef INET case AF_INET: { if (src->scrub) { struct ip *h = mtod(m, struct ip *); if (h->ip_ttl > src->scrub->pfss_ttl) src->scrub->pfss_ttl = h->ip_ttl; h->ip_ttl = src->scrub->pfss_ttl; } break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { if (src->scrub) { struct ip6_hdr *h = mtod(m, struct ip6_hdr *); if (h->ip6_hlim > src->scrub->pfss_ttl) src->scrub->pfss_ttl = h->ip6_hlim; h->ip6_hlim = src->scrub->pfss_ttl; } break; } #endif /* INET6 */ } if (th->th_off > (sizeof(struct tcphdr) >> 2) && ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { /* Diddle with TCP options */ int hlen; opt = hdr + sizeof(struct tcphdr); hlen = (th->th_off << 2) - sizeof(struct tcphdr); while (hlen >= TCPOLEN_TIMESTAMP) { startoff = opt - (hdr + sizeof(struct tcphdr)); switch (*opt) { case TCPOPT_EOL: /* FALLTHROUGH */ case TCPOPT_NOP: opt++; hlen--; break; case TCPOPT_TIMESTAMP: /* Modulate the timestamps. Can be used for * NAT detection, OS uptime determination or * reboot detection. */ if (got_ts) { /* Huh? Multiple timestamps!? */ if (V_pf_status.debug >= PF_DEBUG_MISC) { DPFPRINTF(("multiple TS??\n")); pf_print_state(state); printf("\n"); } REASON_SET(reason, PFRES_TS); return (PF_DROP); } if (opt[1] >= TCPOLEN_TIMESTAMP) { memcpy(&tsval, &opt[2], sizeof(u_int32_t)); if (tsval && src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) { tsval = ntohl(tsval); pf_patch_32_unaligned(m, &th->th_sum, &opt[2], htonl(tsval + src->scrub->pfss_ts_mod), PF_ALGNMNT(startoff), 0); copyback = 1; } /* Modulate TS reply iff valid (!0) */ memcpy(&tsecr, &opt[6], sizeof(u_int32_t)); if (tsecr && dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { tsecr = ntohl(tsecr) - dst->scrub->pfss_ts_mod; pf_patch_32_unaligned(m, &th->th_sum, &opt[6], htonl(tsecr), PF_ALGNMNT(startoff), 0); copyback = 1; } got_ts = 1; } /* FALLTHROUGH */ default: hlen -= MAX(opt[1], 2); opt += MAX(opt[1], 2); break; } } if (copyback) { /* Copyback the options, caller copys back header */ *writeback = 1; m_copyback(m, off + sizeof(struct tcphdr), (th->th_off << 2) - sizeof(struct tcphdr), hdr + sizeof(struct tcphdr)); } } /* * Must invalidate PAWS checks on connections idle for too long. * The fastest allowed timestamp clock is 1ms. That turns out to * be about 24 days before it wraps. XXX Right now our lowerbound * TS echo check only works for the first 12 days of a connection * when the TS has exhausted half its 32bit space */ #define TS_MAX_IDLE (24*24*60*60) #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ getmicrouptime(&uptime); if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || time_uptime - state->creation > TS_MAX_CONN)) { if (V_pf_status.debug >= PF_DEBUG_MISC) { DPFPRINTF(("src idled out of PAWS\n")); pf_print_state(state); printf("\n"); } src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED; } if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { if (V_pf_status.debug >= PF_DEBUG_MISC) { DPFPRINTF(("dst idled out of PAWS\n")); pf_print_state(state); printf("\n"); } dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED; } if (got_ts && src->scrub && dst->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && (dst->scrub->pfss_flags & PFSS_PAWS)) { /* Validate that the timestamps are "in-window". * RFC1323 describes TCP Timestamp options that allow * measurement of RTT (round trip time) and PAWS * (protection against wrapped sequence numbers). PAWS * gives us a set of rules for rejecting packets on * long fat pipes (packets that were somehow delayed * in transit longer than the time it took to send the * full TCP sequence space of 4Gb). We can use these * rules and infer a few others that will let us treat * the 32bit timestamp and the 32bit echoed timestamp * as sequence numbers to prevent a blind attacker from * inserting packets into a connection. * * RFC1323 tells us: * - The timestamp on this packet must be greater than * or equal to the last value echoed by the other * endpoint. The RFC says those will be discarded * since it is a dup that has already been acked. * This gives us a lowerbound on the timestamp. * timestamp >= other last echoed timestamp * - The timestamp will be less than or equal to * the last timestamp plus the time between the * last packet and now. The RFC defines the max * clock rate as 1ms. We will allow clocks to be * up to 10% fast and will allow a total difference * or 30 seconds due to a route change. And this * gives us an upperbound on the timestamp. * timestamp <= last timestamp + max ticks * We have to be careful here. Windows will send an * initial timestamp of zero and then initialize it * to a random value after the 3whs; presumably to * avoid a DoS by having to call an expensive RNG * during a SYN flood. Proof MS has at least one * good security geek. * * - The TCP timestamp option must also echo the other * endpoints timestamp. The timestamp echoed is the * one carried on the earliest unacknowledged segment * on the left edge of the sequence window. The RFC * states that the host will reject any echoed * timestamps that were larger than any ever sent. * This gives us an upperbound on the TS echo. * tescr <= largest_tsval * - The lowerbound on the TS echo is a little more * tricky to determine. The other endpoint's echoed * values will not decrease. But there may be * network conditions that re-order packets and * cause our view of them to decrease. For now the * only lowerbound we can safely determine is that * the TS echo will never be less than the original * TS. XXX There is probably a better lowerbound. * Remove TS_MAX_CONN with better lowerbound check. * tescr >= other original TS * * It is also important to note that the fastest * timestamp clock of 1ms will wrap its 32bit space in * 24 days. So we just disable TS checking after 24 * days of idle time. We actually must use a 12d * connection limit until we can come up with a better * lowerbound to the TS echo check. */ struct timeval delta_ts; int ts_fudge; /* * PFTM_TS_DIFF is how many seconds of leeway to allow * a host's timestamp. This can happen if the previous * packet got delayed in transit for much longer than * this packet. */ if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF]; /* Calculate max ticks since the last timestamp */ #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ #define TS_MICROSECS 1000000 /* microseconds per second */ delta_ts = uptime; timevalsub(&delta_ts, &src->scrub->pfss_last); tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); if ((src->state >= TCPS_ESTABLISHED && dst->state >= TCPS_ESTABLISHED) && (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { /* Bad RFC1323 implementation or an insertion attack. * * - Solaris 2.6 and 2.7 are known to send another ACK * after the FIN,FIN|ACK,ACK closing that carries * an old timestamp. */ DPFPRINTF(("Timestamp failed %c%c%c%c\n", SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ? '1' : ' ', SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ')); DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u " "idle: %jus %lums\n", tsval, tsecr, tsval_from_last, (uintmax_t)delta_ts.tv_sec, delta_ts.tv_usec / 1000)); DPFPRINTF((" src->tsval: %u tsecr: %u\n", src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u" "\n", dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); if (V_pf_status.debug >= PF_DEBUG_MISC) { pf_print_state(state); pf_print_flags(th->th_flags); printf("\n"); } REASON_SET(reason, PFRES_TS); return (PF_DROP); } /* XXX I'd really like to require tsecr but it's optional */ } else if (!got_ts && (th->th_flags & TH_RST) == 0 && ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) || pd->p_len > 0 || (th->th_flags & TH_SYN)) && src->scrub && dst->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && (dst->scrub->pfss_flags & PFSS_PAWS)) { /* Didn't send a timestamp. Timestamps aren't really useful * when: * - connection opening or closing (often not even sent). * but we must not let an attacker to put a FIN on a * data packet to sneak it through our ESTABLISHED check. * - on a TCP reset. RFC suggests not even looking at TS. * - on an empty ACK. The TS will not be echoed so it will * probably not help keep the RTT calculation in sync and * there isn't as much danger when the sequence numbers * got wrapped. So some stacks don't include TS on empty * ACKs :-( * * To minimize the disruption to mostly RFC1323 conformant * stacks, we will only require timestamps on data packets. * * And what do ya know, we cannot require timestamps on data * packets. There appear to be devices that do legitimate * TCP connection hijacking. There are HTTP devices that allow * a 3whs (with timestamps) and then buffer the HTTP request. * If the intermediate device has the HTTP response cache, it * will spoof the response but not bother timestamping its * packets. So we can look for the presence of a timestamp in * the first data packet and if there, require it in all future * packets. */ if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { /* * Hey! Someone tried to sneak a packet in. Or the * stack changed its RFC1323 behavior?!?! */ if (V_pf_status.debug >= PF_DEBUG_MISC) { DPFPRINTF(("Did not receive expected RFC1323 " "timestamp\n")); pf_print_state(state); pf_print_flags(th->th_flags); printf("\n"); } REASON_SET(reason, PFRES_TS); return (PF_DROP); } } /* * We will note if a host sends his data packets with or without * timestamps. And require all data packets to contain a timestamp * if the first does. PAWS implicitly requires that all data packets be * timestamped. But I think there are middle-man devices that hijack * TCP streams immediately after the 3whs and don't timestamp their * packets (seen in a WWW accelerator or cache). */ if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { if (got_ts) src->scrub->pfss_flags |= PFSS_DATA_TS; else { src->scrub->pfss_flags |= PFSS_DATA_NOTS; if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { /* Don't warn if other host rejected RFC1323 */ DPFPRINTF(("Broken RFC1323 stack did not " "timestamp data packet. Disabled PAWS " "security.\n")); pf_print_state(state); pf_print_flags(th->th_flags); printf("\n"); } } } /* * Update PAWS values */ if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { getmicrouptime(&src->scrub->pfss_last); if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || (src->scrub->pfss_flags & PFSS_PAWS) == 0) src->scrub->pfss_tsval = tsval; if (tsecr) { if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || (src->scrub->pfss_flags & PFSS_PAWS) == 0) src->scrub->pfss_tsecr = tsecr; if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && (SEQ_LT(tsval, src->scrub->pfss_tsval0) || src->scrub->pfss_tsval0 == 0)) { /* tsval0 MUST be the lowest timestamp */ src->scrub->pfss_tsval0 = tsval; } /* Only fully initialized after a TS gets echoed */ if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) src->scrub->pfss_flags |= PFSS_PAWS; } } /* I have a dream.... TCP segment reassembly.... */ return (0); } int -pf_normalize_mss(struct mbuf *m, int off, struct pf_pdesc *pd, u_int16_t maxmss) +pf_normalize_mss(struct mbuf *m, int off, struct pf_pdesc *pd) { struct tcphdr *th = &pd->hdr.tcp; u_int16_t *mss; int thoff; int opt, cnt, optlen = 0; u_char opts[TCP_MAXOLEN]; u_char *optp = opts; size_t startoff; thoff = th->th_off << 2; cnt = thoff - sizeof(struct tcphdr); if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt, NULL, NULL, pd->af)) return (0); for (; cnt > 0; cnt -= optlen, optp += optlen) { startoff = optp - opts; opt = optp[0]; if (opt == TCPOPT_EOL) break; if (opt == TCPOPT_NOP) optlen = 1; else { if (cnt < 2) break; optlen = optp[1]; if (optlen < 2 || optlen > cnt) break; } switch (opt) { case TCPOPT_MAXSEG: mss = (u_int16_t *)(optp + 2); - if ((ntohs(*mss)) > maxmss) { + if ((ntohs(*mss)) > pd->act.max_mss) { pf_patch_16_unaligned(m, &th->th_sum, - mss, htons(maxmss), + mss, htons(pd->act.max_mss), PF_ALGNMNT(startoff), 0); m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts); m_copyback(m, off, sizeof(*th), (caddr_t)th); } break; default: break; } } return (0); } -u_int16_t -pf_rule_to_scrub_flags(u_int32_t rule_flags) -{ - /* - * Translate pf_krule->rule_flag to pf_krule->scrub_flags. - * The pf_scrub_ip functions have been adapted to the new style of pass - * rules but they might get called if old scrub rules are used. - */ - int scrub_flags = 0; - - if (rule_flags & PFRULE_SET_TOS) { - scrub_flags |= PFSTATE_SETTOS; - } - if (rule_flags & PFRULE_RANDOMID) - scrub_flags |= PFSTATE_RANDOMID; - - return scrub_flags; -} - #ifdef INET void -pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) +pf_scrub_ip(struct mbuf **m0, struct pf_pdesc *pd) { struct mbuf *m = *m0; struct ip *h = mtod(m, struct ip *); /* Clear IP_DF if no-df was requested */ - if (flags & PFSTATE_NODF && h->ip_off & htons(IP_DF)) { + if (pd->act.flags & PFSTATE_NODF && h->ip_off & htons(IP_DF)) { u_int16_t ip_off = h->ip_off; h->ip_off &= htons(~IP_DF); h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); } /* Enforce a minimum ttl, may cause endless packet loops */ - if (min_ttl && h->ip_ttl < min_ttl) { + if (pd->act.min_ttl && h->ip_ttl < pd->act.min_ttl) { u_int16_t ip_ttl = h->ip_ttl; - h->ip_ttl = min_ttl; + h->ip_ttl = pd->act.min_ttl; h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); } /* Enforce tos */ - if (flags & PFSTATE_SETTOS) { + if (pd->act.flags & PFSTATE_SETTOS) { u_int16_t ov, nv; ov = *(u_int16_t *)h; - h->ip_tos = tos | (h->ip_tos & IPTOS_ECN_MASK); + h->ip_tos = pd->act.set_tos | (h->ip_tos & IPTOS_ECN_MASK); nv = *(u_int16_t *)h; h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0); } /* random-id, but not for fragments */ - if (flags & PFSTATE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) { + if (pd->act.flags & PFSTATE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) { uint16_t ip_id = h->ip_id; ip_fillid(h); h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); } } #endif /* INET */ #ifdef INET6 void -pf_scrub_ip6(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) +pf_scrub_ip6(struct mbuf **m0, struct pf_pdesc *pd) { struct mbuf *m = *m0; struct ip6_hdr *h = mtod(m, struct ip6_hdr *); /* Enforce a minimum ttl, may cause endless packet loops */ - if (min_ttl && h->ip6_hlim < min_ttl) - h->ip6_hlim = min_ttl; + if (pd->act.min_ttl && h->ip6_hlim < pd->act.min_ttl) + h->ip6_hlim = pd->act.min_ttl; /* Enforce tos. Set traffic class bits */ - if (flags & PFSTATE_SETTOS) { + if (pd->act.flags & PFSTATE_SETTOS) { h->ip6_flow &= IPV6_FLOWLABEL_MASK | IPV6_VERSION_MASK; - h->ip6_flow |= htonl((tos | IPV6_ECN(h)) << 20); + h->ip6_flow |= htonl((pd->act.set_tos | IPV6_ECN(h)) << 20); } } #endif