diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -375,6 +375,52 @@ #define PF_STATE_LOCK_ASSERT(s) do {} while (0) #endif /* INVARIANTS */ +#ifdef INVARIANTS +#define PF_SRC_NODE_LOCK(sn) \ + do { \ + struct pf_ksrc_node *_sn = (sn); \ + struct pf_srchash *_sh = &V_pf_srchash[ \ + pf_hashsrc(&_sn->addr, _sn->af)]; \ + MPASS(_sn->lock == &_sh->lock); \ + mtx_lock(_sn->lock); \ + } while (0) +#define PF_SRC_NODE_UNLOCK(sn) \ + do { \ + struct pf_ksrc_node *_sn = (sn); \ + struct pf_srchash *_sh = &V_pf_srchash[ \ + pf_hashsrc(&_sn->addr, _sn->af)]; \ + MPASS(_sn->lock == &_sh->lock); \ + mtx_unlock(_sn->lock); \ + } while (0) +#else +#define PF_SRC_NODE_LOCK(sn) mtx_lock(sn->lock) +#define PF_SRC_NODE_UNLOCK(sn) mtx_unlock(sn->lock) +#endif + +#ifdef INVARIANTS +#define PF_SRC_NODE_LOCK_ASSERT(sn) \ + do { \ + struct pf_ksrc_node *_sn = (sn); \ + struct pf_srchash *_sh = &V_pf_srchash[ \ + pf_hashsrc(&_sn->addr, _sn->af)]; \ + MPASS(_sn->lock == &_sh->lock); \ + PF_HASHROW_ASSERT(_sh); \ + } while (0) +#else /* !INVARIANTS */ +#define PF_SRC_NODE_LOCK_ASSERT(sn) do {} while (0) +#endif /* INVARIANTS */ + +#ifdef INVARIANTS +#define PF_SRC_NODE_ROW_LOCK_ASSERT(src, af) \ + do { \ + struct pf_srchash *_sh = &V_pf_srchash[ \ + pf_hashsrc(src, af)]; \ + PF_HASHROW_ASSERT(_sh); \ + } while (0) +#else /* !INVARIANTS */ +#define PF_SRC_NODE_ROW_LOCK_ASSERT(src, af) do {} while (0) +#endif /* INVARIANTS */ + extern struct mtx_padalign pf_unlnkdrules_mtx; #define PF_UNLNKDRULES_LOCK() mtx_lock(&pf_unlnkdrules_mtx) #define PF_UNLNKDRULES_UNLOCK() mtx_unlock(&pf_unlnkdrules_mtx) @@ -830,6 +876,29 @@ SLIST_HEAD(pf_krule_slist, pf_krule_item); +enum pf_sn_types { PF_SN_LIMIT, PF_SN_NAT, PF_SN_RDR, PF_SN_ROUTE, PF_SN_MAX }; + +/* + * Declared in pf_test_rule() and carried through rule testing and state + * creation. 
+ */ +struct pf_ksrc_node_ptr { + struct pf_ksrc_node *sn; + struct pf_srchash *sh; + enum pf_sn_types type; +}; + +/* + * Attached to pf_kstate in pf_create_state(). + */ +struct pf_sn_item { + SLIST_ENTRY(pf_sn_item) next; + struct pf_ksrc_node *sn; + struct pf_srchash *sh; +}; + +SLIST_HEAD(pf_sn_head, pf_sn_item); + struct pf_ksrc_node { LIST_ENTRY(pf_ksrc_node) entry; struct pf_addr addr; @@ -845,7 +914,8 @@ u_int32_t creation; u_int32_t expire; sa_family_t af; - u_int8_t ruletype; + u_int8_t type; + struct mtx *lock; }; #endif @@ -1016,8 +1086,7 @@ struct pfi_kkif *kif; struct pfi_kkif *orig_kif; /* The real kif, even if we're a floating state (i.e. if == V_pfi_all). */ struct pfi_kkif *rt_kif; - struct pf_ksrc_node *src_node; - struct pf_ksrc_node *nat_src_node; + struct pf_sn_head src_nodes; u_int64_t packets[2]; u_int64_t bytes[2]; u_int32_t creation; @@ -2178,10 +2247,19 @@ u_int, int *); extern bool pf_find_state_all_exists(struct pf_state_key_cmp *, u_int); -extern struct pf_ksrc_node *pf_find_src_node(struct pf_addr *, - struct pf_krule *, sa_family_t, int); +int pf_find_src_node(struct pf_addr *, + struct pf_krule *, sa_family_t af, + struct pf_ksrc_node_ptr *); +struct pf_ksrc_node *pf_get_src_node(struct pf_kstate *, + enum pf_sn_types); +u_short pf_insert_src_node(struct pf_ksrc_node_ptr *, + struct pf_krule *, struct pf_addr *, + sa_family_t); extern void pf_unlink_src_node(struct pf_ksrc_node *); extern u_int pf_free_src_nodes(struct pf_ksrc_node_list *); +void pf_state_rm_src_node(struct pf_kstate *, + struct pf_ksrc_node *); +void pf_src_tree_remove_state(struct pf_kstate *, int); extern void pf_print_state(struct pf_kstate *); extern void pf_print_flags(u_int8_t); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, @@ -2433,12 +2511,13 @@ int pf_map_addr(u_int8_t, struct pf_krule *, struct pf_addr *, struct pf_addr *, - struct pf_addr *, struct pf_ksrc_node **); + struct pf_addr *, struct pf_ksrc_node_ptr *); struct pf_krule 
*pf_get_translation(struct pf_pdesc *, struct mbuf *, - int, int, struct pfi_kkif *, struct pf_ksrc_node **, - struct pf_state_key **, struct pf_state_key **, - struct pf_addr *, struct pf_addr *, - uint16_t, uint16_t, struct pf_kanchor_stackframe *); + int, int, struct pfi_kkif *, + struct pf_ksrc_node_ptr *, struct pf_state_key **, + struct pf_state_key **, struct pf_addr *, + struct pf_addr *, uint16_t, uint16_t, + struct pf_kanchor_stackframe *); struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t); diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -658,7 +658,7 @@ u_int32_t creation; u_int32_t expire; sa_family_t af; - u_int8_t ruletype; + u_int8_t ruletype; /* Maintain compatibility, it's not ksn->type */ }; #define PFSNODE_HIWAT 10000 /* default source node table size */ diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -252,7 +252,6 @@ VNET_DEFINE(struct unrhdr64, pf_stateid); -static void pf_src_tree_remove_state(struct pf_kstate *); static void pf_init_threshold(struct pf_threshold *, u_int32_t, u_int32_t); static void pf_add_threshold(struct pf_threshold *); @@ -290,7 +289,7 @@ struct pf_kruleset **, struct inpcb *); static int pf_create_state(struct pf_krule *, struct pf_krule *, struct pf_krule *, struct pf_pdesc *, - struct pf_ksrc_node *, struct pf_state_key *, + struct pf_ksrc_node_ptr *, struct pf_state_key *, struct pf_state_key *, struct mbuf *, int, u_int16_t, u_int16_t, int *, struct pfi_kkif *, struct pf_kstate **, int, u_int16_t, u_int16_t, @@ -329,8 +328,6 @@ struct pf_state_key_cmp *, u_int); static int pf_src_connlimit(struct pf_kstate **); static void pf_overload_task(void *v, int pending); -static int pf_insert_src_node(struct pf_ksrc_node **, - struct pf_krule *, struct pf_addr *, sa_family_t); static u_int pf_purge_expired_states(u_int, int); static void 
pf_purge_unlinked_rules(void); static int pf_mtag_uminit(void *, int, int); @@ -430,6 +427,8 @@ static u_long pf_srchashsize; u_long pf_ioctl_maxcount = 65535; +MALLOC_DEFINE(M_PFKSNI, "pf_sni", "pf(4) source node item"); + SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN, &pf_hashsize, 0, "Size of pf(4) states hashtable"); SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN, @@ -680,43 +679,55 @@ pf_src_connlimit(struct pf_kstate **state) { struct pf_overload_entry *pfoe; + struct pf_ksrc_node *sn; int bad = 0; PF_STATE_LOCK_ASSERT(*state); - (*state)->src_node->conn++; + if ((sn = pf_get_src_node((*state), PF_SN_LIMIT)) == NULL) + return (0); + + PF_SRC_NODE_LOCK_ASSERT(sn); + + sn->conn++; (*state)->src.tcp_est = 1; - pf_add_threshold(&(*state)->src_node->conn_rate); + pf_add_threshold(&sn->conn_rate); if ((*state)->rule.ptr->max_src_conn && (*state)->rule.ptr->max_src_conn < - (*state)->src_node->conn) { + sn->conn) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1); bad++; } if ((*state)->rule.ptr->max_src_conn_rate.limit && - pf_check_threshold(&(*state)->src_node->conn_rate)) { + pf_check_threshold(&sn->conn_rate)) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1); bad++; } - if (!bad) + if (!bad) { + PF_SRC_NODE_UNLOCK(sn); return (0); + } /* Kill this state. */ (*state)->timeout = PFTM_PURGE; pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED); - if ((*state)->rule.ptr->overload_tbl == NULL) + if ((*state)->rule.ptr->overload_tbl == NULL) { + PF_SRC_NODE_UNLOCK(sn); return (1); + } /* Schedule overloading and flushing task. 
*/ pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT); - if (pfoe == NULL) + if (pfoe == NULL) { + PF_SRC_NODE_UNLOCK(sn); return (1); /* too bad :( */ + } - bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr)); + bcopy(&sn->addr, &pfoe->addr, sizeof(pfoe->addr)); pfoe->af = (*state)->key[PF_SK_WIRE]->af; pfoe->rule = (*state)->rule.ptr; pfoe->dir = (*state)->direction; @@ -725,6 +736,7 @@ PF_OVERLOADQ_UNLOCK(); taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask); + PF_SRC_NODE_UNLOCK(sn); return (1); } @@ -822,37 +834,69 @@ } /* - * Can return locked on failure, so that we can consistently - * allocate and insert a new one. + * Find a source node matching given parameters. Store the resulting node + * pointer and the pointer to the node hash row in snp. Always returns locked. */ -struct pf_ksrc_node * +int pf_find_src_node(struct pf_addr *src, struct pf_krule *rule, sa_family_t af, - int returnlocked) + struct pf_ksrc_node_ptr *snp) { - struct pf_srchash *sh; - struct pf_ksrc_node *n; - counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1); - sh = &V_pf_srchash[pf_hashsrc(src, af)]; - PF_HASHROW_LOCK(sh); - LIST_FOREACH(n, &sh->nodes, entry) - if (n->rule.ptr == rule && n->af == af && - ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) || - (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0))) - break; - if (n != NULL) { - n->states++; - PF_HASHROW_UNLOCK(sh); - } else if (returnlocked == 0) - PF_HASHROW_UNLOCK(sh); - - return (n); + snp->sh = &V_pf_srchash[pf_hashsrc(src, af)]; + PF_HASHROW_LOCK(snp->sh); + LIST_FOREACH(snp->sn, &snp->sh->nodes, entry) { + if (snp->sn->rule.ptr == rule && snp->sn->af == af && + snp->sn->type == snp->type && ((af == AF_INET && + snp->sn->addr.v4.s_addr == src->v4.s_addr) || + (af == AF_INET6 && bcmp(&snp->sn->addr, src, + sizeof(*src)) == 0))) { + snp->sn->states++; + return (1); + } + } + return (0); } +/* + * Check if given source node exists in given hash row. Always returns locekd. 
+ */ +static int +pf_find_src_node_ptr(struct pf_ksrc_node_ptr *snp) +{ + struct pf_ksrc_node *sn; + PF_HASHROW_ASSERT(snp->sh); + + LIST_FOREACH(sn, &snp->sh->nodes, entry) { + if (sn == snp->sn) + return (1); + } + return (0); +} + +/* + * Get the source node of given type attached to a state. + */ +struct pf_ksrc_node * +pf_get_src_node(struct pf_kstate *st, enum pf_sn_types type) +{ + struct pf_sn_item *sni; + + PF_STATE_LOCK_ASSERT(st); + + SLIST_FOREACH(sni, &st->src_nodes, next) + if (sni->sn->type == type) { + PF_SRC_NODE_LOCK(sni->sn); + return (sni->sn); + } + return (NULL); +} + + static void pf_free_src_node(struct pf_ksrc_node *sn) { + KASSERT(sn->states == 0, ("%s: %p has states", __func__, sn)); for (int i = 0; i < 2; i++) { counter_u64_free(sn->bytes[i]); @@ -861,78 +905,143 @@ uma_zfree(V_pf_sources_z, sn); } -static int -pf_insert_src_node(struct pf_ksrc_node **sn, struct pf_krule *rule, + +u_short +pf_insert_src_node(struct pf_ksrc_node_ptr *snp, struct pf_krule *rule, struct pf_addr *src, sa_family_t af) { + u_short reason = 0; - KASSERT((rule->rule_flag & PFRULE_SRCTRACK || + KASSERT((rule->nr == -1 || rule->rule_flag & PFRULE_SRCTRACK || rule->rpool.opts & PF_POOL_STICKYADDR), ("%s for non-tracking rule %p", __func__, rule)); - if (*sn == NULL) - *sn = pf_find_src_node(src, rule, af, 1); + PF_HASHROW_ASSERT(snp->sh); - if (*sn == NULL) { - struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)]; + snp->sn = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); + if (snp->sn == NULL) { + reason = PFRES_MEMORY; + goto done; + } - PF_HASHROW_ASSERT(sh); + for (int i = 0; i < 2; i++) { + snp->sn->bytes[i] = counter_u64_alloc(M_NOWAIT); + snp->sn->packets[i] = counter_u64_alloc(M_NOWAIT); - if (!rule->max_src_nodes || - counter_u64_fetch(rule->src_nodes) < rule->max_src_nodes) - (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); - else - counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], - 1); - if ((*sn) == NULL) { - PF_HASHROW_UNLOCK(sh); - 
return (-1); - } - - for (int i = 0; i < 2; i++) { - (*sn)->bytes[i] = counter_u64_alloc(M_NOWAIT); - (*sn)->packets[i] = counter_u64_alloc(M_NOWAIT); - - if ((*sn)->bytes[i] == NULL || (*sn)->packets[i] == NULL) { - pf_free_src_node(*sn); - PF_HASHROW_UNLOCK(sh); - return (-1); - } - } - - pf_init_threshold(&(*sn)->conn_rate, - rule->max_src_conn_rate.limit, - rule->max_src_conn_rate.seconds); - - (*sn)->af = af; - (*sn)->rule.ptr = rule; - PF_ACPY(&(*sn)->addr, src, af); - LIST_INSERT_HEAD(&sh->nodes, *sn, entry); - (*sn)->creation = time_uptime; - (*sn)->ruletype = rule->action; - (*sn)->states = 1; - if ((*sn)->rule.ptr != NULL) - counter_u64_add((*sn)->rule.ptr->src_nodes, 1); - PF_HASHROW_UNLOCK(sh); - counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1); - } else { - if (rule->max_src_states && - (*sn)->states >= rule->max_src_states) { - counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES], - 1); - return (-1); + if (snp->sn->bytes[i] == NULL || snp->sn->packets[i] == NULL) { + pf_free_src_node(snp->sn); + reason = PFRES_MEMORY; + snp->sn = NULL; + goto done; } } - return (0); + + pf_init_threshold(&(snp->sn)->conn_rate, + rule->max_src_conn_rate.limit, + rule->max_src_conn_rate.seconds); + + MPASS(snp->sn->lock == NULL); + snp->sn->lock = &snp->sh->lock; + + snp->sn->af = af; + snp->sn->rule.ptr = rule; + PF_ACPY(&snp->sn->addr, src, af); + LIST_INSERT_HEAD(&snp->sh->nodes, snp->sn, entry); + snp->sn->creation = time_uptime; + snp->sn->states = 1; /* Prevent this sn from being purged */ + snp->sn->type = snp->type; + + counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1); + +done: + /* Returns locked */ + return (reason); } +static u_short +check_source_limits(struct pf_ksrc_node_ptr *snp, struct pf_krule *rule, + struct pf_addr *src, sa_family_t af) +{ + u_short reason = 0; + struct pf_krule *r_track = rule; + + /* + * Only src tracking uses this function! 
Load balancing code uses + * pf_map_addr() which calls the low level functions directly. + */ + KASSERT(snp->type == PF_SN_LIMIT && rule->rule_flag & PFRULE_SRCTRACK, + ("%s for non-tracking rule %p", __func__, rule)); + + /* + * Rules with global source tracking store the counters of connected + * sources and their states in the default rule. + */ + if (!(rule->rule_flag & PFRULE_RULESRCTRACK)) + r_track = &V_pf_default_rule; + + pf_find_src_node(src, r_track, af, snp); + + PF_HASHROW_ASSERT(snp->sh); + + if (snp->sn == NULL) { + if (rule->max_src_nodes && + counter_u64_fetch(r_track->src_nodes) >= rule->max_src_nodes) { + counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], + 1); + reason = PFRES_SRCLIMIT; + goto done; + } + if ((reason = pf_insert_src_node(snp, r_track, src, af)) != 0) + goto done; + if (snp->sn->rule.ptr != NULL) + counter_u64_add(snp->sn->rule.ptr->src_nodes, 1); + } else { + if (rule->max_src_states && + snp->sn->states >= rule->max_src_states) { + counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES], + 1); + reason = PFRES_MAXSTATES; + goto done; + } + } + +done: + PF_HASHROW_UNLOCK(snp->sh); + + return (reason); +} + +/* + * Check if a source node can be removed and if yes then unlink + * it from the hash and free it. + */ +static void +pf_remove_src_node(struct pf_ksrc_node *sn) +{ + PF_SRC_NODE_LOCK_ASSERT(sn); + + if (sn->states > 0 || sn->expire > time_uptime) { + PF_SRC_NODE_UNLOCK(sn); + return; + } + + pf_unlink_src_node(sn); + PF_SRC_NODE_UNLOCK(sn); + + pf_free_src_node(sn); + counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); +} + +/* + * Unlink a source node from the source node hash. 
+ */
 void
 pf_unlink_src_node(struct pf_ksrc_node *src)
 {
+	PF_SRC_NODE_LOCK_ASSERT(src);
 
-	PF_HASHROW_ASSERT(&V_pf_srchash[pf_hashsrc(&src->addr, src->af)]);
 	LIST_REMOVE(src, entry);
-	if (src->rule.ptr)
+	if (src->type == PF_SN_LIMIT && src->rule.ptr)
 		counter_u64_add(src->rule.ptr->src_nodes, -1);
 }
 
@@ -947,11 +1056,34 @@
 		count++;
 	}
 
-	counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count);
-
 	return (count);
 }
 
+void
+pf_state_rm_src_node(struct pf_kstate *s, struct pf_ksrc_node *sn)
+{
+	struct pf_sn_item *sni, *tmp;
+
+	PF_STATE_LOCK_ASSERT(s);
+
+	SLIST_FOREACH_SAFE(sni, &s->src_nodes, next, tmp) {
+		PF_HASHROW_LOCK(sni->sh);
+		if (
+		    /* expire==1 means marked by pf_kill_src_nodes() */
+		    (sn == NULL && sni->sn->expire == 1) ||
+		    (sn != NULL && sni->sn == sn)
+		) {
+			SLIST_REMOVE(&s->src_nodes, sni, pf_sn_item, next);
+			if (sn != NULL)
+				sn->states--;
+			/* Unlock before free(); sni->sh dies with sni. */
+			PF_HASHROW_UNLOCK(sni->sh);
+			free(sni, M_PFKSNI);
+		} else
+			PF_HASHROW_UNLOCK(sni->sh);
+	}
+}
+
 void
 pf_mtag_initialize(void)
 {
@@ -1978,36 +2108,35 @@
 	V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z);
 }
 
-static void
-pf_src_tree_remove_state(struct pf_kstate *s)
+/*
+ * Go over all src nodes of the given state and detach them from it.
+ * If the nodes are linked in their hash mark them for expiry.
+ */
+void
+pf_src_tree_remove_state(struct pf_kstate *s, int linked)
 {
-	struct pf_ksrc_node *sn;
-	struct pf_srchash *sh;
-	uint32_t timeout;
+	struct pf_sn_item *sni;
+	uint32_t timeout;
+
+	PF_STATE_LOCK_ASSERT(s);
 
 	timeout = s->rule.ptr->timeout[PFTM_SRC_NODE] ?
s->rule.ptr->timeout[PFTM_SRC_NODE] : V_pf_default_rule.timeout[PFTM_SRC_NODE]; - if (s->src_node != NULL) { - sn = s->src_node; - sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)]; - PF_HASHROW_LOCK(sh); - if (s->src.tcp_est) - --sn->conn; - if (--sn->states == 0) - sn->expire = time_uptime + timeout; - PF_HASHROW_UNLOCK(sh); + while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) { + SLIST_REMOVE_HEAD(&s->src_nodes, next); + if (linked) { + PF_SRC_NODE_LOCK(sni->sn); + if (s->src.tcp_est) + --sni->sn->conn; + if (--sni->sn->states == 0) + sni->sn->expire = time_uptime + timeout; + PF_SRC_NODE_UNLOCK(sni->sn); + } + free(sni, M_PFKSNI); } - if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { - sn = s->nat_src_node; - sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)]; - PF_HASHROW_LOCK(sh); - if (--sn->states == 0) - sn->expire = time_uptime + timeout; - PF_HASHROW_UNLOCK(sh); - } - s->src_node = s->nat_src_node = NULL; + } /* @@ -2044,7 +2173,7 @@ } LIST_REMOVE(s, entry); - pf_src_tree_remove_state(s); + pf_src_tree_remove_state(s, 1); if (V_pfsync_delete_state_ptr != NULL) V_pfsync_delete_state_ptr(s); @@ -4175,7 +4304,7 @@ struct pf_kruleset *ruleset = NULL; struct pf_krule_slist match_rules; struct pf_krule_item *ri; - struct pf_ksrc_node *nsn = NULL; + struct pf_ksrc_node_ptr snps[PF_SN_MAX]; struct tcphdr *th = &pd->hdr.tcp; struct pf_state_key *sk = NULL, *nk = NULL; u_short reason; @@ -4184,6 +4313,7 @@ int asd = 0; int match = 0; int state_icmp = 0; + int i; u_int16_t sport = 0, dport = 0; u_int16_t bproto_sum = 0, bip_sum = 0; u_int8_t icmptype = 0, icmpcode = 0; @@ -4191,6 +4321,10 @@ PF_RULES_RASSERT(); + bzero(snps, sizeof(snps)); + for (i = 0; i < PF_SN_MAX; i++) + snps[i].type = i; + if (inp != NULL) { INP_LOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; @@ -4250,7 +4384,7 @@ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); /* check packet for BINAT/NAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, 
direction, kif, &nsn, &sk, + if ((nr = pf_get_translation(pd, m, off, direction, kif, snps, &sk, &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) { KASSERT(sk != NULL, ("%s: null sk", __func__)); KASSERT(nk != NULL, ("%s: null nk", __func__)); @@ -4522,7 +4656,8 @@ if (!state_icmp && (r->keep_state || nr != NULL || (pd->flags & PFDESC_TCP_NORM))) { int action; - action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off, + + action = pf_create_state(r, nr, a, pd, snps, nk, sk, m, off, sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum, hdrlen, &match_rules); if (action != PF_PASS) { @@ -4572,17 +4707,18 @@ static int pf_create_state(struct pf_krule *r, struct pf_krule *nr, struct pf_krule *a, - struct pf_pdesc *pd, struct pf_ksrc_node *nsn, struct pf_state_key *nk, - struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport, - u_int16_t dport, int *rewrite, struct pfi_kkif *kif, struct pf_kstate **sm, - int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen, - struct pf_krule_slist *match_rules) + struct pf_pdesc *pd, struct pf_ksrc_node_ptr *snps, + struct pf_state_key *nk, struct pf_state_key *sk, struct mbuf *m, int off, + u_int16_t sport, u_int16_t dport, int *rewrite, struct pfi_kkif *kif, + struct pf_kstate **sm, int tag, u_int16_t bproto_sum, u_int16_t bip_sum, + int hdrlen, struct pf_krule_slist *match_rules) { struct pf_kstate *s = NULL; - struct pf_ksrc_node *sn = NULL; struct tcphdr *th = &pd->hdr.tcp; u_int16_t mss = V_tcp_mssdflt; - u_short reason; + u_short reason, sn_reason = 0; + struct pf_sn_item *sni; + int i; /* check maximums */ if (r->max_states && @@ -4591,19 +4727,20 @@ REASON_SET(&reason, PFRES_MAXSTATES); goto csfailed; } - /* src node for filter rule */ - if ((r->rule_flag & PFRULE_SRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto csfailed; - } - /* src node for translation rule */ - if (nr != NULL && 
(nr->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto csfailed; + + /* + * If this rule has source tracking find or create a source + * node of PF_SN_LIMIT type. This node will be used for + * providing connection limit per source. + */ + if (r->rule_flag & PFRULE_SRCTRACK) { + if ((sn_reason = check_source_limits(&snps[PF_SN_LIMIT], r, + pd->src, pd->af)) != 0) { + REASON_SET(&reason, sn_reason); + goto csfailed; + } } + s = pf_alloc_state(M_NOWAIT); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); @@ -4612,8 +4749,10 @@ s->rule.ptr = r; s->nat_rule.ptr = nr; s->anchor.ptr = a; + bcopy(match_rules, &s->match_rules, sizeof(s->match_rules)); STATE_INC_COUNTERS(s); + if (r->allow_opts) s->state_flags |= PFSTATE_ALLOWOPTS; if (r->rule_flag & PFRULE_STATESLOPPY) @@ -4637,6 +4776,8 @@ s->state_flags |= pd->act.flags; if (nr != NULL) s->log |= nr->log & PF_LOG_ALL; + SLIST_INIT(&s->src_nodes); + switch (pd->proto) { case IPPROTO_TCP: s->src.seqlo = ntohl(th->th_seq); @@ -4692,12 +4833,9 @@ } if (r->rt) { - if (pf_map_addr(pd->af, r, pd->src, &s->rt_addr, NULL, &sn)) { - REASON_SET(&reason, PFRES_MAPFAILED); - pf_src_tree_remove_state(s); - s->timeout = PFTM_UNLINKED; - STATE_DEC_COUNTERS(s); - pf_free_state(s); + /* pf_map_addr increases the reason counters */ + if ((reason = pf_map_addr(pd->af, r, pd->src, &s->rt_addr, NULL, + &snps[PF_SN_ROUTE])) != 0) { goto csfailed; } s->rt_kif = r->rpool.cur->kif; @@ -4707,22 +4845,11 @@ s->creation = time_uptime; s->expire = time_uptime; - if (sn != NULL) - s->src_node = sn; - if (nsn != NULL) { - /* XXX We only modify one side for now. 
*/ - PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); - s->nat_src_node = nsn; - } if (pd->proto == IPPROTO_TCP) { if (s->state_flags & PFSTATE_SCRUB_TCP && pf_normalize_tcp_init(m, off, pd, th, &s->src, &s->dst)) { REASON_SET(&reason, PFRES_MEMORY); - pf_src_tree_remove_state(s); - s->timeout = PFTM_UNLINKED; - STATE_DEC_COUNTERS(s); - pf_free_state(s); - return (PF_DROP); + goto csfailed; } if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub && pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, @@ -4731,11 +4858,7 @@ DPFPRINTF(PF_DEBUG_URGENT, ("pf_normalize_tcp_stateful failed on first " "pkt\n")); - pf_src_tree_remove_state(s); - s->timeout = PFTM_UNLINKED; - STATE_DEC_COUNTERS(s); - pf_free_state(s); - return (PF_DROP); + goto csfailed; } } s->direction = pd->dir; @@ -4759,14 +4882,46 @@ (pd->dir == PF_IN) ? sk : nk, (pd->dir == PF_IN) ? nk : sk, s)) { REASON_SET(&reason, PFRES_STATEINS); - pf_src_tree_remove_state(s); - s->timeout = PFTM_UNLINKED; - STATE_DEC_COUNTERS(s); - pf_free_state(s); - return (PF_DROP); + goto csfailed; } else *sm = s; + /* + * Source nodes might have been inserted or found by: + * - NAT rules: pf_test() -> pf_get_translation() -> pf_map_addr() + * - route-to rules: pf_map_addr() just above + * - source tracking: check_source_limits() just above + * + * All of those functions return unlocked for 2 reasons: + * - Some of them, like the NAT rules, would require holding the lock + * during rule testing all the way until here. That would be too long. + * - They would map to the same src hash row. + * + * Therefore we need to find the nodes again. But we have pointers, + * so we can as well just check if those are still pointing to valid + * source nodes. 
+ */
+	for (i = 0; i < PF_SN_MAX; i++) {
+		if (snps[i].sn == NULL)
+			continue;
+		PF_HASHROW_LOCK(snps[i].sh);
+		if (pf_find_src_node_ptr(&snps[i])) {
+			sni = malloc(sizeof(struct pf_sn_item), M_PFKSNI, M_NOWAIT);
+			if (sni == NULL) {
+				REASON_SET(&reason, PFRES_MEMORY);
+				PF_HASHROW_UNLOCK(snps[i].sh); goto csfailed;
+			}
+			sni->sn = snps[i].sn;
+			sni->sh = snps[i].sh;
+			SLIST_INSERT_HEAD(&s->src_nodes, sni, next);
+		} else {
+			snps[i].sn = NULL; /* The SN is gone. */
+		}
+		PF_HASHROW_UNLOCK(snps[i].sh);
+	}
+
+	STATE_INC_COUNTERS(s);
+
 	if (tag > 0)
 		s->tag = tag;
 	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
@@ -4811,34 +4966,28 @@
 	if (nk != NULL)
 		uma_zfree(V_pf_state_key_z, nk);
 
-	if (sn != NULL) {
-		struct pf_srchash *sh;
-
-		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
-		PF_HASHROW_LOCK(sh);
-		if (--sn->states == 0 && sn->expire == 0) {
-			pf_unlink_src_node(sn);
-			uma_zfree(V_pf_sources_z, sn);
-			counter_u64_add(
-			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
-		}
-		PF_HASHROW_UNLOCK(sh);
+	if (s) {
+		/* Detach source nodes from this state. */
+		pf_src_tree_remove_state(s, 1); /* performs sn->states-- */
+		s->timeout = PFTM_UNLINKED;
+		pf_free_state(s);
 	}
-	if (nsn != sn && nsn != NULL) {
-		struct pf_srchash *sh;
+	/*
+	 * There's a chance that source nodes have been created together
+	 * with this state. Since the state creation has failed we can delete
+	 * those source nodes too. 
+ */ + for (i = 0; i < PF_SN_MAX; i++) { + if (snps[i].sn == NULL) + continue; - sh = &V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)]; - PF_HASHROW_LOCK(sh); - if (--nsn->states == 0 && nsn->expire == 0) { - pf_unlink_src_node(nsn); - uma_zfree(V_pf_sources_z, nsn); - counter_u64_add( - V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); - } - PF_HASHROW_UNLOCK(sh); + PF_SRC_NODE_LOCK(snps[i].sn); + if (s == NULL) + snps[i].sn->states--; + + pf_remove_src_node(snps[i].sn); } - return (PF_DROP); } @@ -5169,7 +5318,7 @@ pf_set_protostate(*state, pdst, TCPS_ESTABLISHED); if (src->state == TCPS_ESTABLISHED && - (*state)->src_node != NULL && + !SLIST_EMPTY(&(*state)->src_nodes) && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); @@ -5340,7 +5489,7 @@ if (dst->state == TCPS_SYN_SENT) { pf_set_protostate(*state, pdst, TCPS_ESTABLISHED); if (src->state == TCPS_ESTABLISHED && - (*state)->src_node != NULL && + !SLIST_EMPTY(&(*state)->src_nodes) && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); @@ -5358,7 +5507,7 @@ pf_set_protostate(*state, PF_PEER_BOTH, TCPS_ESTABLISHED); dst->state = src->state = TCPS_ESTABLISHED; - if ((*state)->src_node != NULL && + if (!SLIST_EMPTY(&(*state)->src_nodes) && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); @@ -5425,7 +5574,7 @@ (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); - } else if ((*state)->src_node != NULL && + } else if (!SLIST_EMPTY(&(*state)->src_nodes) && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); @@ -6442,13 +6591,16 @@ struct ip *ip; struct ifnet *ifp = NULL; struct pf_addr naddr; - struct pf_ksrc_node *sn = NULL; + struct pf_ksrc_node_ptr snp; int error = 0; uint16_t ip_len, ip_off; int r_rt, r_dir; KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); + bzero(&snp, sizeof(snp)); + snp.type = PF_SN_ROUTE; + if (s) { r_rt = 
s->rt; r_dir = s->direction; @@ -6524,7 +6676,7 @@ goto bad_locked; } pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, - &naddr, NULL, &sn); + &naddr, NULL, &snp); if (!PF_AZERO(&naddr, AF_INET)) dst.sin_addr.s_addr = naddr.v4.s_addr; ifp = r->rpool.cur->kif ? @@ -6657,11 +6809,14 @@ struct ip6_hdr *ip6; struct ifnet *ifp = NULL; struct pf_addr naddr; - struct pf_ksrc_node *sn = NULL; + struct pf_ksrc_node_ptr snp; int r_rt, r_dir; KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); + bzero(&snp, sizeof(snp)); + snp.type = PF_SN_ROUTE; + if (s) { r_rt = s->rt; r_dir = s->direction; @@ -6737,7 +6892,7 @@ goto bad_locked; } pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, - &naddr, NULL, &sn); + &naddr, NULL, &snp); if (!PF_AZERO(&naddr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst.sin6_addr, &naddr, AF_INET6); @@ -7590,23 +7745,17 @@ } if (s != NULL) { struct pf_krule_item *ri; - + struct pf_sn_item *sni; if (s->nat_rule.ptr != NULL) { pf_counter_u64_add_protected(&s->nat_rule.ptr->packets[dirndx], 1); pf_counter_u64_add_protected(&s->nat_rule.ptr->bytes[dirndx], pd.tot_len); } - if (s->src_node != NULL) { - counter_u64_add(s->src_node->packets[dirndx], + SLIST_FOREACH(sni, &s->src_nodes, next) { + counter_u64_add(sni->sn->packets[dirndx], 1); - counter_u64_add(s->src_node->bytes[dirndx], - pd.tot_len); - } - if (s->nat_src_node != NULL) { - counter_u64_add(s->nat_src_node->packets[dirndx], - 1); - counter_u64_add(s->nat_src_node->bytes[dirndx], + counter_u64_add(sni->sn->bytes[dirndx], pd.tot_len); } dirndx = (dir == s->direction) ? 
0 : 1; @@ -8098,22 +8247,18 @@ pf_counter_u64_add_protected(&a->bytes[dirndx], pd.tot_len); } if (s != NULL) { + struct pf_sn_item *sni; + if (s->nat_rule.ptr != NULL) { pf_counter_u64_add_protected(&s->nat_rule.ptr->packets[dirndx], 1); pf_counter_u64_add_protected(&s->nat_rule.ptr->bytes[dirndx], pd.tot_len); } - if (s->src_node != NULL) { - counter_u64_add(s->src_node->packets[dirndx], + SLIST_FOREACH(sni, &s->src_nodes, next) { + counter_u64_add(sni->sn->packets[dirndx], 1); - counter_u64_add(s->src_node->bytes[dirndx], - pd.tot_len); - } - if (s->nat_src_node != NULL) { - counter_u64_add(s->nat_src_node->packets[dirndx], - 1); - counter_u64_add(s->nat_src_node->bytes[dirndx], + counter_u64_add(sni->sn->bytes[dirndx], pd.tot_len); } dirndx = (dir == s->direction) ? 0 : 1; diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -222,7 +222,7 @@ static int pf_getstate(struct pfioc_nv *); static int pf_getstatus(struct pfioc_nv *); static int pf_clear_tables(void); -static void pf_clear_srcnodes(struct pf_ksrc_node *); +static void pf_clear_srcnodes(void); static void pf_kill_srcnodes(struct pfioc_src_node_kill *); static int pf_keepcounters(struct pfioc_nv *); static void pf_tbladdr_copyout(struct pf_addr_wrap *); @@ -1540,7 +1540,22 @@ out->states = in->states; out->conn = in->conn; out->af = in->af; - out->ruletype = in->ruletype; + + /* + * Src Nodes have their own enum. Convert its values for compatibility + * with the old pfctl. 
+ */ + switch (in->type) { + case PF_SN_LIMIT: + out->ruletype = PF_PASS; + break; + case PF_SN_NAT: + out->ruletype = PF_NAT; + break; + case PF_SN_RDR: + out->ruletype = PF_RDR; + break; + } out->creation = secs - in->creation; if (out->expire > secs) @@ -5500,8 +5515,7 @@ struct pf_src_node *p, *pstore; uint32_t i, nr = 0; - for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; - i++, sh++) { + for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH(n, &sh->nodes, entry) nr++; @@ -5546,7 +5560,7 @@ } case DIOCCLRSRCNODES: { - pf_clear_srcnodes(NULL); + pf_clear_srcnodes(); pf_purge_expired_src_nodes(); break; } @@ -5670,6 +5684,7 @@ pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_version) { bzero(sp, sizeof(union pfsync_state_union)); + struct pf_sn_item *sni; /* copy from state key */ sp->pfs_1301.key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; @@ -5724,10 +5739,12 @@ __func__, msg_version); } - if (st->src_node) - sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE; - if (st->nat_src_node) - sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE; + SLIST_FOREACH(sni, &st->src_nodes, next) { + if (sni->sn->type == PF_SN_LIMIT) + sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE; + else + sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE; + } sp->pfs_1301.id = st->id; sp->pfs_1301.creatorid = st->creatorid; @@ -5756,6 +5773,8 @@ void pf_state_export(struct pf_state_export *sp, struct pf_kstate *st) { + struct pf_sn_item *sni; + bzero(sp, sizeof(*sp)); sp->version = PF_STATE_VERSION; @@ -5787,13 +5806,17 @@ sp->direction = st->direction; sp->log = st->log; sp->timeout = st->timeout; + /* 8 bits for the old libpfctl, 16 bits for the new libpfctl */ sp->state_flags_compat = st->state_flags; sp->state_flags = htons(st->state_flags); - if (st->src_node) - sp->sync_flags |= PFSYNC_FLAG_SRCNODE; - if (st->nat_src_node) - sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; + + SLIST_FOREACH(sni, &st->src_nodes, 
next) { + if (sni->sn->type == PF_SN_LIMIT) + sp->sync_flags |= PFSYNC_FLAG_SRCNODE; + else + sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; + } sp->id = st->id; sp->creatorid = st->creatorid; @@ -6020,9 +6043,11 @@ } static void -pf_clear_srcnodes(struct pf_ksrc_node *n) +pf_clear_srcnodes(void) { + struct pf_ksrc_node *n; struct pf_kstate *s; + struct pf_srchash *sh; int i; for (i = 0; i <= pf_hashmask; i++) { @@ -6030,30 +6055,23 @@ PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { - if (n == NULL || n == s->src_node) - s->src_node = NULL; - if (n == NULL || n == s->nat_src_node) - s->nat_src_node = NULL; + /* + * No need to care about SN's counters, since it will + * be removed soon. Pretend that it's unlinked already. + */ + pf_src_tree_remove_state(s, 0); } PF_HASHROW_UNLOCK(ih); } - if (n == NULL) { - struct pf_srchash *sh; - - for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; - i++, sh++) { - PF_HASHROW_LOCK(sh); - LIST_FOREACH(n, &sh->nodes, entry) { - n->expire = 1; - n->states = 0; - } - PF_HASHROW_UNLOCK(sh); + for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; + i++, sh++) { + PF_HASHROW_LOCK(sh); + LIST_FOREACH(n, &sh->nodes, entry) { + n->expire = 1; + n->states = 0; } - } else { - /* XXX: hash slot should already be locked here. 
*/ - n->expire = 1; - n->states = 0; + PF_HASHROW_UNLOCK(sh); } } @@ -6090,10 +6108,7 @@ PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { - if (s->src_node && s->src_node->expire == 1) - s->src_node = NULL; - if (s->nat_src_node && s->nat_src_node->expire == 1) - s->nat_src_node = NULL; + pf_src_tree_remove_state(s, 0); } PF_HASHROW_UNLOCK(ih); } @@ -6476,7 +6491,7 @@ pf_clear_all_states(); - pf_clear_srcnodes(NULL); + pf_clear_srcnodes(); /* status does not use malloced mem so no need to cleanup */ /* fingerprints and interfaces have their own cleanup code */ diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -64,7 +64,7 @@ uint16_t, int, struct pf_kanchor_stackframe *); static int pf_get_sport(sa_family_t, uint8_t, struct pf_krule *, struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *, - uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **); + uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node_ptr *); #define mix(a,b,c) \ do { \ @@ -218,13 +218,13 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r, struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr, uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low, - uint16_t high, struct pf_ksrc_node **sn) + uint16_t high, struct pf_ksrc_node_ptr *snp) { struct pf_state_key_cmp key; struct pf_addr init_addr; bzero(&init_addr, sizeof(init_addr)); - if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + if (pf_map_addr(af, r, saddr, naddr, &init_addr, snp)) return (1); bzero(&key, sizeof(key)); @@ -293,7 +293,7 @@ * pick a different source address since we're out * of free port choices for the current one. 
*/ - if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + if (pf_map_addr(af, r, saddr, naddr, &init_addr, snp)) return (1); break; case PF_POOL_NONE: @@ -310,7 +310,7 @@ pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r, struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr, uint16_t dport, struct pf_addr *naddr, uint16_t *nport, - struct pf_ksrc_node **sn) + struct pf_ksrc_node_ptr *snp) { uint16_t psmask, low, highmask; uint16_t i, ahigh, cut; @@ -330,13 +330,13 @@ for (i = cut; i <= ahigh; i++) { low = (i << ashift) | psmask; if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport, - naddr, nport, low, low | highmask, sn)) + naddr, nport, low, low | highmask, snp)) return (0); } for (i = cut - 1; i > 0; i--) { low = (i << ashift) | psmask; if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport, - naddr, nport, low, low | highmask, sn)) + naddr, nport, low, low | highmask, snp)) return (0); } return (1); @@ -344,29 +344,34 @@ int pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, - struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_ksrc_node **sn) + struct pf_addr *naddr, struct pf_addr *init_addr, + struct pf_ksrc_node_ptr *snp) { + u_short sn_reason = 0; struct pf_kpool *rpool = &r->rpool; struct pf_addr *raddr = NULL, *rmask = NULL; - /* Try to find a src_node if none was given and this - is a sticky-address rule. */ - if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && + /* + * Try to find an existing src_node or create a new one if none was + * given and this is a sticky-address rule. + */ + if (snp->sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) - *sn = pf_find_src_node(saddr, r, af, 0); + pf_find_src_node(saddr, r, af, snp); - /* If a src_node was found or explicitly given and it has a non-zero - route address, use this address. 
A zeroed address is found if the - src node was created just a moment ago in pf_create_state and it - needs to be filled in with routing decision calculated here. */ - if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { - /* If the supplied address is the same as the current one we've + if (snp->sn != NULL) { + /* + * If the supplied address is the same as the current one we've * been asked before, so tell the caller that there's no other - * address to be had. */ - if (PF_AEQ(naddr, &(*sn)->raddr, af)) - return (1); + * address to be had. + */ + if (PF_AEQ(naddr, &snp->sn->raddr, af)) { + sn_reason = 1; + goto done_sn_mtx; + } + + PF_ACPY(naddr, &(snp->sn)->raddr, af); - PF_ACPY(naddr, &(*sn)->raddr, af); if (V_pf_status.debug >= PF_DEBUG_NOISY) { printf("pf_map_addr: src tracking maps "); pf_print_host(saddr, 0, af); @@ -374,15 +379,17 @@ pf_print_host(naddr, 0, af); printf("\n"); } - return (0); + + sn_reason = 0; + goto done_sn_mtx; } mtx_lock(&rpool->mtx); /* Find the route using chosen algorithm. Store the found route in src_node if it was given or found. 
*/ if (rpool->cur->addr.type == PF_ADDR_NOROUTE) { - mtx_unlock(&rpool->mtx); - return (1); + sn_reason = PFRES_MAPFAILED; + goto done_pool_mtx; } if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { switch (af) { @@ -391,8 +398,8 @@ if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) { - mtx_unlock(&rpool->mtx); - return (1); + sn_reason = PFRES_MAPFAILED; + goto done_pool_mtx; } raddr = &rpool->cur->addr.p.dyn->pfid_addr4; rmask = &rpool->cur->addr.p.dyn->pfid_mask4; @@ -403,8 +410,8 @@ if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) { - mtx_unlock(&rpool->mtx); - return (1); + sn_reason = PFRES_MAPFAILED; + goto done_pool_mtx; } raddr = &rpool->cur->addr.p.dyn->pfid_addr6; rmask = &rpool->cur->addr.p.dyn->pfid_mask6; @@ -413,8 +420,8 @@ } } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) { - mtx_unlock(&rpool->mtx); - return (1); /* unsupported */ + sn_reason = PFRES_MAPFAILED; + goto done_pool_mtx; /* unsupported */ } } else { raddr = &rpool->cur->addr.v.a.addr; @@ -502,8 +509,8 @@ /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; - mtx_unlock(&rpool->mtx); - return (1); + sn_reason = PFRES_MAPFAILED; + goto done_pool_mtx; } } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { rpool->tblidx = -1; @@ -512,8 +519,8 @@ /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; - mtx_unlock(&rpool->mtx); - return (1); + sn_reason = PFRES_MAPFAILED; + goto done_pool_mtx; } } else { raddr = &rpool->cur->addr.v.a.addr; @@ -529,24 +536,37 @@ break; } } - if (*sn != NULL) - PF_ACPY(&(*sn)->raddr, naddr, af); - mtx_unlock(&rpool->mtx); + if (snp->sh) { + if ((sn_reason = pf_insert_src_node(snp, r, saddr, af)) != 0) + goto done_pool_mtx; - if (V_pf_status.debug >= PF_DEBUG_NOISY && - (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { - 
printf("pf_map_addr: selected address "); - pf_print_host(naddr, 0, af); - printf("\n"); + PF_ACPY(&(snp->sn)->raddr, naddr, af); + + if (V_pf_status.debug >= PF_DEBUG_NOISY) { + printf("pf_map_addr: selected address "); + pf_print_host(naddr, 0, af); + printf("\n"); + } } - return (0); +done_pool_mtx: + mtx_unlock(&rpool->mtx); + +done_sn_mtx: + if (snp->sh) + PF_HASHROW_UNLOCK(snp->sh); + + if (sn_reason) { + counter_u64_add(V_pf_status.counters[sn_reason], 1); + } + + return (sn_reason); } struct pf_krule * pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, - struct pfi_kkif *kif, struct pf_ksrc_node **sn, + struct pfi_kkif *kif, struct pf_ksrc_node_ptr *snps, struct pf_state_key **skp, struct pf_state_key **nkp, struct pf_addr *saddr, struct pf_addr *daddr, uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack) @@ -611,7 +631,8 @@ } if (r->rpool.mape.offset > 0) { if (pf_get_mape_sport(pd->af, pd->proto, r, saddr, - sport, daddr, dport, naddr, nport, sn)) { + sport, daddr, dport, naddr, nport, &snps[PF_SN_NAT] + )) { DPFPRINTF(PF_DEBUG_MISC, ("pf: MAP-E port allocation (%u/%u/%u)" " failed\n", @@ -621,7 +642,7 @@ goto notrans; } } else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport, - daddr, dport, naddr, nport, low, high, sn)) { + daddr, dport, naddr, nport, low, high, &snps[PF_SN_NAT])) { DPFPRINTF(PF_DEBUG_MISC, ("pf: NAT proxy port allocation (%u-%u) failed\n", r->rpool.proxy_port[0], r->rpool.proxy_port[1])); @@ -695,7 +716,7 @@ } break; case PF_RDR: { - if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) + if (pf_map_addr(pd->af, r, saddr, naddr, NULL, &snps[PF_SN_RDR])) goto notrans; if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask, @@ -728,7 +749,8 @@ uma_zfree(V_pf_state_key_z, *nkp); uma_zfree(V_pf_state_key_z, *skp); *skp = *nkp = NULL; - *sn = NULL; + snps[PF_SN_NAT].sn = NULL; + snps[PF_SN_RDR].sn = NULL; return (NULL); } diff 
--git a/sys/netpfil/pf/pf_nv.c b/sys/netpfil/pf/pf_nv.c --- a/sys/netpfil/pf/pf_nv.c +++ b/sys/netpfil/pf/pf_nv.c @@ -927,8 +927,9 @@ nvlist_t * pf_state_to_nvstate(const struct pf_kstate *s) { - nvlist_t *nvl, *tmp; - uint32_t expire, flags = 0; + nvlist_t *nvl, *tmp; + uint32_t expire, flags = 0; + struct pf_sn_item *sni; nvl = nvlist_create(0); if (nvl == NULL) @@ -992,10 +993,13 @@ nvlist_add_number(nvl, "creatorid", s->creatorid); nvlist_add_number(nvl, "direction", s->direction); nvlist_add_number(nvl, "state_flags", s->state_flags); - if (s->src_node) - flags |= PFSYNC_FLAG_SRCNODE; - if (s->nat_src_node) - flags |= PFSYNC_FLAG_NATSRCNODE; + + SLIST_FOREACH(sni, &s->src_nodes, next) { + if (sni->sn->type == PF_SN_LIMIT) + flags |= PFSYNC_FLAG_SRCNODE; + else + flags |= PFSYNC_FLAG_NATSRCNODE; + } nvlist_add_number(nvl, "sync_flags", flags); return (nvl);