diff --git a/lib/libpfctl/libpfctl.h b/lib/libpfctl/libpfctl.h --- a/lib/libpfctl/libpfctl.h +++ b/lib/libpfctl/libpfctl.h @@ -209,7 +209,8 @@ uint64_t states_cur; uint64_t states_tot; - uint64_t src_nodes; + uint64_t src_nodes_tot; + uint64_t src_nodes[PF_SN_MAX]; uint16_t return_icmp; uint16_t return_icmp6; diff --git a/lib/libpfctl/libpfctl.c b/lib/libpfctl/libpfctl.c --- a/lib/libpfctl/libpfctl.c +++ b/lib/libpfctl/libpfctl.c @@ -805,7 +805,7 @@ rule->states_cur = nvlist_get_number(nvl, "states_cur"); rule->states_tot = nvlist_get_number(nvl, "states_tot"); - rule->src_nodes = nvlist_get_number(nvl, "src_nodes"); + rule->src_nodes_tot = nvlist_get_number(nvl, "src_nodes"); } static void @@ -1656,10 +1656,13 @@ { .type = PF_RT_TIMESTAMP, .off = _OUT(r.last_active_timestamp), .cb = snl_attr_get_uint64 }, { .type = PF_RT_STATES_CUR, .off = _OUT(r.states_cur), .cb = snl_attr_get_uint64 }, { .type = PF_RT_STATES_TOTAL, .off = _OUT(r.states_tot), .cb = snl_attr_get_uint64 }, - { .type = PF_RT_SRC_NODES, .off = _OUT(r.src_nodes), .cb = snl_attr_get_uint64 }, + { .type = PF_RT_SRC_NODES, .off = _OUT(r.src_nodes_tot), .cb = snl_attr_get_uint64 }, { .type = PF_RT_ANCHOR_CALL, .off = _OUT(anchor_call), .arg = (void*)MAXPATHLEN, .cb = snl_attr_copy_string }, { .type = PF_RT_RCV_IFNAME, .off = _OUT(r.rcv_ifname), .arg = (void*)IFNAMSIZ, .cb = snl_attr_copy_string }, { .type = PF_RT_MAX_SRC_CONN, .off = _OUT(r.max_src_conn), .cb = snl_attr_get_uint32 }, + { .type = PF_RT_SRC_NODES_LIMIT, .off = _OUT(r.src_nodes[PF_SN_LIMIT]), .cb = snl_attr_get_uint64 }, + { .type = PF_RT_SRC_NODES_NAT, .off = _OUT(r.src_nodes[PF_SN_NAT]), .cb = snl_attr_get_uint64 }, + { .type = PF_RT_SRC_NODES_ROUTE, .off = _OUT(r.src_nodes[PF_SN_ROUTE]), .cb = snl_attr_get_uint64 }, }; static struct snl_field_parser fp_getrule[] = {}; #undef _OUT diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c --- a/sbin/pfctl/pfctl.c +++ b/sbin/pfctl/pfctl.c @@ -1064,6 +1064,15 @@ rule->packets[1]), (unsigned long 
long)(rule->bytes[0] + rule->bytes[1]), (uintmax_t)rule->states_cur); + printf(" [ Source Nodes: %-6ju " + "Limit: %-6ju " + "NAT: %-6ju " + "Route-to: %-6ju " + "]\n", + (uintmax_t)rule->src_nodes_tot, + (uintmax_t)rule->src_nodes[PF_SN_LIMIT], + (uintmax_t)rule->src_nodes[PF_SN_NAT], + (uintmax_t)rule->src_nodes[PF_SN_ROUTE]); if (!(opts & PF_OPT_DEBUG)) printf(" [ Inserted: uid %u pid %u " "State Creations: %-6ju]\n", diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -622,6 +622,8 @@ #define PF_ALGNMNT(off) (((off) % 2) == 0) +enum pf_sn_types { PF_SN_LIMIT, PF_SN_NAT, PF_SN_ROUTE, PF_SN_MAX }; + #ifdef _KERNEL struct pf_kpooladdr { @@ -816,7 +818,7 @@ counter_u64_t states_cur; counter_u64_t states_tot; - counter_u64_t src_nodes; + counter_u64_t src_nodes[PF_SN_MAX]; u_int16_t return_icmp; u_int16_t return_icmp6; @@ -895,6 +897,7 @@ u_int32_t expire; sa_family_t af; u_int8_t ruletype; + enum pf_sn_types type; struct mtx *lock; }; #endif @@ -1093,8 +1096,8 @@ struct pfi_kkif *kif; struct pfi_kkif *orig_kif; /* The real kif, even if we're a floating state (i.e. if == V_pfi_all). */ struct pfi_kkif *rt_kif; - struct pf_ksrc_node *src_node; - struct pf_ksrc_node *nat_src_node; + struct pf_ksrc_node *sns[PF_SN_MAX];/* source nodes */ + struct pf_srchash *sh; /* source nodes hash row */ u_int64_t packets[2]; u_int64_t bytes[2]; u_int64_t creation; @@ -1108,9 +1111,10 @@ }; /* - * Size <= fits 11 objects per page on LP64. Try to not grow the struct beyond that. +* 6 cache lines per struct, 11 structs per page. +* Try to not grow the struct beyond that. 
*/ -_Static_assert(sizeof(struct pf_kstate) <= 372, "pf_kstate size crosses 372 bytes"); +_Static_assert(sizeof(struct pf_kstate) <= 384, "pf_kstate size crosses 384 bytes"); #endif /* @@ -1579,6 +1583,13 @@ struct pf_sctp_multihome_job; TAILQ_HEAD(pf_sctp_multihome_jobs, pf_sctp_multihome_job); +/* Variables accessible only during ruleset evaluation */ +struct pf_test_ctx { + struct pf_ksrc_node *sns[PF_SN_MAX];/* source nodes */ + struct pf_srchash *sh; /* source nodes hash row */ +}; + +/* Variables accessible during packet forwarding */ struct pf_pdesc { struct { int done; @@ -2332,9 +2343,12 @@ *mapping); extern void pf_udp_mapping_release(struct pf_udp_mapping *mapping); -extern struct pf_ksrc_node *pf_find_src_node(struct pf_addr *, - struct pf_krule *, sa_family_t, - struct pf_srchash **, bool); +u_short pf_find_src_node(struct pf_test_ctx *, + struct pf_pdesc *, struct pf_addr *, + struct pf_krule *, enum pf_sn_types); +u_short pf_insert_src_node(struct pf_test_ctx *, + struct pf_pdesc *pd, struct pf_addr *, + struct pf_krule *, enum pf_sn_types); extern void pf_unlink_src_node(struct pf_ksrc_node *); extern u_int pf_free_src_nodes(struct pf_ksrc_node_list *); extern void pf_print_state(struct pf_kstate *); @@ -2613,19 +2627,19 @@ struct pf_keth_rule **, struct pf_keth_rule **, int *); -u_short pf_map_addr(u_int8_t, struct pf_krule *, +u_short pf_map_addr(sa_family_t, struct pf_krule *, struct pf_addr *, struct pf_addr *, struct pfi_kkif **nkif, struct pf_addr *); -u_short pf_map_addr_sn(u_int8_t, struct pf_krule *, +u_short pf_map_addr_sn(struct pf_test_ctx *, + struct pf_pdesc *, struct pf_krule *, struct pf_addr *, struct pf_addr *, struct pfi_kkif **nkif, struct pf_addr *, - struct pf_ksrc_node **); -u_short pf_get_translation(struct pf_pdesc *, - int, struct pf_ksrc_node **, - struct pf_state_key **, struct pf_state_key **, - struct pf_addr *, struct pf_addr *, - uint16_t, uint16_t, struct pf_kanchor_stackframe *, - struct pf_krule **, + enum 
pf_sn_types); +u_short pf_get_translation(struct pf_test_ctx *, + struct pf_pdesc *, struct pf_state_key **, + struct pf_state_key **, struct pf_addr *, + struct pf_addr *, uint16_t, uint16_t, + struct pf_kanchor_stackframe *, struct pf_krule **, struct pf_udp_mapping **udp_mapping); struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -322,12 +322,12 @@ static int pf_test_rule(struct pf_krule **, struct pf_kstate **, struct pf_pdesc *, struct pf_krule **, struct pf_kruleset **, struct inpcb *); -static int pf_create_state(struct pf_krule *, struct pf_krule *, +static int pf_create_state(struct pf_test_ctx *, + struct pf_krule *, struct pf_krule *, struct pf_krule *, struct pf_pdesc *, - struct pf_ksrc_node *, struct pf_state_key *, - struct pf_state_key *, - u_int16_t, u_int16_t, int *, - struct pf_kstate **, int, u_int16_t, u_int16_t, + struct pf_state_key *, struct pf_state_key *, + u_int16_t, u_int16_t, int *, struct pf_kstate **, + int, u_int16_t, u_int16_t, struct pf_krule_slist *, struct pf_udp_mapping *); static int pf_state_key_addr_setup(struct pf_pdesc *, struct pf_state_key_cmp *, int); @@ -364,14 +364,15 @@ bool, u_int8_t); static struct pf_kstate *pf_find_state(struct pfi_kkif *, const struct pf_state_key_cmp *, u_int); -static int pf_src_connlimit(struct pf_kstate **); +static bool pf_find_src_node_ptr(struct pf_srchash *, + struct pf_ksrc_node *); +static void pf_fail_src_node(struct pf_ksrc_node *); +static int pf_src_connlimit(struct pf_kstate *); static int pf_match_rcvif(struct mbuf *, struct pf_krule *); static void pf_counters_inc(int, struct pf_pdesc *, struct pf_kstate *, struct pf_krule *, struct pf_krule *); static void pf_overload_task(void *v, int pending); -static u_short pf_insert_src_node(struct pf_ksrc_node **, - struct pf_krule *, struct pf_addr *, sa_family_t); static u_int pf_purge_expired_states(u_int, int); static 
void pf_purge_unlinked_rules(void); static int pf_mtag_uminit(void *, int, int); @@ -803,59 +804,72 @@ } static int -pf_src_connlimit(struct pf_kstate **state) +pf_src_connlimit(struct pf_kstate *state) { struct pf_overload_entry *pfoe; + struct pf_ksrc_node *sn = state->sns[PF_SN_LIMIT]; int bad = 0; + int ret = 1; - PF_STATE_LOCK_ASSERT(*state); - /* - * XXXKS: The src node is accessed unlocked! - * PF_SRC_NODE_LOCK_ASSERT((*state)->src_node); - */ + PF_STATE_LOCK_ASSERT(state); + KASSERT(sn != NULL, + ("pf_src_connlimit: state->sns[PF_SN_LIMIT] == NULL")); - (*state)->src_node->conn++; - (*state)->src.tcp_est = 1; - pf_add_threshold(&(*state)->src_node->conn_rate); + if (!pf_find_src_node_ptr(state->sh, sn)) { + ret = 0; + goto done_unlocked; + } - if ((*state)->rule->max_src_conn && - (*state)->rule->max_src_conn < - (*state)->src_node->conn) { + sn->conn++; + state->src.tcp_est = 1; + pf_add_threshold(&(sn->conn_rate)); + + if (state->rule->max_src_conn && + state->rule->max_src_conn < sn->conn) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1); bad++; } - if ((*state)->rule->max_src_conn_rate.limit && - pf_check_threshold(&(*state)->src_node->conn_rate)) { + if (state->rule->max_src_conn_rate.limit && + pf_check_threshold(&(sn->conn_rate))) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1); bad++; } - if (!bad) - return (0); + if (!bad) { + ret = 0; + goto done_locked; + } /* Kill this state. */ - (*state)->timeout = PFTM_PURGE; - pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED); + state->timeout = PFTM_PURGE; + pf_set_protostate(state, PF_PEER_BOTH, TCPS_CLOSED); - if ((*state)->rule->overload_tbl == NULL) - return (1); + if (state->rule->overload_tbl == NULL) { + ret = 1; + goto done_locked; + } /* Schedule overloading and flushing task. 
*/ pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT); - if (pfoe == NULL) - return (1); /* too bad :( */ + if (pfoe == NULL) { + ret = 1; /* too bad :( */ + goto done_locked; + } - bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr)); - pfoe->af = (*state)->key[PF_SK_WIRE]->af; - pfoe->rule = (*state)->rule; - pfoe->dir = (*state)->direction; + bcopy(&sn->addr, &pfoe->addr, sizeof(pfoe->addr)); + pfoe->af = state->key[PF_SK_WIRE]->af; + pfoe->rule = state->rule; + pfoe->dir = state->direction; PF_OVERLOADQ_LOCK(); SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next); PF_OVERLOADQ_UNLOCK(); taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask); - return (1); +done_locked: + PF_HASHROW_UNLOCK(state->sh); +done_unlocked: + return ret; } static void @@ -951,33 +965,61 @@ CURVNET_RESTORE(); } -/* - * Can return locked on failure, so that we can consistently - * allocate and insert a new one. - */ -struct pf_ksrc_node * -pf_find_src_node(struct pf_addr *src, struct pf_krule *rule, sa_family_t af, - struct pf_srchash **sh, bool returnlocked) +u_short +pf_find_src_node(struct pf_test_ctx *ctx, struct pf_pdesc *pd, + struct pf_addr *src, struct pf_krule *rule, enum pf_sn_types sn_type) { - struct pf_ksrc_node *n; + struct pf_ksrc_node *cur; counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1); - *sh = &V_pf_srchash[pf_hashsrc(src, af)]; - PF_HASHROW_LOCK(*sh); - LIST_FOREACH(n, &(*sh)->nodes, entry) - if (n->rule == rule && n->af == af && - ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) || - (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0))) - break; + /* First search of src node sets ctx->sh */ + if (ctx->sh == NULL) { + ctx->sh = &V_pf_srchash[pf_hashsrc(src, pd->af)]; + } else { +#ifdef INVARIANTS + if (ctx->sh != &V_pf_srchash[pf_hashsrc(src, pd->af)]) + panic("%s: source node hash mismatch", __func__); +#endif + } - if (n != NULL) { - n->states++; - PF_HASHROW_UNLOCK(*sh); - } else if (returnlocked == false) - 
PF_HASHROW_UNLOCK(*sh); + PF_HASHROW_LOCK(ctx->sh); - return (n); + LIST_FOREACH(cur, &(ctx->sh->nodes), entry) { + if (cur->rule == rule && + cur->af == pd->af && + cur->type == sn_type && + ((pd->af == AF_INET && + cur->addr.v4.s_addr == pd->src->v4.s_addr) || + (pd->af == AF_INET6 && bcmp(&(cur->addr), pd->src, + sizeof(*(pd->src))) == 0)) && + cur->expire != 1 /* Ignore nodes being killed */ + ) { + cur->states++; + ctx->sns[sn_type] = cur; + return (1); + } + } + + return (0); +} + +static bool +pf_find_src_node_ptr(struct pf_srchash *sh, struct pf_ksrc_node *sn) +{ + struct pf_ksrc_node *cur; + + KASSERT(sh != NULL, ("%s: sh is NULL", __func__)); + + counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1); + PF_HASHROW_LOCK(sh); + LIST_FOREACH(cur, &(sh->nodes), entry) { + if (cur == sn && + cur->expire != 1) /* Ignore nodes being killed */ + return true; + } + PF_HASHROW_UNLOCK(sh); + return false; } static void @@ -991,81 +1033,157 @@ uma_zfree(V_pf_sources_z, sn); } -static u_short -pf_insert_src_node(struct pf_ksrc_node **sn, struct pf_krule *rule, - struct pf_addr *src, sa_family_t af) +u_short +pf_insert_src_node(struct pf_test_ctx *ctx, struct pf_pdesc *pd, + struct pf_addr *src, struct pf_krule *rule, enum pf_sn_types sn_type) { - u_short reason = 0; - struct pf_srchash *sh = NULL; + u_short reason = 0; - KASSERT((rule->rule_flag & PFRULE_SRCTRACK || - rule->rpool.opts & PF_POOL_STICKYADDR), + KASSERT((rule->nr == -1 || rule->rule_flag & PFRULE_SRCTRACK || + rule->rpool.opts & PF_POOL_STICKYADDR), ("%s for non-tracking rule %p", __func__, rule)); - if (*sn == NULL) - *sn = pf_find_src_node(src, rule, af, &sh, true); + /* + * This function must be used after pf_find_src_node() + * which will set ctx->sh. 
+ */ + PF_HASHROW_ASSERT(ctx->sh); - if (*sn == NULL) { - PF_HASHROW_ASSERT(sh); + KASSERT(ctx->sns[sn_type] == NULL, ("%s: ctx->sns[%d] not NULL", + __func__, sn_type)); - if (rule->max_src_nodes && - counter_u64_fetch(rule->src_nodes) >= rule->max_src_nodes) { - counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1); - PF_HASHROW_UNLOCK(sh); - reason = PFRES_SRCLIMIT; - goto done; - } + ctx->sns[sn_type] = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); + if (ctx->sns[sn_type] == NULL) { + reason = PFRES_MEMORY; + goto done; + } - (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); - if ((*sn) == NULL) { - PF_HASHROW_UNLOCK(sh); + for (int i = 0; i < 2; i++) { + ctx->sns[sn_type]->bytes[i] = counter_u64_alloc(M_NOWAIT); + ctx->sns[sn_type]->packets[i] = counter_u64_alloc(M_NOWAIT); + + if (ctx->sns[sn_type]->bytes[i] == NULL || + ctx->sns[sn_type]->packets[i] == NULL) { + pf_free_src_node(ctx->sns[sn_type]); reason = PFRES_MEMORY; + ctx->sns[sn_type] = NULL; goto done; } + } - for (int i = 0; i < 2; i++) { - (*sn)->bytes[i] = counter_u64_alloc(M_NOWAIT); - (*sn)->packets[i] = counter_u64_alloc(M_NOWAIT); + pf_init_threshold(&(ctx->sns[sn_type])->conn_rate, + rule->max_src_conn_rate.limit, + rule->max_src_conn_rate.seconds); - if ((*sn)->bytes[i] == NULL || (*sn)->packets[i] == NULL) { - pf_free_src_node(*sn); - PF_HASHROW_UNLOCK(sh); - reason = PFRES_MEMORY; - goto done; - } - } + MPASS(ctx->sns[sn_type]->lock == NULL); + ctx->sns[sn_type]->lock = &(ctx->sh->lock); - pf_init_threshold(&(*sn)->conn_rate, - rule->max_src_conn_rate.limit, - rule->max_src_conn_rate.seconds); + ctx->sns[sn_type]->af = pd->af; + ctx->sns[sn_type]->rule = rule; + PF_ACPY(&(ctx->sns[sn_type]->addr), src, pd->af); + LIST_INSERT_HEAD(&(ctx->sh->nodes), ctx->sns[sn_type], entry); + ctx->sns[sn_type]->creation = time_uptime; + ctx->sns[sn_type]->ruletype = rule->action; + ctx->sns[sn_type]->states = 1; + ctx->sns[sn_type]->type = sn_type; - MPASS((*sn)->lock == NULL); - (*sn)->lock = 
&sh->lock; + counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1); + counter_u64_add(rule->src_nodes[sn_type], 1); - (*sn)->af = af; - (*sn)->rule = rule; - PF_ACPY(&(*sn)->addr, src, af); - LIST_INSERT_HEAD(&sh->nodes, *sn, entry); - (*sn)->creation = time_uptime; - (*sn)->ruletype = rule->action; - (*sn)->states = 1; - if ((*sn)->rule != NULL) - counter_u64_add((*sn)->rule->src_nodes, 1); - PF_HASHROW_UNLOCK(sh); - counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1); - } else { - if (rule->max_src_states && - (*sn)->states >= rule->max_src_states) { - counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES], +done: + /* Returns locked */ + return (reason); +} + +static u_short +pf_check_source_limits(struct pf_test_ctx *ctx, struct pf_pdesc *pd, + struct pf_krule *rule) +{ + struct pf_krule *r_track = rule; + u_short reason = 0; + + /* + * Only src tracking uses this function! Load balancing code uses + * pf_map_addr_sn() which calls the low level functions directly. + */ + KASSERT(rule->rule_flag & PFRULE_SRCTRACK, + ("%s for non-tracking rule %p", __func__, rule)); + + /* + * Rules with global source tracking store the counters of connected + * sources and their states in the default rule. 
+ */ + if (!(rule->rule_flag & PFRULE_RULESRCTRACK)) + r_track = &V_pf_default_rule; + + pf_find_src_node(ctx, pd, pd->src, r_track, PF_SN_LIMIT); + + PF_HASHROW_ASSERT(ctx->sh); /* set by pf_find_src_node() */ + + if (ctx->sns[PF_SN_LIMIT] == NULL) { + if (rule->max_src_nodes && + counter_u64_fetch(r_track->src_nodes[PF_SN_LIMIT]) >= rule->max_src_nodes) { + counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1); reason = PFRES_SRCLIMIT; goto done; } + if ((reason = pf_insert_src_node(ctx, pd, pd->src, r_track, PF_SN_LIMIT)) != 0) + goto done; + } else { + if (rule->max_src_states && + ctx->sns[PF_SN_LIMIT]->states > rule->max_src_states) { + counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES], + 1); + reason = PFRES_MAXSTATES; + goto done; + } } + done: + if (reason != 0) + pf_fail_src_node(ctx->sns[PF_SN_LIMIT]); + + PF_HASHROW_UNLOCK(ctx->sh); return (reason); } +static void +pf_fail_src_node(struct pf_ksrc_node *sn) +{ + PF_SRC_NODE_LOCK_ASSERT(sn); + + /* + * While evaluating the ruleset a source node is created pretending + * that there is already a state created. This is done so that such + * SN will not be expired, as SNs with states are valid indefinitely. + * + * In case state creation is never reached, or it fails, and no other + * states have attached to this SN in the meantime, the SN can be + * removed. + */ + + sn->states--; + + if (sn->states == 0) { + if (sn->expire == 0) { + /* Src node created while parsing ruleset. */ + pf_unlink_src_node(sn); + pf_free_src_node(sn); + counter_u64_add( + V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); + } else { + /* + * Src node already existing. Last removed + * state sets the expiry of the src node. + */ + sn->expire = time_uptime + sn->rule->timeout[PFTM_SRC_NODE] ? 
+ sn->rule->timeout[PFTM_SRC_NODE] : + V_pf_default_rule.timeout[PFTM_SRC_NODE]; + } + } +} + void pf_unlink_src_node(struct pf_ksrc_node *src) { @@ -1073,7 +1191,7 @@ LIST_REMOVE(src, entry); if (src->rule) - counter_u64_add(src->rule->src_nodes, -1); + counter_u64_add(src->rule->src_nodes[src->type], -1); } u_int @@ -2501,6 +2619,7 @@ for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next) + /* Src nodes with attached states are valid indefinitely. */ if (cur->states == 0 && cur->expire <= time_uptime) { pf_unlink_src_node(cur); LIST_INSERT_HEAD(&freelist, cur, entry); @@ -2517,30 +2636,26 @@ static void pf_src_tree_remove_state(struct pf_kstate *s) { - struct pf_ksrc_node *sn; - uint32_t timeout; + enum pf_sn_types sn_type; - timeout = s->rule->timeout[PFTM_SRC_NODE] ? - s->rule->timeout[PFTM_SRC_NODE] : - V_pf_default_rule.timeout[PFTM_SRC_NODE]; + for (sn_type=0; sn_typesns[sn_type] == NULL || + !pf_find_src_node_ptr(s->sh, s->sns[sn_type]) + ) + continue; - if (s->src_node != NULL) { - sn = s->src_node; - PF_SRC_NODE_LOCK(sn); if (s->src.tcp_est) - --sn->conn; - if (--sn->states == 0) - sn->expire = time_uptime + timeout; - PF_SRC_NODE_UNLOCK(sn); + --(s->sns[sn_type]->conn); + /* Last removed state sets the expiry of the src node. */ + if (--(s->sns[sn_type]->states) == 0) + s->sns[sn_type]->expire = time_uptime + + s->rule->timeout[PFTM_SRC_NODE] ? 
+ s->rule->timeout[PFTM_SRC_NODE] : + V_pf_default_rule.timeout[PFTM_SRC_NODE];; + PF_SRC_NODE_UNLOCK(s->sns[sn_type]); + s->sns[sn_type] = NULL; } - if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { - sn = s->nat_src_node; - PF_SRC_NODE_LOCK(sn); - if (--sn->states == 0) - sn->expire = time_uptime + timeout; - PF_SRC_NODE_UNLOCK(sn); - } - s->src_node = s->nat_src_node = NULL; + } /* @@ -4851,6 +4966,7 @@ struct pf_pdesc *pd, struct pf_krule **am, struct pf_kruleset **rsm, struct inpcb *inp) { + struct pf_test_ctx ctx; struct pf_krule *nr = NULL; struct pf_addr * const saddr = pd->src; struct pf_addr * const daddr = pd->dst; @@ -4858,10 +4974,9 @@ struct pf_kruleset *ruleset = NULL; struct pf_krule_slist match_rules; struct pf_krule_item *ri; - struct pf_ksrc_node *nsn = NULL; struct tcphdr *th = &pd->hdr.tcp; struct pf_state_key *sk = NULL, *nk = NULL; - u_short reason, transerror; + u_short reason, transerror, sn_reason = 0; int rewrite = 0; int tag = -1; int asd = 0; @@ -4875,6 +4990,7 @@ PF_RULES_RASSERT(); + memset(&ctx, 0, sizeof(ctx)); SLIST_INIT(&match_rules); if (inp != NULL) { @@ -4938,8 +5054,8 @@ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); /* check packet for BINAT/NAT/RDR */ - transerror = pf_get_translation(pd, pd->off, &nsn, &sk, - &nk, saddr, daddr, sport, dport, anchor_stack, &nr, &udp_mapping); + transerror = pf_get_translation(&ctx, pd, &sk, &nk, saddr, daddr, + sport, dport, anchor_stack, &nr, &udp_mapping); switch (transerror) { default: /* A translation error occurred. 
*/ @@ -5268,7 +5384,28 @@ (!state_icmp && (r->keep_state || nr != NULL || (pd->flags & PFDESC_TCP_NORM)))) { int action; - action = pf_create_state(r, nr, a, pd, nsn, nk, sk, + + /* Check maximum states per rule */ + if (r->max_states && + (counter_u64_fetch(r->states_cur) >= r->max_states)) { + counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1); + REASON_SET(&reason, PFRES_MAXSTATES); + goto cleanup; + } + + /* + * If this rule has source tracking, find or create a source + * node of PF_SN_LIMIT type. This source node will be used for + * providing connection limit per source. + */ + if (r->rule_flag & PFRULE_SRCTRACK) { + if ((sn_reason = pf_check_source_limits(&ctx, pd, r)) != 0) { + REASON_SET(&reason, sn_reason); + goto cleanup; + } + } + + action = pf_create_state(&ctx, r, nr, a, pd, nk, sk, sport, dport, &rewrite, sm, tag, bproto_sum, bip_sum, &match_rules, udp_mapping); if (action != PF_PASS) { @@ -5322,41 +5459,20 @@ } static int -pf_create_state(struct pf_krule *r, struct pf_krule *nr, struct pf_krule *a, - struct pf_pdesc *pd, struct pf_ksrc_node *nsn, struct pf_state_key *nk, - struct pf_state_key *sk, u_int16_t sport, +pf_create_state(struct pf_test_ctx *ctx, struct pf_krule *r, + struct pf_krule *nr, struct pf_krule *a, struct pf_pdesc *pd, + struct pf_state_key *nk, struct pf_state_key *sk, u_int16_t sport, u_int16_t dport, int *rewrite, struct pf_kstate **sm, int tag, u_int16_t bproto_sum, u_int16_t bip_sum, struct pf_krule_slist *match_rules, struct pf_udp_mapping *udp_mapping) { struct pf_kstate *s = NULL; - struct pf_ksrc_node *sn = NULL; struct tcphdr *th = &pd->hdr.tcp; u_int16_t mss = V_tcp_mssdflt; - u_short reason, sn_reason; + u_short reason; struct pf_krule_item *ri; + enum pf_sn_types sn_type; - /* check maximums */ - if (r->max_states && - (counter_u64_fetch(r->states_cur) >= r->max_states)) { - counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1); - REASON_SET(&reason, PFRES_MAXSTATES); - goto csfailed; - } - /* src node for 
filter rule */ - if ((r->rule_flag & PFRULE_SRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) && - (sn_reason = pf_insert_src_node(&sn, r, pd->src, pd->af)) != 0) { - REASON_SET(&reason, sn_reason); - goto csfailed; - } - /* src node for translation rule */ - if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && - (sn_reason = pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], - pd->af)) != 0 ) { - REASON_SET(&reason, sn_reason); - goto csfailed; - } s = pf_alloc_state(M_NOWAIT); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); @@ -5445,21 +5561,45 @@ if (r->rt) { /* pf_map_addr increases the reason counters */ - if ((reason = pf_map_addr_sn(pd->af, r, pd->src, &s->rt_addr, - &s->rt_kif, NULL, &sn)) != 0) + if ((reason = pf_map_addr_sn(ctx, pd, r, pd->src, &s->rt_addr, + &s->rt_kif, NULL, PF_SN_ROUTE)) != 0) goto csfailed; s->rt = r->rt; } s->creation = s->expire = pf_get_uptime(); - if (sn != NULL) - s->src_node = sn; - if (nsn != NULL) { - /* XXX We only modify one side for now. */ - PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); - s->nat_src_node = nsn; + /* + * Source nodes might have been inserted or found by: + * - NAT rules: pf_test() -> pf_get_translation() -> pf_map_addr_sn() + * - route-to rules: pf_map_addr_sn() just above + * - source limit tracking: pf_check_source_limits() before pf_create_state() + * + * Each of those operations sets ctx->sns[sn_type], so that we can use + * them now. However, those operations unlock the hash row ctx->sh. + * We must check if maybe source nodes have been removed in the meantime. + */ + s->sh = ctx->sh; + for (sn_type=0; sn_typesns[sn_type] == NULL || + !pf_find_src_node_ptr(ctx->sh, ctx->sns[sn_type]) + ) + continue; + + if (sn_type == PF_SN_NAT) { + /* XXX We only modify one side for now. 
*/ + PF_ACPY(&(ctx->sns[sn_type]->raddr), + &nk->addr[1], pd->af); + } + + s->sns[sn_type] = ctx->sns[sn_type]; + PF_HASHROW_UNLOCK(ctx->sh); } + if (pd->proto == IPPROTO_TCP) { if (s->state_flags & PFSTATE_SCRUB_TCP && pf_normalize_tcp_init(pd, th, &s->src, &s->dst)) { @@ -5502,7 +5642,12 @@ (pd->dir == PF_IN) ? sk : nk, (pd->dir == PF_IN) ? nk : sk, s)) { REASON_SET(&reason, PFRES_STATEINS); - goto drop; + /* + * pf_state_insert() -> pf_state_key_attach() + * pf_state_insert() -> pf_detach_state() -> pf_state_key_detach() + * clean state keys on failure + */ + goto csfailed_no_state_keys; } else *sm = s; @@ -5548,39 +5693,29 @@ return (PF_PASS); csfailed: + uma_zfree(V_pf_state_key_z, sk); + uma_zfree(V_pf_state_key_z, nk); + +csfailed_no_state_keys: while ((ri = SLIST_FIRST(match_rules))) { SLIST_REMOVE_HEAD(match_rules, entry); free(ri, M_PF_RULE_ITEM); } - uma_zfree(V_pf_state_key_z, sk); - uma_zfree(V_pf_state_key_z, nk); + /* + * Remove source nodes if they have been created for this failed state. 
+ */ + for (sn_type=0; sn_typesns[sn_type] == NULL || + !pf_find_src_node_ptr(ctx->sh, ctx->sns[sn_type])) + continue; - if (sn != NULL) { - PF_SRC_NODE_LOCK(sn); - if (--sn->states == 0 && sn->expire == 0) { - pf_unlink_src_node(sn); - uma_zfree(V_pf_sources_z, sn); - counter_u64_add( - V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); - } - PF_SRC_NODE_UNLOCK(sn); + pf_fail_src_node(ctx->sns[sn_type]); + + PF_HASHROW_UNLOCK(ctx->sh); } - if (nsn != sn && nsn != NULL) { - PF_SRC_NODE_LOCK(nsn); - if (--nsn->states == 0 && nsn->expire == 0) { - pf_unlink_src_node(nsn); - uma_zfree(V_pf_sources_z, nsn); - counter_u64_add( - V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); - } - PF_SRC_NODE_UNLOCK(nsn); - } - -drop: if (s != NULL) { - pf_src_tree_remove_state(s); s->timeout = PFTM_UNLINKED; STATE_DEC_COUNTERS(s); pf_free_state(s); @@ -5788,8 +5923,8 @@ pf_set_protostate(*state, pdst, TCPS_ESTABLISHED); if (src->state == TCPS_ESTABLISHED && - (*state)->src_node != NULL && - pf_src_connlimit(state)) { + (*state)->sns[PF_SN_LIMIT] != NULL && + pf_src_connlimit(*state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } @@ -5959,8 +6094,8 @@ if (dst->state == TCPS_SYN_SENT) { pf_set_protostate(*state, pdst, TCPS_ESTABLISHED); if (src->state == TCPS_ESTABLISHED && - (*state)->src_node != NULL && - pf_src_connlimit(state)) { + (*state)->sns[PF_SN_LIMIT] && + pf_src_connlimit(*state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } @@ -5977,8 +6112,8 @@ pf_set_protostate(*state, PF_PEER_BOTH, TCPS_ESTABLISHED); dst->state = src->state = TCPS_ESTABLISHED; - if ((*state)->src_node != NULL && - pf_src_connlimit(state)) { + if ((*state)->sns[PF_SN_LIMIT] != NULL && + pf_src_connlimit(*state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } @@ -6044,8 +6179,8 @@ (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); - } else if ((*state)->src_node != NULL && - pf_src_connlimit(state)) { + } else if 
((*state)->sns[PF_SN_LIMIT] != NULL && + pf_src_connlimit(*state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } else @@ -7719,7 +7854,7 @@ ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); goto bad_locked; } - pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, + pf_map_addr(pd->af, r, (struct pf_addr *)&ip->ip_src, &naddr, &nkif, NULL); if (!PF_AZERO(&naddr, AF_INET)) dst.sin_addr.s_addr = naddr.v4.s_addr; @@ -7967,7 +8102,7 @@ ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); goto bad_locked; } - pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, + pf_map_addr(pd->af, r, (struct pf_addr *)&ip6->ip6_src, &naddr, &nkif, NULL); if (!PF_AZERO(&naddr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst.sin6_addr, @@ -8878,17 +9013,21 @@ pf_counter_u64_add_protected(&s->nat_rule->bytes[dirndx], pd->tot_len); } - if (s->src_node != NULL) { - counter_u64_add(s->src_node->packets[dirndx], - 1); - counter_u64_add(s->src_node->bytes[dirndx], - pd->tot_len); - } - if (s->nat_src_node != NULL) { - counter_u64_add(s->nat_src_node->packets[dirndx], - 1); - counter_u64_add(s->nat_src_node->bytes[dirndx], - pd->tot_len); + /* + * Source nodes are accessed unlocked here. + * But since we are operating with stateful tracking + * and the state is locked, those SNs could not have + * been freed. + */ + for (enum pf_sn_types sn_type=0; sn_typesns[sn_type] != NULL) { + counter_u64_add( + s->sns[sn_type]->packets[dirndx], + 1); + counter_u64_add( + s->sns[sn_type]->bytes[dirndx], + pd->tot_len); + } } dirndx = (dir == s->direction) ? 
0 : 1; s->packets[dirndx]++; diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -233,7 +233,6 @@ static int pf_getstate(struct pfioc_nv *); static int pf_getstatus(struct pfioc_nv *); static int pf_clear_tables(void); -static void pf_clear_srcnodes(void); static void pf_kill_srcnodes(struct pfioc_src_node_kill *); static int pf_keepcounters(struct pfioc_nv *); static void pf_tbladdr_copyout(struct pf_addr_wrap *); @@ -323,7 +322,8 @@ static void pfattach_vnet(void) { - u_int32_t *my_timeout = V_pf_default_rule.timeout; + u_int32_t *my_timeout = V_pf_default_rule.timeout; + enum pf_sn_types sn_type; bzero(&V_pf_status, sizeof(V_pf_status)); @@ -354,7 +354,8 @@ } V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK); V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK); - V_pf_default_rule.src_nodes = counter_u64_alloc(M_WAITOK); + for (sn_type=0; sn_typestates_cur); counter_u64_free(rule->states_tot); - counter_u64_free(rule->src_nodes); + for (sn_type=0; sn_typesrc_nodes[sn_type]); uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp); mtx_destroy(&rule->rpool.mtx); @@ -2074,6 +2077,7 @@ struct pfi_kkif *kif = NULL, *rcv_kif = NULL; int rs_num; int error = 0; + enum pf_sn_types sn_type; if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) { error = EINVAL; @@ -2093,7 +2097,8 @@ } rule->states_cur = counter_u64_alloc(M_WAITOK); rule->states_tot = counter_u64_alloc(M_WAITOK); - rule->src_nodes = counter_u64_alloc(M_WAITOK); + for (sn_type=0; sn_typesrc_nodes[sn_type] = counter_u64_alloc(M_WAITOK); rule->cuid = uid; rule->cpid = pid; TAILQ_INIT(&rule->rpool.list); @@ -3593,6 +3598,7 @@ struct pf_kpooladdr *pa; u_int32_t nr = 0; int rs_num; + enum pf_sn_types sn_type; pcr->anchor[sizeof(pcr->anchor) - 1] = 0; @@ -3623,7 +3629,8 @@ } newrule->states_cur = counter_u64_alloc(M_WAITOK); newrule->states_tot = counter_u64_alloc(M_WAITOK); - newrule->src_nodes = 
counter_u64_alloc(M_WAITOK); + for (sn_type=0; sn_typesrc_nodes[sn_type] = counter_u64_alloc(M_WAITOK); newrule->cuid = td->td_ucred->cr_ruid; newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0; TAILQ_INIT(&newrule->rpool.list); @@ -5451,8 +5458,7 @@ } case DIOCCLRSRCNODES: { - pf_clear_srcnodes(); - pf_purge_expired_src_nodes(); + pf_kill_srcnodes(NULL); break; } @@ -5572,6 +5578,7 @@ void pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_version) { + enum pf_sn_types sn_type; bzero(sp, sizeof(union pfsync_state_union)); /* copy from state key */ @@ -5627,10 +5634,14 @@ __func__, msg_version); } - if (st->src_node) - sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE; - if (st->nat_src_node) - sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE; + for (sn_type=0; sn_typesns[sn_type] == NULL) + continue; + if (sn_type == PF_SN_LIMIT) + sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE; + else + sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE; + } sp->pfs_1301.id = st->id; sp->pfs_1301.creatorid = st->creatorid; @@ -5659,6 +5670,7 @@ void pf_state_export(struct pf_state_export *sp, struct pf_kstate *st) { + enum pf_sn_types sn_type; bzero(sp, sizeof(*sp)); sp->version = PF_STATE_VERSION; @@ -5693,10 +5705,14 @@ /* 8 bits for the old libpfctl, 16 bits for the new libpfctl */ sp->state_flags_compat = st->state_flags; sp->state_flags = htons(st->state_flags); - if (st->src_node) - sp->sync_flags |= PFSYNC_FLAG_SRCNODE; - if (st->nat_src_node) - sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; + for (sn_type=0; sn_typesns[sn_type] == NULL) + continue; + if (sn_type == PF_SN_LIMIT) + sp->sync_flags |= PFSYNC_FLAG_SRCNODE; + else + sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; + } sp->id = st->id; sp->creatorid = st->creatorid; @@ -5927,40 +5943,12 @@ return (error); } -static void -pf_clear_srcnodes(void) -{ - struct pf_kstate *s; - struct pf_srchash *sh; - struct pf_ksrc_node *sn; - int i; - - for (i = 0; i <= V_pf_hashmask; i++) { - struct pf_idhash *ih = 
&V_pf_idhash[i]; - - PF_HASHROW_LOCK(ih); - LIST_FOREACH(s, &ih->states, entry) { - s->src_node = NULL; - s->nat_src_node = NULL; - } - PF_HASHROW_UNLOCK(ih); - } - - for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; - i++, sh++) { - PF_HASHROW_LOCK(sh); - LIST_FOREACH(sn, &sh->nodes, entry) { - sn->expire = 1; - sn->states = 0; - } - PF_HASHROW_UNLOCK(sh); - } -} - static void pf_kill_srcnodes(struct pfioc_src_node_kill *psnk) { struct pf_ksrc_node_list kill; + enum pf_sn_types sn_type; + u_int killed; LIST_INIT(&kill); for (int i = 0; i <= V_pf_srchashmask; i++) { @@ -5969,14 +5957,15 @@ PF_HASHROW_LOCK(sh); LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp) - if (PF_MATCHA(psnk->psnk_src.neg, + if (psnk == NULL || + (PF_MATCHA(psnk->psnk_src.neg, &psnk->psnk_src.addr.v.a.addr, &psnk->psnk_src.addr.v.a.mask, &sn->addr, sn->af) && PF_MATCHA(psnk->psnk_dst.neg, &psnk->psnk_dst.addr.v.a.addr, &psnk->psnk_dst.addr.v.a.mask, - &sn->raddr, sn->af)) { + &sn->raddr, sn->af))) { pf_unlink_src_node(sn); LIST_INSERT_HEAD(&kill, sn, entry); sn->expire = 1; @@ -5990,15 +5979,24 @@ PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { - if (s->src_node && s->src_node->expire == 1) - s->src_node = NULL; - if (s->nat_src_node && s->nat_src_node->expire == 1) - s->nat_src_node = NULL; + for(sn_type=0; sn_typeexpire, the SN + * has been unlinked while it was locked. 
+ */ + if (s->sns[sn_type] && + s->sns[sn_type]->expire == 1) { + s->sns[sn_type] = NULL; + } + } } PF_HASHROW_UNLOCK(ih); } - psnk->psnk_killed = pf_free_src_nodes(&kill); + killed = pf_free_src_nodes(&kill); + + if (psnk != NULL) + psnk->psnk_killed = killed; } static int @@ -6422,7 +6420,7 @@ pf_clear_all_states(); - pf_clear_srcnodes(); + pf_kill_srcnodes(NULL); /* status does not use malloced mem so no need to cleanup */ /* fingerprints and interfaces have their own cleanup code */ @@ -6751,7 +6749,8 @@ static void pf_unload_vnet(void) { - int ret __diagused; + int ret __diagused; + enum pf_sn_types sn_type; V_pf_vnet_active = 0; V_pf_status.running = 0; @@ -6817,7 +6816,8 @@ } counter_u64_free(V_pf_default_rule.states_cur); counter_u64_free(V_pf_default_rule.states_tot); - counter_u64_free(V_pf_default_rule.src_nodes); + for (sn_type=0; sn_typerpool.opts & PF_POOL_ENDPI)) { + if (pd->proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) { struct pf_udp_endpoint_cmp udp_source; bzero(&udp_source, sizeof(udp_source)); - udp_source.af = af; - PF_ACPY(&udp_source.addr, saddr, af); + udp_source.af = pd->af; + PF_ACPY(&udp_source.addr, saddr, pd->af); udp_source.port = sport; *udp_mapping = pf_udp_mapping_find(&udp_source); if (*udp_mapping) { - PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af); + PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af); *nport = (*udp_mapping)->endpoints[1].port; - /* Try to find a src_node as per pf_map_addr(). 
*/ - if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && - (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) - *sn = pf_find_src_node(saddr, r, af, &sh, 0); + pf_map_addr_sn(ctx, pd, r, saddr, naddr, NULL, &init_addr, sn_type); return (0); } else { - *udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0); + *udp_mapping = pf_udp_mapping_create(pd->af, saddr, + sport, &init_addr, 0); if (*udp_mapping == NULL) return (1); } } - if (pf_map_addr_sn(af, r, saddr, naddr, NULL, &init_addr, sn)) + if (pf_map_addr_sn(ctx, pd, r, saddr, naddr, NULL, &init_addr, sn_type)) goto failed; - if (proto == IPPROTO_ICMP) { + if (pd->proto == IPPROTO_ICMP) { if (*nport == htons(ICMP_ECHO)) { low = 1; high = 65535; @@ -275,7 +273,7 @@ return (0); /* Don't try to modify non-echo ICMP */ } #ifdef INET6 - if (proto == IPPROTO_ICMPV6) { + if (pd->proto == IPPROTO_ICMPV6) { if (*nport == htons(ICMP6_ECHO_REQUEST)) { low = 1; high = 65535; @@ -285,21 +283,21 @@ #endif /* INET6 */ bzero(&key, sizeof(key)); - key.af = af; - key.proto = proto; + key.af = pd->af; + key.proto = pd->proto; key.port[0] = dport; PF_ACPY(&key.addr[0], daddr, key.af); do { PF_ACPY(&key.addr[1], naddr, key.af); if (*udp_mapping) - PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, af); + PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, pd->af); /* * port search; start random, step; * similar 2 portloop in in_pcbbind */ - if (proto == IPPROTO_SCTP) { + if (pd->proto == IPPROTO_SCTP) { key.port[1] = sport; if (!pf_find_state_all_exists(&key, PF_IN)) { *nport = sport; @@ -307,8 +305,8 @@ } else { return (1); /* Fail mapping. */ } - } else if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || - proto == IPPROTO_ICMP) || (low == 0 && high == 0)) { + } else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP || + pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) { /* * XXX bug: icmp states don't use the id on both sides. 
* (traceroute -I through nat) @@ -361,7 +359,7 @@ } tmp = cut; for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) { - if (proto == IPPROTO_UDP && + if (pd->proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) { (*udp_mapping)->endpoints[1].port = htons(tmp); if (pf_udp_mapping_insert(*udp_mapping) == 0) { @@ -385,7 +383,8 @@ * pick a different source address since we're out * of free port choices for the current one. */ - if (pf_map_addr_sn(af, r, saddr, naddr, NULL, &init_addr, sn)) + if (pf_map_addr_sn(ctx, pd, r, saddr, naddr, NULL, + &init_addr, sn_type)) return (1); break; case PF_POOL_NONE: @@ -394,7 +393,7 @@ default: return (1); } - } while (! PF_AEQ(&init_addr, naddr, af) ); + } while (! PF_AEQ(&init_addr, naddr, pd->af) ); failed: uma_zfree(V_pf_udp_mapping_z, *udp_mapping); @@ -411,10 +410,11 @@ } static int -pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r, - struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr, - uint16_t dport, struct pf_addr *naddr, uint16_t *nport, - struct pf_ksrc_node **sn, struct pf_udp_mapping **udp_mapping) +pf_get_mape_sport(struct pf_test_ctx *ctx, struct pf_pdesc *pd, + struct pf_krule *r, struct pf_addr *saddr, uint16_t sport, + struct pf_addr *daddr, uint16_t dport, struct pf_addr *naddr, + uint16_t *nport, enum pf_sn_types sn_type, + struct pf_udp_mapping **udp_mapping) { uint16_t psmask, low, highmask; uint16_t i, ahigh, cut; @@ -433,14 +433,14 @@ for (i = cut; i <= ahigh; i++) { low = (i << ashift) | psmask; - if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport, - naddr, nport, low, low | highmask, sn, udp_mapping)) + if (!pf_get_sport(ctx, pd, r, saddr, sport, daddr, dport, + naddr, nport, low, low | highmask, sn_type, udp_mapping)) return (0); } for (i = cut - 1; i > 0; i--) { low = (i << ashift) | psmask; - if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport, - naddr, nport, low, low | highmask, sn, udp_mapping)) + if (!pf_get_sport(ctx, pd, r, saddr, sport, daddr, dport, 
+ naddr, nport, low, low | highmask, sn_type, udp_mapping)) return (0); } return (1); @@ -621,45 +621,49 @@ } u_short -pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, - struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr, - struct pf_ksrc_node **sn) +pf_map_addr_sn(struct pf_test_ctx *ctx, struct pf_pdesc *pd, + struct pf_krule *r, struct pf_addr *saddr, struct pf_addr *naddr, + struct pfi_kkif **nkif, struct pf_addr *init_addr, + enum pf_sn_types sn_type) { u_short reason = 0; - struct pf_kpool *rpool = &r->rpool; - struct pf_srchash *sh = NULL; - /* Try to find a src_node if none was given and this - is a sticky-address rule. */ - if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && + /* + * Try to find an existing src_node if none was given and this is a + * sticky-address rule. Lock the src node hash row. + */ + if (r->rpool.opts & PF_POOL_STICKYADDR && (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) - *sn = pf_find_src_node(saddr, r, af, &sh, false); + pf_find_src_node(ctx, pd, saddr, r, sn_type); - /* If a src_node was found or explicitly given and it has a non-zero - route address, use this address. A zeroed address is found if the - src node was created just a moment ago in pf_create_state and it - needs to be filled in with routing decision calculated here. */ - if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { - /* If the supplied address is the same as the current one we've + /* + * If source node has been found it can be used. + */ + if (ctx->sns[sn_type] != NULL) { + /* + * If the supplied address is the same as the current one we've * been asked before, so tell the caller that there's no other - * address to be had. */ - if (PF_AEQ(naddr, &(*sn)->raddr, af)) { + * address to be had. 
+ */ + if (PF_AEQ(naddr, &(ctx->sns[sn_type]->raddr), pd->af)) { reason = PFRES_MAPFAILED; goto done; } - PF_ACPY(naddr, &(*sn)->raddr, af); + PF_ACPY(naddr, &(ctx->sns[sn_type]->raddr), pd->af); if (nkif) - *nkif = (*sn)->rkif; + *nkif = ctx->sns[sn_type]->rkif; if (V_pf_status.debug >= PF_DEBUG_NOISY) { printf("pf_map_addr: src tracking maps "); - pf_print_host(saddr, 0, af); + pf_print_host(saddr, 0, pd->af); printf(" to "); - pf_print_host(naddr, 0, af); + pf_print_host(naddr, 0, pd->af); if (nkif) printf("@%s", (*nkif)->pfik_name); printf("\n"); } + + reason = 0; goto done; } @@ -667,41 +671,52 @@ * Source node has not been found. Find a new address and store it * in variables given by the caller. */ - if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr) != 0) { + if (pf_map_addr(pd->af, r, saddr, naddr, nkif, init_addr) != 0) { /* pf_map_addr() sets reason counters on its own */ goto done; } - if (*sn != NULL) { - PF_ACPY(&(*sn)->raddr, naddr, af); - if (nkif) - (*sn)->rkif = *nkif; - } - - if (V_pf_status.debug >= PF_DEBUG_NOISY && - (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + if (V_pf_status.debug >= PF_DEBUG_NOISY) { printf("pf_map_addr: selected address "); - pf_print_host(naddr, 0, af); + pf_print_host(naddr, 0, pd->af); if (nkif) printf("@%s", (*nkif)->pfik_name); printf("\n"); } + /* + * Now we can allocate and insert a new src node. The new address + * is copied from the variables given by the caller, which have been + * set by pf_map_addr(). 
+ */ + if (r->rpool.opts & PF_POOL_STICKYADDR && + (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + if ((reason = pf_insert_src_node(ctx, pd, saddr, r, sn_type)) != 0) + goto done; + + PF_ACPY(&(ctx->sns[sn_type]->raddr), naddr, pd->af); + if (nkif) + ctx->sns[sn_type]->rkif = *nkif; + } + done: if (reason) { counter_u64_add(V_pf_status.counters[reason], 1); } + if (r->rpool.opts & PF_POOL_STICKYADDR && + (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) + PF_HASHROW_UNLOCK(ctx->sh); + return (reason); } u_short -pf_get_translation(struct pf_pdesc *pd, int off, - struct pf_ksrc_node **sn, struct pf_state_key **skp, - struct pf_state_key **nkp, struct pf_addr *saddr, struct pf_addr *daddr, - uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack, - struct pf_krule **rp, - struct pf_udp_mapping **udp_mapping) +pf_get_translation(struct pf_test_ctx *ctx, struct pf_pdesc *pd, + struct pf_state_key **skp, struct pf_state_key **nkp, + struct pf_addr *saddr, struct pf_addr *daddr, uint16_t sport, + uint16_t dport, struct pf_kanchor_stackframe *anchor_stack, + struct pf_krule **rp, struct pf_udp_mapping **udp_mapping) { struct pf_krule *r = NULL; struct pf_addr *naddr; @@ -764,8 +779,8 @@ high = r->rpool.proxy_port[1]; } if (r->rpool.mape.offset > 0) { - if (pf_get_mape_sport(pd->af, pd->proto, r, saddr, - sport, daddr, dport, naddr, nportp, sn, udp_mapping)) { + if (pf_get_mape_sport(ctx, pd, r, saddr, sport, daddr, + dport, naddr, nportp, PF_SN_NAT, udp_mapping)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: MAP-E port allocation (%u/%u/%u)" " failed\n", @@ -775,8 +790,8 @@ reason = PFRES_MAPFAILED; goto notrans; } - } else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport, - daddr, dport, naddr, nportp, low, high, sn, udp_mapping)) { + } else if (pf_get_sport(ctx, pd, r, saddr, sport, daddr, dport, + naddr, nportp, low, high, PF_SN_NAT, udp_mapping)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: NAT proxy port allocation (%u-%u) failed\n", r->rpool.proxy_port[0], 
r->rpool.proxy_port[1])); @@ -863,7 +878,8 @@ int tries; uint16_t cut, low, high, nport; - reason = pf_map_addr_sn(pd->af, r, saddr, naddr, NULL, NULL, sn); + reason = pf_map_addr_sn(ctx, pd, r, saddr, naddr, NULL, NULL, + PF_SN_NAT); if (reason != 0) goto notrans; if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) @@ -970,7 +986,6 @@ uma_zfree(V_pf_state_key_z, *nkp); uma_zfree(V_pf_state_key_z, *skp); *skp = *nkp = NULL; - *sn = NULL; return (reason); } diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h --- a/sys/netpfil/pf/pf_nl.h +++ b/sys/netpfil/pf/pf_nl.h @@ -262,6 +262,9 @@ PF_RT_ANCHOR_CALL = 72, /* string */ PF_RT_RCV_IFNAME = 73, /* string */ PF_RT_MAX_SRC_CONN = 74, /* u32 */ + PF_RT_SRC_NODES_LIMIT = 75, /* u64 */ + PF_RT_SRC_NODES_NAT = 76, /* u64 */ + PF_RT_SRC_NODES_ROUTE = 77, /* u64 */ }; enum pf_addrule_type_t { diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -146,7 +146,8 @@ struct nl_writer *nw = npt->nw; int error = 0; int af; - struct pf_state_key *key; + struct pf_state_key *key; + enum pf_sn_types sn_type; PF_STATE_LOCK_ASSERT(s); @@ -184,10 +185,14 @@ nlattr_add_u8(nw, PF_ST_TIMEOUT, s->timeout); nlattr_add_u16(nw, PF_ST_STATE_FLAGS, s->state_flags); uint8_t sync_flags = 0; - if (s->src_node) - sync_flags |= PFSYNC_FLAG_SRCNODE; - if (s->nat_src_node) - sync_flags |= PFSYNC_FLAG_NATSRCNODE; + for (sn_type=0; sn_type<PF_SN_MAX; sn_type++) { + if (s->sns[sn_type] == NULL) + continue; + if (sn_type == PF_SN_LIMIT) + sync_flags |= PFSYNC_FLAG_SRCNODE; + else + sync_flags |= PFSYNC_FLAG_NATSRCNODE; + } nlattr_add_u8(nw, PF_ST_SYNC_FLAGS, sync_flags); nlattr_add_u64(nw, PF_ST_ID, s->id); nlattr_add_u32(nw, PF_ST_CREATORID, htonl(s->creatorid)); @@ -860,8 +865,10 @@ struct genlmsghdr *ghdr_new; struct pf_kruleset *ruleset; struct pf_krule *rule; + u_int64_t src_nodes_total = 0; int rs_num; int error; + enum pf_sn_types sn_type; error = nl_parse_nlmsg(hdr, &getrule_parser, npt, &attrs); if 
(error != 0) @@ -987,7 +994,14 @@ nlattr_add_u64(nw, PF_RT_TIMESTAMP, pf_get_timestamp(rule)); nlattr_add_u64(nw, PF_RT_STATES_CUR, counter_u64_fetch(rule->states_cur)); nlattr_add_u64(nw, PF_RT_STATES_TOTAL, counter_u64_fetch(rule->states_tot)); - nlattr_add_u64(nw, PF_RT_SRC_NODES, counter_u64_fetch(rule->src_nodes)); + + for (sn_type=0; sn_type<PF_SN_MAX; sn_type++) + src_nodes_total += counter_u64_fetch(rule->src_nodes[sn_type]); + nlattr_add_u64(nw, PF_RT_SRC_NODES, src_nodes_total); + + nlattr_add_u64(nw, PF_RT_SRC_NODES_LIMIT, counter_u64_fetch(rule->src_nodes[PF_SN_LIMIT])); + nlattr_add_u64(nw, PF_RT_SRC_NODES_NAT, counter_u64_fetch(rule->src_nodes[PF_SN_NAT])); + nlattr_add_u64(nw, PF_RT_SRC_NODES_ROUTE, counter_u64_fetch(rule->src_nodes[PF_SN_ROUTE])); error = pf_kanchor_copyout(ruleset, rule, anchor_call, sizeof(anchor_call)); MPASS(error == 0); @@ -1768,8 +1782,11 @@ nlattr_add_u32(nw, PF_SN_CONNECTIONS, n->conn); nlattr_add_u8(nw, PF_SN_AF, n->af); nlattr_add_u8(nw, PF_SN_RULE_TYPE, n->ruletype); - nlattr_add_u64(nw, PF_SN_CREATION, n->creation); - nlattr_add_u64(nw, PF_SN_EXPIRE, n->expire); + nlattr_add_u64(nw, PF_SN_CREATION, time_uptime - n->creation); + if (n->expire > time_uptime) + nlattr_add_u64(nw, PF_SN_EXPIRE, n->expire - time_uptime); + else + nlattr_add_u64(nw, PF_SN_EXPIRE, 0); nlattr_add_pf_threshold(nw, PF_SN_CONNECTION_RATE, &n->conn_rate); if (!nlmsg_end(nw)) { diff --git a/sys/netpfil/pf/pf_nv.c b/sys/netpfil/pf/pf_nv.c --- a/sys/netpfil/pf/pf_nv.c +++ b/sys/netpfil/pf/pf_nv.c @@ -683,7 +683,9 @@ nvlist_t * pf_krule_to_nvrule(struct pf_krule *rule) { - nvlist_t *nvl, *tmp; + nvlist_t *nvl, *tmp; + u_int64_t src_nodes_total = 0; + enum pf_sn_types sn_type; nvl = nvlist_create(0); if (nvl == NULL) @@ -759,8 +761,10 @@ counter_u64_fetch(rule->states_cur)); nvlist_add_number(nvl, "states_tot", counter_u64_fetch(rule->states_tot)); - nvlist_add_number(nvl, "src_nodes", - counter_u64_fetch(rule->src_nodes)); + + for (sn_type=0; sn_type<PF_SN_MAX; sn_type++) + src_nodes_total += counter_u64_fetch(rule->src_nodes[sn_type]); + nvlist_add_number(nvl, "src_nodes", 
src_nodes_total); nvlist_add_number(nvl, "return_icmp", rule->return_icmp); nvlist_add_number(nvl, "return_icmp6", rule->return_icmp6); @@ -928,8 +932,9 @@ nvlist_t * pf_state_to_nvstate(const struct pf_kstate *s) { - nvlist_t *nvl, *tmp; - uint32_t expire, flags = 0; + nvlist_t *nvl, *tmp; + uint32_t expire, flags = 0; + enum pf_sn_types sn_type; nvl = nvlist_create(0); if (nvl == NULL) @@ -993,10 +998,14 @@ nvlist_add_number(nvl, "creatorid", s->creatorid); nvlist_add_number(nvl, "direction", s->direction); nvlist_add_number(nvl, "state_flags", s->state_flags); - if (s->src_node) - flags |= PFSYNC_FLAG_SRCNODE; - if (s->nat_src_node) - flags |= PFSYNC_FLAG_NATSRCNODE; + for (sn_type=0; sn_type<PF_SN_MAX; sn_type++) { + if (s->sns[sn_type] == NULL) + continue; + if (sn_type == PF_SN_LIMIT) + flags |= PFSYNC_FLAG_SRCNODE; + else + flags |= PFSYNC_FLAG_NATSRCNODE; + } nvlist_add_number(nvl, "sync_flags", flags); return (nvl); diff --git a/tests/sys/netpfil/pf/src_track.sh b/tests/sys/netpfil/pf/src_track.sh --- a/tests/sys/netpfil/pf/src_track.sh +++ b/tests/sys/netpfil/pf/src_track.sh @@ -98,16 +98,16 @@ ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4205 --fromaddr 2001:db8:44::2 states=$(mktemp) || exit 1 - jexec router pfctl -qss | grep 'tcp 2001:db8:43::2\[9\] <-' > $states + jexec router pfctl -qss | normalize_pfctl_s | grep 'tcp 2001:db8:43::2\[9\] <-' > $states - grep -qE '2001:db8:44::1\[4201\]\s+ESTABLISHED:ESTABLISHED' $states || atf_fail "State for port 4201 not found or not established" - grep -qE '2001:db8:44::1\[4202\]\s+ESTABLISHED:ESTABLISHED' $states || atf_fail "State for port 4202 not found or not established" - grep -qE '2001:db8:44::1\[4203\]\s+ESTABLISHED:ESTABLISHED' $states || atf_fail "State for port 4203 not found or not established" - grep -qE '2001:db8:44::2\[4205\]\s+ESTABLISHED:ESTABLISHED' $states || atf_fail "State for port 4205 not found or not established" + grep -qE '2001:db8:44::1\[4201\] ESTABLISHED:ESTABLISHED' $states || atf_fail "State for 
port 4201 not found or not established" + grep -qE '2001:db8:44::1\[4202\] ESTABLISHED:ESTABLISHED' $states || atf_fail "State for port 4202 not found or not established" + grep -qE '2001:db8:44::1\[4203\] ESTABLISHED:ESTABLISHED' $states || atf_fail "State for port 4203 not found or not established" + grep -qE '2001:db8:44::2\[4205\] ESTABLISHED:ESTABLISHED' $states || atf_fail "State for port 4205 not found or not established" if ( - grep -qE '2001:db8:44::1\[4204\]\s+' $states && - ! grep -qE '2001:db8:44::1\[4204\]\s+CLOSED:CLOSED' $states + grep -qE '2001:db8:44::1\[4204\] ' $states && + ! grep -qE '2001:db8:44::1\[4204\] CLOSED:CLOSED' $states ); then atf_fail "State for port 4204 found but not closed" fi @@ -150,34 +150,37 @@ # 2 connections from host ::1 matching rule_A will be allowed, 1 will fail to create a state. ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4211 --fromaddr 2001:db8:44::1 ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4212 --fromaddr 2001:db8:44::1 - ping_server_check_reply exit:1 --ping-type=tcp3way --send-sport=4213 --fromaddr 2001:db8:44::1 + ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4213 --fromaddr 2001:db8:44::1 + ping_server_check_reply exit:1 --ping-type=tcp3way --send-sport=4214 --fromaddr 2001:db8:44::1 # 2 connections from host ::1 matching rule_B will be allowed, 1 will fail to create a state. # Limits from rule_A don't interfere with rule_B. 
ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4221 --fromaddr 2001:db8:44::1 ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4222 --fromaddr 2001:db8:44::1 - ping_server_check_reply exit:1 --ping-type=tcp3way --send-sport=4223 --fromaddr 2001:db8:44::1 + ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4223 --fromaddr 2001:db8:44::1 + ping_server_check_reply exit:1 --ping-type=tcp3way --send-sport=4224 --fromaddr 2001:db8:44::1 # 2 connections from host ::2 matching rule_B will be allowed, 1 will fail to create a state. # Limits for host ::1 will not interfere with host ::2. - ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4224 --fromaddr 2001:db8:44::2 ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4225 --fromaddr 2001:db8:44::2 - ping_server_check_reply exit:1 --ping-type=tcp3way --send-sport=4226 --fromaddr 2001:db8:44::2 + ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4226 --fromaddr 2001:db8:44::2 + ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4227 --fromaddr 2001:db8:44::2 + ping_server_check_reply exit:1 --ping-type=tcp3way --send-sport=4228 --fromaddr 2001:db8:44::2 # We will check the resulting source nodes, though. # Order of source nodes in output is not guaranteed, find each one separately. 
nodes=$(mktemp) || exit 1 - jexec router pfctl -qvsS > $nodes + jexec router pfctl -qvsS | normalize_pfctl_s > $nodes for node_regexp in \ - '2001:db8:44::1 -> :: \( states 2, connections 2, rate [0-9/\.]+s \)\s+age [0-9:]+, 6 pkts, [0-9]+ bytes, filter rule 3' \ - '2001:db8:44::1 -> :: \( states 2, connections 2, rate [0-9/\.]+s \)\s+age [0-9:]+, 6 pkts, [0-9]+ bytes, filter rule 4' \ - '2001:db8:44::2 -> :: \( states 2, connections 2, rate [0-9/\.]+s \)\s+age [0-9:]+, 6 pkts, [0-9]+ bytes, filter rule 4' \ + '2001:db8:44::1 -> :: \( states 3, connections 3, rate [0-9/\.]+s \) age [0-9:]+, 9 pkts, [0-9]+ bytes, filter rule 3$' \ + '2001:db8:44::1 -> :: \( states 3, connections 3, rate [0-9/\.]+s \) age [0-9:]+, 9 pkts, [0-9]+ bytes, filter rule 4$' \ + '2001:db8:44::2 -> :: \( states 3, connections 3, rate [0-9/\.]+s \) age [0-9:]+, 9 pkts, [0-9]+ bytes, filter rule 4$' \ ; do - cat $nodes | tr '\n' ' ' | grep -qE "$node_regexp" || atf_fail "Source nodes not matching expected output" + grep -qE "$node_regexp" $nodes || atf_fail "Source nodes not matching expected output" done # Check if limit counters have been properly set. - jexec router pfctl -qvvsi | grep -qE 'max-src-states\s+3\s+' || atf_fail "max-src-states not set to 3" + jexec router pfctl -qvvsi | grep -qE 'max-src-states\s+3 ' || atf_fail "max-src-states not set to 3" } max_src_states_rule_cleanup() @@ -185,9 +188,136 @@ pft_cleanup } +max_src_states_global_head() +{ + atf_set descr 'Max states per source global' + atf_set require.user root +} + +max_src_states_global_body() +{ + setup_router_server_ipv6 + + # Clients will connect from another network behind the router. + # This allows for using multiple source addresses and for tester jail + # to not respond with RST packets for SYN+ACKs. 
+ jexec router route add -6 2001:db8:44::0/64 2001:db8:42::2 + jexec server route add -6 2001:db8:44::0/64 2001:db8:43::1 + + pft_set_rules router \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv }" \ + "pass in on ${epair_tester}b inet6 proto tcp from port 4210:4219 keep state (max-src-states 3 source-track global) label rule_A" \ + "pass in on ${epair_tester}b inet6 proto tcp from port 4220:4229 keep state (max-src-states 3 source-track global) label rule_B" \ + "pass out on ${epair_server}a keep state" + + # Global source tracking creates a single source node shared between all + # rules for each connecting source IP address and counts states created + # by all rules. Each rule has its own max-src-conn value checked against + # that single source node. + + # 3 connections from host ::1 matching rule_A will be allowed. + ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4211 --fromaddr 2001:db8:44::1 + ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4212 --fromaddr 2001:db8:44::1 + ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4213 --fromaddr 2001:db8:44::1 + # The 4th connection matching rule_A from host ::1 will have its state killed. + ping_server_check_reply exit:1 --ping-type=tcp3way --send-sport=4214 --fromaddr 2001:db8:44::1 + # A connection matching rule_B from host ::1 will have its state killed too. 
+ ping_server_check_reply exit:1 --ping-type=tcp3way --send-sport=4221 --fromaddr 2001:db8:44::1 + + nodes=$(mktemp) || exit 1 + jexec router pfctl -qvsS | normalize_pfctl_s > $nodes + node_regexp='2001:db8:44::1 -> :: \( states 3, connections 3, rate [0-9/\.]+s \) age [0-9:]+, 9 pkts, [0-9]+ bytes$' + grep -qE "$node_regexp" $nodes || atf_fail "Source nodes not matching expected output" +} + +max_src_states_global_cleanup() +{ + pft_cleanup +} + + +route_to_head() +{ + atf_set descr 'Max states per source per rule with route-to' + atf_set require.user root +} + +route_to_body() +{ + setup_router_dummy_ipv6 + + # Clients will connect from another network behind the router. + # This allows for using multiple source addresses. + jexec router route add -6 2001:db8:44::0/64 2001:db8:42::2 + + # Additional gateways for route-to. + gw1=${net_server_host_server%::*}::2:1 + gw2=${net_server_host_server%::*}::2:2 + jexec router ndp -s ${gw1} 00:01:02:03:04:05 + jexec router ndp -s ${gw2} 00:01:02:03:04:05 + + # This test will check for proper source node creation for "sticky-address" + # which creates a PF_SN_NAT source node and "max-src-states" which creates + # PF_SN_LIMIT source node. The rules below cover all combinations. 
+ pft_set_rules router \ + "table { ${gw1} ${gw2} }" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv }" \ + "pass in on ${epair_tester}b route-to ( ${epair_server}a ) inet6 proto tcp from port 4211 keep state label rule_3" \ + "pass in on ${epair_tester}b route-to ( ${epair_server}a ) sticky-address inet6 proto tcp from port 4212 keep state label rule_4" \ + "pass in on ${epair_tester}b route-to ( ${epair_server}a ) inet6 proto tcp from port 4213 keep state (max-src-states 3 source-track rule) label rule_5" \ + "pass in on ${epair_tester}b route-to ( ${epair_server}a ) sticky-address inet6 proto tcp from port 4214 keep state (max-src-states 3 source-track rule) label rule_6" \ + "pass out on ${epair_server}a keep state" + + # We don't check if state limits are properly enforced, this is tested + # by other tests in this file. + ping_dummy_check_request exit:0 --ping-type=tcpsyn --send-sport=4211 --fromaddr 2001:db8:44::1 --to 2001:db8:45::1 + ping_dummy_check_request exit:0 --ping-type=tcpsyn --send-sport=4212 --fromaddr 2001:db8:44::2 --to 2001:db8:45::1 + ping_dummy_check_request exit:0 --ping-type=tcpsyn --send-sport=4213 --fromaddr 2001:db8:44::3 --to 2001:db8:45::1 + ping_dummy_check_request exit:0 --ping-type=tcpsyn --send-sport=4214 --fromaddr 2001:db8:44::4 --to 2001:db8:45::1 + + # Order of source nodes in output is not guaranteed, find each one separately. 
+ jexec router ifconfig epair0b + jexec router ifconfig epair1a + + states=$(mktemp) || exit 1 + jexec router pfctl -qvss | normalize_pfctl_s > $states + + for state_regexp in \ + 'all tcp 2001:db8:45::1\[9\] <- 2001:db8:44::1\[4211\] CLOSED:SYN_SENT \[0 \+ 1\] \[0 \+ 2\] age [0-9:]+, expires in [0-9:]+, 1:0 pkts, 76:0 bytes, rule 3$' \ + 'all tcp 2001:db8:45::1\[9\] <- 2001:db8:44::2\[4212\] CLOSED:SYN_SENT \[0 \+ 1\] \[0 \+ 2\] age [0-9:]+, expires in [0-9:]+, 1:0 pkts, 76:0 bytes, rule 4, sticky-address$' \ + 'all tcp 2001:db8:45::1\[9\] <- 2001:db8:44::3\[4213\] CLOSED:SYN_SENT \[0 \+ 1\] \[0 \+ 2\] age [0-9:]+, expires in [0-9:]+, 1:0 pkts, 76:0 bytes, rule 5, source-track$' \ + 'all tcp 2001:db8:45::1\[9\] <- 2001:db8:44::4\[4214\] CLOSED:SYN_SENT \[0 \+ 1\] \[0 \+ 2\] age [0-9:]+, expires in [0-9:]+, 1:0 pkts, 76:0 bytes, rule 6, source-track, sticky-address$' \ + ; do + grep -qE "${state_regexp}" $states || atf_fail "State not found for '${state_regexp}'" + done + + nodes=$(mktemp) || exit 1 + jexec router pfctl -qvsS | normalize_pfctl_s > $nodes + + for node_regexp in \ + '2001:db8:44::2 -> 2001:db8:43::2:1 \( states 1, connections 0, rate 0.0/0s \) age [0-9:]+, 1 pkts, 76 bytes, filter rule 4' \ + '2001:db8:44::3 -> :: \( states 1, connections 0, rate 0.0/0s \) age [0-9:]+, 1 pkts, 76 bytes, filter rule 5' \ + '2001:db8:44::4 -> 2001:db8:43::2:1 \( states 1, connections 0, rate 0.0/0s \) age [0-9:]+, 1 pkts, 76 bytes, filter rule 6' \ + '2001:db8:44::4 -> :: \( states 1, connections 0, rate 0.0/0s \) age [0-9:]+, 1 pkts, 76 bytes, filter rule 6' \ + ; do + grep -qE "${node_regexp}" $nodes || atf_fail "Source node not found for '${node_regexp}'" + done + + ! 
grep -q 'filter rule 3$' $nodes || atf_fail "Source node found for rule 3" +} + +route_to_cleanup() +{ + pft_cleanup +} + atf_init_test_cases() { atf_add_test_case "source_track" atf_add_test_case "max_src_conn_rule" atf_add_test_case "max_src_states_rule" + atf_add_test_case "max_src_states_global" + atf_add_test_case "route_to" } diff --git a/tests/sys/netpfil/pf/utils.subr b/tests/sys/netpfil/pf/utils.subr --- a/tests/sys/netpfil/pf/utils.subr +++ b/tests/sys/netpfil/pf/utils.subr @@ -301,3 +301,13 @@ --replyif ${epair_tester}a \ $params } + +normalize_pfctl_s() +{ + # `pfctl -s[rsS]` output is divided into sections. Each rule, state or + # source node starts with the beginning of a line and next lines with leading + # spaces are various parameters of said rule, state or source node. + # Convert it into a single line per entry, and remove multiple spaces, + # so that regular expressions for matching them in tests can be simpler. + awk '{ if ($0 ~ /^[^ ]/ && NR > 1) print(""); gsub(/ +/, " ", $0); printf("%s", $0); } END {print("");}' +}