Page MenuHomeFreeBSD

D47770.id147001.diff
No OneTemporary

D47770.id147001.diff

diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -2624,8 +2624,7 @@
struct pfi_kkif **nkif, struct pf_addr *,
struct pf_ksrc_node **);
u_short pf_get_translation(struct pf_pdesc *,
- int, struct pf_ksrc_node **,
- struct pf_state_key **, struct pf_state_key **,
+ int, struct pf_state_key **, struct pf_state_key **,
struct pf_addr *, struct pf_addr *,
uint16_t, uint16_t, struct pf_kanchor_stackframe *,
struct pf_krule **,
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -332,8 +332,7 @@
struct pf_kruleset **, struct inpcb *);
static int pf_create_state(struct pf_krule *, struct pf_krule *,
struct pf_krule *, struct pf_pdesc *,
- struct pf_ksrc_node *, struct pf_state_key *,
- struct pf_state_key *,
+ struct pf_state_key *, struct pf_state_key *,
u_int16_t, u_int16_t, int *,
struct pf_kstate **, int, u_int16_t, u_int16_t,
struct pf_krule_slist *, struct pf_udp_mapping *);
@@ -379,7 +378,10 @@
struct pf_krule *);
static void pf_overload_task(void *v, int pending);
static u_short pf_insert_src_node(struct pf_ksrc_node **,
- struct pf_krule *, struct pf_addr *, sa_family_t);
+ struct pf_srchash **, struct pf_krule *,
+ struct pf_addr *, sa_family_t);
+static bool pf_find_src_node_ptr(struct pf_ksrc_node **,
+ struct pf_srchash *);
static u_int pf_purge_expired_states(u_int, int);
static void pf_purge_unlinked_rules(void);
static int pf_mtag_uminit(void *, int, int);
@@ -819,10 +821,7 @@
int bad = 0;
PF_STATE_LOCK_ASSERT(state);
- /*
- * XXXKS: The src node is accessed unlocked!
- * PF_SRC_NODE_LOCK_ASSERT(state->src_node);
- */
+ PF_SRC_NODE_LOCK(state->src_node);
state->src_node->conn++;
state->src.tcp_est = 1;
@@ -841,20 +840,25 @@
bad++;
}
- if (!bad)
- return (0);
+ if (bad == 0) {
+ goto done;
+ }
/* Kill this state. */
state->timeout = PFTM_PURGE;
pf_set_protostate(state, PF_PEER_BOTH, TCPS_CLOSED);
- if (state->rule->overload_tbl == NULL)
- return (1);
+ if (state->rule->overload_tbl == NULL) {
+ bad = 1;
+ goto done;
+ }
/* Schedule overloading and flushing task. */
pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
- if (pfoe == NULL)
- return (1); /* too bad :( */
+ if (pfoe == NULL) {
+ bad = 1; /* too bad :( */
+ goto done;
+ }
bcopy(&state->src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
pfoe->af = state->key[PF_SK_WIRE]->af;
@@ -864,8 +868,11 @@
SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
PF_OVERLOADQ_UNLOCK();
taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);
+ bad = 1;
- return (1);
+done:
+ PF_SRC_NODE_UNLOCK(state->src_node);
+ return bad;
}
static void
@@ -962,8 +969,7 @@
}
/*
- * Can return locked on failure, so that we can consistently
- * allocate and insert a new one.
+ * If the node is found, it is always returned locked. If it is not
+ * found, whether the hash row stays locked is configurable.
*/
struct pf_ksrc_node *
pf_find_src_node(struct pf_addr *src, struct pf_krule *rule, sa_family_t af,
@@ -981,15 +987,34 @@
(af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0)))
break;
- if (n != NULL) {
- n->states++;
- PF_HASHROW_UNLOCK(*sh);
- } else if (returnlocked == false)
+ if (n == NULL && returnlocked == false)
PF_HASHROW_UNLOCK(*sh);
return (n);
}
+static bool
+pf_find_src_node_ptr(struct pf_ksrc_node **sn, struct pf_srchash *sh)
+{
+ struct pf_ksrc_node *cur;
+
+ if ((*sn) == NULL)
+ return (false);
+
+ KASSERT(sh != NULL, ("%s: sh is NULL", __func__));
+
+ counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);
+ PF_HASHROW_LOCK(sh);
+ LIST_FOREACH(cur, &(sh->nodes), entry) {
+ if (cur == (*sn) &&
+ cur->expire != 1) /* Ignore nodes being killed */
+ return (true);
+ }
+ PF_HASHROW_UNLOCK(sh);
+ (*sn) = NULL;
+ return (false);
+}
+
static void
pf_free_src_node(struct pf_ksrc_node *sn)
{
@@ -1002,33 +1027,33 @@
}
static u_short
-pf_insert_src_node(struct pf_ksrc_node **sn, struct pf_krule *rule,
- struct pf_addr *src, sa_family_t af)
+pf_insert_src_node(struct pf_ksrc_node **sn, struct pf_srchash **sh,
+ struct pf_krule *rule, struct pf_addr *src, sa_family_t af)
{
u_short reason = 0;
- struct pf_srchash *sh = NULL;
KASSERT((rule->rule_flag & PFRULE_SRCTRACK ||
rule->rpool.opts & PF_POOL_STICKYADDR),
("%s for non-tracking rule %p", __func__, rule));
+ /*
+ * Request the sh to always be locked, as we might insert a new sn.
+ */
if (*sn == NULL)
- *sn = pf_find_src_node(src, rule, af, &sh, true);
+ *sn = pf_find_src_node(src, rule, af, sh, true);
if (*sn == NULL) {
- PF_HASHROW_ASSERT(sh);
+ PF_HASHROW_ASSERT(*sh);
if (rule->max_src_nodes &&
counter_u64_fetch(rule->src_nodes) >= rule->max_src_nodes) {
counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1);
- PF_HASHROW_UNLOCK(sh);
reason = PFRES_SRCLIMIT;
goto done;
}
(*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
if ((*sn) == NULL) {
- PF_HASHROW_UNLOCK(sh);
reason = PFRES_MEMORY;
goto done;
}
@@ -1039,7 +1064,6 @@
if ((*sn)->bytes[i] == NULL || (*sn)->packets[i] == NULL) {
pf_free_src_node(*sn);
- PF_HASHROW_UNLOCK(sh);
reason = PFRES_MEMORY;
goto done;
}
@@ -1050,18 +1074,16 @@
rule->max_src_conn_rate.seconds);
MPASS((*sn)->lock == NULL);
- (*sn)->lock = &sh->lock;
+ (*sn)->lock = &(*sh)->lock;
(*sn)->af = af;
(*sn)->rule = rule;
PF_ACPY(&(*sn)->addr, src, af);
- LIST_INSERT_HEAD(&sh->nodes, *sn, entry);
+ LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry);
(*sn)->creation = time_uptime;
(*sn)->ruletype = rule->action;
- (*sn)->states = 1;
if ((*sn)->rule != NULL)
counter_u64_add((*sn)->rule->src_nodes, 1);
- PF_HASHROW_UNLOCK(sh);
counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1);
} else {
if (rule->max_src_states &&
@@ -1073,6 +1095,12 @@
}
}
done:
+ if (reason == 0)
+ (*sn)->states++;
+ else
+ (*sn) = NULL;
+
+ PF_HASHROW_UNLOCK(*sh);
return (reason);
}
@@ -4880,7 +4908,6 @@
struct pf_kruleset *ruleset = NULL;
struct pf_krule_slist match_rules;
struct pf_krule_item *ri;
- struct pf_ksrc_node *nsn = NULL;
struct tcphdr *th = &pd->hdr.tcp;
struct pf_state_key *sk = NULL, *nk = NULL;
u_short reason, transerror;
@@ -4960,8 +4987,8 @@
r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
/* check packet for BINAT/NAT/RDR */
- transerror = pf_get_translation(pd, pd->off, &nsn, &sk,
- &nk, saddr, daddr, sport, dport, anchor_stack, &nr, &udp_mapping);
+ transerror = pf_get_translation(pd, pd->off, &sk, &nk, saddr, daddr,
+ sport, dport, anchor_stack, &nr, &udp_mapping);
switch (transerror) {
default:
/* A translation error occurred. */
@@ -5290,7 +5317,7 @@
(!state_icmp && (r->keep_state || nr != NULL ||
(pd->flags & PFDESC_TCP_NORM)))) {
int action;
- action = pf_create_state(r, nr, a, pd, nsn, nk, sk,
+ action = pf_create_state(r, nr, a, pd, nk, sk,
sport, dport, &rewrite, sm, tag, bproto_sum, bip_sum,
&match_rules, udp_mapping);
if (action != PF_PASS) {
@@ -5345,14 +5372,16 @@
static int
pf_create_state(struct pf_krule *r, struct pf_krule *nr, struct pf_krule *a,
- struct pf_pdesc *pd, struct pf_ksrc_node *nsn, struct pf_state_key *nk,
- struct pf_state_key *sk, u_int16_t sport,
- u_int16_t dport, int *rewrite, struct pf_kstate **sm,
+ struct pf_pdesc *pd, struct pf_state_key *nk, struct pf_state_key *sk,
+ u_int16_t sport, u_int16_t dport, int *rewrite, struct pf_kstate **sm,
int tag, u_int16_t bproto_sum, u_int16_t bip_sum,
struct pf_krule_slist *match_rules, struct pf_udp_mapping *udp_mapping)
{
struct pf_kstate *s = NULL;
struct pf_ksrc_node *sn = NULL;
+ struct pf_srchash *snh = NULL;
+ struct pf_ksrc_node *nsn = NULL;
+ struct pf_srchash *nsnh = NULL;
struct tcphdr *th = &pd->hdr.tcp;
u_int16_t mss = V_tcp_mssdflt;
u_short reason, sn_reason;
@@ -5368,13 +5397,13 @@
/* src node for filter rule */
if ((r->rule_flag & PFRULE_SRCTRACK ||
r->rpool.opts & PF_POOL_STICKYADDR) &&
- (sn_reason = pf_insert_src_node(&sn, r, pd->src, pd->af)) != 0) {
+ (sn_reason = pf_insert_src_node(&sn, &snh, r, pd->src, pd->af)) != 0) {
REASON_SET(&reason, sn_reason);
goto csfailed;
}
/* src node for translation rule */
if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
- (sn_reason = pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx],
+ (sn_reason = pf_insert_src_node(&nsn, &nsnh, nr, &sk->addr[pd->sidx],
pd->af)) != 0 ) {
REASON_SET(&reason, sn_reason);
goto csfailed;
@@ -5466,6 +5495,7 @@
}
if (r->rt) {
+ pf_find_src_node_ptr(&sn, snh);
/* pf_map_addr increases the reason counters */
if ((reason = pf_map_addr_sn(pd->af, r, pd->src, &s->rt_addr,
&s->rt_kif, NULL, &sn)) != 0)
@@ -5475,13 +5505,6 @@
s->creation = s->expire = pf_get_uptime();
- if (sn != NULL)
- s->src_node = sn;
- if (nsn != NULL) {
- /* XXX We only modify one side for now. */
- PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
- s->nat_src_node = nsn;
- }
if (pd->proto == IPPROTO_TCP) {
if (s->state_flags & PFSTATE_SCRUB_TCP &&
pf_normalize_tcp_init(pd, th, &s->src, &s->dst)) {
@@ -5528,6 +5551,20 @@
} else
*sm = s;
+ /*
+ * Lock order is important: first state, then source node.
+ */
+ if (pf_find_src_node_ptr(&sn, snh)) {
+ s->src_node = sn;
+ PF_HASHROW_UNLOCK(snh);
+ }
+ if (pf_find_src_node_ptr(&nsn, nsnh)) {
+ /* XXX We only modify one side for now. */
+ PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
+ s->nat_src_node = nsn;
+ PF_HASHROW_UNLOCK(nsnh);
+ }
+
if (tag > 0)
s->tag = tag;
if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
@@ -5578,26 +5615,24 @@
uma_zfree(V_pf_state_key_z, sk);
uma_zfree(V_pf_state_key_z, nk);
- if (sn != NULL) {
- PF_SRC_NODE_LOCK(sn);
+ if (pf_find_src_node_ptr(&sn, snh)) {
if (--sn->states == 0 && sn->expire == 0) {
pf_unlink_src_node(sn);
- uma_zfree(V_pf_sources_z, sn);
+ pf_free_src_node(sn);
counter_u64_add(
V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
}
- PF_SRC_NODE_UNLOCK(sn);
+ PF_HASHROW_UNLOCK(snh);
}
- if (nsn != sn && nsn != NULL) {
- PF_SRC_NODE_LOCK(nsn);
+ if (sn != nsn && pf_find_src_node_ptr(&nsn, nsnh)) {
if (--nsn->states == 0 && nsn->expire == 0) {
pf_unlink_src_node(nsn);
- uma_zfree(V_pf_sources_z, nsn);
+ pf_free_src_node(nsn);
counter_u64_add(
V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
}
- PF_SRC_NODE_UNLOCK(nsn);
+ PF_HASHROW_UNLOCK(nsnh);
}
drop:
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -255,7 +255,9 @@
/* Try to find a src_node as per pf_map_addr(). */
if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
(r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
- *sn = pf_find_src_node(saddr, r, af, &sh, 0);
+ *sn = pf_find_src_node(saddr, r, af, &sh, false);
+ if (*sn != NULL)
+ mtx_unlock((*sn)->lock);
return (0);
} else {
*udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0);
@@ -385,6 +387,7 @@
* pick a different source address since we're out
* of free port choices for the current one.
*/
+ (*sn) = NULL;
if (pf_map_addr_sn(af, r, saddr, naddr, NULL, &init_addr, sn))
return (1);
break;
@@ -629,8 +632,11 @@
struct pf_kpool *rpool = &r->rpool;
struct pf_srchash *sh = NULL;
- /* Try to find a src_node if none was given and this
- is a sticky-address rule. */
+ /*
+ * Try to find a src_node if none was given and this is
+ * a sticky-address rule. Request the sh to be unlocked if
+ * sn was not found, as here we never insert a new sn.
+ */
if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
(r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
*sn = pf_find_src_node(saddr, r, af, &sh, false);
@@ -640,6 +646,11 @@
src node was created just a moment ago in pf_create_state and it
needs to be filled in with routing decision calculated here. */
if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
+#ifdef INVARIANTS
+ /* No access to pf_hashsrc() in pf_lb.c, check the lock directly. */
+ if (*sn)
+ mtx_assert((*sn)->lock, MA_OWNED);
+#endif
/* If the supplied address is the same as the current one we've
* been asked before, so tell the caller that there's no other
* address to be had. */
@@ -673,6 +684,11 @@
}
if (*sn != NULL) {
+#ifdef INVARIANTS
+ /* No access to pf_hashsrc() in pf_lb.c, check the lock directly. */
+ if (*sn)
+ mtx_assert((*sn)->lock, MA_OWNED);
+#endif
PF_ACPY(&(*sn)->raddr, naddr, af);
if (nkif)
(*sn)->rkif = *nkif;
@@ -688,6 +704,9 @@
}
done:
+ if ((*sn) != NULL)
+ mtx_unlock((*sn)->lock);
+
if (reason) {
counter_u64_add(V_pf_status.counters[reason], 1);
}
@@ -697,14 +716,14 @@
u_short
pf_get_translation(struct pf_pdesc *pd, int off,
- struct pf_ksrc_node **sn, struct pf_state_key **skp,
- struct pf_state_key **nkp, struct pf_addr *saddr, struct pf_addr *daddr,
- uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack,
- struct pf_krule **rp,
+ struct pf_state_key **skp, struct pf_state_key **nkp, struct pf_addr *saddr,
+ struct pf_addr *daddr, uint16_t sport, uint16_t dport,
+ struct pf_kanchor_stackframe *anchor_stack, struct pf_krule **rp,
struct pf_udp_mapping **udp_mapping)
{
struct pf_krule *r = NULL;
struct pf_addr *naddr;
+ struct pf_ksrc_node *sn = NULL;
uint16_t *nportp;
uint16_t low, high;
u_short reason;
@@ -765,7 +784,7 @@
}
if (r->rpool.mape.offset > 0) {
if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
- sport, daddr, dport, naddr, nportp, sn, udp_mapping)) {
+ sport, daddr, dport, naddr, nportp, &sn, udp_mapping)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: MAP-E port allocation (%u/%u/%u)"
" failed\n",
@@ -776,7 +795,7 @@
goto notrans;
}
} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
- daddr, dport, naddr, nportp, low, high, sn, udp_mapping)) {
+ daddr, dport, naddr, nportp, low, high, &sn, udp_mapping)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: NAT proxy port allocation (%u-%u) failed\n",
r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
@@ -863,7 +882,7 @@
int tries;
uint16_t cut, low, high, nport;
- reason = pf_map_addr_sn(pd->af, r, saddr, naddr, NULL, NULL, sn);
+ reason = pf_map_addr_sn(pd->af, r, saddr, naddr, NULL, NULL, &sn);
if (reason != 0)
goto notrans;
if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
@@ -970,7 +989,6 @@
uma_zfree(V_pf_state_key_z, *nkp);
uma_zfree(V_pf_state_key_z, *skp);
*skp = *nkp = NULL;
- *sn = NULL;
return (reason);
}

File Metadata

Mime Type
text/plain
Expires
Fri, Apr 10, 9:58 PM (2 h, 54 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31248581
Default Alt Text
D47770.id147001.diff (14 KB)

Event Timeline