Changeset View
Changeset View
Standalone View
Standalone View
head/sys/netpfil/ipfw/ip_fw_dynamic.c
Show First 20 Lines • Show All 116 Lines • ▼ Show 20 Lines | |||||
/* By default use jenkins hash function */ | /* By default use jenkins hash function */ | ||||
#define IPFIREWALL_JENKINSHASH | #define IPFIREWALL_JENKINSHASH | ||||
/*
 * Account one packet in the counters of *d for the given direction.
 * `dir' is token-pasted onto the pcnt_ and bcnt_ field names: the pcnt_
 * counter is incremented by one and pktlen is added to the bcnt_ counter.
 * `d' is expanded twice, so it must be free of side effects.
 */
#define	DYN_COUNTER_INC(d, dir, pktlen)	do {			\
	(d)->pcnt_ ## dir++;					\
	(d)->bcnt_ ## dir += (pktlen);				\
} while (0)
#define DYN_REFERENCED 0x01 | |||||
/*
 * The DYN_REFERENCED flag shows that a state keeps a reference to a named
 * object, and that this reference should be released when the state expires.
 */
struct dyn_data { | struct dyn_data { | ||||
void *parent; /* pointer to parent rule */ | void *parent; /* pointer to parent rule */ | ||||
uint32_t chain_id; /* cached ruleset id */ | uint32_t chain_id; /* cached ruleset id */ | ||||
uint32_t f_pos; /* cached rule index */ | uint32_t f_pos; /* cached rule index */ | ||||
uint32_t hashval; /* hash value used for hash resize */ | uint32_t hashval; /* hash value used for hash resize */ | ||||
uint16_t fibnum; /* fib used to send keepalives */ | uint16_t fibnum; /* fib used to send keepalives */ | ||||
uint8_t _pad[3]; | uint8_t _pad[2]; | ||||
uint8_t flags; /* internal flags */ | |||||
uint8_t set; /* parent rule set number */ | uint8_t set; /* parent rule set number */ | ||||
uint16_t rulenum; /* parent rule number */ | uint16_t rulenum; /* parent rule number */ | ||||
uint32_t ruleid; /* parent rule id */ | uint32_t ruleid; /* parent rule id */ | ||||
uint32_t state; /* TCP session state and flags */ | uint32_t state; /* TCP session state and flags */ | ||||
uint32_t ack_fwd; /* most recent ACKs in forward */ | uint32_t ack_fwd; /* most recent ACKs in forward */ | ||||
uint32_t ack_rev; /* and reverse direction (used */ | uint32_t ack_rev; /* and reverse direction (used */ | ||||
/* to generate keepalives) */ | /* to generate keepalives) */ | ||||
▲ Show 20 Lines • Show All 1,253 Lines • ▼ Show 20 Lines | if (data->chain_id != V_layer3_chain.id) { | ||||
* result as MATCH_NONE and return NULL. | * result as MATCH_NONE and return NULL. | ||||
* | * | ||||
* This will lead to creation of new similar state | * This will lead to creation of new similar state | ||||
* that will be added into head of this bucket. | * that will be added into head of this bucket. | ||||
* And the state that we currently have matched | * And the state that we currently have matched | ||||
* should be deleted by dyn_expire_states(). | * should be deleted by dyn_expire_states(). | ||||
* | * | ||||
* In case when dyn_keep_states is enabled, return | * In case when dyn_keep_states is enabled, return | ||||
* pointer to default rule and corresponding f_pos | * pointer to deleted rule and f_pos value | ||||
* value. | * corresponding to penultimate rule. | ||||
* XXX: In this case we lose the cache efficiency, | * When we have enabled V_dyn_keep_states, states | ||||
* since f_pos is not cached, because it seems | * that become orphaned will get the DYN_REFERENCED | ||||
* there is no easy way to atomically switch | * flag and rule will keep around. So we can return | ||||
* all fields related to parent rule of given | * it. But since it is not in the rules map, we need | ||||
* state. | * return such f_pos value, so after the state | ||||
* handling if the search will continue, the next rule | |||||
* will be the last one - the default rule. | |||||
*/ | */ | ||||
if (V_layer3_chain.map[data->f_pos] == rule) { | if (V_layer3_chain.map[data->f_pos] == rule) { | ||||
data->chain_id = V_layer3_chain.id; | data->chain_id = V_layer3_chain.id; | ||||
info->f_pos = data->f_pos; | info->f_pos = data->f_pos; | ||||
} else if (V_dyn_keep_states != 0) { | } else if (V_dyn_keep_states != 0) { | ||||
rule = V_layer3_chain.default_rule; | /* | ||||
info->f_pos = V_layer3_chain.n_rules - 1; | * The original rule pointer is still usable. | ||||
* So, we return it, but f_pos need to be | |||||
* changed to point to the penultimate rule. | |||||
*/ | |||||
MPASS(V_layer3_chain.n_rules > 1); | |||||
data->chain_id = V_layer3_chain.id; | |||||
data->f_pos = V_layer3_chain.n_rules - 2; | |||||
info->f_pos = data->f_pos; | |||||
} else { | } else { | ||||
rule = NULL; | rule = NULL; | ||||
info->direction = MATCH_NONE; | info->direction = MATCH_NONE; | ||||
DYN_DEBUG("rule %p [%u, %u] is considered " | DYN_DEBUG("rule %p [%u, %u] is considered " | ||||
"invalid in data %p", rule, data->ruleid, | "invalid in data %p", rule, data->ruleid, | ||||
data->rulenum, data); | data->rulenum, data); | ||||
/* info->f_pos doesn't matter here. */ | /* info->f_pos doesn't matter here. */ | ||||
} | } | ||||
▲ Show 20 Lines • Show All 683 Lines • ▼ Show 20 Lines | dyn_match_range(uint16_t rulenum, uint8_t set, const ipfw_range_tlv *rt) | ||||
if ((rt->flags & IPFW_RCFLAG_SET) != 0 && set != rt->set) | if ((rt->flags & IPFW_RCFLAG_SET) != 0 && set != rt->set) | ||||
return (0); | return (0); | ||||
if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 && | if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 && | ||||
(rulenum < rt->start_rule || rulenum > rt->end_rule)) | (rulenum < rt->start_rule || rulenum > rt->end_rule)) | ||||
return (0); | return (0); | ||||
return (1); | return (1); | ||||
} | } | ||||
static void | |||||
dyn_acquire_rule(struct ip_fw_chain *ch, struct dyn_data *data, | |||||
struct ip_fw *rule, uint16_t kidx) | |||||
{ | |||||
struct dyn_state_obj *obj; | |||||
/* | |||||
* Do not acquire reference twice. | |||||
* This can happen when rule deletion executed for | |||||
* the same range, but different ruleset id. | |||||
*/ | |||||
if (data->flags & DYN_REFERENCED) | |||||
return; | |||||
IPFW_UH_WLOCK_ASSERT(ch); | |||||
MPASS(kidx != 0); | |||||
data->flags |= DYN_REFERENCED; | |||||
/* Reference the named object */ | |||||
obj = SRV_OBJECT(ch, kidx); | |||||
obj->no.refcnt++; | |||||
MPASS(obj->no.etlv == IPFW_TLV_STATE_NAME); | |||||
/* Reference the parent rule */ | |||||
rule->refcnt++; | |||||
} | |||||
static void | |||||
dyn_release_rule(struct ip_fw_chain *ch, struct dyn_data *data, | |||||
struct ip_fw *rule, uint16_t kidx) | |||||
{ | |||||
struct dyn_state_obj *obj; | |||||
IPFW_UH_WLOCK_ASSERT(ch); | |||||
MPASS(kidx != 0); | |||||
obj = SRV_OBJECT(ch, kidx); | |||||
if (obj->no.refcnt == 1) | |||||
dyn_destroy(ch, &obj->no); | |||||
else | |||||
obj->no.refcnt--; | |||||
if (--rule->refcnt == 1) | |||||
ipfw_free_rule(rule); | |||||
} | |||||
/* | |||||
* We do not keep O_LIMIT_PARENT states when V_dyn_keep_states is enabled. | |||||
* O_LIMIT state is created when new connection is going to be established | |||||
* and there is no matching state. So, since the old parent rule was deleted | |||||
* we can't create new states with old parent, and thus we can not account | |||||
* new connections with already established connections, and can not do | |||||
* proper limiting. | |||||
*/ | |||||
static int | static int | ||||
dyn_match_ipv4_state(struct dyn_ipv4_state *s, const ipfw_range_tlv *rt) | dyn_match_ipv4_state(struct ip_fw_chain *ch, struct dyn_ipv4_state *s, | ||||
const ipfw_range_tlv *rt) | |||||
{ | { | ||||
struct ip_fw *rule; | |||||
int ret; | |||||
if (s->type == O_LIMIT_PARENT) | if (s->type == O_LIMIT_PARENT) | ||||
return (dyn_match_range(s->limit->rulenum, | return (dyn_match_range(s->limit->rulenum, | ||||
s->limit->set, rt)); | s->limit->set, rt)); | ||||
if (s->type == O_LIMIT) | ret = dyn_match_range(s->data->rulenum, s->data->set, rt); | ||||
return (dyn_match_range(s->data->rulenum, s->data->set, rt)); | if (ret == 0 || V_dyn_keep_states == 0) | ||||
return (ret); | |||||
if (V_dyn_keep_states == 0 && | rule = s->data->parent; | ||||
dyn_match_range(s->data->rulenum, s->data->set, rt)) | if (s->type == O_LIMIT) | ||||
return (1); | rule = ((struct dyn_ipv4_state *)rule)->limit->parent; | ||||
dyn_acquire_rule(ch, s->data, rule, s->kidx); | |||||
return (0); | return (0); | ||||
} | } | ||||
#ifdef INET6 | #ifdef INET6 | ||||
static int | static int | ||||
dyn_match_ipv6_state(struct dyn_ipv6_state *s, const ipfw_range_tlv *rt) | dyn_match_ipv6_state(struct ip_fw_chain *ch, struct dyn_ipv6_state *s, | ||||
const ipfw_range_tlv *rt) | |||||
{ | { | ||||
struct ip_fw *rule; | |||||
int ret; | |||||
if (s->type == O_LIMIT_PARENT) | if (s->type == O_LIMIT_PARENT) | ||||
return (dyn_match_range(s->limit->rulenum, | return (dyn_match_range(s->limit->rulenum, | ||||
s->limit->set, rt)); | s->limit->set, rt)); | ||||
if (s->type == O_LIMIT) | ret = dyn_match_range(s->data->rulenum, s->data->set, rt); | ||||
return (dyn_match_range(s->data->rulenum, s->data->set, rt)); | if (ret == 0 || V_dyn_keep_states == 0) | ||||
return (ret); | |||||
if (V_dyn_keep_states == 0 && | rule = s->data->parent; | ||||
dyn_match_range(s->data->rulenum, s->data->set, rt)) | if (s->type == O_LIMIT) | ||||
return (1); | rule = ((struct dyn_ipv6_state *)rule)->limit->parent; | ||||
dyn_acquire_rule(ch, s->data, rule, s->kidx); | |||||
return (0); | return (0); | ||||
} | } | ||||
#endif | #endif | ||||
/* | /* | ||||
* Unlink expired entries from states lists. | * Unlink expired entries from states lists. | ||||
* @rt can be used to specify the range of states for deletion. | * @rt can be used to specify the range of states for deletion. | ||||
*/ | */ | ||||
static void | static void | ||||
dyn_expire_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt) | dyn_expire_states(struct ip_fw_chain *ch, ipfw_range_tlv *rt) | ||||
{ | { | ||||
struct dyn_ipv4_slist expired_ipv4; | struct dyn_ipv4_slist expired_ipv4; | ||||
#ifdef INET6 | #ifdef INET6 | ||||
struct dyn_ipv6_slist expired_ipv6; | struct dyn_ipv6_slist expired_ipv6; | ||||
struct dyn_ipv6_state *s6, *s6n, *s6p; | struct dyn_ipv6_state *s6, *s6n, *s6p; | ||||
#endif | #endif | ||||
struct dyn_ipv4_state *s4, *s4n, *s4p; | struct dyn_ipv4_state *s4, *s4n, *s4p; | ||||
void *rule; | |||||
int bucket, removed, length, max_length; | int bucket, removed, length, max_length; | ||||
IPFW_UH_WLOCK_ASSERT(ch); | |||||
/* | /* | ||||
* Unlink expired states from each bucket. | * Unlink expired states from each bucket. | ||||
* With acquired bucket lock iterate entries of each lists: | * With acquired bucket lock iterate entries of each lists: | ||||
* ipv4, ipv4_parent, ipv6, and ipv6_parent. Check expired time | * ipv4, ipv4_parent, ipv6, and ipv6_parent. Check expired time | ||||
* and unlink entry from the list, link entry into temporary | * and unlink entry from the list, link entry into temporary | ||||
* expired_xxx lists then bump "del" bucket version. | * expired_xxx lists then bump "del" bucket version. | ||||
* | * | ||||
* When an entry is removed, corresponding states counter is | * When an entry is removed, corresponding states counter is | ||||
* decremented. If entry has O_LIMIT type, parent's reference | * decremented. If entry has O_LIMIT type, parent's reference | ||||
* counter is decremented. | * counter is decremented. | ||||
* | * | ||||
* NOTE: this function can be called from userspace context | * NOTE: this function can be called from userspace context | ||||
* when user deletes rules. In this case all matched states | * when user deletes rules. In this case all matched states | ||||
* will be forcedly unlinked. O_LIMIT_PARENT states will be kept | * will be forcedly unlinked. O_LIMIT_PARENT states will be kept | ||||
* in the expired lists until reference counter become zero. | * in the expired lists until reference counter become zero. | ||||
*/ | */ | ||||
#define DYN_UNLINK_STATES(s, prev, next, exp, af, name, extra) do { \ | #define DYN_UNLINK_STATES(s, prev, next, exp, af, name, extra) do { \ | ||||
length = 0; \ | length = 0; \ | ||||
removed = 0; \ | removed = 0; \ | ||||
prev = NULL; \ | prev = NULL; \ | ||||
s = CK_SLIST_FIRST(&V_dyn_ ## name [bucket]); \ | s = CK_SLIST_FIRST(&V_dyn_ ## name [bucket]); \ | ||||
while (s != NULL) { \ | while (s != NULL) { \ | ||||
next = CK_SLIST_NEXT(s, entry); \ | next = CK_SLIST_NEXT(s, entry); \ | ||||
if ((TIME_LEQ((s)->exp, time_uptime) && extra) || \ | if ((TIME_LEQ((s)->exp, time_uptime) && extra) || \ | ||||
(rt != NULL && dyn_match_ ## af ## _state(s, rt))) {\ | (rt != NULL && \ | ||||
dyn_match_ ## af ## _state(ch, s, rt))) { \ | |||||
if (prev != NULL) \ | if (prev != NULL) \ | ||||
CK_SLIST_REMOVE_AFTER(prev, entry); \ | CK_SLIST_REMOVE_AFTER(prev, entry); \ | ||||
else \ | else \ | ||||
CK_SLIST_REMOVE_HEAD( \ | CK_SLIST_REMOVE_HEAD( \ | ||||
&V_dyn_ ## name [bucket], entry); \ | &V_dyn_ ## name [bucket], entry); \ | ||||
removed++; \ | removed++; \ | ||||
SLIST_INSERT_HEAD(&expired_ ## af, s, expired); \ | SLIST_INSERT_HEAD(&expired_ ## af, s, expired); \ | ||||
if (s->type == O_LIMIT_PARENT) \ | if (s->type == O_LIMIT_PARENT) \ | ||||
DYN_COUNT_DEC(dyn_parent_count); \ | DYN_COUNT_DEC(dyn_parent_count); \ | ||||
else { \ | else { \ | ||||
DYN_COUNT_DEC(dyn_count); \ | DYN_COUNT_DEC(dyn_count); \ | ||||
if (s->data->flags & DYN_REFERENCED) { \ | |||||
rule = s->data->parent; \ | |||||
if (s->type == O_LIMIT) \ | |||||
rule = ((__typeof(s)) \ | |||||
rule)->limit->parent;\ | |||||
dyn_release_rule(ch, s->data, \ | |||||
rule, s->kidx); \ | |||||
} \ | |||||
if (s->type == O_LIMIT) { \ | if (s->type == O_LIMIT) { \ | ||||
s = s->data->parent; \ | s = s->data->parent; \ | ||||
DPARENT_COUNT_DEC(s->limit); \ | DPARENT_COUNT_DEC(s->limit); \ | ||||
} \ | } \ | ||||
} \ | } \ | ||||
} else { \ | } else { \ | ||||
prev = s; \ | prev = s; \ | ||||
length++; \ | length++; \ | ||||
▲ Show 20 Lines • Show All 468 Lines • ▼ Show 20 Lines | ipfw_expire_dyn_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt) | ||||
if (V_dyn_count == 0) | if (V_dyn_count == 0) | ||||
return; | return; | ||||
IPFW_UH_WLOCK_ASSERT(chain); | IPFW_UH_WLOCK_ASSERT(chain); | ||||
dyn_expire_states(chain, rt); | dyn_expire_states(chain, rt); | ||||
} | } | ||||
/* | /* | ||||
* Pass through all states and reset eaction for orphaned rules. | |||||
*/ | |||||
void | |||||
ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint16_t eaction_id, | |||||
uint16_t default_id, uint16_t instance_id) | |||||
{ | |||||
#ifdef INET6 | |||||
struct dyn_ipv6_state *s6; | |||||
#endif | |||||
struct dyn_ipv4_state *s4; | |||||
struct ip_fw *rule; | |||||
uint32_t bucket; | |||||
#define DYN_RESET_EACTION(s, h, b) \ | |||||
CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) { \ | |||||
if ((s->data->flags & DYN_REFERENCED) == 0) \ | |||||
continue; \ | |||||
rule = s->data->parent; \ | |||||
if (s->type == O_LIMIT) \ | |||||
rule = ((__typeof(s))rule)->limit->parent; \ | |||||
ipfw_reset_eaction(ch, rule, eaction_id, \ | |||||
default_id, instance_id); \ | |||||
} | |||||
IPFW_UH_WLOCK_ASSERT(ch); | |||||
if (V_dyn_count == 0) | |||||
return; | |||||
for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { | |||||
DYN_RESET_EACTION(s4, ipv4, bucket); | |||||
#ifdef INET6 | |||||
DYN_RESET_EACTION(s6, ipv6, bucket); | |||||
#endif | |||||
} | |||||
} | |||||
/* | |||||
* Returns size of dynamic states in legacy format | * Returns size of dynamic states in legacy format | ||||
*/ | */ | ||||
int | int | ||||
ipfw_dyn_len(void) | ipfw_dyn_len(void) | ||||
{ | { | ||||
return ((V_dyn_count + V_dyn_parent_count) * sizeof(ipfw_dyn_rule)); | return ((V_dyn_count + V_dyn_parent_count) * sizeof(ipfw_dyn_rule)); | ||||
} | } | ||||
/* | /* | ||||
* Returns number of dynamic states. | * Returns number of dynamic states. | ||||
* Marks every named object index used by dynamic states with bit in @bmask. | |||||
* Returns number of named objects accounted in bmask via @nocnt. | |||||
* Used by dump format v1 (current). | * Used by dump format v1 (current). | ||||
*/ | */ | ||||
uint32_t | uint32_t | ||||
ipfw_dyn_get_count(void) | ipfw_dyn_get_count(uint32_t *bmask, int *nocnt) | ||||
{ | { | ||||
#ifdef INET6 | |||||
struct dyn_ipv6_state *s6; | |||||
#endif | |||||
struct dyn_ipv4_state *s4; | |||||
uint32_t bucket; | |||||
#define DYN_COUNT_OBJECTS(s, h, b) \ | |||||
CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) { \ | |||||
MPASS(s->kidx != 0); \ | |||||
if (ipfw_mark_object_kidx(bmask, IPFW_TLV_STATE_NAME, \ | |||||
s->kidx) != 0) \ | |||||
(*nocnt)++; \ | |||||
} | |||||
IPFW_UH_RLOCK_ASSERT(&V_layer3_chain); | |||||
/* No need to pass through all the buckets. */ | |||||
*nocnt = 0; | |||||
if (V_dyn_count + V_dyn_parent_count == 0) | |||||
return (0); | |||||
for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { | |||||
DYN_COUNT_OBJECTS(s4, ipv4, bucket); | |||||
#ifdef INET6 | |||||
DYN_COUNT_OBJECTS(s6, ipv6, bucket); | |||||
#endif | |||||
} | |||||
return (V_dyn_count + V_dyn_parent_count); | return (V_dyn_count + V_dyn_parent_count); | ||||
} | } | ||||
/* | /* | ||||
* Check if rule contains at least one dynamic opcode. | * Check if rule contains at least one dynamic opcode. | ||||
* | * | ||||
* Returns 1 if such opcode is found, 0 otherwise. | * Returns 1 if such opcode is found, 0 otherwise. | ||||
▲ Show 20 Lines • Show All 382 Lines • Show Last 20 Lines |