Index: sbin/ipfw/ipfw.8 =================================================================== --- sbin/ipfw/ipfw.8 +++ sbin/ipfw/ipfw.8 @@ -310,10 +310,9 @@ when this does not carry any additional information. .It Fl d When listing, show dynamic rules in addition to static ones. -.It Fl e -When listing and -.Fl d -is specified, also show expired dynamic rules. +.It Fl D +When listing, show only dynamic states. +When deleting, delete only dynamic states. .It Fl f Run without prompting for confirmation for commands that can cause problems if misused, i.e., Index: sbin/ipfw/ipfw2.h =================================================================== --- sbin/ipfw/ipfw2.h +++ sbin/ipfw/ipfw2.h @@ -37,8 +37,6 @@ int do_quiet; /* Be quiet in add and flush */ int do_pipe; /* this cmd refers to a pipe/queue/sched */ int do_nat; /* this cmd refers to a nat config */ - int do_dynamic; /* display dynamic rules */ - int do_expired; /* display expired dynamic rules */ int do_compact; /* show rules in compact mode */ int do_force; /* do not ask for confirmation */ int show_sets; /* display the set each rule belongs to */ @@ -48,6 +46,8 @@ /* The options below can have multiple values. */ + int do_dynamic; /* 1 - display dynamic rules */ + /* 2 - display/delete only dynamic rules */ int do_sort; /* field to sort results (0 = no) */ /* valid fields are 1 and above */ Index: sbin/ipfw/ipfw2.c =================================================================== --- sbin/ipfw/ipfw2.c +++ sbin/ipfw/ipfw2.c @@ -2247,10 +2247,9 @@ uint16_t rulenum; char buf[INET6_ADDRSTRLEN]; - if (!co->do_expired) { - if (!d->expire && !(d->dyn_type == O_LIMIT_PARENT)) - return; - } + if (d->expire == 0 && d->dyn_type != O_LIMIT_PARENT) + return; + bcopy(&d->rule, &rulenum, sizeof(rulenum)); bprintf(bp, "%05d", rulenum); if (fo->pcwidth > 0 || fo->bcwidth > 0) { @@ -2292,6 +2291,32 @@ if (d->kidx != 0) bprintf(bp, " :%s", object_search_ctlv(fo->tstate, d->kidx, IPFW_TLV_STATE_NAME)); + +#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) +#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) + if (co->verbose) { + bprintf(bp, " state 0x%08x ", d->state); + if (d->state & IPFW_DYN_ORPHANED) + bprintf(bp, "ORPHANED,"); + if ((d->state & BOTH_SYN) == BOTH_SYN) + bprintf(bp, "BOTH_SYN,"); + else { + if (d->state & TH_SYN) + bprintf(bp, "F_SYN,"); + if (d->state & (TH_SYN << 8)) + bprintf(bp, "R_SYN,"); + } + if ((d->state & BOTH_FIN) == BOTH_FIN) + bprintf(bp, "BOTH_FIN,"); + else { + if (d->state & TH_FIN) + bprintf(bp, "F_FIN,"); + if (d->state & (TH_FIN << 8)) + bprintf(bp, "R_FIN,"); + } + bprintf(bp, " f_ack 0x%x, r_ack 0x%x", d->ack_fwd, + d->ack_rev); + } } static int @@ -2695,7 +2720,8 @@ cfg = NULL; sfo.show_counters = show_counters; sfo.show_time = co.do_time; - sfo.flags = IPFW_CFG_GET_STATIC; + if (co.do_dynamic != 2) + sfo.flags |= IPFW_CFG_GET_STATIC; if (co.do_dynamic != 0) sfo.flags |= IPFW_CFG_GET_STATES; if ((sfo.show_counters | sfo.show_time) != 0) @@ -2740,17 +2766,15 @@ fo->set_mask = cfg->set_mask; ctlv = (ipfw_obj_ctlv *)(cfg + 1); + if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) { + object_sort_ctlv(ctlv); + fo->tstate = ctlv; + readsz += ctlv->head.length; + ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); + } if (cfg->flags & IPFW_CFG_GET_STATIC) { /* We've requested static rules */ - if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) { - object_sort_ctlv(ctlv); - fo->tstate = ctlv; - readsz += ctlv->head.length; - ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + - ctlv->head.length); - } - if (ctlv->head.type == IPFW_TLV_RULE_LIST) { rbase = (ipfw_obj_tlv *)(ctlv + 1); rcnt = ctlv->count; @@ -2777,10 +2801,12 @@ if (ac == 0) { fo->first = 0; fo->last = IPFW_DEFAULT_RULE; - list_static_range(co, fo, &bp, rbase, rcnt); + if (cfg->flags & IPFW_CFG_GET_STATIC) + list_static_range(co, fo, &bp, rbase, rcnt); if (co->do_dynamic && dynsz > 0) { - printf("## Dynamic rules (%d %zu):\n", fo->dcnt, dynsz); + printf("## Dynamic rules (%d %zu):\n", fo->dcnt, + dynsz); list_dyn_range(co, fo, &bp, dynbase, dynsz); } @@ -2800,6 +2826,9 @@ continue; } + if ((cfg->flags & IPFW_CFG_GET_STATIC) == 0) + continue; + if (list_static_range(co, fo, &bp, rbase, rcnt) == 0) { /* give precedence to other error(s) */ if (exitval == EX_OK) @@ -3313,6 +3342,8 @@ rt.flags |= IPFW_RCFLAG_SET; } } + if (co.do_dynamic == 2) + rt.flags |= IPFW_RCFLAG_DYNAMIC; i = do_range_cmd(IP_FW_XDEL, &rt); if (i != 0) { exitval = EX_UNAVAILABLE; @@ -3320,7 +3351,8 @@ continue; warn("rule %u: setsockopt(IP_FW_XDEL)", rt.start_rule); - } else if (rt.new_set == 0 && do_set == 0) { + } else if (rt.new_set == 0 && do_set == 0 && + co.do_dynamic != 2) { exitval = EX_UNAVAILABLE; if (co.do_quiet) continue; Index: sbin/ipfw/main.c =================================================================== --- sbin/ipfw/main.c +++ sbin/ipfw/main.c @@ -262,7 +262,7 @@ save_av = av; optind = optreset = 1; /* restart getopt() */ - while ((ch = getopt(ac, av, "abcdefhinNp:qs:STtv")) != -1) + while ((ch = getopt(ac, av, "abcdDefhinNp:qs:STtv")) != -1) switch (ch) { case 'a': do_acct = 1; @@ -281,8 +281,12 @@ co.do_dynamic = 1; break; + case 'D': + co.do_dynamic = 2; + break; + case 'e': - co.do_expired = 1; + /* nop for compatibility */ break; case 'f': Index: sys/netinet/ip_fw.h =================================================================== --- sys/netinet/ip_fw.h +++ sys/netinet/ip_fw.h @@ -707,6 +707,7 @@ u_int32_t state; /* state of this rule (typically a * combination of TCP flags) */ +#define IPFW_DYN_ORPHANED 0x40000 /* state's parent rule was deleted */ u_int32_t ack_fwd; /* most recent ACKs in forward */ u_int32_t ack_rev; /* and reverse directions (used */ /* to generate keepalives) */ @@ -937,9 +938,10 @@ #define IPFW_RCFLAG_RANGE 0x01 /* rule range is set */ #define IPFW_RCFLAG_ALL 0x02 /* match ALL rules */ #define IPFW_RCFLAG_SET 0x04 /* match rules in given set */ +#define IPFW_RCFLAG_DYNAMIC 0x08 /* match only dynamic states */ /* User-settable flags */ #define IPFW_RCFLAG_USER (IPFW_RCFLAG_RANGE | IPFW_RCFLAG_ALL | \ - IPFW_RCFLAG_SET) + IPFW_RCFLAG_SET | IPFW_RCFLAG_DYNAMIC) /* Internally used flags */ #define IPFW_RCFLAG_DEFAULT 0x0100 /* Do not skip defaul rule */ Index: sys/netpfil/ipfw/ip_fw_dynamic.c =================================================================== --- sys/netpfil/ipfw/ip_fw_dynamic.c +++ sys/netpfil/ipfw/ip_fw_dynamic.c @@ -122,6 +122,12 @@ (d)->bcnt_ ## dir += pktlen; \ } while (0) +#define DYN_REFERENCED 0x01 +/* + * DYN_REFERENCED flag is used to show that state keeps reference to named + * object, and this reference should be released when state becomes expired. + */ + struct dyn_data { void *parent; /* pointer to parent rule */ uint32_t chain_id; /* cached ruleset id */ @@ -129,7 +135,8 @@ uint32_t hashval; /* hash value used for hash resize */ uint16_t fibnum; /* fib used to send keepalives */ - uint8_t _pad[3]; + uint8_t _pad[2]; + uint8_t flags; /* internal flags */ uint8_t set; /* parent rule set number */ uint16_t rulenum; /* parent rule number */ uint32_t ruleid; /* parent rule id */ @@ -709,6 +716,8 @@ IPFW_UH_WLOCK_ASSERT(ch); + KASSERT(no->etlv == IPFW_TLV_STATE_NAME, + ("%s: wrong object type %u", __func__, no->etlv)); KASSERT(no->refcnt == 1, ("Destroying object '%s' (type %u, idx %u) with refcnt %u", no->name, no->etlv, no->kidx, no->refcnt)); @@ -1398,20 +1407,29 @@ * should be deleted by dyn_expire_states(). * * In case when dyn_keep_states is enabled, return - * pointer to default rule and corresponding f_pos - * value. - * XXX: In this case we lose the cache efficiency, - * since f_pos is not cached, because it seems - * there is no easy way to atomically switch - * all fields related to parent rule of given - * state. + * pointer to deleted rule and f_pos value + * corresponding to penultimate rule. + * When we have enabled V_dyn_keep_states, states + * that become orphaned will get the DYN_REFERENCED + * flag and rule will keep around. So we can return + * it. But since it is not in the rules map, we need + * return such f_pos value, so after the state + * handling if the search will continue, the next rule + * will be the last one - the default rule. */ if (V_layer3_chain.map[data->f_pos] == rule) { data->chain_id = V_layer3_chain.id; info->f_pos = data->f_pos; } else if (V_dyn_keep_states != 0) { - rule = V_layer3_chain.default_rule; - info->f_pos = V_layer3_chain.n_rules - 1; + /* + * The original rule pointer is still usable. + * So, we return it, but f_pos need to be + * changed to point to the penultimate rule. + */ + MPASS(V_layer3_chain.n_rules > 1); + data->chain_id = V_layer3_chain.id; + data->f_pos = V_layer3_chain.n_rules - 2; + info->f_pos = data->f_pos; } else { rule = NULL; info->direction = MATCH_NONE; @@ -2093,7 +2111,11 @@ } /* - * Returns 1 when state is matched by specified range, otherwise returns 0. + * Returns: + * 0 when state is not matched by specified range; + * 1 when state is matched by specified range; + * 2 when state is matched by specified range and requested deletion of + * dynamic states. */ static int dyn_match_range(uint16_t rulenum, uint8_t set, const ipfw_range_tlv *rt) @@ -2101,50 +2123,117 @@ MPASS(rt != NULL); /* flush all states */ - if (rt->flags & IPFW_RCFLAG_ALL) + if (rt->flags & IPFW_RCFLAG_ALL) { + if (rt->flags & IPFW_RCFLAG_DYNAMIC) + return (2); /* forced */ return (1); + } if ((rt->flags & IPFW_RCFLAG_SET) != 0 && set != rt->set) return (0); if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 && (rulenum < rt->start_rule || rulenum > rt->end_rule)) return (0); + if (rt->flags & IPFW_RCFLAG_DYNAMIC) + return (2); return (1); } +static void +dyn_acquire_rule(struct ip_fw_chain *ch, struct dyn_data *data, + struct ip_fw *rule, uint16_t kidx) +{ + struct dyn_state_obj *obj; + + /* + * Do not acquire reference twice. + * This can happen when rule deletion executed for + * the same range, but different ruleset id. + */ + if (data->flags & DYN_REFERENCED) + return; + + IPFW_UH_WLOCK_ASSERT(ch); + MPASS(kidx != 0); + + data->flags |= DYN_REFERENCED; + /* Reference the named object */ + obj = SRV_OBJECT(ch, kidx); + obj->no.refcnt++; + MPASS(obj->no.etlv == IPFW_TLV_STATE_NAME); + + /* Reference the parent rule */ + rule->refcnt++; +} + +static void +dyn_release_rule(struct ip_fw_chain *ch, struct dyn_data *data, + struct ip_fw *rule, uint16_t kidx) +{ + struct dyn_state_obj *obj; + + IPFW_UH_WLOCK_ASSERT(ch); + MPASS(kidx != 0); + + obj = SRV_OBJECT(ch, kidx); + if (obj->no.refcnt == 1) + dyn_destroy(ch, &obj->no); + else + obj->no.refcnt--; + + if (--rule->refcnt == 1) + ipfw_free_rule(rule); +} + +/* + * We do not keep O_LIMIT_PARENT states when V_dyn_keep_states is enabled. + * O_LIMIT state is created when new connection is going to be established + * and there is no matching state. So, since the old parent rule was deleted + * we can't create new states with old parent, and thus we can not account + * new connections with already established connections, and can not do + * proper limiting. + */ static int -dyn_match_ipv4_state(struct dyn_ipv4_state *s, const ipfw_range_tlv *rt) +dyn_match_ipv4_state(struct ip_fw_chain *ch, struct dyn_ipv4_state *s, + const ipfw_range_tlv *rt) { + struct ip_fw *rule; + int ret; if (s->type == O_LIMIT_PARENT) return (dyn_match_range(s->limit->rulenum, s->limit->set, rt)); - if (s->type == O_LIMIT) - return (dyn_match_range(s->data->rulenum, s->data->set, rt)); + ret = dyn_match_range(s->data->rulenum, s->data->set, rt); + if (ret == 0 || V_dyn_keep_states == 0 || ret > 1) + return (ret); - if (V_dyn_keep_states == 0 && - dyn_match_range(s->data->rulenum, s->data->set, rt)) - return (1); - + rule = s->data->parent; + if (s->type == O_LIMIT) + rule = ((struct dyn_ipv4_state *)rule)->limit->parent; + dyn_acquire_rule(ch, s->data, rule, s->kidx); return (0); } #ifdef INET6 static int -dyn_match_ipv6_state(struct dyn_ipv6_state *s, const ipfw_range_tlv *rt) +dyn_match_ipv6_state(struct ip_fw_chain *ch, struct dyn_ipv6_state *s, + const ipfw_range_tlv *rt) { + struct ip_fw *rule; + int ret; if (s->type == O_LIMIT_PARENT) return (dyn_match_range(s->limit->rulenum, s->limit->set, rt)); - if (s->type == O_LIMIT) - return (dyn_match_range(s->data->rulenum, s->data->set, rt)); + ret = dyn_match_range(s->data->rulenum, s->data->set, rt); + if (ret == 0 || V_dyn_keep_states == 0 || ret > 1) + return (ret); - if (V_dyn_keep_states == 0 && - dyn_match_range(s->data->rulenum, s->data->set, rt)) - return (1); - + rule = s->data->parent; + if (s->type == O_LIMIT) + rule = ((struct dyn_ipv6_state *)rule)->limit->parent; + dyn_acquire_rule(ch, s->data, rule, s->kidx); return (0); } #endif @@ -2154,7 +2243,7 @@ * @rt can be used to specify the range of states for deletion. */ static void -dyn_expire_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt) +dyn_expire_states(struct ip_fw_chain *ch, ipfw_range_tlv *rt) { struct dyn_ipv4_slist expired_ipv4; #ifdef INET6 @@ -2162,8 +2251,11 @@ struct dyn_ipv6_state *s6, *s6n, *s6p; #endif struct dyn_ipv4_state *s4, *s4n, *s4p; + void *rule; int bucket, removed, length, max_length; + IPFW_UH_WLOCK_ASSERT(ch); + /* * Unlink expired states from each bucket. * With acquired bucket lock iterate entries of each lists: @@ -2188,7 +2280,8 @@ while (s != NULL) { \ next = CK_SLIST_NEXT(s, entry); \ if ((TIME_LEQ((s)->exp, time_uptime) && extra) || \ - (rt != NULL && dyn_match_ ## af ## _state(s, rt))) {\ + (rt != NULL && \ + dyn_match_ ## af ## _state(ch, s, rt))) { \ if (prev != NULL) \ CK_SLIST_REMOVE_AFTER(prev, entry); \ else \ @@ -2200,6 +2293,14 @@ DYN_COUNT_DEC(dyn_parent_count); \ else { \ DYN_COUNT_DEC(dyn_count); \ + if (s->data->flags & DYN_REFERENCED) { \ + rule = s->data->parent; \ + if (s->type == O_LIMIT) \ + rule = ((__typeof(s)) \ + rule)->limit->parent;\ + dyn_release_rule(ch, s->data, \ + rule, s->kidx); \ + } \ if (s->type == O_LIMIT) { \ s = s->data->parent; \ DPARENT_COUNT_DEC(s->limit); \ @@ -2684,6 +2785,42 @@ } /* + * Pass through all states and reset eaction for orphaned rules. + */ +void +ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint16_t eaction_id, + uint16_t default_id, uint16_t instance_id) +{ +#ifdef INET6 + struct dyn_ipv6_state *s6; +#endif + struct dyn_ipv4_state *s4; + struct ip_fw *rule; + uint32_t bucket; + +#define DYN_RESET_EACTION(s, h, b) \ + CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) { \ + if ((s->data->flags & DYN_REFERENCED) == 0) \ + continue; \ + rule = s->data->parent; \ + if (s->type == O_LIMIT) \ + rule = ((__typeof(s))rule)->limit->parent; \ + ipfw_reset_eaction(ch, rule, eaction_id, \ + default_id, instance_id); \ + } + + IPFW_UH_WLOCK_ASSERT(ch); + if (V_dyn_count == 0) + return; + for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { + DYN_RESET_EACTION(s4, ipv4, bucket); +#ifdef INET6 + DYN_RESET_EACTION(s6, ipv6, bucket); +#endif + } +} + +/* * Returns size of dynamic states in legacy format */ int @@ -2695,12 +2832,43 @@ /* * Returns number of dynamic states. + * Marks every named object index used by dynamic states with bit in @bmask. + * Returns number of named objects accounted in bmask via @nocnt. * Used by dump format v1 (current). */ uint32_t -ipfw_dyn_get_count(void) +ipfw_dyn_get_count(uint32_t *bmask, int *nocnt) { +#ifdef INET6 + struct dyn_ipv6_state *s6; +#endif + struct dyn_ipv4_state *s4; + uint32_t bucket; +#define DYN_COUNT_OBJECTS(s, h, b) \ + CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) { \ + MPASS(s->kidx != 0); \ + if (ipfw_mark_object_kidx(bmask, IPFW_TLV_STATE_NAME, \ + s->kidx) != 0) \ + (*nocnt)++; \ + } + + IPFW_UH_RLOCK_ASSERT(&V_layer3_chain); + + /* No need to pass through all the buckets. */ + *nocnt = 0; + if (V_dyn_count + V_dyn_parent_count == 0) + return (0); + + for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) { + DYN_COUNT_OBJECTS(s4, ipv4, bucket); + DYN_COUNT_OBJECTS(s4, ipv4_parent, bucket); +#ifdef INET6 + DYN_COUNT_OBJECTS(s6, ipv6, bucket); + DYN_COUNT_OBJECTS(s6, ipv6_parent, bucket); +#endif + } + return (V_dyn_count + V_dyn_parent_count); } @@ -2783,9 +2951,12 @@ memcpy((char *)&dst->rule + sizeof(data->rulenum), &data->set, sizeof(data->set)); + dst->state = data->state; + if (data->flags & DYN_REFERENCED) + dst->state |= IPFW_DYN_ORPHANED; + /* unused fields */ dst->parent = NULL; - dst->state = data->state; dst->ack_fwd = data->ack_fwd; dst->ack_rev = data->ack_rev; dst->count = 0; Index: sys/netpfil/ipfw/ip_fw_eaction.c =================================================================== --- sys/netpfil/ipfw/ip_fw_eaction.c +++ sys/netpfil/ipfw/ip_fw_eaction.c @@ -252,11 +252,10 @@ * Resets all eaction opcodes to default handlers. */ static void -reset_eaction_obj(struct ip_fw_chain *ch, uint16_t eaction_id) +reset_eaction_rules(struct ip_fw_chain *ch, uint16_t eaction_id, + uint16_t instance_id, bool reset_rules) { struct named_object *no; - struct ip_fw *rule; - ipfw_insn *cmd; int i; IPFW_UH_WLOCK_ASSERT(ch); @@ -267,35 +266,32 @@ panic("Default external action handler is not found"); if (eaction_id == no->kidx) panic("Wrong eaction_id"); - EACTION_DEBUG("replace id %u with %u", eaction_id, no->kidx); + + EACTION_DEBUG("Going to replace id %u with %u", eaction_id, no->kidx); IPFW_WLOCK(ch); - for (i = 0; i < ch->n_rules; i++) { - rule = ch->map[i]; - cmd = ACTION_PTR(rule); - if (cmd->opcode != O_EXTERNAL_ACTION) - continue; - if (cmd->arg1 != eaction_id) - continue; - cmd->arg1 = no->kidx; /* Set to default id */ - /* - * XXX: we only bump refcount on default_eaction. - * Refcount on the original object will be just - * ignored on destroy. But on default_eaction it - * will be decremented on rule deletion. - */ - no->refcnt++; - /* - * Since named_object related to this instance will be - * also destroyed, truncate the chain of opcodes to - * remove the rest of cmd chain just after O_EXTERNAL_ACTION - * opcode. - */ - if (rule->act_ofs < rule->cmd_len - 1) { - EACTION_DEBUG("truncate rule %d: len %u -> %u", - rule->rulenum, rule->cmd_len, rule->act_ofs + 1); - rule->cmd_len = rule->act_ofs + 1; + /* + * Reset eaction objects only if it is referenced by rules. + * But always reset objects for orphaned dynamic states. + */ + if (reset_rules) { + for (i = 0; i < ch->n_rules; i++) { + /* + * Refcount on the original object will be just + * ignored on destroy. Refcount on default_eaction + * will be decremented on rule deletion, thus we + * need to reference default_eaction object. + */ + if (ipfw_reset_eaction(ch, ch->map[i], eaction_id, + no->kidx, instance_id) != 0) + no->refcnt++; } } + /* + * Reset eaction opcodes for orphaned dynamic states. + * Since parent rules are already deleted, we don't need to + * reference named object of default_eaction. + */ + ipfw_dyn_reset_eaction(ch, eaction_id, no->kidx, instance_id); IPFW_WUNLOCK(ch); } @@ -368,12 +364,71 @@ IPFW_UH_WUNLOCK(ch); return (EINVAL); } - if (no->refcnt > 1) - reset_eaction_obj(ch, eaction_id); + reset_eaction_rules(ch, eaction_id, 0, (no->refcnt > 1)); EACTION_DEBUG("External action '%s' with id %u unregistered", no->name, eaction_id); destroy_eaction_obj(ch, no); IPFW_UH_WUNLOCK(ch); + return (0); +} + +int +ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule, + uint16_t eaction_id, uint16_t default_id, uint16_t instance_id) +{ + ipfw_insn *cmd, *icmd; + + IPFW_UH_WLOCK_ASSERT(ch); + IPFW_WLOCK_ASSERT(ch); + + cmd = ACTION_PTR(rule); + if (cmd->opcode != O_EXTERNAL_ACTION || + cmd->arg1 != eaction_id) + return (0); + + if (instance_id != 0 && rule->act_ofs < rule->cmd_len - 1) { + icmd = cmd + 1; + if (icmd->opcode != O_EXTERNAL_INSTANCE || + icmd->arg1 != instance_id) + return (0); + /* FALLTHROUGH */ + } + + cmd->arg1 = default_id; /* Set to default id */ + /* + * Since named_object related to this instance will be + * also destroyed, truncate the chain of opcodes to + * remove the rest of cmd chain just after O_EXTERNAL_ACTION + * opcode. + */ + if (rule->act_ofs < rule->cmd_len - 1) { + EACTION_DEBUG("truncate rule %d: len %u -> %u", + rule->rulenum, rule->cmd_len, rule->act_ofs + 1); + rule->cmd_len = rule->act_ofs + 1; + } + /* + * Return 1 when reset successfully happened. + */ + return (1); +} + +/* + * This function should be called before external action instance is + * destroyed. It will reset eaction_id to default_id for rules, where + * eaction has instance with id == kidx. + */ +int +ipfw_reset_eaction_instance(struct ip_fw_chain *ch, uint16_t eaction_id, + uint16_t kidx) +{ + struct named_object *no; + + IPFW_UH_WLOCK_ASSERT(ch); + no = ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), eaction_id); + if (no == NULL || no->etlv != IPFW_TLV_EACTION) + return (EINVAL); + + reset_eaction_rules(ch, eaction_id, kidx, 0); return (0); } Index: sys/netpfil/ipfw/ip_fw_private.h =================================================================== --- sys/netpfil/ipfw/ip_fw_private.h +++ sys/netpfil/ipfw/ip_fw_private.h @@ -146,6 +146,9 @@ /* * Function definitions. */ +int ipfw_chk(struct ip_fw_args *args); +struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *, + u_int32_t, u_int32_t, int); /* attach (arg = 1) or detach (arg = 0) hooks */ int ipfw_attach_hooks(int); @@ -156,6 +159,7 @@ /* In ip_fw_log.c */ struct ip; struct ip_fw_chain; + void ipfw_bpf_init(int); void ipfw_bpf_uninit(int); void ipfw_bpf_mtap2(void *, u_int, struct mbuf *); @@ -168,6 +172,7 @@ #define V_verbose_limit VNET(verbose_limit) /* In ip_fw_dynamic.c */ +struct sockopt_data; enum { /* result for matching dynamic rules */ MATCH_REVERSE = 0, @@ -177,19 +182,6 @@ }; /* - * The lock for dynamic rules is only used once outside the file, - * and only to release the result of lookup_dyn_rule(). - * Eventually we may implement it with a callback on the function. - */ -struct ip_fw_chain; -struct sockopt_data; -int ipfw_is_dyn_rule(struct ip_fw *rule); -void ipfw_expire_dyn_states(struct ip_fw_chain *, ipfw_range_tlv *); - -struct tcphdr; -struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *, - u_int32_t, u_int32_t, int); -/* * Macro to determine that we need to do or redo dynamic state lookup. * direction == MATCH_UNKNOWN means that this is first lookup, then we need * to do lookup. @@ -219,13 +211,17 @@ const void *ulp, int pktlen, const ipfw_insn *cmd, struct ipfw_dyn_info *info); +int ipfw_is_dyn_rule(struct ip_fw *rule); +void ipfw_expire_dyn_states(struct ip_fw_chain *, ipfw_range_tlv *); void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep); int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd); void ipfw_dyn_init(struct ip_fw_chain *); /* per-vnet initialization */ void ipfw_dyn_uninit(int); /* per-vnet deinitialization */ int ipfw_dyn_len(void); -uint32_t ipfw_dyn_get_count(void); +uint32_t ipfw_dyn_get_count(uint32_t *, int *); +void ipfw_dyn_reset_eaction(struct ip_fw_chain *ch, uint16_t eaction_id, + uint16_t default_id, uint16_t instance_id); /* common variables */ VNET_DECLARE(int, fw_one_pass); @@ -280,7 +276,9 @@ uint32_t id; /* rule id */ uint32_t cached_id; /* used by jump_fast */ uint32_t cached_pos; /* used by jump_fast */ + uint32_t refcnt; /* number of references */ + struct ip_fw *next; /* linked list of deleted rules */ ipfw_insn cmd[1]; /* storage for commands */ }; @@ -650,7 +648,6 @@ void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain); int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id); int ipfw_ctl3(struct sockopt *sopt); -int ipfw_chk(struct ip_fw_args *args); int ipfw_add_protected_rule(struct ip_fw_chain *chain, struct ip_fw *rule, int locked); void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head, @@ -659,7 +656,9 @@ void ipfw_init_counters(void); void ipfw_destroy_counters(void); struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize); +void ipfw_free_rule(struct ip_fw *rule); int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt); +int ipfw_mark_object_kidx(uint32_t *bmask, uint16_t etlv, uint16_t kidx); typedef int (sopt_handler_f)(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); @@ -758,6 +757,10 @@ int ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id); int ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args, ipfw_insn *cmd, int *done); +int ipfw_reset_eaction(struct ip_fw_chain *ch, struct ip_fw *rule, + uint16_t eaction_id, uint16_t default_id, uint16_t instance_id); +int ipfw_reset_eaction_instance(struct ip_fw_chain *ch, uint16_t eaction_id, + uint16_t instance_id); /* In ip_fw_table.c */ struct table_info; Index: sys/netpfil/ipfw/ip_fw_sockopt.c =================================================================== --- sys/netpfil/ipfw/ip_fw_sockopt.c +++ sys/netpfil/ipfw/ip_fw_sockopt.c @@ -161,8 +161,6 @@ set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule); static struct opcode_obj_rewrite *find_op_rw(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype); -static int mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule, - uint32_t *bmask); static int ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule, struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti); static int ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, @@ -209,14 +207,23 @@ rule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO); rule->cntr = uma_zalloc_pcpu(V_ipfw_cntr_zone, M_WAITOK | M_ZERO); + rule->refcnt = 1; return (rule); } -static void -free_rule(struct ip_fw *rule) +void +ipfw_free_rule(struct ip_fw *rule) { + /* + * We don't release refcnt here, since this function + * can be called without any locks held. The caller + * must release reference under IPFW_UH_WLOCK, and then + * call this function if refcount becomes 1. + */ + if (rule->refcnt > 1) + return; uma_zfree_pcpu(V_ipfw_cntr_zone, rule->cntr); free(rule, M_IPFW); } @@ -827,7 +834,7 @@ /* Unlink rule from everywhere */ unref_rule_objects(chain, rule); - *((struct ip_fw **)rule) = *head; + rule->next = *head; *head = rule; } @@ -842,8 +849,8 @@ struct ip_fw *rule; while ((rule = head) != NULL) { - head = *((struct ip_fw **)head); - free_rule(rule); + head = head->next; + ipfw_free_rule(rule); } } @@ -1026,6 +1033,16 @@ end = ipfw_find_rule(chain, rt->end_rule, UINT32_MAX); } + if (rt->flags & IPFW_RCFLAG_DYNAMIC) { + /* + * Requested deleting only for dynamic states. + */ + *ndel = 0; + ipfw_expire_dyn_states(chain, rt); + IPFW_UH_WUNLOCK(chain); + return (0); + } + /* Allocate new map of the same size */ map = get_map(chain, 0, 1 /* locked */); if (map == NULL) { @@ -2187,6 +2204,7 @@ uint32_t rsize; /* rules size */ uint32_t tcount; /* number of tables */ int rcounters; /* counters */ + uint32_t *bmask; /* index bitmask of used named objects */ }; void @@ -2223,6 +2241,49 @@ return (0); } +static int +export_named_objects(struct namedobj_instance *ni, struct dump_args *da, + struct sockopt_data *sd) +{ + int error, i; + + for (i = 0; i < IPFW_TABLES_MAX && da->tcount > 0; i++) { + if ((da->bmask[i / 32] & (1 << (i % 32))) == 0) + continue; + if ((error = export_objhash_ntlv(ni, i, sd)) != 0) + return (error); + da->tcount--; + } + return (0); +} + +static int +dump_named_objects(struct ip_fw_chain *ch, struct dump_args *da, + struct sockopt_data *sd) +{ + ipfw_obj_ctlv *ctlv; + int error; + + MPASS(da->tcount > 0); + /* Header first */ + ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); + if (ctlv == NULL) + return (ENOMEM); + ctlv->head.type = IPFW_TLV_TBLNAME_LIST; + ctlv->head.length = da->tcount * sizeof(ipfw_obj_ntlv) + + sizeof(*ctlv); + ctlv->count = da->tcount; + ctlv->objsize = sizeof(ipfw_obj_ntlv); + + /* Dump table names first (if any) */ + error = export_named_objects(ipfw_get_table_objhash(ch), da, sd); + if (error != 0) + return (error); + /* Then dump another named objects */ + da->bmask += IPFW_TABLES_MAX / 32; + return (export_named_objects(CHAIN_TO_SRV(ch), da, sd)); +} + /* * Dumps static rules with table TLVs in buffer @sd. * @@ -2230,52 +2291,13 @@ */ static int dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da, - uint32_t *bmask, struct sockopt_data *sd) + struct sockopt_data *sd) { - int error; - int i, l; - uint32_t tcount; ipfw_obj_ctlv *ctlv; struct ip_fw *krule; - struct namedobj_instance *ni; caddr_t dst; + int i, l; - /* Dump table names first (if any) */ - if (da->tcount > 0) { - /* Header first */ - ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); - if (ctlv == NULL) - return (ENOMEM); - ctlv->head.type = IPFW_TLV_TBLNAME_LIST; - ctlv->head.length = da->tcount * sizeof(ipfw_obj_ntlv) + - sizeof(*ctlv); - ctlv->count = da->tcount; - ctlv->objsize = sizeof(ipfw_obj_ntlv); - } - - i = 0; - tcount = da->tcount; - ni = ipfw_get_table_objhash(chain); - while (tcount > 0) { - if ((bmask[i / 32] & (1 << (i % 32))) == 0) { - i++; - continue; - } - - /* Jump to shared named object bitmask */ - if (i >= IPFW_TABLES_MAX) { - ni = CHAIN_TO_SRV(chain); - i -= IPFW_TABLES_MAX; - bmask += IPFW_TABLES_MAX / 32; - } - - if ((error = export_objhash_ntlv(ni, i, sd)) != 0) - return (error); - - i++; - tcount--; - } - /* Dump rules */ ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); if (ctlv == NULL) @@ -2300,27 +2322,41 @@ return (0); } +int +ipfw_mark_object_kidx(uint32_t *bmask, uint16_t etlv, uint16_t kidx) +{ + uint32_t bidx; + + /* + * Maintain separate bitmasks for table and non-table objects. + */ + bidx = (etlv == IPFW_TLV_TBL_NAME) ? 0: IPFW_TABLES_MAX / 32; + bidx += kidx / 32; + if ((bmask[bidx] & (1 << (kidx % 32))) != 0) + return (0); + + bmask[bidx] |= 1 << (kidx % 32); + return (1); +} + /* * Marks every object index used in @rule with bit in @bmask. * Used to generate bitmask of referenced tables/objects for given ruleset * or its part. - * - * Returns number of newly-referenced objects. */ -static int -mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule, - uint32_t *bmask) +static void +mark_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule, + struct dump_args *da) { struct opcode_obj_rewrite *rw; ipfw_insn *cmd; - int bidx, cmdlen, l, count; + int cmdlen, l; uint16_t kidx; uint8_t subtype; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; - count = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); @@ -2328,21 +2364,9 @@ if (rw == NULL) continue; - bidx = kidx / 32; - /* - * Maintain separate bitmasks for table and - * non-table objects. - */ - if (rw->etlv != IPFW_TLV_TBL_NAME) - bidx += IPFW_TABLES_MAX / 32; - - if ((bmask[bidx] & (1 << (kidx % 32))) == 0) - count++; - - bmask[bidx] |= 1 << (kidx % 32); + if (ipfw_mark_object_kidx(da->bmask, rw->etlv, kidx)) + da->tcount++; } - - return (count); } /* @@ -2366,13 +2390,12 @@ dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { + struct dump_args da; ipfw_cfg_lheader *hdr; struct ip_fw *rule; size_t sz, rnum; - uint32_t hdr_flags; + uint32_t hdr_flags, *bmask; int error, i; - struct dump_args da; - uint32_t *bmask; hdr = (ipfw_cfg_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr)); if (hdr == NULL) @@ -2380,9 +2403,15 @@ error = 0; bmask = NULL; - /* Allocate needed state. Note we allocate 2xspace mask, for table&srv */ - if (hdr->flags & IPFW_CFG_GET_STATIC) - bmask = malloc(IPFW_TABLES_MAX / 4, M_TEMP, M_WAITOK | M_ZERO); + memset(&da, 0, sizeof(da)); + /* + * Allocate needed state. + * Note we allocate 2xspace mask, for table & srv + */ + if (hdr->flags & (IPFW_CFG_GET_STATIC | IPFW_CFG_GET_STATES)) + da.bmask = bmask = malloc( + sizeof(uint32_t) * IPFW_TABLES_MAX * 2 / 32, M_TEMP, + M_WAITOK | M_ZERO); IPFW_UH_RLOCK(chain); @@ -2391,9 +2420,6 @@ * Prepare used tables bitmask. */ sz = sizeof(ipfw_cfg_lheader); - memset(&da, 0, sizeof(da)); - - da.b = 0; da.e = chain->n_rules; if (hdr->end_rule != 0) { @@ -2412,24 +2438,25 @@ da.rsize += RULEUSIZE1(rule) + sizeof(ipfw_obj_tlv); da.rcount++; /* Update bitmask of used objects for given range */ - da.tcount += mark_object_kidx(chain, rule, bmask); + mark_rule_objects(chain, rule, &da); } /* Add counters if requested */ if (hdr->flags & IPFW_CFG_GET_COUNTERS) { da.rsize += sizeof(struct ip_fw_bcounter) * da.rcount; da.rcounters = 1; } - - if (da.tcount > 0) - sz += da.tcount * sizeof(ipfw_obj_ntlv) + - sizeof(ipfw_obj_ctlv); sz += da.rsize + sizeof(ipfw_obj_ctlv); } - if (hdr->flags & IPFW_CFG_GET_STATES) - sz += ipfw_dyn_get_count() * sizeof(ipfw_obj_dyntlv) + - sizeof(ipfw_obj_ctlv); + if (hdr->flags & IPFW_CFG_GET_STATES) { + sz += sizeof(ipfw_obj_ctlv) + + ipfw_dyn_get_count(bmask, &i) * sizeof(ipfw_obj_dyntlv); + da.tcount += i; + } + if (da.tcount > 0) + sz += da.tcount * sizeof(ipfw_obj_ntlv) + + sizeof(ipfw_obj_ctlv); /* * Fill header anyway. @@ -2447,8 +2474,14 @@ } /* STAGE2: Store actual data */ + if (da.tcount > 0) { + error = dump_named_objects(chain, &da, sd); + if (error != 0) + goto cleanup; + } + if (hdr_flags & IPFW_CFG_GET_STATIC) { - error = dump_static_rules(chain, &da, bmask, sd); + error = dump_static_rules(chain, &da, sd); if (error != 0) goto cleanup; } @@ -3027,7 +3060,7 @@ if ((error = commit_rules(chain, cbuf, rtlv->count)) != 0) { /* Free allocate krules */ for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) - free_rule(ci->krule); + ipfw_free_rule(ci->krule); } if (cbuf != NULL && cbuf != &rci) @@ -3851,7 +3884,7 @@ import_rule0(&ci); error = commit_rules(chain, &ci, 1); if (error != 0) - free_rule(ci.krule); + ipfw_free_rule(ci.krule); else if (sopt->sopt_dir == SOPT_GET) { if (is7) { error = convert_rule_to_7(rule); Index: sys/netpfil/ipfw/nat64/nat64lsn_control.c =================================================================== --- sys/netpfil/ipfw/nat64/nat64lsn_control.c +++ sys/netpfil/ipfw/nat64/nat64lsn_control.c @@ -256,6 +256,7 @@ return (EBUSY); } + ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, cfg->no.kidx); SRV_OBJECT(ch, cfg->no.kidx) = NULL; nat64lsn_detach_config(ch, cfg); IPFW_UH_WUNLOCK(ch); Index: sys/netpfil/ipfw/nat64/nat64stl_control.c =================================================================== --- sys/netpfil/ipfw/nat64/nat64stl_control.c +++ sys/netpfil/ipfw/nat64/nat64stl_control.c @@ -342,6 +342,7 @@ return (EBUSY); } + ipfw_reset_eaction_instance(ch, V_nat64stl_eid, cfg->no.kidx); SRV_OBJECT(ch, cfg->no.kidx) = NULL; nat64stl_detach_config(ch, cfg); IPFW_UH_WUNLOCK(ch); Index: sys/netpfil/ipfw/nptv6/nptv6.c =================================================================== --- sys/netpfil/ipfw/nptv6/nptv6.c +++ sys/netpfil/ipfw/nptv6/nptv6.c @@ -746,6 +746,7 @@ return (EBUSY); } + ipfw_reset_eaction_instance(ch, V_nptv6_eid, cfg->no.kidx); SRV_OBJECT(ch, cfg->no.kidx) = NULL; ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);