Index: stable/11/sys/netpfil/ipfw/ip_fw_dynamic.c =================================================================== --- stable/11/sys/netpfil/ipfw/ip_fw_dynamic.c (revision 324045) +++ stable/11/sys/netpfil/ipfw/ip_fw_dynamic.c (revision 324046) @@ -1,1822 +1,1818 @@ /*- * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define DEB(x) #define DDB(x) x /* * Dynamic rule support for ipfw */ #include "opt_ipfw.h" #include "opt_inet.h" #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include /* for ETHERTYPE_IP */ #include #include #include #include #include #include #include /* ip_defttl */ #include #include #include #include /* IN6_ARE_ADDR_EQUAL */ #ifdef INET6 #include #include #endif #include #include /* XXX for in_cksum */ #ifdef MAC #include #endif /* * Description of dynamic rules. * * Dynamic rules are stored in lists accessed through a hash table * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can * be modified through the sysctl variable dyn_buckets which is * updated when the table becomes empty. * * XXX currently there is only one list, ipfw_dyn. * * When a packet is received, its address fields are first masked * with the mask defined for the rule, then hashed, then matched * against the entries in the corresponding list. * Dynamic rules can be used for different purposes: * + stateful rules; * + enforcing limits on the number of sessions; * + in-kernel NAT (not implemented yet) * * The lifetime of dynamic rules is regulated by dyn_*_lifetime, * measured in seconds and depending on the flags. * * The total number of dynamic rules is equal to UMA zone items count. * The max number of dynamic rules is dyn_max. When we reach * the maximum number of rules we do not create anymore. This is * done to avoid consuming too much memory, but also too much * time when searching on each packet (ideally, we should try instead * to put a limit on the length of the list on each bucket...). * * Each dynamic rule holds a pointer to the parent ipfw rule so * we know what action to perform. Dynamic rules are removed when * the parent rule is deleted. This can be changed by dyn_keep_states * sysctl. * * There are some limitations with dynamic rules -- we do not * obey the 'randomized match', and we do not do multiple * passes through the firewall. XXX check the latter!!! */ struct ipfw_dyn_bucket { struct mtx mtx; /* Bucket protecting lock */ ipfw_dyn_rule *head; /* Pointer to first rule */ }; /* * Static variables followed by global ones */ static VNET_DEFINE(struct ipfw_dyn_bucket *, ipfw_dyn_v); static VNET_DEFINE(u_int32_t, dyn_buckets_max); static VNET_DEFINE(u_int32_t, curr_dyn_buckets); static VNET_DEFINE(struct callout, ipfw_timeout); #define V_ipfw_dyn_v VNET(ipfw_dyn_v) #define V_dyn_buckets_max VNET(dyn_buckets_max) #define V_curr_dyn_buckets VNET(curr_dyn_buckets) #define V_ipfw_timeout VNET(ipfw_timeout) static VNET_DEFINE(uma_zone_t, ipfw_dyn_rule_zone); #define V_ipfw_dyn_rule_zone VNET(ipfw_dyn_rule_zone) #define IPFW_BUCK_LOCK_INIT(b) \ mtx_init(&(b)->mtx, "IPFW dynamic bucket", NULL, MTX_DEF) #define IPFW_BUCK_LOCK_DESTROY(b) \ mtx_destroy(&(b)->mtx) #define IPFW_BUCK_LOCK(i) mtx_lock(&V_ipfw_dyn_v[(i)].mtx) #define IPFW_BUCK_UNLOCK(i) mtx_unlock(&V_ipfw_dyn_v[(i)].mtx) #define IPFW_BUCK_ASSERT(i) mtx_assert(&V_ipfw_dyn_v[(i)].mtx, MA_OWNED) static VNET_DEFINE(int, dyn_keep_states); #define V_dyn_keep_states VNET(dyn_keep_states) /* * Timeouts for various events in handing dynamic rules. */ static VNET_DEFINE(u_int32_t, dyn_ack_lifetime); static VNET_DEFINE(u_int32_t, dyn_syn_lifetime); static VNET_DEFINE(u_int32_t, dyn_fin_lifetime); static VNET_DEFINE(u_int32_t, dyn_rst_lifetime); static VNET_DEFINE(u_int32_t, dyn_udp_lifetime); static VNET_DEFINE(u_int32_t, dyn_short_lifetime); #define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) #define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) #define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) #define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) #define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) #define V_dyn_short_lifetime VNET(dyn_short_lifetime) /* * Keepalives are sent if dyn_keepalive is set. They are sent every * dyn_keepalive_period seconds, in the last dyn_keepalive_interval * seconds of lifetime of a rule. * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower * than dyn_keepalive_period. */ static VNET_DEFINE(u_int32_t, dyn_keepalive_interval); static VNET_DEFINE(u_int32_t, dyn_keepalive_period); static VNET_DEFINE(u_int32_t, dyn_keepalive); static VNET_DEFINE(time_t, dyn_keepalive_last); #define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) #define V_dyn_keepalive_period VNET(dyn_keepalive_period) #define V_dyn_keepalive VNET(dyn_keepalive) #define V_dyn_keepalive_last VNET(dyn_keepalive_last) static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ #define DYN_COUNT uma_zone_get_cur(V_ipfw_dyn_rule_zone) #define V_dyn_max VNET(dyn_max) /* for userspace, we emulate the uma_zone_counter with ipfw_dyn_count */ static int ipfw_dyn_count; /* number of objects */ #ifdef USERSPACE /* emulation of UMA object counters for userspace */ #define uma_zone_get_cur(x) ipfw_dyn_count #endif /* USERSPACE */ static int last_log; /* Log ratelimiting */ static void ipfw_dyn_tick(void *vnetx); static void check_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *, int, int); #ifdef SYSCTL_NODE static int sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS); static int sysctl_ipfw_dyn_max(SYSCTL_HANDLER_ARGS); SYSBEGIN(f2) SYSCTL_DECL(_net_inet_ip_fw); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_buckets_max), 0, "Max number of dyn. buckets"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, "Current Number of dyn. buckets"); SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RD, 0, 0, sysctl_ipfw_dyn_count, "IU", "Number of dyn. rules"); SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_dyn_max, "IU", "Max number of dyn. rules"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, "Lifetime of dyn. rules for acks"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, "Lifetime of dyn. rules for syn"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, "Lifetime of dyn. rules for fin"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, "Lifetime of dyn. rules for rst"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, "Lifetime of dyn. rules for UDP"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, "Lifetime of dyn. rules for other situations"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, "Enable keepalives for dyn. rules"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keep_states, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keep_states), 0, "Do not flush dynamic states on rule deletion"); SYSEND #endif /* SYSCTL_NODE */ #ifdef INET6 static __inline int hash_packet6(struct ipfw_flow_id *id) { u_int32_t i; i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ (id->src_ip6.__u6_addr.__u6_addr32[3]); return ntohl(i); } #endif /* * IMPORTANT: the hash function for dynamic rules must be commutative * in source and destination (ip,port), because rules are bidirectional * and we want to find both in the same bucket. */ static __inline int hash_packet(struct ipfw_flow_id *id, int buckets) { u_int32_t i; #ifdef INET6 if (IS_IP6_FLOW_ID(id)) i = hash_packet6(id); else #endif /* INET6 */ i = (id->dst_ip) ^ (id->src_ip); i ^= (id->dst_port) ^ (id->src_port); return (i & (buckets - 1)); } #if 0 #define DYN_DEBUG(fmt, ...) do { \ printf("%s: " fmt "\n", __func__, __VA_ARGS__); \ } while (0) #else #define DYN_DEBUG(fmt, ...) #endif static char *default_state_name = "default"; struct dyn_state_obj { struct named_object no; char name[64]; }; #define DYN_STATE_OBJ(ch, cmd) \ ((struct dyn_state_obj *)SRV_OBJECT(ch, (cmd)->arg1)) /* * Classifier callback. * Return 0 if opcode contains object that should be referenced * or rewritten. */ static int dyn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); /* Don't rewrite "check-state any" */ if (cmd->arg1 == 0 && cmd->opcode == O_CHECK_STATE) return (1); *puidx = cmd->arg1; *ptype = 0; return (0); } static void dyn_update(ipfw_insn *cmd, uint16_t idx) { cmd->arg1 = idx; DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); } static int dyn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, struct named_object **pno) { ipfw_obj_ntlv *ntlv; const char *name; DYN_DEBUG("uidx %d", ti->uidx); if (ti->uidx != 0) { if (ti->tlvs == NULL) return (EINVAL); /* Search ntlv in the buffer provided by user */ ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, IPFW_TLV_STATE_NAME); if (ntlv == NULL) return (EINVAL); name = ntlv->name; } else name = default_state_name; /* * Search named object with corresponding name. * Since states objects are global - ignore the set value * and use zero instead. */ *pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0, IPFW_TLV_STATE_NAME, name); /* * We always return success here. * The caller will check *pno and mark object as unresolved, * then it will automatically create "default" object. */ return (0); } static struct named_object * dyn_findbykidx(struct ip_fw_chain *ch, uint16_t idx) { DYN_DEBUG("kidx %d", idx); return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx)); } static int dyn_create(struct ip_fw_chain *ch, struct tid_info *ti, uint16_t *pkidx) { struct namedobj_instance *ni; struct dyn_state_obj *obj; struct named_object *no; ipfw_obj_ntlv *ntlv; char *name; DYN_DEBUG("uidx %d", ti->uidx); if (ti->uidx != 0) { if (ti->tlvs == NULL) return (EINVAL); ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, IPFW_TLV_STATE_NAME); if (ntlv == NULL) return (EINVAL); name = ntlv->name; } else name = default_state_name; ni = CHAIN_TO_SRV(ch); obj = malloc(sizeof(*obj), M_IPFW, M_WAITOK | M_ZERO); obj->no.name = obj->name; obj->no.etlv = IPFW_TLV_STATE_NAME; strlcpy(obj->name, name, sizeof(obj->name)); IPFW_UH_WLOCK(ch); no = ipfw_objhash_lookup_name_type(ni, 0, IPFW_TLV_STATE_NAME, name); if (no != NULL) { /* * Object is already created. * Just return its kidx and bump refcount. */ *pkidx = no->kidx; no->refcnt++; IPFW_UH_WUNLOCK(ch); free(obj, M_IPFW); DYN_DEBUG("\tfound kidx %d", *pkidx); return (0); } if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) { DYN_DEBUG("\talloc_idx failed for %s", name); IPFW_UH_WUNLOCK(ch); free(obj, M_IPFW); return (ENOSPC); } ipfw_objhash_add(ni, &obj->no); - IPFW_WLOCK(ch); SRV_OBJECT(ch, obj->no.kidx) = obj; - IPFW_WUNLOCK(ch); obj->no.refcnt++; *pkidx = obj->no.kidx; IPFW_UH_WUNLOCK(ch); DYN_DEBUG("\tcreated kidx %d", *pkidx); return (0); } static void dyn_destroy(struct ip_fw_chain *ch, struct named_object *no) { struct dyn_state_obj *obj; IPFW_UH_WLOCK_ASSERT(ch); KASSERT(no->refcnt == 1, ("Destroying object '%s' (type %u, idx %u) with refcnt %u", no->name, no->etlv, no->kidx, no->refcnt)); DYN_DEBUG("kidx %d", no->kidx); - IPFW_WLOCK(ch); obj = SRV_OBJECT(ch, no->kidx); SRV_OBJECT(ch, no->kidx) = NULL; - IPFW_WUNLOCK(ch); ipfw_objhash_del(CHAIN_TO_SRV(ch), no); ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), no->kidx); free(obj, M_IPFW); } static struct opcode_obj_rewrite dyn_opcodes[] = { { O_KEEP_STATE, IPFW_TLV_STATE_NAME, dyn_classify, dyn_update, dyn_findbyname, dyn_findbykidx, dyn_create, dyn_destroy }, { O_CHECK_STATE, IPFW_TLV_STATE_NAME, dyn_classify, dyn_update, dyn_findbyname, dyn_findbykidx, dyn_create, dyn_destroy }, { O_PROBE_STATE, IPFW_TLV_STATE_NAME, dyn_classify, dyn_update, dyn_findbyname, dyn_findbykidx, dyn_create, dyn_destroy }, { O_LIMIT, IPFW_TLV_STATE_NAME, dyn_classify, dyn_update, dyn_findbyname, dyn_findbykidx, dyn_create, dyn_destroy }, }; /** * Print customizable flow id description via log(9) facility. */ static void print_dyn_rule_flags(struct ipfw_flow_id *id, int dyn_type, int log_flags, char *prefix, char *postfix) { struct in_addr da; #ifdef INET6 char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; #else char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; #endif #ifdef INET6 if (IS_IP6_FLOW_ID(id)) { ip6_sprintf(src, &id->src_ip6); ip6_sprintf(dst, &id->dst_ip6); } else #endif { da.s_addr = htonl(id->src_ip); inet_ntop(AF_INET, &da, src, sizeof(src)); da.s_addr = htonl(id->dst_ip); inet_ntop(AF_INET, &da, dst, sizeof(dst)); } log(log_flags, "ipfw: %s type %d %s %d -> %s %d, %d %s\n", prefix, dyn_type, src, id->src_port, dst, id->dst_port, DYN_COUNT, postfix); } #define print_dyn_rule(id, dtype, prefix, postfix) \ print_dyn_rule_flags(id, dtype, LOG_DEBUG, prefix, postfix) #define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) #define TIME_LE(a,b) ((int)((a)-(b)) < 0) static void dyn_update_proto_state(ipfw_dyn_rule *q, const struct ipfw_flow_id *id, const struct tcphdr *tcp, int dir) { uint32_t ack; u_char flags; if (id->proto == IPPROTO_TCP) { flags = id->_flags & (TH_FIN | TH_SYN | TH_RST); #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) #define TCP_FLAGS (TH_FLAGS | (TH_FLAGS << 8)) #define ACK_FWD 0x10000 /* fwd ack seen */ #define ACK_REV 0x20000 /* rev ack seen */ q->state |= (dir == MATCH_FORWARD) ? flags : (flags << 8); switch (q->state & TCP_FLAGS) { case TH_SYN: /* opening */ q->expire = time_uptime + V_dyn_syn_lifetime; break; case BOTH_SYN: /* move to established */ case BOTH_SYN | TH_FIN: /* one side tries to close */ case BOTH_SYN | (TH_FIN << 8): #define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) if (tcp == NULL) break; ack = ntohl(tcp->th_ack); if (dir == MATCH_FORWARD) { if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) { q->ack_fwd = ack; q->state |= ACK_FWD; } } else { if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) { q->ack_rev = ack; q->state |= ACK_REV; } } if ((q->state & (ACK_FWD | ACK_REV)) == (ACK_FWD | ACK_REV)) { q->expire = time_uptime + V_dyn_ack_lifetime; q->state &= ~(ACK_FWD | ACK_REV); } break; case BOTH_SYN | BOTH_FIN: /* both sides closed */ if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) V_dyn_fin_lifetime = V_dyn_keepalive_period - 1; q->expire = time_uptime + V_dyn_fin_lifetime; break; default: #if 0 /* * reset or some invalid combination, but can also * occur if we use keep-state the wrong way. */ if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) printf("invalid state: 0x%x\n", q->state); #endif if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) V_dyn_rst_lifetime = V_dyn_keepalive_period - 1; q->expire = time_uptime + V_dyn_rst_lifetime; break; } } else if (id->proto == IPPROTO_UDP) { q->expire = time_uptime + V_dyn_udp_lifetime; } else { /* other protocols */ q->expire = time_uptime + V_dyn_short_lifetime; } } /* * Lookup a dynamic rule, locked version. */ static ipfw_dyn_rule * lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int i, int *match_direction, struct tcphdr *tcp, uint16_t kidx) { /* * Stateful ipfw extensions. * Lookup into dynamic session queue. */ ipfw_dyn_rule *prev, *q = NULL; int dir; IPFW_BUCK_ASSERT(i); dir = MATCH_NONE; for (prev = NULL, q = V_ipfw_dyn_v[i].head; q; prev = q, q = q->next) { if (q->dyn_type == O_LIMIT_PARENT) continue; if (pkt->proto != q->id.proto) continue; if (kidx != 0 && kidx != q->kidx) continue; if (IS_IP6_FLOW_ID(pkt)) { if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.src_ip6) && IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.dst_ip6) && pkt->src_port == q->id.src_port && pkt->dst_port == q->id.dst_port) { dir = MATCH_FORWARD; break; } if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.dst_ip6) && IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.src_ip6) && pkt->src_port == q->id.dst_port && pkt->dst_port == q->id.src_port) { dir = MATCH_REVERSE; break; } } else { if (pkt->src_ip == q->id.src_ip && pkt->dst_ip == q->id.dst_ip && pkt->src_port == q->id.src_port && pkt->dst_port == q->id.dst_port) { dir = MATCH_FORWARD; break; } if (pkt->src_ip == q->id.dst_ip && pkt->dst_ip == q->id.src_ip && pkt->src_port == q->id.dst_port && pkt->dst_port == q->id.src_port) { dir = MATCH_REVERSE; break; } } } if (q == NULL) goto done; /* q = NULL, not found */ if (prev != NULL) { /* found and not in front */ prev->next = q->next; q->next = V_ipfw_dyn_v[i].head; V_ipfw_dyn_v[i].head = q; } /* update state according to flags */ dyn_update_proto_state(q, pkt, tcp, dir); done: if (match_direction != NULL) *match_direction = dir; return (q); } ipfw_dyn_rule * ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, struct tcphdr *tcp, uint16_t kidx) { ipfw_dyn_rule *q; int i; i = hash_packet(pkt, V_curr_dyn_buckets); IPFW_BUCK_LOCK(i); q = lookup_dyn_rule_locked(pkt, i, match_direction, tcp, kidx); if (q == NULL) IPFW_BUCK_UNLOCK(i); /* NB: return table locked when q is not NULL */ return q; } /* * Unlock bucket mtx * @p - pointer to dynamic rule */ void ipfw_dyn_unlock(ipfw_dyn_rule *q) { IPFW_BUCK_UNLOCK(q->bucket); } static int resize_dynamic_table(struct ip_fw_chain *chain, int nbuckets) { int i, k, nbuckets_old; ipfw_dyn_rule *q; struct ipfw_dyn_bucket *dyn_v, *dyn_v_old; /* Check if given number is power of 2 and less than 64k */ if ((nbuckets > 65536) || (!powerof2(nbuckets))) return 1; CTR3(KTR_NET, "%s: resize dynamic hash: %d -> %d", __func__, V_curr_dyn_buckets, nbuckets); /* Allocate and initialize new hash */ dyn_v = malloc(nbuckets * sizeof(*dyn_v), M_IPFW, M_WAITOK | M_ZERO); for (i = 0 ; i < nbuckets; i++) IPFW_BUCK_LOCK_INIT(&dyn_v[i]); /* * Call upper half lock, as get_map() do to ease * read-only access to dynamic rules hash from sysctl */ IPFW_UH_WLOCK(chain); /* * Acquire chain write lock to permit hash access * for main traffic path without additional locks */ IPFW_WLOCK(chain); /* Save old values */ nbuckets_old = V_curr_dyn_buckets; dyn_v_old = V_ipfw_dyn_v; /* Skip relinking if array is not set up */ if (V_ipfw_dyn_v == NULL) V_curr_dyn_buckets = 0; /* Re-link all dynamic states */ for (i = 0 ; i < V_curr_dyn_buckets ; i++) { while (V_ipfw_dyn_v[i].head != NULL) { /* Remove from current chain */ q = V_ipfw_dyn_v[i].head; V_ipfw_dyn_v[i].head = q->next; /* Get new hash value */ k = hash_packet(&q->id, nbuckets); q->bucket = k; /* Add to the new head */ q->next = dyn_v[k].head; dyn_v[k].head = q; } } /* Update current pointers/buckets values */ V_curr_dyn_buckets = nbuckets; V_ipfw_dyn_v = dyn_v; IPFW_WUNLOCK(chain); IPFW_UH_WUNLOCK(chain); /* Start periodic callout on initial creation */ if (dyn_v_old == NULL) { callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, curvnet, 0); return (0); } /* Destroy all mutexes */ for (i = 0 ; i < nbuckets_old ; i++) IPFW_BUCK_LOCK_DESTROY(&dyn_v_old[i]); /* Free old hash */ free(dyn_v_old, M_IPFW); return 0; } /** * Install state of type 'type' for a dynamic session. * The hash table contains two type of rules: * - regular rules (O_KEEP_STATE) * - rules for sessions with limited number of sess per user * (O_LIMIT). When they are created, the parent is * increased by 1, and decreased on delete. In this case, * the third parameter is the parent rule and not the chain. * - "parent" rules for the above (O_LIMIT_PARENT). */ static ipfw_dyn_rule * add_dyn_rule(struct ipfw_flow_id *id, int i, uint8_t dyn_type, struct ip_fw *rule, uint16_t kidx) { ipfw_dyn_rule *r; IPFW_BUCK_ASSERT(i); r = uma_zalloc(V_ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO); if (r == NULL) { if (last_log != time_uptime) { last_log = time_uptime; log(LOG_DEBUG, "ipfw: Cannot allocate dynamic state, " "consider increasing net.inet.ip.fw.dyn_max\n"); } return NULL; } ipfw_dyn_count++; /* * refcount on parent is already incremented, so * it is safe to use parent unlocked. */ if (dyn_type == O_LIMIT) { ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; if ( parent->dyn_type != O_LIMIT_PARENT) panic("invalid parent"); r->parent = parent; rule = parent->rule; } r->id = *id; r->expire = time_uptime + V_dyn_syn_lifetime; r->rule = rule; r->dyn_type = dyn_type; IPFW_ZERO_DYN_COUNTER(r); r->count = 0; r->kidx = kidx; r->bucket = i; r->next = V_ipfw_dyn_v[i].head; V_ipfw_dyn_v[i].head = r; DEB(print_dyn_rule(id, dyn_type, "add dyn entry", "total");) return r; } /** * lookup dynamic parent rule using pkt and rule as search keys. * If the lookup fails, then install one. */ static ipfw_dyn_rule * lookup_dyn_parent(struct ipfw_flow_id *pkt, int *pindex, struct ip_fw *rule, uint16_t kidx) { ipfw_dyn_rule *q; int i, is_v6; is_v6 = IS_IP6_FLOW_ID(pkt); i = hash_packet( pkt, V_curr_dyn_buckets ); *pindex = i; IPFW_BUCK_LOCK(i); for (q = V_ipfw_dyn_v[i].head ; q != NULL ; q=q->next) if (q->dyn_type == O_LIMIT_PARENT && kidx == q->kidx && rule == q->rule && pkt->proto == q->id.proto && pkt->src_port == q->id.src_port && pkt->dst_port == q->id.dst_port && ( (is_v6 && IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), &(q->id.src_ip6)) && IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), &(q->id.dst_ip6))) || (!is_v6 && pkt->src_ip == q->id.src_ip && pkt->dst_ip == q->id.dst_ip) ) ) { q->expire = time_uptime + V_dyn_short_lifetime; DEB(print_dyn_rule(pkt, q->dyn_type, "lookup_dyn_parent found", "");) return q; } /* Add virtual limiting rule */ return add_dyn_rule(pkt, i, O_LIMIT_PARENT, rule, kidx); } /** * Install dynamic state for rule type cmd->o.opcode * * Returns 1 (failure) if state is not installed because of errors or because * session limitations are enforced. */ int ipfw_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, ipfw_insn_limit *cmd, struct ip_fw_args *args, uint32_t tablearg) { ipfw_dyn_rule *q; int i; DEB(print_dyn_rule(&args->f_id, cmd->o.opcode, "install_state", (cmd->o.arg1 == 0 ? "": DYN_STATE_OBJ(chain, &cmd->o)->name));) i = hash_packet(&args->f_id, V_curr_dyn_buckets); IPFW_BUCK_LOCK(i); q = lookup_dyn_rule_locked(&args->f_id, i, NULL, NULL, cmd->o.arg1); if (q != NULL) { /* should never occur */ DEB( if (last_log != time_uptime) { last_log = time_uptime; printf("ipfw: %s: entry already present, done\n", __func__); }) IPFW_BUCK_UNLOCK(i); return (0); } /* * State limiting is done via uma(9) zone limiting. * Save pointer to newly-installed rule and reject * packet if add_dyn_rule() returned NULL. * Note q is currently set to NULL. */ switch (cmd->o.opcode) { case O_KEEP_STATE: /* bidir rule */ q = add_dyn_rule(&args->f_id, i, O_KEEP_STATE, rule, cmd->o.arg1); break; case O_LIMIT: { /* limit number of sessions */ struct ipfw_flow_id id; ipfw_dyn_rule *parent; uint32_t conn_limit; uint16_t limit_mask = cmd->limit_mask; int pindex; conn_limit = IP_FW_ARG_TABLEARG(chain, cmd->conn_limit, limit); DEB( if (cmd->conn_limit == IP_FW_TARG) printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " "(tablearg)\n", __func__, conn_limit); else printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n", __func__, conn_limit); ) id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; id.proto = args->f_id.proto; id.addr_type = args->f_id.addr_type; id.fib = M_GETFIB(args->m); if (IS_IP6_FLOW_ID (&(args->f_id))) { bzero(&id.src_ip6, sizeof(id.src_ip6)); bzero(&id.dst_ip6, sizeof(id.dst_ip6)); if (limit_mask & DYN_SRC_ADDR) id.src_ip6 = args->f_id.src_ip6; if (limit_mask & DYN_DST_ADDR) id.dst_ip6 = args->f_id.dst_ip6; } else { if (limit_mask & DYN_SRC_ADDR) id.src_ip = args->f_id.src_ip; if (limit_mask & DYN_DST_ADDR) id.dst_ip = args->f_id.dst_ip; } if (limit_mask & DYN_SRC_PORT) id.src_port = args->f_id.src_port; if (limit_mask & DYN_DST_PORT) id.dst_port = args->f_id.dst_port; /* * We have to release lock for previous bucket to * avoid possible deadlock */ IPFW_BUCK_UNLOCK(i); parent = lookup_dyn_parent(&id, &pindex, rule, cmd->o.arg1); if (parent == NULL) { printf("ipfw: %s: add parent failed\n", __func__); IPFW_BUCK_UNLOCK(pindex); return (1); } if (parent->count >= conn_limit) { if (V_fw_verbose && last_log != time_uptime) { last_log = time_uptime; char sbuf[24]; last_log = time_uptime; snprintf(sbuf, sizeof(sbuf), "%d drop session", parent->rule->rulenum); print_dyn_rule_flags(&args->f_id, cmd->o.opcode, LOG_SECURITY | LOG_DEBUG, sbuf, "too many entries"); } IPFW_BUCK_UNLOCK(pindex); return (1); } /* Increment counter on parent */ parent->count++; IPFW_BUCK_UNLOCK(pindex); IPFW_BUCK_LOCK(i); q = add_dyn_rule(&args->f_id, i, O_LIMIT, (struct ip_fw *)parent, cmd->o.arg1); if (q == NULL) { /* Decrement index and notify caller */ IPFW_BUCK_UNLOCK(i); IPFW_BUCK_LOCK(pindex); parent->count--; IPFW_BUCK_UNLOCK(pindex); return (1); } break; } default: printf("ipfw: %s: unknown dynamic rule type %u\n", __func__, cmd->o.opcode); } if (q == NULL) { IPFW_BUCK_UNLOCK(i); return (1); /* Notify caller about failure */ } dyn_update_proto_state(q, &args->f_id, NULL, MATCH_FORWARD); IPFW_BUCK_UNLOCK(i); return (0); } /* * Generate a TCP packet, containing either a RST or a keepalive. * When flags & TH_RST, we are sending a RST packet, because of a * "reset" action matched the packet. * Otherwise we are sending a keepalive, and flags & TH_ * The 'replyto' mbuf is the mbuf being replied to, if any, and is required * so that MAC can label the reply appropriately. */ struct mbuf * ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags) { struct mbuf *m = NULL; /* stupid compiler */ int len, dir; struct ip *h = NULL; /* stupid compiler */ #ifdef INET6 struct ip6_hdr *h6 = NULL; #endif struct tcphdr *th = NULL; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); M_SETFIB(m, id->fib); #ifdef MAC if (replyto != NULL) mac_netinet_firewall_reply(replyto, m); else mac_netinet_firewall_send(m); #else (void)replyto; /* don't warn about unused arg */ #endif switch (id->addr_type) { case 4: len = sizeof(struct ip) + sizeof(struct tcphdr); break; #ifdef INET6 case 6: len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); break; #endif default: /* XXX: log me?!? */ FREE_PKT(m); return (NULL); } dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); m->m_data += max_linkhdr; m->m_flags |= M_SKIP_FIREWALL; m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = NULL; bzero(m->m_data, len); switch (id->addr_type) { case 4: h = mtod(m, struct ip *); /* prepare for checksum */ h->ip_p = IPPROTO_TCP; h->ip_len = htons(sizeof(struct tcphdr)); if (dir) { h->ip_src.s_addr = htonl(id->src_ip); h->ip_dst.s_addr = htonl(id->dst_ip); } else { h->ip_src.s_addr = htonl(id->dst_ip); h->ip_dst.s_addr = htonl(id->src_ip); } th = (struct tcphdr *)(h + 1); break; #ifdef INET6 case 6: h6 = mtod(m, struct ip6_hdr *); /* prepare for checksum */ h6->ip6_nxt = IPPROTO_TCP; h6->ip6_plen = htons(sizeof(struct tcphdr)); if (dir) { h6->ip6_src = id->src_ip6; h6->ip6_dst = id->dst_ip6; } else { h6->ip6_src = id->dst_ip6; h6->ip6_dst = id->src_ip6; } th = (struct tcphdr *)(h6 + 1); break; #endif } if (dir) { th->th_sport = htons(id->src_port); th->th_dport = htons(id->dst_port); } else { th->th_sport = htons(id->dst_port); th->th_dport = htons(id->src_port); } th->th_off = sizeof(struct tcphdr) >> 2; if (flags & TH_RST) { if (flags & TH_ACK) { th->th_seq = htonl(ack); th->th_flags = TH_RST; } else { if (flags & TH_SYN) seq++; th->th_ack = htonl(seq); th->th_flags = TH_RST | TH_ACK; } } else { /* * Keepalive - use caller provided sequence numbers */ th->th_seq = htonl(seq); th->th_ack = htonl(ack); th->th_flags = TH_ACK; } switch (id->addr_type) { case 4: th->th_sum = in_cksum(m, len); /* finish the ip header */ h->ip_v = 4; h->ip_hl = sizeof(*h) >> 2; h->ip_tos = IPTOS_LOWDELAY; h->ip_off = htons(0); h->ip_len = htons(len); h->ip_ttl = V_ip_defttl; h->ip_sum = 0; break; #ifdef INET6 case 6: th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), sizeof(struct tcphdr)); /* finish the ip6 header */ h6->ip6_vfc |= IPV6_VERSION; h6->ip6_hlim = IPV6_DEFHLIM; break; #endif } return (m); } /* * Queue keepalive packets for given dynamic rule */ static struct mbuf ** ipfw_dyn_send_ka(struct mbuf **mtailp, ipfw_dyn_rule *q) { struct mbuf *m_rev, *m_fwd; m_rev = (q->state & ACK_REV) ? NULL : ipfw_send_pkt(NULL, &(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN); m_fwd = (q->state & ACK_FWD) ? NULL : ipfw_send_pkt(NULL, &(q->id), q->ack_fwd - 1, q->ack_rev, 0); if (m_rev != NULL) { *mtailp = m_rev; mtailp = &(*mtailp)->m_nextpkt; } if (m_fwd != NULL) { *mtailp = m_fwd; mtailp = &(*mtailp)->m_nextpkt; } return (mtailp); } /* * This procedure is used to perform various maintenance * on dynamic hash list. Currently it is called every second. */ static void ipfw_dyn_tick(void * vnetx) { struct ip_fw_chain *chain; int check_ka = 0; #ifdef VIMAGE struct vnet *vp = vnetx; #endif CURVNET_SET(vp); chain = &V_layer3_chain; /* Run keepalive checks every keepalive_period iff ka is enabled */ if ((V_dyn_keepalive_last + V_dyn_keepalive_period <= time_uptime) && (V_dyn_keepalive != 0)) { V_dyn_keepalive_last = time_uptime; check_ka = 1; } check_dyn_rules(chain, NULL, check_ka, 1); callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, vnetx, 0); CURVNET_RESTORE(); } /* * Walk through all dynamic states doing generic maintenance: * 1) free expired states * 2) free all states based on deleted rule / set * 3) send keepalives for states if needed * * @chain - pointer to current ipfw rules chain * @rule - delete all states originated by given rule if != NULL * @set - delete all states originated by any rule in set @set if != RESVD_SET * @check_ka - perform checking/sending keepalives * @timer - indicate call from timer routine. * * Timer routine must call this function unlocked to permit * sending keepalives/resizing table. * * Others has to call function with IPFW_UH_WLOCK held. * Additionally, function assume that dynamic rule/set is * ALREADY deleted so no new states can be generated by * 'deleted' rules. * * Write lock is needed to ensure that unused parent rules * are not freed by other instance (see stage 2, 3) */ static void check_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int check_ka, int timer) { struct mbuf *m0, *m, *mnext, **mtailp; struct ip *h; int i, dyn_count, new_buckets = 0, max_buckets; int expired = 0, expired_limits = 0, parents = 0, total = 0; ipfw_dyn_rule *q, *q_prev, *q_next; ipfw_dyn_rule *exp_head, **exptailp; ipfw_dyn_rule *exp_lhead, **expltailp; KASSERT(V_ipfw_dyn_v != NULL, ("%s: dynamic table not allocated", __func__)); /* Avoid possible LOR */ KASSERT(!check_ka || timer, ("%s: keepalive check with lock held", __func__)); /* * Do not perform any checks if we currently have no dynamic states */ if (DYN_COUNT == 0) return; /* Expired states */ exp_head = NULL; exptailp = &exp_head; /* Expired limit states */ exp_lhead = NULL; expltailp = &exp_lhead; /* * We make a chain of packets to go out here -- not deferring * until after we drop the IPFW dynamic rule lock would result * in a lock order reversal with the normal packet input -> ipfw * call stack. */ m0 = NULL; mtailp = &m0; /* Protect from hash resizing */ if (timer != 0) IPFW_UH_WLOCK(chain); else IPFW_UH_WLOCK_ASSERT(chain); #define NEXT_RULE() { q_prev = q; q = q->next ; continue; } /* Stage 1: perform requested deletion */ for (i = 0 ; i < V_curr_dyn_buckets ; i++) { IPFW_BUCK_LOCK(i); for (q = V_ipfw_dyn_v[i].head, q_prev = q; q ; ) { /* account every rule */ total++; /* Skip parent rules at all */ if (q->dyn_type == O_LIMIT_PARENT) { parents++; NEXT_RULE(); } /* * Remove rules which are: * 1) expired * 2) matches deletion range */ if ((TIME_LEQ(q->expire, time_uptime)) || (rt != NULL && ipfw_match_range(q->rule, rt))) { if (TIME_LE(time_uptime, q->expire) && q->dyn_type == O_KEEP_STATE && V_dyn_keep_states != 0) { /* * Do not delete state if * it is not expired and * dyn_keep_states is ON. * However we need to re-link it * to any other stable rule */ q->rule = chain->default_rule; NEXT_RULE(); } /* Unlink q from current list */ q_next = q->next; if (q == V_ipfw_dyn_v[i].head) V_ipfw_dyn_v[i].head = q_next; else q_prev->next = q_next; q->next = NULL; /* queue q to expire list */ if (q->dyn_type != O_LIMIT) { *exptailp = q; exptailp = &(*exptailp)->next; DEB(print_dyn_rule(&q->id, q->dyn_type, "unlink entry", "left"); ) } else { /* Separate list for limit rules */ *expltailp = q; expltailp = &(*expltailp)->next; expired_limits++; DEB(print_dyn_rule(&q->id, q->dyn_type, "unlink limit entry", "left"); ) } q = q_next; expired++; continue; } /* * Check if we need to send keepalive: * we need to ensure if is time to do KA, * this is established TCP session, and * expire time is within keepalive interval */ if ((check_ka != 0) && (q->id.proto == IPPROTO_TCP) && ((q->state & BOTH_SYN) == BOTH_SYN) && (TIME_LEQ(q->expire, time_uptime + V_dyn_keepalive_interval))) mtailp = ipfw_dyn_send_ka(mtailp, q); NEXT_RULE(); } IPFW_BUCK_UNLOCK(i); } /* Stage 2: decrement counters from O_LIMIT parents */ if (expired_limits != 0) { /* * XXX: Note that deleting set with more than one * heavily-used LIMIT rules can result in overwhelming * locking due to lack of per-hash value sorting * * We should probably think about: * 1) pre-allocating hash of size, say, * MAX(16, V_curr_dyn_buckets / 1024) * 2) checking if expired_limits is large enough * 3) If yes, init hash (or its part), re-link * current list and start decrementing procedure in * each bucket separately */ /* * Small optimization: do not unlock bucket until * we see the next item resides in different bucket */ if (exp_lhead != NULL) { i = exp_lhead->parent->bucket; IPFW_BUCK_LOCK(i); } for (q = exp_lhead; q != NULL; q = q->next) { if (i != q->parent->bucket) { IPFW_BUCK_UNLOCK(i); i = q->parent->bucket; IPFW_BUCK_LOCK(i); } /* Decrease parent refcount */ q->parent->count--; } if (exp_lhead != NULL) IPFW_BUCK_UNLOCK(i); } /* * We protectet ourselves from unused parent deletion * (from the timer function) by holding UH write lock. */ /* Stage 3: remove unused parent rules */ if ((parents != 0) && (expired != 0)) { for (i = 0 ; i < V_curr_dyn_buckets ; i++) { IPFW_BUCK_LOCK(i); for (q = V_ipfw_dyn_v[i].head, q_prev = q ; q ; ) { if (q->dyn_type != O_LIMIT_PARENT) NEXT_RULE(); if (q->count != 0) NEXT_RULE(); /* Parent rule without consumers */ /* Unlink q from current list */ q_next = q->next; if (q == V_ipfw_dyn_v[i].head) V_ipfw_dyn_v[i].head = q_next; else q_prev->next = q_next; q->next = NULL; /* Add to expired list */ *exptailp = q; exptailp = &(*exptailp)->next; DEB(print_dyn_rule(&q->id, q->dyn_type, "unlink parent entry", "left"); ) expired++; q = q_next; } IPFW_BUCK_UNLOCK(i); } } #undef NEXT_RULE if (timer != 0) { /* * Check if we need to resize hash: * if current number of states exceeds number of buckes in hash, * grow hash size to the minimum power of 2 which is bigger than * current states count. Limit hash size by 64k. */ max_buckets = (V_dyn_buckets_max > 65536) ? 65536 : V_dyn_buckets_max; dyn_count = DYN_COUNT; if ((dyn_count > V_curr_dyn_buckets * 2) && (dyn_count < max_buckets)) { new_buckets = V_curr_dyn_buckets; while (new_buckets < dyn_count) { new_buckets *= 2; if (new_buckets >= max_buckets) break; } } IPFW_UH_WUNLOCK(chain); } /* Finally delete old states ad limits if any */ for (q = exp_head; q != NULL; q = q_next) { q_next = q->next; uma_zfree(V_ipfw_dyn_rule_zone, q); ipfw_dyn_count--; } for (q = exp_lhead; q != NULL; q = q_next) { q_next = q->next; uma_zfree(V_ipfw_dyn_rule_zone, q); ipfw_dyn_count--; } /* * The rest code MUST be called from timer routine only * without holding any locks */ if (timer == 0) return; /* Send keepalive packets if any */ for (m = m0; m != NULL; m = mnext) { mnext = m->m_nextpkt; m->m_nextpkt = NULL; h = mtod(m, struct ip *); if (h->ip_v == 4) ip_output(m, NULL, NULL, 0, NULL, NULL); #ifdef INET6 else ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); #endif } /* Run table resize without holding any locks */ if (new_buckets != 0) resize_dynamic_table(chain, new_buckets); } /* * Deletes all dynamic rules originated by given rule or all rules in * given set. Specify RESVD_SET to indicate set should not be used. * @chain - pointer to current ipfw rules chain * @rr - delete all states originated by rules in matched range. * * Function has to be called with IPFW_UH_WLOCK held. * Additionally, function assume that dynamic rule/set is * ALREADY deleted so no new states can be generated by * 'deleted' rules. */ void ipfw_expire_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt) { check_dyn_rules(chain, rt, 0, 0); } /* * Check if rule contains at least one dynamic opcode. * * Returns 1 if such opcode is found, 0 otherwise. */ int ipfw_is_dyn_rule(struct ip_fw *rule) { int cmdlen, l; ipfw_insn *cmd; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); switch (cmd->opcode) { case O_LIMIT: case O_KEEP_STATE: case O_PROBE_STATE: case O_CHECK_STATE: return (1); } } return (0); } void ipfw_dyn_init(struct ip_fw_chain *chain) { V_ipfw_dyn_v = NULL; V_dyn_buckets_max = 256; /* must be power of 2 */ V_curr_dyn_buckets = 256; /* must be power of 2 */ V_dyn_ack_lifetime = 300; V_dyn_syn_lifetime = 20; V_dyn_fin_lifetime = 1; V_dyn_rst_lifetime = 1; V_dyn_udp_lifetime = 10; V_dyn_short_lifetime = 5; V_dyn_keepalive_interval = 20; V_dyn_keepalive_period = 5; V_dyn_keepalive = 1; /* do send keepalives */ V_dyn_keepalive_last = time_uptime; V_dyn_max = 16384; /* max # of dynamic rules */ V_ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* Enforce limit on dynamic rules */ uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max); callout_init(&V_ipfw_timeout, 1); /* * This can potentially be done on first dynamic rule * being added to chain. */ resize_dynamic_table(chain, V_curr_dyn_buckets); IPFW_ADD_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); } void ipfw_dyn_uninit(int pass) { int i; if (pass == 0) { callout_drain(&V_ipfw_timeout); return; } IPFW_DEL_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); if (V_ipfw_dyn_v != NULL) { /* * Skip deleting all dynamic states - * uma_zdestroy() does this more efficiently; */ /* Destroy all mutexes */ for (i = 0 ; i < V_curr_dyn_buckets ; i++) IPFW_BUCK_LOCK_DESTROY(&V_ipfw_dyn_v[i]); free(V_ipfw_dyn_v, M_IPFW); V_ipfw_dyn_v = NULL; } uma_zdestroy(V_ipfw_dyn_rule_zone); } #ifdef SYSCTL_NODE /* * Get/set maximum number of dynamic states in given VNET instance. */ static int sysctl_ipfw_dyn_max(SYSCTL_HANDLER_ARGS) { int error; unsigned int nstates; nstates = V_dyn_max; error = sysctl_handle_int(oidp, &nstates, 0, req); /* Read operation or some error */ if ((error != 0) || (req->newptr == NULL)) return (error); V_dyn_max = nstates; uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max); return (0); } /* * Get current number of dynamic states in given VNET instance. */ static int sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS) { int error; unsigned int nstates; nstates = DYN_COUNT; error = sysctl_handle_int(oidp, &nstates, 0, req); return (error); } #endif /* * Returns size of dynamic states in legacy format */ int ipfw_dyn_len(void) { return (V_ipfw_dyn_v == NULL) ? 0 : (DYN_COUNT * sizeof(ipfw_dyn_rule)); } /* * Returns number of dynamic states. * Used by dump format v1 (current). */ int ipfw_dyn_get_count(void) { return (V_ipfw_dyn_v == NULL) ? 0 : DYN_COUNT; } static void export_dyn_rule(ipfw_dyn_rule *src, ipfw_dyn_rule *dst) { uint16_t rulenum; rulenum = (uint16_t)src->rule->rulenum; memcpy(dst, src, sizeof(*src)); memcpy(&dst->rule, &rulenum, sizeof(rulenum)); /* * store set number into high word of * dst->rule pointer. */ memcpy((char *)&dst->rule + sizeof(rulenum), &src->rule->set, sizeof(src->rule->set)); /* * store a non-null value in "next". * The userland code will interpret a * NULL here as a marker * for the last dynamic rule. */ memcpy(&dst->next, &dst, sizeof(dst)); dst->expire = TIME_LEQ(dst->expire, time_uptime) ? 0: dst->expire - time_uptime; } /* * Fills int buffer given by @sd with dynamic states. * Used by dump format v1 (current). * * Returns 0 on success. */ int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd) { ipfw_dyn_rule *p; ipfw_obj_dyntlv *dst, *last; ipfw_obj_ctlv *ctlv; int i; size_t sz; if (V_ipfw_dyn_v == NULL) return (0); IPFW_UH_RLOCK_ASSERT(chain); ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); if (ctlv == NULL) return (ENOMEM); sz = sizeof(ipfw_obj_dyntlv); ctlv->head.type = IPFW_TLV_DYNSTATE_LIST; ctlv->objsize = sz; last = NULL; for (i = 0 ; i < V_curr_dyn_buckets; i++) { IPFW_BUCK_LOCK(i); for (p = V_ipfw_dyn_v[i].head ; p != NULL; p = p->next) { dst = (ipfw_obj_dyntlv *)ipfw_get_sopt_space(sd, sz); if (dst == NULL) { IPFW_BUCK_UNLOCK(i); return (ENOMEM); } export_dyn_rule(p, &dst->state); dst->head.length = sz; dst->head.type = IPFW_TLV_DYN_ENT; last = dst; } IPFW_BUCK_UNLOCK(i); } if (last != NULL) /* mark last dynamic rule */ last->head.flags = IPFW_DF_LAST; return (0); } /* * Fill given buffer with dynamic states (legacy format). * IPFW_UH_RLOCK has to be held while calling. */ void ipfw_get_dynamic(struct ip_fw_chain *chain, char **pbp, const char *ep) { ipfw_dyn_rule *p, *last = NULL; char *bp; int i; if (V_ipfw_dyn_v == NULL) return; bp = *pbp; IPFW_UH_RLOCK_ASSERT(chain); for (i = 0 ; i < V_curr_dyn_buckets; i++) { IPFW_BUCK_LOCK(i); for (p = V_ipfw_dyn_v[i].head ; p != NULL; p = p->next) { if (bp + sizeof *p <= ep) { ipfw_dyn_rule *dst = (ipfw_dyn_rule *)bp; export_dyn_rule(p, dst); last = dst; bp += sizeof(ipfw_dyn_rule); } } IPFW_BUCK_UNLOCK(i); } if (last != NULL) /* mark last dynamic rule */ bzero(&last->next, sizeof(last)); *pbp = bp; } /* end of file */ Index: stable/11/sys/netpfil/ipfw/ip_fw_table.c =================================================================== --- stable/11/sys/netpfil/ipfw/ip_fw_table.c (revision 324045) +++ stable/11/sys/netpfil/ipfw/ip_fw_table.c (revision 324046) @@ -1,3363 +1,3360 @@ /*- * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko. * Copyright (c) 2014 Yandex LLC * Copyright (c) 2014 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Lookup table support for ipfw. * * This file contains handlers for all generic tables' operations: * add/del/flush entries, list/dump tables etc.. * * Table data modification is protected by both UH and runtime lock * while reading configuration/data is protected by UH lock. * * Lookup algorithms for all table types are located in ip_fw_table_algo.c */ #include "opt_ipfw.h" #include #include #include #include #include #include #include #include #include #include #include /* ip_fw.h requires IFNAMSIZ */ #include #include #include /* struct ipfw_rule_ref */ #include #include #include /* * Table has the following `type` concepts: * * `no.type` represents lookup key type (addr, ifp, uid, etc..) * vmask represents bitmask of table values which are present at the moment. * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old * single-value-for-all approach. */ struct table_config { struct named_object no; uint8_t tflags; /* type flags */ uint8_t locked; /* 1 if locked from changes */ uint8_t linked; /* 1 if already linked */ uint8_t ochanged; /* used by set swapping */ uint8_t vshared; /* 1 if using shared value array */ uint8_t spare[3]; uint32_t count; /* Number of records */ uint32_t limit; /* Max number of records */ uint32_t vmask; /* bitmask with supported values */ uint32_t ocount; /* used by set swapping */ uint64_t gencnt; /* generation count */ char tablename[64]; /* table name */ struct table_algo *ta; /* Callbacks for given algo */ void *astate; /* algorithm state */ struct table_info ti_copy; /* data to put to table_info */ struct namedobj_instance *vi; }; static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti, struct table_config **tc); static struct table_config *find_table(struct namedobj_instance *ni, struct tid_info *ti); static struct table_config *alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags); static void free_table_config(struct namedobj_instance *ni, struct table_config *tc); static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref); static void link_table(struct ip_fw_chain *ch, struct table_config *tc); static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc); static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti, struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc); #define OP_ADD 1 #define OP_DEL 0 static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, struct sockopt_data *sd); static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc, ipfw_xtable_info *i); static int dump_table_tentry(void *e, void *arg); static int dump_table_xentry(void *e, void *arg); static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a, struct tid_info *b); static int check_table_name(const char *name); static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, struct table_config *tc, struct table_info *ti, uint32_t count); static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti); static struct table_algo *find_table_algo(struct tables_config *tableconf, struct tid_info *ti, char *name); static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti); static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti); #define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash) #define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k])) #define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */ void rollback_toperation_state(struct ip_fw_chain *ch, void *object) { struct tables_config *tcfg; struct op_state *os; tcfg = CHAIN_TO_TCFG(ch); TAILQ_FOREACH(os, &tcfg->state_list, next) os->func(object, os); } void add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) { struct tables_config *tcfg; tcfg = CHAIN_TO_TCFG(ch); TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next); } void del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) { struct tables_config *tcfg; tcfg = CHAIN_TO_TCFG(ch); TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next); } void tc_ref(struct table_config *tc) { tc->no.refcnt++; } void tc_unref(struct table_config *tc) { tc->no.refcnt--; } static struct table_value * get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx) { struct table_value *pval; pval = (struct table_value *)ch->valuestate; return (&pval[kidx]); } /* * Checks if we're able to insert/update entry @tei into table * w.r.t @tc limits. * May alter @tei to indicate insertion error / insert * options. * * Returns 0 if operation can be performed/ */ static int check_table_limit(struct table_config *tc, struct tentry_info *tei) { if (tc->limit == 0 || tc->count < tc->limit) return (0); if ((tei->flags & TEI_FLAGS_UPDATE) == 0) { /* Notify userland on error cause */ tei->flags |= TEI_FLAGS_LIMIT; return (EFBIG); } /* * We have UPDATE flag set. * Permit updating record (if found), * but restrict adding new one since we've * already hit the limit. */ tei->flags |= TEI_FLAGS_DONTADD; return (0); } /* * Convert algorithm callback return code into * one of pre-defined states known by userland. */ static void store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num) { int flag; flag = 0; switch (error) { case 0: if (op == OP_ADD && num != 0) flag = TEI_FLAGS_ADDED; if (op == OP_DEL) flag = TEI_FLAGS_DELETED; break; case ENOENT: flag = TEI_FLAGS_NOTFOUND; break; case EEXIST: flag = TEI_FLAGS_EXISTS; break; default: flag = TEI_FLAGS_ERROR; } tei->flags |= flag; } /* * Creates and references table with default parameters. * Saves table config, algo and allocated kidx info @ptc, @pta and * @pkidx if non-zero. * Used for table auto-creation to support old binaries. * * Returns 0 on success. */ static int create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti, uint16_t *pkidx) { ipfw_xtable_info xi; int error; memset(&xi, 0, sizeof(xi)); /* Set default value mask for legacy clients */ xi.vmask = IPFW_VTYPE_LEGACY; error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1); if (error != 0) return (error); return (0); } /* * Find and reference existing table optionally * creating new one. * * Saves found table config into @ptc. * Note function may drop/acquire UH_WLOCK. * Returns 0 if table was found/created and referenced * or non-zero return code. */ static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti, struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc) { struct namedobj_instance *ni; struct table_config *tc; uint16_t kidx; int error; IPFW_UH_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); tc = NULL; if ((tc = find_table(ni, ti)) != NULL) { /* check table type */ if (tc->no.subtype != ti->type) return (EINVAL); if (tc->locked != 0) return (EACCES); /* Try to exit early on limit hit */ if (op == OP_ADD && count == 1 && check_table_limit(tc, tei) != 0) return (EFBIG); /* Reference and return */ tc->no.refcnt++; *ptc = tc; return (0); } if (op == OP_DEL) return (ESRCH); /* Compatibility mode: create new table for old clients */ if ((tei->flags & TEI_FLAGS_COMPAT) == 0) return (ESRCH); IPFW_UH_WUNLOCK(ch); error = create_table_compat(ch, ti, &kidx); IPFW_UH_WLOCK(ch); if (error != 0) return (error); tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx)); /* OK, now we've got referenced table. */ *ptc = tc; return (0); } /* * Rolls back already @added to @tc entries using state array @ta_buf_m. * Assume the following layout: * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases * 2) DEL state (ta_buf_m[count[ ... t_buf_m[count + added - 1]) * for storing deleted state */ static void rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc, struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m, uint32_t count, uint32_t added) { struct table_algo *ta; struct tentry_info *ptei; caddr_t v, vv; size_t ta_buf_sz; int error, i; uint32_t num; IPFW_UH_WLOCK_ASSERT(ch); ta = tc->ta; ta_buf_sz = ta->ta_buf_size; v = ta_buf_m; vv = v + count * ta_buf_sz; for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) { ptei = &tei[i]; if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) { /* * We have old value stored by previous * call in @ptei->value. Do add once again * to restore it. */ error = ta->add(tc->astate, tinfo, ptei, v, &num); KASSERT(error == 0, ("rollback UPDATE fail")); KASSERT(num == 0, ("rollback UPDATE fail2")); continue; } error = ta->prepare_del(ch, ptei, vv); KASSERT(error == 0, ("pre-rollback INSERT failed")); error = ta->del(tc->astate, tinfo, ptei, vv, &num); KASSERT(error == 0, ("rollback INSERT failed")); tc->count -= num; } } /* * Prepares add/del state for all @count entries in @tei. * Uses either stack buffer (@ta_buf) or allocates a new one. * Stores pointer to allocated buffer back to @ta_buf. * * Returns 0 on success. */ static int prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf) { caddr_t ta_buf_m, v; size_t ta_buf_sz, sz; struct tentry_info *ptei; int error, i; error = 0; ta_buf_sz = ta->ta_buf_size; if (count == 1) { /* Sigle add/delete, use on-stack buffer */ memset(*ta_buf, 0, TA_BUF_SZ); ta_buf_m = *ta_buf; } else { /* * Multiple adds/deletes, allocate larger buffer * * Note we need 2xcount buffer for add case: * we have hold both ADD state * and DELETE state (this may be needed * if we need to rollback all changes) */ sz = count * ta_buf_sz; ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP, M_WAITOK | M_ZERO); } v = ta_buf_m; for (i = 0; i < count; i++, v += ta_buf_sz) { ptei = &tei[i]; error = (op == OP_ADD) ? ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v); /* * Some syntax error (incorrect mask, or address, or * anything). Return error regardless of atomicity * settings. */ if (error != 0) break; } *ta_buf = ta_buf_m; return (error); } /* * Flushes allocated state for each @count entries in @tei. * Frees @ta_buf_m if differs from stack buffer @ta_buf. */ static void flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, struct tentry_info *tei, uint32_t count, int rollback, caddr_t ta_buf_m, caddr_t ta_buf) { caddr_t v; struct tentry_info *ptei; size_t ta_buf_sz; int i; ta_buf_sz = ta->ta_buf_size; /* Run cleaning callback anyway */ v = ta_buf_m; for (i = 0; i < count; i++, v += ta_buf_sz) { ptei = &tei[i]; ta->flush_entry(ch, ptei, v); if (ptei->ptv != NULL) { free(ptei->ptv, M_IPFW); ptei->ptv = NULL; } } /* Clean up "deleted" state in case of rollback */ if (rollback != 0) { v = ta_buf_m + count * ta_buf_sz; for (i = 0; i < count; i++, v += ta_buf_sz) ta->flush_entry(ch, &tei[i], v); } if (ta_buf_m != ta_buf) free(ta_buf_m, M_TEMP); } static void rollback_add_entry(void *object, struct op_state *_state) { struct ip_fw_chain *ch; struct tableop_state *ts; ts = (struct tableop_state *)_state; if (ts->tc != object && ts->ch != object) return; ch = ts->ch; IPFW_UH_WLOCK_ASSERT(ch); /* Call specifid unlockers */ rollback_table_values(ts); /* Indicate we've called */ ts->modified = 1; } /* * Adds/updates one or more entries in table @ti. * * Function may drop/reacquire UH wlock multiple times due to * items alloc, algorithm callbacks (check_space), value linkage * (new values, value storage realloc), etc.. * Other processes like other adds (which may involve storage resize), * table swaps (which changes table data and may change algo type), * table modify (which may change value mask) may be executed * simultaneously so we need to deal with it. * * The following approach was implemented: * we have per-chain linked list, protected with UH lock. * add_table_entry prepares special on-stack structure wthich is passed * to its descendants. Users add this structure to this list before unlock. * After performing needed operations and acquiring UH lock back, each user * checks if structure has changed. If true, it rolls local state back and * returns without error to the caller. * add_table_entry() on its own checks if structure has changed and restarts * its operation from the beginning (goto restart). * * Functions which are modifying fields of interest (currently * resize_shared_value_storage() and swap_tables() ) * traverses given list while holding UH lock immediately before * performing their operations calling function provided be list entry * ( currently rollback_add_entry ) which performs rollback for all necessary * state and sets appropriate values in structure indicating rollback * has happened. * * Algo interaction: * Function references @ti first to ensure table won't * disappear or change its type. * After that, prepare_add callback is called for each @tei entry. * Next, we try to add each entry under UH+WHLOCK * using add() callback. * Finally, we free all state by calling flush_entry callback * for each @tei. * * Returns 0 on success. */ int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, struct tentry_info *tei, uint8_t flags, uint32_t count) { struct table_config *tc; struct table_algo *ta; uint16_t kidx; int error, first_error, i, rollback; uint32_t num, numadd; struct tentry_info *ptei; struct tableop_state ts; char ta_buf[TA_BUF_SZ]; caddr_t ta_buf_m, v; memset(&ts, 0, sizeof(ts)); ta = NULL; IPFW_UH_WLOCK(ch); /* * Find and reference existing table. */ restart: if (ts.modified != 0) { IPFW_UH_WUNLOCK(ch); flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf); memset(&ts, 0, sizeof(ts)); ta = NULL; IPFW_UH_WLOCK(ch); } error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc); if (error != 0) { IPFW_UH_WUNLOCK(ch); return (error); } ta = tc->ta; /* Fill in tablestate */ ts.ch = ch; ts.opstate.func = rollback_add_entry; ts.tc = tc; ts.vshared = tc->vshared; ts.vmask = tc->vmask; ts.ta = ta; ts.tei = tei; ts.count = count; rollback = 0; add_toperation_state(ch, &ts); IPFW_UH_WUNLOCK(ch); /* Allocate memory and prepare record(s) */ /* Pass stack buffer by default */ ta_buf_m = ta_buf; error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m); IPFW_UH_WLOCK(ch); del_toperation_state(ch, &ts); /* Drop reference we've used in first search */ tc->no.refcnt--; /* Check prepare_batch_buffer() error */ if (error != 0) goto cleanup; /* * Check if table swap has happened. * (so table algo might be changed). * Restart operation to achieve consistent behavior. */ if (ts.modified != 0) goto restart; /* * Link all values values to shared/per-table value array. * * May release/reacquire UH_WLOCK. */ error = ipfw_link_table_values(ch, &ts); if (error != 0) goto cleanup; if (ts.modified != 0) goto restart; /* * Ensure we are able to add all entries without additional * memory allocations. May release/reacquire UH_WLOCK. */ kidx = tc->no.kidx; error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count); if (error != 0) goto cleanup; if (ts.modified != 0) goto restart; /* We've got valid table in @tc. Let's try to add data */ kidx = tc->no.kidx; ta = tc->ta; numadd = 0; first_error = 0; IPFW_WLOCK(ch); v = ta_buf_m; for (i = 0; i < count; i++, v += ta->ta_buf_size) { ptei = &tei[i]; num = 0; /* check limit before adding */ if ((error = check_table_limit(tc, ptei)) == 0) { error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v, &num); /* Set status flag to inform userland */ store_tei_result(ptei, OP_ADD, error, num); } if (error == 0) { /* Update number of records to ease limit checking */ tc->count += num; numadd += num; continue; } if (first_error == 0) first_error = error; /* * Some error have happened. Check our atomicity * settings: continue if atomicity is not required, * rollback changes otherwise. */ if ((flags & IPFW_CTF_ATOMIC) == 0) continue; rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx), tei, ta_buf_m, count, i); rollback = 1; break; } IPFW_WUNLOCK(ch); ipfw_garbage_table_values(ch, tc, tei, count, rollback); /* Permit post-add algorithm grow/rehash. */ if (numadd != 0) check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); /* Return first error to user, if any */ error = first_error; cleanup: IPFW_UH_WUNLOCK(ch); flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf); return (error); } /* * Deletes one or more entries in table @ti. * * Returns 0 on success. */ int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, struct tentry_info *tei, uint8_t flags, uint32_t count) { struct table_config *tc; struct table_algo *ta; struct tentry_info *ptei; uint16_t kidx; int error, first_error, i; uint32_t num, numdel; char ta_buf[TA_BUF_SZ]; caddr_t ta_buf_m, v; /* * Find and reference existing table. */ IPFW_UH_WLOCK(ch); error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc); if (error != 0) { IPFW_UH_WUNLOCK(ch); return (error); } ta = tc->ta; IPFW_UH_WUNLOCK(ch); /* Allocate memory and prepare record(s) */ /* Pass stack buffer by default */ ta_buf_m = ta_buf; error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m); if (error != 0) goto cleanup; IPFW_UH_WLOCK(ch); /* Drop reference we've used in first search */ tc->no.refcnt--; /* * Check if table algo is still the same. * (changed ta may be the result of table swap). */ if (ta != tc->ta) { IPFW_UH_WUNLOCK(ch); error = EINVAL; goto cleanup; } kidx = tc->no.kidx; numdel = 0; first_error = 0; IPFW_WLOCK(ch); v = ta_buf_m; for (i = 0; i < count; i++, v += ta->ta_buf_size) { ptei = &tei[i]; num = 0; error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v, &num); /* Save state for userland */ store_tei_result(ptei, OP_DEL, error, num); if (error != 0 && first_error == 0) first_error = error; tc->count -= num; numdel += num; } IPFW_WUNLOCK(ch); /* Unlink non-used values */ ipfw_garbage_table_values(ch, tc, tei, count, 0); if (numdel != 0) { /* Run post-del hook to permit shrinking */ check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); } IPFW_UH_WUNLOCK(ch); /* Return first error to user, if any */ error = first_error; cleanup: flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf); return (error); } /* * Ensure that table @tc has enough space to add @count entries without * need for reallocation. * * Callbacks order: * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize. * * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags. * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage * 3) modify (UH_WLOCK + WLOCK) - switch pointers * 4) flush_modify (UH_WLOCK) - free state, if needed * * Returns 0 on success. */ static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, struct table_config *tc, struct table_info *ti, uint32_t count) { struct table_algo *ta; uint64_t pflags; char ta_buf[TA_BUF_SZ]; int error; IPFW_UH_WLOCK_ASSERT(ch); error = 0; ta = tc->ta; if (ta->need_modify == NULL) return (0); /* Acquire reference not to loose @tc between locks/unlocks */ tc->no.refcnt++; /* * TODO: think about avoiding race between large add/large delete * operation on algorithm which implements shrinking along with * growing. */ while (true) { pflags = 0; if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { error = 0; break; } /* We have to shrink/grow table */ if (ts != NULL) add_toperation_state(ch, ts); IPFW_UH_WUNLOCK(ch); memset(&ta_buf, 0, sizeof(ta_buf)); error = ta->prepare_mod(ta_buf, &pflags); IPFW_UH_WLOCK(ch); if (ts != NULL) del_toperation_state(ch, ts); if (error != 0) break; if (ts != NULL && ts->modified != 0) { /* * Swap operation has happened * so we're currently operating on other * table data. Stop doing this. */ ta->flush_mod(ta_buf); break; } /* Check if we still need to alter table */ ti = KIDX_TO_TI(ch, tc->no.kidx); if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { IPFW_UH_WUNLOCK(ch); /* * Other thread has already performed resize. * Flush our state and return. */ ta->flush_mod(ta_buf); break; } error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags); if (error == 0) { /* Do actual modification */ IPFW_WLOCK(ch); ta->modify(tc->astate, ti, ta_buf, pflags); IPFW_WUNLOCK(ch); } /* Anyway, flush data and retry */ ta->flush_mod(ta_buf); } tc->no.refcnt--; return (error); } /* * Adds or deletes record in table. * Data layout (v0): * Request: [ ip_fw3_opheader ipfw_table_xentry ] * * Returns 0 on success */ static int manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_table_xentry *xent; struct tentry_info tei; struct tid_info ti; struct table_value v; int error, hdrlen, read; hdrlen = offsetof(ipfw_table_xentry, k); /* Check minimum header size */ if (sd->valsize < (sizeof(*op3) + hdrlen)) return (EINVAL); read = sizeof(ip_fw3_opheader); /* Check if xentry len field is valid */ xent = (ipfw_table_xentry *)(op3 + 1); if (xent->len < hdrlen || xent->len + read > sd->valsize) return (EINVAL); memset(&tei, 0, sizeof(tei)); tei.paddr = &xent->k; tei.masklen = xent->masklen; ipfw_import_table_value_legacy(xent->value, &v); tei.pvalue = &v; /* Old requests compatibility */ tei.flags = TEI_FLAGS_COMPAT; if (xent->type == IPFW_TABLE_ADDR) { if (xent->len - hdrlen == sizeof(in_addr_t)) tei.subtype = AF_INET; else tei.subtype = AF_INET6; } memset(&ti, 0, sizeof(ti)); ti.uidx = xent->tbl; ti.type = xent->type; error = (op3->opcode == IP_FW_TABLE_XADD) ? add_table_entry(ch, &ti, &tei, 0, 1) : del_table_entry(ch, &ti, &tei, 0, 1); return (error); } /* * Adds or deletes record in table. * Data layout (v1)(current): * Request: [ ipfw_obj_header * ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ] * ] * * Returns 0 on success */ static int manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_tentry *tent, *ptent; ipfw_obj_ctlv *ctlv; ipfw_obj_header *oh; struct tentry_info *ptei, tei, *tei_buf; struct tid_info ti; int error, i, kidx, read; /* Check minimum header size */ if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv))) return (EINVAL); /* Check if passed data is too long */ if (sd->valsize != sd->kavail) return (EINVAL); oh = (ipfw_obj_header *)sd->kbuf; /* Basic length checks for TLVs */ if (oh->ntlv.head.length != sizeof(oh->ntlv)) return (EINVAL); read = sizeof(*oh); ctlv = (ipfw_obj_ctlv *)(oh + 1); if (ctlv->head.length + read != sd->valsize) return (EINVAL); read += sizeof(*ctlv); tent = (ipfw_obj_tentry *)(ctlv + 1); if (ctlv->count * sizeof(*tent) + read != sd->valsize) return (EINVAL); if (ctlv->count == 0) return (0); /* * Mark entire buffer as "read". * This instructs sopt api write it back * after function return. */ ipfw_get_sopt_header(sd, sd->valsize); /* Perform basic checks for each entry */ ptent = tent; kidx = tent->idx; for (i = 0; i < ctlv->count; i++, ptent++) { if (ptent->head.length != sizeof(*ptent)) return (EINVAL); if (ptent->idx != kidx) return (ENOTSUP); } /* Convert data into kernel request objects */ objheader_to_ti(oh, &ti); ti.type = oh->ntlv.type; ti.uidx = kidx; /* Use on-stack buffer for single add/del */ if (ctlv->count == 1) { memset(&tei, 0, sizeof(tei)); tei_buf = &tei; } else tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP, M_WAITOK | M_ZERO); ptei = tei_buf; ptent = tent; for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { ptei->paddr = &ptent->k; ptei->subtype = ptent->subtype; ptei->masklen = ptent->masklen; if (ptent->head.flags & IPFW_TF_UPDATE) ptei->flags |= TEI_FLAGS_UPDATE; ipfw_import_table_value_v1(&ptent->v.value); ptei->pvalue = (struct table_value *)&ptent->v.value; } error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ? add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) : del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count); /* Translate result back to userland */ ptei = tei_buf; ptent = tent; for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { if (ptei->flags & TEI_FLAGS_ADDED) ptent->result = IPFW_TR_ADDED; else if (ptei->flags & TEI_FLAGS_DELETED) ptent->result = IPFW_TR_DELETED; else if (ptei->flags & TEI_FLAGS_UPDATED) ptent->result = IPFW_TR_UPDATED; else if (ptei->flags & TEI_FLAGS_LIMIT) ptent->result = IPFW_TR_LIMIT; else if (ptei->flags & TEI_FLAGS_ERROR) ptent->result = IPFW_TR_ERROR; else if (ptei->flags & TEI_FLAGS_NOTFOUND) ptent->result = IPFW_TR_NOTFOUND; else if (ptei->flags & TEI_FLAGS_EXISTS) ptent->result = IPFW_TR_EXISTS; ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value); } if (tei_buf != &tei) free(tei_buf, M_TEMP); return (error); } /* * Looks up an entry in given table. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_obj_tentry ] * Reply: [ ipfw_obj_header ipfw_obj_tentry ] * * Returns 0 on success */ static int find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_tentry *tent; ipfw_obj_header *oh; struct tid_info ti; struct table_config *tc; struct table_algo *ta; struct table_info *kti; struct table_value *pval; struct namedobj_instance *ni; int error; size_t sz; /* Check minimum header size */ sz = sizeof(*oh) + sizeof(*tent); if (sd->valsize != sz) return (EINVAL); oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); tent = (ipfw_obj_tentry *)(oh + 1); /* Basic length checks for TLVs */ if (oh->ntlv.head.length != sizeof(oh->ntlv)) return (EINVAL); objheader_to_ti(oh, &ti); ti.type = oh->ntlv.type; ti.uidx = tent->idx; IPFW_UH_RLOCK(ch); ni = CHAIN_TO_NI(ch); /* * Find existing table and check its type . */ ta = NULL; if ((tc = find_table(ni, &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } /* check table type */ if (tc->no.subtype != ti.type) { IPFW_UH_RUNLOCK(ch); return (EINVAL); } kti = KIDX_TO_TI(ch, tc->no.kidx); ta = tc->ta; if (ta->find_tentry == NULL) return (ENOTSUP); error = ta->find_tentry(tc->astate, kti, tent); if (error == 0) { pval = get_table_value(ch, tc, tent->v.kidx); ipfw_export_table_value_v1(pval, &tent->v.value); } IPFW_UH_RUNLOCK(ch); return (error); } /* * Flushes all entries or destroys given table. * Data layout (v0)(current): * Request: [ ipfw_obj_header ] * * Returns 0 on success */ static int flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { int error; struct _ipfw_obj_header *oh; struct tid_info ti; if (sd->valsize != sizeof(*oh)) return (EINVAL); oh = (struct _ipfw_obj_header *)op3; objheader_to_ti(oh, &ti); if (op3->opcode == IP_FW_TABLE_XDESTROY) error = destroy_table(ch, &ti); else if (op3->opcode == IP_FW_TABLE_XFLUSH) error = flush_table(ch, &ti); else return (ENOTSUP); return (error); } static void restart_flush(void *object, struct op_state *_state) { struct tableop_state *ts; ts = (struct tableop_state *)_state; if (ts->tc != object) return; /* Indicate we've called */ ts->modified = 1; } /* * Flushes given table. * * Function create new table instance with the same * parameters, swaps it with old one and * flushes state without holding runtime WLOCK. * * Returns 0 on success. */ int flush_table(struct ip_fw_chain *ch, struct tid_info *ti) { struct namedobj_instance *ni; struct table_config *tc; struct table_algo *ta; struct table_info ti_old, ti_new, *tablestate; void *astate_old, *astate_new; char algostate[64], *pstate; struct tableop_state ts; int error, need_gc; uint16_t kidx; uint8_t tflags; /* * Stage 1: save table algorithm. * Reference found table to ensure it won't disappear. */ IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc = find_table(ni, ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } need_gc = 0; astate_new = NULL; memset(&ti_new, 0, sizeof(ti_new)); restart: /* Set up swap handler */ memset(&ts, 0, sizeof(ts)); ts.opstate.func = restart_flush; ts.tc = tc; ta = tc->ta; /* Do not flush readonly tables */ if ((ta->flags & TA_FLAG_READONLY) != 0) { IPFW_UH_WUNLOCK(ch); return (EACCES); } /* Save startup algo parameters */ if (ta->print_config != NULL) { ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx), algostate, sizeof(algostate)); pstate = algostate; } else pstate = NULL; tflags = tc->tflags; tc->no.refcnt++; add_toperation_state(ch, &ts); IPFW_UH_WUNLOCK(ch); /* * Stage 1.5: if this is not the first attempt, destroy previous state */ if (need_gc != 0) { ta->destroy(astate_new, &ti_new); need_gc = 0; } /* * Stage 2: allocate new table instance using same algo. */ memset(&ti_new, 0, sizeof(struct table_info)); error = ta->init(ch, &astate_new, &ti_new, pstate, tflags); /* * Stage 3: swap old state pointers with newly-allocated ones. * Decrease refcount. */ IPFW_UH_WLOCK(ch); tc->no.refcnt--; del_toperation_state(ch, &ts); if (error != 0) { IPFW_UH_WUNLOCK(ch); return (error); } /* * Restart operation if table swap has happened: * even if algo may be the same, algo init parameters * may change. Restart operation instead of doing * complex checks. */ if (ts.modified != 0) { /* Delay destroying data since we're holding UH lock */ need_gc = 1; goto restart; } ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; tablestate = (struct table_info *)ch->tablestate; IPFW_WLOCK(ch); ti_old = tablestate[kidx]; tablestate[kidx] = ti_new; IPFW_WUNLOCK(ch); astate_old = tc->astate; tc->astate = astate_new; tc->ti_copy = ti_new; tc->count = 0; /* Notify algo on real @ti address */ if (ta->change_ti != NULL) ta->change_ti(tc->astate, &tablestate[kidx]); /* * Stage 4: unref values. */ ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old); IPFW_UH_WUNLOCK(ch); /* * Stage 5: perform real flush/destroy. */ ta->destroy(astate_old, &ti_old); return (0); } /* * Swaps two tables. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_obj_ntlv ] * * Returns 0 on success */ static int swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { int error; struct _ipfw_obj_header *oh; struct tid_info ti_a, ti_b; if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv)) return (EINVAL); oh = (struct _ipfw_obj_header *)op3; ntlv_to_ti(&oh->ntlv, &ti_a); ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b); error = swap_tables(ch, &ti_a, &ti_b); return (error); } /* * Swaps two tables of the same type/valtype. * * Checks if tables are compatible and limits * permits swap, than actually perform swap. * * Each table consists of 2 different parts: * config: * @tc (with name, set, kidx) and rule bindings, which is "stable". * number of items * table algo * runtime: * runtime data @ti (ch->tablestate) * runtime cache in @tc * algo-specific data (@tc->astate) * * So we switch: * all runtime data * number of items * table algo * * After that we call @ti change handler for each table. * * Note that referencing @tc won't protect tc->ta from change. * XXX: Do we need to restrict swap between locked tables? * XXX: Do we need to exchange ftype? * * Returns 0 on success. */ static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a, struct tid_info *b) { struct namedobj_instance *ni; struct table_config *tc_a, *tc_b; struct table_algo *ta; struct table_info ti, *tablestate; void *astate; uint32_t count; /* * Stage 1: find both tables and ensure they are of * the same type. */ IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc_a = find_table(ni, a)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } if ((tc_b = find_table(ni, b)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } /* It is very easy to swap between the same table */ if (tc_a == tc_b) { IPFW_UH_WUNLOCK(ch); return (0); } /* Check type and value are the same */ if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) { IPFW_UH_WUNLOCK(ch); return (EINVAL); } /* Check limits before swap */ if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) || (tc_b->limit != 0 && tc_a->count > tc_b->limit)) { IPFW_UH_WUNLOCK(ch); return (EFBIG); } /* Check if one of the tables is readonly */ if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) { IPFW_UH_WUNLOCK(ch); return (EACCES); } /* Notify we're going to swap */ rollback_toperation_state(ch, tc_a); rollback_toperation_state(ch, tc_b); /* Everything is fine, prepare to swap */ tablestate = (struct table_info *)ch->tablestate; ti = tablestate[tc_a->no.kidx]; ta = tc_a->ta; astate = tc_a->astate; count = tc_a->count; IPFW_WLOCK(ch); /* a <- b */ tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx]; tc_a->ta = tc_b->ta; tc_a->astate = tc_b->astate; tc_a->count = tc_b->count; /* b <- a */ tablestate[tc_b->no.kidx] = ti; tc_b->ta = ta; tc_b->astate = astate; tc_b->count = count; IPFW_WUNLOCK(ch); /* Ensure tc.ti copies are in sync */ tc_a->ti_copy = tablestate[tc_a->no.kidx]; tc_b->ti_copy = tablestate[tc_b->no.kidx]; /* Notify both tables on @ti change */ if (tc_a->ta->change_ti != NULL) tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]); if (tc_b->ta->change_ti != NULL) tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]); IPFW_UH_WUNLOCK(ch); return (0); } /* * Destroys table specified by @ti. * Data layout (v0)(current): * Request: [ ip_fw3_opheader ] * * Returns 0 on success */ static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti) { struct namedobj_instance *ni; struct table_config *tc; IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc = find_table(ni, ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } /* Do not permit destroying referenced tables */ if (tc->no.refcnt > 0) { IPFW_UH_WUNLOCK(ch); return (EBUSY); } IPFW_WLOCK(ch); unlink_table(ch, tc); IPFW_WUNLOCK(ch); /* Free obj index */ if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0) printf("Error unlinking kidx %d from table %s\n", tc->no.kidx, tc->tablename); /* Unref values used in tables while holding UH lock */ ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy); IPFW_UH_WUNLOCK(ch); free_table_config(ni, tc); return (0); } static uint32_t roundup2p(uint32_t v) { v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v++; return (v); } /* * Grow tables index. * * Returns 0 on success. */ int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables) { unsigned int ntables_old, tbl; struct namedobj_instance *ni; void *new_idx, *old_tablestate, *tablestate; struct table_info *ti; struct table_config *tc; int i, new_blocks; /* Check new value for validity */ if (ntables == 0) return (EINVAL); if (ntables > IPFW_TABLES_MAX) ntables = IPFW_TABLES_MAX; /* Alight to nearest power of 2 */ ntables = (unsigned int)roundup2p(ntables); /* Allocate new pointers */ tablestate = malloc(ntables * sizeof(struct table_info), M_IPFW, M_WAITOK | M_ZERO); ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks); IPFW_UH_WLOCK(ch); tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables; ni = CHAIN_TO_NI(ch); /* Temporary restrict decreasing max_tables */ if (ntables < V_fw_tables_max) { /* * FIXME: Check if we really can shrink */ IPFW_UH_WUNLOCK(ch); return (EINVAL); } /* Copy table info/indices */ memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl); ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks); IPFW_WLOCK(ch); /* Change pointers */ old_tablestate = ch->tablestate; ch->tablestate = tablestate; ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks); ntables_old = V_fw_tables_max; V_fw_tables_max = ntables; IPFW_WUNLOCK(ch); /* Notify all consumers that their @ti pointer has changed */ ti = (struct table_info *)ch->tablestate; for (i = 0; i < tbl; i++, ti++) { if (ti->lookup == NULL) continue; tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i); if (tc == NULL || tc->ta->change_ti == NULL) continue; tc->ta->change_ti(tc->astate, ti); } IPFW_UH_WUNLOCK(ch); /* Free old pointers */ free(old_tablestate, M_IPFW); ipfw_objhash_bitmap_free(new_idx, new_blocks); return (0); } /* * Lookup table's named object by its @kidx. */ struct named_object * ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx) { return (ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx)); } /* * Take reference to table specified in @ntlv. * On success return its @kidx. */ int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx) { struct tid_info ti; struct table_config *tc; int error; IPFW_UH_WLOCK_ASSERT(ch); ntlv_to_ti(ntlv, &ti); error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc); if (error != 0) return (error); if (tc == NULL) return (ESRCH); tc_ref(tc); *kidx = tc->no.kidx; return (0); } void ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx) { struct namedobj_instance *ni; struct named_object *no; IPFW_UH_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("Table with index %d not found", kidx)); no->refcnt--; } /* * Lookup an arbtrary key @paddr of legth @plen in table @tbl. * Stores found value in @val. * * Returns 1 if key was found. */ int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, void *paddr, uint32_t *val) { struct table_info *ti; ti = KIDX_TO_TI(ch, tbl); return (ti->lookup(ti, paddr, plen, val)); } /* * Info/List/dump support for tables. * */ /* * High-level 'get' cmds sysctl handlers */ /* * Lists all tables currently available in kernel. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ] * * Returns 0 on success */ static int list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_lheader *olh; int error; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); if (sd->valsize < olh->size) return (EINVAL); IPFW_UH_RLOCK(ch); error = export_tables(ch, olh, sd); IPFW_UH_RUNLOCK(ch); return (error); } /* * Store table info to buffer provided by @sd. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_xtable_info(empty)] * Reply: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success. */ static int describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_header *oh; struct table_config *tc; struct tid_info ti; size_t sz; sz = sizeof(*oh) + sizeof(ipfw_xtable_info); oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); if (oh == NULL) return (EINVAL); objheader_to_ti(oh, &ti); IPFW_UH_RLOCK(ch); if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1)); IPFW_UH_RUNLOCK(ch); return (0); } /* * Modifies existing table. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success */ static int modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_header *oh; ipfw_xtable_info *i; char *tname; struct tid_info ti; struct namedobj_instance *ni; struct table_config *tc; if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) return (EINVAL); oh = (struct _ipfw_obj_header *)sd->kbuf; i = (ipfw_xtable_info *)(oh + 1); /* * Verify user-supplied strings. * Check for null-terminated/zero-length strings/ */ tname = oh->ntlv.name; if (check_table_name(tname) != 0) return (EINVAL); objheader_to_ti(oh, &ti); ti.type = i->type; IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc = find_table(ni, &ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } /* Do not support any modifications for readonly tables */ if ((tc->ta->flags & TA_FLAG_READONLY) != 0) { IPFW_UH_WUNLOCK(ch); return (EACCES); } if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0) tc->limit = i->limit; if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0) tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0); IPFW_UH_WUNLOCK(ch); return (0); } /* * Creates new table. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success */ static int create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_header *oh; ipfw_xtable_info *i; char *tname, *aname; struct tid_info ti; struct namedobj_instance *ni; if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) return (EINVAL); oh = (struct _ipfw_obj_header *)sd->kbuf; i = (ipfw_xtable_info *)(oh + 1); /* * Verify user-supplied strings. * Check for null-terminated/zero-length strings/ */ tname = oh->ntlv.name; aname = i->algoname; if (check_table_name(tname) != 0 || strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname)) return (EINVAL); if (aname[0] == '\0') { /* Use default algorithm */ aname = NULL; } objheader_to_ti(oh, &ti); ti.type = i->type; ni = CHAIN_TO_NI(ch); IPFW_UH_RLOCK(ch); if (find_table(ni, &ti) != NULL) { IPFW_UH_RUNLOCK(ch); return (EEXIST); } IPFW_UH_RUNLOCK(ch); return (create_table_internal(ch, &ti, aname, i, NULL, 0)); } /* * Creates new table based on @ti and @aname. * * Assume @aname to be checked and valid. * Stores allocated table kidx inside @pkidx (if non-NULL). * Reference created table if @compat is non-zero. * * Returns 0 on success. */ static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat) { struct namedobj_instance *ni; struct table_config *tc, *tc_new, *tmp; struct table_algo *ta; uint16_t kidx; ni = CHAIN_TO_NI(ch); ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname); if (ta == NULL) return (ENOTSUP); tc = alloc_table_config(ch, ti, ta, aname, i->tflags); if (tc == NULL) return (ENOMEM); tc->vmask = i->vmask; tc->limit = i->limit; if (ta->flags & TA_FLAG_READONLY) tc->locked = 1; else tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0; IPFW_UH_WLOCK(ch); /* Check if table has been already created */ tc_new = find_table(ni, ti); if (tc_new != NULL) { /* * Compat: do not fail if we're * requesting to create existing table * which has the same type */ if (compat == 0 || tc_new->no.subtype != tc->no.subtype) { IPFW_UH_WUNLOCK(ch); free_table_config(ni, tc); return (EEXIST); } /* Exchange tc and tc_new for proper refcounting & freeing */ tmp = tc; tc = tc_new; tc_new = tmp; } else { /* New table */ if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) { IPFW_UH_WUNLOCK(ch); printf("Unable to allocate table index." " Consider increasing net.inet.ip.fw.tables_max"); free_table_config(ni, tc); return (EBUSY); } tc->no.kidx = kidx; tc->no.etlv = IPFW_TLV_TBL_NAME; - IPFW_WLOCK(ch); link_table(ch, tc); - IPFW_WUNLOCK(ch); } if (compat != 0) tc->no.refcnt++; if (pkidx != NULL) *pkidx = tc->no.kidx; IPFW_UH_WUNLOCK(ch); if (tc_new != NULL) free_table_config(ni, tc_new); return (0); } static void ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti) { memset(ti, 0, sizeof(struct tid_info)); ti->set = ntlv->set; ti->uidx = ntlv->idx; ti->tlvs = ntlv; ti->tlen = ntlv->head.length; } static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti) { ntlv_to_ti(&oh->ntlv, ti); } struct namedobj_instance * ipfw_get_table_objhash(struct ip_fw_chain *ch) { return (CHAIN_TO_NI(ch)); } /* * Exports basic table info as name TLV. * Used inside dump_static_rules() to provide info * about all tables referenced by current ruleset. * * Returns 0 on success. */ int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx, struct sockopt_data *sd) { struct namedobj_instance *ni; struct named_object *no; ipfw_obj_ntlv *ntlv; ni = CHAIN_TO_NI(ch); no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("invalid table kidx passed")); ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); if (ntlv == NULL) return (ENOMEM); ntlv->head.type = IPFW_TLV_TBL_NAME; ntlv->head.length = sizeof(*ntlv); ntlv->idx = no->kidx; strlcpy(ntlv->name, no->name, sizeof(ntlv->name)); return (0); } struct dump_args { struct ip_fw_chain *ch; struct table_info *ti; struct table_config *tc; struct sockopt_data *sd; uint32_t cnt; uint16_t uidx; int error; uint32_t size; ipfw_table_entry *ent; ta_foreach_f *f; void *farg; ipfw_obj_tentry tent; }; static int count_ext_entries(void *e, void *arg) { struct dump_args *da; da = (struct dump_args *)arg; da->cnt++; return (0); } /* * Gets number of items from table either using * internal counter or calling algo callback for * externally-managed tables. * * Returns number of records. */ static uint32_t table_get_count(struct ip_fw_chain *ch, struct table_config *tc) { struct table_info *ti; struct table_algo *ta; struct dump_args da; ti = KIDX_TO_TI(ch, tc->no.kidx); ta = tc->ta; /* Use internal counter for self-managed tables */ if ((ta->flags & TA_FLAG_READONLY) == 0) return (tc->count); /* Use callback to quickly get number of items */ if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0) return (ta->get_count(tc->astate, ti)); /* Count number of iterms ourselves */ memset(&da, 0, sizeof(da)); ta->foreach(tc->astate, ti, count_ext_entries, &da); return (da.cnt); } /* * Exports table @tc info into standard ipfw_xtable_info format. */ static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc, ipfw_xtable_info *i) { struct table_info *ti; struct table_algo *ta; i->type = tc->no.subtype; i->tflags = tc->tflags; i->vmask = tc->vmask; i->set = tc->no.set; i->kidx = tc->no.kidx; i->refcnt = tc->no.refcnt; i->count = table_get_count(ch, tc); i->limit = tc->limit; i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0; i->size = i->count * sizeof(ipfw_obj_tentry); i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); strlcpy(i->tablename, tc->tablename, sizeof(i->tablename)); ti = KIDX_TO_TI(ch, tc->no.kidx); ta = tc->ta; if (ta->print_config != NULL) { /* Use algo function to print table config to string */ ta->print_config(tc->astate, ti, i->algoname, sizeof(i->algoname)); } else strlcpy(i->algoname, ta->name, sizeof(i->algoname)); /* Dump algo-specific data, if possible */ if (ta->dump_tinfo != NULL) { ta->dump_tinfo(tc->astate, ti, &i->ta_info); i->ta_info.flags |= IPFW_TATFLAGS_DATA; } } struct dump_table_args { struct ip_fw_chain *ch; struct sockopt_data *sd; }; static int export_table_internal(struct namedobj_instance *ni, struct named_object *no, void *arg) { ipfw_xtable_info *i; struct dump_table_args *dta; dta = (struct dump_table_args *)arg; i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i)); KASSERT(i != NULL, ("previously checked buffer is not enough")); export_table_info(dta->ch, (struct table_config *)no, i); return (0); } /* * Export all tables as ipfw_xtable_info structures to * storage provided by @sd. * * If supplied buffer is too small, fills in required size * and returns ENOMEM. * Returns 0 on success. */ static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, struct sockopt_data *sd) { uint32_t size; uint32_t count; struct dump_table_args dta; count = ipfw_objhash_count(CHAIN_TO_NI(ch)); size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader); /* Fill in header regadless of buffer size */ olh->count = count; olh->objsize = sizeof(ipfw_xtable_info); if (size > olh->size) { olh->size = size; return (ENOMEM); } olh->size = size; dta.ch = ch; dta.sd = sd; ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta); return (0); } /* * Dumps all table data * Data layout (v1)(current): * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ] * * Returns 0 on success */ static int dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_header *oh; ipfw_xtable_info *i; struct tid_info ti; struct table_config *tc; struct table_algo *ta; struct dump_args da; uint32_t sz; sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); if (oh == NULL) return (EINVAL); i = (ipfw_xtable_info *)(oh + 1); objheader_to_ti(oh, &ti); IPFW_UH_RLOCK(ch); if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } export_table_info(ch, tc, i); if (sd->valsize < i->size) { /* * Submitted buffer size is not enough. * WE've already filled in @i structure with * relevant table info including size, so we * can return. Buffer will be flushed automatically. */ IPFW_UH_RUNLOCK(ch); return (ENOMEM); } /* * Do the actual dump in eXtended format */ memset(&da, 0, sizeof(da)); da.ch = ch; da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.sd = sd; ta = tc->ta; ta->foreach(tc->astate, da.ti, dump_table_tentry, &da); IPFW_UH_RUNLOCK(ch); return (da.error); } /* * Dumps all table data * Data layout (version 0)(legacy): * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE() * Reply: [ ipfw_xtable ipfw_table_xentry x N ] * * Returns 0 on success */ static int dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_xtable *xtbl; struct tid_info ti; struct table_config *tc; struct table_algo *ta; struct dump_args da; size_t sz, count; xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable)); if (xtbl == NULL) return (EINVAL); memset(&ti, 0, sizeof(ti)); ti.uidx = xtbl->tbl; IPFW_UH_RLOCK(ch); if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (0); } count = table_get_count(ch, tc); sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable); xtbl->cnt = count; xtbl->size = sz; xtbl->type = tc->no.subtype; xtbl->tbl = ti.uidx; if (sd->valsize < sz) { /* * Submitted buffer size is not enough. * WE've already filled in @i structure with * relevant table info including size, so we * can return. Buffer will be flushed automatically. */ IPFW_UH_RUNLOCK(ch); return (ENOMEM); } /* Do the actual dump in eXtended format */ memset(&da, 0, sizeof(da)); da.ch = ch; da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.sd = sd; ta = tc->ta; ta->foreach(tc->astate, da.ti, dump_table_xentry, &da); IPFW_UH_RUNLOCK(ch); return (0); } /* * Legacy function to retrieve number of items in table. */ static int get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { uint32_t *tbl; struct tid_info ti; size_t sz; int error; sz = sizeof(*op3) + sizeof(uint32_t); op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz); if (op3 == NULL) return (EINVAL); tbl = (uint32_t *)(op3 + 1); memset(&ti, 0, sizeof(ti)); ti.uidx = *tbl; IPFW_UH_RLOCK(ch); error = ipfw_count_xtable(ch, &ti, tbl); IPFW_UH_RUNLOCK(ch); return (error); } /* * Legacy IP_FW_TABLE_GETSIZE handler */ int ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) { struct table_config *tc; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) return (ESRCH); *cnt = table_get_count(ch, tc); return (0); } /* * Legacy IP_FW_TABLE_XGETSIZE handler */ int ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) { struct table_config *tc; uint32_t count; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) { *cnt = 0; return (0); /* 'table all list' requires success */ } count = table_get_count(ch, tc); *cnt = count * sizeof(ipfw_table_xentry); if (count > 0) *cnt += sizeof(ipfw_xtable); return (0); } static int dump_table_entry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; ipfw_table_entry *ent; struct table_value *pval; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; /* Out of memory, returning */ if (da->cnt == da->size) return (1); ent = da->ent++; ent->tbl = da->uidx; da->cnt++; error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); if (error != 0) return (error); ent->addr = da->tent.k.addr.s_addr; ent->masklen = da->tent.masklen; pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); ent->value = ipfw_export_table_value_legacy(pval); return (0); } /* * Dumps table in pre-8.1 legacy format. */ int ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, ipfw_table *tbl) { struct table_config *tc; struct table_algo *ta; struct dump_args da; tbl->cnt = 0; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) return (0); /* XXX: We should return ESRCH */ ta = tc->ta; /* This dump format supports IPv4 only */ if (tc->no.subtype != IPFW_TABLE_ADDR) return (0); memset(&da, 0, sizeof(da)); da.ch = ch; da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.ent = &tbl->ent[0]; da.size = tbl->size; tbl->cnt = 0; ta->foreach(tc->astate, da.ti, dump_table_entry, &da); tbl->cnt = da.cnt; return (0); } /* * Dumps table entry in eXtended format (v1)(current). */ static int dump_table_tentry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; struct table_value *pval; ipfw_obj_tentry *tent; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent)); /* Out of memory, returning */ if (tent == NULL) { da->error = ENOMEM; return (1); } tent->head.length = sizeof(ipfw_obj_tentry); tent->idx = da->uidx; error = ta->dump_tentry(tc->astate, da->ti, e, tent); if (error != 0) return (error); pval = get_table_value(da->ch, da->tc, tent->v.kidx); ipfw_export_table_value_v1(pval, &tent->v.value); return (0); } /* * Dumps table entry in eXtended format (v0). */ static int dump_table_xentry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; ipfw_table_xentry *xent; ipfw_obj_tentry *tent; struct table_value *pval; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent)); /* Out of memory, returning */ if (xent == NULL) return (1); xent->len = sizeof(ipfw_table_xentry); xent->tbl = da->uidx; memset(&da->tent, 0, sizeof(da->tent)); tent = &da->tent; error = ta->dump_tentry(tc->astate, da->ti, e, tent); if (error != 0) return (error); /* Convert current format to previous one */ xent->masklen = tent->masklen; pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); xent->value = ipfw_export_table_value_legacy(pval); /* Apply some hacks */ if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) { xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr; xent->flags = IPFW_TCF_INET; } else memcpy(&xent->k, &tent->k, sizeof(xent->k)); return (0); } /* * Helper function to export table algo data * to tentry format before calling user function. * * Returns 0 on success. */ static int prepare_table_tentry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); if (error != 0) return (error); da->f(&da->tent, da->farg); return (0); } /* * Allow external consumers to read table entries in standard format. */ int ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx, ta_foreach_f *f, void *arg) { struct namedobj_instance *ni; struct table_config *tc; struct table_algo *ta; struct dump_args da; ni = CHAIN_TO_NI(ch); tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); if (tc == NULL) return (ESRCH); ta = tc->ta; memset(&da, 0, sizeof(da)); da.ch = ch; da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.f = f; da.farg = arg; ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da); return (0); } /* * Table algorithms */ /* * Finds algorithm by index, table type or supplied name. * * Returns pointer to algo or NULL. */ static struct table_algo * find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name) { int i, l; struct table_algo *ta; if (ti->type > IPFW_TABLE_MAXTYPE) return (NULL); /* Search by index */ if (ti->atype != 0) { if (ti->atype > tcfg->algo_count) return (NULL); return (tcfg->algo[ti->atype]); } if (name == NULL) { /* Return default algorithm for given type if set */ return (tcfg->def_algo[ti->type]); } /* Search by name */ /* TODO: better search */ for (i = 1; i <= tcfg->algo_count; i++) { ta = tcfg->algo[i]; /* * One can supply additional algorithm * parameters so we compare only the first word * of supplied name: * 'addr:chash hsize=32' * '^^^^^^^^^' * */ l = strlen(ta->name); if (strncmp(name, ta->name, l) != 0) continue; if (name[l] != '\0' && name[l] != ' ') continue; /* Check if we're requesting proper table type */ if (ti->type != 0 && ti->type != ta->type) return (NULL); return (ta); } return (NULL); } /* * Register new table algo @ta. * Stores algo id inside @idx. * * Returns 0 on success. */ int ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, int *idx) { struct tables_config *tcfg; struct table_algo *ta_new; size_t sz; if (size > sizeof(struct table_algo)) return (EINVAL); /* Check for the required on-stack size for add/del */ sz = roundup2(ta->ta_buf_size, sizeof(void *)); if (sz > TA_BUF_SZ) return (EINVAL); KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE")); /* Copy algorithm data to stable storage. */ ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO); memcpy(ta_new, ta, size); tcfg = CHAIN_TO_TCFG(ch); KASSERT(tcfg->algo_count < 255, ("Increase algo array size")); tcfg->algo[++tcfg->algo_count] = ta_new; ta_new->idx = tcfg->algo_count; /* Set algorithm as default one for given type */ if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 && tcfg->def_algo[ta_new->type] == NULL) tcfg->def_algo[ta_new->type] = ta_new; *idx = ta_new->idx; return (0); } /* * Unregisters table algo using @idx as id. * XXX: It is NOT safe to call this function in any place * other than ipfw instance destroy handler. */ void ipfw_del_table_algo(struct ip_fw_chain *ch, int idx) { struct tables_config *tcfg; struct table_algo *ta; tcfg = CHAIN_TO_TCFG(ch); KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d", idx, tcfg->algo_count)); ta = tcfg->algo[idx]; KASSERT(ta != NULL, ("algo idx %d is NULL", idx)); if (tcfg->def_algo[ta->type] == ta) tcfg->def_algo[ta->type] = NULL; free(ta, M_IPFW); } /* * Lists all table algorithms currently available. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ] * * Returns 0 on success */ static int list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_lheader *olh; struct tables_config *tcfg; ipfw_ta_info *i; struct table_algo *ta; uint32_t count, n, size; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); if (sd->valsize < olh->size) return (EINVAL); IPFW_UH_RLOCK(ch); tcfg = CHAIN_TO_TCFG(ch); count = tcfg->algo_count; size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader); /* Fill in header regadless of buffer size */ olh->count = count; olh->objsize = sizeof(ipfw_ta_info); if (size > olh->size) { olh->size = size; IPFW_UH_RUNLOCK(ch); return (ENOMEM); } olh->size = size; for (n = 1; n <= count; n++) { i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i)); KASSERT(i != NULL, ("previously checked buffer is not enough")); ta = tcfg->algo[n]; strlcpy(i->algoname, ta->name, sizeof(i->algoname)); i->type = ta->type; i->refcnt = ta->refcnt; } IPFW_UH_RUNLOCK(ch); return (0); } static int classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { /* Basic IPv4/IPv6 or u32 lookups */ *puidx = cmd->arg1; /* Assume ADDR by default */ *ptype = IPFW_TABLE_ADDR; int v; if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) { /* * generic lookup. The key must be * in 32bit big-endian format. */ v = ((ipfw_insn_u32 *)cmd)->d[1]; switch (v) { case 0: case 1: /* IPv4 src/dst */ break; case 2: case 3: /* src/dst port */ *ptype = IPFW_TABLE_NUMBER; break; case 4: /* uid/gid */ *ptype = IPFW_TABLE_NUMBER; break; case 5: /* jid */ *ptype = IPFW_TABLE_NUMBER; break; case 6: /* dscp */ *ptype = IPFW_TABLE_NUMBER; break; } } return (0); } static int classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { ipfw_insn_if *cmdif; /* Interface table, possibly */ cmdif = (ipfw_insn_if *)cmd; if (cmdif->name[0] != '\1') return (1); *ptype = IPFW_TABLE_INTERFACE; *puidx = cmdif->p.kidx; return (0); } static int classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { *puidx = cmd->arg1; *ptype = IPFW_TABLE_FLOW; return (0); } static void update_arg1(ipfw_insn *cmd, uint16_t idx) { cmd->arg1 = idx; } static void update_via(ipfw_insn *cmd, uint16_t idx) { ipfw_insn_if *cmdif; cmdif = (ipfw_insn_if *)cmd; cmdif->p.kidx = idx; } static int table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, struct named_object **pno) { struct table_config *tc; int error; IPFW_UH_WLOCK_ASSERT(ch); error = find_table_err(CHAIN_TO_NI(ch), ti, &tc); if (error != 0) return (error); *pno = &tc->no; return (0); } /* XXX: sets-sets! */ static struct named_object * table_findbykidx(struct ip_fw_chain *ch, uint16_t idx) { struct namedobj_instance *ni; struct table_config *tc; IPFW_UH_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx); KASSERT(tc != NULL, ("Table with index %d not found", idx)); return (&tc->no); } static int table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd) { switch (cmd) { case SWAP_ALL: case TEST_ALL: case MOVE_ALL: /* * Always return success, the real action and decision * should make table_manage_sets_all(). */ return (0); case TEST_ONE: case MOVE_ONE: /* * NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add * if set number will be used in hash function. Currently * we can just use generic handler that replaces set value. */ if (V_fw_tables_sets == 0) return (0); break; case COUNT_ONE: /* * Return EOPNOTSUPP for COUNT_ONE when per-set sysctl is * disabled. This allow skip table's opcodes from additional * checks when specific rules moved to another set. */ if (V_fw_tables_sets == 0) return (EOPNOTSUPP); } /* Use generic sets handler when per-set sysctl is enabled. */ return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME, set, new_set, cmd)); } /* * We register several opcode rewriters for lookup tables. * All tables opcodes have the same ETLV type, but different subtype. * To avoid invoking sets handler several times for XXX_ALL commands, * we use separate manage_sets handler. O_RECV has the lowest value, * so it should be called first. */ static int table_manage_sets_all(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd) { switch (cmd) { case SWAP_ALL: case TEST_ALL: /* * Return success for TEST_ALL, since nothing prevents * move rules from one set to another. All tables are * accessible from all sets when per-set tables sysctl * is disabled. */ case MOVE_ALL: if (V_fw_tables_sets == 0) return (0); break; default: return (table_manage_sets(ch, set, new_set, cmd)); } /* Use generic sets handler when per-set sysctl is enabled. */ return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME, set, new_set, cmd)); } static struct opcode_obj_rewrite opcodes[] = { { .opcode = O_IP_SRC_LOOKUP, .etlv = IPFW_TLV_TBL_NAME, .classifier = classify_srcdst, .update = update_arg1, .find_byname = table_findbyname, .find_bykidx = table_findbykidx, .create_object = create_table_compat, .manage_sets = table_manage_sets, }, { .opcode = O_IP_DST_LOOKUP, .etlv = IPFW_TLV_TBL_NAME, .classifier = classify_srcdst, .update = update_arg1, .find_byname = table_findbyname, .find_bykidx = table_findbykidx, .create_object = create_table_compat, .manage_sets = table_manage_sets, }, { .opcode = O_IP_FLOW_LOOKUP, .etlv = IPFW_TLV_TBL_NAME, .classifier = classify_flow, .update = update_arg1, .find_byname = table_findbyname, .find_bykidx = table_findbykidx, .create_object = create_table_compat, .manage_sets = table_manage_sets, }, { .opcode = O_XMIT, .etlv = IPFW_TLV_TBL_NAME, .classifier = classify_via, .update = update_via, .find_byname = table_findbyname, .find_bykidx = table_findbykidx, .create_object = create_table_compat, .manage_sets = table_manage_sets, }, { .opcode = O_RECV, .etlv = IPFW_TLV_TBL_NAME, .classifier = classify_via, .update = update_via, .find_byname = table_findbyname, .find_bykidx = table_findbykidx, .create_object = create_table_compat, .manage_sets = table_manage_sets_all, }, { .opcode = O_VIA, .etlv = IPFW_TLV_TBL_NAME, .classifier = classify_via, .update = update_via, .find_byname = table_findbyname, .find_bykidx = table_findbykidx, .create_object = create_table_compat, .manage_sets = table_manage_sets, }, }; static int test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no, void *arg __unused) { /* Check that there aren't any tables in not default set */ if (no->set != 0) return (EBUSY); return (0); } /* * Switch between "set 0" and "rule's set" table binding, * Check all ruleset bindings and permits changing * IFF each binding has both rule AND table in default set (set 0). * * Returns 0 on success. */ int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets) { struct opcode_obj_rewrite *rw; struct namedobj_instance *ni; struct named_object *no; struct ip_fw *rule; ipfw_insn *cmd; int cmdlen, i, l; uint16_t kidx; uint8_t subtype; IPFW_UH_WLOCK(ch); if (V_fw_tables_sets == sets) { IPFW_UH_WUNLOCK(ch); return (0); } ni = CHAIN_TO_NI(ch); if (sets == 0) { /* * Prevent disabling sets support if we have some tables * in not default sets. */ if (ipfw_objhash_foreach_type(ni, test_sets_cb, NULL, IPFW_TLV_TBL_NAME) != 0) { IPFW_UH_WUNLOCK(ch); return (EBUSY); } } /* * Scan all rules and examine tables opcodes. */ for (i = 0; i < ch->n_rules; i++) { rule = ch->map[i]; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); /* Check only tables opcodes */ for (kidx = 0, rw = opcodes; rw < opcodes + nitems(opcodes); rw++) { if (rw->opcode != cmd->opcode) continue; if (rw->classifier(cmd, &kidx, &subtype) == 0) break; } if (kidx == 0) continue; no = ipfw_objhash_lookup_kidx(ni, kidx); /* Check if both table object and rule has the set 0 */ if (no->set != 0 || rule->set != 0) { IPFW_UH_WUNLOCK(ch); return (EBUSY); } } } V_fw_tables_sets = sets; IPFW_UH_WUNLOCK(ch); return (0); } /* * Checks table name for validity. * Enforce basic length checks, the rest * should be done in userland. * * Returns 0 if name is considered valid. */ static int check_table_name(const char *name) { /* * TODO: do some more complicated checks */ return (ipfw_check_object_name_generic(name)); } /* * Finds table config based on either legacy index * or name in ntlv. * Note @ti structure contains unchecked data from userland. * * Returns 0 in success and fills in @tc with found config */ static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti, struct table_config **tc) { char *name, bname[16]; struct named_object *no; ipfw_obj_ntlv *ntlv; uint32_t set; if (ti->tlvs != NULL) { ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, IPFW_TLV_TBL_NAME); if (ntlv == NULL) return (EINVAL); name = ntlv->name; /* * Use set provided by @ti instead of @ntlv one. * This is needed due to different sets behavior * controlled by V_fw_tables_sets. */ set = (V_fw_tables_sets != 0) ? ti->set : 0; } else { snprintf(bname, sizeof(bname), "%d", ti->uidx); name = bname; set = 0; } no = ipfw_objhash_lookup_name(ni, set, name); *tc = (struct table_config *)no; return (0); } /* * Finds table config based on either legacy index * or name in ntlv. * Note @ti structure contains unchecked data from userland. * * Returns pointer to table_config or NULL. */ static struct table_config * find_table(struct namedobj_instance *ni, struct tid_info *ti) { struct table_config *tc; if (find_table_err(ni, ti, &tc) != 0) return (NULL); return (tc); } /* * Allocate new table config structure using * specified @algo and @aname. * * Returns pointer to config or NULL. */ static struct table_config * alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, struct table_algo *ta, char *aname, uint8_t tflags) { char *name, bname[16]; struct table_config *tc; int error; ipfw_obj_ntlv *ntlv; uint32_t set; if (ti->tlvs != NULL) { ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, IPFW_TLV_TBL_NAME); if (ntlv == NULL) return (NULL); name = ntlv->name; set = ntlv->set; } else { /* Compat part: convert number to string representation */ snprintf(bname, sizeof(bname), "%d", ti->uidx); name = bname; set = 0; } tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO); tc->no.name = tc->tablename; tc->no.subtype = ta->type; tc->no.set = set; tc->tflags = tflags; tc->ta = ta; strlcpy(tc->tablename, name, sizeof(tc->tablename)); /* Set "shared" value type by default */ tc->vshared = 1; /* Preallocate data structures for new tables */ error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags); if (error != 0) { free(tc, M_IPFW); return (NULL); } return (tc); } /* * Destroys table state and config. */ static void free_table_config(struct namedobj_instance *ni, struct table_config *tc) { KASSERT(tc->linked == 0, ("free() on linked config")); /* UH lock MUST NOT be held */ /* * We're using ta without any locking/referencing. * TODO: fix this if we're going to use unloadable algos. */ tc->ta->destroy(tc->astate, &tc->ti_copy); free(tc, M_IPFW); } /* * Links @tc to @chain table named instance. * Sets appropriate type/states in @chain table info. */ static void link_table(struct ip_fw_chain *ch, struct table_config *tc) { struct namedobj_instance *ni; struct table_info *ti; uint16_t kidx; IPFW_UH_WLOCK_ASSERT(ch); - IPFW_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; ipfw_objhash_add(ni, &tc->no); ti = KIDX_TO_TI(ch, kidx); *ti = tc->ti_copy; /* Notify algo on real @ti address */ if (tc->ta->change_ti != NULL) tc->ta->change_ti(tc->astate, ti); tc->linked = 1; tc->ta->refcnt++; } /* * Unlinks @tc from @chain table named instance. * Zeroes states in @chain and stores them in @tc. */ static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc) { struct namedobj_instance *ni; struct table_info *ti; uint16_t kidx; IPFW_UH_WLOCK_ASSERT(ch); IPFW_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; /* Clear state. @ti copy is already saved inside @tc */ ipfw_objhash_del(ni, &tc->no); ti = KIDX_TO_TI(ch, kidx); memset(ti, 0, sizeof(struct table_info)); tc->linked = 0; tc->ta->refcnt--; /* Notify algo on real @ti address */ if (tc->ta->change_ti != NULL) tc->ta->change_ti(tc->astate, NULL); } static struct ipfw_sopt_handler scodes[] = { { IP_FW_TABLE_XCREATE, 0, HDIR_SET, create_table }, { IP_FW_TABLE_XDESTROY, 0, HDIR_SET, flush_table_v0 }, { IP_FW_TABLE_XFLUSH, 0, HDIR_SET, flush_table_v0 }, { IP_FW_TABLE_XMODIFY, 0, HDIR_BOTH, modify_table }, { IP_FW_TABLE_XINFO, 0, HDIR_GET, describe_table }, { IP_FW_TABLES_XLIST, 0, HDIR_GET, list_tables }, { IP_FW_TABLE_XLIST, 0, HDIR_GET, dump_table_v0 }, { IP_FW_TABLE_XLIST, 1, HDIR_GET, dump_table_v1 }, { IP_FW_TABLE_XADD, 0, HDIR_BOTH, manage_table_ent_v0 }, { IP_FW_TABLE_XADD, 1, HDIR_BOTH, manage_table_ent_v1 }, { IP_FW_TABLE_XDEL, 0, HDIR_BOTH, manage_table_ent_v0 }, { IP_FW_TABLE_XDEL, 1, HDIR_BOTH, manage_table_ent_v1 }, { IP_FW_TABLE_XFIND, 0, HDIR_GET, find_table_entry }, { IP_FW_TABLE_XSWAP, 0, HDIR_SET, swap_table }, { IP_FW_TABLES_ALIST, 0, HDIR_GET, list_table_algo }, { IP_FW_TABLE_XGETSIZE, 0, HDIR_GET, get_table_size }, }; static int destroy_table_locked(struct namedobj_instance *ni, struct named_object *no, void *arg) { unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no); if (ipfw_objhash_free_idx(ni, no->kidx) != 0) printf("Error unlinking kidx %d from table %s\n", no->kidx, no->name); free_table_config(ni, (struct table_config *)no); return (0); } /* * Shuts tables module down. */ void ipfw_destroy_tables(struct ip_fw_chain *ch, int last) { IPFW_DEL_SOPT_HANDLER(last, scodes); IPFW_DEL_OBJ_REWRITER(last, opcodes); /* Remove all tables from working set */ IPFW_UH_WLOCK(ch); IPFW_WLOCK(ch); ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch); IPFW_WUNLOCK(ch); IPFW_UH_WUNLOCK(ch); /* Free pointers itself */ free(ch->tablestate, M_IPFW); ipfw_table_value_destroy(ch, last); ipfw_table_algo_destroy(ch); ipfw_objhash_destroy(CHAIN_TO_NI(ch)); free(CHAIN_TO_TCFG(ch), M_IPFW); } /* * Starts tables module. */ int ipfw_init_tables(struct ip_fw_chain *ch, int first) { struct tables_config *tcfg; /* Allocate pointers */ ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info), M_IPFW, M_WAITOK | M_ZERO); tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO); tcfg->namehash = ipfw_objhash_create(V_fw_tables_max); ch->tblcfg = tcfg; ipfw_table_value_init(ch, first); ipfw_table_algo_init(ch); IPFW_ADD_OBJ_REWRITER(first, opcodes); IPFW_ADD_SOPT_HANDLER(first, scodes); return (0); } Index: stable/11/sys/netpfil/ipfw/nat64/nat64lsn_control.c =================================================================== --- stable/11/sys/netpfil/ipfw/nat64/nat64lsn_control.c (revision 324045) +++ stable/11/sys/netpfil/ipfw/nat64/nat64lsn_control.c (revision 324046) @@ -1,917 +1,911 @@ /*- * Copyright (c) 2015 Yandex LLC * Copyright (c) 2015 Alexander V. Chernikov * Copyright (c) 2016 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include VNET_DEFINE(uint16_t, nat64lsn_eid) = 0; static struct nat64lsn_cfg * nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set) { struct nat64lsn_cfg *cfg; cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set, IPFW_TLV_NAT64LSN_NAME, name); return (cfg); } static void nat64lsn_default_config(ipfw_nat64lsn_cfg *uc) { if (uc->max_ports == 0) uc->max_ports = NAT64LSN_MAX_PORTS; else uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE); if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR) uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR; if (uc->jmaxlen == 0) uc->jmaxlen = NAT64LSN_JMAXLEN; if (uc->jmaxlen > 65536) uc->jmaxlen = 65536; if (uc->nh_delete_delay == 0) uc->nh_delete_delay = NAT64LSN_HOST_AGE; if (uc->pg_delete_delay == 0) uc->pg_delete_delay = NAT64LSN_PG_AGE; if (uc->st_syn_ttl == 0) uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE; if (uc->st_close_ttl == 0) uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE; if (uc->st_estab_ttl == 0) uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE; if (uc->st_udp_ttl == 0) uc->st_udp_ttl = NAT64LSN_UDP_AGE; if (uc->st_icmp_ttl == 0) uc->st_icmp_ttl = NAT64LSN_ICMP_AGE; } /* * Creates new nat64lsn instance. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ] * * Returns 0 on success */ static int nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_lheader *olh; ipfw_nat64lsn_cfg *uc; struct nat64lsn_cfg *cfg; struct namedobj_instance *ni; uint32_t addr4, mask4; if (sd->valsize != sizeof(*olh) + sizeof(*uc)) return (EINVAL); olh = (ipfw_obj_lheader *)sd->kbuf; uc = (ipfw_nat64lsn_cfg *)(olh + 1); if (ipfw_check_object_name_generic(uc->name) != 0) return (EINVAL); if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS) return (EINVAL); if (uc->plen4 > 32) return (EINVAL); if (uc->plen6 > 128 || ((uc->plen6 % 8) != 0)) return (EINVAL); /* XXX: Check prefix4 to be global */ addr4 = ntohl(uc->prefix4.s_addr); mask4 = ~((1 << (32 - uc->plen4)) - 1); if ((addr4 & mask4) != addr4) return (EINVAL); /* XXX: Check prefix6 */ if (uc->min_port == 0) uc->min_port = NAT64_MIN_PORT; if (uc->max_port == 0) uc->max_port = 65535; if (uc->min_port > uc->max_port) return (EINVAL); uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE); uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE); nat64lsn_default_config(uc); ni = CHAIN_TO_SRV(ch); IPFW_UH_RLOCK(ch); if (nat64lsn_find(ni, uc->name, uc->set) != NULL) { IPFW_UH_RUNLOCK(ch); return (EEXIST); } IPFW_UH_RUNLOCK(ch); cfg = nat64lsn_init_instance(ch, 1 << (32 - uc->plen4)); strlcpy(cfg->name, uc->name, sizeof(cfg->name)); cfg->no.name = cfg->name; cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME; cfg->no.set = uc->set; cfg->prefix4 = addr4; cfg->pmask4 = addr4 | ~mask4; /* XXX: Copy 96 bits */ cfg->plen6 = 96; memcpy(&cfg->prefix6, &uc->prefix6, cfg->plen6 / 8); cfg->plen4 = uc->plen4; cfg->flags = uc->flags & NAT64LSN_FLAGSMASK; cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE; cfg->agg_prefix_len = uc->agg_prefix_len; cfg->agg_prefix_max = uc->agg_prefix_max; cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE; cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE; cfg->jmaxlen = uc->jmaxlen; cfg->nh_delete_delay = uc->nh_delete_delay; cfg->pg_delete_delay = uc->pg_delete_delay; cfg->st_syn_ttl = uc->st_syn_ttl; cfg->st_close_ttl = uc->st_close_ttl; cfg->st_estab_ttl = uc->st_estab_ttl; cfg->st_udp_ttl = uc->st_udp_ttl; cfg->st_icmp_ttl = uc->st_icmp_ttl; cfg->nomatch_verdict = IP_FW_DENY; cfg->nomatch_final = 1; /* Exit outer loop by default */ IPFW_UH_WLOCK(ch); if (nat64lsn_find(ni, uc->name, uc->set) != NULL) { IPFW_UH_WUNLOCK(ch); nat64lsn_destroy_instance(cfg); return (EEXIST); } if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) { IPFW_UH_WUNLOCK(ch); nat64lsn_destroy_instance(cfg); return (ENOSPC); } ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no); /* Okay, let's link data */ - IPFW_WLOCK(ch); SRV_OBJECT(ch, cfg->no.kidx) = cfg; - IPFW_WUNLOCK(ch); - nat64lsn_start_instance(cfg); IPFW_UH_WUNLOCK(ch); return (0); } static void nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg) { IPFW_UH_WLOCK_ASSERT(ch); ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx); } /* * Destroys nat64 instance. * Data layout (v0)(current): * Request: [ ipfw_obj_header ] * * Returns 0 on success */ static int nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct nat64lsn_cfg *cfg; ipfw_obj_header *oh; if (sd->valsize != sizeof(*oh)) return (EINVAL); oh = (ipfw_obj_header *)op3; IPFW_UH_WLOCK(ch); cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } if (cfg->no.refcnt > 0) { IPFW_UH_WUNLOCK(ch); return (EBUSY); } - IPFW_WLOCK(ch); SRV_OBJECT(ch, cfg->no.kidx) = NULL; - IPFW_WUNLOCK(ch); - nat64lsn_detach_config(ch, cfg); IPFW_UH_WUNLOCK(ch); nat64lsn_destroy_instance(cfg); return (0); } #define __COPY_STAT_FIELD(_cfg, _stats, _field) \ (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field) static void export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, struct ipfw_nat64lsn_stats *stats) { __COPY_STAT_FIELD(cfg, stats, opcnt64); __COPY_STAT_FIELD(cfg, stats, opcnt46); __COPY_STAT_FIELD(cfg, stats, ofrags); __COPY_STAT_FIELD(cfg, stats, ifrags); __COPY_STAT_FIELD(cfg, stats, oerrors); __COPY_STAT_FIELD(cfg, stats, noroute4); __COPY_STAT_FIELD(cfg, stats, noroute6); __COPY_STAT_FIELD(cfg, stats, nomatch4); __COPY_STAT_FIELD(cfg, stats, noproto); __COPY_STAT_FIELD(cfg, stats, nomem); __COPY_STAT_FIELD(cfg, stats, dropped); __COPY_STAT_FIELD(cfg, stats, jcalls); __COPY_STAT_FIELD(cfg, stats, jrequests); __COPY_STAT_FIELD(cfg, stats, jhostsreq); __COPY_STAT_FIELD(cfg, stats, jportreq); __COPY_STAT_FIELD(cfg, stats, jhostfails); __COPY_STAT_FIELD(cfg, stats, jportfails); __COPY_STAT_FIELD(cfg, stats, jmaxlen); __COPY_STAT_FIELD(cfg, stats, jnomem); __COPY_STAT_FIELD(cfg, stats, jreinjected); __COPY_STAT_FIELD(cfg, stats, screated); __COPY_STAT_FIELD(cfg, stats, sdeleted); __COPY_STAT_FIELD(cfg, stats, spgcreated); __COPY_STAT_FIELD(cfg, stats, spgdeleted); stats->hostcount = cfg->ihcount; stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP]; stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP]; stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP]; } #undef __COPY_STAT_FIELD static void nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, ipfw_nat64lsn_cfg *uc) { uc->flags = cfg->flags & NAT64LSN_FLAGSMASK; uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE; uc->agg_prefix_len = cfg->agg_prefix_len; uc->agg_prefix_max = cfg->agg_prefix_max; uc->jmaxlen = cfg->jmaxlen; uc->nh_delete_delay = cfg->nh_delete_delay; uc->pg_delete_delay = cfg->pg_delete_delay; uc->st_syn_ttl = cfg->st_syn_ttl; uc->st_close_ttl = cfg->st_close_ttl; uc->st_estab_ttl = cfg->st_estab_ttl; uc->st_udp_ttl = cfg->st_udp_ttl; uc->st_icmp_ttl = cfg->st_icmp_ttl; uc->prefix4.s_addr = htonl(cfg->prefix4); uc->prefix6 = cfg->prefix6; uc->plen4 = cfg->plen4; uc->plen6 = cfg->plen6; uc->set = cfg->no.set; strlcpy(uc->name, cfg->no.name, sizeof(uc->name)); } struct nat64_dump_arg { struct ip_fw_chain *ch; struct sockopt_data *sd; }; static int export_config_cb(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg; ipfw_nat64lsn_cfg *uc; uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc)); nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc); return (0); } /* * Lists all nat64 lsn instances currently available in kernel. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ] * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ] * * Returns 0 on success */ static int nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_lheader *olh; struct nat64_dump_arg da; /* Check minimum header size */ if (sd->valsize < sizeof(ipfw_obj_lheader)) return (EINVAL); olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); IPFW_UH_RLOCK(ch); olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME); olh->objsize = sizeof(ipfw_nat64lsn_cfg); olh->size = sizeof(*olh) + olh->count * olh->objsize; if (sd->valsize < olh->size) { IPFW_UH_RUNLOCK(ch); return (ENOMEM); } memset(&da, 0, sizeof(da)); da.ch = ch; da.sd = sd; ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da, IPFW_TLV_NAT64LSN_NAME); IPFW_UH_RUNLOCK(ch); return (0); } /* * Change existing nat64lsn instance configuration. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ] * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ] * * Returns 0 on success */ static int nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op, struct sockopt_data *sd) { ipfw_obj_header *oh; ipfw_nat64lsn_cfg *uc; struct nat64lsn_cfg *cfg; struct namedobj_instance *ni; if (sd->valsize != sizeof(*oh) + sizeof(*uc)) return (EINVAL); oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sizeof(*oh) + sizeof(*uc)); uc = (ipfw_nat64lsn_cfg *)(oh + 1); if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || oh->ntlv.set >= IPFW_MAX_SETS) return (EINVAL); ni = CHAIN_TO_SRV(ch); if (sd->sopt->sopt_dir == SOPT_GET) { IPFW_UH_RLOCK(ch); cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_RUNLOCK(ch); return (EEXIST); } nat64lsn_export_config(ch, cfg, uc); IPFW_UH_RUNLOCK(ch); return (0); } nat64lsn_default_config(uc); IPFW_UH_WLOCK(ch); cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_WUNLOCK(ch); return (EEXIST); } /* * For now allow to change only following values: * jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age, * tcp_est_age, udp_age, icmp_age, flags, max_ports. */ cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE; cfg->jmaxlen = uc->jmaxlen; cfg->nh_delete_delay = uc->nh_delete_delay; cfg->pg_delete_delay = uc->pg_delete_delay; cfg->st_syn_ttl = uc->st_syn_ttl; cfg->st_close_ttl = uc->st_close_ttl; cfg->st_estab_ttl = uc->st_estab_ttl; cfg->st_udp_ttl = uc->st_udp_ttl; cfg->st_icmp_ttl = uc->st_icmp_ttl; cfg->flags = uc->flags & NAT64LSN_FLAGSMASK; IPFW_UH_WUNLOCK(ch); return (0); } /* * Get nat64lsn statistics. * Data layout (v0)(current): * Request: [ ipfw_obj_header ] * Reply: [ ipfw_obj_header ipfw_counter_tlv ] * * Returns 0 on success */ static int nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, struct sockopt_data *sd) { struct ipfw_nat64lsn_stats stats; struct nat64lsn_cfg *cfg; ipfw_obj_header *oh; ipfw_obj_ctlv *ctlv; size_t sz; sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); if (sd->valsize % sizeof(uint64_t)) return (EINVAL); if (sd->valsize < sz) return (ENOMEM); oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); if (oh == NULL) return (EINVAL); memset(&stats, 0, sizeof(stats)); IPFW_UH_RLOCK(ch); cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } export_stats(ch, cfg, &stats); IPFW_UH_RUNLOCK(ch); ctlv = (ipfw_obj_ctlv *)(oh + 1); memset(ctlv, 0, sizeof(*ctlv)); ctlv->head.type = IPFW_TLV_COUNTERS; ctlv->head.length = sz - sizeof(ipfw_obj_header); ctlv->count = sizeof(stats) / sizeof(uint64_t); ctlv->objsize = sizeof(uint64_t); ctlv->version = IPFW_NAT64_VERSION; memcpy(ctlv + 1, &stats, sizeof(stats)); return (0); } /* * Reset nat64lsn statistics. * Data layout (v0)(current): * Request: [ ipfw_obj_header ] * * Returns 0 on success */ static int nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, struct sockopt_data *sd) { struct nat64lsn_cfg *cfg; ipfw_obj_header *oh; if (sd->valsize != sizeof(*oh)) return (EINVAL); oh = (ipfw_obj_header *)sd->kbuf; if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || oh->ntlv.set >= IPFW_MAX_SETS) return (EINVAL); IPFW_UH_WLOCK(ch); cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS); IPFW_UH_WUNLOCK(ch); return (0); } /* * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg * ipfw_nat64lsn_state x count, ... ] ] */ static int export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg, ipfw_nat64lsn_stg *stg, struct sockopt_data *sd) { ipfw_nat64lsn_state *ste; struct nat64lsn_state *st; int i, count; NAT64_LOCK(pg->host); count = 0; for (i = 0; i < 64; i++) { if (PG_IS_BUSY_IDX(pg, i)) count++; } DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, count); if (count == 0) { stg->count = 0; NAT64_UNLOCK(pg->host); return (0); } ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd, count * sizeof(ipfw_nat64lsn_state)); if (ste == NULL) { NAT64_UNLOCK(pg->host); return (1); } stg->alias4.s_addr = pg->aaddr; stg->proto = nat64lsn_rproto_map[pg->nat_proto]; stg->flags = 0; stg->host6 = pg->host->addr; stg->count = count; for (i = 0; i < 64; i++) { if (PG_IS_FREE_IDX(pg, i)) continue; st = &pg->states[i]; ste->daddr.s_addr = st->u.s.faddr; ste->dport = st->u.s.fport; ste->aport = pg->aport + i; ste->sport = st->u.s.lport; ste->flags = st->flags; /* XXX filter flags */ ste->idle = GET_AGE(st->timestamp); ste++; } NAT64_UNLOCK(pg->host); return (0); } static int get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, uint16_t *port) { if (*port < 65536 - NAT64_CHUNK_SIZE) { *port += NAT64_CHUNK_SIZE; return (0); } *port = 0; if (*nat_proto < NAT_MAX_PROTO - 1) { *nat_proto += 1; return (0); } *nat_proto = 1; if (*addr < cfg->pmask4) { *addr += 1; return (0); } /* End of space. */ return (1); } #define PACK_IDX(addr, proto, port) \ ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8) #define UNPACK_IDX(idx, addr, proto, port) \ (addr) = (uint32_t)((idx) >> 32); \ (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \ (proto) = (uint8_t)(((idx) >> 8) & 0xFF) static struct nat64lsn_portgroup * get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, uint16_t *port) { struct nat64lsn_portgroup *pg; uint64_t pre_pack, post_pack; pg = NULL; pre_pack = PACK_IDX(*addr, *nat_proto, *port); for (;;) { if (get_next_idx(cfg, addr, nat_proto, port) != 0) { /* End of states */ return (pg); } pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port); if (pg != NULL) break; } post_pack = PACK_IDX(*addr, *nat_proto, *port); if (pre_pack == post_pack) DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d", *addr, *nat_proto, *port); return (pg); } static NAT64NOINLINE struct nat64lsn_portgroup * get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, uint16_t *port) { struct nat64lsn_portgroup *pg; pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port); if (pg == NULL) pg = get_next_pg(cfg, addr, nat_proto, port); return (pg); } /* * Lists nat64lsn states. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]] * Reply: [ ipfw_obj_header ipfw_obj_data [ * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ] * * Returns 0 on success */ static int nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_header *oh; ipfw_obj_data *od; ipfw_nat64lsn_stg *stg; struct nat64lsn_cfg *cfg; struct nat64lsn_portgroup *pg, *pg_next; uint64_t next_idx; size_t sz; uint32_t addr, states; uint16_t port; uint8_t nat_proto; sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + sizeof(uint64_t); /* Check minimum header size */ if (sd->valsize < sz) return (EINVAL); oh = (ipfw_obj_header *)sd->kbuf; od = (ipfw_obj_data *)(oh + 1); if (od->head.type != IPFW_TLV_OBJDATA || od->head.length != sz - sizeof(ipfw_obj_header)) return (EINVAL); next_idx = *(uint64_t *)(od + 1); /* Translate index to the request position to start from */ UNPACK_IDX(next_idx, addr, nat_proto, port); if (nat_proto >= NAT_MAX_PROTO) return (EINVAL); if (nat_proto == 0 && addr != 0) return (EINVAL); IPFW_UH_RLOCK(ch); cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } /* Fill in starting point */ if (addr == 0) { addr = cfg->prefix4; nat_proto = 1; port = 0; } if (addr < cfg->prefix4 || addr > cfg->pmask4) { IPFW_UH_RUNLOCK(ch); DPRINTF(DP_GENERIC | DP_STATE, "XXX: %ju %u %u", (uintmax_t)next_idx, addr, cfg->pmask4); return (EINVAL); } sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + sizeof(ipfw_nat64lsn_stg); if (sd->valsize < sz) return (ENOMEM); oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz); od = (ipfw_obj_data *)(oh + 1); od->head.type = IPFW_TLV_OBJDATA; od->head.length = sz - sizeof(ipfw_obj_header); stg = (ipfw_nat64lsn_stg *)(od + 1); pg = get_first_pg(cfg, &addr, &nat_proto, &port); if (pg == NULL) { /* No states */ stg->next_idx = 0xFF; stg->count = 0; IPFW_UH_RUNLOCK(ch); return (0); } states = 0; pg_next = NULL; while (pg != NULL) { pg_next = get_next_pg(cfg, &addr, &nat_proto, &port); if (pg_next == NULL) stg->next_idx = 0xFF; else stg->next_idx = PACK_IDX(addr, nat_proto, port); if (export_pg_states(cfg, pg, stg, sd) != 0) { IPFW_UH_RUNLOCK(ch); return (states == 0 ? ENOMEM: 0); } states += stg->count; od->head.length += stg->count * sizeof(ipfw_nat64lsn_state); sz += stg->count * sizeof(ipfw_nat64lsn_state); if (pg_next != NULL) { sz += sizeof(ipfw_nat64lsn_stg); if (sd->valsize < sz) break; stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd, sizeof(ipfw_nat64lsn_stg)); } pg = pg_next; } IPFW_UH_RUNLOCK(ch); return (0); } static struct ipfw_sopt_handler scodes[] = { { IP_FW_NAT64LSN_CREATE, 0, HDIR_BOTH, nat64lsn_create }, { IP_FW_NAT64LSN_DESTROY,0, HDIR_SET, nat64lsn_destroy }, { IP_FW_NAT64LSN_CONFIG, 0, HDIR_BOTH, nat64lsn_config }, { IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list }, { IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats }, { IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats }, { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states }, }; static int nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { ipfw_insn *icmd; icmd = cmd - 1; if (icmd->opcode != O_EXTERNAL_ACTION || icmd->arg1 != V_nat64lsn_eid) return (1); *puidx = cmd->arg1; *ptype = 0; return (0); } static void nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx) { cmd->arg1 = idx; } static int nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, struct named_object **pno) { int err; err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, IPFW_TLV_NAT64LSN_NAME, pno); return (err); } static struct named_object * nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx) { struct namedobj_instance *ni; struct named_object *no; IPFW_UH_WLOCK_ASSERT(ch); ni = CHAIN_TO_SRV(ch); no = ipfw_objhash_lookup_kidx(ni, idx); KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx)); return (no); } static int nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd) { return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME, set, new_set, cmd)); } static struct opcode_obj_rewrite opcodes[] = { { .opcode = O_EXTERNAL_INSTANCE, .etlv = IPFW_TLV_EACTION /* just show it isn't table */, .classifier = nat64lsn_classify, .update = nat64lsn_update_arg1, .find_byname = nat64lsn_findbyname, .find_bykidx = nat64lsn_findbykidx, .manage_sets = nat64lsn_manage_sets, }, }; static int destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct nat64lsn_cfg *cfg; struct ip_fw_chain *ch; ch = (struct ip_fw_chain *)arg; cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx); SRV_OBJECT(ch, no->kidx) = NULL; nat64lsn_detach_config(ch, cfg); nat64lsn_destroy_instance(cfg); return (0); } int nat64lsn_init(struct ip_fw_chain *ch, int first) { if (first != 0) nat64lsn_init_internal(); V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn"); if (V_nat64lsn_eid == 0) return (ENXIO); IPFW_ADD_SOPT_HANDLER(first, scodes); IPFW_ADD_OBJ_REWRITER(first, opcodes); return (0); } void nat64lsn_uninit(struct ip_fw_chain *ch, int last) { IPFW_DEL_OBJ_REWRITER(last, opcodes); IPFW_DEL_SOPT_HANDLER(last, scodes); ipfw_del_eaction(ch, V_nat64lsn_eid); /* * Since we already have deregistered external action, * our named objects become unaccessible via rules, because * all rules were truncated by ipfw_del_eaction(). * So, we can unlink and destroy our named objects without holding * IPFW_WLOCK(). */ IPFW_UH_WLOCK(ch); ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch, IPFW_TLV_NAT64LSN_NAME); V_nat64lsn_eid = 0; IPFW_UH_WUNLOCK(ch); if (last != 0) nat64lsn_uninit_internal(); } Index: stable/11/sys/netpfil/ipfw/nat64/nat64stl_control.c =================================================================== --- stable/11/sys/netpfil/ipfw/nat64/nat64stl_control.c (revision 324045) +++ stable/11/sys/netpfil/ipfw/nat64/nat64stl_control.c (revision 324046) @@ -1,621 +1,615 @@ /*- * Copyright (c) 2015-2016 Yandex LLC * Copyright (c) 2015-2016 Andrey V. Elsukov * Copyright (c) 2015 Alexander V. Chernikov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include VNET_DEFINE(uint16_t, nat64stl_eid) = 0; static struct nat64stl_cfg *nat64stl_alloc_config(const char *name, uint8_t set); static void nat64stl_free_config(struct nat64stl_cfg *cfg); static struct nat64stl_cfg *nat64stl_find(struct namedobj_instance *ni, const char *name, uint8_t set); static struct nat64stl_cfg * nat64stl_alloc_config(const char *name, uint8_t set) { struct nat64stl_cfg *cfg; cfg = malloc(sizeof(struct nat64stl_cfg), M_IPFW, M_WAITOK | M_ZERO); COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK); cfg->no.name = cfg->name; cfg->no.etlv = IPFW_TLV_NAT64STL_NAME; cfg->no.set = set; strlcpy(cfg->name, name, sizeof(cfg->name)); return (cfg); } static void nat64stl_free_config(struct nat64stl_cfg *cfg) { COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS); free(cfg, M_IPFW); } static void nat64stl_export_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, ipfw_nat64stl_cfg *uc) { struct named_object *no; uc->prefix6 = cfg->prefix6; uc->plen6 = cfg->plen6; uc->flags = cfg->flags & NAT64STL_FLAGSMASK; uc->set = cfg->no.set; strlcpy(uc->name, cfg->no.name, sizeof(uc->name)); no = ipfw_objhash_lookup_table_kidx(ch, cfg->map64); ipfw_export_obj_ntlv(no, &uc->ntlv6); no = ipfw_objhash_lookup_table_kidx(ch, cfg->map46); ipfw_export_obj_ntlv(no, &uc->ntlv4); } struct nat64stl_dump_arg { struct ip_fw_chain *ch; struct sockopt_data *sd; }; static int export_config_cb(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct nat64stl_dump_arg *da = (struct nat64stl_dump_arg *)arg; ipfw_nat64stl_cfg *uc; uc = (ipfw_nat64stl_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc)); nat64stl_export_config(da->ch, (struct nat64stl_cfg *)no, uc); return (0); } static struct nat64stl_cfg * nat64stl_find(struct namedobj_instance *ni, const char *name, uint8_t set) { struct nat64stl_cfg *cfg; cfg = (struct nat64stl_cfg *)ipfw_objhash_lookup_name_type(ni, set, IPFW_TLV_NAT64STL_NAME, name); return (cfg); } static int nat64stl_create_internal(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, ipfw_nat64stl_cfg *i) { IPFW_UH_WLOCK_ASSERT(ch); if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) return (ENOSPC); cfg->flags |= NAT64STL_KIDX; if (ipfw_ref_table(ch, &i->ntlv4, &cfg->map46) != 0) return (EINVAL); cfg->flags |= NAT64STL_46T; if (ipfw_ref_table(ch, &i->ntlv6, &cfg->map64) != 0) return (EINVAL); cfg->flags |= NAT64STL_64T; ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no); return (0); } /* * Creates new nat64 instance. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ipfw_nat64stl_cfg ] * * Returns 0 on success */ static int nat64stl_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_lheader *olh; ipfw_nat64stl_cfg *uc; struct namedobj_instance *ni; struct nat64stl_cfg *cfg; int error; if (sd->valsize != sizeof(*olh) + sizeof(*uc)) return (EINVAL); olh = (ipfw_obj_lheader *)sd->kbuf; uc = (ipfw_nat64stl_cfg *)(olh + 1); if (ipfw_check_object_name_generic(uc->name) != 0) return (EINVAL); if (!IN6_IS_ADDR_WKPFX(&uc->prefix6)) return (EINVAL); if (uc->plen6 != 96 || uc->set >= IPFW_MAX_SETS) return (EINVAL); /* XXX: check types of tables */ ni = CHAIN_TO_SRV(ch); error = 0; IPFW_UH_RLOCK(ch); if (nat64stl_find(ni, uc->name, uc->set) != NULL) { IPFW_UH_RUNLOCK(ch); return (EEXIST); } IPFW_UH_RUNLOCK(ch); cfg = nat64stl_alloc_config(uc->name, uc->set); cfg->prefix6 = uc->prefix6; cfg->plen6 = uc->plen6; cfg->flags = uc->flags & NAT64STL_FLAGSMASK; IPFW_UH_WLOCK(ch); if (nat64stl_find(ni, uc->name, uc->set) != NULL) { IPFW_UH_WUNLOCK(ch); nat64stl_free_config(cfg); return (EEXIST); } error = nat64stl_create_internal(ch, cfg, uc); if (error == 0) { /* Okay, let's link data */ - IPFW_WLOCK(ch); SRV_OBJECT(ch, cfg->no.kidx) = cfg; - IPFW_WUNLOCK(ch); - IPFW_UH_WUNLOCK(ch); return (0); } if (cfg->flags & NAT64STL_KIDX) ipfw_objhash_free_idx(ni, cfg->no.kidx); if (cfg->flags & NAT64STL_46T) ipfw_unref_table(ch, cfg->map46); if (cfg->flags & NAT64STL_64T) ipfw_unref_table(ch, cfg->map64); IPFW_UH_WUNLOCK(ch); nat64stl_free_config(cfg); return (error); } /* * Change existing nat64stl instance configuration. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_nat64stl_cfg ] * Reply: [ ipfw_obj_header ipfw_nat64stl_cfg ] * * Returns 0 on success */ static int nat64stl_config(struct ip_fw_chain *ch, ip_fw3_opheader *op, struct sockopt_data *sd) { ipfw_obj_header *oh; ipfw_nat64stl_cfg *uc; struct nat64stl_cfg *cfg; struct namedobj_instance *ni; if (sd->valsize != sizeof(*oh) + sizeof(*uc)) return (EINVAL); oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sizeof(*oh) + sizeof(*uc)); uc = (ipfw_nat64stl_cfg *)(oh + 1); if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || oh->ntlv.set >= IPFW_MAX_SETS) return (EINVAL); ni = CHAIN_TO_SRV(ch); if (sd->sopt->sopt_dir == SOPT_GET) { IPFW_UH_RLOCK(ch); cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_RUNLOCK(ch); return (EEXIST); } nat64stl_export_config(ch, cfg, uc); IPFW_UH_RUNLOCK(ch); return (0); } IPFW_UH_WLOCK(ch); cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_WUNLOCK(ch); return (EEXIST); } /* * For now allow to change only following values: * flags. */ cfg->flags = uc->flags & NAT64STL_FLAGSMASK; IPFW_UH_WUNLOCK(ch); return (0); } static void nat64stl_detach_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg) { IPFW_UH_WLOCK_ASSERT(ch); ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx); ipfw_unref_table(ch, cfg->map46); ipfw_unref_table(ch, cfg->map64); } /* * Destroys nat64 instance. * Data layout (v0)(current): * Request: [ ipfw_obj_header ] * * Returns 0 on success */ static int nat64stl_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_header *oh; struct nat64stl_cfg *cfg; if (sd->valsize != sizeof(*oh)) return (EINVAL); oh = (ipfw_obj_header *)sd->kbuf; if (ipfw_check_object_name_generic(oh->ntlv.name) != 0) return (EINVAL); IPFW_UH_WLOCK(ch); cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } if (cfg->no.refcnt > 0) { IPFW_UH_WUNLOCK(ch); return (EBUSY); } - IPFW_WLOCK(ch); SRV_OBJECT(ch, cfg->no.kidx) = NULL; - IPFW_WUNLOCK(ch); - nat64stl_detach_config(ch, cfg); IPFW_UH_WUNLOCK(ch); nat64stl_free_config(cfg); return (0); } /* * Lists all nat64stl instances currently available in kernel. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ] * Reply: [ ipfw_obj_lheader ipfw_nat64stl_cfg x N ] * * Returns 0 on success */ static int nat64stl_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_lheader *olh; struct nat64stl_dump_arg da; /* Check minimum header size */ if (sd->valsize < sizeof(ipfw_obj_lheader)) return (EINVAL); olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); IPFW_UH_RLOCK(ch); olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64STL_NAME); olh->objsize = sizeof(ipfw_nat64stl_cfg); olh->size = sizeof(*olh) + olh->count * olh->objsize; if (sd->valsize < olh->size) { IPFW_UH_RUNLOCK(ch); return (ENOMEM); } memset(&da, 0, sizeof(da)); da.ch = ch; da.sd = sd; ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da, IPFW_TLV_NAT64STL_NAME); IPFW_UH_RUNLOCK(ch); return (0); } #define __COPY_STAT_FIELD(_cfg, _stats, _field) \ (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field) static void export_stats(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, struct ipfw_nat64stl_stats *stats) { __COPY_STAT_FIELD(cfg, stats, opcnt64); __COPY_STAT_FIELD(cfg, stats, opcnt46); __COPY_STAT_FIELD(cfg, stats, ofrags); __COPY_STAT_FIELD(cfg, stats, ifrags); __COPY_STAT_FIELD(cfg, stats, oerrors); __COPY_STAT_FIELD(cfg, stats, noroute4); __COPY_STAT_FIELD(cfg, stats, noroute6); __COPY_STAT_FIELD(cfg, stats, noproto); __COPY_STAT_FIELD(cfg, stats, nomem); __COPY_STAT_FIELD(cfg, stats, dropped); } /* * Get nat64stl statistics. * Data layout (v0)(current): * Request: [ ipfw_obj_header ] * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]] * * Returns 0 on success */ static int nat64stl_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, struct sockopt_data *sd) { struct ipfw_nat64stl_stats stats; struct nat64stl_cfg *cfg; ipfw_obj_header *oh; ipfw_obj_ctlv *ctlv; size_t sz; sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); if (sd->valsize % sizeof(uint64_t)) return (EINVAL); if (sd->valsize < sz) return (ENOMEM); oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); if (oh == NULL) return (EINVAL); memset(&stats, 0, sizeof(stats)); IPFW_UH_RLOCK(ch); cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } export_stats(ch, cfg, &stats); IPFW_UH_RUNLOCK(ch); ctlv = (ipfw_obj_ctlv *)(oh + 1); memset(ctlv, 0, sizeof(*ctlv)); ctlv->head.type = IPFW_TLV_COUNTERS; ctlv->head.length = sz - sizeof(ipfw_obj_header); ctlv->count = sizeof(stats) / sizeof(uint64_t); ctlv->objsize = sizeof(uint64_t); ctlv->version = IPFW_NAT64_VERSION; memcpy(ctlv + 1, &stats, sizeof(stats)); return (0); } /* * Reset nat64stl statistics. * Data layout (v0)(current): * Request: [ ipfw_obj_header ] * * Returns 0 on success */ static int nat64stl_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, struct sockopt_data *sd) { struct nat64stl_cfg *cfg; ipfw_obj_header *oh; if (sd->valsize != sizeof(*oh)) return (EINVAL); oh = (ipfw_obj_header *)sd->kbuf; if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || oh->ntlv.set >= IPFW_MAX_SETS) return (EINVAL); IPFW_UH_WLOCK(ch); cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS); IPFW_UH_WUNLOCK(ch); return (0); } static struct ipfw_sopt_handler scodes[] = { { IP_FW_NAT64STL_CREATE, 0, HDIR_SET, nat64stl_create }, { IP_FW_NAT64STL_DESTROY,0, HDIR_SET, nat64stl_destroy }, { IP_FW_NAT64STL_CONFIG, 0, HDIR_BOTH, nat64stl_config }, { IP_FW_NAT64STL_LIST, 0, HDIR_GET, nat64stl_list }, { IP_FW_NAT64STL_STATS, 0, HDIR_GET, nat64stl_stats }, { IP_FW_NAT64STL_RESET_STATS,0, HDIR_SET, nat64stl_reset_stats }, }; static int nat64stl_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { ipfw_insn *icmd; icmd = cmd - 1; if (icmd->opcode != O_EXTERNAL_ACTION || icmd->arg1 != V_nat64stl_eid) return (1); *puidx = cmd->arg1; *ptype = 0; return (0); } static void nat64stl_update_arg1(ipfw_insn *cmd, uint16_t idx) { cmd->arg1 = idx; } static int nat64stl_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, struct named_object **pno) { int err; err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, IPFW_TLV_NAT64STL_NAME, pno); return (err); } static struct named_object * nat64stl_findbykidx(struct ip_fw_chain *ch, uint16_t idx) { struct namedobj_instance *ni; struct named_object *no; IPFW_UH_WLOCK_ASSERT(ch); ni = CHAIN_TO_SRV(ch); no = ipfw_objhash_lookup_kidx(ni, idx); KASSERT(no != NULL, ("NAT with index %d not found", idx)); return (no); } static int nat64stl_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd) { return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64STL_NAME, set, new_set, cmd)); } static struct opcode_obj_rewrite opcodes[] = { { .opcode = O_EXTERNAL_INSTANCE, .etlv = IPFW_TLV_EACTION /* just show it isn't table */, .classifier = nat64stl_classify, .update = nat64stl_update_arg1, .find_byname = nat64stl_findbyname, .find_bykidx = nat64stl_findbykidx, .manage_sets = nat64stl_manage_sets, }, }; static int destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct nat64stl_cfg *cfg; struct ip_fw_chain *ch; ch = (struct ip_fw_chain *)arg; cfg = (struct nat64stl_cfg *)SRV_OBJECT(ch, no->kidx); SRV_OBJECT(ch, no->kidx) = NULL; nat64stl_detach_config(ch, cfg); nat64stl_free_config(cfg); return (0); } int nat64stl_init(struct ip_fw_chain *ch, int first) { V_nat64stl_eid = ipfw_add_eaction(ch, ipfw_nat64stl, "nat64stl"); if (V_nat64stl_eid == 0) return (ENXIO); IPFW_ADD_SOPT_HANDLER(first, scodes); IPFW_ADD_OBJ_REWRITER(first, opcodes); return (0); } void nat64stl_uninit(struct ip_fw_chain *ch, int last) { IPFW_DEL_OBJ_REWRITER(last, opcodes); IPFW_DEL_SOPT_HANDLER(last, scodes); ipfw_del_eaction(ch, V_nat64stl_eid); /* * Since we already have deregistered external action, * our named objects become unaccessible via rules, because * all rules were truncated by ipfw_del_eaction(). * So, we can unlink and destroy our named objects without holding * IPFW_WLOCK(). */ IPFW_UH_WLOCK(ch); ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch, IPFW_TLV_NAT64STL_NAME); V_nat64stl_eid = 0; IPFW_UH_WUNLOCK(ch); } Index: stable/11/sys/netpfil/ipfw/nptv6/nptv6.c =================================================================== --- stable/11/sys/netpfil/ipfw/nptv6/nptv6.c (revision 324045) +++ stable/11/sys/netpfil/ipfw/nptv6/nptv6.c (revision 324046) @@ -1,894 +1,889 @@ /*- * Copyright (c) 2016 Yandex LLC * Copyright (c) 2016 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static VNET_DEFINE(uint16_t, nptv6_eid) = 0; #define V_nptv6_eid VNET(nptv6_eid) #define IPFW_TLV_NPTV6_NAME IPFW_TLV_EACTION_NAME(V_nptv6_eid) static struct nptv6_cfg *nptv6_alloc_config(const char *name, uint8_t set); static void nptv6_free_config(struct nptv6_cfg *cfg); static struct nptv6_cfg *nptv6_find(struct namedobj_instance *ni, const char *name, uint8_t set); static int nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp, int offset); static int nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp, int offset); #define NPTV6_LOOKUP(chain, cmd) \ (struct nptv6_cfg *)SRV_OBJECT((chain), (cmd)->arg1) #ifndef IN6_MASK_ADDR #define IN6_MASK_ADDR(a, m) do { \ (a)->s6_addr32[0] &= (m)->s6_addr32[0]; \ (a)->s6_addr32[1] &= (m)->s6_addr32[1]; \ (a)->s6_addr32[2] &= (m)->s6_addr32[2]; \ (a)->s6_addr32[3] &= (m)->s6_addr32[3]; \ } while (0) #endif #ifndef IN6_ARE_MASKED_ADDR_EQUAL #define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ (((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \ (((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \ (((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \ (((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 ) #endif #if 0 #define NPTV6_DEBUG(fmt, ...) do { \ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \ } while (0) #define NPTV6_IPDEBUG(fmt, ...) do { \ char _s[INET6_ADDRSTRLEN], _d[INET6_ADDRSTRLEN]; \ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \ } while (0) #else #define NPTV6_DEBUG(fmt, ...) #define NPTV6_IPDEBUG(fmt, ...) #endif static int nptv6_getlasthdr(struct nptv6_cfg *cfg, struct mbuf *m, int *offset) { struct ip6_hdr *ip6; struct ip6_hbh *hbh; int proto, hlen; hlen = (offset == NULL) ? 0: *offset; if (m->m_len < hlen) return (-1); ip6 = mtodo(m, hlen); hlen += sizeof(*ip6); proto = ip6->ip6_nxt; while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || proto == IPPROTO_DSTOPTS) { hbh = mtodo(m, hlen); if (m->m_len < hlen) return (-1); proto = hbh->ip6h_nxt; hlen += (hbh->ip6h_len + 1) << 3; } if (offset != NULL) *offset = hlen; return (proto); } static int nptv6_translate_icmpv6(struct nptv6_cfg *cfg, struct mbuf **mp, int offset) { struct icmp6_hdr *icmp6; struct ip6_hdr *ip6; struct mbuf *m; m = *mp; if (offset > m->m_len) return (-1); icmp6 = mtodo(m, offset); NPTV6_DEBUG("ICMPv6 type %d", icmp6->icmp6_type); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: case ICMP6_PACKET_TOO_BIG: case ICMP6_TIME_EXCEEDED: case ICMP6_PARAM_PROB: break; case ICMP6_ECHO_REQUEST: case ICMP6_ECHO_REPLY: /* nothing to translate */ return (0); default: /* * XXX: We can add some checks to not translate NDP and MLD * messages. Currently user must explicitly allow these message * types, otherwise packets will be dropped. */ return (-1); } offset += sizeof(*icmp6); if (offset + sizeof(*ip6) > m->m_pkthdr.len) return (-1); if (offset + sizeof(*ip6) > m->m_len) *mp = m = m_pullup(m, offset + sizeof(*ip6)); if (m == NULL) return (-1); ip6 = mtodo(m, offset); NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset, inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)), inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)), ip6->ip6_nxt); if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src, &cfg->external, &cfg->mask)) return (nptv6_rewrite_external(cfg, mp, offset)); else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst, &cfg->internal, &cfg->mask)) return (nptv6_rewrite_internal(cfg, mp, offset)); /* * Addresses in the inner IPv6 header doesn't matched to * our prefixes. */ return (-1); } static int nptv6_search_index(struct nptv6_cfg *cfg, struct in6_addr *a) { int idx; if (cfg->flags & NPTV6_48PLEN) return (3); /* Search suitable word index for adjustment */ for (idx = 4; idx < 8; idx++) if (a->s6_addr16[idx] != 0xffff) break; /* * RFC 6296 p3.7: If an NPTv6 Translator discovers a datagram with * an IID of all-zeros while performing address mapping, that * datagram MUST be dropped, and an ICMPv6 Parameter Problem error * SHOULD be generated. */ if (idx == 8 || (a->s6_addr32[2] == 0 && a->s6_addr32[3] == 0)) return (-1); return (idx); } static void nptv6_copy_addr(struct in6_addr *src, struct in6_addr *dst, struct in6_addr *mask) { int i; for (i = 0; i < 8 && mask->s6_addr8[i] != 0; i++) { dst->s6_addr8[i] &= ~mask->s6_addr8[i]; dst->s6_addr8[i] |= src->s6_addr8[i] & mask->s6_addr8[i]; } } static int nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp, int offset) { struct in6_addr *addr; struct ip6_hdr *ip6; int idx, proto; uint16_t adj; ip6 = mtodo(*mp, offset); NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset, inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)), inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)), ip6->ip6_nxt); if (offset == 0) addr = &ip6->ip6_src; else { /* * When we rewriting inner IPv6 header, we need to rewrite * destination address back to external prefix. The datagram in * the ICMPv6 payload should looks like it was send from * external prefix. */ addr = &ip6->ip6_dst; } idx = nptv6_search_index(cfg, addr); if (idx < 0) { /* * Do not send ICMPv6 error when offset isn't zero. * This means we are rewriting inner IPv6 header in the * ICMPv6 error message. */ if (offset == 0) { icmp6_error2(*mp, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif); *mp = NULL; } return (IP_FW_DENY); } adj = addr->s6_addr16[idx]; nptv6_copy_addr(&cfg->external, addr, &cfg->mask); adj = cksum_add(adj, cfg->adjustment); if (adj == 0xffff) adj = 0; addr->s6_addr16[idx] = adj; if (offset == 0) { /* * We may need to translate addresses in the inner IPv6 * header for ICMPv6 error messages. */ proto = nptv6_getlasthdr(cfg, *mp, &offset); if (proto < 0 || (proto == IPPROTO_ICMPV6 && nptv6_translate_icmpv6(cfg, mp, offset) != 0)) return (IP_FW_DENY); NPTV6STAT_INC(cfg, in2ex); } return (0); } static int nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp, int offset) { struct in6_addr *addr; struct ip6_hdr *ip6; int idx, proto; uint16_t adj; ip6 = mtodo(*mp, offset); NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset, inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)), inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)), ip6->ip6_nxt); if (offset == 0) addr = &ip6->ip6_dst; else { /* * When we rewriting inner IPv6 header, we need to rewrite * source address back to internal prefix. The datagram in * the ICMPv6 payload should looks like it was send from * internal prefix. */ addr = &ip6->ip6_src; } idx = nptv6_search_index(cfg, addr); if (idx < 0) { /* * Do not send ICMPv6 error when offset isn't zero. * This means we are rewriting inner IPv6 header in the * ICMPv6 error message. */ if (offset == 0) { icmp6_error2(*mp, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif); *mp = NULL; } return (IP_FW_DENY); } adj = addr->s6_addr16[idx]; nptv6_copy_addr(&cfg->internal, addr, &cfg->mask); adj = cksum_add(adj, ~cfg->adjustment); if (adj == 0xffff) adj = 0; addr->s6_addr16[idx] = adj; if (offset == 0) { /* * We may need to translate addresses in the inner IPv6 * header for ICMPv6 error messages. */ proto = nptv6_getlasthdr(cfg, *mp, &offset); if (proto < 0 || (proto == IPPROTO_ICMPV6 && nptv6_translate_icmpv6(cfg, mp, offset) != 0)) return (IP_FW_DENY); NPTV6STAT_INC(cfg, ex2in); } return (0); } /* * ipfw external action handler. */ static int ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args, ipfw_insn *cmd, int *done) { struct ip6_hdr *ip6; struct nptv6_cfg *cfg; ipfw_insn *icmd; int ret; *done = 0; /* try next rule if not matched */ ret = IP_FW_DENY; icmd = cmd + 1; if (cmd->opcode != O_EXTERNAL_ACTION || cmd->arg1 != V_nptv6_eid || icmd->opcode != O_EXTERNAL_INSTANCE || (cfg = NPTV6_LOOKUP(chain, icmd)) == NULL) return (ret); /* * We need act as router, so when forwarding is disabled - * do nothing. */ if (V_ip6_forwarding == 0 || args->f_id.addr_type != 6) return (ret); /* * NOTE: we expect ipfw_chk() did m_pullup() up to upper level * protocol's headers. Also we skip some checks, that ip6_input(), * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. */ ip6 = mtod(args->m, struct ip6_hdr *); NPTV6_IPDEBUG("eid %u, oid %u, %s -> %s %d", cmd->arg1, icmd->arg1, inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)), inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)), ip6->ip6_nxt); if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src, &cfg->internal, &cfg->mask)) { /* * XXX: Do not translate packets when both src and dst * are from internal prefix. */ if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst, &cfg->internal, &cfg->mask)) return (ret); ret = nptv6_rewrite_internal(cfg, &args->m, 0); } else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst, &cfg->external, &cfg->mask)) ret = nptv6_rewrite_external(cfg, &args->m, 0); else return (ret); /* * If address wasn't rewrited - free mbuf and terminate the search. */ if (ret != 0) { if (args->m != NULL) { m_freem(args->m); args->m = NULL; /* mark mbuf as consumed */ } NPTV6STAT_INC(cfg, dropped); *done = 1; } else { /* Terminate the search if one_pass is set */ *done = V_fw_one_pass; /* Update args->f_id when one_pass is off */ if (*done == 0) { ip6 = mtod(args->m, struct ip6_hdr *); args->f_id.src_ip6 = ip6->ip6_src; args->f_id.dst_ip6 = ip6->ip6_dst; } } return (ret); } static struct nptv6_cfg * nptv6_alloc_config(const char *name, uint8_t set) { struct nptv6_cfg *cfg; cfg = malloc(sizeof(struct nptv6_cfg), M_IPFW, M_WAITOK | M_ZERO); COUNTER_ARRAY_ALLOC(cfg->stats, NPTV6STATS, M_WAITOK); cfg->no.name = cfg->name; cfg->no.etlv = IPFW_TLV_NPTV6_NAME; cfg->no.set = set; strlcpy(cfg->name, name, sizeof(cfg->name)); return (cfg); } static void nptv6_free_config(struct nptv6_cfg *cfg) { COUNTER_ARRAY_FREE(cfg->stats, NPTV6STATS); free(cfg, M_IPFW); } static void nptv6_export_config(struct ip_fw_chain *ch, struct nptv6_cfg *cfg, ipfw_nptv6_cfg *uc) { uc->internal = cfg->internal; uc->external = cfg->external; uc->plen = cfg->plen; uc->flags = cfg->flags & NPTV6_FLAGSMASK; uc->set = cfg->no.set; strlcpy(uc->name, cfg->no.name, sizeof(uc->name)); } struct nptv6_dump_arg { struct ip_fw_chain *ch; struct sockopt_data *sd; }; static int export_config_cb(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct nptv6_dump_arg *da = (struct nptv6_dump_arg *)arg; ipfw_nptv6_cfg *uc; uc = (ipfw_nptv6_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc)); nptv6_export_config(da->ch, (struct nptv6_cfg *)no, uc); return (0); } static struct nptv6_cfg * nptv6_find(struct namedobj_instance *ni, const char *name, uint8_t set) { struct nptv6_cfg *cfg; cfg = (struct nptv6_cfg *)ipfw_objhash_lookup_name_type(ni, set, IPFW_TLV_NPTV6_NAME, name); return (cfg); } static void nptv6_calculate_adjustment(struct nptv6_cfg *cfg) { uint16_t i, e; uint16_t *p; /* Calculate checksum of internal prefix */ for (i = 0, p = (uint16_t *)&cfg->internal; p < (uint16_t *)(&cfg->internal + 1); p++) i = cksum_add(i, *p); /* Calculate checksum of external prefix */ for (e = 0, p = (uint16_t *)&cfg->external; p < (uint16_t *)(&cfg->external + 1); p++) e = cksum_add(e, *p); /* Adjustment value for Int->Ext direction */ cfg->adjustment = cksum_add(~e, i); } /* * Creates new NPTv6 instance. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ipfw_nptv6_cfg ] * * Returns 0 on success */ static int nptv6_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct in6_addr mask; ipfw_obj_lheader *olh; ipfw_nptv6_cfg *uc; struct namedobj_instance *ni; struct nptv6_cfg *cfg; if (sd->valsize != sizeof(*olh) + sizeof(*uc)) return (EINVAL); olh = (ipfw_obj_lheader *)sd->kbuf; uc = (ipfw_nptv6_cfg *)(olh + 1); if (ipfw_check_object_name_generic(uc->name) != 0) return (EINVAL); if (uc->plen < 8 || uc->plen > 64 || uc->set >= IPFW_MAX_SETS) return (EINVAL); if (IN6_IS_ADDR_MULTICAST(&uc->internal) || IN6_IS_ADDR_MULTICAST(&uc->external) || IN6_IS_ADDR_UNSPECIFIED(&uc->internal) || IN6_IS_ADDR_UNSPECIFIED(&uc->external) || IN6_IS_ADDR_LINKLOCAL(&uc->internal) || IN6_IS_ADDR_LINKLOCAL(&uc->external)) return (EINVAL); in6_prefixlen2mask(&mask, uc->plen); if (IN6_ARE_MASKED_ADDR_EQUAL(&uc->internal, &uc->external, &mask)) return (EINVAL); ni = CHAIN_TO_SRV(ch); IPFW_UH_RLOCK(ch); if (nptv6_find(ni, uc->name, uc->set) != NULL) { IPFW_UH_RUNLOCK(ch); return (EEXIST); } IPFW_UH_RUNLOCK(ch); cfg = nptv6_alloc_config(uc->name, uc->set); cfg->plen = uc->plen; if (cfg->plen <= 48) cfg->flags |= NPTV6_48PLEN; cfg->internal = uc->internal; cfg->external = uc->external; cfg->mask = mask; IN6_MASK_ADDR(&cfg->internal, &mask); IN6_MASK_ADDR(&cfg->external, &mask); nptv6_calculate_adjustment(cfg); IPFW_UH_WLOCK(ch); if (ipfw_objhash_alloc_idx(ni, &cfg->no.kidx) != 0) { IPFW_UH_WUNLOCK(ch); nptv6_free_config(cfg); return (ENOSPC); } ipfw_objhash_add(ni, &cfg->no); - IPFW_WLOCK(ch); SRV_OBJECT(ch, cfg->no.kidx) = cfg; - IPFW_WUNLOCK(ch); IPFW_UH_WUNLOCK(ch); return (0); } /* * Destroys NPTv6 instance. * Data layout (v0)(current): * Request: [ ipfw_obj_header ] * * Returns 0 on success */ static int nptv6_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_header *oh; struct nptv6_cfg *cfg; if (sd->valsize != sizeof(*oh)) return (EINVAL); oh = (ipfw_obj_header *)sd->kbuf; if (ipfw_check_object_name_generic(oh->ntlv.name) != 0) return (EINVAL); IPFW_UH_WLOCK(ch); cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } if (cfg->no.refcnt > 0) { IPFW_UH_WUNLOCK(ch); return (EBUSY); } - IPFW_WLOCK(ch); SRV_OBJECT(ch, cfg->no.kidx) = NULL; - IPFW_WUNLOCK(ch); - ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx); IPFW_UH_WUNLOCK(ch); nptv6_free_config(cfg); return (0); } /* * Get or change nptv6 instance config. * Request: [ ipfw_obj_header [ ipfw_nptv6_cfg ] ] */ static int nptv6_config(struct ip_fw_chain *chain, ip_fw3_opheader *op, struct sockopt_data *sd) { return (EOPNOTSUPP); } /* * Lists all NPTv6 instances currently available in kernel. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ] * Reply: [ ipfw_obj_lheader ipfw_nptv6_cfg x N ] * * Returns 0 on success */ static int nptv6_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_lheader *olh; struct nptv6_dump_arg da; /* Check minimum header size */ if (sd->valsize < sizeof(ipfw_obj_lheader)) return (EINVAL); olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); IPFW_UH_RLOCK(ch); olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), IPFW_TLV_NPTV6_NAME); olh->objsize = sizeof(ipfw_nptv6_cfg); olh->size = sizeof(*olh) + olh->count * olh->objsize; if (sd->valsize < olh->size) { IPFW_UH_RUNLOCK(ch); return (ENOMEM); } memset(&da, 0, sizeof(da)); da.ch = ch; da.sd = sd; ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da, IPFW_TLV_NPTV6_NAME); IPFW_UH_RUNLOCK(ch); return (0); } #define __COPY_STAT_FIELD(_cfg, _stats, _field) \ (_stats)->_field = NPTV6STAT_FETCH(_cfg, _field) static void export_stats(struct ip_fw_chain *ch, struct nptv6_cfg *cfg, struct ipfw_nptv6_stats *stats) { __COPY_STAT_FIELD(cfg, stats, in2ex); __COPY_STAT_FIELD(cfg, stats, ex2in); __COPY_STAT_FIELD(cfg, stats, dropped); } /* * Get NPTv6 statistics. * Data layout (v0)(current): * Request: [ ipfw_obj_header ] * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]] * * Returns 0 on success */ static int nptv6_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, struct sockopt_data *sd) { struct ipfw_nptv6_stats stats; struct nptv6_cfg *cfg; ipfw_obj_header *oh; ipfw_obj_ctlv *ctlv; size_t sz; sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); if (sd->valsize % sizeof(uint64_t)) return (EINVAL); if (sd->valsize < sz) return (ENOMEM); oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); if (oh == NULL) return (EINVAL); if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || oh->ntlv.set >= IPFW_MAX_SETS) return (EINVAL); memset(&stats, 0, sizeof(stats)); IPFW_UH_RLOCK(ch); cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } export_stats(ch, cfg, &stats); IPFW_UH_RUNLOCK(ch); ctlv = (ipfw_obj_ctlv *)(oh + 1); memset(ctlv, 0, sizeof(*ctlv)); ctlv->head.type = IPFW_TLV_COUNTERS; ctlv->head.length = sz - sizeof(ipfw_obj_header); ctlv->count = sizeof(stats) / sizeof(uint64_t); ctlv->objsize = sizeof(uint64_t); ctlv->version = 1; memcpy(ctlv + 1, &stats, sizeof(stats)); return (0); } /* * Reset NPTv6 statistics. * Data layout (v0)(current): * Request: [ ipfw_obj_header ] * * Returns 0 on success */ static int nptv6_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, struct sockopt_data *sd) { struct nptv6_cfg *cfg; ipfw_obj_header *oh; if (sd->valsize != sizeof(*oh)) return (EINVAL); oh = (ipfw_obj_header *)sd->kbuf; if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || oh->ntlv.set >= IPFW_MAX_SETS) return (EINVAL); IPFW_UH_WLOCK(ch); cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); if (cfg == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } COUNTER_ARRAY_ZERO(cfg->stats, NPTV6STATS); IPFW_UH_WUNLOCK(ch); return (0); } static struct ipfw_sopt_handler scodes[] = { { IP_FW_NPTV6_CREATE, 0, HDIR_SET, nptv6_create }, { IP_FW_NPTV6_DESTROY,0, HDIR_SET, nptv6_destroy }, { IP_FW_NPTV6_CONFIG, 0, HDIR_BOTH, nptv6_config }, { IP_FW_NPTV6_LIST, 0, HDIR_GET, nptv6_list }, { IP_FW_NPTV6_STATS, 0, HDIR_GET, nptv6_stats }, { IP_FW_NPTV6_RESET_STATS,0, HDIR_SET, nptv6_reset_stats }, }; static int nptv6_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { ipfw_insn *icmd; icmd = cmd - 1; NPTV6_DEBUG("opcode %d, arg1 %d, opcode0 %d, arg1 %d", cmd->opcode, cmd->arg1, icmd->opcode, icmd->arg1); if (icmd->opcode != O_EXTERNAL_ACTION || icmd->arg1 != V_nptv6_eid) return (1); *puidx = cmd->arg1; *ptype = 0; return (0); } static void nptv6_update_arg1(ipfw_insn *cmd, uint16_t idx) { cmd->arg1 = idx; NPTV6_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1); } static int nptv6_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, struct named_object **pno) { int err; err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, IPFW_TLV_NPTV6_NAME, pno); NPTV6_DEBUG("uidx %u, type %u, err %d", ti->uidx, ti->type, err); return (err); } static struct named_object * nptv6_findbykidx(struct ip_fw_chain *ch, uint16_t idx) { struct namedobj_instance *ni; struct named_object *no; IPFW_UH_WLOCK_ASSERT(ch); ni = CHAIN_TO_SRV(ch); no = ipfw_objhash_lookup_kidx(ni, idx); KASSERT(no != NULL, ("NPT with index %d not found", idx)); NPTV6_DEBUG("kidx %u -> %s", idx, no->name); return (no); } static int nptv6_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd) { return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NPTV6_NAME, set, new_set, cmd)); } static struct opcode_obj_rewrite opcodes[] = { { .opcode = O_EXTERNAL_INSTANCE, .etlv = IPFW_TLV_EACTION /* just show it isn't table */, .classifier = nptv6_classify, .update = nptv6_update_arg1, .find_byname = nptv6_findbyname, .find_bykidx = nptv6_findbykidx, .manage_sets = nptv6_manage_sets, }, }; static int destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct nptv6_cfg *cfg; struct ip_fw_chain *ch; ch = (struct ip_fw_chain *)arg; IPFW_UH_WLOCK_ASSERT(ch); cfg = (struct nptv6_cfg *)SRV_OBJECT(ch, no->kidx); SRV_OBJECT(ch, no->kidx) = NULL; ipfw_objhash_del(ni, &cfg->no); ipfw_objhash_free_idx(ni, cfg->no.kidx); nptv6_free_config(cfg); return (0); } int nptv6_init(struct ip_fw_chain *ch, int first) { V_nptv6_eid = ipfw_add_eaction(ch, ipfw_nptv6, "nptv6"); if (V_nptv6_eid == 0) return (ENXIO); IPFW_ADD_SOPT_HANDLER(first, scodes); IPFW_ADD_OBJ_REWRITER(first, opcodes); return (0); } void nptv6_uninit(struct ip_fw_chain *ch, int last) { IPFW_DEL_OBJ_REWRITER(last, opcodes); IPFW_DEL_SOPT_HANDLER(last, scodes); ipfw_del_eaction(ch, V_nptv6_eid); /* * Since we already have deregistered external action, * our named objects become unaccessible via rules, because * all rules were truncated by ipfw_del_eaction(). * So, we can unlink and destroy our named objects without holding * IPFW_WLOCK(). */ IPFW_UH_WLOCK(ch); ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch, IPFW_TLV_NPTV6_NAME); V_nptv6_eid = 0; IPFW_UH_WUNLOCK(ch); } Index: stable/11 =================================================================== --- stable/11 (revision 324045) +++ stable/11 (revision 324046) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r323836