Index: projects/ipfw/sys/netpfil/ipfw/ip_fw_private.h =================================================================== --- projects/ipfw/sys/netpfil/ipfw/ip_fw_private.h (revision 269434) +++ projects/ipfw/sys/netpfil/ipfw/ip_fw_private.h (revision 269435) @@ -1,565 +1,566 @@ /*- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _IPFW2_PRIVATE_H #define _IPFW2_PRIVATE_H /* * Internal constants and data structures used by ipfw components * and not meant to be exported outside the kernel. */ #ifdef _KERNEL /* * For platforms that do not have SYSCTL support, we wrap the * SYSCTL_* into a function (one per file) to collect the values * into an array at module initialization. The wrapping macros, * SYSBEGIN() and SYSEND, are empty in the default case. */ #ifndef SYSBEGIN #define SYSBEGIN(x) #endif #ifndef SYSEND #define SYSEND #endif /* Return values from ipfw_chk() */ enum { IP_FW_PASS = 0, IP_FW_DENY, IP_FW_DIVERT, IP_FW_TEE, IP_FW_DUMMYNET, IP_FW_NETGRAPH, IP_FW_NGTEE, IP_FW_NAT, IP_FW_REASS, }; /* * Structure for collecting parameters to dummynet for ip6_output forwarding */ struct _ip6dn_args { struct ip6_pktopts *opt_or; struct route_in6 ro_or; int flags_or; struct ip6_moptions *im6o_or; struct ifnet *origifp_or; struct ifnet *ifp_or; struct sockaddr_in6 dst_or; u_long mtu_or; struct route_in6 ro_pmtu_or; }; /* * Arguments for calling ipfw_chk() and dummynet_io(). We put them * all into a structure because this way it is easier and more * efficient to pass variables around and extend the interface. */ struct ip_fw_args { struct mbuf *m; /* the mbuf chain */ struct ifnet *oif; /* output interface */ struct sockaddr_in *next_hop; /* forward address */ struct sockaddr_in6 *next_hop6; /* ipv6 forward address */ /* * On return, it points to the matching rule. * On entry, rule.slot > 0 means the info is valid and * contains the starting rule for an ipfw search. * If chain_id == chain->id && slot >0 then jump to that slot. * Otherwise, we locate the first rule >= rulenum:rule_id */ struct ipfw_rule_ref rule; /* match/restart info */ struct ether_header *eh; /* for bridged packets */ struct ipfw_flow_id f_id; /* grabbed from IP header */ //uint32_t cookie; /* a cookie depending on rule action */ struct inpcb *inp; struct _ip6dn_args dummypar; /* dummynet->ip6_output */ struct sockaddr_in hopstore; /* store here if cannot use a pointer */ }; MALLOC_DECLARE(M_IPFW); /* * Hooks sometime need to know the direction of the packet * (divert, dummynet, netgraph, ...) * We use a generic definition here, with bit0-1 indicating the * direction, bit 2 indicating layer2 or 3, bit 3-4 indicating the * specific protocol * indicating the protocol (if necessary) */ enum { DIR_MASK = 0x3, DIR_OUT = 0, DIR_IN = 1, DIR_FWD = 2, DIR_DROP = 3, PROTO_LAYER2 = 0x4, /* set for layer 2 */ /* PROTO_DEFAULT = 0, */ PROTO_IPV4 = 0x08, PROTO_IPV6 = 0x10, PROTO_IFB = 0x0c, /* layer2 + ifbridge */ /* PROTO_OLDBDG = 0x14, unused, old bridge */ }; /* wrapper for freeing a packet, in case we need to do more work */ #ifndef FREE_PKT #if defined(__linux__) || defined(_WIN32) #define FREE_PKT(m) netisr_dispatch(-1, m) #else #define FREE_PKT(m) m_freem(m) #endif #endif /* !FREE_PKT */ /* * Function definitions. */ /* attach (arg = 1) or detach (arg = 0) hooks */ int ipfw_attach_hooks(int); #ifdef NOTYET void ipfw_nat_destroy(void); #endif /* In ip_fw_log.c */ struct ip; void ipfw_log_bpf(int); void ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg, struct ip *ip); VNET_DECLARE(u_int64_t, norule_counter); #define V_norule_counter VNET(norule_counter) VNET_DECLARE(int, verbose_limit); #define V_verbose_limit VNET(verbose_limit) /* In ip_fw_dynamic.c */ enum { /* result for matching dynamic rules */ MATCH_REVERSE = 0, MATCH_FORWARD, MATCH_NONE, MATCH_UNKNOWN, }; /* * The lock for dynamic rules is only used once outside the file, * and only to release the result of lookup_dyn_rule(). * Eventually we may implement it with a callback on the function. */ struct ip_fw_chain; struct sockopt_data; void ipfw_expire_dyn_rules(struct ip_fw_chain *, struct ip_fw *, int); void ipfw_dyn_unlock(ipfw_dyn_rule *q); struct tcphdr; struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *, u_int32_t, u_int32_t, int); int ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, struct ip_fw_args *args, uint32_t tablearg); ipfw_dyn_rule *ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, struct tcphdr *tcp); void ipfw_remove_dyn_children(struct ip_fw *rule); void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep); int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd); void ipfw_dyn_init(struct ip_fw_chain *); /* per-vnet initialization */ void ipfw_dyn_uninit(int); /* per-vnet deinitialization */ int ipfw_dyn_len(void); int ipfw_dyn_get_count(void); /* common variables */ VNET_DECLARE(int, fw_one_pass); #define V_fw_one_pass VNET(fw_one_pass) VNET_DECLARE(int, fw_verbose); #define V_fw_verbose VNET(fw_verbose) VNET_DECLARE(struct ip_fw_chain, layer3_chain); #define V_layer3_chain VNET(layer3_chain) VNET_DECLARE(u_int32_t, set_disable); #define V_set_disable VNET(set_disable) VNET_DECLARE(int, autoinc_step); #define V_autoinc_step VNET(autoinc_step) VNET_DECLARE(unsigned int, fw_tables_max); #define V_fw_tables_max VNET(fw_tables_max) VNET_DECLARE(unsigned int, fw_tables_sets); #define V_fw_tables_sets VNET(fw_tables_sets) struct tables_config; #ifdef _KERNEL typedef struct ip_fw_cntr { uint64_t pcnt; /* Packet counter */ uint64_t bcnt; /* Byte counter */ uint64_t timestamp; /* tv_sec of last match */ } ip_fw_cntr; /* * Here we have the structure representing an ipfw rule. * * It starts with a general area * followed by an array of one or more instructions, which the code * accesses as an array of 32-bit values. * * Given a rule pointer r: * * r->cmd is the start of the first instruction. * ACTION_PTR(r) is the start of the first action (things to do * once a rule matched). */ struct ip_fw { uint16_t act_ofs; /* offset of action in 32-bit units */ uint16_t cmd_len; /* # of 32-bit words in cmd */ uint16_t rulenum; /* rule number */ uint8_t set; /* rule set (0..31) */ uint8_t flags; /* currently unused */ counter_u64_t cntr; /* Pointer to rule counters */ uint32_t timestamp; /* tv_sec of last match */ uint32_t id; /* rule id */ struct ip_fw *x_next; /* linked list of rules */ struct ip_fw *next_rule; /* ptr to next [skipto] rule */ ipfw_insn cmd[1]; /* storage for commands */ }; #endif struct ip_fw_chain { struct ip_fw **map; /* array of rule ptrs to ease lookup */ uint32_t id; /* ruleset id */ int n_rules; /* number of static rules */ LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */ void *tablestate; /* runtime table info */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t rwmtx; #else struct rwlock rwmtx; #endif int static_len; /* total len of static rules (v0) */ uint32_t gencnt; /* NAT generation count */ struct ip_fw *reap; /* list of rules to reap */ struct ip_fw *default_rule; struct tables_config *tblcfg; /* tables module data */ void *ifcfg; /* interface module data */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t uh_lock; #else struct rwlock uh_lock; /* lock for upper half */ #endif }; struct namedobj_instance; struct named_object { TAILQ_ENTRY(named_object) nn_next; /* namehash */ TAILQ_ENTRY(named_object) nv_next; /* valuehash */ char *name; /* object name */ uint8_t type; /* object type */ uint8_t compat; /* Object name is number */ uint16_t kidx; /* object kernel index */ uint16_t uidx; /* userland idx for compat records */ uint32_t set; /* set object belongs to */ uint32_t refcnt; /* number of references */ }; TAILQ_HEAD(namedobjects_head, named_object); struct sockopt; /* used by tcp_var.h */ struct sockopt_data { caddr_t kbuf; /* allocated buffer */ size_t ksize; /* given buffer size */ size_t koff; /* data already used */ size_t kavail; /* number of bytes available */ size_t ktotal; /* total bytes pushed */ struct sockopt *sopt; /* socket data */ + caddr_t sopt_val; /* sopt user buffer */ size_t valsize; /* original data size */ }; struct ipfw_ifc; typedef void (ipfw_ifc_cb)(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex); struct ipfw_iface { struct named_object no; char ifname[64]; int resolved; uint16_t ifindex; uint16_t spare; uint64_t gencnt; TAILQ_HEAD(, ipfw_ifc) consumers; }; struct ipfw_ifc { TAILQ_ENTRY(ipfw_ifc) next; struct ipfw_iface *iface; ipfw_ifc_cb *cb; void *cbdata; int linked; int spare; }; /* Macro for working with various counters */ #ifdef USERSPACE #define IPFW_INC_RULE_COUNTER(_cntr, _bytes) do { \ (_cntr)->pcnt++; \ (_cntr)->bcnt += _bytes; \ (_cntr)->timestamp = time_uptime; \ } while (0) #define IPFW_INC_DYN_COUNTER(_cntr, _bytes) do { \ (_cntr)->pcnt++; \ (_cntr)->bcnt += _bytes; \ } while (0) #define IPFW_ZERO_RULE_COUNTER(_cntr) do { \ (_cntr)->pcnt = 0; \ (_cntr)->bcnt = 0; \ (_cntr)->timestamp = 0; \ } while (0) #define IPFW_ZERO_DYN_COUNTER(_cntr) do { \ (_cntr)->pcnt = 0; \ (_cntr)->bcnt = 0; \ } while (0) #else #define IPFW_INC_RULE_COUNTER(_cntr, _bytes) do { \ counter_u64_add((_cntr)->cntr, 1); \ counter_u64_add((_cntr)->cntr + 1, _bytes); \ if ((_cntr)->timestamp != time_uptime) \ (_cntr)->timestamp = time_uptime; \ } while (0) #define IPFW_INC_DYN_COUNTER(_cntr, _bytes) do { \ (_cntr)->pcnt++; \ (_cntr)->bcnt += _bytes; \ } while (0) #define IPFW_ZERO_RULE_COUNTER(_cntr) do { \ counter_u64_zero((_cntr)->cntr); \ counter_u64_zero((_cntr)->cntr + 1); \ (_cntr)->timestamp = 0; \ } while (0) #define IPFW_ZERO_DYN_COUNTER(_cntr) do { \ (_cntr)->pcnt = 0; \ (_cntr)->bcnt = 0; \ } while (0) #endif #define IP_FW_ARG_TABLEARG(a) (((a) == IP_FW_TABLEARG) ? tablearg : (a)) /* * The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c * so the variable and the macros must be here. */ #define IPFW_LOCK_INIT(_chain) do { \ rw_init(&(_chain)->rwmtx, "IPFW static rules"); \ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ } while (0) #define IPFW_LOCK_DESTROY(_chain) do { \ rw_destroy(&(_chain)->rwmtx); \ rw_destroy(&(_chain)->uh_lock); \ } while (0) #define IPFW_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_RLOCKED) #define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED) #define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx) #define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx) #define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx) #define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx) #define IPFW_PF_RLOCK(p) IPFW_RLOCK(p) #define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED) #define IPFW_UH_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_WLOCKED) #define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock) #define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock) #define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock) #define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock) struct obj_idx { uint16_t uidx; /* internal index supplied by userland */ uint16_t kidx; /* kernel object index */ uint16_t off; /* tlv offset from rule end in 4-byte words */ uint8_t new; /* index is newly-allocated */ uint8_t type; /* object type within its category */ }; struct rule_check_info { uint16_t table_opcodes; /* count of opcodes referencing table */ uint16_t new_tables; /* count of opcodes referencing table */ uint16_t urule_numoff; /* offset of rulenum in bytes */ uint8_t version; /* rule version */ ipfw_obj_ctlv *ctlv; /* name TLV containter */ struct ip_fw *krule; /* resulting rule pointer */ caddr_t urule; /* original rule pointer */ struct obj_idx obuf[8]; /* table references storage */ }; /* Legacy interface support */ /* * FreeBSD 8 export rule format */ struct ip_fw_rule0 { struct ip_fw *x_next; /* linked list of rules */ struct ip_fw *next_rule; /* ptr to next [skipto] rule */ /* 'next_rule' is used to pass up 'set_disable' status */ uint16_t act_ofs; /* offset of action in 32-bit units */ uint16_t cmd_len; /* # of 32-bit words in cmd */ uint16_t rulenum; /* rule number */ uint8_t set; /* rule set (0..31) */ uint8_t _pad; /* padding */ uint32_t id; /* rule id */ /* These fields are present in all rules. */ uint64_t pcnt; /* Packet counter */ uint64_t bcnt; /* Byte counter */ uint32_t timestamp; /* tv_sec of last match */ ipfw_insn cmd[1]; /* storage for commands */ }; struct ip_fw_bcounter0 { uint64_t pcnt; /* Packet counter */ uint64_t bcnt; /* Byte counter */ uint32_t timestamp; /* tv_sec of last match */ }; /* Kernel rule length */ /* * RULE _K_ SIZE _V_ -> * get kernel size from userland rool version _V_. * RULE _U_ SIZE _V_ -> * get user size version _V_ from kernel rule * RULESIZE _V_ -> * get user size rule length */ /* FreeBSD8 <> current kernel format */ #define RULEUSIZE0(r) (sizeof(struct ip_fw_rule0) + (r)->cmd_len * 4 - 4) #define RULEKSIZE0(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8) /* FreeBSD11 <> current kernel format */ #define RULEUSIZE1(r) (roundup2(sizeof(struct ip_fw_rule) + \ (r)->cmd_len * 4 - 4, 8)) #define RULEKSIZE1(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8) /* In ip_fw_iface.c */ int ipfw_iface_init(void); void ipfw_iface_destroy(void); void vnet_ipfw_iface_destroy(struct ip_fw_chain *ch); int ipfw_iface_ref(struct ip_fw_chain *ch, char *name, struct ipfw_ifc *ic); void ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic); void ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); void ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); int ipfw_list_ifaces(struct ip_fw_chain *ch, struct sockopt_data *sd); /* In ip_fw_sockopt.c */ int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id); int ipfw_ctl(struct sockopt *sopt); int ipfw_ctl3(struct sockopt *sopt); int ipfw_chk(struct ip_fw_args *args); void ipfw_reap_rules(struct ip_fw *head); void ipfw_init_counters(void); void ipfw_destroy_counters(void); struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize); caddr_t ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed); caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed); typedef void (objhash_cb_t)(struct namedobj_instance *ni, struct named_object *, void *arg); struct namedobj_instance *ipfw_objhash_create(uint32_t items); void ipfw_objhash_destroy(struct namedobj_instance *); void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks); void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, void **idx, int *blocks); void ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, void **idx, int *blocks); void ipfw_objhash_bitmap_free(void *idx, int blocks); struct named_object *ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name); struct named_object *ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t idx); int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a, struct named_object *b); void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no); void ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no); uint32_t ipfw_objhash_count(struct namedobj_instance *ni); void ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, void *arg); int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx); int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx); /* In ip_fw_table.c */ struct table_info; typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val); int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, void *paddr, uint32_t *val); int ipfw_init_tables(struct ip_fw_chain *ch); int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables); void ipfw_destroy_tables(struct ip_fw_chain *ch); /* In ip_fw_nat.c -- XXX to be moved to ip_var.h */ extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int); typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *); typedef int ipfw_nat_cfg_t(struct sockopt *); VNET_DECLARE(int, ipfw_nat_ready); #define V_ipfw_nat_ready VNET(ipfw_nat_ready) #define IPFW_NAT_LOADED (V_ipfw_nat_ready) extern ipfw_nat_t *ipfw_nat_ptr; extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr; extern ipfw_nat_cfg_t *ipfw_nat_del_ptr; extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; #endif /* _KERNEL */ #endif /* _IPFW2_PRIVATE_H */ Index: projects/ipfw/sys/netpfil/ipfw/ip_fw_sockopt.c =================================================================== --- projects/ipfw/sys/netpfil/ipfw/ip_fw_sockopt.c (revision 269434) +++ projects/ipfw/sys/netpfil/ipfw/ip_fw_sockopt.c (revision 269435) @@ -1,2706 +1,2715 @@ /*- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa * * Supported by: Valeria Paoli * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Sockopt support for ipfw. The routines here implement * the upper half of the ipfw code. */ #include "opt_ipfw.h" #include "opt_inet.h" #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ #include "opt_inet6.h" #include #include #include #include /* struct m_tag used by nested headers */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* hooks */ #include #include #include #ifdef MAC #include #endif static int check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci); static int check_ipfw_rule1(struct ip_fw_rule *rule, int size, struct rule_check_info *ci); static int check_ipfw_rule0(struct ip_fw_rule0 *rule, int size, struct rule_check_info *ci); #define NAMEDOBJ_HASH_SIZE 32 struct namedobj_instance { struct namedobjects_head *names; struct namedobjects_head *values; uint32_t nn_size; /* names hash size */ uint32_t nv_size; /* number hash size */ u_long *idx_mask; /* used items bitmask */ uint32_t max_blocks; /* number of "long" blocks in bitmask */ uint32_t count; /* number of items */ uint16_t free_off[IPFW_MAX_SETS]; /* first possible free offset */ }; #define BLOCK_ITEMS (8 * sizeof(u_long)) /* Number of items for ffsl() */ static uint32_t objhash_hash_name(struct namedobj_instance *ni, uint32_t set, char *name); static uint32_t objhash_hash_val(struct namedobj_instance *ni, uint32_t val); static int ipfw_flush_sopt_data(struct sockopt_data *sd); MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); /* * static variables followed by global ones */ #ifndef USERSPACE static VNET_DEFINE(uma_zone_t, ipfw_cntr_zone); #define V_ipfw_cntr_zone VNET(ipfw_cntr_zone) void ipfw_init_counters() { V_ipfw_cntr_zone = uma_zcreate("IPFW counters", sizeof(ip_fw_cntr), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU); } void ipfw_destroy_counters() { uma_zdestroy(V_ipfw_cntr_zone); } struct ip_fw * ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize) { struct ip_fw *rule; rule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO); rule->cntr = uma_zalloc(V_ipfw_cntr_zone, M_WAITOK | M_ZERO); return (rule); } static void free_rule(struct ip_fw *rule) { uma_zfree(V_ipfw_cntr_zone, rule->cntr); free(rule, M_IPFW); } #else void ipfw_init_counters() { } void ipfw_destroy_counters() { } struct ip_fw * ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize) { struct ip_fw *rule; rule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO); return (rule); } static void free_rule(struct ip_fw *rule) { free(rule, M_IPFW); } #endif /* * Find the smallest rule >= key, id. * We could use bsearch but it is so simple that we code it directly */ int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id) { int i, lo, hi; struct ip_fw *r; for (lo = 0, hi = chain->n_rules - 1; lo < hi;) { i = (lo + hi) / 2; r = chain->map[i]; if (r->rulenum < key) lo = i + 1; /* continue from the next one */ else if (r->rulenum > key) hi = i; /* this might be good */ else if (r->id < id) lo = i + 1; /* continue from the next one */ else /* r->id >= id */ hi = i; /* this might be good */ }; return hi; } /* * allocate a new map, returns the chain locked. extra is the number * of entries to add or delete. */ static struct ip_fw ** get_map(struct ip_fw_chain *chain, int extra, int locked) { for (;;) { struct ip_fw **map; int i; i = chain->n_rules + extra; map = malloc(i * sizeof(struct ip_fw *), M_IPFW, locked ? M_NOWAIT : M_WAITOK); if (map == NULL) { printf("%s: cannot allocate map\n", __FUNCTION__); return NULL; } if (!locked) IPFW_UH_WLOCK(chain); if (i >= chain->n_rules + extra) /* good */ return map; /* otherwise we lost the race, free and retry */ if (!locked) IPFW_UH_WUNLOCK(chain); free(map, M_IPFW); } } /* * swap the maps. It is supposed to be called with IPFW_UH_WLOCK */ static struct ip_fw ** swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len) { struct ip_fw **old_map; IPFW_WLOCK(chain); chain->id++; chain->n_rules = new_len; old_map = chain->map; chain->map = new_map; IPFW_WUNLOCK(chain); return old_map; } static void export_cntr1_base(struct ip_fw *krule, struct ip_fw_bcounter *cntr) { cntr->size = sizeof(*cntr); if (krule->cntr != NULL) { cntr->pcnt = counter_u64_fetch(krule->cntr); cntr->bcnt = counter_u64_fetch(krule->cntr + 1); cntr->timestamp = krule->timestamp; } if (cntr->timestamp > 0) cntr->timestamp += boottime.tv_sec; } static void export_cntr0_base(struct ip_fw *krule, struct ip_fw_bcounter0 *cntr) { if (krule->cntr != NULL) { cntr->pcnt = counter_u64_fetch(krule->cntr); cntr->bcnt = counter_u64_fetch(krule->cntr + 1); cntr->timestamp = krule->timestamp; } if (cntr->timestamp > 0) cntr->timestamp += boottime.tv_sec; } /* * Copies rule @urule from v1 userland format * to kernel @krule. * Assume @krule is zeroed. */ static void import_rule1(struct rule_check_info *ci) { struct ip_fw_rule *urule; struct ip_fw *krule; urule = (struct ip_fw_rule *)ci->urule; krule = (struct ip_fw *)ci->krule; /* copy header */ krule->act_ofs = urule->act_ofs; krule->cmd_len = urule->cmd_len; krule->rulenum = urule->rulenum; krule->set = urule->set; krule->flags = urule->flags; /* Save rulenum offset */ ci->urule_numoff = offsetof(struct ip_fw_rule, rulenum); /* Copy opcodes */ memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t)); } /* * Export rule into v1 format (Current). * Layout: * [ ipfw_obj_tlv(IPFW_TLV_RULE_ENT) * [ ip_fw_rule ] OR * [ ip_fw_bcounter ip_fw_rule] (depends on rcntrs). * ] * Assume @data is zeroed. */ static void export_rule1(struct ip_fw *krule, caddr_t data, int len, int rcntrs) { struct ip_fw_bcounter *cntr; struct ip_fw_rule *urule; ipfw_obj_tlv *tlv; /* Fill in TLV header */ tlv = (ipfw_obj_tlv *)data; tlv->type = IPFW_TLV_RULE_ENT; tlv->length = len; if (rcntrs != 0) { /* Copy counters */ cntr = (struct ip_fw_bcounter *)(tlv + 1); urule = (struct ip_fw_rule *)(cntr + 1); export_cntr1_base(krule, cntr); } else urule = (struct ip_fw_rule *)(tlv + 1); /* copy header */ urule->act_ofs = krule->act_ofs; urule->cmd_len = krule->cmd_len; urule->rulenum = krule->rulenum; urule->set = krule->set; urule->flags = krule->flags; urule->id = krule->id; /* Copy opcodes */ memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t)); } /* * Copies rule @urule from FreeBSD8 userland format (v0) * to kernel @krule. * Assume @krule is zeroed. */ static void import_rule0(struct rule_check_info *ci) { struct ip_fw_rule0 *urule; struct ip_fw *krule; urule = (struct ip_fw_rule0 *)ci->urule; krule = (struct ip_fw *)ci->krule; /* copy header */ krule->act_ofs = urule->act_ofs; krule->cmd_len = urule->cmd_len; krule->rulenum = urule->rulenum; krule->set = urule->set; if ((urule->_pad & 1) != 0) krule->flags |= IPFW_RULE_NOOPT; /* Save rulenum offset */ ci->urule_numoff = offsetof(struct ip_fw_rule0, rulenum); /* Copy opcodes */ memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t)); } static void export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len) { /* copy header */ memset(urule, 0, len); urule->act_ofs = krule->act_ofs; urule->cmd_len = krule->cmd_len; urule->rulenum = krule->rulenum; urule->set = krule->set; if ((krule->flags & IPFW_RULE_NOOPT) != 0) urule->_pad |= 1; /* Copy opcodes */ memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t)); /* Export counters */ export_cntr0_base(krule, (struct ip_fw_bcounter0 *)&urule->pcnt); } /* * Add new rule(s) to the list possibly creating rule number for each. * Update the rule_number in the input struct so the caller knows it as well. * Must be called without IPFW_UH held */ static int commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci, int count) { int error, i, insert_before, tcount; uint16_t rulenum, *pnum; struct rule_check_info *ci; struct ip_fw *krule; struct ip_fw **map; /* the new array of pointers */ /* Check if we need to do table remap */ tcount = 0; for (ci = rci, i = 0; i < count; ci++, i++) { if (ci->table_opcodes == 0) continue; /* * Rule has some table opcodes. * Reference & allocate needed tables/ */ error = ipfw_rewrite_table_uidx(chain, ci); if (error != 0) { /* * rewrite failed, state for current rule * has been reverted. Check if we need to * revert more. */ if (tcount > 0) { /* * We have some more table rules * we need to rollback. */ IPFW_UH_WLOCK(chain); while (ci != rci) { ci--; if (ci->table_opcodes == 0) continue; ipfw_unbind_table_rule(chain,ci->krule); } IPFW_UH_WUNLOCK(chain); } return (error); } tcount++; } /* get_map returns with IPFW_UH_WLOCK if successful */ map = get_map(chain, count, 0 /* not locked */); if (map == NULL) { if (tcount > 0) { /* Unbind tables */ IPFW_UH_WLOCK(chain); for (ci = rci, i = 0; i < count; ci++, i++) { if (ci->table_opcodes == 0) continue; ipfw_unbind_table_rule(chain, ci->krule); } IPFW_UH_WUNLOCK(chain); } return (ENOSPC); } if (V_autoinc_step < 1) V_autoinc_step = 1; else if (V_autoinc_step > 1000) V_autoinc_step = 1000; /* FIXME: Handle count > 1 */ ci = rci; krule = ci->krule; rulenum = krule->rulenum; /* find the insertion point, we will insert before */ insert_before = rulenum ? rulenum + 1 : IPFW_DEFAULT_RULE; i = ipfw_find_rule(chain, insert_before, 0); /* duplicate first part */ if (i > 0) bcopy(chain->map, map, i * sizeof(struct ip_fw *)); map[i] = krule; /* duplicate remaining part, we always have the default rule */ bcopy(chain->map + i, map + i + 1, sizeof(struct ip_fw *) *(chain->n_rules - i)); if (rulenum == 0) { /* Compute rule number and write it back */ rulenum = i > 0 ? map[i-1]->rulenum : 0; if (rulenum < IPFW_DEFAULT_RULE - V_autoinc_step) rulenum += V_autoinc_step; krule->rulenum = rulenum; /* Save number to userland rule */ pnum = (uint16_t *)((caddr_t)ci->urule + ci->urule_numoff); *pnum = rulenum; } krule->id = chain->id + 1; map = swap_map(chain, map, chain->n_rules + 1); chain->static_len += RULEUSIZE0(krule); IPFW_UH_WUNLOCK(chain); if (map) free(map, M_IPFW); return (0); } /* * Reclaim storage associated with a list of rules. This is * typically the list created using remove_rule. * A NULL pointer on input is handled correctly. */ void ipfw_reap_rules(struct ip_fw *head) { struct ip_fw *rule; while ((rule = head) != NULL) { head = head->x_next; free_rule(rule); } } /* * Used by del_entry() to check if a rule should be kept. * Returns 1 if the rule must be kept, 0 otherwise. * * Called with cmd = {0,1,5}. * cmd == 0 matches on rule numbers, excludes rules in RESVD_SET if n == 0 ; * cmd == 1 matches on set numbers only, rule numbers are ignored; * cmd == 5 matches on rule and set numbers. * * n == 0 is a wildcard for rule numbers, there is no wildcard for sets. * * Rules to keep are * (default || reserved || !match_set || !match_number) * where * default ::= (rule->rulenum == IPFW_DEFAULT_RULE) * // the default rule is always protected * * reserved ::= (cmd == 0 && n == 0 && rule->set == RESVD_SET) * // RESVD_SET is protected only if cmd == 0 and n == 0 ("ipfw flush") * * match_set ::= (cmd == 0 || rule->set == set) * // set number is ignored for cmd == 0 * * match_number ::= (cmd == 1 || n == 0 || n == rule->rulenum) * // number is ignored for cmd == 1 or n == 0 * */ static int keep_rule(struct ip_fw *rule, uint8_t cmd, uint8_t set, uint32_t n) { return (rule->rulenum == IPFW_DEFAULT_RULE) || (cmd == 0 && n == 0 && rule->set == RESVD_SET) || !(cmd == 0 || rule->set == set) || !(cmd == 1 || n == 0 || n == rule->rulenum); } /** * Remove all rules with given number, or do set manipulation. * Assumes chain != NULL && *chain != NULL. * * The argument is an uint32_t. The low 16 bit are the rule or set number; * the next 8 bits are the new set; the top 8 bits indicate the command: * * 0 delete rules numbered "rulenum" * 1 delete rules in set "rulenum" * 2 move rules "rulenum" to set "new_set" * 3 move rules from set "rulenum" to set "new_set" * 4 swap sets "rulenum" and "new_set" * 5 delete rules "rulenum" and set "new_set" */ static int del_entry(struct ip_fw_chain *chain, uint32_t arg) { struct ip_fw *rule; uint32_t num; /* rule number or old_set */ uint8_t cmd, new_set; int start, end, i, ofs, n; struct ip_fw **map = NULL; int error = 0; num = arg & 0xffff; cmd = (arg >> 24) & 0xff; new_set = (arg >> 16) & 0xff; if (cmd > 5 || new_set > RESVD_SET) return EINVAL; if (cmd == 0 || cmd == 2 || cmd == 5) { if (num >= IPFW_DEFAULT_RULE) return EINVAL; } else { if (num > RESVD_SET) /* old_set */ return EINVAL; } IPFW_UH_WLOCK(chain); /* arbitrate writers */ chain->reap = NULL; /* prepare for deletions */ switch (cmd) { case 0: /* delete rules "num" (num == 0 matches all) */ case 1: /* delete all rules in set N */ case 5: /* delete rules with number N and set "new_set". */ /* * Locate first rule to delete (start), the rule after * the last one to delete (end), and count how many * rules to delete (n). Always use keep_rule() to * determine which rules to keep. */ n = 0; if (cmd == 1) { /* look for a specific set including RESVD_SET. * Must scan the entire range, ignore num. */ new_set = num; for (start = -1, end = i = 0; i < chain->n_rules; i++) { if (keep_rule(chain->map[i], cmd, new_set, 0)) continue; if (start < 0) start = i; end = i; n++; } end++; /* first non-matching */ } else { /* Optimized search on rule numbers */ start = ipfw_find_rule(chain, num, 0); for (end = start; end < chain->n_rules; end++) { rule = chain->map[end]; if (num > 0 && rule->rulenum != num) break; if (!keep_rule(rule, cmd, new_set, num)) n++; } } if (n == 0) { /* A flush request (arg == 0 or cmd == 1) on empty * ruleset returns with no error. On the contrary, * if there is no match on a specific request, * we return EINVAL. */ if (arg != 0 && cmd != 1) error = EINVAL; break; } /* We have something to delete. Allocate the new map */ map = get_map(chain, -n, 1 /* locked */); if (map == NULL) { error = EINVAL; break; } /* 1. bcopy the initial part of the map */ if (start > 0) bcopy(chain->map, map, start * sizeof(struct ip_fw *)); /* 2. copy active rules between start and end */ for (i = ofs = start; i < end; i++) { rule = chain->map[i]; if (keep_rule(rule, cmd, new_set, num)) map[ofs++] = rule; } /* 3. copy the final part of the map */ bcopy(chain->map + end, map + ofs, (chain->n_rules - end) * sizeof(struct ip_fw *)); /* 4. swap the maps (under BH_LOCK) */ map = swap_map(chain, map, chain->n_rules - n); /* 5. now remove the rules deleted from the old map */ if (cmd == 1) ipfw_expire_dyn_rules(chain, NULL, new_set); for (i = start; i < end; i++) { rule = map[i]; if (keep_rule(rule, cmd, new_set, num)) continue; chain->static_len -= RULEUSIZE0(rule); if (cmd != 1) ipfw_expire_dyn_rules(chain, rule, RESVD_SET); rule->x_next = chain->reap; chain->reap = rule; } break; /* * In the next 3 cases the loop stops at (n_rules - 1) * because the default rule is never eligible.. */ case 2: /* move rules with given RULE number to new set */ for (i = 0; i < chain->n_rules - 1; i++) { rule = chain->map[i]; if (rule->rulenum == num) rule->set = new_set; } break; case 3: /* move rules with given SET number to new set */ for (i = 0; i < chain->n_rules - 1; i++) { rule = chain->map[i]; if (rule->set == num) rule->set = new_set; } break; case 4: /* swap two sets */ for (i = 0; i < chain->n_rules - 1; i++) { rule = chain->map[i]; if (rule->set == num) rule->set = new_set; else if (rule->set == new_set) rule->set = num; } break; } rule = chain->reap; chain->reap = NULL; ipfw_unbind_table_list(chain, rule); IPFW_UH_WUNLOCK(chain); ipfw_reap_rules(rule); if (map) free(map, M_IPFW); return error; } /* * Clear counters for a specific rule. * Normally run under IPFW_UH_RLOCK, but these are idempotent ops * so we only care that rules do not disappear. */ static void clear_counters(struct ip_fw *rule, int log_only) { ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); if (log_only == 0) IPFW_ZERO_RULE_COUNTER(rule); if (l->o.opcode == O_LOG) l->log_left = l->max_log; } /** * Reset some or all counters on firewall rules. * The argument `arg' is an u_int32_t. The low 16 bit are the rule number, * the next 8 bits are the set number, the top 8 bits are the command: * 0 work with rules from all set's; * 1 work with rules only from specified set. * Specified rule number is zero if we want to clear all entries. * log_only is 1 if we only want to reset logs, zero otherwise. */ static int zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) { struct ip_fw *rule; char *msg; int i; uint16_t rulenum = arg & 0xffff; uint8_t set = (arg >> 16) & 0xff; uint8_t cmd = (arg >> 24) & 0xff; if (cmd > 1) return (EINVAL); if (cmd == 1 && set > RESVD_SET) return (EINVAL); IPFW_UH_RLOCK(chain); if (rulenum == 0) { V_norule_counter = 0; for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; /* Skip rules not in our set. */ if (cmd == 1 && rule->set != set) continue; clear_counters(rule, log_only); } msg = log_only ? "All logging counts reset" : "Accounting cleared"; } else { int cleared = 0; for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; if (rule->rulenum == rulenum) { if (cmd == 0 || rule->set == set) clear_counters(rule, log_only); cleared = 1; } if (rule->rulenum > rulenum) break; } if (!cleared) { /* we did not find any matching rules */ IPFW_UH_RUNLOCK(chain); return (EINVAL); } msg = log_only ? "logging count reset" : "cleared"; } IPFW_UH_RUNLOCK(chain); if (V_fw_verbose) { int lev = LOG_SECURITY | LOG_NOTICE; if (rulenum) log(lev, "ipfw: Entry %d %s.\n", rulenum, msg); else log(lev, "ipfw: %s.\n", msg); } return (0); } /* * Check rule head in FreeBSD11 format * */ static int check_ipfw_rule1(struct ip_fw_rule *rule, int size, struct rule_check_info *ci) { int l; if (size < sizeof(*rule)) { printf("ipfw: rule too short\n"); return (EINVAL); } /* Check for valid cmd_len */ l = roundup2(RULESIZE(rule), sizeof(uint64_t)); if (l != size) { printf("ipfw: size mismatch (have %d want %d)\n", size, l); return (EINVAL); } if (rule->act_ofs >= rule->cmd_len) { printf("ipfw: bogus action offset (%u > %u)\n", rule->act_ofs, rule->cmd_len - 1); return (EINVAL); } if (rule->rulenum > IPFW_DEFAULT_RULE - 1) return (EINVAL); return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci)); } /* * Check rule head in FreeBSD8 format * */ static int check_ipfw_rule0(struct ip_fw_rule0 *rule, int size, struct rule_check_info *ci) { int l; if (size < sizeof(*rule)) { printf("ipfw: rule too short\n"); return (EINVAL); } /* Check for valid cmd_len */ l = sizeof(*rule) + rule->cmd_len * 4 - 4; if (l != size) { printf("ipfw: size mismatch (have %d want %d)\n", size, l); return (EINVAL); } if (rule->act_ofs >= rule->cmd_len) { printf("ipfw: bogus action offset (%u > %u)\n", rule->act_ofs, rule->cmd_len - 1); return (EINVAL); } if (rule->rulenum > IPFW_DEFAULT_RULE - 1) return (EINVAL); return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci)); } static int check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci) { int cmdlen, l; int have_action; have_action = 0; /* * Now go for the individual checks. Very simple ones, basically only * instruction sizes. */ for (l = cmd_len; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (cmdlen > l) { printf("ipfw: opcode %d size truncated\n", cmd->opcode); return EINVAL; } switch (cmd->opcode) { case O_PROBE_STATE: case O_KEEP_STATE: case O_PROTO: case O_IP_SRC_ME: case O_IP_DST_ME: case O_LAYER2: case O_IN: case O_FRAG: case O_DIVERTED: case O_IPOPT: case O_IPTOS: case O_IPPRECEDENCE: case O_IPVER: case O_SOCKARG: case O_TCPFLAGS: case O_TCPOPTS: case O_ESTAB: case O_VERREVPATH: case O_VERSRCREACH: case O_ANTISPOOF: case O_IPSEC: #ifdef INET6 case O_IP6_SRC_ME: case O_IP6_DST_ME: case O_EXT_HDR: case O_IP6: #endif case O_IP4: case O_TAG: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; break; case O_FIB: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; if (cmd->arg1 >= rt_numfibs) { printf("ipfw: invalid fib number %d\n", cmd->arg1); return EINVAL; } break; case O_SETFIB: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; if ((cmd->arg1 != IP_FW_TABLEARG) && (cmd->arg1 >= rt_numfibs)) { printf("ipfw: invalid fib number %d\n", cmd->arg1); return EINVAL; } goto check_action; case O_UID: case O_GID: case O_JAIL: case O_IP_SRC: case O_IP_DST: case O_TCPSEQ: case O_TCPACK: case O_PROB: case O_ICMPTYPE: if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; break; case O_LIMIT: if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) goto bad_size; break; case O_LOG: if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) goto bad_size; ((ipfw_insn_log *)cmd)->log_left = ((ipfw_insn_log *)cmd)->max_log; break; case O_IP_SRC_MASK: case O_IP_DST_MASK: /* only odd command lengths */ if ( !(cmdlen & 1) || cmdlen > 31) goto bad_size; break; case O_IP_SRC_SET: case O_IP_DST_SET: if (cmd->arg1 == 0 || cmd->arg1 > 256) { printf("ipfw: invalid set size %d\n", cmd->arg1); return EINVAL; } if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + (cmd->arg1+31)/32 ) goto bad_size; break; case O_IP_SRC_LOOKUP: case O_IP_DST_LOOKUP: if (cmd->arg1 >= V_fw_tables_max) { printf("ipfw: invalid table number %d\n", cmd->arg1); return (EINVAL); } if (cmdlen != F_INSN_SIZE(ipfw_insn) && cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 && cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; ci->table_opcodes++; break; case O_IP_FLOW_LOOKUP: if (cmd->arg1 >= V_fw_tables_max) { printf("ipfw: invalid table number %d\n", cmd->arg1); return (EINVAL); } if (cmdlen != F_INSN_SIZE(ipfw_insn) && cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; ci->table_opcodes++; break; case O_MACADDR2: if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) goto bad_size; break; case O_NOP: case O_IPID: case O_IPTTL: case O_IPLEN: case O_TCPDATALEN: case O_TCPWIN: case O_TAGGED: if (cmdlen < 1 || cmdlen > 31) goto bad_size; break; case O_DSCP: if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1) goto bad_size; break; case O_MAC_TYPE: case O_IP_SRCPORT: case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ if (cmdlen < 2 || cmdlen > 31) goto bad_size; break; case O_RECV: case O_XMIT: case O_VIA: if (((ipfw_insn_if *)cmd)->name[0] == '\1') ci->table_opcodes++; if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) goto bad_size; break; case O_ALTQ: if (cmdlen != F_INSN_SIZE(ipfw_insn_altq)) goto bad_size; break; case O_PIPE: case O_QUEUE: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; goto check_action; case O_FORWARD_IP: if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) goto bad_size; goto check_action; #ifdef INET6 case O_FORWARD_IP6: if (cmdlen != F_INSN_SIZE(ipfw_insn_sa6)) goto bad_size; goto check_action; #endif /* INET6 */ case O_DIVERT: case O_TEE: if (ip_divert_ptr == NULL) return EINVAL; else goto check_size; case O_NETGRAPH: case O_NGTEE: if (ng_ipfw_input_p == NULL) return EINVAL; else goto check_size; case O_NAT: if (!IPFW_NAT_LOADED) return EINVAL; if (cmdlen != F_INSN_SIZE(ipfw_insn_nat)) goto bad_size; goto check_action; case O_FORWARD_MAC: /* XXX not implemented yet */ case O_CHECK_STATE: case O_COUNT: case O_ACCEPT: case O_DENY: case O_REJECT: case O_SETDSCP: #ifdef INET6 case O_UNREACH6: #endif case O_SKIPTO: case O_REASS: case O_CALLRETURN: check_size: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; check_action: if (have_action) { printf("ipfw: opcode %d, multiple actions" " not allowed\n", cmd->opcode); return (EINVAL); } have_action = 1; if (l != cmdlen) { printf("ipfw: opcode %d, action must be" " last opcode\n", cmd->opcode); return (EINVAL); } break; #ifdef INET6 case O_IP6_SRC: case O_IP6_DST: if (cmdlen != F_INSN_SIZE(struct in6_addr) + F_INSN_SIZE(ipfw_insn)) goto bad_size; break; case O_FLOW6ID: if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + ((ipfw_insn_u32 *)cmd)->o.arg1) goto bad_size; break; case O_IP6_SRC_MASK: case O_IP6_DST_MASK: if ( !(cmdlen & 1) || cmdlen > 127) goto bad_size; break; case O_ICMP6TYPE: if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) ) goto bad_size; break; #endif default: switch (cmd->opcode) { #ifndef INET6 case O_IP6_SRC_ME: case O_IP6_DST_ME: case O_EXT_HDR: case O_IP6: case O_UNREACH6: case O_IP6_SRC: case O_IP6_DST: case O_FLOW6ID: case O_IP6_SRC_MASK: case O_IP6_DST_MASK: case O_ICMP6TYPE: printf("ipfw: no IPv6 support in kernel\n"); return (EPROTONOSUPPORT); #endif default: printf("ipfw: opcode %d, unknown opcode\n", cmd->opcode); return (EINVAL); } } } if (have_action == 0) { printf("ipfw: missing action\n"); return (EINVAL); } return 0; bad_size: printf("ipfw: opcode %d size %d wrong\n", cmd->opcode, cmdlen); return (EINVAL); } /* * Translation of requests for compatibility with FreeBSD 7.2/8. * a static variable tells us if we have an old client from userland, * and if necessary we translate requests and responses between the * two formats. */ static int is7 = 0; struct ip_fw7 { struct ip_fw7 *next; /* linked list of rules */ struct ip_fw7 *next_rule; /* ptr to next [skipto] rule */ /* 'next_rule' is used to pass up 'set_disable' status */ uint16_t act_ofs; /* offset of action in 32-bit units */ uint16_t cmd_len; /* # of 32-bit words in cmd */ uint16_t rulenum; /* rule number */ uint8_t set; /* rule set (0..31) */ // #define RESVD_SET 31 /* set for default and persistent rules */ uint8_t _pad; /* padding */ // uint32_t id; /* rule id, only in v.8 */ /* These fields are present in all rules. */ uint64_t pcnt; /* Packet counter */ uint64_t bcnt; /* Byte counter */ uint32_t timestamp; /* tv_sec of last match */ ipfw_insn cmd[1]; /* storage for commands */ }; static int convert_rule_to_7(struct ip_fw_rule0 *rule); static int convert_rule_to_8(struct ip_fw_rule0 *rule); #ifndef RULESIZE7 #define RULESIZE7(rule) (sizeof(struct ip_fw7) + \ ((struct ip_fw7 *)(rule))->cmd_len * 4 - 4) #endif /* * Copy the static and dynamic rules to the supplied buffer * and return the amount of space actually used. * Must be run under IPFW_UH_RLOCK */ static size_t ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) { char *bp = buf; char *ep = bp + space; struct ip_fw *rule; struct ip_fw_rule0 *dst; int error, i, l, warnflag; time_t boot_seconds; warnflag = 0; boot_seconds = boottime.tv_sec; for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; if (is7) { /* Convert rule to FreeBSd 7.2 format */ l = RULESIZE7(rule); if (bp + l + sizeof(uint32_t) <= ep) { bcopy(rule, bp, l + sizeof(uint32_t)); error = ipfw_rewrite_table_kidx(chain, (struct ip_fw_rule0 *)bp); if (error != 0) return (0); error = convert_rule_to_7((struct ip_fw_rule0 *) bp); if (error) return 0; /*XXX correct? */ /* * XXX HACK. Store the disable mask in the "next" * pointer in a wild attempt to keep the ABI the same. * Why do we do this on EVERY rule? */ bcopy(&V_set_disable, &(((struct ip_fw7 *)bp)->next_rule), sizeof(V_set_disable)); if (((struct ip_fw7 *)bp)->timestamp) ((struct ip_fw7 *)bp)->timestamp += boot_seconds; bp += l; } continue; /* go to next rule */ } l = RULEUSIZE0(rule); if (bp + l > ep) { /* should not happen */ printf("overflow dumping static rules\n"); break; } dst = (struct ip_fw_rule0 *)bp; export_rule0(rule, dst, l); error = ipfw_rewrite_table_kidx(chain, dst); /* * XXX HACK. Store the disable mask in the "next" * pointer in a wild attempt to keep the ABI the same. * Why do we do this on EVERY rule? * * XXX: "ipfw set show" (ab)uses IP_FW_GET to read disabled mask * so we need to fail _after_ saving at least one mask. */ bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable)); if (dst->timestamp) dst->timestamp += boot_seconds; bp += l; if (error != 0) { if (error == 2) { /* Non-fatal table rewrite error. */ warnflag = 1; continue; } printf("Stop on rule %d. Fail to convert table\n", rule->rulenum); break; } } if (warnflag != 0) printf("ipfw: process %s is using legacy interfaces," " consider rebuilding\n", ""); ipfw_get_dynamic(chain, &bp, ep); /* protected by the dynamic lock */ return (bp - (char *)buf); } struct dump_args { uint32_t b; /* start rule */ uint32_t e; /* end rule */ uint32_t rcount; /* number of rules */ uint32_t rsize; /* rules size */ uint32_t tcount; /* number of tables */ int rcounters; /* counters */ }; /* * Dumps static rules with table TLVs in buffer @sd. * * Returns 0 on success. */ static int dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da, uint32_t *bmask, struct sockopt_data *sd) { int error; int i, l; uint32_t tcount; ipfw_obj_ctlv *ctlv; struct ip_fw *krule; caddr_t dst; /* Dump table names first (if any) */ if (da->tcount > 0) { /* Header first */ ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); if (ctlv == NULL) return (ENOMEM); ctlv->head.type = IPFW_TLV_TBLNAME_LIST; ctlv->head.length = da->tcount * sizeof(ipfw_obj_ntlv) + sizeof(*ctlv); ctlv->count = da->tcount; ctlv->objsize = sizeof(ipfw_obj_ntlv); } i = 0; tcount = da->tcount; while (tcount > 0) { if ((bmask[i / 32] & (1 << (i % 32))) == 0) { i++; continue; } if ((error = ipfw_export_table_ntlv(chain, i, sd)) != 0) return (error); i++; tcount--; } /* Dump rules */ ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); if (ctlv == NULL) return (ENOMEM); ctlv->head.type = IPFW_TLV_RULE_LIST; ctlv->head.length = da->rsize + sizeof(*ctlv); ctlv->count = da->rcount; for (i = da->b; i < da->e; i++) { krule = chain->map[i]; l = RULEUSIZE1(krule) + sizeof(ipfw_obj_tlv); if (da->rcounters != 0) l += sizeof(struct ip_fw_bcounter); dst = (caddr_t)ipfw_get_sopt_space(sd, l); if (dst == NULL) return (ENOMEM); export_rule1(krule, dst, l, da->rcounters); } return (0); } /* * Dumps requested objects data * Data layout (version 0)(current): * Request: [ ipfw_cfg_lheader ] + IPFW_CFG_GET_* flags * size = ipfw_cfg_lheader.size * Reply: [ ipfw_rules_lheader * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) * ipfw_obj_tlv(IPFW_TLV_RULE_ENT) [ ip_fw_bcounter (optional) ip_fw_rule ] * ] (optional) * [ ipfw_obj_ctlv(IPFW_TLV_STATE_LIST) ipfw_obj_dyntlv x N ] (optional) * ] * * NOTE IPFW_TLV_STATE_LIST has the single valid field: objsize. * The rest (size, count) are set to zero and needs to be ignored. * * Returns 0 on success. */ static int dump_config(struct ip_fw_chain *chain, struct sockopt_data *sd) { ipfw_cfg_lheader *hdr; struct ip_fw *rule; uint32_t sz, rnum; int error, i; struct dump_args da; uint32_t *bmask; hdr = (ipfw_cfg_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr)); if (hdr == NULL) return (EINVAL); error = 0; bmask = NULL; /* Allocate needed state */ if (hdr->flags & IPFW_CFG_GET_STATIC) bmask = malloc(IPFW_TABLES_MAX / 8, M_TEMP, M_WAITOK | M_ZERO); IPFW_UH_RLOCK(chain); /* * STAGE 1: Determine size/count for objects in range. * Prepare used tables bitmask. */ sz = 0; memset(&da, 0, sizeof(da)); da.b = 0; da.e = chain->n_rules; if (hdr->end_rule != 0) { /* Handle custom range */ if ((rnum = hdr->start_rule) > IPFW_DEFAULT_RULE) rnum = IPFW_DEFAULT_RULE; da.b = ipfw_find_rule(chain, rnum, 0); rnum = hdr->end_rule; rnum = (rnum < IPFW_DEFAULT_RULE) ? rnum+1 : IPFW_DEFAULT_RULE; da.e = ipfw_find_rule(chain, rnum, 0); } if (hdr->flags & IPFW_CFG_GET_STATIC) { for (i = da.b; i < da.e; i++) { rule = chain->map[i]; da.rsize += RULEUSIZE1(rule) + sizeof(ipfw_obj_tlv); da.rcount++; da.tcount += ipfw_mark_table_kidx(chain, rule, bmask); } /* Add counters if requested */ if (hdr->flags & IPFW_CFG_GET_COUNTERS) { da.rsize += sizeof(struct ip_fw_bcounter) * da.rcount; da.rcounters = 1; } if (da.tcount > 0) sz += da.tcount * sizeof(ipfw_obj_ntlv) + sizeof(ipfw_obj_ctlv); sz += da.rsize + sizeof(ipfw_obj_ctlv); } if (hdr->flags & IPFW_CFG_GET_STATES) sz += ipfw_dyn_get_count() * sizeof(ipfw_obj_dyntlv) + sizeof(ipfw_obj_ctlv); /* Fill header anyway */ hdr->size = sz; hdr->set_mask = ~V_set_disable; if (sd->valsize < sz) { IPFW_UH_RUNLOCK(chain); return (ENOMEM); } /* STAGE2: Store actual data */ if (hdr->flags & IPFW_CFG_GET_STATIC) { error = dump_static_rules(chain, &da, bmask, sd); if (error != 0) { IPFW_UH_RUNLOCK(chain); return (error); } } if (hdr->flags & IPFW_CFG_GET_STATES) error = ipfw_dump_states(chain, sd); IPFW_UH_RUNLOCK(chain); if (bmask != NULL) free(bmask, M_TEMP); return (error); } #define IP_FW3_OPLENGTH(x) ((x)->sopt_valsize - sizeof(ip_fw3_opheader)) #define IP_FW3_WRITEBUF 4096 /* small page-size write buffer */ #define IP_FW3_READBUF 16 * 1024 * 1024 /* handle large rulesets */ static int check_object_name(ipfw_obj_ntlv *ntlv) { int error; switch (ntlv->head.type) { case IPFW_TLV_TBL_NAME: error = ipfw_check_table_name(ntlv->name); break; default: error = ENOTSUP; } return (0); } /* * Adds one or more rules to ipfw @chain. * Data layout (version 0)(current): * Request: * [ * ip_fw3_opheader * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional *1) * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] (*2) (*3) * ] * Reply: * [ * ip_fw3_opheader * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] * ] * * Rules in reply are modified to store their actual ruleset number. * * (*1) TLVs inside IPFW_TLV_TBL_LIST needs to be sorted ascending * accoring to their idx field and there has to be no duplicates. * (*2) Numbered rules inside IPFW_TLV_RULE_LIST needs to be sorted ascending. * (*3) Each ip_fw structure needs to be aligned to u64 boundary. * * Returns 0 on success. */ static int add_entry(struct ip_fw_chain *chain, struct sockopt_data *sd) { ipfw_obj_ctlv *ctlv, *rtlv, *tstate; ipfw_obj_ntlv *ntlv; int clen, error, idx; uint32_t count, read; struct ip_fw_rule *r; struct rule_check_info rci, *ci, *cbuf; ip_fw3_opheader *op3; int i, rsize; if (sd->valsize > IP_FW3_READBUF) return (EINVAL); op3 = (ip_fw3_opheader *)ipfw_get_sopt_space(sd, sd->valsize); ctlv = (ipfw_obj_ctlv *)(op3 + 1); read = sizeof(ip_fw3_opheader); rtlv = NULL; tstate = NULL; cbuf = NULL; memset(&rci, 0, sizeof(struct rule_check_info)); if (read + sizeof(*ctlv) > sd->valsize) return (EINVAL); if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) { clen = ctlv->head.length; /* Check size and alignment */ if (clen > sd->valsize || clen < sizeof(*ctlv)) return (EINVAL); if ((clen % sizeof(uint64_t)) != 0) return (EINVAL); /* * Some table names or other named objects. * Check for validness. */ count = (ctlv->head.length - sizeof(*ctlv)) / sizeof(*ntlv); if (ctlv->count != count || ctlv->objsize != sizeof(*ntlv)) return (EINVAL); /* * Check each TLV. * Ensure TLVs are sorted ascending and * there are no duplicates. */ idx = -1; ntlv = (ipfw_obj_ntlv *)(ctlv + 1); while (count > 0) { if (ntlv->head.length != sizeof(ipfw_obj_ntlv)) return (EINVAL); error = check_object_name(ntlv); if (error != 0) return (error); if (ntlv->idx <= idx) return (EINVAL); idx = ntlv->idx; count--; ntlv++; } tstate = ctlv; read += ctlv->head.length; ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); } if (read + sizeof(*ctlv) > sd->valsize) return (EINVAL); if (ctlv->head.type == IPFW_TLV_RULE_LIST) { clen = ctlv->head.length; if (clen + read > sd->valsize || clen < sizeof(*ctlv)) return (EINVAL); if ((clen % sizeof(uint64_t)) != 0) return (EINVAL); /* * TODO: Permit adding multiple rules at once */ if (ctlv->count != 1) return (ENOTSUP); clen -= sizeof(*ctlv); if (ctlv->count > clen / sizeof(struct ip_fw_rule)) return (EINVAL); /* Allocate state for each rule or use stack */ if (ctlv->count == 1) { memset(&rci, 0, sizeof(struct rule_check_info)); cbuf = &rci; } else cbuf = malloc(ctlv->count * sizeof(*ci), M_TEMP, M_WAITOK | M_ZERO); ci = cbuf; /* * Check each rule for validness. * Ensure numbered rules are sorted ascending * and properly aligned */ idx = 0; r = (struct ip_fw_rule *)(ctlv + 1); count = 0; error = 0; while (clen > 0) { rsize = roundup2(RULESIZE(r), sizeof(uint64_t)); if (rsize > clen || ctlv->count <= count) { error = EINVAL; break; } ci->ctlv = tstate; error = check_ipfw_rule1(r, rsize, ci); if (error != 0) break; /* Check sorting */ if (r->rulenum != 0 && r->rulenum < idx) { printf("rulenum %d idx %d\n", r->rulenum, idx); error = EINVAL; break; } idx = r->rulenum; ci->urule = (caddr_t)r; rsize = roundup2(rsize, sizeof(uint64_t)); clen -= rsize; r = (struct ip_fw_rule *)((caddr_t)r + rsize); count++; ci++; } if (ctlv->count != count || error != 0) { if (cbuf != &rci) free(cbuf, M_TEMP); return (EINVAL); } rtlv = ctlv; read += ctlv->head.length; ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); } if (read != sd->valsize || rtlv == NULL || rtlv->count == 0) { if (cbuf != NULL && cbuf != &rci) free(cbuf, M_TEMP); return (EINVAL); } /* * Passed rules seems to be valid. * Allocate storage and try to add them to chain. */ for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) { clen = RULEKSIZE1((struct ip_fw_rule *)ci->urule); ci->krule = ipfw_alloc_rule(chain, clen); import_rule1(ci); } if ((error = commit_rules(chain, cbuf, rtlv->count)) != 0) { /* Free allocate krules */ for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) free(ci->krule, M_IPFW); } if (cbuf != NULL && cbuf != &rci) free(cbuf, M_TEMP); return (error); } int ipfw_ctl3(struct sockopt *sopt) { int error; size_t bsize_max, size, valsize; struct ip_fw_chain *chain; uint32_t opt; char xbuf[256]; struct sockopt_data sdata; ip_fw3_opheader *op3 = NULL; /* Do not check privs twice untile we're called from ipfw_ctl() */ #if 0 error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); if (error != 0) return (error); #endif if (sopt->sopt_name != IP_FW3) return (ENOTSUP); chain = &V_layer3_chain; error = 0; /* Save original valsize before it is altered via sooptcopyin() */ valsize = sopt->sopt_valsize; memset(&sdata, 0, sizeof(sdata)); /* Read op3 header first to determine actual operation */ op3 = (ip_fw3_opheader *)xbuf; error = sooptcopyin(sopt, op3, sizeof(*op3), sizeof(*op3)); if (error != 0) return (error); opt = op3->opcode; sopt->sopt_valsize = valsize; /* * Disallow modifications in really-really secure mode, but still allow * the logging counters to be reset. */ if (opt == IP_FW_ADD || (sopt->sopt_dir == SOPT_SET && opt != IP_FW_RESETLOG)) { error = securelevel_ge(sopt->sopt_td->td_ucred, 3); if (error != 0) return (error); } /* * Determine buffer size: * use on-stack xbuf for short request, * allocate sliding-window buf for data export or * contigious buffer for special ops. */ bsize_max = IP_FW3_WRITEBUF; if (opt == IP_FW_ADD) bsize_max = IP_FW3_READBUF; /* * Fill in sockopt_data structure that may be useful for * IP_FW3 get requests. */ if (valsize <= sizeof(xbuf)) { sdata.kbuf = xbuf; sdata.ksize = sizeof(xbuf); sdata.kavail = valsize; } else { if (valsize < bsize_max) size = valsize; else size = bsize_max; sdata.kbuf = malloc(size, M_TEMP, M_WAITOK | M_ZERO); sdata.ksize = size; sdata.kavail = size; } sdata.sopt = sopt; + sdata.sopt_val = sopt->sopt_val; sdata.valsize = valsize; /* * Copy either all request (if valsize < bsize_max) * or first bsize_max bytes to guarantee most consumers * that all necessary data has been copied). * Anyway, copy not less than sizeof(ip_fw3_opheader). */ if ((error = sooptcopyin(sopt, sdata.kbuf, sdata.ksize, sizeof(ip_fw3_opheader))) != 0) return (error); op3 = (ip_fw3_opheader *)sdata.kbuf; opt = op3->opcode; switch (opt) { case IP_FW_XGET: error = dump_config(chain, &sdata); break; case IP_FW_XIFLIST: error = ipfw_list_ifaces(chain, &sdata); break; case IP_FW_XADD: error = add_entry(chain, &sdata); break; /*--- TABLE opcodes ---*/ case IP_FW_TABLE_XCREATE: error = ipfw_create_table(chain, op3, &sdata); break; case IP_FW_TABLE_XDESTROY: case IP_FW_TABLE_XFLUSH: error = ipfw_flush_table(chain, op3, &sdata); break; case IP_FW_TABLE_XINFO: error = ipfw_describe_table(chain, &sdata); break; case IP_FW_TABLES_XGETSIZE: error = ipfw_listsize_tables(chain, &sdata); break; case IP_FW_TABLES_XLIST: error = ipfw_list_tables(chain, &sdata); break; case IP_FW_TABLE_XLIST: error = ipfw_dump_table(chain, op3, &sdata); break; case IP_FW_TABLE_XADD: case IP_FW_TABLE_XDEL: error = ipfw_manage_table_ent(chain, op3, &sdata); break; case IP_FW_TABLE_XFIND: error = ipfw_find_table_entry(chain, op3, &sdata); break; case IP_FW_TABLES_ALIST: error = ipfw_list_table_algo(chain, &sdata); break; case IP_FW_TABLE_XGETSIZE: { uint32_t *tbl; struct tid_info ti; if (IP_FW3_OPLENGTH(sopt) < sizeof(uint32_t)) { error = EINVAL; break; } tbl = (uint32_t *)(op3 + 1); memset(&ti, 0, sizeof(ti)); ti.uidx = *tbl; IPFW_UH_RLOCK(chain); error = ipfw_count_xtable(chain, &ti, tbl); IPFW_UH_RUNLOCK(chain); if (error) break; error = sooptcopyout(sopt, op3, sopt->sopt_valsize); } break; default: printf("ipfw: ipfw_ctl3 invalid option %d\n", opt); error = EINVAL; } /* Flush state and free buffers */ if (error == 0) error = ipfw_flush_sopt_data(&sdata); else ipfw_flush_sopt_data(&sdata); + /* Restore original pointer and set number of bytes written */ + sopt->sopt_val = sdata.sopt_val; + sopt->sopt_valsize = sdata.ktotal; if (sdata.kbuf != xbuf) free(sdata.kbuf, M_TEMP); return (error); } /** * {set|get}sockopt parser. */ int ipfw_ctl(struct sockopt *sopt) { #define RULE_MAXSIZE (256*sizeof(u_int32_t)) int error; size_t size, valsize; struct ip_fw *buf; struct ip_fw_rule0 *rule; struct ip_fw_chain *chain; u_int32_t rulenum[2]; uint32_t opt; struct rule_check_info ci; error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); if (error) return (error); chain = &V_layer3_chain; error = 0; /* Save original valsize before it is altered via sooptcopyin() */ valsize = sopt->sopt_valsize; opt = sopt->sopt_name; /* Pass IP_FW3 to a new handler */ if (opt == IP_FW3) return (ipfw_ctl3(sopt)); /* * Disallow modifications in really-really secure mode, but still allow * the logging counters to be reset. */ if (opt == IP_FW_ADD || (sopt->sopt_dir == SOPT_SET && opt != IP_FW_RESETLOG)) { error = securelevel_ge(sopt->sopt_td->td_ucred, 3); if (error != 0) return (error); } switch (opt) { case IP_FW_GET: /* * pass up a copy of the current rules. Static rules * come first (the last of which has number IPFW_DEFAULT_RULE), * followed by a possibly empty list of dynamic rule. * The last dynamic rule has NULL in the "next" field. * * Note that the calculated size is used to bound the * amount of data returned to the user. The rule set may * change between calculating the size and returning the * data in which case we'll just return what fits. */ for (;;) { int len = 0, want; size = chain->static_len; size += ipfw_dyn_len(); if (size >= sopt->sopt_valsize) break; buf = malloc(size, M_TEMP, M_WAITOK | M_ZERO); IPFW_UH_RLOCK(chain); /* check again how much space we need */ want = chain->static_len + ipfw_dyn_len(); if (size >= want) len = ipfw_getrules(chain, buf, size); IPFW_UH_RUNLOCK(chain); if (size >= want) error = sooptcopyout(sopt, buf, len); free(buf, M_TEMP); if (size >= want) break; } break; case IP_FW_FLUSH: /* locking is done within del_entry() */ error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */ break; case IP_FW_ADD: rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK); error = sooptcopyin(sopt, rule, RULE_MAXSIZE, sizeof(struct ip_fw7) ); memset(&ci, 0, sizeof(struct rule_check_info)); /* * If the size of commands equals RULESIZE7 then we assume * a FreeBSD7.2 binary is talking to us (set is7=1). * is7 is persistent so the next 'ipfw list' command * will use this format. * NOTE: If wrong version is guessed (this can happen if * the first ipfw command is 'ipfw [pipe] list') * the ipfw binary may crash or loop infinitly... */ size = sopt->sopt_valsize; if (size == RULESIZE7(rule)) { is7 = 1; error = convert_rule_to_8(rule); if (error) { free(rule, M_TEMP); return error; } size = RULESIZE(rule); } else is7 = 0; if (error == 0) error = check_ipfw_rule0(rule, size, &ci); if (error == 0) { /* locking is done within add_rule() */ struct ip_fw *krule; krule = ipfw_alloc_rule(chain, RULEKSIZE0(rule)); ci.urule = (caddr_t)rule; ci.krule = krule; import_rule0(&ci); error = commit_rules(chain, &ci, 1); if (!error && sopt->sopt_dir == SOPT_GET) { if (is7) { error = convert_rule_to_7(rule); size = RULESIZE7(rule); if (error) { free(rule, M_TEMP); return error; } } error = sooptcopyout(sopt, rule, size); } } free(rule, M_TEMP); break; case IP_FW_DEL: /* * IP_FW_DEL is used for deleting single rules or sets, * and (ab)used to atomically manipulate sets. Argument size * is used to distinguish between the two: * sizeof(u_int32_t) * delete single rule or set of rules, * or reassign rules (or sets) to a different set. * 2*sizeof(u_int32_t) * atomic disable/enable sets. * first u_int32_t contains sets to be disabled, * second u_int32_t contains sets to be enabled. */ error = sooptcopyin(sopt, rulenum, 2*sizeof(u_int32_t), sizeof(u_int32_t)); if (error) break; size = sopt->sopt_valsize; if (size == sizeof(u_int32_t) && rulenum[0] != 0) { /* delete or reassign, locking done in del_entry() */ error = del_entry(chain, rulenum[0]); } else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */ IPFW_UH_WLOCK(chain); V_set_disable = (V_set_disable | rulenum[0]) & ~rulenum[1] & ~(1<sopt_val != 0) { error = sooptcopyin(sopt, rulenum, sizeof(u_int32_t), sizeof(u_int32_t)); if (error) break; } error = zero_entry(chain, rulenum[0], sopt->sopt_name == IP_FW_RESETLOG); break; /*--- TABLE opcodes ---*/ case IP_FW_TABLE_ADD: case IP_FW_TABLE_DEL: { ipfw_table_entry ent; struct tentry_info tei; struct tid_info ti; error = sooptcopyin(sopt, &ent, sizeof(ent), sizeof(ent)); if (error) break; memset(&tei, 0, sizeof(tei)); tei.paddr = &ent.addr; tei.subtype = AF_INET; tei.masklen = ent.masklen; tei.value = ent.value; memset(&ti, 0, sizeof(ti)); ti.uidx = ent.tbl; ti.type = IPFW_TABLE_CIDR; error = (opt == IP_FW_TABLE_ADD) ? - add_table_entry(chain, &ti, &tei) : - del_table_entry(chain, &ti, &tei); + add_table_entry(chain, &ti, &tei, 1) : + del_table_entry(chain, &ti, &tei, 1); } break; case IP_FW_TABLE_FLUSH: { u_int16_t tbl; struct tid_info ti; error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)); if (error) break; memset(&ti, 0, sizeof(ti)); ti.uidx = tbl; error = flush_table(chain, &ti); } break; case IP_FW_TABLE_GETSIZE: { u_int32_t tbl, cnt; struct tid_info ti; if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)))) break; memset(&ti, 0, sizeof(ti)); ti.uidx = tbl; IPFW_RLOCK(chain); error = ipfw_count_table(chain, &ti, &cnt); IPFW_RUNLOCK(chain); if (error) break; error = sooptcopyout(sopt, &cnt, sizeof(cnt)); } break; case IP_FW_TABLE_LIST: { ipfw_table *tbl; struct tid_info ti; if (sopt->sopt_valsize < sizeof(*tbl)) { error = EINVAL; break; } size = sopt->sopt_valsize; tbl = malloc(size, M_TEMP, M_WAITOK); error = sooptcopyin(sopt, tbl, size, sizeof(*tbl)); if (error) { free(tbl, M_TEMP); break; } tbl->size = (size - sizeof(*tbl)) / sizeof(ipfw_table_entry); memset(&ti, 0, sizeof(ti)); ti.uidx = tbl->tbl; IPFW_RLOCK(chain); error = ipfw_dump_table_legacy(chain, &ti, tbl); IPFW_RUNLOCK(chain); if (error) { free(tbl, M_TEMP); break; } error = sooptcopyout(sopt, tbl, size); free(tbl, M_TEMP); } break; /*--- NAT operations are protected by the IPFW_LOCK ---*/ case IP_FW_NAT_CFG: if (IPFW_NAT_LOADED) error = ipfw_nat_cfg_ptr(sopt); else { printf("IP_FW_NAT_CFG: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_DEL: if (IPFW_NAT_LOADED) error = ipfw_nat_del_ptr(sopt); else { printf("IP_FW_NAT_DEL: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_GET_CONFIG: if (IPFW_NAT_LOADED) error = ipfw_nat_get_cfg_ptr(sopt); else { printf("IP_FW_NAT_GET_CFG: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_GET_LOG: if (IPFW_NAT_LOADED) error = ipfw_nat_get_log_ptr(sopt); else { printf("IP_FW_NAT_GET_LOG: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; default: printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name); error = EINVAL; } return (error); #undef RULE_MAXSIZE } static int ipfw_flush_sopt_data(struct sockopt_data *sd) { int error; + size_t sz; - if (sd->koff == 0) + if ((sz = sd->koff) == 0) return (0); if (sd->sopt->sopt_dir == SOPT_GET) { - error = sooptcopyout(sd->sopt, sd->kbuf, sd->koff); + error = sooptcopyout(sd->sopt, sd->kbuf, sz); if (error != 0) return (error); } memset(sd->kbuf, 0, sd->ksize); sd->ktotal += sd->koff; sd->koff = 0; if (sd->ktotal + sd->ksize < sd->valsize) sd->kavail = sd->ksize; else sd->kavail = sd->valsize - sd->ktotal; + + /* Update sopt buffer */ + sd->sopt->sopt_valsize = sd->kavail; + sd->sopt->sopt_val = sd->sopt_val + sd->ktotal; return (0); } caddr_t ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed) { int error; caddr_t addr; if (sd->kavail < needed) { /* * Flush data and try another time. */ error = ipfw_flush_sopt_data(sd); if (sd->kavail < needed || error != 0) return (NULL); } addr = sd->kbuf + sd->koff; sd->koff += needed; sd->kavail -= needed; return (addr); } caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed) { caddr_t addr; if ((addr = ipfw_get_sopt_space(sd, needed)) == NULL) return (NULL); if (sd->kavail > 0) memset(sd->kbuf + sd->koff, 0, sd->kavail); return (addr); } #define RULE_MAXSIZE (256*sizeof(u_int32_t)) /* Functions to convert rules 7.2 <==> 8.0 */ static int convert_rule_to_7(struct ip_fw_rule0 *rule) { /* Used to modify original rule */ struct ip_fw7 *rule7 = (struct ip_fw7 *)rule; /* copy of original rule, version 8 */ struct ip_fw *tmp; /* Used to copy commands */ ipfw_insn *ccmd, *dst; int ll = 0, ccmdlen = 0; tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO); if (tmp == NULL) { return 1; //XXX error } bcopy(rule, tmp, RULE_MAXSIZE); /* Copy fields */ //rule7->_pad = tmp->_pad; rule7->set = tmp->set; rule7->rulenum = tmp->rulenum; rule7->cmd_len = tmp->cmd_len; rule7->act_ofs = tmp->act_ofs; rule7->next_rule = (struct ip_fw7 *)tmp->next_rule; rule7->next = (struct ip_fw7 *)tmp->x_next; rule7->cmd_len = tmp->cmd_len; export_cntr1_base(tmp, (struct ip_fw_bcounter *)&rule7->pcnt); /* Copy commands */ for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule7->cmd ; ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) { ccmdlen = F_LEN(ccmd); bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t)); if (dst->opcode > O_NAT) /* O_REASS doesn't exists in 7.2 version, so * decrement opcode if it is after O_REASS */ dst->opcode--; if (ccmdlen > ll) { printf("ipfw: opcode %d size truncated\n", ccmd->opcode); return EINVAL; } } free(tmp, M_TEMP); return 0; } static int convert_rule_to_8(struct ip_fw_rule0 *rule) { /* Used to modify original rule */ struct ip_fw7 *rule7 = (struct ip_fw7 *) rule; /* Used to copy commands */ ipfw_insn *ccmd, *dst; int ll = 0, ccmdlen = 0; /* Copy of original rule */ struct ip_fw7 *tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO); if (tmp == NULL) { return 1; //XXX error } bcopy(rule7, tmp, RULE_MAXSIZE); for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule->cmd ; ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) { ccmdlen = F_LEN(ccmd); bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t)); if (dst->opcode > O_NAT) /* O_REASS doesn't exists in 7.2 version, so * increment opcode if it is after O_REASS */ dst->opcode++; if (ccmdlen > ll) { printf("ipfw: opcode %d size truncated\n", ccmd->opcode); return EINVAL; } } rule->_pad = tmp->_pad; rule->set = tmp->set; rule->rulenum = tmp->rulenum; rule->cmd_len = tmp->cmd_len; rule->act_ofs = tmp->act_ofs; rule->next_rule = (struct ip_fw *)tmp->next_rule; rule->x_next = (struct ip_fw *)tmp->next; rule->cmd_len = tmp->cmd_len; rule->id = 0; /* XXX see if is ok = 0 */ rule->pcnt = tmp->pcnt; rule->bcnt = tmp->bcnt; rule->timestamp = tmp->timestamp; free (tmp, M_TEMP); return 0; } /* * Named object api * */ /* * Allocate new bitmask which can be used to enlarge/shrink * named instance index. */ void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks) { size_t size; int max_blocks; void *idx_mask; items = roundup2(items, BLOCK_ITEMS); /* Align to block size */ max_blocks = items / BLOCK_ITEMS; size = items / 8; idx_mask = malloc(size * IPFW_MAX_SETS, M_IPFW, M_WAITOK); /* Mark all as free */ memset(idx_mask, 0xFF, size * IPFW_MAX_SETS); *idx = idx_mask; *pblocks = max_blocks; } /* * Copy current bitmask index to new one. */ void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, void **idx, int *blocks) { int old_blocks, new_blocks; u_long *old_idx, *new_idx; int i; old_idx = ni->idx_mask; old_blocks = ni->max_blocks; new_idx = *idx; new_blocks = *blocks; for (i = 0; i < IPFW_MAX_SETS; i++) { memcpy(&new_idx[new_blocks * i], &old_idx[old_blocks * i], old_blocks * sizeof(u_long)); } } /* * Swaps current @ni index with new one. */ void ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, void **idx, int *blocks) { int old_blocks; u_long *old_idx; old_idx = ni->idx_mask; old_blocks = ni->max_blocks; ni->idx_mask = *idx; ni->max_blocks = *blocks; /* Save old values */ *idx = old_idx; *blocks = old_blocks; } void ipfw_objhash_bitmap_free(void *idx, int blocks) { free(idx, M_IPFW); } /* * Creates named hash instance. * Must be called without holding any locks. * Return pointer to new instance. */ struct namedobj_instance * ipfw_objhash_create(uint32_t items) { struct namedobj_instance *ni; int i; size_t size; size = sizeof(struct namedobj_instance) + sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE + sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE; ni = malloc(size, M_IPFW, M_WAITOK | M_ZERO); ni->nn_size = NAMEDOBJ_HASH_SIZE; ni->nv_size = NAMEDOBJ_HASH_SIZE; ni->names = (struct namedobjects_head *)(ni +1); ni->values = &ni->names[ni->nn_size]; for (i = 0; i < ni->nn_size; i++) TAILQ_INIT(&ni->names[i]); for (i = 0; i < ni->nv_size; i++) TAILQ_INIT(&ni->values[i]); /* Allocate bitmask separately due to possible resize */ ipfw_objhash_bitmap_alloc(items, (void*)&ni->idx_mask, &ni->max_blocks); return (ni); } void ipfw_objhash_destroy(struct namedobj_instance *ni) { free(ni->idx_mask, M_IPFW); free(ni, M_IPFW); } static uint32_t objhash_hash_name(struct namedobj_instance *ni, uint32_t set, char *name) { uint32_t v; v = fnv_32_str(name, FNV1_32_INIT); return (v % ni->nn_size); } static uint32_t objhash_hash_val(struct namedobj_instance *ni, uint32_t val) { uint32_t v; v = val % (ni->nv_size - 1); return (v); } struct named_object * ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name) { struct named_object *no; uint32_t hash; hash = objhash_hash_name(ni, set, name); TAILQ_FOREACH(no, &ni->names[hash], nn_next) { if ((strcmp(no->name, name) == 0) && (no->set == set)) return (no); } return (NULL); } struct named_object * ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t kidx) { struct named_object *no; uint32_t hash; hash = objhash_hash_val(ni, kidx); TAILQ_FOREACH(no, &ni->values[hash], nv_next) { if (no->kidx == kidx) return (no); } return (NULL); } int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a, struct named_object *b) { if ((strcmp(a->name, b->name) == 0) && a->set == b->set) return (1); return (0); } void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no) { uint32_t hash; hash = objhash_hash_name(ni, no->set, no->name); TAILQ_INSERT_HEAD(&ni->names[hash], no, nn_next); hash = objhash_hash_val(ni, no->kidx); TAILQ_INSERT_HEAD(&ni->values[hash], no, nv_next); ni->count++; } void ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no) { uint32_t hash; hash = objhash_hash_name(ni, no->set, no->name); TAILQ_REMOVE(&ni->names[hash], no, nn_next); hash = objhash_hash_val(ni, no->kidx); TAILQ_REMOVE(&ni->values[hash], no, nv_next); ni->count--; } uint32_t ipfw_objhash_count(struct namedobj_instance *ni) { return (ni->count); } /* * Runs @func for each found named object. * It is safe to delete objects from callback */ void ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, void *arg) { struct named_object *no, *no_tmp; int i; for (i = 0; i < ni->nn_size; i++) { TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) f(ni, no, arg); } } /* * Removes index from given set. * Returns 0 on success. */ int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx) { u_long *mask; int i, v; i = idx / BLOCK_ITEMS; v = idx % BLOCK_ITEMS; if (i >= ni->max_blocks) return (1); mask = &ni->idx_mask[i]; if ((*mask & ((u_long)1 << v)) != 0) return (1); /* Mark as free */ *mask |= (u_long)1 << v; /* Update free offset */ if (ni->free_off[0] > i) ni->free_off[0] = i; return (0); } /* * Allocate new index in given set and stores in in @pidx. * Returns 0 on success. */ int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx) { struct namedobj_instance *ni; u_long *mask; int i, off, v; ni = (struct namedobj_instance *)n; off = ni->free_off[0]; mask = &ni->idx_mask[off]; for (i = off; i < ni->max_blocks; i++, mask++) { if ((v = ffsl(*mask)) == 0) continue; /* Mark as busy */ *mask &= ~ ((u_long)1 << (v - 1)); ni->free_off[0] = i; v = BLOCK_ITEMS * i + v - 1; *pidx = v; return (0); } return (1); } /* end of file */ Index: projects/ipfw/sys/netpfil/ipfw/ip_fw_table.c =================================================================== --- projects/ipfw/sys/netpfil/ipfw/ip_fw_table.c (revision 269434) +++ projects/ipfw/sys/netpfil/ipfw/ip_fw_table.c (revision 269435) @@ -1,2504 +1,2547 @@ /*- * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Lookup table support for ipfw. * * This file contains handlers for all generic tables' operations: * add/del/flush entries, list/dump tables etc.. * * Table data modification is protected by both UH and runtimg lock * while reading configuration/data is protected by UH lock. * * Lookup algorithms for all table types are located in ip_fw_table_algo.c */ #include "opt_ipfw.h" #include #include #include #include #include #include #include #include #include #include /* ip_fw.h requires IFNAMSIZ */ #include #include #include #include /* struct ipfw_rule_ref */ #include #include #include /* * Table has the following `type` concepts: * * `no.type` represents lookup key type (cidr, ifp, uid, etc..) * `ta->atype` represents exact lookup algorithm. * For example, we can use more efficient search schemes if we plan * to use some specific table for storing host-routes only. * `ftype` (at the moment )is pure userland field helping to properly * format value data e.g. "value is IPv4 nexthop" or "value is DSCP" * or "value is port". * */ struct table_config { struct named_object no; uint8_t vtype; /* format table type */ uint8_t linked; /* 1 if already linked */ uint8_t tflags; /* type flags */ uint8_t spare; uint32_t count; /* Number of records */ uint32_t limit; /* Max number of records */ - uint64_t flags; /* state flags */ char tablename[64]; /* table name */ struct table_algo *ta; /* Callbacks for given algo */ void *astate; /* algorithm state */ struct table_info ti; /* data to put to table_info */ }; struct tables_config { struct namedobj_instance *namehash; int algo_count; struct table_algo *algo[256]; struct table_algo *def_algo[IPFW_TABLE_MAXTYPE + 1]; }; static struct table_config *find_table(struct namedobj_instance *ni, struct tid_info *ti); static struct table_config *alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags, uint8_t vtype); static void free_table_config(struct namedobj_instance *ni, struct table_config *tc); static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, char *aname, ipfw_xtable_info *i); static void link_table(struct ip_fw_chain *chain, struct table_config *tc); static void unlink_table(struct ip_fw_chain *chain, struct table_config *tc); static void free_table_state(void **state, void **xstate, uint8_t type); static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, struct sockopt_data *sd); static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc, ipfw_xtable_info *i); static int dump_table_tentry(void *e, void *arg); static int dump_table_xentry(void *e, void *arg); static int ipfw_dump_table_v0(struct ip_fw_chain *ch, struct sockopt_data *sd); static int ipfw_dump_table_v1(struct ip_fw_chain *ch, struct sockopt_data *sd); static int ipfw_manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); static int ipfw_manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); -static int modify_table(struct ip_fw_chain *ch, struct table_config *tc, - struct table_algo *ta, void *ta_buf, uint64_t pflags); +static int check_table_space(struct ip_fw_chain *ch, struct table_config *tc, + struct table_info *ti, uint32_t count); static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti); static struct table_algo *find_table_algo(struct tables_config *tableconf, struct tid_info *ti, char *name); #define CHAIN_TO_TCFG(chain) ((struct tables_config *)(chain)->tblcfg) #define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash) #define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k])) +#define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */ + int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei) + struct tentry_info *tei, uint32_t count) { struct table_config *tc; struct table_algo *ta; struct namedobj_instance *ni; uint16_t kidx; int error; uint32_t num; - uint64_t aflags; - ipfw_xtable_info xi; - char ta_buf[128]; + ipfw_xtable_info *xi; + char ta_buf[TA_BUF_SZ]; IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); /* * Find and reference existing table. */ ta = NULL; if ((tc = find_table(ni, ti)) != NULL) { /* check table type */ if (tc->no.type != ti->type) { IPFW_UH_WUNLOCK(ch); return (EINVAL); } /* Try to exit early on limit hit */ if (tc->limit != 0 && tc->count == tc->limit && (tei->flags & TEI_FLAGS_UPDATE) == 0) { IPFW_UH_WUNLOCK(ch); return (EFBIG); } /* Reference and unlock */ tc->no.refcnt++; ta = tc->ta; - aflags = tc->flags; } IPFW_UH_WUNLOCK(ch); if (tc == NULL) { /* Compability mode: create new table for old clients */ if ((tei->flags & TEI_FLAGS_COMPAT) == 0) return (ESRCH); - memset(&xi, 0, sizeof(xi)); - xi.vtype = IPFW_VTYPE_U32; + xi = malloc(sizeof(ipfw_xtable_info), M_TEMP, M_WAITOK|M_ZERO); + xi->vtype = IPFW_VTYPE_U32; - error = create_table_internal(ch, ti, NULL, &xi); + error = create_table_internal(ch, ti, NULL, xi); + free(xi, M_TEMP); if (error != 0) return (error); /* Let's try to find & reference another time */ IPFW_UH_WLOCK(ch); if ((tc = find_table(ni, ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (EINVAL); } if (tc->no.type != ti->type) { IPFW_UH_WUNLOCK(ch); return (EINVAL); } /* Reference and unlock */ tc->no.refcnt++; ta = tc->ta; - aflags = tc->flags; IPFW_UH_WUNLOCK(ch); } - if (aflags != 0) { - - /* - * Previous add/delete call returned non-zero state. - * Run appropriate handler. - */ - error = modify_table(ch, tc, ta, &ta_buf, aflags); - if (error != 0) - return (error); - } - /* Prepare record (allocate memory) */ memset(&ta_buf, 0, sizeof(ta_buf)); error = ta->prepare_add(ch, tei, &ta_buf); if (error != 0) return (error); IPFW_UH_WLOCK(ch); + /* + * Ensure we are able to add all entries without additional + * memory allocations. May release/reacquire UH_WLOCK. + */ + kidx = tc->no.kidx; + error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), count); + if (error != 0) { + IPFW_UH_WUNLOCK(ch); + ta->flush_entry(ch, tei, &ta_buf); + return (error); + } + ni = CHAIN_TO_NI(ch); /* Drop reference we've used in first search */ tc->no.refcnt--; - /* Update aflags since it can be changed after previous read */ - aflags = tc->flags; /* Check limit before adding */ if (tc->limit != 0 && tc->count == tc->limit) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) { IPFW_UH_WUNLOCK(ch); + ta->flush_entry(ch, tei, &ta_buf); return (EFBIG); } /* * We have UPDATE flag set. * Permit updating record (if found), * but restrict adding new one since we've * already hit the limit. */ tei->flags |= TEI_FLAGS_DONTADD; } /* We've got valid table in @tc. Let's add data */ kidx = tc->no.kidx; ta = tc->ta; num = 0; IPFW_WLOCK(ch); - error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx), tei, &ta_buf, - &aflags, &num); + error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx), tei, &ta_buf, &num); IPFW_WUNLOCK(ch); /* Update number of records. */ - if (error == 0) + if (error == 0) { tc->count += num; + /* Permit post-add algorithm grow/rehash. */ + error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0); + } - tc->flags = aflags; - IPFW_UH_WUNLOCK(ch); /* Run cleaning callback anyway */ ta->flush_entry(ch, tei, &ta_buf); return (error); } int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei) + struct tentry_info *tei, uint32_t count) { struct table_config *tc; struct table_algo *ta; struct namedobj_instance *ni; uint16_t kidx; int error; uint32_t num; - uint64_t aflags; - char ta_buf[128]; + char ta_buf[TA_BUF_SZ]; IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc = find_table(ni, ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } if (tc->no.type != ti->type) { IPFW_UH_WUNLOCK(ch); return (EINVAL); } - aflags = tc->flags; ta = tc->ta; - if (aflags != 0) { - - /* - * Give the chance to algo to shrink its state. - */ - tc->no.refcnt++; + /* + * Give a chance for algorithm to shrink. + * May release/reacquire UH_WLOCK. + */ + kidx = tc->no.kidx; + error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0); + if (error != 0) { IPFW_UH_WUNLOCK(ch); - memset(&ta_buf, 0, sizeof(ta_buf)); - - error = modify_table(ch, tc, ta, &ta_buf, aflags); - - IPFW_UH_WLOCK(ch); - tc->no.refcnt--; - aflags = tc->flags; - - if (error != 0) { - IPFW_UH_WUNLOCK(ch); - return (error); - } + ta->flush_entry(ch, tei, &ta_buf); + return (error); } /* * We assume ta_buf size is enough for storing - * prepare_del() key, so we're running under UH_LOCK here. + * prepare_del() key, so we're running under UH_WLOCK here. */ memset(&ta_buf, 0, sizeof(ta_buf)); if ((error = ta->prepare_del(ch, tei, &ta_buf)) != 0) { IPFW_UH_WUNLOCK(ch); return (error); } kidx = tc->no.kidx; num = 0; IPFW_WLOCK(ch); - error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), tei, &ta_buf, - &aflags, &num); + error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), tei, &ta_buf, &num); IPFW_WUNLOCK(ch); - if (error == 0) + if (error == 0) { tc->count -= num; - tc->flags = aflags; + /* Run post-del hook to permit shrinking */ + error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0); + } IPFW_UH_WUNLOCK(ch); ta->flush_entry(ch, tei, &ta_buf); return (error); } /* - * Runs callbacks to modify algo state (typically, table resize). + * Ensure that table @tc has enough space to add @count entries without + * need for reallocation. * * Callbacks order: + * 0) has_space() (UH_WLOCK) - checks if @count items can be added w/o resize. + * * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags. * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage * 3) modify (UH_WLOCK + WLOCK) - switch pointers - * 4) flush_modify (no locks) - free state, if needed + * 4) flush_modify (UH_WLOCK) - free state, if needed + * + * Returns 0 on success. */ static int -modify_table(struct ip_fw_chain *ch, struct table_config *tc, - struct table_algo *ta, void *ta_buf, uint64_t pflags) +check_table_space(struct ip_fw_chain *ch, struct table_config *tc, + struct table_info *ti, uint32_t count) { - struct table_info *ti; + struct table_algo *ta; + uint64_t pflags; + char ta_buf[TA_BUF_SZ]; int error; - error = ta->prepare_mod(ta_buf, &pflags); - if (error != 0) - return (error); + IPFW_UH_WLOCK_ASSERT(ch); - IPFW_UH_WLOCK(ch); - ti = KIDX_TO_TI(ch, tc->no.kidx); + error = 0; + ta = tc->ta; + /* Acquire reference not to loose @tc between locks/unlocks */ + tc->no.refcnt++; - error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags); - /* - * prepare_mofify may return zero in @pflags to - * indicate that modifications are not unnesessary. + * TODO: think about avoiding race between large add/large delete + * operation on algorithm which implements shrinking along with + * growing. */ + while (true) { + pflags = 0; + if (ta->has_space(tc->astate, ti, count, &pflags) != 0) { + tc->no.refcnt--; + return (0); + } - if (error == 0 && pflags != 0) { - /* Do actual modification */ - IPFW_WLOCK(ch); - ta->modify(tc->astate, ti, ta_buf, pflags); - IPFW_WUNLOCK(ch); - } + /* We have to shrink/grow table */ + IPFW_UH_WUNLOCK(ch); + memset(&ta_buf, 0, sizeof(ta_buf)); + + if ((error = ta->prepare_mod(ta_buf, &pflags)) != 0) { + IPFW_UH_WLOCK(ch); + break; + } - IPFW_UH_WUNLOCK(ch); + IPFW_UH_WLOCK(ch); - ta->flush_mod(ta_buf); + /* Check if we still need to alter table */ + ti = KIDX_TO_TI(ch, tc->no.kidx); + if (ta->has_space(tc->astate, ti, count, &pflags) != 0) { + /* + * Other threads has already performed resize. + * Flush our state and return/ + */ + ta->flush_mod(ta_buf); + break; + } + + error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags); + if (error == 0) { + /* Do actual modification */ + IPFW_WLOCK(ch); + ta->modify(tc->astate, ti, ta_buf, pflags); + IPFW_WUNLOCK(ch); + } + + /* Anyway, flush data and retry */ + ta->flush_mod(ta_buf); + } + + tc->no.refcnt--; return (error); } + + int ipfw_manage_table_ent(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { int error; switch (op3->version) { case 0: error = ipfw_manage_table_ent_v0(ch, op3, sd); break; case 1: error = ipfw_manage_table_ent_v1(ch, op3, sd); break; default: error = ENOTSUP; } return (error); } /* * Adds or deletes record in table. * Data layout (v0): * Request: [ ip_fw3_opheader ipfw_table_xentry ] * * Returns 0 on success */ static int ipfw_manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_table_xentry *xent; struct tentry_info tei; struct tid_info ti; int error, hdrlen, read; hdrlen = offsetof(ipfw_table_xentry, k); /* Check minimum header size */ if (sd->valsize < (sizeof(*op3) + hdrlen)) return (EINVAL); read = sizeof(ip_fw3_opheader); /* Check if xentry len field is valid */ xent = (ipfw_table_xentry *)(op3 + 1); if (xent->len < hdrlen || xent->len + read > sd->valsize) return (EINVAL); memset(&tei, 0, sizeof(tei)); tei.paddr = &xent->k; tei.masklen = xent->masklen; tei.value = xent->value; /* Old requests compability */ tei.flags = TEI_FLAGS_COMPAT; if (xent->type == IPFW_TABLE_CIDR) { if (xent->len - hdrlen == sizeof(in_addr_t)) tei.subtype = AF_INET; else tei.subtype = AF_INET6; } memset(&ti, 0, sizeof(ti)); ti.uidx = xent->tbl; ti.type = xent->type; error = (op3->opcode == IP_FW_TABLE_XADD) ? - add_table_entry(ch, &ti, &tei) : - del_table_entry(ch, &ti, &tei); + add_table_entry(ch, &ti, &tei, 1) : + del_table_entry(ch, &ti, &tei, 1); return (error); } /* * Adds or deletes record in table. * Data layout (v1)(current): * Request: [ ipfw_obj_header * ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ] * ] * * Returns 0 on success */ static int ipfw_manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_tentry *tent; ipfw_obj_ctlv *ctlv; ipfw_obj_header *oh; struct tentry_info tei; struct tid_info ti; int error, read; /* Check minimum header size */ if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv))) return (EINVAL); /* Check if passed data is too long */ if (sd->valsize != sd->kavail) return (EINVAL); oh = (ipfw_obj_header *)sd->kbuf; /* Basic length checks for TLVs */ if (oh->ntlv.head.length != sizeof(oh->ntlv)) return (EINVAL); read = sizeof(*oh); ctlv = (ipfw_obj_ctlv *)(oh + 1); if (ctlv->head.length + read != sd->valsize) return (EINVAL); /* * TODO: permit adding multiple entries for given table * at once */ if (ctlv->count != 1) return (EOPNOTSUPP); read += sizeof(*ctlv); /* Assume tentry may grow to support larger keys */ tent = (ipfw_obj_tentry *)(ctlv + 1); if (tent->head.length < sizeof(*tent) || tent->head.length + read > sd->valsize) return (EINVAL); /* Convert data into kernel request objects */ memset(&tei, 0, sizeof(tei)); tei.paddr = &tent->k; tei.subtype = tent->subtype; tei.masklen = tent->masklen; if (tent->head.flags & IPFW_TF_UPDATE) tei.flags |= TEI_FLAGS_UPDATE; tei.value = tent->value; objheader_to_ti(oh, &ti); ti.type = oh->ntlv.type; ti.uidx = tent->idx; error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ? - add_table_entry(ch, &ti, &tei) : - del_table_entry(ch, &ti, &tei); + add_table_entry(ch, &ti, &tei, 1) : + del_table_entry(ch, &ti, &tei, 1); return (error); } /* * Looks up an entry in given table. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_obj_tentry ] * Reply: [ ipfw_obj_header ipfw_obj_tentry ] * * Returns 0 on success */ int ipfw_find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_tentry *tent; ipfw_obj_header *oh; struct tid_info ti; struct table_config *tc; struct table_algo *ta; struct table_info *kti; struct namedobj_instance *ni; int error; size_t sz; /* Check minimum header size */ sz = sizeof(*oh) + sizeof(*tent); if (sd->valsize != sz) return (EINVAL); oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); tent = (ipfw_obj_tentry *)(oh + 1); /* Basic length checks for TLVs */ if (oh->ntlv.head.length != sizeof(oh->ntlv)) return (EINVAL); objheader_to_ti(oh, &ti); ti.type = oh->ntlv.type; ti.uidx = tent->idx; IPFW_UH_RLOCK(ch); ni = CHAIN_TO_NI(ch); /* * Find existing table and check its type . */ ta = NULL; if ((tc = find_table(ni, &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } /* check table type */ if (tc->no.type != ti.type) { IPFW_UH_RUNLOCK(ch); return (EINVAL); } kti = KIDX_TO_TI(ch, tc->no.kidx); ta = tc->ta; if (ta->find_tentry == NULL) return (ENOTSUP); error = ta->find_tentry(tc->astate, kti, tent); IPFW_UH_RUNLOCK(ch); return (error); } int ipfw_flush_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { int error; struct _ipfw_obj_header *oh; struct tid_info ti; if (sd->valsize != sizeof(*oh)) return (EINVAL); oh = (struct _ipfw_obj_header *)op3; objheader_to_ti(oh, &ti); if (op3->opcode == IP_FW_TABLE_XDESTROY) error = destroy_table(ch, &ti); else if (op3->opcode == IP_FW_TABLE_XFLUSH) error = flush_table(ch, &ti); else return (ENOTSUP); return (error); } /* * Flushes all entries in given table. * Data layout (v0)(current): * Request: [ ip_fw3_opheader ] * * Returns 0 on success */ int flush_table(struct ip_fw_chain *ch, struct tid_info *ti) { struct namedobj_instance *ni; struct table_config *tc; struct table_algo *ta; struct table_info ti_old, ti_new, *tablestate; void *astate_old, *astate_new; char algostate[64], *pstate; int error; uint16_t kidx; uint8_t tflags; /* * Stage 1: save table algoritm. * Reference found table to ensure it won't disappear. */ IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc = find_table(ni, ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } ta = tc->ta; tc->no.refcnt++; /* Save statup algo parameters */ if (ta->print_config != NULL) { ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx), algostate, sizeof(algostate)); pstate = algostate; } else pstate = NULL; tflags = tc->tflags; IPFW_UH_WUNLOCK(ch); /* * Stage 2: allocate new table instance using same algo. */ memset(&ti_new, 0, sizeof(struct table_info)); if ((error = ta->init(ch, &astate_new, &ti_new, pstate, tflags)) != 0) { IPFW_UH_WLOCK(ch); tc->no.refcnt--; IPFW_UH_WUNLOCK(ch); return (error); } /* * Stage 3: swap old state pointers with newly-allocated ones. * Decrease refcount. */ IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; tablestate = (struct table_info *)ch->tablestate; IPFW_WLOCK(ch); ti_old = tablestate[kidx]; tablestate[kidx] = ti_new; IPFW_WUNLOCK(ch); astate_old = tc->astate; tc->astate = astate_new; tc->ti = ti_new; tc->count = 0; tc->no.refcnt--; IPFW_UH_WUNLOCK(ch); /* * Stage 4: perform real flush. */ ta->destroy(astate_old, &ti_old); return (0); } /* * Destroys table specified by @ti. * Data layout (v0)(current): * Request: [ ip_fw3_opheader ] * * Returns 0 on success */ static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti) { struct namedobj_instance *ni; struct table_config *tc; IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc = find_table(ni, ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } /* Do not permit destroying referenced tables */ if (tc->no.refcnt > 0) { IPFW_UH_WUNLOCK(ch); return (EBUSY); } IPFW_WLOCK(ch); unlink_table(ch, tc); IPFW_WUNLOCK(ch); /* Free obj index */ if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0) printf("Error unlinking kidx %d from table %s\n", tc->no.kidx, tc->tablename); IPFW_UH_WUNLOCK(ch); free_table_config(ni, tc); return (0); } static void destroy_table_locked(struct namedobj_instance *ni, struct named_object *no, void *arg) { unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no); if (ipfw_objhash_free_idx(ni, no->kidx) != 0) printf("Error unlinking kidx %d from table %s\n", no->kidx, no->name); free_table_config(ni, (struct table_config *)no); } void ipfw_destroy_tables(struct ip_fw_chain *ch) { /* Remove all tables from working set */ IPFW_UH_WLOCK(ch); IPFW_WLOCK(ch); ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch); IPFW_WUNLOCK(ch); IPFW_UH_WUNLOCK(ch); /* Free pointers itself */ free(ch->tablestate, M_IPFW); ipfw_table_algo_destroy(ch); ipfw_objhash_destroy(CHAIN_TO_NI(ch)); free(CHAIN_TO_TCFG(ch), M_IPFW); } int ipfw_init_tables(struct ip_fw_chain *ch) { struct tables_config *tcfg; /* Allocate pointers */ ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info), M_IPFW, M_WAITOK | M_ZERO); tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO); tcfg->namehash = ipfw_objhash_create(V_fw_tables_max); ch->tblcfg = tcfg; ipfw_table_algo_init(ch); return (0); } int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables) { unsigned int ntables_old, tbl; struct namedobj_instance *ni; void *new_idx, *old_tablestate, *tablestate; struct table_info *ti; struct table_config *tc; int i, new_blocks; /* Check new value for validity */ if (ntables > IPFW_TABLES_MAX) ntables = IPFW_TABLES_MAX; /* Allocate new pointers */ tablestate = malloc(ntables * sizeof(struct table_info), M_IPFW, M_WAITOK | M_ZERO); ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks); IPFW_UH_WLOCK(ch); tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables; ni = CHAIN_TO_NI(ch); /* Temporary restrict decreasing max_tables */ if (ntables < V_fw_tables_max) { /* * FIXME: Check if we really can shrink */ IPFW_UH_WUNLOCK(ch); return (EINVAL); } /* Copy table info/indices */ memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl); ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks); IPFW_WLOCK(ch); /* Change pointers */ old_tablestate = ch->tablestate; ch->tablestate = tablestate; ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks); ntables_old = V_fw_tables_max; V_fw_tables_max = ntables; IPFW_WUNLOCK(ch); /* Notify all consumers that their @ti pointer has changed */ ti = (struct table_info *)ch->tablestate; for (i = 0; i < tbl; i++, ti++) { if (ti->lookup == NULL) continue; tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i); if (tc == NULL || tc->ta->change_ti == NULL) continue; tc->ta->change_ti(tc->astate, ti); } IPFW_UH_WUNLOCK(ch); /* Free old pointers */ free(old_tablestate, M_IPFW); ipfw_objhash_bitmap_free(new_idx, new_blocks); return (0); } int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val) { struct table_info *ti; ti = &(((struct table_info *)ch->tablestate)[tbl]); return (ti->lookup(ti, &addr, sizeof(in_addr_t), val)); } int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, void *paddr, uint32_t *val) { struct table_info *ti; ti = &(((struct table_info *)ch->tablestate)[tbl]); return (ti->lookup(ti, paddr, plen, val)); } /* * Info/List/dump support for tables. * */ /* * High-level 'get' cmds sysctl handlers */ /* * Get buffer size needed to list info for all tables. * Data layout (v0)(current): * Request: [ empty ], size = sizeof(ipfw_obj_lheader) * Reply: [ ipfw_obj_lheader ] * * Returns 0 on success */ int ipfw_listsize_tables(struct ip_fw_chain *ch, struct sockopt_data *sd) { struct _ipfw_obj_lheader *olh; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); olh->size = sizeof(*olh); /* Make export_table store needed size */ IPFW_UH_RLOCK(ch); export_tables(ch, olh, sd); IPFW_UH_RUNLOCK(ch); return (0); } /* * Lists all tables currently available in kernel. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ] * * Returns 0 on success */ int ipfw_list_tables(struct ip_fw_chain *ch, struct sockopt_data *sd) { struct _ipfw_obj_lheader *olh; int error; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); if (sd->valsize < olh->size) return (EINVAL); IPFW_UH_RLOCK(ch); error = export_tables(ch, olh, sd); IPFW_UH_RUNLOCK(ch); return (error); } /* * Store table info to buffer provided by @sd. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_xtable_info(empty)] * Reply: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success. */ int ipfw_describe_table(struct ip_fw_chain *ch, struct sockopt_data *sd) { struct _ipfw_obj_header *oh; struct table_config *tc; struct tid_info ti; size_t sz; sz = sizeof(*oh) + sizeof(ipfw_xtable_info); oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); if (oh == NULL) return (EINVAL); objheader_to_ti(oh, &ti); IPFW_UH_RLOCK(ch); if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1)); IPFW_UH_RUNLOCK(ch); return (0); } struct dump_args { struct table_info *ti; struct table_config *tc; struct sockopt_data *sd; uint32_t cnt; uint16_t uidx; int error; ipfw_table_entry *ent; uint32_t size; ipfw_obj_tentry tent; }; int ipfw_dump_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { int error; switch (op3->version) { case 0: error = ipfw_dump_table_v0(ch, sd); break; case 1: error = ipfw_dump_table_v1(ch, sd); break; default: error = ENOTSUP; } return (error); } /* * Dumps all table data * Data layout (v1)(current): * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ] * * Returns 0 on success */ static int ipfw_dump_table_v1(struct ip_fw_chain *ch, struct sockopt_data *sd) { struct _ipfw_obj_header *oh; ipfw_xtable_info *i; struct tid_info ti; struct table_config *tc; struct table_algo *ta; struct dump_args da; uint32_t sz; sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); if (oh == NULL) return (EINVAL); i = (ipfw_xtable_info *)(oh + 1); objheader_to_ti(oh, &ti); IPFW_UH_RLOCK(ch); if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } export_table_info(ch, tc, i); sz = tc->count; if (sd->valsize < sz + tc->count * sizeof(ipfw_obj_tentry)) { /* * Submitted buffer size is not enough. * WE've already filled in @i structure with * relevant table info including size, so we * can return. Buffer will be flushed automatically. */ IPFW_UH_RUNLOCK(ch); return (ENOMEM); } /* * Do the actual dump in eXtended format */ memset(&da, 0, sizeof(da)); da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.sd = sd; ta = tc->ta; ta->foreach(tc->astate, da.ti, dump_table_tentry, &da); IPFW_UH_RUNLOCK(ch); return (da.error); } /* * Dumps all table data * Data layout (version 0)(legacy): * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE() * Reply: [ ipfw_xtable ipfw_table_xentry x N ] * * Returns 0 on success */ static int ipfw_dump_table_v0(struct ip_fw_chain *ch, struct sockopt_data *sd) { ipfw_xtable *xtbl; struct tid_info ti; struct table_config *tc; struct table_algo *ta; struct dump_args da; size_t sz; xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable)); if (xtbl == NULL) return (EINVAL); memset(&ti, 0, sizeof(ti)); ti.uidx = xtbl->tbl; IPFW_UH_RLOCK(ch); if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (0); } sz = tc->count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable); xtbl->cnt = tc->count; xtbl->size = sz; xtbl->type = tc->no.type; xtbl->tbl = ti.uidx; if (sd->valsize < sz) { /* * Submitted buffer size is not enough. * WE've already filled in @i structure with * relevant table info including size, so we * can return. Buffer will be flushed automatically. */ IPFW_UH_RUNLOCK(ch); return (ENOMEM); } /* Do the actual dump in eXtended format */ memset(&da, 0, sizeof(da)); da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.sd = sd; ta = tc->ta; ta->foreach(tc->astate, da.ti, dump_table_xentry, &da); IPFW_UH_RUNLOCK(ch); return (0); } /* * Creates new table. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success */ int ipfw_create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_header *oh; ipfw_xtable_info *i; char *tname, *aname; struct tid_info ti; struct namedobj_instance *ni; struct table_config *tc; if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) return (EINVAL); oh = (struct _ipfw_obj_header *)sd->kbuf; i = (ipfw_xtable_info *)(oh + 1); /* * Verify user-supplied strings. * Check for null-terminated/zero-length strings/ */ tname = oh->ntlv.name; aname = i->algoname; if (ipfw_check_table_name(tname) != 0 || strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname)) return (EINVAL); if (aname[0] == '\0') { /* Use default algorithm */ aname = NULL; } objheader_to_ti(oh, &ti); ti.type = i->type; ni = CHAIN_TO_NI(ch); IPFW_UH_RLOCK(ch); if ((tc = find_table(ni, &ti)) != NULL) { IPFW_UH_RUNLOCK(ch); return (EEXIST); } IPFW_UH_RUNLOCK(ch); return (create_table_internal(ch, &ti, aname, i)); } /* * Creates new table based on @ti and @aname. * * Relies on table name checking inside find_name_tlv() * Assume @aname to be checked and valid. * * Returns 0 on success. */ static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, char *aname, ipfw_xtable_info *i) { struct namedobj_instance *ni; struct table_config *tc; struct table_algo *ta; uint16_t kidx; ni = CHAIN_TO_NI(ch); ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname); if (ta == NULL) return (ENOTSUP); tc = alloc_table_config(ch, ti, ta, aname, i->tflags, i->vtype); if (tc == NULL) return (ENOMEM); tc->limit = i->limit; IPFW_UH_WLOCK(ch); /* Check if table has been already created */ if (find_table(ni, ti) != NULL) { IPFW_UH_WUNLOCK(ch); free_table_config(ni, tc); return (EEXIST); } if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) { IPFW_UH_WUNLOCK(ch); printf("Unable to allocate table index." " Consider increasing net.inet.ip.fw.tables_max"); free_table_config(ni, tc); return (EBUSY); } tc->no.kidx = kidx; IPFW_WLOCK(ch); link_table(ch, tc); IPFW_WUNLOCK(ch); IPFW_UH_WUNLOCK(ch); return (0); } void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti) { memset(ti, 0, sizeof(struct tid_info)); ti->set = oh->ntlv.set; ti->uidx = oh->idx; ti->tlvs = &oh->ntlv; ti->tlen = oh->ntlv.head.length; } int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx, struct sockopt_data *sd) { struct namedobj_instance *ni; struct named_object *no; ipfw_obj_ntlv *ntlv; ni = CHAIN_TO_NI(ch); no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("invalid table kidx passed")); ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); if (ntlv == NULL) return (ENOMEM); ntlv->head.type = IPFW_TLV_TBL_NAME; ntlv->head.length = sizeof(*ntlv); ntlv->idx = no->kidx; strlcpy(ntlv->name, no->name, sizeof(ntlv->name)); return (0); } static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc, ipfw_xtable_info *i) { struct table_info *ti; i->type = tc->no.type; i->tflags = tc->tflags; i->vtype = tc->vtype; i->set = tc->no.set; i->kidx = tc->no.kidx; i->refcnt = tc->no.refcnt; i->count = tc->count; i->limit = tc->limit; i->size = tc->count * sizeof(ipfw_obj_tentry); i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); strlcpy(i->tablename, tc->tablename, sizeof(i->tablename)); if (tc->ta->print_config != NULL) { /* Use algo function to print table config to string */ ti = KIDX_TO_TI(ch, tc->no.kidx); tc->ta->print_config(tc->astate, ti, i->algoname, sizeof(i->algoname)); } else strlcpy(i->algoname, tc->ta->name, sizeof(i->algoname)); } struct dump_table_args { struct ip_fw_chain *ch; struct sockopt_data *sd; }; static void export_table_internal(struct namedobj_instance *ni, struct named_object *no, void *arg) { ipfw_xtable_info *i; struct dump_table_args *dta; dta = (struct dump_table_args *)arg; i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i)); KASSERT(i != 0, ("previously checked buffer is not enough")); export_table_info(dta->ch, (struct table_config *)no, i); } /* * Export all tables as ipfw_xtable_info structures to * storage provided by @sd. * Returns 0 on success. */ static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, struct sockopt_data *sd) { uint32_t size; uint32_t count; struct dump_table_args dta; count = ipfw_objhash_count(CHAIN_TO_NI(ch)); size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader); /* Fill in header regadless of buffer size */ olh->count = count; olh->objsize = sizeof(ipfw_xtable_info); if (size > olh->size) { olh->size = size; return (ENOMEM); } olh->size = size; dta.ch = ch; dta.sd = sd; ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta); return (0); } /* * Legacy IP_FW_TABLE_GETSIZE handler */ int ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) { struct table_config *tc; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) return (ESRCH); *cnt = tc->count; return (0); } /* * Legacy IP_FW_TABLE_XGETSIZE handler */ int ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) { struct table_config *tc; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) { *cnt = 0; return (0); /* 'table all list' requires success */ } *cnt = tc->count * sizeof(ipfw_table_xentry); if (tc->count > 0) *cnt += sizeof(ipfw_xtable); return (0); } static int dump_table_entry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; ipfw_table_entry *ent; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; /* Out of memory, returning */ if (da->cnt == da->size) return (1); ent = da->ent++; ent->tbl = da->uidx; da->cnt++; error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); if (error != 0) return (error); ent->addr = da->tent.k.addr.s_addr; ent->masklen = da->tent.masklen; ent->value = da->tent.value; return (0); } /* * Dumps table in pre-8.1 legacy format. */ int ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, ipfw_table *tbl) { struct table_config *tc; struct table_algo *ta; struct dump_args da; tbl->cnt = 0; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) return (0); /* XXX: We should return ESRCH */ ta = tc->ta; /* This dump format supports IPv4 only */ if (tc->no.type != IPFW_TABLE_CIDR) return (0); memset(&da, 0, sizeof(da)); da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.ent = &tbl->ent[0]; da.size = tbl->size; tbl->cnt = 0; ta->foreach(tc->astate, da.ti, dump_table_entry, &da); tbl->cnt = da.cnt; return (0); } /* * Dumps table entry in eXtended format (v1)(current). */ static int dump_table_tentry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; ipfw_obj_tentry *tent; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent)); /* Out of memory, returning */ if (tent == NULL) { da->error = ENOMEM; return (1); } tent->head.length = sizeof(ipfw_obj_tentry); tent->idx = da->uidx; return (ta->dump_tentry(tc->astate, da->ti, e, tent)); } /* * Dumps table entry in eXtended format (v0). */ static int dump_table_xentry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; ipfw_table_xentry *xent; ipfw_obj_tentry *tent; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent)); /* Out of memory, returning */ if (xent == NULL) return (1); xent->len = sizeof(ipfw_table_xentry); xent->tbl = da->uidx; memset(&da->tent, 0, sizeof(da->tent)); tent = &da->tent; error = ta->dump_tentry(tc->astate, da->ti, e, tent); if (error != 0) return (error); /* Convert current format to previous one */ xent->masklen = tent->masklen; xent->value = tent->value; /* Apply some hacks */ if (tc->no.type == IPFW_TABLE_CIDR && tent->subtype == AF_INET) { xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr; xent->flags = IPFW_TCF_INET; } else memcpy(&xent->k, &tent->k, sizeof(xent->k)); return (0); } /* * Table algorithms */ /* * Finds algoritm by index, table type or supplied name */ static struct table_algo * find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name) { int i, l; struct table_algo *ta; if (ti->type > IPFW_TABLE_MAXTYPE) return (NULL); /* Search by index */ if (ti->atype != 0) { if (ti->atype > tcfg->algo_count) return (NULL); return (tcfg->algo[ti->atype]); } /* Search by name if supplied */ if (name != NULL) { /* TODO: better search */ for (i = 1; i <= tcfg->algo_count; i++) { ta = tcfg->algo[i]; /* * One can supply additional algorithm * parameters so we compare only the first word * of supplied name: * 'hash_cidr hsize=32' * '^^^^^^^^^' * */ l = strlen(ta->name); if (strncmp(name, ta->name, l) == 0) { if (name[l] == '\0' || name[l] == ' ') return (ta); } } return (NULL); } /* Return default algorithm for given type if set */ return (tcfg->def_algo[ti->type]); } +/* + * Register new table algo @ta. + * Stores algo id iside @idx. + * + * Returns 0 on success. + */ int ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, int *idx) { struct tables_config *tcfg; struct table_algo *ta_new; + size_t sz; if (size > sizeof(struct table_algo)) return (EINVAL); + /* Check for the required on-stack size for add/del */ + sz = roundup2(ta->ta_buf_size, sizeof(void *)); + if (sz > TA_BUF_SZ) + return (EINVAL); + KASSERT(ta->type >= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE")); ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO); memcpy(ta_new, ta, size); tcfg = CHAIN_TO_TCFG(ch); KASSERT(tcfg->algo_count < 255, ("Increase algo array size")); tcfg->algo[++tcfg->algo_count] = ta_new; ta_new->idx = tcfg->algo_count; /* Set algorithm as default one for given type */ if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 && tcfg->def_algo[ta_new->type] == NULL) tcfg->def_algo[ta_new->type] = ta_new; *idx = ta_new->idx; return (0); } +/* + * Unregisters table algo using @idx as id. + */ void ipfw_del_table_algo(struct ip_fw_chain *ch, int idx) { struct tables_config *tcfg; struct table_algo *ta; tcfg = CHAIN_TO_TCFG(ch); - KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of rage 1..%d", idx, - tcfg->algo_count)); + KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d", + idx, tcfg->algo_count)); ta = tcfg->algo[idx]; KASSERT(ta != NULL, ("algo idx %d is NULL", idx)); if (tcfg->def_algo[ta->type] == ta) tcfg->def_algo[ta->type] = NULL; free(ta, M_IPFW); } /* * Lists all table algorithms currently available. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ] * * Returns 0 on success */ int ipfw_list_table_algo(struct ip_fw_chain *ch, struct sockopt_data *sd) { struct _ipfw_obj_lheader *olh; struct tables_config *tcfg; ipfw_ta_info *i; struct table_algo *ta; uint32_t count, n, size; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); if (sd->valsize < olh->size) return (EINVAL); IPFW_UH_RLOCK(ch); tcfg = CHAIN_TO_TCFG(ch); count = tcfg->algo_count; size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader); /* Fill in header regadless of buffer size */ olh->count = count; olh->objsize = sizeof(ipfw_ta_info); if (size > olh->size) { olh->size = size; IPFW_UH_RUNLOCK(ch); return (ENOMEM); } olh->size = size; for (n = 1; n <= count; n++) { i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i)); KASSERT(i != 0, ("previously checked buffer is not enough")); ta = tcfg->algo[n]; strlcpy(i->algoname, ta->name, sizeof(i->algoname)); i->type = ta->type; i->refcnt = ta->refcnt; } IPFW_UH_RUNLOCK(ch); return (0); } /* * Tables rewriting code * */ /* * Determine table number and lookup type for @cmd. * Fill @tbl and @type with appropriate values. * Returns 0 for relevant opcodes, 1 otherwise. */ static int classify_table_opcode(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { ipfw_insn_if *cmdif; int skip; uint16_t v; skip = 1; switch (cmd->opcode) { case O_IP_SRC_LOOKUP: case O_IP_DST_LOOKUP: /* Basic IPv4/IPv6 or u32 lookups */ *puidx = cmd->arg1; /* Assume CIDR by default */ *ptype = IPFW_TABLE_CIDR; skip = 0; if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) { /* * generic lookup. The key must be * in 32bit big-endian format. */ v = ((ipfw_insn_u32 *)cmd)->d[1]; switch (v) { case 0: case 1: /* IPv4 src/dst */ break; case 2: case 3: /* src/dst port */ *ptype = IPFW_TABLE_NUMBER; break; case 4: /* uid/gid */ *ptype = IPFW_TABLE_NUMBER; break; case 5: /* jid */ *ptype = IPFW_TABLE_NUMBER; break; case 6: /* dscp */ *ptype = IPFW_TABLE_NUMBER; break; } } break; case O_XMIT: case O_RECV: case O_VIA: /* Interface table, possibly */ cmdif = (ipfw_insn_if *)cmd; if (cmdif->name[0] != '\1') break; *ptype = IPFW_TABLE_INTERFACE; *puidx = cmdif->p.glob; skip = 0; break; case O_IP_FLOW_LOOKUP: *puidx = cmd->arg1; *ptype = IPFW_TABLE_FLOW; skip = 0; break; } return (skip); } /* * Sets new table value for given opcode. * Assume the same opcodes as classify_table_opcode() */ static void update_table_opcode(ipfw_insn *cmd, uint16_t idx) { ipfw_insn_if *cmdif; switch (cmd->opcode) { case O_IP_SRC_LOOKUP: case O_IP_DST_LOOKUP: /* Basic IPv4/IPv6 or u32 lookups */ cmd->arg1 = idx; break; case O_XMIT: case O_RECV: case O_VIA: /* Interface table, possibly */ cmdif = (ipfw_insn_if *)cmd; cmdif->p.glob = idx; break; case O_IP_FLOW_LOOKUP: cmd->arg1 = idx; break; } } /* * Checks table name for validity. * Enforce basic length checks, the rest * should be done in userland. * * Returns 0 if name is considered valid. */ int ipfw_check_table_name(char *name) { int nsize; ipfw_obj_ntlv *ntlv = NULL; nsize = sizeof(ntlv->name); if (strnlen(name, nsize) == nsize) return (EINVAL); if (name[0] == '\0') return (EINVAL); /* * TODO: do some more complicated checks */ return (0); } /* * Find tablename TLV by @uid. * Check @tlvs for valid data inside. * * Returns pointer to found TLV or NULL. */ static ipfw_obj_ntlv * find_name_tlv(void *tlvs, int len, uint16_t uidx) { ipfw_obj_ntlv *ntlv; uintptr_t pa, pe; int l; pa = (uintptr_t)tlvs; pe = pa + len; l = 0; for (; pa < pe; pa += l) { ntlv = (ipfw_obj_ntlv *)pa; l = ntlv->head.length; if (l != sizeof(*ntlv)) return (NULL); if (ntlv->head.type != IPFW_TLV_TBL_NAME) continue; if (ntlv->idx != uidx) continue; if (ipfw_check_table_name(ntlv->name) != 0) return (NULL); return (ntlv); } return (NULL); } /* * Finds table config based on either legacy index * or name in ntlv. * Note @ti structure contains unchecked data from userland. * * Returns pointer to table_config or NULL. */ static struct table_config * find_table(struct namedobj_instance *ni, struct tid_info *ti) { char *name, bname[16]; struct named_object *no; ipfw_obj_ntlv *ntlv; uint32_t set; if (ti->tlvs != NULL) { ntlv = find_name_tlv(ti->tlvs, ti->tlen, ti->uidx); if (ntlv == NULL) return (NULL); name = ntlv->name; set = ntlv->set; } else { snprintf(bname, sizeof(bname), "%d", ti->uidx); name = bname; set = 0; } no = ipfw_objhash_lookup_name(ni, set, name); return ((struct table_config *)no); } static struct table_config * alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, struct table_algo *ta, char *aname, uint8_t tflags, uint8_t vtype) { char *name, bname[16]; struct table_config *tc; int error; ipfw_obj_ntlv *ntlv; uint32_t set; if (ti->tlvs != NULL) { ntlv = find_name_tlv(ti->tlvs, ti->tlen, ti->uidx); if (ntlv == NULL) return (NULL); name = ntlv->name; set = ntlv->set; } else { snprintf(bname, sizeof(bname), "%d", ti->uidx); name = bname; set = 0; } tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO); tc->no.name = tc->tablename; tc->no.type = ti->type; tc->no.set = set; tc->tflags = tflags; tc->ta = ta; strlcpy(tc->tablename, name, sizeof(tc->tablename)); /* Set default value type to u32 for compability reasons */ if (vtype == 0) tc->vtype = IPFW_VTYPE_U32; else tc->vtype = vtype; if (ti->tlvs == NULL) { tc->no.compat = 1; tc->no.uidx = ti->uidx; } /* Preallocate data structures for new tables */ error = ta->init(ch, &tc->astate, &tc->ti, aname, tflags); if (error != 0) { free(tc, M_IPFW); return (NULL); } return (tc); } static void free_table_config(struct namedobj_instance *ni, struct table_config *tc) { if (tc->linked == 0) tc->ta->destroy(tc->astate, &tc->ti); free(tc, M_IPFW); } /* * Links @tc to @chain table named instance. * Sets appropriate type/states in @chain table info. */ static void link_table(struct ip_fw_chain *ch, struct table_config *tc) { struct namedobj_instance *ni; struct table_info *ti; uint16_t kidx; IPFW_UH_WLOCK_ASSERT(ch); IPFW_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; ipfw_objhash_add(ni, &tc->no); ti = KIDX_TO_TI(ch, kidx); *ti = tc->ti; /* Notify algo on real @ti address */ if (tc->ta->change_ti != NULL) tc->ta->change_ti(tc->astate, ti); tc->linked = 1; tc->ta->refcnt++; } /* * Unlinks @tc from @chain table named instance. * Zeroes states in @chain and stores them in @tc. */ static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc) { struct namedobj_instance *ni; struct table_info *ti; uint16_t kidx; IPFW_UH_WLOCK_ASSERT(ch); IPFW_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; /* Clear state. @ti copy is already saved inside @tc */ ipfw_objhash_del(ni, &tc->no); ti = KIDX_TO_TI(ch, kidx); memset(ti, 0, sizeof(struct table_info)); tc->linked = 0; tc->ta->refcnt--; /* Notify algo on real @ti address */ if (tc->ta->change_ti != NULL) tc->ta->change_ti(tc->astate, NULL); } /* * Finds named object by @uidx number. * Refs found object, allocate new index for non-existing object. * Fills in @oib with userland/kernel indexes. * First free oidx pointer is saved back in @oib. * * Returns 0 on success. */ static int bind_table_rule(struct ip_fw_chain *ch, struct ip_fw *rule, struct rule_check_info *ci, struct obj_idx **oib, struct tid_info *ti) { struct table_config *tc; struct namedobj_instance *ni; struct named_object *no; int error, l, cmdlen; ipfw_insn *cmd; struct obj_idx *pidx, *p; pidx = *oib; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; error = 0; IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_table_opcode(cmd, &ti->uidx, &ti->type) != 0) continue; pidx->uidx = ti->uidx; pidx->type = ti->type; if ((tc = find_table(ni, ti)) != NULL) { if (tc->no.type != ti->type) { /* Incompatible types */ error = EINVAL; break; } /* Reference found table and save kidx */ tc->no.refcnt++; pidx->kidx = tc->no.kidx; pidx++; continue; } /* Table not found. Allocate new index and save for later */ if (ipfw_objhash_alloc_idx(ni, &pidx->kidx) != 0) { printf("Unable to allocate table %s index in set %u." " Consider increasing net.inet.ip.fw.tables_max", "", ti->set); error = EBUSY; break; } ci->new_tables++; pidx->new = 1; pidx++; } if (error != 0) { /* Unref everything we have already done */ for (p = *oib; p < pidx; p++) { if (p->new != 0) { ipfw_objhash_free_idx(ni, p->kidx); continue; } /* Find & unref by existing idx */ no = ipfw_objhash_lookup_kidx(ni, p->kidx); KASSERT(no != NULL, ("Ref'd table %d disappeared", p->kidx)); no->refcnt--; } } IPFW_UH_WUNLOCK(ch); *oib = pidx; return (error); } /* * Compatibility function for old ipfw(8) binaries. * Rewrites table kernel indices with userland ones. * Works for \d+ talbes only (e.g. for tables, converted * from old numbered system calls). * * Returns 0 on success. * Raises error on any other tables. */ int ipfw_rewrite_table_kidx(struct ip_fw_chain *chain, struct ip_fw_rule0 *rule) { int cmdlen, error, l; ipfw_insn *cmd; uint16_t kidx, uidx; uint8_t type; struct named_object *no; struct namedobj_instance *ni; ni = CHAIN_TO_NI(chain); error = 0; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_table_opcode(cmd, &kidx, &type) != 0) continue; if ((no = ipfw_objhash_lookup_kidx(ni, kidx)) == NULL) return (1); uidx = no->uidx; if (no->compat == 0) { /* * We are called via legacy opcode. * Save error and show table as fake number * not to make ipfw(8) hang. */ uidx = 65535; error = 2; } update_table_opcode(cmd, uidx); } return (error); } /* * Sets every table kidx in @bmask which is used in rule @rule. * * Returns number of newly-referenced tables. */ int ipfw_mark_table_kidx(struct ip_fw_chain *chain, struct ip_fw *rule, uint32_t *bmask) { int cmdlen, l, count; ipfw_insn *cmd; uint16_t kidx; uint8_t type; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; count = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_table_opcode(cmd, &kidx, &type) != 0) continue; if ((bmask[kidx / 32] & (1 << (kidx % 32))) == 0) count++; bmask[kidx / 32] |= 1 << (kidx % 32); } return (count); } /* * Checks is opcode is referencing table of appropriate type. * Adds reference count for found table if true. * Rewrites user-supplied opcode values with kernel ones. * * Returns 0 on success and appropriate error code otherwise. */ int ipfw_rewrite_table_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci) { int cmdlen, error, ftype, l; ipfw_insn *cmd; uint16_t uidx; uint8_t type; struct table_config *tc; struct table_algo *ta; struct namedobj_instance *ni; struct named_object *no, *no_n, *no_tmp; struct obj_idx *p, *pidx_first, *pidx_last; struct namedobjects_head nh; struct tid_info ti; ni = CHAIN_TO_NI(chain); /* Prepare queue to store configs */ TAILQ_INIT(&nh); /* * Prepare an array for storing opcode indices. * Use stack allocation by default. */ if (ci->table_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) { /* Stack */ pidx_first = ci->obuf; } else pidx_first = malloc(ci->table_opcodes * sizeof(struct obj_idx), M_IPFW, M_WAITOK | M_ZERO); pidx_last = pidx_first; error = 0; type = 0; ftype = 0; memset(&ti, 0, sizeof(ti)); /* * Use default set for looking up tables (old way) or * use set rule is assigned to (new way). */ ti.set = (V_fw_tables_sets != 0) ? ci->krule->set : 0; if (ci->ctlv != NULL) { ti.tlvs = (void *)(ci->ctlv + 1); ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv); } /* * Stage 1: reference existing tables, determine number * of tables we need to allocate and allocate indexes for each. */ error = bind_table_rule(chain, ci->krule, ci, &pidx_last, &ti); if (error != 0) { if (pidx_first != ci->obuf) free(pidx_first, M_IPFW); return (error); } /* * Stage 2: allocate table configs for every non-existent table */ if (ci->new_tables > 0) { for (p = pidx_first; p < pidx_last; p++) { if (p->new == 0) continue; ti.uidx = p->uidx; ti.type = p->type; ti.atype = 0; ta = find_table_algo(CHAIN_TO_TCFG(chain), &ti, NULL); if (ta == NULL) { error = ENOTSUP; goto free; } tc = alloc_table_config(chain, &ti, ta, NULL, 0, IPFW_VTYPE_U32); if (tc == NULL) { error = ENOMEM; goto free; } tc->no.kidx = p->kidx; tc->no.refcnt = 1; /* Add to list */ TAILQ_INSERT_TAIL(&nh, &tc->no, nn_next); } /* * Stage 2.1: Check if we're going to create 2 tables * with the same name, but different table types. */ TAILQ_FOREACH(no, &nh, nn_next) { TAILQ_FOREACH(no_tmp, &nh, nn_next) { if (ipfw_objhash_same_name(ni, no, no_tmp) == 0) continue; if (no->type != no_tmp->type) { error = EINVAL; goto free; } } } } IPFW_UH_WLOCK(chain); if (ci->new_tables > 0) { /* * Stage 3: link & reference new table configs */ /* * Step 3.1: Check if some tables we need to create have been * already created with different table type. */ error = 0; TAILQ_FOREACH_SAFE(no, &nh, nn_next, no_tmp) { no_n = ipfw_objhash_lookup_name(ni, no->set, no->name); if (no_n == NULL) continue; if (no_n->type != no->type) { error = EINVAL; break; } } if (error != 0) { /* * Someone has allocated table with different table type. * We have to rollback everything. */ IPFW_UH_WUNLOCK(chain); goto free; } /* * Attach new tables. * We need to set table pointers for each new table, * so we have to acquire main WLOCK. */ IPFW_WLOCK(chain); TAILQ_FOREACH_SAFE(no, &nh, nn_next, no_tmp) { no_n = ipfw_objhash_lookup_name(ni, no->set, no->name); if (no_n == NULL) { /* New table. Attach to runtime hash */ TAILQ_REMOVE(&nh, no, nn_next); link_table(chain, (struct table_config *)no); continue; } /* * Newly-allocated table with the same type. * Reference it and update out @pidx array * rewrite info. */ no_n->refcnt++; /* Keep oib array in sync: update kidx */ for (p = pidx_first; p < pidx_last; p++) { if (p->kidx != no->kidx) continue; /* Update kidx */ p->kidx = no_n->kidx; break; } } IPFW_WUNLOCK(chain); } /* Perform rule rewrite */ l = ci->krule->cmd_len; cmd = ci->krule->cmd; cmdlen = 0; p = pidx_first; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_table_opcode(cmd, &uidx, &type) != 0) continue; update_table_opcode(cmd, p->kidx); p++; } IPFW_UH_WUNLOCK(chain); error = 0; /* * Stage 4: free resources */ free: if (!TAILQ_EMPTY(&nh)) { /* Free indexes first */ IPFW_UH_WLOCK(chain); TAILQ_FOREACH_SAFE(no, &nh, nn_next, no_tmp) { ipfw_objhash_free_idx(ni, no->kidx); } IPFW_UH_WUNLOCK(chain); /* Free configs */ TAILQ_FOREACH_SAFE(no, &nh, nn_next, no_tmp) free_table_config(ni, tc); } if (pidx_first != ci->obuf) free(pidx_first, M_IPFW); return (error); } /* * Remove references from every table used in @rule. */ void ipfw_unbind_table_rule(struct ip_fw_chain *chain, struct ip_fw *rule) { int cmdlen, l; ipfw_insn *cmd; struct namedobj_instance *ni; struct named_object *no; uint16_t kidx; uint8_t type; ni = CHAIN_TO_NI(chain); l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_table_opcode(cmd, &kidx, &type) != 0) continue; no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("table id %d not found", kidx)); KASSERT(no->type == type, ("wrong type %d (%d) for table id %d", no->type, type, kidx)); KASSERT(no->refcnt > 0, ("refcount for table %d is %d", kidx, no->refcnt)); no->refcnt--; } } /* * Removes table bindings for every rule in rule chain @head. */ void ipfw_unbind_table_list(struct ip_fw_chain *chain, struct ip_fw *head) { struct ip_fw *rule; while ((rule = head) != NULL) { head = head->x_next; ipfw_unbind_table_rule(chain, rule); } } /* end of file */ Index: projects/ipfw/sys/netpfil/ipfw/ip_fw_table.h =================================================================== --- projects/ipfw/sys/netpfil/ipfw/ip_fw_table.h (revision 269434) +++ projects/ipfw/sys/netpfil/ipfw/ip_fw_table.h (revision 269435) @@ -1,184 +1,187 @@ /*- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD: projects/ipfw/sys/netpfil/ipfw/ip_fw_private.h 267467 2014-06-14 10:58:39Z melifaro $ */ #ifndef _IPFW2_TABLE_H #define _IPFW2_TABLE_H /* * Internal constants and data structures used by ipfw tables * and not meant to be exported outside the kernel. */ #ifdef _KERNEL struct table_info { table_lookup_t *lookup; /* Lookup function */ void *state; /* Lookup radix/other structure */ void *xstate; /* eXtended state */ u_long data; /* Hints for given func */ }; /* Internal structures for handling sockopt data */ struct tid_info { uint32_t set; /* table set */ uint16_t uidx; /* table index */ uint8_t type; /* table type */ uint8_t atype; void *tlvs; /* Pointer to first TLV */ int tlen; /* Total TLV size block */ }; struct tentry_info { void *paddr; uint8_t masklen; /* mask length */ uint8_t subtype; uint16_t flags; /* record flags */ uint32_t value; /* value */ }; #define TEI_FLAGS_UPDATE 0x01 /* Add or update rec if exists */ #define TEI_FLAGS_UPDATED 0x02 /* Entry has been updated */ #define TEI_FLAGS_COMPAT 0x04 /* Called from old ABI */ #define TEI_FLAGS_DONTADD 0x08 /* Do not create new rec */ typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); typedef void (ta_destroy)(void *ta_state, struct table_info *ti); typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); typedef int (ta_add)(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint64_t *pflags, uint32_t *pnum); + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); typedef int (ta_del)(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint64_t *pflags, uint32_t *pnum); + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); typedef void (ta_flush_entry)(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); +typedef int (ta_has_space)(void *ta_state, struct table_info *ti, + uint32_t count, uint64_t *pflags); typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags); typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags); typedef int (ta_modify)(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags); typedef void (ta_flush_mod)(void *ta_buf); typedef void (ta_change_ti)(void *ta_state, struct table_info *ti); typedef void (ta_print_config)(void *ta_state, struct table_info *ti, char *buf, size_t bufsize); typedef int ta_foreach_f(void *node, void *arg); typedef void ta_foreach(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); typedef int ta_find_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); typedef int ta_dump_tinfo(void *ta_state, struct table_info *ti, ifpw_ta_tinfo *tinfo); struct table_algo { char name[16]; uint32_t idx; uint32_t type; uint32_t refcnt; uint32_t flags; size_t ta_buf_size; ta_init *init; ta_destroy *destroy; ta_prepare_add *prepare_add; ta_prepare_del *prepare_del; ta_add *add; ta_del *del; ta_flush_entry *flush_entry; ta_find_tentry *find_tentry; + ta_has_space *has_space; ta_prepare_mod *prepare_mod; ta_fill_mod *fill_mod; ta_modify *modify; ta_flush_mod *flush_mod; ta_change_ti *change_ti; ta_foreach *foreach; ta_dump_tentry *dump_tentry; ta_print_config *print_config; ta_dump_tinfo *dump_tinfo; }; #define TA_FLAG_DEFAULT 0x01 /* Algorithm is default for given type */ int ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, int *idx); void ipfw_del_table_algo(struct ip_fw_chain *ch, int idx); void ipfw_table_algo_init(struct ip_fw_chain *chain); void ipfw_table_algo_destroy(struct ip_fw_chain *chain); /* direct ipfw_ctl handlers */ int ipfw_listsize_tables(struct ip_fw_chain *ch, struct sockopt_data *sd); int ipfw_list_tables(struct ip_fw_chain *ch, struct sockopt_data *sd); int ipfw_dump_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); int ipfw_describe_table(struct ip_fw_chain *ch, struct sockopt_data *sd); int ipfw_find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); int ipfw_create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); int ipfw_manage_table_ent(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); int ipfw_flush_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); int ipfw_list_table_algo(struct ip_fw_chain *ch, struct sockopt_data *sd); /* Exported to support legacy opcodes */ int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei); + struct tentry_info *tei, uint32_t count); int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei); + struct tentry_info *tei, uint32_t count); int flush_table(struct ip_fw_chain *ch, struct tid_info *ti); int ipfw_rewrite_table_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci); int ipfw_rewrite_table_kidx(struct ip_fw_chain *chain, struct ip_fw_rule0 *rule); int ipfw_mark_table_kidx(struct ip_fw_chain *chain, struct ip_fw *rule, uint32_t *bmask); int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx, struct sockopt_data *sd); void ipfw_unbind_table_rule(struct ip_fw_chain *chain, struct ip_fw *rule); void ipfw_unbind_table_list(struct ip_fw_chain *chain, struct ip_fw *head); /* utility functions */ void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti); int ipfw_check_table_name(char *name); /* Legacy interfaces */ int ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt); int ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt); int ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, ipfw_table *tbl); #endif /* _KERNEL */ #endif /* _IPFW2_TABLE_H */ Index: projects/ipfw/sys/netpfil/ipfw/ip_fw_table_algo.c =================================================================== --- projects/ipfw/sys/netpfil/ipfw/ip_fw_table_algo.c (revision 269434) +++ projects/ipfw/sys/netpfil/ipfw/ip_fw_table_algo.c (revision 269435) @@ -1,3120 +1,3219 @@ /*- * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD: projects/ipfw/sys/netpfil/ipfw/ip_fw_table.c 267384 2014-06-12 09:59:11Z melifaro $"); /* * Lookup table algorithms. * */ #include "opt_ipfw.h" #include "opt_inet.h" #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include /* ip_fw.h requires IFNAMSIZ */ #include #include #include #include #include /* struct ipfw_rule_ref */ #include #include #include static MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); static int badd(const void *key, void *item, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)); static int bdel(const void *key, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)); /* * CIDR implementation using radix * */ /* * The radix code expects addr and mask to be array of bytes, * with the first byte being the length of the array. rn_inithead * is called with the offset in bits of the lookup key within the * array. If we use a sockaddr_in as the underlying type, * sin_len is conveniently located at offset 0, sin_addr is at * offset 4 and normally aligned. * But for portability, let's avoid assumption and make the code explicit */ #define KEY_LEN(v) *((uint8_t *)&(v)) /* * Do not require radix to compare more than actual IPv4/IPv6 address */ #define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t)) #define KEY_LEN_INET6 (offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr)) #define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr)) #define OFF_LEN_INET6 (8 * offsetof(struct sa_in6, sin6_addr)) struct radix_cidr_entry { struct radix_node rn[2]; struct sockaddr_in addr; uint32_t value; uint8_t masklen; }; struct sa_in6 { uint8_t sin6_len; uint8_t sin6_family; uint8_t pad[2]; struct in6_addr sin6_addr; }; struct radix_cidr_xentry { struct radix_node rn[2]; struct sa_in6 addr6; uint32_t value; uint8_t masklen; }; static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct radix_node_head *rnh; if (keylen == sizeof(in_addr_t)) { struct radix_cidr_entry *ent; struct sockaddr_in sa; KEY_LEN(sa) = KEY_LEN_INET; sa.sin_addr.s_addr = *((in_addr_t *)key); rnh = (struct radix_node_head *)ti->state; ent = (struct radix_cidr_entry *)(rnh->rnh_matchaddr(&sa, rnh)); if (ent != NULL) { *val = ent->value; return (1); } } else { struct radix_cidr_xentry *xent; struct sa_in6 sa6; KEY_LEN(sa6) = KEY_LEN_INET6; memcpy(&sa6.sin6_addr, key, sizeof(struct in6_addr)); rnh = (struct radix_node_head *)ti->xstate; xent = (struct radix_cidr_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); if (xent != NULL) { *val = xent->value; return (1); } } return (0); } /* * New table */ static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { if (!rn_inithead(&ti->state, OFF_LEN_INET)) return (ENOMEM); if (!rn_inithead(&ti->xstate, OFF_LEN_INET6)) { rn_detachhead(&ti->state); return (ENOMEM); } *ta_state = NULL; ti->lookup = ta_lookup_radix; return (0); } static int flush_table_entry(struct radix_node *rn, void *arg) { struct radix_node_head * const rnh = arg; struct radix_cidr_entry *ent; ent = (struct radix_cidr_entry *) rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); if (ent != NULL) free(ent, M_IPFW_TBL); return (0); } static void ta_destroy_radix(void *ta_state, struct table_info *ti) { struct radix_node_head *rnh; rnh = (struct radix_node_head *)(ti->state); rnh->rnh_walktree(rnh, flush_table_entry, rnh); rn_detachhead(&ti->state); rnh = (struct radix_node_head *)(ti->xstate); rnh->rnh_walktree(rnh, flush_table_entry, rnh); rn_detachhead(&ti->xstate); } static int ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct radix_cidr_entry *n; struct radix_cidr_xentry *xn; n = (struct radix_cidr_entry *)e; /* Guess IPv4/IPv6 radix by sockaddr family */ if (n->addr.sin_family == AF_INET) { tent->k.addr.s_addr = n->addr.sin_addr.s_addr; tent->masklen = n->masklen; tent->subtype = AF_INET; tent->value = n->value; #ifdef INET6 } else { xn = (struct radix_cidr_xentry *)e; memcpy(&tent->k, &xn->addr6.sin6_addr, sizeof(struct in6_addr)); tent->masklen = xn->masklen; tent->subtype = AF_INET6; tent->value = xn->value; #endif } return (0); } static int ta_find_radix_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct radix_node_head *rnh; void *e; e = NULL; if (tent->subtype == AF_INET) { struct sockaddr_in sa; KEY_LEN(sa) = KEY_LEN_INET; sa.sin_addr.s_addr = tent->k.addr.s_addr; rnh = (struct radix_node_head *)ti->state; e = rnh->rnh_matchaddr(&sa, rnh); } else { struct sa_in6 sa6; KEY_LEN(sa6) = KEY_LEN_INET6; memcpy(&sa6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr)); rnh = (struct radix_node_head *)ti->xstate; e = rnh->rnh_matchaddr(&sa6, rnh); } if (e != NULL) { ta_dump_radix_tentry(ta_state, ti, e, tent); return (0); } return (ENOENT); } static void ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct radix_node_head *rnh; rnh = (struct radix_node_head *)(ti->state); rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg); rnh = (struct radix_node_head *)(ti->xstate); rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg); } struct ta_buf_cidr { void *ent_ptr; struct sockaddr *addr_ptr; struct sockaddr *mask_ptr; union { struct { struct sockaddr_in sa; struct sockaddr_in ma; } a4; struct { struct sa_in6 sa; struct sa_in6 ma; } a6; } addr; }; #ifdef INET6 static inline void ipv6_writemask(struct in6_addr *addr6, uint8_t mask) { uint32_t *cp; for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) *cp++ = 0xFFFFFFFF; *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); } #endif static void tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa, struct sockaddr *ma, int *set_mask) { int mlen; struct sockaddr_in *addr, *mask; struct sockaddr_in6 *addr6, *mask6; in_addr_t a4; mlen = tei->masklen; if (tei->subtype == AF_INET) { #ifdef INET addr = (struct sockaddr_in *)sa; mask = (struct sockaddr_in *)ma; /* Set 'total' structure length */ KEY_LEN(*addr) = KEY_LEN_INET; KEY_LEN(*mask) = KEY_LEN_INET; addr->sin_family = AF_INET; mask->sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); a4 = *((in_addr_t *)tei->paddr); addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr; if (mlen != 32) *set_mask = 1; else *set_mask = 0; #endif #ifdef INET6 } else if (tei->subtype == AF_INET6) { /* IPv6 case */ addr6 = (struct sockaddr_in6 *)sa; mask6 = (struct sockaddr_in6 *)ma; /* Set 'total' structure length */ KEY_LEN(*addr6) = KEY_LEN_INET6; KEY_LEN(*mask6) = KEY_LEN_INET6; addr6->sin6_family = AF_INET6; ipv6_writemask(&mask6->sin6_addr, mlen); memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr)); APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr); if (mlen != 128) *set_mask = 1; else *set_mask = 0; } #endif } static int ta_prepare_add_cidr(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_cidr *tb; struct radix_cidr_entry *ent; struct radix_cidr_xentry *xent; struct sockaddr *addr, *mask; int mlen, set_mask; tb = (struct ta_buf_cidr *)ta_buf; mlen = tei->masklen; set_mask = 0; if (tei->subtype == AF_INET) { #ifdef INET if (mlen > 32) return (EINVAL); ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); ent->value = tei->value; ent->masklen = mlen; addr = (struct sockaddr *)&ent->addr; mask = (struct sockaddr *)&tb->addr.a4.ma; tb->ent_ptr = ent; #endif #ifdef INET6 } else if (tei->subtype == AF_INET6) { /* IPv6 case */ if (mlen > 128) return (EINVAL); xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); xent->value = tei->value; xent->masklen = mlen; addr = (struct sockaddr *)&xent->addr6; mask = (struct sockaddr *)&tb->addr.a6.ma; tb->ent_ptr = xent; #endif } else { /* Unknown CIDR type */ return (EINVAL); } tei_to_sockaddr_ent(tei, addr, mask, &set_mask); /* Set pointers */ tb->addr_ptr = addr; if (set_mask != 0) tb->mask_ptr = mask; return (0); } static int ta_add_cidr(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct radix_node_head *rnh; struct radix_node *rn; struct ta_buf_cidr *tb; uint32_t value; tb = (struct ta_buf_cidr *)ta_buf; if (tei->subtype == AF_INET) rnh = ti->state; else rnh = ti->xstate; /* Search for an entry first */ rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, rnh); if (rn != NULL) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Record already exists. Update value if we're asked to */ if (tei->subtype == AF_INET) { /* IPv4. */ value = ((struct radix_cidr_entry *)tb->ent_ptr)->value; ((struct radix_cidr_entry *)rn)->value = value; } else { /* IPv6 */ value = ((struct radix_cidr_xentry *)tb->ent_ptr)->value; ((struct radix_cidr_xentry *)rn)->value = value; } /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; return (0); } if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, rnh, tb->ent_ptr); if (rn == NULL) { /* Unknown error */ return (EINVAL); } tb->ent_ptr = NULL; *pnum = 1; return (0); } static int ta_prepare_del_cidr(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_cidr *tb; struct sockaddr *addr, *mask; int mlen, set_mask; tb = (struct ta_buf_cidr *)ta_buf; mlen = tei->masklen; set_mask = 0; if (tei->subtype == AF_INET) { if (mlen > 32) return (EINVAL); addr = (struct sockaddr *)&tb->addr.a4.sa; mask = (struct sockaddr *)&tb->addr.a4.ma; #ifdef INET6 } else if (tei->subtype == AF_INET6) { if (mlen > 128) return (EINVAL); addr = (struct sockaddr *)&tb->addr.a6.sa; mask = (struct sockaddr *)&tb->addr.a6.ma; #endif } else return (EINVAL); tei_to_sockaddr_ent(tei, addr, mask, &set_mask); tb->addr_ptr = addr; if (set_mask != 0) tb->mask_ptr = mask; return (0); } static int ta_del_cidr(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct radix_node_head *rnh; struct radix_node *rn; struct ta_buf_cidr *tb; tb = (struct ta_buf_cidr *)ta_buf; if (tei->subtype == AF_INET) rnh = ti->state; else rnh = ti->xstate; rn = rnh->rnh_deladdr(tb->addr_ptr, tb->mask_ptr, rnh); tb->ent_ptr = rn; if (rn == NULL) return (ENOENT); *pnum = 1; return (0); } static void ta_flush_cidr_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_cidr *tb; tb = (struct ta_buf_cidr *)ta_buf; if (tb->ent_ptr != NULL) free(tb->ent_ptr, M_IPFW_TBL); } +static int +ta_has_space_radix(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + + /* + * radix does not not require additional memory allocations + * other than nodes itself. Adding new masks to the tree do + * but we don't have any API to call (and we don't known which + * sizes do we need). + */ + return (1); +} + struct table_algo cidr_radix = { .name = "cidr:radix", .type = IPFW_TABLE_CIDR, .flags = TA_FLAG_DEFAULT, .ta_buf_size = sizeof(struct ta_buf_cidr), .init = ta_init_radix, .destroy = ta_destroy_radix, .prepare_add = ta_prepare_add_cidr, .prepare_del = ta_prepare_del_cidr, .add = ta_add_cidr, .del = ta_del_cidr, .flush_entry = ta_flush_cidr_entry, .foreach = ta_foreach_radix, .dump_tentry = ta_dump_radix_tentry, .find_tentry = ta_find_radix_tentry, + .has_space = ta_has_space_radix, }; /* * cidr:hash cmds * * * ti->data: * [inv.mask4][inv.mask6][log2hsize4][log2hsize6] * [ 8][ 8[ 8][ 8] * * inv.mask4: 32 - mask * inv.mask6: * 1) _slow lookup: mask * 2) _aligned: (128 - mask) / 8 * 3) _64: 8 * * * pflags: * [v4=1/v6=0][hsize] * [ 32][ 32] */ struct chashentry; SLIST_HEAD(chashbhead, chashentry); struct chash_cfg { struct chashbhead *head4; struct chashbhead *head6; size_t size4; size_t size6; size_t items4; size_t items6; uint8_t mask4; uint8_t mask6; }; struct chashentry { SLIST_ENTRY(chashentry) next; uint32_t value; uint32_t type; union { uint32_t a4; /* Host format */ struct in6_addr a6; /* Network format */ } a; }; static __inline uint32_t hash_ip(uint32_t addr, int hsize) { return (addr % (hsize - 1)); } static __inline uint32_t hash_ip6(struct in6_addr *addr6, int hsize) { uint32_t i; i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1] ^ addr6->s6_addr32[2] ^ addr6->s6_addr32[3]; return (i % (hsize - 1)); } static __inline uint16_t hash_ip64(struct in6_addr *addr6, int hsize) { uint32_t i; i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1]; return (i % (hsize - 1)); } static __inline uint32_t hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize) { struct in6_addr mask6; ipv6_writemask(&mask6, mask); memcpy(addr6, key, sizeof(struct in6_addr)); APPLY_MASK(addr6, &mask6); return (hash_ip6(addr6, hsize)); } static __inline uint32_t hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize) { uint64_t *paddr; paddr = (uint64_t *)addr6; *paddr = 0; *(paddr + 1) = 0; memcpy(addr6, key, mask); return (hash_ip6(addr6, hsize)); } static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct chashbhead *head; struct chashentry *ent; uint16_t hash, hsize; uint8_t imask; if (keylen == sizeof(in_addr_t)) { head = (struct chashbhead *)ti->state; imask = ti->data >> 24; hsize = 1 << ((ti->data & 0xFFFF) >> 8); uint32_t a; a = ntohl(*((in_addr_t *)key)); a = a >> imask; hash = hash_ip(a, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; return (1); } } } else { /* IPv6: worst scenario: non-round mask */ struct in6_addr addr6; head = (struct chashbhead *)ti->xstate; imask = (ti->data & 0xFF0000) >> 16; hsize = 1 << (ti->data & 0xFF); hash = hash_ip6_slow(&addr6, key, imask, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (memcmp(&ent->a.a6, &addr6, 16) == 0) { *val = ent->value; return (1); } } } return (0); } static int ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct chashbhead *head; struct chashentry *ent; uint16_t hash, hsize; uint8_t imask; if (keylen == sizeof(in_addr_t)) { head = (struct chashbhead *)ti->state; imask = ti->data >> 24; hsize = 1 << ((ti->data & 0xFFFF) >> 8); uint32_t a; a = ntohl(*((in_addr_t *)key)); a = a >> imask; hash = hash_ip(a, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; return (1); } } } else { /* IPv6: aligned to 8bit mask */ struct in6_addr addr6; uint64_t *paddr, *ptmp; head = (struct chashbhead *)ti->xstate; imask = (ti->data & 0xFF0000) >> 16; hsize = 1 << (ti->data & 0xFF); hash = hash_ip6_al(&addr6, key, imask, hsize); paddr = (uint64_t *)&addr6; SLIST_FOREACH(ent, &head[hash], next) { ptmp = (uint64_t *)&ent->a.a6; if (paddr[0] == ptmp[0] && paddr[1] == ptmp[1]) { *val = ent->value; return (1); } } } return (0); } static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct chashbhead *head; struct chashentry *ent; uint16_t hash, hsize; uint8_t imask; if (keylen == sizeof(in_addr_t)) { head = (struct chashbhead *)ti->state; imask = ti->data >> 24; hsize = 1 << ((ti->data & 0xFFFF) >> 8); uint32_t a; a = ntohl(*((in_addr_t *)key)); a = a >> imask; hash = hash_ip(a, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; return (1); } } } else { /* IPv6: /64 */ uint64_t a6, *paddr; head = (struct chashbhead *)ti->xstate; paddr = (uint64_t *)key; hsize = 1 << (ti->data & 0xFF); a6 = *paddr; hash = hash_ip64((struct in6_addr *)key, hsize); SLIST_FOREACH(ent, &head[hash], next) { paddr = (uint64_t *)&ent->a.a6; if (a6 == *paddr) { *val = ent->value; return (1); } } } return (0); } static int chash_parse_opts(struct chash_cfg *ccfg, char *data) { char *pdel, *pend, *s; int mask4, mask6; mask4 = ccfg->mask4; mask6 = ccfg->mask6; if (data == NULL) return (0); if ((pdel = strchr(data, ' ')) == NULL) return (0); while (*pdel == ' ') pdel++; if (strncmp(pdel, "masks=", 6) != 0) return (EINVAL); if ((s = strchr(pdel, ' ')) != NULL) *s++ = '\0'; pdel += 6; /* Need /XX[,/YY] */ if (*pdel++ != '/') return (EINVAL); mask4 = strtol(pdel, &pend, 10); if (*pend == ',') { /* ,/YY */ pdel = pend + 1; if (*pdel++ != '/') return (EINVAL); mask6 = strtol(pdel, &pend, 10); if (*pend != '\0') return (EINVAL); } else if (*pend != '\0') return (EINVAL); if (mask4 < 0 || mask4 > 32 || mask6 < 0 || mask6 > 128) return (EINVAL); ccfg->mask4 = mask4; ccfg->mask6 = mask6; return (0); } static void ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf, size_t bufsize) { struct chash_cfg *ccfg; ccfg = (struct chash_cfg *)ta_state; if (ccfg->mask4 != 32 || ccfg->mask6 != 128) snprintf(buf, bufsize, "%s masks=/%d,/%d", "cidr:hash", ccfg->mask4, ccfg->mask6); else snprintf(buf, bufsize, "%s", "cidr:hash"); } static int log2(uint32_t v) { uint32_t r; r = 0; while (v >>= 1) r++; return (r); } /* * New table. * We assume 'data' to be either NULL or the following format: * 'cidr:hash [masks=/32[,/128]]' */ static int ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { int error, i; uint32_t hsize; struct chash_cfg *ccfg; ccfg = malloc(sizeof(struct chash_cfg), M_IPFW, M_WAITOK | M_ZERO); ccfg->mask4 = 32; ccfg->mask6 = 128; if ((error = chash_parse_opts(ccfg, data)) != 0) { free(ccfg, M_IPFW); return (error); } ccfg->size4 = 128; ccfg->size6 = 128; ccfg->head4 = malloc(sizeof(struct chashbhead) * ccfg->size4, M_IPFW, M_WAITOK | M_ZERO); ccfg->head6 = malloc(sizeof(struct chashbhead) * ccfg->size6, M_IPFW, M_WAITOK | M_ZERO); for (i = 0; i < ccfg->size4; i++) SLIST_INIT(&ccfg->head4[i]); for (i = 0; i < ccfg->size6; i++) SLIST_INIT(&ccfg->head6[i]); *ta_state = ccfg; ti->state = ccfg->head4; ti->xstate = ccfg->head6; /* Store data depending on v6 mask length */ hsize = log2(ccfg->size4) << 8 | log2(ccfg->size6); if (ccfg->mask6 == 64) { ti->data = (32 - ccfg->mask4) << 24 | (128 - ccfg->mask6) << 16| hsize; ti->lookup = ta_lookup_chash_64; } else if ((ccfg->mask6 % 8) == 0) { ti->data = (32 - ccfg->mask4) << 24 | ccfg->mask6 << 13 | hsize; ti->lookup = ta_lookup_chash_aligned; } else { /* don't do that! */ ti->data = (32 - ccfg->mask4) << 24 | ccfg->mask6 << 16 | hsize; ti->lookup = ta_lookup_chash_slow; } return (0); } static void ta_destroy_chash(void *ta_state, struct table_info *ti) { struct chash_cfg *ccfg; struct chashentry *ent, *ent_next; int i; ccfg = (struct chash_cfg *)ta_state; for (i = 0; i < ccfg->size4; i++) SLIST_FOREACH_SAFE(ent, &ccfg->head4[i], next, ent_next) free(ent, M_IPFW_TBL); for (i = 0; i < ccfg->size6; i++) SLIST_FOREACH_SAFE(ent, &ccfg->head6[i], next, ent_next) free(ent, M_IPFW_TBL); free(ccfg->head4, M_IPFW); free(ccfg->head6, M_IPFW); free(ccfg, M_IPFW); } static int ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct chash_cfg *ccfg; struct chashentry *ent; ccfg = (struct chash_cfg *)ta_state; ent = (struct chashentry *)e; if (ent->type == AF_INET) { tent->k.addr.s_addr = htonl(ent->a.a4 << (32 - ccfg->mask4)); tent->masklen = ccfg->mask4; tent->subtype = AF_INET; tent->value = ent->value; #ifdef INET6 } else { memcpy(&tent->k, &ent->a.a6, sizeof(struct in6_addr)); tent->masklen = ccfg->mask6; tent->subtype = AF_INET6; tent->value = ent->value; #endif } return (0); } static uint32_t hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size) { uint32_t hash; if (af == AF_INET) { hash = hash_ip(ent->a.a4, size); } else { if (mlen == 64) hash = hash_ip64(&ent->a.a6, size); else hash = hash_ip6(&ent->a.a6, size); } return (hash); } static int tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent) { struct in6_addr mask6; int mlen; mlen = tei->masklen; if (tei->subtype == AF_INET) { #ifdef INET if (mlen > 32) return (EINVAL); ent->type = AF_INET; /* Calculate masked address */ ent->a.a4 = ntohl(*((in_addr_t *)tei->paddr)) >> (32 - mlen); #endif #ifdef INET6 } else if (tei->subtype == AF_INET6) { /* IPv6 case */ if (mlen > 128) return (EINVAL); ent->type = AF_INET6; ipv6_writemask(&mask6, mlen); memcpy(&ent->a.a6, tei->paddr, sizeof(struct in6_addr)); APPLY_MASK(&ent->a.a6, &mask6); #endif } else { /* Unknown CIDR type */ return (EINVAL); } ent->value = tei->value; return (0); } static int ta_find_chash_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct chash_cfg *ccfg; struct chashbhead *head; struct chashentry ent, *tmp; struct tentry_info tei; int error; uint32_t hash; ccfg = (struct chash_cfg *)ta_state; memset(&ent, 0, sizeof(ent)); memset(&tei, 0, sizeof(tei)); if (tent->subtype == AF_INET) { tei.paddr = &tent->k.addr; tei.masklen = ccfg->mask4; tei.subtype = AF_INET; if ((error = tei_to_chash_ent(&tei, &ent)) != 0) return (error); head = ccfg->head4; hash = hash_ent(&ent, AF_INET, ccfg->mask4, ccfg->size4); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (tmp->a.a4 != ent.a.a4) continue; ta_dump_chash_tentry(ta_state, ti, tmp, tent); return (0); } } else { tei.paddr = &tent->k.addr6; tei.masklen = ccfg->mask6; tei.subtype = AF_INET6; if ((error = tei_to_chash_ent(&tei, &ent)) != 0) return (error); head = ccfg->head6; hash = hash_ent(&ent, AF_INET6, ccfg->mask6, ccfg->size6); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (memcmp(&tmp->a.a6, &ent.a.a6, 16) != 0) continue; ta_dump_chash_tentry(ta_state, ti, tmp, tent); return (0); } } return (ENOENT); } static void ta_foreach_chash(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct chash_cfg *ccfg; struct chashentry *ent, *ent_next; int i; ccfg = (struct chash_cfg *)ta_state; for (i = 0; i < ccfg->size4; i++) SLIST_FOREACH_SAFE(ent, &ccfg->head4[i], next, ent_next) f(ent, arg); for (i = 0; i < ccfg->size6; i++) SLIST_FOREACH_SAFE(ent, &ccfg->head6[i], next, ent_next) f(ent, arg); } struct ta_buf_chash { void *ent_ptr; struct chashentry ent; }; static int ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_chash *tb; struct chashentry *ent; int error; tb = (struct ta_buf_chash *)ta_buf; ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); error = tei_to_chash_ent(tei, ent); if (error != 0) { free(ent, M_IPFW_TBL); return (error); } tb->ent_ptr = ent; return (0); } static int ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct chash_cfg *ccfg; struct chashbhead *head; struct chashentry *ent, *tmp; struct ta_buf_chash *tb; int exists; uint32_t hash; ccfg = (struct chash_cfg *)ta_state; tb = (struct ta_buf_chash *)ta_buf; ent = (struct chashentry *)tb->ent_ptr; hash = 0; exists = 0; if (tei->subtype == AF_INET) { if (tei->masklen != ccfg->mask4) return (EINVAL); head = ccfg->head4; hash = hash_ent(ent, AF_INET, ccfg->mask4, ccfg->size4); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (tmp->a.a4 == ent->a.a4) { exists = 1; break; } } } else { if (tei->masklen != ccfg->mask6) return (EINVAL); head = ccfg->head6; hash = hash_ent(ent, AF_INET6, ccfg->mask6, ccfg->size6); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (memcmp(&tmp->a.a6, &ent->a.a6, 16) == 0) { exists = 1; break; } } } if (exists == 1) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Record already exists. Update value if we're asked to */ tmp->value = tei->value; /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; } else { if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); SLIST_INSERT_HEAD(&head[hash], ent, next); tb->ent_ptr = NULL; *pnum = 1; - /* Update counters and check if we need to grow hash */ - if (tei->subtype == AF_INET) { + /* Update counters */ + if (tei->subtype == AF_INET) ccfg->items4++; - if (ccfg->items4 > ccfg->size4 && ccfg->size4 < 65536) - *pflags = (ccfg->size4 * 2) | (1UL << 32); - } else { + else ccfg->items6++; - if (ccfg->items6 > ccfg->size6 && ccfg->size6 < 65536) - *pflags = ccfg->size6 * 2; - } } return (0); } static int ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_chash *tb; tb = (struct ta_buf_chash *)ta_buf; return (tei_to_chash_ent(tei, &tb->ent)); } static int ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct chash_cfg *ccfg; struct chashbhead *head; struct chashentry *ent, *tmp_next, *dent; struct ta_buf_chash *tb; uint32_t hash; ccfg = (struct chash_cfg *)ta_state; tb = (struct ta_buf_chash *)ta_buf; dent = &tb->ent; if (tei->subtype == AF_INET) { if (tei->masklen != ccfg->mask4) return (EINVAL); head = ccfg->head4; hash = hash_ent(dent, AF_INET, ccfg->mask4, ccfg->size4); SLIST_FOREACH_SAFE(ent, &head[hash], next, tmp_next) { if (ent->a.a4 == dent->a.a4) { SLIST_REMOVE(&head[hash], ent, chashentry,next); *pnum = 1; ccfg->items4--; return (0); } } } else { if (tei->masklen != ccfg->mask6) return (EINVAL); head = ccfg->head6; hash = hash_ent(dent, AF_INET6, ccfg->mask6, ccfg->size6); SLIST_FOREACH_SAFE(ent, &head[hash], next, tmp_next) { if (memcmp(&ent->a.a6, &dent->a.a6, 16) == 0) { SLIST_REMOVE(&head[hash], ent, chashentry,next); ccfg->items6--; *pnum = 1; return (0); } } } return (ENOENT); } static void ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_chash *tb; tb = (struct ta_buf_chash *)ta_buf; if (tb->ent_ptr != NULL) free(tb->ent_ptr, M_IPFW_TBL); } /* * Hash growing callbacks. */ struct mod_item { void *main_ptr; size_t size; + void *main_ptr6; + size_t size6; }; +static int +ta_has_space_chash(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct chash_cfg *cfg; + uint64_t data; + + /* + * Since we don't know exact number of IPv4/IPv6 records in @count, + * ignore non-zero @count value at all. Check current hash sizes + * and return appropriate data. + */ + + cfg = (struct chash_cfg *)ta_state; + + data = 0; + if (cfg->items4 > cfg->size4 && cfg->size4 < 65536) + data |= (cfg->size4 * 2) << 16; + if (cfg->items6 > cfg->size6 && cfg->size6 < 65536) + data |= cfg->size6 * 2; + + if (data != 0) { + *pflags = data; + return (0); + } + + return (1); +} + /* * Allocate new, larger chash. */ static int ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags) { struct mod_item *mi; struct chashbhead *head; int i; mi = (struct mod_item *)ta_buf; memset(mi, 0, sizeof(struct mod_item)); - mi->size = *pflags & 0xFFFFFFFF; - head = malloc(sizeof(struct chashbhead) * mi->size, M_IPFW, - M_WAITOK | M_ZERO); - for (i = 0; i < mi->size; i++) - SLIST_INIT(&head[i]); + mi->size = (*pflags >> 16) & 0xFFFF; + mi->size6 = *pflags & 0xFFFF; + if (mi->size > 0) { + head = malloc(sizeof(struct chashbhead) * mi->size, + M_IPFW, M_WAITOK | M_ZERO); + for (i = 0; i < mi->size; i++) + SLIST_INIT(&head[i]); + mi->main_ptr = head; + } - mi->main_ptr = head; + if (mi->size6 > 0) { + head = malloc(sizeof(struct chashbhead) * mi->size6, + M_IPFW, M_WAITOK | M_ZERO); + for (i = 0; i < mi->size6; i++) + SLIST_INIT(&head[i]); + mi->main_ptr6 = head; + } return (0); } /* * Copy data from old runtime array to new one. */ static int ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags) { /* In is not possible to do rehash if we're not holidng WLOCK. */ return (0); } - /* * Switch old & new arrays. */ static int ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags) { struct mod_item *mi; - struct chash_cfg *ccfg; + struct chash_cfg *cfg; struct chashbhead *old_head, *new_head; struct chashentry *ent, *ent_next; int af, i, mlen; uint32_t nhash; - size_t old_size; + size_t old_size, new_size; mi = (struct mod_item *)ta_buf; - ccfg = (struct chash_cfg *)ta_state; + cfg = (struct chash_cfg *)ta_state; /* Check which hash we need to grow and do we still need that */ - if ((pflags >> 32) == 1) { - old_size = ccfg->size4; + if (mi->size > 0 && cfg->size4 < mi->size) { + new_head = (struct chashbhead *)mi->main_ptr; + new_size = mi->size; + old_size = cfg->size4; old_head = ti->state; - mlen = ccfg->mask4; + mlen = cfg->mask4; af = AF_INET; - } else { - old_size = ccfg->size6; + + for (i = 0; i < old_size; i++) { + SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { + nhash = hash_ent(ent, af, mlen, new_size); + SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + } + } + + ti->state = new_head; + cfg->head4 = new_head; + cfg->size4 = mi->size; + mi->main_ptr = old_head; + } + + if (mi->size6 > 0 && cfg->size6 < mi->size6) { + new_head = (struct chashbhead *)mi->main_ptr6; + new_size = mi->size6; + old_size = cfg->size6; old_head = ti->xstate; - mlen = ccfg->mask6; + mlen = cfg->mask6; af = AF_INET6; - } - if (old_size >= mi->size) - return (0); - - new_head = (struct chashbhead *)mi->main_ptr; - for (i = 0; i < old_size; i++) { - SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { - nhash = hash_ent(ent, af, mlen, mi->size); - SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + for (i = 0; i < old_size; i++) { + SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { + nhash = hash_ent(ent, af, mlen, new_size); + SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + } } - } - if (af == AF_INET) { - ti->state = new_head; - ccfg->head4 = new_head; - ccfg->size4 = mi->size; - } else { ti->xstate = new_head; - ccfg->head6 = new_head; - ccfg->size6 = mi->size; + cfg->head6 = new_head; + cfg->size6 = mi->size6; + mi->main_ptr6 = old_head; } - ti->data = (ti->data & 0xFFFFFFFF00000000) | log2(ccfg->size4) << 8 | - log2(ccfg->size6); + /* Update lower 32 bits with new values */ + ti->data &= 0xFFFFFFFF00000000; + ti->data |= log2(cfg->size4) << 8 | log2(cfg->size6); - mi->main_ptr = old_head; - return (0); } /* * Free unneded array. */ static void ta_flush_mod_chash(void *ta_buf) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); + if (mi->main_ptr6 != NULL) + free(mi->main_ptr6, M_IPFW); } struct table_algo cidr_hash = { .name = "cidr:hash", .type = IPFW_TABLE_CIDR, .ta_buf_size = sizeof(struct ta_buf_chash), .init = ta_init_chash, .destroy = ta_destroy_chash, .prepare_add = ta_prepare_add_chash, .prepare_del = ta_prepare_del_chash, .add = ta_add_chash, .del = ta_del_chash, .flush_entry = ta_flush_chash_entry, .foreach = ta_foreach_chash, .dump_tentry = ta_dump_chash_tentry, .find_tentry = ta_find_chash_tentry, .print_config = ta_print_chash_config, + .has_space = ta_has_space_chash, .prepare_mod = ta_prepare_mod_chash, .fill_mod = ta_fill_mod_chash, .modify = ta_modify_chash, .flush_mod = ta_flush_mod_chash, }; /* * Iface table cmds. * * Implementation: * * Runtime part: * - sorted array of "struct ifidx" pointed by ti->state. * Array is allocated with rounding up to IFIDX_CHUNK. Only existing * interfaces are stored in array, however its allocated size is * sufficient to hold all table records if needed. * - current array size is stored in ti->data * * Table data: * - "struct iftable_cfg" is allocated to store table state (ta_state). * - All table records are stored inside namedobj instance. * */ struct ifidx { uint16_t kidx; uint16_t spare; uint32_t value; }; struct iftable_cfg; struct ifentry { struct named_object no; struct ipfw_ifc ic; struct iftable_cfg *icfg; uint32_t value; int linked; }; struct iftable_cfg { struct namedobj_instance *ii; struct ip_fw_chain *ch; struct table_info *ti; void *main_ptr; size_t size; /* Number of items allocated in array */ size_t count; /* Number of all items */ size_t used; /* Number of items _active_ now */ }; #define IFIDX_CHUNK 16 int compare_ifidx(const void *k, const void *v); static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex); int compare_ifidx(const void *k, const void *v) { struct ifidx *ifidx; uint16_t key; key = *((uint16_t *)k); ifidx = (struct ifidx *)v; if (key < ifidx->kidx) return (-1); else if (key > ifidx->kidx) return (1); return (0); } /* * Adds item @item with key @key into ascending-sorted array @base. * Assumes @base has enough additional storage. * * Returns 1 on success, 0 on duplicate key. */ static int badd(const void *key, void *item, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)) { int min, max, mid, shift, res; caddr_t paddr; if (nmemb == 0) { memcpy(base, item, size); return (1); } /* Binary search */ min = 0; max = nmemb - 1; mid = 0; while (min <= max) { mid = (min + max) / 2; res = compar(key, (const void *)((caddr_t)base + mid * size)); if (res == 0) return (0); if (res > 0) min = mid + 1; else max = mid - 1; } /* Item not found. */ res = compar(key, (const void *)((caddr_t)base + mid * size)); if (res > 0) shift = mid + 1; else shift = mid; paddr = (caddr_t)base + shift * size; if (nmemb > shift) memmove(paddr + size, paddr, (nmemb - shift) * size); memcpy(paddr, item, size); return (1); } /* * Deletes item with key @key from ascending-sorted array @base. * * Returns 1 on success, 0 for non-existent key. */ static int bdel(const void *key, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)) { caddr_t item; size_t sz; item = (caddr_t)bsearch(key, base, nmemb, size, compar); if (item == NULL) return (0); sz = (caddr_t)base + nmemb * size - item; if (sz > 0) memmove(item, item + size, sz); return (1); } static struct ifidx * ifidx_find(struct table_info *ti, void *key) { struct ifidx *ifi; ifi = bsearch(key, ti->state, ti->data, sizeof(struct ifidx), compare_ifidx); return (ifi); } static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct ifidx *ifi; ifi = ifidx_find(ti, key); if (ifi != NULL) { *val = ifi->value; return (1); } return (0); } static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { struct iftable_cfg *icfg; icfg = malloc(sizeof(struct iftable_cfg), M_IPFW, M_WAITOK | M_ZERO); icfg->ii = ipfw_objhash_create(16); icfg->main_ptr = malloc(sizeof(struct ifidx) * IFIDX_CHUNK, M_IPFW, M_WAITOK | M_ZERO); icfg->size = IFIDX_CHUNK; icfg->ch = ch; *ta_state = icfg; ti->state = icfg->main_ptr; ti->lookup = ta_lookup_ifidx; return (0); } /* * Handle tableinfo @ti pointer change (on table array resize). */ static void ta_change_ti_ifidx(void *ta_state, struct table_info *ti) { struct iftable_cfg *icfg; icfg = (struct iftable_cfg *)ta_state; icfg->ti = ti; } static void destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no, void *arg) { struct ifentry *ife; struct ip_fw_chain *ch; ch = (struct ip_fw_chain *)arg; ife = (struct ifentry *)no; ipfw_iface_del_notify(ch, &ife->ic); free(ife, M_IPFW_TBL); } /* * Destroys table @ti */ static void ta_destroy_ifidx(void *ta_state, struct table_info *ti) { struct iftable_cfg *icfg; struct ip_fw_chain *ch; icfg = (struct iftable_cfg *)ta_state; ch = icfg->ch; if (icfg->main_ptr != NULL) free(icfg->main_ptr, M_IPFW); ipfw_objhash_foreach(icfg->ii, destroy_ifidx_locked, ch); ipfw_objhash_destroy(icfg->ii); free(icfg, M_IPFW); } struct ta_buf_ifidx { struct ifentry *ife; uint32_t value; }; /* * Prepare state to add to the table: * allocate ifentry and reference needed interface. */ static int ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_ifidx *tb; char *ifname; struct ifentry *ife; tb = (struct ta_buf_ifidx *)ta_buf; /* Check if string is terminated */ ifname = (char *)tei->paddr; if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) return (EINVAL); ife = malloc(sizeof(struct ifentry), M_IPFW_TBL, M_WAITOK | M_ZERO); ife->value = tei->value; ife->ic.cb = if_notifier; ife->ic.cbdata = ife; if (ipfw_iface_ref(ch, ifname, &ife->ic) != 0) return (EINVAL); /* Use ipfw_iface 'ifname' field as stable storage */ ife->no.name = ife->ic.iface->ifname; tb->ife = ife; return (0); } static int ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct iftable_cfg *icfg; struct ifentry *ife, *tmp; struct ta_buf_ifidx *tb; struct ipfw_iface *iif; struct ifidx *ifi; char *ifname; tb = (struct ta_buf_ifidx *)ta_buf; ifname = (char *)tei->paddr; icfg = (struct iftable_cfg *)ta_state; ife = tb->ife; ife->icfg = icfg; tmp = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); if (tmp != NULL) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* We need to update value */ iif = tmp->ic.iface; tmp->value = ife->value; if (iif->resolved != 0) { /* We need to update runtime value, too */ ifi = ifidx_find(ti, &iif->ifindex); ifi->value = ife->value; } /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; return (0); } if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); /* Link to internal list */ ipfw_objhash_add(icfg->ii, &ife->no); /* Link notifier (possible running its callback) */ ipfw_iface_add_notify(icfg->ch, &ife->ic); icfg->count++; - if (icfg->count + 1 == icfg->size) { - /* Notify core we need to grow */ - *pflags = icfg->size + IFIDX_CHUNK; - } - tb->ife = NULL; *pnum = 1; return (0); } /* * Prepare to delete key from table. * Do basic interface name checks. */ static int ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_ifidx *tb; char *ifname; tb = (struct ta_buf_ifidx *)ta_buf; /* Check if string is terminated */ ifname = (char *)tei->paddr; if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) return (EINVAL); return (0); } /* * Remove key from both configuration list and * runtime array. Removed interface notification. */ static int ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct iftable_cfg *icfg; struct ifentry *ife; struct ta_buf_ifidx *tb; char *ifname; uint16_t ifindex; int res; tb = (struct ta_buf_ifidx *)ta_buf; ifname = (char *)tei->paddr; icfg = (struct iftable_cfg *)ta_state; ife = tb->ife; ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); if (ife == NULL) return (ENOENT); if (ife->linked != 0) { /* We have to remove item from runtime */ ifindex = ife->ic.iface->ifindex; res = bdel(&ifindex, icfg->main_ptr, icfg->used, sizeof(struct ifidx), compare_ifidx); KASSERT(res == 1, ("index %d does not exist", ifindex)); icfg->used--; ti->data = icfg->used; ife->linked = 0; } /* Unlink from local list */ ipfw_objhash_del(icfg->ii, &ife->no); /* Unlink notifier */ ipfw_iface_del_notify(icfg->ch, &ife->ic); icfg->count--; tb->ife = ife; *pnum = 1; return (0); } /* * Flush deleted entry. * Drops interface reference and frees entry. */ static void ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_ifidx *tb; tb = (struct ta_buf_ifidx *)ta_buf; if (tb->ife != NULL) { /* Unlink first */ ipfw_iface_unref(ch, &tb->ife->ic); free(tb->ife, M_IPFW_TBL); } } /* * Handle interface announce/withdrawal for particular table. * Every real runtime array modification happens here. */ static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex) { struct ifentry *ife; struct ifidx ifi; struct iftable_cfg *icfg; struct table_info *ti; int res; ife = (struct ifentry *)cbdata; icfg = ife->icfg; ti = icfg->ti; KASSERT(ti != NULL, ("ti=NULL, check change_ti handler")); if (ife->linked == 0 && ifindex != 0) { /* Interface announce */ ifi.kidx = ifindex; ifi.spare = 0; ifi.value = ife->value; res = badd(&ifindex, &ifi, icfg->main_ptr, icfg->used, sizeof(struct ifidx), compare_ifidx); KASSERT(res == 1, ("index %d already exists", ifindex)); icfg->used++; ti->data = icfg->used; ife->linked = 1; } else if (ife->linked != 0 && ifindex == 0) { /* Interface withdrawal */ ifindex = ife->ic.iface->ifindex; res = bdel(&ifindex, icfg->main_ptr, icfg->used, sizeof(struct ifidx), compare_ifidx); KASSERT(res == 1, ("index %d does not exist", ifindex)); icfg->used--; ti->data = icfg->used; ife->linked = 0; } } /* * Table growing callbacks. */ struct mod_ifidx { void *main_ptr; size_t size; }; +static int +ta_has_space_ifidx(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct iftable_cfg *cfg; + + cfg = (struct iftable_cfg *)ta_state; + + if (cfg->count + count > cfg->size) { + *pflags = roundup2(cfg->count + count, IFIDX_CHUNK); + return (0); + } + + return (1); +} + /* * Allocate ned, larger runtime ifidx array. */ static int ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags) { struct mod_ifidx *mi; mi = (struct mod_ifidx *)ta_buf; memset(mi, 0, sizeof(struct mod_ifidx)); mi->size = *pflags; mi->main_ptr = malloc(sizeof(struct ifidx) * mi->size, M_IPFW, M_WAITOK | M_ZERO); return (0); } /* * Copy data from old runtime array to new one. */ static int ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags) { struct mod_ifidx *mi; struct iftable_cfg *icfg; mi = (struct mod_ifidx *)ta_buf; icfg = (struct iftable_cfg *)ta_state; /* Check if we still need to grow array */ if (icfg->size >= mi->size) { *pflags = 0; return (0); } memcpy(mi->main_ptr, icfg->main_ptr, icfg->used * sizeof(struct ifidx)); return (0); } /* * Switch old & new arrays. */ static int ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags) { struct mod_ifidx *mi; struct iftable_cfg *icfg; void *old_ptr; mi = (struct mod_ifidx *)ta_buf; icfg = (struct iftable_cfg *)ta_state; old_ptr = icfg->main_ptr; icfg->main_ptr = mi->main_ptr; icfg->size = mi->size; ti->state = icfg->main_ptr; mi->main_ptr = old_ptr; return (0); } /* * Free unneded array. */ static void ta_flush_mod_ifidx(void *ta_buf) { struct mod_ifidx *mi; mi = (struct mod_ifidx *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); } static int ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct ifentry *ife; ife = (struct ifentry *)e; tent->masklen = 8 * IF_NAMESIZE; memcpy(&tent->k, ife->no.name, IF_NAMESIZE); tent->value = ife->value; return (0); } static int ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct iftable_cfg *icfg; struct ifentry *ife; char *ifname; icfg = (struct iftable_cfg *)ta_state; ifname = tent->k.iface; if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) return (EINVAL); ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); if (ife != NULL) { ta_dump_ifidx_tentry(ta_state, ti, ife, tent); return (0); } return (ENOENT); } struct wa_ifidx { ta_foreach_f *f; void *arg; }; static void foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, void *arg) { struct ifentry *ife; struct wa_ifidx *wa; ife = (struct ifentry *)no; wa = (struct wa_ifidx *)arg; wa->f(ife, wa->arg); } static void ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct iftable_cfg *icfg; struct wa_ifidx wa; icfg = (struct iftable_cfg *)ta_state; wa.f = f; wa.arg = arg; ipfw_objhash_foreach(icfg->ii, foreach_ifidx, &wa); } struct table_algo iface_idx = { .name = "iface:array", .type = IPFW_TABLE_INTERFACE, .flags = TA_FLAG_DEFAULT, .ta_buf_size = sizeof(struct ta_buf_ifidx), .init = ta_init_ifidx, .destroy = ta_destroy_ifidx, .prepare_add = ta_prepare_add_ifidx, .prepare_del = ta_prepare_del_ifidx, .add = ta_add_ifidx, .del = ta_del_ifidx, .flush_entry = ta_flush_ifidx_entry, .foreach = ta_foreach_ifidx, .dump_tentry = ta_dump_ifidx_tentry, .find_tentry = ta_find_ifidx_tentry, + .has_space = ta_has_space_ifidx, .prepare_mod = ta_prepare_mod_ifidx, .fill_mod = ta_fill_mod_ifidx, .modify = ta_modify_ifidx, .flush_mod = ta_flush_mod_ifidx, .change_ti = ta_change_ti_ifidx, }; /* * Number array cmds. * * Implementation: * * Runtime part: * - sorted array of "struct numarray" pointed by ti->state. * Array is allocated with rounding up to NUMARRAY_CHUNK. * - current array size is stored in ti->data * */ struct numarray { uint32_t number; uint32_t value; }; struct numarray_cfg { void *main_ptr; size_t size; /* Number of items allocated in array */ size_t used; /* Number of items _active_ now */ }; #define NUMARRAY_CHUNK 16 int compare_numarray(const void *k, const void *v); int compare_numarray(const void *k, const void *v) { struct numarray *na; uint32_t key; key = *((uint32_t *)k); na = (struct numarray *)v; if (key < na->number) return (-1); else if (key > na->number) return (1); return (0); } static struct numarray * numarray_find(struct table_info *ti, void *key) { struct numarray *ri; ri = bsearch(key, ti->state, ti->data, sizeof(struct numarray), compare_ifidx); return (ri); } static int ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct numarray *ri; ri = numarray_find(ti, key); if (ri != NULL) { *val = ri->value; return (1); } return (0); } static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { struct numarray_cfg *cfg; cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO); cfg->size = NUMARRAY_CHUNK; cfg->main_ptr = malloc(sizeof(struct numarray) * cfg->size, M_IPFW, M_WAITOK | M_ZERO); *ta_state = cfg; ti->state = cfg->main_ptr; ti->lookup = ta_lookup_numarray; return (0); } /* * Destroys table @ti */ static void ta_destroy_numarray(void *ta_state, struct table_info *ti) { struct numarray_cfg *cfg; cfg = (struct numarray_cfg *)ta_state; if (cfg->main_ptr != NULL) free(cfg->main_ptr, M_IPFW); free(cfg, M_IPFW); } struct ta_buf_numarray { struct numarray na; }; /* * Prepare for addition/deletion to an array. */ static int ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_numarray *tb; tb = (struct ta_buf_numarray *)ta_buf; tb->na.number = *((uint32_t *)tei->paddr); tb->na.value = tei->value; return (0); } static int ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct numarray_cfg *cfg; struct ta_buf_numarray *tb; struct numarray *ri; int res; tb = (struct ta_buf_numarray*)ta_buf; cfg = (struct numarray_cfg *)ta_state; ri = numarray_find(ti, &tb->na.number); if (ri != NULL) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* We need to update value */ ri->value = tb->na.value; /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; return (0); } if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); res = badd(&tb->na.number, &tb->na, cfg->main_ptr, cfg->used, sizeof(struct numarray), compare_numarray); KASSERT(res == 1, ("number %d already exists", tb->na.number)); cfg->used++; ti->data = cfg->used; - - if (cfg->used + 1 == cfg->size) { - /* Notify core we need to grow */ - *pflags = cfg->size + NUMARRAY_CHUNK; - } *pnum = 1; return (0); } /* * Remove key from both configuration list and * runtime array. Removed interface notification. */ static int ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct numarray_cfg *cfg; struct ta_buf_numarray *tb; struct numarray *ri; int res; tb = (struct ta_buf_numarray *)ta_buf; cfg = (struct numarray_cfg *)ta_state; ri = numarray_find(ti, &tb->na.number); if (ri == NULL) return (ENOENT); res = bdel(&tb->na.number, cfg->main_ptr, cfg->used, sizeof(struct numarray), compare_numarray); KASSERT(res == 1, ("number %u does not exist", tb->na.number)); cfg->used--; ti->data = cfg->used; - *pnum = 1; return (0); } static void ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { /* Do nothing */ } /* * Table growing callbacks. */ +static int +ta_has_space_numarray(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct numarray_cfg *cfg; + + cfg = (struct numarray_cfg *)ta_state; + + if (cfg->used + count > cfg->size) { + *pflags = roundup2(cfg->used + count, NUMARRAY_CHUNK); + return (0); + } + + return (1); +} + /* - * Allocate ned, larger runtime numarray array. + * Allocate new, larger runtime array. */ static int ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; memset(mi, 0, sizeof(struct mod_item)); mi->size = *pflags; mi->main_ptr = malloc(sizeof(struct numarray) * mi->size, M_IPFW, M_WAITOK | M_ZERO); return (0); } /* * Copy data from old runtime array to new one. */ static int ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags) { struct mod_item *mi; struct numarray_cfg *cfg; mi = (struct mod_item *)ta_buf; cfg = (struct numarray_cfg *)ta_state; /* Check if we still need to grow array */ if (cfg->size >= mi->size) { *pflags = 0; return (0); } memcpy(mi->main_ptr, cfg->main_ptr, cfg->used * sizeof(struct numarray)); return (0); } /* * Switch old & new arrays. */ static int ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags) { struct mod_item *mi; struct numarray_cfg *cfg; void *old_ptr; mi = (struct mod_item *)ta_buf; cfg = (struct numarray_cfg *)ta_state; old_ptr = cfg->main_ptr; cfg->main_ptr = mi->main_ptr; cfg->size = mi->size; ti->state = cfg->main_ptr; mi->main_ptr = old_ptr; return (0); } /* * Free unneded array. */ static void ta_flush_mod_numarray(void *ta_buf) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); } static int ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct numarray *na; na = (struct numarray *)e; tent->k.key = na->number; tent->value = na->value; return (0); } static int ta_find_numarray_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct numarray_cfg *cfg; struct numarray *ri; cfg = (struct numarray_cfg *)ta_state; ri = numarray_find(ti, &tent->k.key); if (ri != NULL) { ta_dump_numarray_tentry(ta_state, ti, ri, tent); return (0); } return (ENOENT); } static void ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct numarray_cfg *cfg; struct numarray *array; int i; cfg = (struct numarray_cfg *)ta_state; array = cfg->main_ptr; for (i = 0; i < cfg->used; i++) f(&array[i], arg); } struct table_algo number_array = { .name = "number:array", .type = IPFW_TABLE_NUMBER, .ta_buf_size = sizeof(struct ta_buf_numarray), .init = ta_init_numarray, .destroy = ta_destroy_numarray, .prepare_add = ta_prepare_add_numarray, .prepare_del = ta_prepare_add_numarray, .add = ta_add_numarray, .del = ta_del_numarray, .flush_entry = ta_flush_numarray_entry, .foreach = ta_foreach_numarray, .dump_tentry = ta_dump_numarray_tentry, .find_tentry = ta_find_numarray_tentry, + .has_space = ta_has_space_numarray, .prepare_mod = ta_prepare_mod_numarray, .fill_mod = ta_fill_mod_numarray, .modify = ta_modify_numarray, .flush_mod = ta_flush_mod_numarray, }; /* * flow:hash cmds * * * ti->data: * [inv.mask4][inv.mask6][log2hsize4][log2hsize6] * [ 8][ 8[ 8][ 8] * * inv.mask4: 32 - mask * inv.mask6: * 1) _slow lookup: mask * 2) _aligned: (128 - mask) / 8 * 3) _64: 8 * * * pflags: - * [v4=1/v6=0][hsize] - * [ 32][ 32] + * [hsize4][hsize6] + * [ 16][ 16] */ struct fhashentry; SLIST_HEAD(fhashbhead, fhashentry); /* struct tflow_entry { uint8_t af; uint8_t proto; uint16_t spare; uint16_t dport; uint16_t sport; union { struct { struct in_addr sip; struct in_addr dip; } v4; struct { struct in6_addr sip6; struct in6_addr dip6; } v6; } a; }; */ struct fhashentry { SLIST_ENTRY(fhashentry) next; uint8_t af; uint8_t proto; uint16_t spare0; uint16_t dport; uint16_t sport; uint32_t value; uint32_t spare1; }; struct fhashentry4 { struct fhashentry e; struct in_addr dip; struct in_addr sip; }; struct fhashentry6 { struct fhashentry e; struct in6_addr dip6; struct in6_addr sip6; }; struct fhash_cfg { struct fhashbhead *head; size_t size; size_t items; struct fhashentry4 fe4; struct fhashentry6 fe6; }; static __inline int cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz) { uint64_t *ka, *kb; ka = (uint64_t *)(&a->next + 1); kb = (uint64_t *)(&b->next + 1); if (*ka == *kb && (memcmp(a + 1, b + 1, sz) == 0)) return (1); return (0); } static __inline uint32_t hash_flow4(struct fhashentry4 *f, int hsize) { uint32_t i; i = (f->dip.s_addr) ^ (f->sip.s_addr) ^ (f->e.dport) ^ (f->e.sport); return (i % (hsize - 1)); } static __inline uint32_t hash_flow6(struct fhashentry6 *f, int hsize) { uint32_t i; i = (f->dip6.__u6_addr.__u6_addr32[2]) ^ (f->dip6.__u6_addr.__u6_addr32[3]) ^ (f->sip6.__u6_addr.__u6_addr32[2]) ^ (f->sip6.__u6_addr.__u6_addr32[3]) ^ (f->e.dport) ^ (f->e.sport); return (i % (hsize - 1)); } static uint32_t hash_flow_ent(struct fhashentry *ent, uint32_t size) { uint32_t hash; if (ent->af == AF_INET) { hash = hash_flow4((struct fhashentry4 *)ent, size); } else { hash = hash_flow6((struct fhashentry6 *)ent, size); } return (hash); } static int ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct fhashbhead *head; struct fhashentry *ent; struct fhashentry4 *m4; struct ipfw_flow_id *id; uint16_t hash, hsize; id = (struct ipfw_flow_id *)key; head = (struct fhashbhead *)ti->state; hsize = ti->data; m4 = (struct fhashentry4 *)ti->xstate; if (id->addr_type == 4) { struct fhashentry4 f; /* Copy hash mask */ f = *m4; f.dip.s_addr &= id->dst_ip; f.sip.s_addr &= id->src_ip; f.e.dport &= id->dst_port; f.e.sport &= id->src_port; f.e.proto &= id->proto; hash = hash_flow4(&f, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) { *val = ent->value; return (1); } } } else if (id->addr_type == 6) { struct fhashentry6 f; uint64_t *fp, *idp; /* Copy hash mask */ f = *((struct fhashentry6 *)(m4 + 1)); /* Handle lack of __u6_addr.__u6_addr64 */ fp = (uint64_t *)&f.dip6; idp = (uint64_t *)&id->dst_ip6; /* src IPv6 is stored after dst IPv6 */ *fp++ &= *idp++; *fp++ &= *idp++; *fp++ &= *idp++; *fp &= *idp; f.e.dport &= id->dst_port; f.e.sport &= id->src_port; f.e.proto &= id->proto; hash = hash_flow6(&f, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) { *val = ent->value; return (1); } } } return (0); } /* * New table. */ static int ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { int i; struct fhash_cfg *cfg; struct fhashentry4 *fe4; struct fhashentry6 *fe6; cfg = malloc(sizeof(struct fhash_cfg), M_IPFW, M_WAITOK | M_ZERO); cfg->size = 512; cfg->head = malloc(sizeof(struct fhashbhead) * cfg->size, M_IPFW, M_WAITOK | M_ZERO); for (i = 0; i < cfg->size; i++) SLIST_INIT(&cfg->head[i]); /* Fill in fe masks based on @tflags */ fe4 = &cfg->fe4; fe6 = &cfg->fe6; if (tflags & IPFW_TFFLAG_SRCIP) { memset(&fe4->sip, 0xFF, sizeof(fe4->sip)); memset(&fe6->sip6, 0xFF, sizeof(fe6->sip6)); } if (tflags & IPFW_TFFLAG_DSTIP) { memset(&fe4->dip, 0xFF, sizeof(fe4->dip)); memset(&fe6->dip6, 0xFF, sizeof(fe6->dip6)); } if (tflags & IPFW_TFFLAG_SRCPORT) { memset(&fe4->e.sport, 0xFF, sizeof(fe4->e.sport)); memset(&fe6->e.sport, 0xFF, sizeof(fe6->e.sport)); } if (tflags & IPFW_TFFLAG_DSTPORT) { memset(&fe4->e.dport, 0xFF, sizeof(fe4->e.dport)); memset(&fe6->e.dport, 0xFF, sizeof(fe6->e.dport)); } if (tflags & IPFW_TFFLAG_PROTO) { memset(&fe4->e.proto, 0xFF, sizeof(fe4->e.proto)); memset(&fe6->e.proto, 0xFF, sizeof(fe6->e.proto)); } fe4->e.af = AF_INET; fe6->e.af = AF_INET6; *ta_state = cfg; ti->state = cfg->head; ti->xstate = &cfg->fe4; ti->data = cfg->size; ti->lookup = ta_lookup_fhash; return (0); } static void ta_destroy_fhash(void *ta_state, struct table_info *ti) { struct fhash_cfg *cfg; struct fhashentry *ent, *ent_next; int i; cfg = (struct fhash_cfg *)ta_state; for (i = 0; i < cfg->size; i++) SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) free(ent, M_IPFW_TBL); free(cfg->head, M_IPFW); free(cfg, M_IPFW); } static int ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct fhash_cfg *cfg; struct fhashentry *ent; struct fhashentry4 *fe4; struct fhashentry6 *fe6; struct tflow_entry *tfe; cfg = (struct fhash_cfg *)ta_state; ent = (struct fhashentry *)e; tfe = &tent->k.flow; tfe->af = ent->af; tfe->proto = ent->proto; tfe->dport = htons(ent->dport); tfe->sport = htons(ent->sport); tent->value = ent->value; tent->subtype = ent->af; if (ent->af == AF_INET) { fe4 = (struct fhashentry4 *)ent; tfe->a.a4.sip.s_addr = htonl(fe4->sip.s_addr); tfe->a.a4.dip.s_addr = htonl(fe4->dip.s_addr); tent->masklen = 32; #ifdef INET6 } else { fe6 = (struct fhashentry6 *)ent; tfe->a.a6.sip6 = fe6->sip6; tfe->a.a6.dip6 = fe6->dip6; tent->masklen = 128; #endif } return (0); } static int tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent) { struct fhashentry4 *fe4; struct fhashentry6 *fe6; struct tflow_entry *tfe; tfe = (struct tflow_entry *)tei->paddr; ent->af = tei->subtype; ent->proto = tfe->proto; ent->value = tei->value; ent->dport = ntohs(tfe->dport); ent->sport = ntohs(tfe->sport); if (tei->subtype == AF_INET) { #ifdef INET fe4 = (struct fhashentry4 *)ent; fe4->sip.s_addr = ntohl(tfe->a.a4.sip.s_addr); fe4->dip.s_addr = ntohl(tfe->a.a4.dip.s_addr); #endif #ifdef INET6 } else if (tei->subtype == AF_INET6) { fe6 = (struct fhashentry6 *)ent; fe6->sip6 = tfe->a.a6.sip6; fe6->dip6 = tfe->a.a6.dip6; #endif } else { /* Unknown CIDR type */ return (EINVAL); } return (0); } static int ta_find_fhash_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct fhash_cfg *cfg; struct fhashbhead *head; struct fhashentry *ent, *tmp; struct fhashentry6 fe6; struct tentry_info tei; int error; uint32_t hash; size_t sz; cfg = (struct fhash_cfg *)ta_state; ent = &fe6.e; memset(&fe6, 0, sizeof(fe6)); memset(&tei, 0, sizeof(tei)); tei.paddr = &tent->k.flow; tei.subtype = tent->subtype; if ((error = tei_to_fhash_ent(&tei, ent)) != 0) return (error); head = cfg->head; hash = hash_flow_ent(ent, cfg->size); if (tei.subtype == AF_INET) sz = 2 * sizeof(struct in_addr); else sz = 2 * sizeof(struct in6_addr); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (cmp_flow_ent(tmp, ent, sz) != 0) { ta_dump_fhash_tentry(ta_state, ti, tmp, tent); return (0); } } return (ENOENT); } static void ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct fhash_cfg *cfg; struct fhashentry *ent, *ent_next; int i; cfg = (struct fhash_cfg *)ta_state; for (i = 0; i < cfg->size; i++) SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) f(ent, arg); } struct ta_buf_fhash { void *ent_ptr; struct fhashentry6 fe6; }; static int ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_fhash *tb; struct fhashentry *ent; size_t sz; int error; tb = (struct ta_buf_fhash *)ta_buf; if (tei->subtype == AF_INET) sz = sizeof(struct fhashentry4); else if (tei->subtype == AF_INET6) sz = sizeof(struct fhashentry6); else return (EINVAL); ent = malloc(sz, M_IPFW_TBL, M_WAITOK | M_ZERO); error = tei_to_fhash_ent(tei, ent); if (error != 0) { free(ent, M_IPFW_TBL); return (error); } tb->ent_ptr = ent; return (0); } static int ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct fhash_cfg *cfg; struct fhashbhead *head; struct fhashentry *ent, *tmp; struct ta_buf_fhash *tb; int exists; uint32_t hash; size_t sz; cfg = (struct fhash_cfg *)ta_state; tb = (struct ta_buf_fhash *)ta_buf; ent = (struct fhashentry *)tb->ent_ptr; exists = 0; head = cfg->head; hash = hash_flow_ent(ent, cfg->size); if (tei->subtype == AF_INET) sz = 2 * sizeof(struct in_addr); else sz = 2 * sizeof(struct in6_addr); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (cmp_flow_ent(tmp, ent, sz) != 0) { exists = 1; break; } } if (exists == 1) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Record already exists. Update value if we're asked to */ tmp->value = tei->value; /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; } else { if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); SLIST_INSERT_HEAD(&head[hash], ent, next); tb->ent_ptr = NULL; *pnum = 1; /* Update counters and check if we need to grow hash */ cfg->items++; - if (cfg->items > cfg->size && cfg->size < 65536) - *pflags = cfg->size * 2; } return (0); } static int ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_fhash *tb; tb = (struct ta_buf_fhash *)ta_buf; return (tei_to_fhash_ent(tei, &tb->fe6.e)); } static int ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint64_t *pflags, uint32_t *pnum) + void *ta_buf, uint32_t *pnum) { struct fhash_cfg *cfg; struct fhashbhead *head; struct fhashentry *ent, *tmp; struct ta_buf_fhash *tb; uint32_t hash; size_t sz; cfg = (struct fhash_cfg *)ta_state; tb = (struct ta_buf_fhash *)ta_buf; ent = &tb->fe6.e; head = cfg->head; hash = hash_flow_ent(ent, cfg->size); if (tei->subtype == AF_INET) sz = 2 * sizeof(struct in_addr); else sz = 2 * sizeof(struct in6_addr); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (cmp_flow_ent(tmp, ent, sz) != 0) { SLIST_REMOVE(&head[hash], tmp, fhashentry, next); *pnum = 1; cfg->items--; return (0); } } return (ENOENT); } static void ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_fhash *tb; tb = (struct ta_buf_fhash *)ta_buf; if (tb->ent_ptr != NULL) free(tb->ent_ptr, M_IPFW_TBL); } /* * Hash growing callbacks. */ +static int +ta_has_space_fhash(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct fhash_cfg *cfg; + + cfg = (struct fhash_cfg *)ta_state; + + if (cfg->items > cfg->size && cfg->size < 65536) { + *pflags = cfg->size * 2; + return (0); + } + + return (1); +} + /* * Allocate new, larger fhash. */ static int ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags) { struct mod_item *mi; struct fhashbhead *head; int i; mi = (struct mod_item *)ta_buf; memset(mi, 0, sizeof(struct mod_item)); mi->size = *pflags; head = malloc(sizeof(struct fhashbhead) * mi->size, M_IPFW, M_WAITOK | M_ZERO); for (i = 0; i < mi->size; i++) SLIST_INIT(&head[i]); mi->main_ptr = head; return (0); } /* * Copy data from old runtime array to new one. */ static int ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags) { /* In is not possible to do rehash if we're not holidng WLOCK. */ return (0); } /* * Switch old & new arrays. */ static int ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags) { struct mod_item *mi; struct fhash_cfg *cfg; struct fhashbhead *old_head, *new_head; struct fhashentry *ent, *ent_next; int i; uint32_t nhash; size_t old_size; mi = (struct mod_item *)ta_buf; cfg = (struct fhash_cfg *)ta_state; /* Check which hash we need to grow and do we still need that */ old_size = cfg->size; old_head = ti->state; if (old_size >= mi->size) return (0); new_head = (struct fhashbhead *)mi->main_ptr; for (i = 0; i < old_size; i++) { SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { nhash = hash_flow_ent(ent, mi->size); SLIST_INSERT_HEAD(&new_head[nhash], ent, next); } } ti->state = new_head; ti->data = mi->size; cfg->head = new_head; cfg->size = mi->size; mi->main_ptr = old_head; return (0); } /* * Free unneded array. */ static void ta_flush_mod_fhash(void *ta_buf) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); } struct table_algo flow_hash = { .name = "flow:hash", .type = IPFW_TABLE_FLOW, .flags = TA_FLAG_DEFAULT, .ta_buf_size = sizeof(struct ta_buf_fhash), .init = ta_init_fhash, .destroy = ta_destroy_fhash, .prepare_add = ta_prepare_add_fhash, .prepare_del = ta_prepare_del_fhash, .add = ta_add_fhash, .del = ta_del_fhash, .flush_entry = ta_flush_fhash_entry, .foreach = ta_foreach_fhash, .dump_tentry = ta_dump_fhash_tentry, .find_tentry = ta_find_fhash_tentry, + .has_space = ta_has_space_fhash, .prepare_mod = ta_prepare_mod_fhash, .fill_mod = ta_fill_mod_fhash, .modify = ta_modify_fhash, .flush_mod = ta_flush_mod_fhash, }; void ipfw_table_algo_init(struct ip_fw_chain *ch) { size_t sz; /* * Register all algorithms presented here. */ sz = sizeof(struct table_algo); ipfw_add_table_algo(ch, &cidr_radix, sz, &cidr_radix.idx); ipfw_add_table_algo(ch, &cidr_hash, sz, &cidr_hash.idx); ipfw_add_table_algo(ch, &iface_idx, sz, &iface_idx.idx); ipfw_add_table_algo(ch, &number_array, sz, &number_array.idx); ipfw_add_table_algo(ch, &flow_hash, sz, &flow_hash.idx); } void ipfw_table_algo_destroy(struct ip_fw_chain *ch) { ipfw_del_table_algo(ch, cidr_radix.idx); ipfw_del_table_algo(ch, cidr_hash.idx); ipfw_del_table_algo(ch, iface_idx.idx); ipfw_del_table_algo(ch, number_array.idx); ipfw_del_table_algo(ch, flow_hash.idx); }