Index: head/sys/netpfil/ipfw/ip_fw_iface.c =================================================================== --- head/sys/netpfil/ipfw/ip_fw_iface.c (revision 299151) +++ head/sys/netpfil/ipfw/ip_fw_iface.c (revision 299152) @@ -1,537 +1,539 @@ /*- * Copyright (c) 2014 Yandex LLC. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Kernel interface tracking API. * */ #include "opt_ipfw.h" #include "opt_inet.h" #ifndef INET #error IPFIREWALL requires INET. 
#endif /* INET */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* struct ipfw_rule_ref */ #include #include /* Returns the interface-tracking name hash stored in the chain's ifcfg pointer (argument is parenthesized so any pointer expression is safe). */ #define CHAIN_TO_II(ch) ((struct namedobj_instance *)(ch)->ifcfg) /* Value passed to ipfw_objhash_create() when the name hash is created. */ #define DEFAULT_IFACES 128 static void handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif, uint16_t ifindex); static void handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif, uint16_t ifindex); static int list_ifaces(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); /* Sockopt handlers provided by this file. */ static struct ipfw_sopt_handler scodes[] = { { IP_FW_XIFLIST, 0, HDIR_GET, list_ifaces }, }; /* * FreeBSD Kernel interface. */ static void ipfw_kifhandler(void *arg, struct ifnet *ifp); static int ipfw_kiflookup(char *name); static void iface_khandler_register(void); static void iface_khandler_deregister(void); static eventhandler_tag ipfw_ifdetach_event, ipfw_ifattach_event; static int num_vnets = 0; static struct mtx vnet_mtx; /* * Event handler shared by the interface arrival and departure events. * Checks if kernel interface @ifp is contained in our tracked * interface list and, if so, calls handle_ifattach() when @arg is 1 * and handle_ifdetach() for any other value. * Returns early when V_ipfw_vnet_ready is 0 or when the chain has no * interface instance yet. The handler itself runs with the UH write * lock held. */ static void ipfw_kifhandler(void *arg, struct ifnet *ifp) { struct ip_fw_chain *ch; struct ipfw_iface *iif; struct namedobj_instance *ii; uintptr_t htype; if (V_ipfw_vnet_ready == 0) return; ch = &V_layer3_chain; htype = (uintptr_t)arg; IPFW_UH_WLOCK(ch); ii = CHAIN_TO_II(ch); if (ii == NULL) { IPFW_UH_WUNLOCK(ch); return; } iif = (struct ipfw_iface*)ipfw_objhash_lookup_name(ii, 0, if_name(ifp)); if (iif != NULL) { if (htype == 1) handle_ifattach(ch, iif, ifp->if_index); else handle_ifdetach(ch, iif, ifp->if_index); } IPFW_UH_WUNLOCK(ch); } /* * Reference current VNET as iface tracking API user. * Registers interface tracking handlers for first VNET. 
*/ static void iface_khandler_register(void) { int create; create = 0; mtx_lock(&vnet_mtx); if (num_vnets == 0) create = 1; num_vnets++; mtx_unlock(&vnet_mtx); if (create == 0) return; printf("IPFW: starting up interface tracker\n"); ipfw_ifdetach_event = EVENTHANDLER_REGISTER( ifnet_departure_event, ipfw_kifhandler, NULL, EVENTHANDLER_PRI_ANY); ipfw_ifattach_event = EVENTHANDLER_REGISTER( ifnet_arrival_event, ipfw_kifhandler, (void*)((uintptr_t)1), EVENTHANDLER_PRI_ANY); } /* * Drops one VNET reference and detaches the interface event * handlers when the last VNET instance goes away. */ static void iface_khandler_deregister(void) { int destroy; destroy = 0; mtx_lock(&vnet_mtx); if (num_vnets == 1) destroy = 1; num_vnets--; mtx_unlock(&vnet_mtx); if (destroy == 0) return; EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ipfw_ifattach_event); EVENTHANDLER_DEREGISTER(ifnet_departure_event, ipfw_ifdetach_event); } /* * Retrieves ifindex for given @name. * The interface reference taken by ifunit_ref() is released before * returning. * * Returns ifindex or 0. */ static int ipfw_kiflookup(char *name) { struct ifnet *ifp; int ifindex; ifindex = 0; if ((ifp = ifunit_ref(name)) != NULL) { ifindex = ifp->if_index; if_rele(ifp); } return (ifindex); } /* * Global ipfw startup hook. * Since we perform lazy initialization, only the mutex is set up * and the sockopt handlers are registered here. * * Always returns 0. */ int ipfw_iface_init(void) { mtx_init(&vnet_mtx, "IPFW ifhandler mtx", NULL, MTX_DEF); IPFW_ADD_SOPT_HANDLER(1, scodes); return (0); } /* * Global ipfw destroy hook. * Removes the sockopt handlers and destroys the mutex. */ void ipfw_iface_destroy(void) { IPFW_DEL_SOPT_HANDLER(1, scodes); mtx_destroy(&vnet_mtx); } /* * Perform actual init on internal request. * Inits both namehash and global khandler. * The namehash is created before taking the UH lock; if another * caller installed one first, the spare instance is destroyed. */ static void vnet_ipfw_iface_init(struct ip_fw_chain *ch) { struct namedobj_instance *ii; ii = ipfw_objhash_create(DEFAULT_IFACES); IPFW_UH_WLOCK(ch); if (ch->ifcfg == NULL) { ch->ifcfg = ii; ii = NULL; } IPFW_UH_WUNLOCK(ch); if (ii != NULL) { /* Already initialized. Free namehash. */ ipfw_objhash_destroy(ii); } else { /* We're the first ones. Init kernel hooks. 
*/ iface_khandler_register(); } } /* ipfw_objhash_foreach() callback: frees one tracked interface object. */ -static void +static int destroy_iface(struct namedobj_instance *ii, struct named_object *no, void *arg) { /* Assume all consumers have been already detached */ free(no, M_IPFW); + return (0); } /* * Per-VNET ipfw detach hook. * Detaches the name hash from the chain under the UH lock, then * frees every tracked interface, the hash itself and this VNET's * reference on the kernel event handlers. */ void vnet_ipfw_iface_destroy(struct ip_fw_chain *ch) { struct namedobj_instance *ii; IPFW_UH_WLOCK(ch); ii = CHAIN_TO_II(ch); ch->ifcfg = NULL; IPFW_UH_WUNLOCK(ch); if (ii != NULL) { ipfw_objhash_foreach(ii, destroy_iface, ch); ipfw_objhash_destroy(ii); iface_khandler_deregister(); } } /* * Notify the subsystem that we are interested in tracking * interface @name. This function has to be called without * holding any locks to permit allocating the necessary states * for proper interface tracking. * On success @ic->iface points to the (possibly shared) tracked * interface, whose reference count has been incremented. * * Returns 0 on success, EINVAL if @name does not fit in ifname. */ int ipfw_iface_ref(struct ip_fw_chain *ch, char *name, struct ipfw_ifc *ic) { struct namedobj_instance *ii; struct ipfw_iface *iif, *tmp; if (strlen(name) >= sizeof(iif->ifname)) return (EINVAL); IPFW_UH_WLOCK(ch); ii = CHAIN_TO_II(ch); if (ii == NULL) { /* * First request to subsystem. * Let's perform init. */ IPFW_UH_WUNLOCK(ch); vnet_ipfw_iface_init(ch); IPFW_UH_WLOCK(ch); ii = CHAIN_TO_II(ch); } iif = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name); if (iif != NULL) { iif->no.refcnt++; ic->iface = iif; IPFW_UH_WUNLOCK(ch); return (0); } IPFW_UH_WUNLOCK(ch); /* Not found. Let's create one */ iif = malloc(sizeof(struct ipfw_iface), M_IPFW, M_WAITOK | M_ZERO); TAILQ_INIT(&iif->consumers); iif->no.name = iif->ifname; strlcpy(iif->ifname, name, sizeof(iif->ifname)); /* * Ref & link to the list. * * We assume ifnet_arrival_event / ifnet_departure_event * are not holding any locks. */ iif->no.refcnt = 1; IPFW_UH_WLOCK(ch); tmp = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name); if (tmp != NULL) { /* Interface has been created since unlock. 
Ref and return */ tmp->no.refcnt++; ic->iface = tmp; IPFW_UH_WUNLOCK(ch); free(iif, M_IPFW); return (0); } iif->ifindex = ipfw_kiflookup(name); if (iif->ifindex != 0) iif->resolved = 1; ipfw_objhash_add(ii, &iif->no); ic->iface = iif; IPFW_UH_WUNLOCK(ch); return (0); } /* * Adds @ic to the list of iif interface consumers. * Must be called while holding both the UH write lock and the * chain WLOCK. * Callback may be immediately called (if interface exists). */ void ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic) { struct ipfw_iface *iif; IPFW_UH_WLOCK_ASSERT(ch); IPFW_WLOCK_ASSERT(ch); iif = ic->iface; TAILQ_INSERT_TAIL(&iif->consumers, ic, next); if (iif->resolved != 0) ic->cb(ch, ic->cbdata, iif->ifindex); } /* * Unlinks interface tracker object @ic from interface. * Must be called while holding UH lock. */ void ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic) { struct ipfw_iface *iif; IPFW_UH_WLOCK_ASSERT(ch); iif = ic->iface; TAILQ_REMOVE(&iif->consumers, ic, next); } /* * Unreference interface specified by @ic. * Must be called while holding UH lock. * The tracked interface is not freed here even if the count * drops to zero (see TODO below). */ void ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic) { struct ipfw_iface *iif; IPFW_UH_WLOCK_ASSERT(ch); iif = ic->iface; ic->iface = NULL; iif->no.refcnt--; /* TODO: check for references & delete */ } /* * Interface arrival handler. * Marks @iif resolved with @ifindex, bumps its generation count * and notifies every consumer under the chain WLOCK. */ static void handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif, uint16_t ifindex) { struct ipfw_ifc *ic; IPFW_UH_WLOCK_ASSERT(ch); iif->gencnt++; iif->resolved = 1; iif->ifindex = ifindex; IPFW_WLOCK(ch); TAILQ_FOREACH(ic, &iif->consumers, next) ic->cb(ch, ic->cbdata, iif->ifindex); IPFW_WUNLOCK(ch); } /* * Interface departure handler. 
*/ static void handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif, uint16_t ifindex) { struct ipfw_ifc *ic; IPFW_UH_WLOCK_ASSERT(ch); /* Consumers are told the interface is gone by passing ifindex 0. */ IPFW_WLOCK(ch); TAILQ_FOREACH(ic, &iif->consumers, next) ic->cb(ch, ic->cbdata, 0); IPFW_WUNLOCK(ch); iif->gencnt++; iif->resolved = 0; iif->ifindex = 0; } /* Callback argument for export_iface_internal(): the chain and the sockopt buffer being filled. */ struct dump_iface_args { struct ip_fw_chain *ch; struct sockopt_data *sd; }; /* * Object hash iteration callback: reserves one ipfw_iface_info in the * sockopt buffer taken from @arg and copies the name, resolved flag, * ifindex, refcnt and gencnt of the tracked interface @no into it. * The buffer is expected to be large enough (only checked by KASSERT). */ -static void +static int export_iface_internal(struct namedobj_instance *ii, struct named_object *no, void *arg) { ipfw_iface_info *i; struct dump_iface_args *da; struct ipfw_iface *iif; da = (struct dump_iface_args *)arg; i = (ipfw_iface_info *)ipfw_get_sopt_space(da->sd, sizeof(*i)); KASSERT(i != NULL, ("previously checked buffer is not enough")); iif = (struct ipfw_iface *)no; strlcpy(i->ifname, iif->ifname, sizeof(i->ifname)); if (iif->resolved) i->flags |= IPFW_IFFLAG_RESOLVED; i->ifindex = iif->ifindex; i->refcnt = iif->no.refcnt; i->gencnt = iif->gencnt; + return (0); } /* * Lists all interfaces currently tracked by ipfw. 
* Data layout (v0)(current): * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader ipfw_iface_info x N ] * * Returns 0 on success, EINVAL if the header cannot be obtained or * the requested size exceeds the supplied buffer, and ENOMEM if the * buffer is too small (the required size is then stored in the header). */ static int list_ifaces(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct namedobj_instance *ii; struct _ipfw_obj_lheader *olh; struct dump_iface_args da; uint32_t count, size; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); if (sd->valsize < olh->size) return (EINVAL); IPFW_UH_RLOCK(ch); ii = CHAIN_TO_II(ch); /* No name hash on the chain means nothing is tracked yet. */ if (ii != NULL) count = ipfw_objhash_count(ii); else count = 0; size = count * sizeof(ipfw_iface_info) + sizeof(ipfw_obj_lheader); /* Fill in header regardless of buffer size */ olh->count = count; olh->objsize = sizeof(ipfw_iface_info); if (size > olh->size) { olh->size = size; IPFW_UH_RUNLOCK(ch); return (ENOMEM); } olh->size = size; da.ch = ch; da.sd = sd; if (ii != NULL) ipfw_objhash_foreach(ii, export_iface_internal, &da); IPFW_UH_RUNLOCK(ch); return (0); } Index: head/sys/netpfil/ipfw/ip_fw_private.h =================================================================== --- head/sys/netpfil/ipfw/ip_fw_private.h (revision 299151) +++ head/sys/netpfil/ipfw/ip_fw_private.h (revision 299152) @@ -1,764 +1,764 @@ /*- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _IPFW2_PRIVATE_H #define _IPFW2_PRIVATE_H /* * Internal constants and data structures used by ipfw components * and not meant to be exported outside the kernel. */ #ifdef _KERNEL /* * For platforms that do not have SYSCTL support, we wrap the * SYSCTL_* into a function (one per file) to collect the values * into an array at module initialization. The wrapping macros, * SYSBEGIN() and SYSEND, are empty in the default case. */ #ifndef SYSBEGIN #define SYSBEGIN(x) #endif /* !SYSBEGIN */ #ifndef SYSEND #define SYSEND #endif /* !SYSEND */ /* Return values from ipfw_chk() */ enum { IP_FW_PASS = 0, IP_FW_DENY, IP_FW_DIVERT, IP_FW_TEE, IP_FW_DUMMYNET, IP_FW_NETGRAPH, IP_FW_NGTEE, IP_FW_NAT, IP_FW_REASS, }; /* * Structure for collecting parameters to dummynet for ip6_output forwarding. * NOTE(review): the _or suffix presumably marks the caller's original * ip6_output arguments -- confirm against the dummynet code. */ struct _ip6dn_args { struct ip6_pktopts *opt_or; int flags_or; struct ip6_moptions *im6o_or; struct ifnet *origifp_or; struct ifnet *ifp_or; struct sockaddr_in6 dst_or; u_long mtu_or; }; /* * Arguments for calling ipfw_chk() and dummynet_io(). We put them * all into a structure because this way it is easier and more * efficient to pass variables around and extend the interface. 
*/ struct ip_fw_args { struct mbuf *m; /* the mbuf chain */ struct ifnet *oif; /* output interface */ struct sockaddr_in *next_hop; /* forward address */ struct sockaddr_in6 *next_hop6; /* ipv6 forward address */ /* * On return, it points to the matching rule. * On entry, rule.slot > 0 means the info is valid and * contains the starting rule for an ipfw search. * If chain_id == chain->id && slot >0 then jump to that slot. * Otherwise, we locate the first rule >= rulenum:rule_id */ struct ipfw_rule_ref rule; /* match/restart info */ struct ether_header *eh; /* for bridged packets */ struct ipfw_flow_id f_id; /* grabbed from IP header */ //uint32_t cookie; /* a cookie depending on rule action */ struct inpcb *inp; struct _ip6dn_args dummypar; /* dummynet->ip6_output */ union { /* store here if cannot use a pointer */ struct sockaddr_in hopstore; struct sockaddr_in6 hopstore6; }; }; MALLOC_DECLARE(M_IPFW); /* * Hooks sometimes need to know the direction of the packet * (divert, dummynet, netgraph, ...) * We use a generic definition here, with bits 0-1 indicating the * direction, bit 2 indicating layer2 or 3, and bits 3-4 indicating * the specific protocol (if necessary). */ enum { DIR_MASK = 0x3, DIR_OUT = 0, DIR_IN = 1, DIR_FWD = 2, DIR_DROP = 3, PROTO_LAYER2 = 0x4, /* set for layer 2 */ /* PROTO_DEFAULT = 0, */ PROTO_IPV4 = 0x08, PROTO_IPV6 = 0x10, PROTO_IFB = 0x0c, /* layer2 + ifbridge */ /* PROTO_OLDBDG = 0x14, unused, old bridge */ }; /* wrapper for freeing a packet, in case we need to do more work */ #ifndef FREE_PKT #if defined(__linux__) || defined(_WIN32) #define FREE_PKT(m) netisr_dispatch(-1, m) #else #define FREE_PKT(m) m_freem(m) #endif #endif /* !FREE_PKT */ /* * Function definitions. 
*/ /* attach (arg = 1) or detach (arg = 0) hooks */ int ipfw_attach_hooks(int); #ifdef NOTYET void ipfw_nat_destroy(void); #endif /* NOTYET */ /* In ip_fw_log.c */ struct ip; struct ip_fw_chain; void ipfw_log_bpf(int); void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg, struct ip *ip); VNET_DECLARE(u_int64_t, norule_counter); #define V_norule_counter VNET(norule_counter) VNET_DECLARE(int, verbose_limit); #define V_verbose_limit VNET(verbose_limit) /* In ip_fw_dynamic.c */ enum { /* result for matching dynamic rules */ MATCH_REVERSE = 0, MATCH_FORWARD, MATCH_NONE, MATCH_UNKNOWN, }; /* * The lock for dynamic rules is only used once outside the file, * and only to release the result of lookup_dyn_rule(). * Eventually we may implement it with a callback on the function. */ struct ip_fw_chain; struct sockopt_data; int ipfw_is_dyn_rule(struct ip_fw *rule); void ipfw_expire_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *); void ipfw_dyn_unlock(ipfw_dyn_rule *q); struct tcphdr; struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *, u_int32_t, u_int32_t, int); int ipfw_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, ipfw_insn_limit *cmd, struct ip_fw_args *args, uint32_t tablearg); ipfw_dyn_rule *ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, struct tcphdr *tcp); void ipfw_remove_dyn_children(struct ip_fw *rule); void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep); int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd); void ipfw_dyn_init(struct ip_fw_chain *); /* per-vnet initialization */ void ipfw_dyn_uninit(int); /* per-vnet deinitialization */ int ipfw_dyn_len(void); int ipfw_dyn_get_count(void); /* common variables */ VNET_DECLARE(int, fw_one_pass); #define V_fw_one_pass VNET(fw_one_pass) VNET_DECLARE(int, fw_verbose); #define V_fw_verbose VNET(fw_verbose) VNET_DECLARE(struct 
ip_fw_chain, layer3_chain); #define V_layer3_chain VNET(layer3_chain) VNET_DECLARE(int, ipfw_vnet_ready); #define V_ipfw_vnet_ready VNET(ipfw_vnet_ready) VNET_DECLARE(u_int32_t, set_disable); #define V_set_disable VNET(set_disable) VNET_DECLARE(int, autoinc_step); #define V_autoinc_step VNET(autoinc_step) VNET_DECLARE(unsigned int, fw_tables_max); #define V_fw_tables_max VNET(fw_tables_max) VNET_DECLARE(unsigned int, fw_tables_sets); #define V_fw_tables_sets VNET(fw_tables_sets) struct tables_config; #ifdef _KERNEL /* * Here we have the structure representing an ipfw rule. * * It starts with a general area * followed by an array of one or more instructions, which the code * accesses as an array of 32-bit values. * * Given a rule pointer r: * * r->cmd is the start of the first instruction. * ACTION_PTR(r) is the start of the first action (things to do * once a rule matched). */ struct ip_fw { uint16_t act_ofs; /* offset of action in 32-bit units */ uint16_t cmd_len; /* # of 32-bit words in cmd */ uint16_t rulenum; /* rule number */ uint8_t set; /* rule set (0..31) */ uint8_t flags; /* currently unused */ counter_u64_t cntr; /* Pointer to rule counters */ uint32_t timestamp; /* tv_sec of last match */ uint32_t id; /* rule id */ uint32_t cached_id; /* used by jump_fast */ uint32_t cached_pos; /* used by jump_fast */ ipfw_insn cmd[1]; /* storage for commands */ }; #define IPFW_RULE_CNTR_SIZE (2 * sizeof(uint64_t)) #endif /* _KERNEL */ struct ip_fw_chain { struct ip_fw **map; /* array of rule ptrs to ease lookup */ uint32_t id; /* ruleset id */ int n_rules; /* number of static rules */ void *tablestate; /* runtime table info */ void *valuestate; /* runtime table value info */ int *idxmap; /* skipto array of rules */ void **srvstate; /* runtime service mappings */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t rwmtx; #else struct rmlock rwmtx; #endif int static_len; /* total len of static rules (v0) */ uint32_t gencnt; /* NAT generation count */ LIST_HEAD(nat_list, cfg_nat) 
nat; /* list of nat entries */ struct ip_fw *default_rule; struct tables_config *tblcfg; /* tables module data */ void *ifcfg; /* interface module data */ int *idxmap_back; /* standby skipto array of rules */ struct namedobj_instance *srvmap; /* cfg name->number mappings */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t uh_lock; #else struct rwlock uh_lock; /* lock for upper half */ #endif }; /* 64-byte structure representing multi-field table value */ struct table_value { uint32_t tag; /* O_TAG/O_TAGGED */ uint32_t pipe; /* O_PIPE/O_QUEUE */ uint16_t divert; /* O_DIVERT/O_TEE */ uint16_t skipto; /* skipto, CALLRET */ uint32_t netgraph; /* O_NETGRAPH/O_NGTEE */ uint32_t fib; /* O_SETFIB */ uint32_t nat; /* O_NAT */ uint32_t nh4; uint8_t dscp; uint8_t spare0; uint16_t spare1; /* -- 32 bytes -- */ struct in6_addr nh6; uint32_t limit; /* O_LIMIT */ uint32_t zoneid; /* scope zone id for nh6 */ uint64_t refcnt; /* Number of references */ }; struct named_object { TAILQ_ENTRY(named_object) nn_next; /* namehash */ TAILQ_ENTRY(named_object) nv_next; /* valuehash */ char *name; /* object name */ uint16_t etlv; /* Export TLV id */ uint8_t subtype;/* object subtype within class */ uint8_t spare[3]; uint16_t kidx; /* object kernel index */ uint32_t set; /* set object belongs to */ uint32_t refcnt; /* number of references */ }; TAILQ_HEAD(namedobjects_head, named_object); struct sockopt; /* used by tcp_var.h */ struct sockopt_data { caddr_t kbuf; /* allocated buffer */ size_t ksize; /* given buffer size */ size_t koff; /* data already used */ size_t kavail; /* number of bytes available */ size_t ktotal; /* total bytes pushed */ struct sockopt *sopt; /* socket data */ caddr_t sopt_val; /* sopt user buffer */ size_t valsize; /* original data size */ }; struct ipfw_ifc; /* Interface consumer callback; receives the consumer's cbdata and an interface index. */ typedef void (ipfw_ifc_cb)(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex); /* Tracked interface: a named object plus the list of consumers to notify. */ struct ipfw_iface { struct named_object no; char ifname[64]; /* interface name; no.name points here */ int resolved; /* non-zero while ifindex is known */ uint16_t ifindex; uint16_t spare; uint64_t 
gencnt; /* bumped on every attach/detach */ TAILQ_HEAD(, ipfw_ifc) consumers; }; /* One consumer of a tracked interface. */ struct ipfw_ifc { TAILQ_ENTRY(ipfw_ifc) next; struct ipfw_iface *iface; ipfw_ifc_cb *cb; void *cbdata; }; /* * Macros for working with various counters. * The rule macros operate on two consecutive counters: (_cntr)->cntr * is incremented by one per call and (_cntr)->cntr + 1 by _bytes. */ #define IPFW_INC_RULE_COUNTER(_cntr, _bytes) do { \ counter_u64_add((_cntr)->cntr, 1); \ counter_u64_add((_cntr)->cntr + 1, _bytes); \ if ((_cntr)->timestamp != time_uptime) \ (_cntr)->timestamp = time_uptime; \ } while (0) #define IPFW_INC_DYN_COUNTER(_cntr, _bytes) do { \ (_cntr)->pcnt++; \ (_cntr)->bcnt += _bytes; \ } while (0) #define IPFW_ZERO_RULE_COUNTER(_cntr) do { \ counter_u64_zero((_cntr)->cntr); \ counter_u64_zero((_cntr)->cntr + 1); \ (_cntr)->timestamp = 0; \ } while (0) #define IPFW_ZERO_DYN_COUNTER(_cntr) do { \ (_cntr)->pcnt = 0; \ (_cntr)->bcnt = 0; \ } while (0) /* * TARG_VAL() selects field f of entry k in the chain's table value array. * NOTE(review): IP_FW_ARG_TABLEARG() reads an identifier named tablearg * that is not one of its parameters, so it must exist in the scope where * the macro is expanded. */ #define TARG_VAL(ch, k, f) ((struct table_value *)((ch)->valuestate))[k].f #define IP_FW_ARG_TABLEARG(ch, a, f) \ (((a) == IP_FW_TARG) ? TARG_VAL(ch, tablearg, f) : (a)) /* * The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c * so the variable and the macros must be here. 
*/ #if defined( __linux__ ) || defined( _WIN32 ) #define IPFW_LOCK_INIT(_chain) do { \ rw_init(&(_chain)->rwmtx, "IPFW static rules"); \ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ } while (0) #define IPFW_LOCK_DESTROY(_chain) do { \ rw_destroy(&(_chain)->rwmtx); \ rw_destroy(&(_chain)->uh_lock); \ } while (0) #define IPFW_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_RLOCKED) #define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED) #define IPFW_RLOCK_TRACKER #define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx) #define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx) #define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx) #define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx) #define IPFW_PF_RLOCK(p) IPFW_RLOCK(p) #define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #else /* FreeBSD */ /* * Here rwmtx is an rmlock: IPFW_RLOCK()/IPFW_RUNLOCK() reference a * local named _tracker, so IPFW_RLOCK_TRACKER must be declared in * the scope that uses them. */ #define IPFW_LOCK_INIT(_chain) do { \ rm_init(&(_chain)->rwmtx, "IPFW static rules"); \ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ } while (0) #define IPFW_LOCK_DESTROY(_chain) do { \ rm_destroy(&(_chain)->rwmtx); \ rw_destroy(&(_chain)->uh_lock); \ } while (0) #define IPFW_RLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_RLOCKED) #define IPFW_WLOCK_ASSERT(_chain) rm_assert(&(_chain)->rwmtx, RA_WLOCKED) #define IPFW_RLOCK_TRACKER struct rm_priotracker _tracker #define IPFW_RLOCK(p) rm_rlock(&(p)->rwmtx, &_tracker) #define IPFW_RUNLOCK(p) rm_runlock(&(p)->rwmtx, &_tracker) #define IPFW_WLOCK(p) rm_wlock(&(p)->rwmtx) #define IPFW_WUNLOCK(p) rm_wunlock(&(p)->rwmtx) #define IPFW_PF_RLOCK(p) IPFW_RLOCK(p) #define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #endif /* __linux__ || _WIN32 */ /* Upper-half (uh_lock) macros; these use the rw_* primitives in both configurations above. */ #define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED) #define IPFW_UH_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_WLOCKED) #define IPFW_UH_UNLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_UNLOCKED) #define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock) #define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock) #define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock) #define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock) struct 
obj_idx { uint16_t uidx; /* internal index supplied by userland */ uint16_t kidx; /* kernel object index */ uint16_t off; /* tlv offset from rule end in 4-byte words */ uint8_t spare; uint8_t type; /* object type within its category */ }; struct rule_check_info { uint16_t flags; /* rule-specific check flags */ uint16_t object_opcodes; /* num of opcodes referencing objects */ uint16_t urule_numoff; /* offset of rulenum in bytes */ uint8_t version; /* rule version */ uint8_t spare; ipfw_obj_ctlv *ctlv; /* name TLV container */ struct ip_fw *krule; /* resulting rule pointer */ caddr_t urule; /* original rule pointer */ struct obj_idx obuf[8]; /* table references storage */ }; /* Legacy interface support */ /* * FreeBSD 8 export rule format */ struct ip_fw_rule0 { struct ip_fw *x_next; /* linked list of rules */ struct ip_fw *next_rule; /* ptr to next [skipto] rule */ /* 'next_rule' is used to pass up 'set_disable' status */ uint16_t act_ofs; /* offset of action in 32-bit units */ uint16_t cmd_len; /* # of 32-bit words in cmd */ uint16_t rulenum; /* rule number */ uint8_t set; /* rule set (0..31) */ uint8_t _pad; /* padding */ uint32_t id; /* rule id */ /* These fields are present in all rules. */ uint64_t pcnt; /* Packet counter */ uint64_t bcnt; /* Byte counter */ uint32_t timestamp; /* tv_sec of last match */ ipfw_insn cmd[1]; /* storage for commands */ }; struct ip_fw_bcounter0 { uint64_t pcnt; /* Packet counter */ uint64_t bcnt; /* Byte counter */ uint32_t timestamp; /* tv_sec of last match */ }; /* Kernel rule length */ /* * RULE _K_ SIZE _V_ -> * get kernel size from userland rule version _V_. 
* RULE _U_ SIZE _V_ -> * get user size version _V_ from kernel rule * RULESIZE _V_ -> * get user size rule length * * NOTE(review): the "- 4" in the macros below presumably discounts the * cmd[1] placeholder already counted by sizeof -- confirm that * sizeof(ipfw_insn) is 4. */ /* FreeBSD8 <> current kernel format */ #define RULEUSIZE0(r) (sizeof(struct ip_fw_rule0) + (r)->cmd_len * 4 - 4) #define RULEKSIZE0(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8) /* FreeBSD11 <> current kernel format */ #define RULEUSIZE1(r) (roundup2(sizeof(struct ip_fw_rule) + \ (r)->cmd_len * 4 - 4, 8)) #define RULEKSIZE1(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8) /* * Tables/Objects index rewriting code */ /* Default and maximum number of ipfw tables/objects. */ #define IPFW_TABLES_MAX 65536 #define IPFW_TABLES_DEFAULT 128 #define IPFW_OBJECTS_MAX 65536 #define IPFW_OBJECTS_DEFAULT 1024 #define CHAIN_TO_SRV(ch) ((ch)->srvmap) #define SRV_OBJECT(ch, idx) ((ch)->srvstate[(idx)]) struct tid_info { uint32_t set; /* table set */ uint16_t uidx; /* table index */ uint8_t type; /* table type */ uint8_t atype; uint8_t spare; int tlen; /* Total TLV size block */ void *tlvs; /* Pointer to first TLV */ }; /* * Classifier callback. Checks if @cmd opcode contains kernel object reference. * If true, returns its index and type. * Returns 0 if match is found, 1 otherwise. */ typedef int (ipfw_obj_rw_cl)(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype); /* * Updater callback. Sets kernel object reference index to @puidx. */ typedef void (ipfw_obj_rw_upd)(ipfw_insn *cmd, uint16_t puidx); /* * Finder callback. Tries to find named object by name (specified via @ti). * Stores found named object pointer in @pno. * If object was not found, NULL is stored. * * Return 0 if input data was valid. */ typedef int (ipfw_obj_fname_cb)(struct ip_fw_chain *ch, struct tid_info *ti, struct named_object **pno); /* * Another finder callback. Tries to find named object by kernel index. * * Returns pointer to named object or NULL. */ typedef struct named_object *(ipfw_obj_fidx_cb)(struct ip_fw_chain *ch, uint16_t kidx); /* * Object creator callback. 
Tries to create object specified by @ti. * Stores newly-allocated object index in @pkidx. * * Returns 0 on success. */ typedef int (ipfw_obj_create_cb)(struct ip_fw_chain *ch, struct tid_info *ti, uint16_t *pkidx); /* * Object destroy callback. Intended to free resources allocated by * create_object callback. */ typedef void (ipfw_obj_destroy_cb)(struct ip_fw_chain *ch, struct named_object *no); struct opcode_obj_rewrite { uint32_t opcode; /* Opcode to act upon */ uint32_t etlv; /* Relevant export TLV id */ ipfw_obj_rw_cl *classifier; /* Check if rewrite is needed */ ipfw_obj_rw_upd *update; /* update cmd with new value */ ipfw_obj_fname_cb *find_byname; /* Find named object by name */ ipfw_obj_fidx_cb *find_bykidx; /* Find named object by kidx */ ipfw_obj_create_cb *create_object; /* Create named object */ ipfw_obj_destroy_cb *destroy_object;/* Destroy named object */ }; /* * Register/unregister the rewriter array @c when the first argument is * non-zero. @c must be a true array (not a pointer) because its element * count is computed with sizeof. */ #define IPFW_ADD_OBJ_REWRITER(f, c) do { \ if ((f) != 0) \ ipfw_add_obj_rewriter(c, \ sizeof(c) / sizeof(c[0])); \ } while(0) #define IPFW_DEL_OBJ_REWRITER(l, c) do { \ if ((l) != 0) \ ipfw_del_obj_rewriter(c, \ sizeof(c) / sizeof(c[0])); \ } while(0) /* In ip_fw_iface.c */ int ipfw_iface_init(void); void ipfw_iface_destroy(void); void vnet_ipfw_iface_destroy(struct ip_fw_chain *ch); int ipfw_iface_ref(struct ip_fw_chain *ch, char *name, struct ipfw_ifc *ic); void ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic); void ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); void ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); /* In ip_fw_sockopt.c */ void ipfw_init_skipto_cache(struct ip_fw_chain *chain); void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain); int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id); int ipfw_ctl3(struct sockopt *sopt); int ipfw_chk(struct ip_fw_args *args); void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head, struct ip_fw *rule); void ipfw_reap_rules(struct ip_fw *head); void 
ipfw_init_counters(void); void ipfw_destroy_counters(void); struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize); int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt); /* Sockopt handler: processes request @op3 using the buffer described by @sd. */ typedef int (sopt_handler_f)(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); struct ipfw_sopt_handler { uint16_t opcode; uint8_t version; uint8_t dir; sopt_handler_f *handler; uint64_t refcnt; }; #define HDIR_SET 0x01 /* Handler is used to set some data */ #define HDIR_GET 0x02 /* Handler is used to retrieve data */ /* Parenthesized so that expressions such as (dir & HDIR_BOTH) bind as intended. */ #define HDIR_BOTH (HDIR_GET|HDIR_SET) void ipfw_init_sopt_handler(void); void ipfw_destroy_sopt_handler(void); void ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count); int ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count); caddr_t ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed); caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed); /* * Register/unregister the handler array @c when the first argument is * non-zero. @c must be a true array (not a pointer) because its element * count is computed with sizeof. */ #define IPFW_ADD_SOPT_HANDLER(f, c) do { \ if ((f) != 0) \ ipfw_add_sopt_handler(c, \ sizeof(c) / sizeof(c[0])); \ } while(0) #define IPFW_DEL_SOPT_HANDLER(l, c) do { \ if ((l) != 0) \ ipfw_del_sopt_handler(c, \ sizeof(c) / sizeof(c[0])); \ } while(0) struct namedobj_instance; /* Callback invoked by ipfw_objhash_foreach() for each named object. */ -typedef void (objhash_cb_t)(struct namedobj_instance *ni, struct named_object *, +typedef int (objhash_cb_t)(struct namedobj_instance *ni, struct named_object *, void *arg); typedef uint32_t (objhash_hash_f)(struct namedobj_instance *ni, const void *key, uint32_t kopt); typedef int (objhash_cmp_f)(struct named_object *no, const void *key, uint32_t kopt); struct namedobj_instance *ipfw_objhash_create(uint32_t items); void ipfw_objhash_destroy(struct namedobj_instance *); void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks); void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, void **idx, int *blocks); void ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, void **idx, int *blocks); void ipfw_objhash_bitmap_free(void *idx, int blocks); void 
ipfw_objhash_set_hashf(struct namedobj_instance *ni, objhash_hash_f *f); struct named_object *ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name); struct named_object *ipfw_objhash_lookup_name_type(struct namedobj_instance *ni, uint32_t set, uint32_t type, const char *name); struct named_object *ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t idx); int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a, struct named_object *b); void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no); void ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no); uint32_t ipfw_objhash_count(struct namedobj_instance *ni); -void ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, +int ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, void *arg); int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx); int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx); void ipfw_objhash_set_funcs(struct namedobj_instance *ni, objhash_hash_f *hash_f, objhash_cmp_f *cmp_f); int ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti, uint32_t etlv, struct named_object **pno); void ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv); ipfw_obj_ntlv *ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx, uint32_t etlv); void ipfw_init_obj_rewriter(void); void ipfw_destroy_obj_rewriter(void); void ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count); int ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count); int create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti); void update_opcode_kidx(ipfw_insn *cmd, uint16_t idx); int classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx); void ipfw_init_srv(struct ip_fw_chain *ch); void ipfw_destroy_srv(struct ip_fw_chain *ch); int ipfw_check_object_name_generic(const char *name); 
/* In ip_fw_eaction.c */ typedef int (ipfw_eaction_t)(struct ip_fw_chain *ch, struct ip_fw_args *args, ipfw_insn *cmd, int *done); int ipfw_eaction_init(struct ip_fw_chain *ch, int first); void ipfw_eaction_uninit(struct ip_fw_chain *ch, int last); uint16_t ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler, const char *name); int ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id); int ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args, ipfw_insn *cmd, int *done); /* In ip_fw_table.c */ struct table_info; typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val); int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, void *paddr, uint32_t *val); int ipfw_init_tables(struct ip_fw_chain *ch, int first); int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables); int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int nsets); void ipfw_destroy_tables(struct ip_fw_chain *ch, int last); /* In ip_fw_nat.c -- XXX to be moved to ip_var.h */ extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int); typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *); typedef int ipfw_nat_cfg_t(struct sockopt *); VNET_DECLARE(int, ipfw_nat_ready); #define V_ipfw_nat_ready VNET(ipfw_nat_ready) #define IPFW_NAT_LOADED (V_ipfw_nat_ready) extern ipfw_nat_t *ipfw_nat_ptr; extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr; extern ipfw_nat_cfg_t *ipfw_nat_del_ptr; extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; /* Helper functions for IP checksum adjustment */ static __inline uint16_t cksum_add(uint16_t sum, uint16_t a) { uint16_t res; res = sum + a; return (res + (res < a)); } static __inline uint16_t cksum_adjust(uint16_t oldsum, uint16_t old, uint16_t new) { return (~cksum_add(cksum_add(~oldsum, ~old), new)); } 
#endif /* _KERNEL */ #endif /* _IPFW2_PRIVATE_H */ Index: head/sys/netpfil/ipfw/ip_fw_sockopt.c =================================================================== --- head/sys/netpfil/ipfw/ip_fw_sockopt.c (revision 299151) +++ head/sys/netpfil/ipfw/ip_fw_sockopt.c (revision 299152) @@ -1,4329 +1,4334 @@ /*- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa * Copyright (c) 2014 Yandex LLC * Copyright (c) 2014 Alexander V. Chernikov * * Supported by: Valeria Paoli * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Control socket and rule management routines for ipfw. * Control is currently implemented via IP_FW3 setsockopt() code. */ #include "opt_ipfw.h" #include "opt_inet.h" #ifndef INET #error IPFIREWALL requires INET. 
#endif /* INET */ #include "opt_inet6.h" #include #include #include #include /* struct m_tag used by nested headers */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* hooks */ #include #include #include #ifdef MAC #include #endif static int ipfw_ctl(struct sockopt *sopt); static int check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci); static int check_ipfw_rule1(struct ip_fw_rule *rule, int size, struct rule_check_info *ci); static int check_ipfw_rule0(struct ip_fw_rule0 *rule, int size, struct rule_check_info *ci); static int rewrite_rule_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci); #define NAMEDOBJ_HASH_SIZE 32 struct namedobj_instance { struct namedobjects_head *names; struct namedobjects_head *values; uint32_t nn_size; /* names hash size */ uint32_t nv_size; /* number hash size */ u_long *idx_mask; /* used items bitmask */ uint32_t max_blocks; /* number of "long" blocks in bitmask */ uint32_t count; /* number of items */ uint16_t free_off[IPFW_MAX_SETS]; /* first possible free offset */ objhash_hash_f *hash_f; objhash_cmp_f *cmp_f; }; #define BLOCK_ITEMS (8 * sizeof(u_long)) /* Number of items for ffsl() */ static uint32_t objhash_hash_name(struct namedobj_instance *ni, const void *key, uint32_t kopt); static uint32_t objhash_hash_idx(struct namedobj_instance *ni, uint32_t val); static int objhash_cmp_name(struct named_object *no, const void *name, uint32_t set); MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); static int dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); 
static int move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); /* ctl3 handler data */ struct mtx ctl3_lock; #define CTL3_LOCK_INIT() mtx_init(&ctl3_lock, "ctl3_lock", NULL, MTX_DEF) #define CTL3_LOCK_DESTROY() mtx_destroy(&ctl3_lock) #define CTL3_LOCK() mtx_lock(&ctl3_lock) #define CTL3_UNLOCK() mtx_unlock(&ctl3_lock) static struct ipfw_sopt_handler *ctl3_handlers; static size_t ctl3_hsize; static uint64_t ctl3_refct, ctl3_gencnt; #define CTL3_SMALLBUF 4096 /* small page-size write buffer */ #define CTL3_LARGEBUF 16 * 1024 * 1024 /* handle large rulesets */ static int ipfw_flush_sopt_data(struct sockopt_data *sd); static struct ipfw_sopt_handler scodes[] = { { IP_FW_XGET, 0, HDIR_GET, dump_config }, { IP_FW_XADD, 0, HDIR_BOTH, add_rules }, { IP_FW_XDEL, 0, HDIR_BOTH, del_rules }, { IP_FW_XZERO, 0, HDIR_SET, clear_rules }, { IP_FW_XRESETLOG, 0, HDIR_SET, clear_rules }, { IP_FW_XMOVE, 0, HDIR_SET, move_rules }, { IP_FW_SET_SWAP, 0, HDIR_SET, manage_sets }, { IP_FW_SET_MOVE, 0, HDIR_SET, manage_sets }, { IP_FW_SET_ENABLE, 0, HDIR_SET, manage_sets }, { IP_FW_DUMP_SOPTCODES, 0, HDIR_GET, dump_soptcodes }, { IP_FW_DUMP_SRVOBJECTS,0, HDIR_GET, dump_srvobjects }, }; static int set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule); static struct opcode_obj_rewrite *find_op_rw(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype); static int mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule, uint32_t *bmask); static int ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule, struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti); static int ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, struct 
tid_info *ti, struct obj_idx *pidx, int *unresolved); static void unref_rule_objects(struct ip_fw_chain *chain, struct ip_fw *rule); static void unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib, struct obj_idx *end); static int export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx, struct sockopt_data *sd); /* * Opcode object rewriter variables */ struct opcode_obj_rewrite *ctl3_rewriters; static size_t ctl3_rsize; /* * static variables followed by global ones */ static VNET_DEFINE(uma_zone_t, ipfw_cntr_zone); #define V_ipfw_cntr_zone VNET(ipfw_cntr_zone) void ipfw_init_counters() { V_ipfw_cntr_zone = uma_zcreate("IPFW counters", IPFW_RULE_CNTR_SIZE, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU); } void ipfw_destroy_counters() { uma_zdestroy(V_ipfw_cntr_zone); } struct ip_fw * ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize) { struct ip_fw *rule; rule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO); rule->cntr = uma_zalloc(V_ipfw_cntr_zone, M_WAITOK | M_ZERO); return (rule); } static void free_rule(struct ip_fw *rule) { uma_zfree(V_ipfw_cntr_zone, rule->cntr); free(rule, M_IPFW); } /* * Find the smallest rule >= key, id. * We could use bsearch but it is so simple that we code it directly */ int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id) { int i, lo, hi; struct ip_fw *r; for (lo = 0, hi = chain->n_rules - 1; lo < hi;) { i = (lo + hi) / 2; r = chain->map[i]; if (r->rulenum < key) lo = i + 1; /* continue from the next one */ else if (r->rulenum > key) hi = i; /* this might be good */ else if (r->id < id) lo = i + 1; /* continue from the next one */ else /* r->id >= id */ hi = i; /* this might be good */ } return hi; } /* * Builds skipto cache on rule set @map. 
*/ static void update_skipto_cache(struct ip_fw_chain *chain, struct ip_fw **map) { int *smap, rulenum; int i, mi; IPFW_UH_WLOCK_ASSERT(chain); mi = 0; rulenum = map[mi]->rulenum; smap = chain->idxmap_back; if (smap == NULL) return; for (i = 0; i < 65536; i++) { smap[i] = mi; /* Use the same rule index until i < rulenum */ if (i != rulenum || i == 65535) continue; /* Find next rule with num > i */ rulenum = map[++mi]->rulenum; while (rulenum == i) rulenum = map[++mi]->rulenum; } } /* * Swaps prepared (backup) index with current one. */ static void swap_skipto_cache(struct ip_fw_chain *chain) { int *map; IPFW_UH_WLOCK_ASSERT(chain); IPFW_WLOCK_ASSERT(chain); map = chain->idxmap; chain->idxmap = chain->idxmap_back; chain->idxmap_back = map; } /* * Allocate and initialize skipto cache. */ void ipfw_init_skipto_cache(struct ip_fw_chain *chain) { int *idxmap, *idxmap_back; idxmap = malloc(65536 * sizeof(uint32_t *), M_IPFW, M_WAITOK | M_ZERO); idxmap_back = malloc(65536 * sizeof(uint32_t *), M_IPFW, M_WAITOK | M_ZERO); /* * Note we may be called at any time after initialization, * for example, on first skipto rule, so we need to * provide valid chain->idxmap on return */ IPFW_UH_WLOCK(chain); if (chain->idxmap != NULL) { IPFW_UH_WUNLOCK(chain); free(idxmap, M_IPFW); free(idxmap_back, M_IPFW); return; } /* Set backup pointer first to permit building cache */ chain->idxmap_back = idxmap_back; update_skipto_cache(chain, chain->map); IPFW_WLOCK(chain); /* It is now safe to set chain->idxmap ptr */ chain->idxmap = idxmap; swap_skipto_cache(chain); IPFW_WUNLOCK(chain); IPFW_UH_WUNLOCK(chain); } /* * Destroys skipto cache. */ void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain) { if (chain->idxmap != NULL) free(chain->idxmap, M_IPFW); if (chain->idxmap != NULL) free(chain->idxmap_back, M_IPFW); } /* * allocate a new map, returns the chain locked. extra is the number * of entries to add or delete. 
*/ static struct ip_fw ** get_map(struct ip_fw_chain *chain, int extra, int locked) { for (;;) { struct ip_fw **map; int i, mflags; mflags = M_ZERO | ((locked != 0) ? M_NOWAIT : M_WAITOK); i = chain->n_rules + extra; map = malloc(i * sizeof(struct ip_fw *), M_IPFW, mflags); if (map == NULL) { printf("%s: cannot allocate map\n", __FUNCTION__); return NULL; } if (!locked) IPFW_UH_WLOCK(chain); if (i >= chain->n_rules + extra) /* good */ return map; /* otherwise we lost the race, free and retry */ if (!locked) IPFW_UH_WUNLOCK(chain); free(map, M_IPFW); } } /* * swap the maps. It is supposed to be called with IPFW_UH_WLOCK */ static struct ip_fw ** swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len) { struct ip_fw **old_map; IPFW_WLOCK(chain); chain->id++; chain->n_rules = new_len; old_map = chain->map; chain->map = new_map; swap_skipto_cache(chain); IPFW_WUNLOCK(chain); return old_map; } static void export_cntr1_base(struct ip_fw *krule, struct ip_fw_bcounter *cntr) { cntr->size = sizeof(*cntr); if (krule->cntr != NULL) { cntr->pcnt = counter_u64_fetch(krule->cntr); cntr->bcnt = counter_u64_fetch(krule->cntr + 1); cntr->timestamp = krule->timestamp; } if (cntr->timestamp > 0) cntr->timestamp += boottime.tv_sec; } static void export_cntr0_base(struct ip_fw *krule, struct ip_fw_bcounter0 *cntr) { if (krule->cntr != NULL) { cntr->pcnt = counter_u64_fetch(krule->cntr); cntr->bcnt = counter_u64_fetch(krule->cntr + 1); cntr->timestamp = krule->timestamp; } if (cntr->timestamp > 0) cntr->timestamp += boottime.tv_sec; } /* * Copies rule @urule from v1 userland format (current). * to kernel @krule. * Assume @krule is zeroed. 
*/
static void
import_rule1(struct rule_check_info *ci)
{
	struct ip_fw_rule *src;
	struct ip_fw *dst;

	src = (struct ip_fw_rule *)ci->urule;
	dst = (struct ip_fw *)ci->krule;

	/* Remember where the rule number lives in the userland layout. */
	ci->urule_numoff = offsetof(struct ip_fw_rule, rulenum);

	/* Header fields map one to one. */
	dst->flags = src->flags;
	dst->set = src->set;
	dst->rulenum = src->rulenum;
	dst->cmd_len = src->cmd_len;
	dst->act_ofs = src->act_ofs;

	/* Opcodes: cmd_len counts 32-bit words. */
	memcpy(dst->cmd, src->cmd, dst->cmd_len * sizeof(uint32_t));
}

/*
 * Writes kernel rule @krule into @data in v1 (current) format.
 * Resulting layout:
 * [ ipfw_obj_tlv(IPFW_TLV_RULE_ENT)
 *     [ ip_fw_rule ] OR
 *     [ ip_fw_bcounter ip_fw_rule] (when @rcntrs is non-zero).
 * ]
 * @len is stored as the TLV length. @data is expected to be zeroed.
 */
static void
export_rule1(struct ip_fw *krule, caddr_t data, int len, int rcntrs)
{
	struct ip_fw_bcounter *counters;
	struct ip_fw_rule *out;
	ipfw_obj_tlv *hdr;
	caddr_t payload;

	/* TLV header comes first. */
	hdr = (ipfw_obj_tlv *)data;
	hdr->type = IPFW_TLV_RULE_ENT;
	hdr->length = len;
	payload = (caddr_t)(hdr + 1);

	/* Optional counter block sits between the header and the rule. */
	if (rcntrs != 0) {
		counters = (struct ip_fw_bcounter *)payload;
		export_cntr1_base(krule, counters);
		payload = (caddr_t)(counters + 1);
	}
	out = (struct ip_fw_rule *)payload;

	/* Header fields map one to one. */
	out->id = krule->id;
	out->flags = krule->flags;
	out->set = krule->set;
	out->rulenum = krule->rulenum;
	out->cmd_len = krule->cmd_len;
	out->act_ofs = krule->act_ofs;

	/* Opcodes: cmd_len counts 32-bit words. */
	memcpy(out->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t));
}

/*
 * Copies rule @urule from FreeBSD8 userland format (v0)
 * to kernel @krule.
 * Assume @krule is zeroed.
*/
static void
import_rule0(struct rule_check_info *ci)
{
	struct ip_fw_rule0 *urule;
	struct ip_fw *krule;
	int cmdlen, l;
	ipfw_insn *cmd;
	ipfw_insn_limit *lcmd;
	ipfw_insn_if *cmdif;

	urule = (struct ip_fw_rule0 *)ci->urule;
	krule = (struct ip_fw *)ci->krule;

	/* copy header */
	krule->act_ofs = urule->act_ofs;
	krule->cmd_len = urule->cmd_len;
	krule->rulenum = urule->rulenum;
	krule->set = urule->set;
	/* Low bit of the v0 padding field carries the NOOPT flag */
	if ((urule->_pad & 1) != 0)
		krule->flags |= IPFW_RULE_NOOPT;

	/* Save rulenum offset */
	ci->urule_numoff = offsetof(struct ip_fw_rule0, rulenum);

	/* Copy opcodes */
	memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t));

	/*
	 * Alter opcodes:
	 * 1) convert tablearg value from 65535 to 0
	 * 2) Add high bit to O_SETFIB/O_SETDSCP values (to make room for targ).
	 * 3) convert table number in iface opcodes to u16
	 */
	l = krule->cmd_len;
	cmd = krule->cmd;
	cmdlen = 0;

	for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
		cmdlen = F_LEN(cmd);

		switch (cmd->opcode) {
		/* Opcodes supporting tablearg */
		case O_TAG:
		case O_TAGGED:
		case O_PIPE:
		case O_QUEUE:
		case O_DIVERT:
		case O_TEE:
		case O_SKIPTO:
		case O_CALLRETURN:
		case O_NETGRAPH:
		case O_NGTEE:
		case O_NAT:
			if (cmd->arg1 == 65535)
				cmd->arg1 = IP_FW_TARG;
			break;
		case O_SETFIB:
		case O_SETDSCP:
			if (cmd->arg1 == 65535)
				cmd->arg1 = IP_FW_TARG;
			else
				cmd->arg1 |= 0x8000;
			break;
		case O_LIMIT:
			lcmd = (ipfw_insn_limit *)cmd;
			if (lcmd->conn_limit == 65535)
				lcmd->conn_limit = IP_FW_TARG;
			break;
		/* Interface tables */
		case O_XMIT:
		case O_RECV:
		case O_VIA:
			/* Interface table, possibly */
			cmdif = (ipfw_insn_if *)cmd;
			/* Only names starting with '\1' are converted */
			if (cmdif->name[0] != '\1')
				break;

			cmdif->p.kidx = (uint16_t)cmdif->p.glob;
			break;
		}
	}
}

/*
 * Copies rule @krule from kernel to FreeBSD8 userland format (v0)
 * @urule is zeroed for @len bytes before being filled.
 */
static void
export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len)
{
	int cmdlen, l;
	ipfw_insn *cmd;
	ipfw_insn_limit *lcmd;
	ipfw_insn_if *cmdif;

	/* copy header */
	memset(urule, 0, len);
	urule->act_ofs = krule->act_ofs;
	urule->cmd_len = krule->cmd_len;
	urule->rulenum = krule->rulenum;
	urule->set = krule->set;
	if ((krule->flags & IPFW_RULE_NOOPT) != 0)
		urule->_pad |= 1;

	/* Copy opcodes */
	memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t));

	/* Export counters */
	export_cntr0_base(krule, (struct ip_fw_bcounter0 *)&urule->pcnt);

	/*
	 * Alter opcodes:
	 * 1) convert tablearg value from 0 to 65535
	 * 2) Remove highest bit from O_SETFIB/O_SETDSCP values.
	 * 3) convert table number in iface opcodes to int
	 */
	l = urule->cmd_len;
	cmd = urule->cmd;
	cmdlen = 0;

	for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
		cmdlen = F_LEN(cmd);

		switch (cmd->opcode) {
		/* Opcodes supporting tablearg */
		case O_TAG:
		case O_TAGGED:
		case O_PIPE:
		case O_QUEUE:
		case O_DIVERT:
		case O_TEE:
		case O_SKIPTO:
		case O_CALLRETURN:
		case O_NETGRAPH:
		case O_NGTEE:
		case O_NAT:
			if (cmd->arg1 == IP_FW_TARG)
				cmd->arg1 = 65535;
			break;
		case O_SETFIB:
		case O_SETDSCP:
			if (cmd->arg1 == IP_FW_TARG)
				cmd->arg1 = 65535;
			else
				cmd->arg1 &= ~0x8000;
			break;
		case O_LIMIT:
			lcmd = (ipfw_insn_limit *)cmd;
			if (lcmd->conn_limit == IP_FW_TARG)
				lcmd->conn_limit = 65535;
			break;
		/* Interface tables */
		case O_XMIT:
		case O_RECV:
		case O_VIA:
			/* Interface table, possibly */
			cmdif = (ipfw_insn_if *)cmd;
			/* Only names starting with '\1' are converted */
			if (cmdif->name[0] != '\1')
				break;

			cmdif->p.glob = cmdif->p.kidx;
			break;
		}
	}
}

/*
 * Add new rule(s) to the list possibly creating rule number for each.
 * Update the rule_number in the input struct so the caller knows it as well.
 * Must be called without IPFW_UH held
 *
 * Returns 0 on success, the rewrite_rule_uidx() error if object
 * resolution fails, or ENOSPC if the new map cannot be allocated.
 */
static int
commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci, int count)
{
	int error, i, insert_before, tcount;
	uint16_t rulenum, *pnum;
	struct rule_check_info *ci;
	struct ip_fw *krule;
	struct ip_fw **map;	/* the new array of pointers */

	/* Check if we need to do table/obj index remap */
	tcount = 0;
	for (ci = rci, i = 0; i < count; ci++, i++) {
		if (ci->object_opcodes == 0)
			continue;

		/*
		 * Rule has some object opcodes.
		 * We need to find (and create non-existing)
		 * kernel objects, and reference existing ones.
		 */
		error = rewrite_rule_uidx(chain, ci);
		if (error != 0) {

			/*
			 * rewrite failed, state for current rule
			 * has been reverted. Check if we need to
			 * revert more.
			 */
			if (tcount > 0) {

				/*
				 * We have some more table rules
				 * we need to rollback.
				 */

				IPFW_UH_WLOCK(chain);
				/* Walk back over the rules already processed */
				while (ci != rci) {
					ci--;
					if (ci->object_opcodes == 0)
						continue;
					unref_rule_objects(chain,ci->krule);

				}
				IPFW_UH_WUNLOCK(chain);

			}

			return (error);
		}

		tcount++;
	}

	/* get_map returns with IPFW_UH_WLOCK if successful */
	map = get_map(chain, count, 0 /* not locked */);
	if (map == NULL) {
		if (tcount > 0) {
			/* Unbind tables */
			IPFW_UH_WLOCK(chain);
			for (ci = rci, i = 0; i < count; ci++, i++) {
				if (ci->object_opcodes == 0)
					continue;

				unref_rule_objects(chain, ci->krule);
			}
			IPFW_UH_WUNLOCK(chain);
		}

		return (ENOSPC);
	}

	/* Clamp the auto-increment step to [1, 1000] */
	if (V_autoinc_step < 1)
		V_autoinc_step = 1;
	else if (V_autoinc_step > 1000)
		V_autoinc_step = 1000;

	/* FIXME: Handle count > 1 */
	ci = rci;
	krule = ci->krule;
	rulenum = krule->rulenum;

	/* find the insertion point, we will insert before */
	insert_before = rulenum ? rulenum + 1 : IPFW_DEFAULT_RULE;
	i = ipfw_find_rule(chain, insert_before, 0);
	/* duplicate first part */
	if (i > 0)
		bcopy(chain->map, map, i * sizeof(struct ip_fw *));
	map[i] = krule;
	/* duplicate remaining part, we always have the default rule */
	bcopy(chain->map + i, map + i + 1,
		sizeof(struct ip_fw *) *(chain->n_rules - i));
	if (rulenum == 0) {
		/* Compute rule number and write it back */
		rulenum = i > 0 ? map[i-1]->rulenum : 0;
		if (rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
			rulenum += V_autoinc_step;
		krule->rulenum = rulenum;
		/* Save number to userland rule */
		pnum = (uint16_t *)((caddr_t)ci->urule + ci->urule_numoff);
		*pnum = rulenum;
	}

	krule->id = chain->id + 1;
	update_skipto_cache(chain, map);
	/* swap_map() hands back the old map, which is freed below */
	map = swap_map(chain, map, chain->n_rules + 1);
	chain->static_len += RULEUSIZE0(krule);
	IPFW_UH_WUNLOCK(chain);
	if (map)
		free(map, M_IPFW);
	return (0);
}

/*
 * Adds @rule to the list of rules to reap
 * The first pointer-sized bytes of @rule are reused as the list link.
 */
void
ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head,
    struct ip_fw *rule)
{

	IPFW_UH_WLOCK_ASSERT(chain);

	/* Unlink rule from everywhere */
	unref_rule_objects(chain, rule);

	*((struct ip_fw **)rule) = *head;
	*head = rule;
}

/*
 * Reclaim storage associated with a list of rules.  This is
 * typically the list created using remove_rule.
 * A NULL pointer on input is handled correctly.
 */
void
ipfw_reap_rules(struct ip_fw *head)
{
	struct ip_fw *rule;

	while ((rule = head) != NULL) {
		/* Fetch the link before the rule is freed */
		head = *((struct ip_fw **)head);
		free_rule(rule);
	}
}

/*
 * Rules to keep are
 *	(default || reserved || !match_set || !match_number)
 * where
 *   default ::= (rule->rulenum == IPFW_DEFAULT_RULE)
 *	// the default rule is always protected
 *
 *   reserved ::= (cmd == 0 && n == 0 && rule->set == RESVD_SET)
 *	// RESVD_SET is protected only if cmd == 0 and n == 0 ("ipfw flush")
 *
 *   match_set ::= (cmd == 0 || rule->set == set)
 *	// set number is ignored for cmd == 0
 *
 *   match_number ::= (cmd == 1 || n == 0 || n == rule->rulenum)
 *	// number is ignored for cmd == 1 or n == 0
 *
 * NOTE(review): the description above is written in terms of (cmd, n)
 * arguments; the code below takes its criteria from the flags of @rt.
 *
 * Returns 1 if @rule matches range @rt, 0 otherwise.
 */
int
ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt)
{

	/* Don't match default rule for modification queries */
	if (rule->rulenum == IPFW_DEFAULT_RULE &&
	    (rt->flags & IPFW_RCFLAG_DEFAULT) == 0)
		return (0);

	/* Don't match rules in reserved set for flush requests */
	if ((rt->flags & IPFW_RCFLAG_ALL) != 0 && rule->set == RESVD_SET)
		return (0);

	/* If we're filtering by set, don't match other sets */
	if ((rt->flags & IPFW_RCFLAG_SET) != 0 && rule->set != rt->set)
		return (0);

	/* If we're filtering by rule number, don't match outside the range */
	if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 &&
	    (rule->rulenum < rt->start_rule || rule->rulenum > rt->end_rule))
		return (0);

	return (1);
}

/*
 * Delete rules matching range @rt.
 * Saves number of deleted rules in @ndel.
 *
 * Returns 0 on success.
 * Returns ENOMEM, leaving @ndel untouched, if the new map cannot be
 * allocated.
 */
static int
delete_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int *ndel)
{
	struct ip_fw *reap, *rule, **map;
	int end, start;
	int i, n, ndyn, ofs;

	reap = NULL;
	IPFW_UH_WLOCK(chain);	/* arbitrate writers */

	/*
	 * Stage 1: Determine range to inspect.
	 * Range is half-inclusive, e.g [start, end).
	 */
	start = 0;
	end = chain->n_rules - 1;

	if ((rt->flags & IPFW_RCFLAG_RANGE) != 0) {
		start = ipfw_find_rule(chain, rt->start_rule, 0);

		end = ipfw_find_rule(chain, rt->end_rule, 0);
		/* Advance @end past every rule numbered end_rule */
		if (rt->end_rule != IPFW_DEFAULT_RULE)
			while (chain->map[end]->rulenum == rt->end_rule)
				end++;
	}

	/* Allocate new map of the same size */
	map = get_map(chain, 0, 1 /* locked */);
	if (map == NULL) {
		IPFW_UH_WUNLOCK(chain);
		return (ENOMEM);
	}

	n = 0;
	ndyn = 0;
	ofs = start;
	/* 1. bcopy the initial part of the map */
	if (start > 0)
		bcopy(chain->map, map, start * sizeof(struct ip_fw *));
	/* 2. copy active rules between start and end */
	for (i = start; i < end; i++) {
		rule = chain->map[i];
		if (ipfw_match_range(rule, rt) == 0) {
			map[ofs++] = rule;
			continue;
		}

		n++;
		if (ipfw_is_dyn_rule(rule) != 0)
			ndyn++;
	}
	/* 3. copy the final part of the map */
	bcopy(chain->map + end, map + ofs,
		(chain->n_rules - end) * sizeof(struct ip_fw *));
	/* 4. recalculate skipto cache */
	update_skipto_cache(chain, map);
	/* 5. swap the maps (under UH_WLOCK + WLOCK) */
	map = swap_map(chain, map, chain->n_rules - n);
	/* 6. Remove all dynamic states originated by deleted rules */
	if (ndyn > 0)
		ipfw_expire_dyn_rules(chain, rt);
	/* 7. now remove the rules deleted from the old map */
	for (i = start; i < end; i++) {
		rule = map[i];
		if (ipfw_match_range(rule, rt) == 0)
			continue;
		chain->static_len -= RULEUSIZE0(rule);
		ipfw_reap_add(chain, &reap, rule);
	}
	IPFW_UH_WUNLOCK(chain);

	/* Free the reaped rules and the old map after dropping the lock */
	ipfw_reap_rules(reap);
	if (map != NULL)
		free(map, M_IPFW);
	*ndel = n;
	return (0);
}

/*
 * Moves rules matching range @rt to set @rt->new_set.
 *
 * Returns 0 on success.
 * Returns EBUSY if ipfw_move_tables_sets() fails.
 */
static int
move_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
{
	struct ip_fw *rule;
	int i;

	IPFW_UH_WLOCK(chain);

	/*
	 * Move rules with matching parameters to a new set.
	 * This one is much more complex. We have to ensure
	 * that all referenced tables (if any) are referenced
	 * by given rule subset only. Otherwise, we can't move
	 * them to new set and have to return error.
	 */
	if (V_fw_tables_sets != 0) {
		if (ipfw_move_tables_sets(chain, rt, rt->new_set) != 0) {
			IPFW_UH_WUNLOCK(chain);
			return (EBUSY);
		}
	}

	/* XXX: We have to do swap holding WLOCK */
	for (i = 0; i < chain->n_rules; i++) {
		rule = chain->map[i];
		if (ipfw_match_range(rule, rt) == 0)
			continue;
		rule->set = rt->new_set;
	}

	IPFW_UH_WUNLOCK(chain);

	return (0);
}

/*
 * Clear counters for a specific rule.
 * Normally run under IPFW_UH_RLOCK, but these are idempotent ops
 * so we only care that rules do not disappear.
 *
 * With @log_only set the rule counters are left alone.
 * log_left is reset when the rule's action opcode is O_LOG.
 */
static void
clear_counters(struct ip_fw *rule, int log_only)
{
	ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);

	if (log_only == 0)
		IPFW_ZERO_RULE_COUNTER(rule);
	if (l->o.opcode == O_LOG)
		l->log_left = l->max_log;
}

/*
 * Flushes rules counters and/or log values on matching range.
 *
 * Returns number of items cleared.
*/
static int
clear_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int log_only)
{
	struct ip_fw *cur;
	int cleared, idx;

	/* Clearing is also allowed to touch the default rule. */
	rt->flags |= IPFW_RCFLAG_DEFAULT;

	cleared = 0;
	IPFW_UH_WLOCK(chain);	/* arbitrate writers */
	for (idx = 0; idx < chain->n_rules; idx++) {
		cur = chain->map[idx];
		if (ipfw_match_range(cur, rt) != 0) {
			clear_counters(cur, log_only);
			cleared++;
		}
	}
	IPFW_UH_WUNLOCK(chain);

	return (cleared);
}

/*
 * Sanity-checks a range TLV supplied by userland.
 *
 * Returns 0 when the TLV is acceptable and 1 when its length is wrong,
 * the rule range is inverted, a set number is out of bounds or flags
 * outside IPFW_RCFLAG_USER are present.
 */
static int
check_range_tlv(ipfw_range_tlv *rt)
{
	int bad;

	bad = rt->head.length != sizeof(*rt) ||
	    rt->start_rule > rt->end_rule ||
	    rt->set >= IPFW_MAX_SETS ||
	    rt->new_set >= IPFW_MAX_SETS ||
	    (rt->flags & IPFW_RCFLAG_USER) != rt->flags;

	return (bad ? 1 : 0);
}

/*
 * Deletes the rules selected by the request's range TLV.
 * Data layout (v0)(current):
 * Request: [ ipfw_obj_header ipfw_range_tlv ]
 * Reply: [ ipfw_obj_header ipfw_range_tlv ]
 *
 * The number of deleted rules is reported in ipfw_range_tlv->new_set.
 *
 * Returns 0 on success, EINVAL on a malformed request, or the error
 * from delete_range().
 */
static int
del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	ipfw_range_header *hdr;
	int deleted, rc;

	if (sd->valsize != sizeof(*hdr))
		return (EINVAL);

	hdr = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize);
	if (check_range_tlv(&hdr->range) != 0)
		return (EINVAL);

	deleted = 0;
	rc = delete_range(chain, &hdr->range, &deleted);
	if (rc != 0)
		return (rc);

	/* Report the count back through the reply TLV. */
	hdr->range.new_set = deleted;

	return (0);
}

/*
 * Move rules/sets matching specified parameters
 * Data layout (v0)(current):
 * Request: [ ipfw_obj_header ipfw_range_tlv ]
 *
 * Returns 0 on success.
*/ static int move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_range_header *rh; if (sd->valsize != sizeof(*rh)) return (EINVAL); rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); if (check_range_tlv(&rh->range) != 0) return (EINVAL); return (move_range(chain, &rh->range)); } /* * Clear rule accounting data matching specified parameters * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_range_tlv ] * Reply: [ ipfw_obj_header ipfw_range_tlv ] * * Saves number of cleared rules in ipfw_range_tlv->new_set. * * Returns 0 on success. */ static int clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_range_header *rh; int log_only, num; char *msg; if (sd->valsize != sizeof(*rh)) return (EINVAL); rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); if (check_range_tlv(&rh->range) != 0) return (EINVAL); log_only = (op3->opcode == IP_FW_XRESETLOG); num = clear_range(chain, &rh->range, log_only); if (rh->range.flags & IPFW_RCFLAG_ALL) msg = log_only ? "All logging counts reset" : "Accounting cleared"; else msg = log_only ? 
"logging count reset" : "cleared"; if (V_fw_verbose) { int lev = LOG_SECURITY | LOG_NOTICE; log(lev, "ipfw: %s.\n", msg); } /* Save number of rules cleared */ rh->range.new_set = num; return (0); } static void enable_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt) { uint32_t v_set; IPFW_UH_WLOCK_ASSERT(chain); /* Change enabled/disabled sets mask */ v_set = (V_set_disable | rt->set) & ~rt->new_set; v_set &= ~(1 << RESVD_SET); /* set RESVD_SET always enabled */ IPFW_WLOCK(chain); V_set_disable = v_set; IPFW_WUNLOCK(chain); } static void swap_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int mv) { struct ip_fw *rule; int i; IPFW_UH_WLOCK_ASSERT(chain); /* Swap or move two sets */ for (i = 0; i < chain->n_rules - 1; i++) { rule = chain->map[i]; if (rule->set == rt->set) rule->set = rt->new_set; else if (rule->set == rt->new_set && mv == 0) rule->set = rt->set; } if (V_fw_tables_sets != 0) ipfw_swap_tables_sets(chain, rt->set, rt->new_set, mv); } /* * Swaps or moves set * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_range_tlv ] * * Returns 0 on success. */ static int manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_range_header *rh; if (sd->valsize != sizeof(*rh)) return (EINVAL); rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); if (rh->range.head.length != sizeof(ipfw_range_tlv)) return (1); IPFW_UH_WLOCK(chain); switch (op3->opcode) { case IP_FW_SET_SWAP: case IP_FW_SET_MOVE: swap_sets(chain, &rh->range, op3->opcode == IP_FW_SET_MOVE); break; case IP_FW_SET_ENABLE: enable_sets(chain, &rh->range); break; } IPFW_UH_WUNLOCK(chain); return (0); } /** * Remove all rules with given number, or do set manipulation. * Assumes chain != NULL && *chain != NULL. * * The argument is an uint32_t. 
The low 16 bit are the rule or set number; * the next 8 bits are the new set; the top 8 bits indicate the command: * * 0 delete rules numbered "rulenum" * 1 delete rules in set "rulenum" * 2 move rules "rulenum" to set "new_set" * 3 move rules from set "rulenum" to set "new_set" * 4 swap sets "rulenum" and "new_set" * 5 delete rules "rulenum" and set "new_set" */ static int del_entry(struct ip_fw_chain *chain, uint32_t arg) { uint32_t num; /* rule number or old_set */ uint8_t cmd, new_set; int do_del, ndel; int error = 0; ipfw_range_tlv rt; num = arg & 0xffff; cmd = (arg >> 24) & 0xff; new_set = (arg >> 16) & 0xff; if (cmd > 5 || new_set > RESVD_SET) return EINVAL; if (cmd == 0 || cmd == 2 || cmd == 5) { if (num >= IPFW_DEFAULT_RULE) return EINVAL; } else { if (num > RESVD_SET) /* old_set */ return EINVAL; } /* Convert old requests into new representation */ memset(&rt, 0, sizeof(rt)); rt.start_rule = num; rt.end_rule = num; rt.set = num; rt.new_set = new_set; do_del = 0; switch (cmd) { case 0: /* delete rules numbered "rulenum" */ if (num == 0) rt.flags |= IPFW_RCFLAG_ALL; else rt.flags |= IPFW_RCFLAG_RANGE; do_del = 1; break; case 1: /* delete rules in set "rulenum" */ rt.flags |= IPFW_RCFLAG_SET; do_del = 1; break; case 5: /* delete rules "rulenum" and set "new_set" */ rt.flags |= IPFW_RCFLAG_RANGE | IPFW_RCFLAG_SET; rt.set = new_set; rt.new_set = 0; do_del = 1; break; case 2: /* move rules "rulenum" to set "new_set" */ rt.flags |= IPFW_RCFLAG_RANGE; break; case 3: /* move rules from set "rulenum" to set "new_set" */ IPFW_UH_WLOCK(chain); swap_sets(chain, &rt, 1); IPFW_UH_WUNLOCK(chain); return (0); case 4: /* swap sets "rulenum" and "new_set" */ IPFW_UH_WLOCK(chain); swap_sets(chain, &rt, 0); IPFW_UH_WUNLOCK(chain); return (0); default: return (ENOTSUP); } if (do_del != 0) { if ((error = delete_range(chain, &rt, &ndel)) != 0) return (error); if (ndel == 0 && (cmd != 1 && num != 0)) return (EINVAL); return (0); } return (move_range(chain, &rt)); } /** * Reset 
some or all counters on firewall rules.
 * The argument `arg' is an u_int32_t. The low 16 bit are the rule number,
 * the next 8 bits are the set number, the top 8 bits are the command:
 *	0	work with rules from all set's;
 *	1	work with rules only from specified set.
 * Specified rule number is zero if we want to clear all entries.
 * log_only is 1 if we only want to reset logs, zero otherwise.
 *
 * Returns 0 on success, EINVAL on a bad command/set or when no rule
 * with the requested number exists.
 */
static int
zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
{
	struct ip_fw *rule;
	char *msg;
	int i;

	uint16_t rulenum = arg & 0xffff;
	uint8_t set = (arg >> 16) & 0xff;
	uint8_t cmd = (arg >> 24) & 0xff;

	if (cmd > 1)
		return (EINVAL);
	if (cmd == 1 && set > RESVD_SET)
		return (EINVAL);

	IPFW_UH_RLOCK(chain);
	if (rulenum == 0) {
		/* Clearing everything also resets the "no rule" counter. */
		V_norule_counter = 0;
		for (i = 0; i < chain->n_rules; i++) {
			rule = chain->map[i];
			/* Skip rules not in our set. */
			if (cmd == 1 && rule->set != set)
				continue;
			clear_counters(rule, log_only);
		}
		msg = log_only ? "All logging counts reset" :
		    "Accounting cleared";
	} else {
		int cleared = 0;
		for (i = 0; i < chain->n_rules; i++) {
			rule = chain->map[i];
			if (rule->rulenum == rulenum) {
				if (cmd == 0 || rule->set == set)
					clear_counters(rule, log_only);
				/*
				 * Note: a number match counts as "found"
				 * even when the set filter skipped it.
				 */
				cleared = 1;
			}
			/* Stop early; assumes map is sorted by rulenum. */
			if (rule->rulenum > rulenum)
				break;
		}
		if (!cleared) {	/* we did not find any matching rules */
			IPFW_UH_RUNLOCK(chain);
			return (EINVAL);
		}
		msg = log_only ? "logging count reset" : "cleared";
	}
	IPFW_UH_RUNLOCK(chain);

	if (V_fw_verbose) {
		int lev = LOG_SECURITY | LOG_NOTICE;

		if (rulenum)
			log(lev, "ipfw: Entry %d %s.\n", rulenum, msg);
		else
			log(lev, "ipfw: %s.\n", msg);
	}
	return (0);
}

/*
 * Check rule head in FreeBSD11 format
 *
 * Validates @size against the 64-bit-rounded rule size, the action
 * offset and the rule number, then checks the opcode stream via
 * check_ipfw_rule_body().
 *
 * Returns 0 on success, error code otherwise.
 */
static int
check_ipfw_rule1(struct ip_fw_rule *rule, int size,
    struct rule_check_info *ci)
{
	int l;

	if (size < sizeof(*rule)) {
		printf("ipfw: rule too short\n");
		return (EINVAL);
	}

	/* Check for valid cmd_len */
	l = roundup2(RULESIZE(rule), sizeof(uint64_t));
	if (l != size) {
		printf("ipfw: size mismatch (have %d want %d)\n", size, l);
		return (EINVAL);
	}
	/* The action opcode must lie inside the command stream. */
	if (rule->act_ofs >= rule->cmd_len) {
		printf("ipfw: bogus action offset (%u > %u)\n",
		    rule->act_ofs, rule->cmd_len - 1);
		return (EINVAL);
	}

	/* IPFW_DEFAULT_RULE itself is not accepted from userland. */
	if (rule->rulenum > IPFW_DEFAULT_RULE - 1)
		return (EINVAL);

	return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci));
}

/*
 * Check rule head in FreeBSD8 format
 *
 * Same checks as check_ipfw_rule1(), except that the expected size is
 * the exact (unrounded) size of the old rule layout.
 *
 * Returns 0 on success, error code otherwise.
 */
static int
check_ipfw_rule0(struct ip_fw_rule0 *rule, int size,
    struct rule_check_info *ci)
{
	int l;

	if (size < sizeof(*rule)) {
		printf("ipfw: rule too short\n");
		return (EINVAL);
	}

	/* Check for valid cmd_len */
	/* cmd_len counts 32-bit words; one word is already in the struct. */
	l = sizeof(*rule) + rule->cmd_len * 4 - 4;
	if (l != size) {
		printf("ipfw: size mismatch (have %d want %d)\n", size, l);
		return (EINVAL);
	}
	if (rule->act_ofs >= rule->cmd_len) {
		printf("ipfw: bogus action offset (%u > %u)\n",
		    rule->act_ofs, rule->cmd_len - 1);
		return (EINVAL);
	}

	if (rule->rulenum > IPFW_DEFAULT_RULE - 1)
		return (EINVAL);

	return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci));
}

/*
 * Walks the opcode stream @cmd of @cmd_len 32-bit words and validates
 * the size (and a few arguments) of every instruction.
 * Requires exactly one action opcode, which has to be the last one
 * (an external action may be followed by its instance opcode).
 * Counts opcodes referencing named objects in @ci->object_opcodes.
 *
 * Side effect: O_LOG instructions get log_left reset to max_log.
 *
 * Returns 0 on success, EINVAL (or EPROTONOSUPPORT for IPv6 opcodes on
 * a kernel without INET6) otherwise.
 */
static int
check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci)
{
	int cmdlen, l;
	int have_action;

	have_action = 0;

	/*
	 * Now go for the individual checks. Very simple ones, basically only
	 * instruction sizes.
	 */
	for (l = cmd_len; l > 0 ; l -= cmdlen, cmd += cmdlen) {
		cmdlen = F_LEN(cmd);
		/* The instruction must not run past the end of the rule. */
		if (cmdlen > l) {
			printf("ipfw: opcode %d size truncated\n",
			    cmd->opcode);
			return EINVAL;
		}
		switch (cmd->opcode) {
		/* Plain single-word match opcodes. */
		case O_PROBE_STATE:
		case O_KEEP_STATE:
		case O_PROTO:
		case O_IP_SRC_ME:
		case O_IP_DST_ME:
		case O_LAYER2:
		case O_IN:
		case O_FRAG:
		case O_DIVERTED:
		case O_IPOPT:
		case O_IPTOS:
		case O_IPPRECEDENCE:
		case O_IPVER:
		case O_SOCKARG:
		case O_TCPFLAGS:
		case O_TCPOPTS:
		case O_ESTAB:
		case O_VERREVPATH:
		case O_VERSRCREACH:
		case O_ANTISPOOF:
		case O_IPSEC:
#ifdef INET6
		case O_IP6_SRC_ME:
		case O_IP6_DST_ME:
		case O_EXT_HDR:
		case O_IP6:
#endif
		case O_IP4:
		case O_TAG:
			if (cmdlen != F_INSN_SIZE(ipfw_insn))
				goto bad_size;
			break;

		case O_EXTERNAL_ACTION:
			if (cmd->arg1 == 0 ||
			    cmdlen != F_INSN_SIZE(ipfw_insn)) {
				printf("ipfw: invalid external "
				    "action opcode\n");
				return (EINVAL);
			}
			ci->object_opcodes++;
			/* Do we have O_EXTERNAL_INSTANCE opcode? */
			if (l != cmdlen) {
				/*
				 * Step onto the following opcode by hand so
				 * that the "action is last" check below sees
				 * the instance opcode as the final one.
				 */
				l -= cmdlen;
				cmd += cmdlen;
				cmdlen = F_LEN(cmd);
				if (cmd->opcode != O_EXTERNAL_INSTANCE) {
					printf("ipfw: invalid opcode "
					    "next to external action %u\n",
					    cmd->opcode);
					return (EINVAL);
				}
				if (cmd->arg1 == 0 ||
				    cmdlen != F_INSN_SIZE(ipfw_insn)) {
					printf("ipfw: invalid external "
					    "action instance opcode\n");
					return (EINVAL);
				}
				ci->object_opcodes++;
			}
			goto check_action;

		case O_FIB:
			if (cmdlen != F_INSN_SIZE(ipfw_insn))
				goto bad_size;
			if (cmd->arg1 >= rt_numfibs) {
				printf("ipfw: invalid fib number %d\n",
					cmd->arg1);
				return EINVAL;
			}
			break;

		case O_SETFIB:
			if (cmdlen != F_INSN_SIZE(ipfw_insn))
				goto bad_size;
			/* IP_FW_TARG is exempt from the fib range check. */
			if ((cmd->arg1 != IP_FW_TARG) &&
			    ((cmd->arg1 & 0x7FFF) >= rt_numfibs)) {
				printf("ipfw: invalid fib number %d\n",
					cmd->arg1 & 0x7FFF);
				return EINVAL;
			}
			goto check_action;

		case O_UID:
		case O_GID:
		case O_JAIL:
		case O_IP_SRC:
		case O_IP_DST:
		case O_TCPSEQ:
		case O_TCPACK:
		case O_PROB:
		case O_ICMPTYPE:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
				goto bad_size;
			break;

		case O_LIMIT:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
				goto bad_size;
			break;

		case O_LOG:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
				goto bad_size;

			/* Start with a full logging budget. */
			((ipfw_insn_log *)cmd)->log_left =
			    ((ipfw_insn_log *)cmd)->max_log;

			break;

		case O_IP_SRC_MASK:
		case O_IP_DST_MASK:
			/* only odd command lengths */
			if ((cmdlen & 1) == 0)
				goto bad_size;
			break;

		case O_IP_SRC_SET:
		case O_IP_DST_SET:
			if (cmd->arg1 == 0 || cmd->arg1 > 256) {
				printf("ipfw: invalid set size %d\n",
					cmd->arg1);
				return EINVAL;
			}
			/* One bit per address, rounded up to 32-bit words. */
			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
			    (cmd->arg1+31)/32 )
				goto bad_size;
			break;

		case O_IP_SRC_LOOKUP:
		case O_IP_DST_LOOKUP:
			if (cmd->arg1 >= V_fw_tables_max) {
				printf("ipfw: invalid table number %d\n",
				    cmd->arg1);
				return (EINVAL);
			}
			if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
			    cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 &&
			    cmdlen != F_INSN_SIZE(ipfw_insn_u32))
				goto bad_size;
			ci->object_opcodes++;
			break;
		case O_IP_FLOW_LOOKUP:
			if (cmd->arg1 >= V_fw_tables_max) {
				printf("ipfw: invalid table number %d\n",
				    cmd->arg1);
				return (EINVAL);
			}
			if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
			    cmdlen != F_INSN_SIZE(ipfw_insn_u32))
				goto bad_size;
			ci->object_opcodes++;
			break;
		case O_MACADDR2:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
				goto bad_size;
			break;

		case O_NOP:
		case O_IPID:
		case O_IPTTL:
		case O_IPLEN:
		case O_TCPDATALEN:
		case O_TCPWIN:
		case O_TAGGED:
			if (cmdlen < 1 || cmdlen > 31)
				goto bad_size;
			break;

		case O_DSCP:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1)
				goto bad_size;
			break;

		case O_MAC_TYPE:
		case O_IP_SRCPORT:
		case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
			if (cmdlen < 2 || cmdlen > 31)
				goto bad_size;
			break;

		case O_RECV:
		case O_XMIT:
		case O_VIA:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
				goto bad_size;
			ci->object_opcodes++;
			break;

		case O_ALTQ:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
				goto bad_size;
			break;

		case O_PIPE:
		case O_QUEUE:
			if (cmdlen != F_INSN_SIZE(ipfw_insn))
				goto bad_size;
			goto check_action;

		case O_FORWARD_IP:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
				goto bad_size;
			goto check_action;
#ifdef INET6
		case O_FORWARD_IP6:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_sa6))
				goto bad_size;
			goto check_action;
#endif /* INET6 */

		case O_DIVERT:
		case O_TEE:
			/* Rejected while the divert hook is not set. */
			if (ip_divert_ptr == NULL)
				return EINVAL;
			else
				goto check_size;
		case O_NETGRAPH:
		case O_NGTEE:
			/* Rejected while the netgraph hook is not set. */
			if (ng_ipfw_input_p == NULL)
				return EINVAL;
			else
				goto check_size;
		case O_NAT:
			if (!IPFW_NAT_LOADED)
				return EINVAL;
			if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
 				goto bad_size;		
 			goto check_action;
		case O_FORWARD_MAC: /* XXX not implemented yet */
		case O_CHECK_STATE:
		case O_COUNT:
		case O_ACCEPT:
		case O_DENY:
		case O_REJECT:
		case O_SETDSCP:
#ifdef INET6
		case O_UNREACH6:
#endif
		case O_SKIPTO:
		case O_REASS:
		case O_CALLRETURN:
check_size:
			/* Shared tail: single-word action opcodes. */
			if (cmdlen != F_INSN_SIZE(ipfw_insn))
				goto bad_size;
check_action:
			/* Shared tail: one action only, and it is last. */
			if (have_action) {
				printf("ipfw: opcode %d, multiple actions"
					" not allowed\n",
					cmd->opcode);
				return (EINVAL);
			}
			have_action = 1;
			if (l != cmdlen) {
				printf("ipfw: opcode %d, action must be"
					" last opcode\n",
					cmd->opcode);
				return (EINVAL);
			}
			break;
#ifdef INET6
		case O_IP6_SRC:
		case O_IP6_DST:
			if (cmdlen != F_INSN_SIZE(struct in6_addr) +
			    F_INSN_SIZE(ipfw_insn))
				goto bad_size;
			break;

		case O_FLOW6ID:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
			    ((ipfw_insn_u32 *)cmd)->o.arg1)
				goto bad_size;
			break;

		case O_IP6_SRC_MASK:
		case O_IP6_DST_MASK:
			if ( !(cmdlen & 1) || cmdlen > 127)
				goto bad_size;
			break;
		case O_ICMP6TYPE:
			if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) )
				goto bad_size;
			break;
#endif

		default:
			/*
			 * Tell "known IPv6 opcode on a kernel without
			 * INET6" apart from a genuinely unknown opcode.
			 */
			switch (cmd->opcode) {
#ifndef INET6
			case O_IP6_SRC_ME:
			case O_IP6_DST_ME:
			case O_EXT_HDR:
			case O_IP6:
			case O_UNREACH6:
			case O_IP6_SRC:
			case O_IP6_DST:
			case O_FLOW6ID:
			case O_IP6_SRC_MASK:
			case O_IP6_DST_MASK:
			case O_ICMP6TYPE:
				printf("ipfw: no IPv6 support in kernel\n");
				return (EPROTONOSUPPORT);
#endif
			default:
				printf("ipfw: opcode %d, unknown opcode\n",
					cmd->opcode);
				return (EINVAL);
			}
		}
	}
	if (have_action == 0) {
		printf("ipfw: missing action\n");
		return (EINVAL);
	}
	return 0;

bad_size:
	printf("ipfw: opcode %d size %d wrong\n",
		cmd->opcode, cmdlen);
	return (EINVAL);
}

/*
 * Translation of requests for compatibility
with FreeBSD 7.2/8.
 * a static variable tells us if we have an old client from userland,
 * and if necessary we translate requests and responses between the
 * two formats.
 */
static int is7 = 0;

/* Rule layout used when talking to FreeBSD 7.2 userland. */
struct ip_fw7 {
	struct ip_fw7	*next;		/* linked list of rules     */
	struct ip_fw7	*next_rule;	/* ptr to next [skipto] rule    */
	/* 'next_rule' is used to pass up 'set_disable' status      */

	uint16_t	act_ofs;	/* offset of action in 32-bit units */
	uint16_t	cmd_len;	/* # of 32-bit words in cmd */
	uint16_t	rulenum;	/* rule number          */
	uint8_t		set;		/* rule set (0..31)     */
	// #define RESVD_SET   31  /* set for default and persistent rules */
	uint8_t		_pad;		/* padding          */
	// uint32_t        id;             /* rule id, only in v.8 */
	/* These fields are present in all rules.           */
	uint64_t	pcnt;		/* Packet counter       */
	uint64_t	bcnt;		/* Byte counter         */
	uint32_t	timestamp;	/* tv_sec of last match     */

	ipfw_insn	cmd[1];		/* storage for commands     */
};

static int convert_rule_to_7(struct ip_fw_rule0 *rule);
static int convert_rule_to_8(struct ip_fw_rule0 *rule);

/* Size in bytes of a rule in the 7.2 layout (cmd_len is in words). */
#ifndef RULESIZE7
#define RULESIZE7(rule)  (sizeof(struct ip_fw7) + \
	((struct ip_fw7 *)(rule))->cmd_len * 4 - 4)
#endif

/*
 * Copy the static and dynamic rules to the supplied buffer
 * and return the amount of space actually used.
 * Must be run under IPFW_UH_RLOCK
 *
 * In the legacy (is7) path any conversion error makes the function
 * return 0, and rules that do not fit into the buffer are silently
 * skipped.
 */
static size_t
ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
{
	char *bp = buf;
	char *ep = bp + space;
	struct ip_fw *rule;
	struct ip_fw_rule0 *dst;
	int error, i, l, warnflag;
	time_t	boot_seconds;

	warnflag = 0;

	/* Added to non-zero rule timestamps before they are exported. */
	boot_seconds = boottime.tv_sec;
	for (i = 0; i < chain->n_rules; i++) {
		rule = chain->map[i];

		if (is7) {
		    /* Convert rule to FreeBSD 7.2 format */
		    l = RULESIZE7(rule);
		    if (bp + l + sizeof(uint32_t) <= ep) {
			bcopy(rule, bp, l + sizeof(uint32_t));
			error = set_legacy_obj_kidx(chain,
			    (struct ip_fw_rule0 *)bp);
			if (error != 0)
				return (0);
			error = convert_rule_to_7((struct ip_fw_rule0 *) bp);
			if (error)
				return 0; /*XXX correct? */
			/*
			 * XXX HACK. Store the disable mask in the "next"
			 * pointer in a wild attempt to keep the ABI the same.
			 * Why do we do this on EVERY rule?
			 */
			bcopy(&V_set_disable,
				&(((struct ip_fw7 *)bp)->next_rule),
				sizeof(V_set_disable));
			if (((struct ip_fw7 *)bp)->timestamp)
			    ((struct ip_fw7 *)bp)->timestamp += boot_seconds;
			bp += l;
		    }
		    continue; /* go to next rule */
		}

		l = RULEUSIZE0(rule);
		if (bp + l > ep) { /* should not happen */
			printf("overflow dumping static rules\n");
			break;
		}
		dst = (struct ip_fw_rule0 *)bp;
		export_rule0(rule, dst, l);
		/* Result is examined only after the mask has been stored. */
		error = set_legacy_obj_kidx(chain, dst);

		/*
		 * XXX HACK. Store the disable mask in the "next"
		 * pointer in a wild attempt to keep the ABI the same.
		 * Why do we do this on EVERY rule?
		 *
		 * XXX: "ipfw set show" (ab)uses IP_FW_GET to read disabled mask
		 * so we need to fail _after_ saving at least one mask.
		 */
		bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable));
		if (dst->timestamp)
			dst->timestamp += boot_seconds;
		bp += l;

		if (error != 0) {
			if (error == 2) {
				/* Non-fatal table rewrite error. */
				warnflag = 1;
				continue;
			}
			printf("Stop on rule %d. Fail to convert table\n",
			    rule->rulenum);
			break;
		}
	}
	if (warnflag != 0)
		printf("ipfw: process %s is using legacy interfaces,"
		    " consider rebuilding\n", "");
	ipfw_get_dynamic(chain, &bp, ep); /* protected by the dynamic lock */
	return (bp - (char *)buf);
}


/* State shared between the sizing and dumping stages of a config dump. */
struct dump_args {
	uint32_t	b;	/* start rule */
	uint32_t	e;	/* end rule */
	uint32_t	rcount;	/* number of rules */
	uint32_t	rsize;	/* rules size */
	uint32_t	tcount;	/* number of tables */
	int		rcounters;	/* counters */
};

/*
 * Fills @ntlv with type, kernel index and name of named object @no.
 * The name is copied with strlcpy() bounded by the size of ntlv->name.
 */
void
ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv)
{

	ntlv->head.type = no->etlv;
	ntlv->head.length = sizeof(*ntlv);
	ntlv->idx = no->kidx;
	strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
}

/*
 * Export named object info in instance @ni, identified by @kidx
 * to ipfw_obj_ntlv. TLV is allocated from @sd space.
 *
 * Returns 0 on success.
*/ static int export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx, struct sockopt_data *sd) { struct named_object *no; ipfw_obj_ntlv *ntlv; no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("invalid object kernel index passed")); ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); if (ntlv == NULL) return (ENOMEM); ipfw_export_obj_ntlv(no, ntlv); return (0); } /* * Dumps static rules with table TLVs in buffer @sd. * * Returns 0 on success. */ static int dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da, uint32_t *bmask, struct sockopt_data *sd) { int error; int i, l; uint32_t tcount; ipfw_obj_ctlv *ctlv; struct ip_fw *krule; struct namedobj_instance *ni; caddr_t dst; /* Dump table names first (if any) */ if (da->tcount > 0) { /* Header first */ ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); if (ctlv == NULL) return (ENOMEM); ctlv->head.type = IPFW_TLV_TBLNAME_LIST; ctlv->head.length = da->tcount * sizeof(ipfw_obj_ntlv) + sizeof(*ctlv); ctlv->count = da->tcount; ctlv->objsize = sizeof(ipfw_obj_ntlv); } i = 0; tcount = da->tcount; ni = ipfw_get_table_objhash(chain); while (tcount > 0) { if ((bmask[i / 32] & (1 << (i % 32))) == 0) { i++; continue; } /* Jump to shared named object bitmask */ if (i >= IPFW_TABLES_MAX) { ni = CHAIN_TO_SRV(chain); i -= IPFW_TABLES_MAX; bmask += IPFW_TABLES_MAX / 32; } if ((error = export_objhash_ntlv(ni, i, sd)) != 0) return (error); i++; tcount--; } /* Dump rules */ ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); if (ctlv == NULL) return (ENOMEM); ctlv->head.type = IPFW_TLV_RULE_LIST; ctlv->head.length = da->rsize + sizeof(*ctlv); ctlv->count = da->rcount; for (i = da->b; i < da->e; i++) { krule = chain->map[i]; l = RULEUSIZE1(krule) + sizeof(ipfw_obj_tlv); if (da->rcounters != 0) l += sizeof(struct ip_fw_bcounter); dst = (caddr_t)ipfw_get_sopt_space(sd, l); if (dst == NULL) return (ENOMEM); export_rule1(krule, dst, l, da->rcounters); } return (0); } /* 
* Marks every object index used in @rule with bit in @bmask. * Used to generate bitmask of referenced tables/objects for given ruleset * or its part. * * Returns number of newly-referenced objects. */ static int mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule, uint32_t *bmask) { struct opcode_obj_rewrite *rw; ipfw_insn *cmd; int bidx, cmdlen, l, count; uint16_t kidx; uint8_t subtype; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; count = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); rw = find_op_rw(cmd, &kidx, &subtype); if (rw == NULL) continue; bidx = kidx / 32; /* * Maintain separate bitmasks for table and * non-table objects. */ if (rw->etlv != IPFW_TLV_TBL_NAME) bidx += IPFW_TABLES_MAX / 32; if ((bmask[bidx] & (1 << (kidx % 32))) == 0) count++; bmask[bidx] |= 1 << (kidx % 32); } return (count); } /* * Dumps requested objects data * Data layout (version 0)(current): * Request: [ ipfw_cfg_lheader ] + IPFW_CFG_GET_* flags * size = ipfw_cfg_lheader.size * Reply: [ ipfw_cfg_lheader * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) * ipfw_obj_tlv(IPFW_TLV_RULE_ENT) [ ip_fw_bcounter (optional) ip_fw_rule ] * ] (optional) * [ ipfw_obj_ctlv(IPFW_TLV_STATE_LIST) ipfw_obj_dyntlv x N ] (optional) * ] * * NOTE IPFW_TLV_STATE_LIST has the single valid field: objsize. * The rest (size, count) are set to zero and needs to be ignored. * * Returns 0 on success. */ static int dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_cfg_lheader *hdr; struct ip_fw *rule; size_t sz, rnum; uint32_t hdr_flags; int error, i; struct dump_args da; uint32_t *bmask; hdr = (ipfw_cfg_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr)); if (hdr == NULL) return (EINVAL); error = 0; bmask = NULL; /* Allocate needed state. 
Note we allocate 2xspace mask, for table&srv */ if (hdr->flags & IPFW_CFG_GET_STATIC) bmask = malloc(IPFW_TABLES_MAX / 4, M_TEMP, M_WAITOK | M_ZERO); IPFW_UH_RLOCK(chain); /* * STAGE 1: Determine size/count for objects in range. * Prepare used tables bitmask. */ sz = sizeof(ipfw_cfg_lheader); memset(&da, 0, sizeof(da)); da.b = 0; da.e = chain->n_rules; if (hdr->end_rule != 0) { /* Handle custom range */ if ((rnum = hdr->start_rule) > IPFW_DEFAULT_RULE) rnum = IPFW_DEFAULT_RULE; da.b = ipfw_find_rule(chain, rnum, 0); rnum = hdr->end_rule; rnum = (rnum < IPFW_DEFAULT_RULE) ? rnum+1 : IPFW_DEFAULT_RULE; da.e = ipfw_find_rule(chain, rnum, 0) + 1; } if (hdr->flags & IPFW_CFG_GET_STATIC) { for (i = da.b; i < da.e; i++) { rule = chain->map[i]; da.rsize += RULEUSIZE1(rule) + sizeof(ipfw_obj_tlv); da.rcount++; /* Update bitmask of used objects for given range */ da.tcount += mark_object_kidx(chain, rule, bmask); } /* Add counters if requested */ if (hdr->flags & IPFW_CFG_GET_COUNTERS) { da.rsize += sizeof(struct ip_fw_bcounter) * da.rcount; da.rcounters = 1; } if (da.tcount > 0) sz += da.tcount * sizeof(ipfw_obj_ntlv) + sizeof(ipfw_obj_ctlv); sz += da.rsize + sizeof(ipfw_obj_ctlv); } if (hdr->flags & IPFW_CFG_GET_STATES) sz += ipfw_dyn_get_count() * sizeof(ipfw_obj_dyntlv) + sizeof(ipfw_obj_ctlv); /* * Fill header anyway. 
* Note we have to save header fields to stable storage * buffer inside @sd can be flushed after dumping rules */ hdr->size = sz; hdr->set_mask = ~V_set_disable; hdr_flags = hdr->flags; hdr = NULL; if (sd->valsize < sz) { error = ENOMEM; goto cleanup; } /* STAGE2: Store actual data */ if (hdr_flags & IPFW_CFG_GET_STATIC) { error = dump_static_rules(chain, &da, bmask, sd); if (error != 0) goto cleanup; } if (hdr_flags & IPFW_CFG_GET_STATES) error = ipfw_dump_states(chain, sd); cleanup: IPFW_UH_RUNLOCK(chain); if (bmask != NULL) free(bmask, M_TEMP); return (error); } int ipfw_check_object_name_generic(const char *name) { int nsize; nsize = sizeof(((ipfw_obj_ntlv *)0)->name); if (strnlen(name, nsize) == nsize) return (EINVAL); if (name[0] == '\0') return (EINVAL); return (0); } /* * Creates non-existent objects referenced by rule. * * Return 0 on success. */ int create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti) { struct opcode_obj_rewrite *rw; struct obj_idx *p; uint16_t kidx; int error; /* * Compatibility stuff: do actual creation for non-existing, * but referenced objects. */ for (p = oib; p < pidx; p++) { if (p->kidx != 0) continue; ti->uidx = p->uidx; ti->type = p->type; ti->atype = 0; rw = find_op_rw(cmd + p->off, NULL, NULL); KASSERT(rw != NULL, ("Unable to find handler for op %d", (cmd + p->off)->opcode)); if (rw->create_object == NULL) error = EOPNOTSUPP; else error = rw->create_object(ch, ti, &kidx); if (error == 0) { p->kidx = kidx; continue; } /* * Error happened. We have to rollback everything. * Drop all already acquired references. */ IPFW_UH_WLOCK(ch); unref_oib_objects(ch, cmd, oib, pidx); IPFW_UH_WUNLOCK(ch); return (error); } return (0); } /* * Compatibility function for old ipfw(8) binaries. * Rewrites table/nat kernel indices with userland ones. * Convert tables matching '/^\d+$/' to their atoi() value. * Use number 65535 for other tables. * * Returns 0 on success. 
*/
static int
set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule)
{
	struct opcode_obj_rewrite *rw;
	struct named_object *no;
	ipfw_insn *cmd;
	char *end;
	long val;
	int cmdlen, error, l;
	uint16_t kidx, uidx;
	uint8_t subtype;

	error = 0;

	l = rule->cmd_len;
	cmd = rule->cmd;
	cmdlen = 0;
	for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
		cmdlen = F_LEN(cmd);

		/* Check if is index in given opcode */
		rw = find_op_rw(cmd, &kidx, &subtype);
		if (rw == NULL)
			continue;

		/* Try to find referenced kernel object */
		no = rw->find_bykidx(ch, kidx);
		if (no == NULL)
			continue;

		/*
		 * Only purely numeric names map to a legacy index.
		 * strtol() also accepts leading whitespace and a sign,
		 * so require the first character to be a digit;
		 * together with the end-of-string check this leaves
		 * digit-only names. Otherwise a name such as "-5"
		 * would be truncated into a bogus 16-bit index.
		 */
		val = strtol(no->name, &end, 10);
		if (no->name[0] >= '0' && no->name[0] <= '9' &&
		    *end == '\0' && val < 65535) {
			uidx = val;
		} else {

			/*
			 * We are called via legacy opcode.
			 * Save error and show table as fake number
			 * not to make ipfw(8) hang.
			 */
			uidx = 65535;
			error = 2;
		}

		rw->update(cmd, uidx);
	}

	/* 0, or 2 if at least one object got the fake index. */
	return (error);
}


/*
 * Unreferences all already-referenced objects in given @cmd rule,
 * using information in @oib.
 *
 * Entries of [@oib, @end) without a kernel index are skipped.
 * Asserts that the UH write lock is held.
 *
 * Used to rollback partially converted rule on error.
 */
static void
unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib,
    struct obj_idx *end)
{
	struct opcode_obj_rewrite *rw;
	struct named_object *no;
	struct obj_idx *p;

	IPFW_UH_WLOCK_ASSERT(ch);

	for (p = oib; p < end; p++) {
		if (p->kidx == 0)
			continue;

		rw = find_op_rw(cmd + p->off, NULL, NULL);
		KASSERT(rw != NULL, ("Unable to find handler for op %d",
		    (cmd + p->off)->opcode));

		/* Find & unref by existing idx */
		no = rw->find_bykidx(ch, p->kidx);
		KASSERT(no != NULL, ("Ref'd object %d disappeared", p->kidx));
		no->refcnt--;
	}
}

/*
 * Remove references from every object used in @rule.
 * Used at rule removal code.
*/
static void
unref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule)
{
	struct opcode_obj_rewrite *rewriter;
	struct named_object *obj;
	ipfw_insn *insn;
	int insn_len, left;
	uint16_t kidx;
	uint8_t subtype;

	IPFW_UH_WLOCK_ASSERT(ch);

	insn_len = 0;
	insn = rule->cmd;

	for (left = rule->cmd_len; left > 0;
	    left -= insn_len, insn += insn_len) {
		insn_len = F_LEN(insn);

		/* Opcodes without a rewriter carry no object reference. */
		rewriter = find_op_rw(insn, &kidx, &subtype);
		if (rewriter == NULL)
			continue;

		obj = rewriter->find_bykidx(ch, kidx);

		KASSERT(obj != NULL, ("table id %d not found", kidx));
		KASSERT(obj->subtype == subtype,
		    ("wrong type %d (%d) for table id %d",
		    obj->subtype, subtype, kidx));
		KASSERT(obj->refcnt > 0, ("refcount for table %d is %d",
		    kidx, obj->refcnt));

		/*
		 * The last reference is handed to the destroy handler
		 * when there is one; otherwise just drop a reference.
		 */
		if (obj->refcnt != 1 || rewriter->destroy_object == NULL)
			obj->refcnt--;
		else
			rewriter->destroy_object(ch, obj);
	}
}

/*
 * Find and reference object (if any) stored in instruction @cmd.
 *
 * Saves object info in @pidx, sets
 *  - @unresolved to 1 if object should exists but not found
 *
 * Returns non-zero value in case of error.
 */
static int
ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, struct tid_info *ti,
    struct obj_idx *pidx, int *unresolved)
{
	struct opcode_obj_rewrite *rewriter;
	struct named_object *obj;
	int rc;

	/* Nothing to do for opcodes which do not reference objects. */
	rewriter = find_op_rw(cmd, &ti->uidx, &ti->type);
	if (rewriter == NULL)
		return (0);

	/* Remember the userland index and type for the caller. */
	pidx->uidx = ti->uidx;
	pidx->type = ti->type;

	/* Look the object up by its name. */
	rc = rewriter->find_byname(ch, ti, &obj);
	if (rc != 0)
		return (rc);

	if (obj != NULL) {
		/* Found: take a reference and store the kernel index. */
		obj->refcnt++;
		rewriter->update(cmd, obj->kidx);
	} else {
		/* Not found: let the caller arrange automatic creation. */
		*unresolved = 1;
	}

	return (0);
}

/*
 * Finds and bumps refcount for objects referenced by given @rule.
 * Auto-creates non-existing tables.
 * Fills in @oib array with userland/kernel indexes.
 *
 * Returns 0 on success.
*/ static int ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule, struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti) { struct obj_idx *pidx; ipfw_insn *cmd; int cmdlen, error, l, unresolved; pidx = oib; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; error = 0; IPFW_UH_WLOCK(ch); /* Increase refcount on each existing referenced table. */ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); unresolved = 0; error = ref_opcode_object(ch, cmd, ti, pidx, &unresolved); if (error != 0) break; /* * Compatibility stuff for old clients: * prepare to automaitcally create non-existing objects. */ if (unresolved != 0) { pidx->off = rule->cmd_len - l; pidx++; } } if (error != 0) { /* Unref everything we have already done */ unref_oib_objects(ch, rule->cmd, oib, pidx); IPFW_UH_WUNLOCK(ch); return (error); } IPFW_UH_WUNLOCK(ch); /* Perform auto-creation for non-existing objects */ if (pidx != oib) error = create_objects_compat(ch, rule->cmd, oib, pidx, ti); /* Calculate real number of dynamic objects */ ci->object_opcodes = (uint16_t)(pidx - oib); return (error); } /* * Checks is opcode is referencing table of appropriate type. * Adds reference count for found table if true. * Rewrites user-supplied opcode values with kernel ones. * * Returns 0 on success and appropriate error code otherwise. */ static int rewrite_rule_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci) { int error; ipfw_insn *cmd; uint8_t type; struct obj_idx *p, *pidx_first, *pidx_last; struct tid_info ti; /* * Prepare an array for storing opcode indices. * Use stack allocation by default. */ if (ci->object_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) { /* Stack */ pidx_first = ci->obuf; } else pidx_first = malloc( ci->object_opcodes * sizeof(struct obj_idx), M_IPFW, M_WAITOK | M_ZERO); error = 0; type = 0; memset(&ti, 0, sizeof(ti)); /* * Use default set for looking up tables (old way) or * use set rule is assigned to (new way). 
*/ ti.set = (V_fw_tables_sets != 0) ? ci->krule->set : 0; if (ci->ctlv != NULL) { ti.tlvs = (void *)(ci->ctlv + 1); ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv); } /* Reference all used tables and other objects */ error = ref_rule_objects(chain, ci->krule, ci, pidx_first, &ti); if (error != 0) goto free; /* * Note that ref_rule_objects() might have updated ci->object_opcodes * to reflect actual number of object opcodes. */ /* Perform rewrite of remaining opcodes */ p = pidx_first; pidx_last = pidx_first + ci->object_opcodes; for (p = pidx_first; p < pidx_last; p++) { cmd = ci->krule->cmd + p->off; update_opcode_kidx(cmd, p->kidx); } free: if (pidx_first != ci->obuf) free(pidx_first, M_IPFW); return (error); } /* * Adds one or more rules to ipfw @chain. * Data layout (version 0)(current): * Request: * [ * ip_fw3_opheader * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional *1) * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] (*2) (*3) * ] * Reply: * [ * ip_fw3_opheader * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] * ] * * Rules in reply are modified to store their actual ruleset number. * * (*1) TLVs inside IPFW_TLV_TBL_LIST needs to be sorted ascending * according to their idx field and there has to be no duplicates. * (*2) Numbered rules inside IPFW_TLV_RULE_LIST needs to be sorted ascending. * (*3) Each ip_fw structure needs to be aligned to u64 boundary. * * Returns 0 on success. 
*/
static int
add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	ipfw_obj_ctlv *ctlv, *rtlv, *tstate;
	ipfw_obj_ntlv *ntlv;
	int clen, error, idx;
	uint32_t count, read;
	struct ip_fw_rule *r;
	struct rule_check_info rci, *ci, *cbuf;
	int i, rsize;

	/* Work on the whole request; the first TLV follows the header. */
	op3 = (ip_fw3_opheader *)ipfw_get_sopt_space(sd, sd->valsize);
	ctlv = (ipfw_obj_ctlv *)(op3 + 1);

	/* "read" tracks how many bytes of the request were consumed. */
	read = sizeof(ip_fw3_opheader);
	rtlv = NULL;
	tstate = NULL;
	cbuf = NULL;
	memset(&rci, 0, sizeof(struct rule_check_info));

	if (read + sizeof(*ctlv) > sd->valsize)
		return (EINVAL);

	if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) {
		clen = ctlv->head.length;
		/*
		 * Check size and alignment.
		 * The TLV has to fit into what is left after the bytes
		 * already consumed, not merely into the whole buffer:
		 * otherwise the name loop below could run past the end
		 * of the request. "read" does not exceed sd->valsize
		 * here, so the subtraction cannot wrap.
		 */
		if (clen > sd->valsize - read || clen < sizeof(*ctlv))
			return (EINVAL);
		if ((clen % sizeof(uint64_t)) != 0)
			return (EINVAL);

		/*
		 * Some table names or other named objects.
		 * Check for validity.
		 */
		count = (ctlv->head.length - sizeof(*ctlv)) / sizeof(*ntlv);
		if (ctlv->count != count || ctlv->objsize != sizeof(*ntlv))
			return (EINVAL);

		/*
		 * Check each TLV.
		 * Ensure TLVs are sorted ascending and
		 * there are no duplicates.
		 */
		idx = -1;
		ntlv = (ipfw_obj_ntlv *)(ctlv + 1);
		while (count > 0) {
			if (ntlv->head.length != sizeof(ipfw_obj_ntlv))
				return (EINVAL);

			error = ipfw_check_object_name_generic(ntlv->name);
			if (error != 0)
				return (error);

			if (ntlv->idx <= idx)
				return (EINVAL);

			idx = ntlv->idx;
			count--;
			ntlv++;
		}

		tstate = ctlv;
		read += ctlv->head.length;
		ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length);
	}

	if (read + sizeof(*ctlv) > sd->valsize)
		return (EINVAL);

	if (ctlv->head.type == IPFW_TLV_RULE_LIST) {
		clen = ctlv->head.length;
		/*
		 * Same bound as for the name list; comparing against the
		 * remaining space avoids wrap-around of "clen + read".
		 */
		if (clen > sd->valsize - read || clen < sizeof(*ctlv))
			return (EINVAL);
		if ((clen % sizeof(uint64_t)) != 0)
			return (EINVAL);

		/*
		 * TODO: Permit adding multiple rules at once
		 */
		if (ctlv->count != 1)
			return (ENOTSUP);

		clen -= sizeof(*ctlv);

		if (ctlv->count > clen / sizeof(struct ip_fw_rule))
			return (EINVAL);

		/* Allocate state for each rule or use stack */
		if (ctlv->count == 1) {
			memset(&rci, 0, sizeof(struct rule_check_info));
			cbuf = &rci;
		} else
			cbuf = malloc(ctlv->count * sizeof(*ci), M_TEMP,
			    M_WAITOK | M_ZERO);
		ci = cbuf;

		/*
		 * Check each rule for validity.
		 * Ensure numbered rules are sorted ascending
		 * and properly aligned
		 */
		idx = 0;
		r = (struct ip_fw_rule *)(ctlv + 1);
		count = 0;
		error = 0;
		while (clen > 0) {
			rsize = roundup2(RULESIZE(r), sizeof(uint64_t));
			if (rsize > clen || ctlv->count <= count) {
				error = EINVAL;
				break;
			}

			ci->ctlv = tstate;
			error = check_ipfw_rule1(r, rsize, ci);
			if (error != 0)
				break;

			/* Check sorting */
			if (r->rulenum != 0 && r->rulenum < idx) {
				printf("rulenum %d idx %d\n", r->rulenum, idx);
				error = EINVAL;
				break;
			}
			idx = r->rulenum;

			ci->urule = (caddr_t)r;

			rsize = roundup2(rsize, sizeof(uint64_t));
			clen -= rsize;
			r = (struct ip_fw_rule *)((caddr_t)r + rsize);
			count++;
			ci++;
		}

		/* Any validation failure is reported as EINVAL. */
		if (ctlv->count != count || error != 0) {
			if (cbuf != &rci)
				free(cbuf, M_TEMP);
			return (EINVAL);
		}

		rtlv = ctlv;
		read += ctlv->head.length;
		ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length);
	}

	/* No trailing data, and a non-empty rule list is mandatory. */
	if (read != sd->valsize || rtlv == NULL || rtlv->count == 0) {
		if (cbuf != NULL && cbuf != &rci)
			free(cbuf, M_TEMP);
		return (EINVAL);
	}

	/*
	 * Passed rules seems to be valid.
	 * Allocate storage and try to add them to chain.
	 */
	for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) {
		clen = RULEKSIZE1((struct ip_fw_rule *)ci->urule);
		ci->krule = ipfw_alloc_rule(chain, clen);
		import_rule1(ci);
	}

	if ((error = commit_rules(chain, cbuf, rtlv->count)) != 0) {
		/* Free allocated krules */
		for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++)
			free(ci->krule, M_IPFW);
	}

	if (cbuf != NULL && cbuf != &rci)
		free(cbuf, M_TEMP);

	return (error);
}

/*
 * Lists all sopts currently registered.
* Data layout (v0)(current):
 * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
 * Reply: [ ipfw_obj_lheader ipfw_sopt_info x N ]
 *
 * The reply header always carries the number of registered handlers,
 * the per-object size and the total size needed, even when ENOMEM is
 * returned because the supplied buffer is too small.
 *
 * Returns 0 on success, EINVAL if the request header cannot be obtained
 * or claims more space than was supplied, ENOMEM if the buffer is too
 * small for the full list.
 */
static int
dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	struct _ipfw_obj_lheader *olh;
	ipfw_sopt_info *i;
	struct ipfw_sopt_handler *sh;
	uint32_t count, n, size;

	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
	if (olh == NULL)
		return (EINVAL);
	if (sd->valsize < olh->size)
		return (EINVAL);

	CTL3_LOCK();
	count = ctl3_hsize;
	size = count * sizeof(ipfw_sopt_info) + sizeof(ipfw_obj_lheader);

	/* Fill in header regardless of buffer size */
	olh->count = count;
	olh->objsize = sizeof(ipfw_sopt_info);

	if (size > olh->size) {
		olh->size = size;
		CTL3_UNLOCK();
		return (ENOMEM);
	}
	olh->size = size;

	/*
	 * ctl3_handlers[] is zero-based and holds ctl3_hsize entries, so
	 * the walk has to cover [0, count).  Starting at 1 and stopping at
	 * count would skip the first handler and read one element past
	 * the end of the array.
	 */
	for (n = 0; n < count; n++) {
		i = (ipfw_sopt_info *)ipfw_get_sopt_space(sd, sizeof(*i));
		KASSERT(i != NULL, ("previously checked buffer is not enough"));
		sh = &ctl3_handlers[n];
		i->opcode = sh->opcode;
		i->version = sh->version;
		i->refcnt = sh->refcnt;
	}
	CTL3_UNLOCK();

	return (0);
}

/*
 * Compares two opcodes.
 * Used both in qsort() and bsearch().
 *
 * Returns 0 if match is found.
*/ static int compare_opcodes(const void *_a, const void *_b) { const struct opcode_obj_rewrite *a, *b; a = (const struct opcode_obj_rewrite *)_a; b = (const struct opcode_obj_rewrite *)_b; if (a->opcode < b->opcode) return (-1); else if (a->opcode > b->opcode) return (1); return (0); } /* * XXX: Rewrite bsearch() */ static int find_op_rw_range(uint16_t op, struct opcode_obj_rewrite **plo, struct opcode_obj_rewrite **phi) { struct opcode_obj_rewrite *ctl3_max, *lo, *hi, h, *rw; memset(&h, 0, sizeof(h)); h.opcode = op; rw = (struct opcode_obj_rewrite *)bsearch(&h, ctl3_rewriters, ctl3_rsize, sizeof(h), compare_opcodes); if (rw == NULL) return (1); /* Find the first element matching the same opcode */ lo = rw; for ( ; lo > ctl3_rewriters && (lo - 1)->opcode == op; lo--) ; /* Find the last element matching the same opcode */ hi = rw; ctl3_max = ctl3_rewriters + ctl3_rsize; for ( ; (hi + 1) < ctl3_max && (hi + 1)->opcode == op; hi++) ; *plo = lo; *phi = hi; return (0); } /* * Finds opcode object rewriter based on @code. * * Returns pointer to handler or NULL. 
*/ static struct opcode_obj_rewrite * find_op_rw(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { struct opcode_obj_rewrite *rw, *lo, *hi; uint16_t uidx; uint8_t subtype; if (find_op_rw_range(cmd->opcode, &lo, &hi) != 0) return (NULL); for (rw = lo; rw <= hi; rw++) { if (rw->classifier(cmd, &uidx, &subtype) == 0) { if (puidx != NULL) *puidx = uidx; if (ptype != NULL) *ptype = subtype; return (rw); } } return (NULL); } int classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx) { if (find_op_rw(cmd, puidx, NULL) == 0) return (1); return (0); } void update_opcode_kidx(ipfw_insn *cmd, uint16_t idx) { struct opcode_obj_rewrite *rw; rw = find_op_rw(cmd, NULL, NULL); KASSERT(rw != NULL, ("No handler to update opcode %d", cmd->opcode)); rw->update(cmd, idx); } void ipfw_init_obj_rewriter() { ctl3_rewriters = NULL; ctl3_rsize = 0; } void ipfw_destroy_obj_rewriter() { if (ctl3_rewriters != NULL) free(ctl3_rewriters, M_IPFW); ctl3_rewriters = NULL; ctl3_rsize = 0; } /* * Adds one or more opcode object rewrite handlers to the global array. * Function may sleep. */ void ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count) { size_t sz; struct opcode_obj_rewrite *tmp; CTL3_LOCK(); for (;;) { sz = ctl3_rsize + count; CTL3_UNLOCK(); tmp = malloc(sizeof(*rw) * sz, M_IPFW, M_WAITOK | M_ZERO); CTL3_LOCK(); if (ctl3_rsize + count <= sz) break; /* Retry */ free(tmp, M_IPFW); } /* Merge old & new arrays */ sz = ctl3_rsize + count; memcpy(tmp, ctl3_rewriters, ctl3_rsize * sizeof(*rw)); memcpy(&tmp[ctl3_rsize], rw, count * sizeof(*rw)); qsort(tmp, sz, sizeof(*rw), compare_opcodes); /* Switch new and free old */ if (ctl3_rewriters != NULL) free(ctl3_rewriters, M_IPFW); ctl3_rewriters = tmp; ctl3_rsize = sz; CTL3_UNLOCK(); } /* * Removes one or more object rewrite handlers from the global array. 
*/ int ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count) { size_t sz; struct opcode_obj_rewrite *ctl3_max, *ktmp, *lo, *hi; int i; CTL3_LOCK(); for (i = 0; i < count; i++) { if (find_op_rw_range(rw[i].opcode, &lo, &hi) != 0) continue; for (ktmp = lo; ktmp <= hi; ktmp++) { if (ktmp->classifier != rw[i].classifier) continue; ctl3_max = ctl3_rewriters + ctl3_rsize; sz = (ctl3_max - (ktmp + 1)) * sizeof(*ktmp); memmove(ktmp, ktmp + 1, sz); ctl3_rsize--; break; } } if (ctl3_rsize == 0) { if (ctl3_rewriters != NULL) free(ctl3_rewriters, M_IPFW); ctl3_rewriters = NULL; } CTL3_UNLOCK(); return (0); } -static void +static int export_objhash_ntlv_internal(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct sockopt_data *sd; ipfw_obj_ntlv *ntlv; sd = (struct sockopt_data *)arg; ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); if (ntlv == NULL) - return; + return (ENOMEM); ipfw_export_obj_ntlv(no, ntlv); + return (0); } /* * Lists all service objects. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ] size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader [ ipfw_obj_ntlv x N ] (optional) ] * Returns 0 on success */ static int dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_lheader *hdr; int count; hdr = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr)); if (hdr == NULL) return (EINVAL); IPFW_UH_RLOCK(chain); count = ipfw_objhash_count(CHAIN_TO_SRV(chain)); hdr->size = sizeof(ipfw_obj_lheader) + count * sizeof(ipfw_obj_ntlv); if (sd->valsize < hdr->size) { IPFW_UH_RUNLOCK(chain); return (ENOMEM); } hdr->count = count; hdr->objsize = sizeof(ipfw_obj_ntlv); if (count > 0) ipfw_objhash_foreach(CHAIN_TO_SRV(chain), export_objhash_ntlv_internal, sd); IPFW_UH_RUNLOCK(chain); return (0); } /* * Compares two sopt handlers (code, version and handler ptr). * Used both as qsort() and bsearch(). * Does not compare handler for latter case. 
* * Returns 0 if match is found. */ static int compare_sh(const void *_a, const void *_b) { const struct ipfw_sopt_handler *a, *b; a = (const struct ipfw_sopt_handler *)_a; b = (const struct ipfw_sopt_handler *)_b; if (a->opcode < b->opcode) return (-1); else if (a->opcode > b->opcode) return (1); if (a->version < b->version) return (-1); else if (a->version > b->version) return (1); /* bsearch helper */ if (a->handler == NULL) return (0); if ((uintptr_t)a->handler < (uintptr_t)b->handler) return (-1); else if ((uintptr_t)a->handler > (uintptr_t)b->handler) return (1); return (0); } /* * Finds sopt handler based on @code and @version. * * Returns pointer to handler or NULL. */ static struct ipfw_sopt_handler * find_sh(uint16_t code, uint8_t version, sopt_handler_f *handler) { struct ipfw_sopt_handler *sh, h; memset(&h, 0, sizeof(h)); h.opcode = code; h.version = version; h.handler = handler; sh = (struct ipfw_sopt_handler *)bsearch(&h, ctl3_handlers, ctl3_hsize, sizeof(h), compare_sh); return (sh); } static int find_ref_sh(uint16_t opcode, uint8_t version, struct ipfw_sopt_handler *psh) { struct ipfw_sopt_handler *sh; CTL3_LOCK(); if ((sh = find_sh(opcode, version, NULL)) == NULL) { CTL3_UNLOCK(); printf("ipfw: ipfw_ctl3 invalid option %d""v""%d\n", opcode, version); return (EINVAL); } sh->refcnt++; ctl3_refct++; /* Copy handler data to requested buffer */ *psh = *sh; CTL3_UNLOCK(); return (0); } static void find_unref_sh(struct ipfw_sopt_handler *psh) { struct ipfw_sopt_handler *sh; CTL3_LOCK(); sh = find_sh(psh->opcode, psh->version, NULL); KASSERT(sh != NULL, ("ctl3 handler disappeared")); sh->refcnt--; ctl3_refct--; CTL3_UNLOCK(); } void ipfw_init_sopt_handler() { CTL3_LOCK_INIT(); IPFW_ADD_SOPT_HANDLER(1, scodes); } void ipfw_destroy_sopt_handler() { IPFW_DEL_SOPT_HANDLER(1, scodes); CTL3_LOCK_DESTROY(); } /* * Adds one or more sockopt handlers to the global array. * Function may sleep. 
*/ void ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count) { size_t sz; struct ipfw_sopt_handler *tmp; CTL3_LOCK(); for (;;) { sz = ctl3_hsize + count; CTL3_UNLOCK(); tmp = malloc(sizeof(*sh) * sz, M_IPFW, M_WAITOK | M_ZERO); CTL3_LOCK(); if (ctl3_hsize + count <= sz) break; /* Retry */ free(tmp, M_IPFW); } /* Merge old & new arrays */ sz = ctl3_hsize + count; memcpy(tmp, ctl3_handlers, ctl3_hsize * sizeof(*sh)); memcpy(&tmp[ctl3_hsize], sh, count * sizeof(*sh)); qsort(tmp, sz, sizeof(*sh), compare_sh); /* Switch new and free old */ if (ctl3_handlers != NULL) free(ctl3_handlers, M_IPFW); ctl3_handlers = tmp; ctl3_hsize = sz; ctl3_gencnt++; CTL3_UNLOCK(); } /* * Removes one or more sockopt handlers from the global array. */ int ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count) { size_t sz; struct ipfw_sopt_handler *tmp, *h; int i; CTL3_LOCK(); for (i = 0; i < count; i++) { tmp = &sh[i]; h = find_sh(tmp->opcode, tmp->version, tmp->handler); if (h == NULL) continue; sz = (ctl3_handlers + ctl3_hsize - (h + 1)) * sizeof(*h); memmove(h, h + 1, sz); ctl3_hsize--; } if (ctl3_hsize == 0) { if (ctl3_handlers != NULL) free(ctl3_handlers, M_IPFW); ctl3_handlers = NULL; } ctl3_gencnt++; CTL3_UNLOCK(); return (0); } /* * Writes data accumulated in @sd to sockopt buffer. * Zeroes internal @sd buffer. */ static int ipfw_flush_sopt_data(struct sockopt_data *sd) { struct sockopt *sopt; int error; size_t sz; sz = sd->koff; if (sz == 0) return (0); sopt = sd->sopt; if (sopt->sopt_dir == SOPT_GET) { error = copyout(sd->kbuf, sopt->sopt_val, sz); if (error != 0) return (error); } memset(sd->kbuf, 0, sd->ksize); sd->ktotal += sz; sd->koff = 0; if (sd->ktotal + sd->ksize < sd->valsize) sd->kavail = sd->ksize; else sd->kavail = sd->valsize - sd->ktotal; /* Update sopt buffer data */ sopt->sopt_valsize = sd->ktotal; sopt->sopt_val = sd->sopt_val + sd->ktotal; return (0); } /* * Ensures that @sd buffer has contiguous @neeeded number of * bytes. 
* * Returns pointer to requested space or NULL. */ caddr_t ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed) { int error; caddr_t addr; if (sd->kavail < needed) { /* * Flush data and try another time. */ error = ipfw_flush_sopt_data(sd); if (sd->kavail < needed || error != 0) return (NULL); } addr = sd->kbuf + sd->koff; sd->koff += needed; sd->kavail -= needed; return (addr); } /* * Requests @needed contiguous bytes from @sd buffer. * Function is used to notify subsystem that we are * interesed in first @needed bytes (request header) * and the rest buffer can be safely zeroed. * * Returns pointer to requested space or NULL. */ caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed) { caddr_t addr; if ((addr = ipfw_get_sopt_space(sd, needed)) == NULL) return (NULL); if (sd->kavail > 0) memset(sd->kbuf + sd->koff, 0, sd->kavail); return (addr); } /* * New sockopt handler. */ int ipfw_ctl3(struct sockopt *sopt) { int error, locked; size_t size, valsize; struct ip_fw_chain *chain; char xbuf[256]; struct sockopt_data sdata; struct ipfw_sopt_handler h; ip_fw3_opheader *op3 = NULL; error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); if (error != 0) return (error); if (sopt->sopt_name != IP_FW3) return (ipfw_ctl(sopt)); chain = &V_layer3_chain; error = 0; /* Save original valsize before it is altered via sooptcopyin() */ valsize = sopt->sopt_valsize; memset(&sdata, 0, sizeof(sdata)); /* Read op3 header first to determine actual operation */ op3 = (ip_fw3_opheader *)xbuf; error = sooptcopyin(sopt, op3, sizeof(*op3), sizeof(*op3)); if (error != 0) return (error); sopt->sopt_valsize = valsize; /* * Find and reference command. */ error = find_ref_sh(op3->opcode, op3->version, &h); if (error != 0) return (error); /* * Disallow modifications in really-really secure mode, but still allow * the logging counters to be reset. 
*/ if ((h.dir & HDIR_SET) != 0 && h.opcode != IP_FW_XRESETLOG) { error = securelevel_ge(sopt->sopt_td->td_ucred, 3); if (error != 0) { find_unref_sh(&h); return (error); } } /* * Fill in sockopt_data structure that may be useful for * IP_FW3 get requests. */ locked = 0; if (valsize <= sizeof(xbuf)) { /* use on-stack buffer */ sdata.kbuf = xbuf; sdata.ksize = sizeof(xbuf); sdata.kavail = valsize; } else { /* * Determine opcode type/buffer size: * allocate sliding-window buf for data export or * contiguous buffer for special ops. */ if ((h.dir & HDIR_SET) != 0) { /* Set request. Allocate contigous buffer. */ if (valsize > CTL3_LARGEBUF) { find_unref_sh(&h); return (EFBIG); } size = valsize; } else { /* Get request. Allocate sliding window buffer */ size = (valsizesopt_val, valsize); if (error != 0) return (error); locked = 1; } } sdata.kbuf = malloc(size, M_TEMP, M_WAITOK | M_ZERO); sdata.ksize = size; sdata.kavail = size; } sdata.sopt = sopt; sdata.sopt_val = sopt->sopt_val; sdata.valsize = valsize; /* * Copy either all request (if valsize < bsize_max) * or first bsize_max bytes to guarantee most consumers * that all necessary data has been copied). * Anyway, copy not less than sizeof(ip_fw3_opheader). */ if ((error = sooptcopyin(sopt, sdata.kbuf, sdata.ksize, sizeof(ip_fw3_opheader))) != 0) return (error); op3 = (ip_fw3_opheader *)sdata.kbuf; /* Finally, run handler */ error = h.handler(chain, op3, &sdata); find_unref_sh(&h); /* Flush state and free buffers */ if (error == 0) error = ipfw_flush_sopt_data(&sdata); else ipfw_flush_sopt_data(&sdata); if (locked != 0) vsunlock(sdata.sopt_val, valsize); /* Restore original pointer and set number of bytes written */ sopt->sopt_val = sdata.sopt_val; sopt->sopt_valsize = sdata.ktotal; if (sdata.kbuf != xbuf) free(sdata.kbuf, M_TEMP); return (error); } /** * {set|get}sockopt parser. 
*/ int ipfw_ctl(struct sockopt *sopt) { #define RULE_MAXSIZE (512*sizeof(u_int32_t)) int error; size_t size, valsize; struct ip_fw *buf; struct ip_fw_rule0 *rule; struct ip_fw_chain *chain; u_int32_t rulenum[2]; uint32_t opt; struct rule_check_info ci; IPFW_RLOCK_TRACKER; chain = &V_layer3_chain; error = 0; /* Save original valsize before it is altered via sooptcopyin() */ valsize = sopt->sopt_valsize; opt = sopt->sopt_name; /* * Disallow modifications in really-really secure mode, but still allow * the logging counters to be reset. */ if (opt == IP_FW_ADD || (sopt->sopt_dir == SOPT_SET && opt != IP_FW_RESETLOG)) { error = securelevel_ge(sopt->sopt_td->td_ucred, 3); if (error != 0) return (error); } switch (opt) { case IP_FW_GET: /* * pass up a copy of the current rules. Static rules * come first (the last of which has number IPFW_DEFAULT_RULE), * followed by a possibly empty list of dynamic rule. * The last dynamic rule has NULL in the "next" field. * * Note that the calculated size is used to bound the * amount of data returned to the user. The rule set may * change between calculating the size and returning the * data in which case we'll just return what fits. 
*/ for (;;) { int len = 0, want; size = chain->static_len; size += ipfw_dyn_len(); if (size >= sopt->sopt_valsize) break; buf = malloc(size, M_TEMP, M_WAITOK | M_ZERO); IPFW_UH_RLOCK(chain); /* check again how much space we need */ want = chain->static_len + ipfw_dyn_len(); if (size >= want) len = ipfw_getrules(chain, buf, size); IPFW_UH_RUNLOCK(chain); if (size >= want) error = sooptcopyout(sopt, buf, len); free(buf, M_TEMP); if (size >= want) break; } break; case IP_FW_FLUSH: /* locking is done within del_entry() */ error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */ break; case IP_FW_ADD: rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK); error = sooptcopyin(sopt, rule, RULE_MAXSIZE, sizeof(struct ip_fw7) ); memset(&ci, 0, sizeof(struct rule_check_info)); /* * If the size of commands equals RULESIZE7 then we assume * a FreeBSD7.2 binary is talking to us (set is7=1). * is7 is persistent so the next 'ipfw list' command * will use this format. * NOTE: If wrong version is guessed (this can happen if * the first ipfw command is 'ipfw [pipe] list') * the ipfw binary may crash or loop infinitly... */ size = sopt->sopt_valsize; if (size == RULESIZE7(rule)) { is7 = 1; error = convert_rule_to_8(rule); if (error) { free(rule, M_TEMP); return error; } size = RULESIZE(rule); } else is7 = 0; if (error == 0) error = check_ipfw_rule0(rule, size, &ci); if (error == 0) { /* locking is done within add_rule() */ struct ip_fw *krule; krule = ipfw_alloc_rule(chain, RULEKSIZE0(rule)); ci.urule = (caddr_t)rule; ci.krule = krule; import_rule0(&ci); error = commit_rules(chain, &ci, 1); if (!error && sopt->sopt_dir == SOPT_GET) { if (is7) { error = convert_rule_to_7(rule); size = RULESIZE7(rule); if (error) { free(rule, M_TEMP); return error; } } error = sooptcopyout(sopt, rule, size); } } free(rule, M_TEMP); break; case IP_FW_DEL: /* * IP_FW_DEL is used for deleting single rules or sets, * and (ab)used to atomically manipulate sets. 
Argument size * is used to distinguish between the two: * sizeof(u_int32_t) * delete single rule or set of rules, * or reassign rules (or sets) to a different set. * 2*sizeof(u_int32_t) * atomic disable/enable sets. * first u_int32_t contains sets to be disabled, * second u_int32_t contains sets to be enabled. */ error = sooptcopyin(sopt, rulenum, 2*sizeof(u_int32_t), sizeof(u_int32_t)); if (error) break; size = sopt->sopt_valsize; if (size == sizeof(u_int32_t) && rulenum[0] != 0) { /* delete or reassign, locking done in del_entry() */ error = del_entry(chain, rulenum[0]); } else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */ IPFW_UH_WLOCK(chain); V_set_disable = (V_set_disable | rulenum[0]) & ~rulenum[1] & ~(1<sopt_val != 0) { error = sooptcopyin(sopt, rulenum, sizeof(u_int32_t), sizeof(u_int32_t)); if (error) break; } error = zero_entry(chain, rulenum[0], sopt->sopt_name == IP_FW_RESETLOG); break; /*--- TABLE opcodes ---*/ case IP_FW_TABLE_ADD: case IP_FW_TABLE_DEL: { ipfw_table_entry ent; struct tentry_info tei; struct tid_info ti; struct table_value v; error = sooptcopyin(sopt, &ent, sizeof(ent), sizeof(ent)); if (error) break; memset(&tei, 0, sizeof(tei)); tei.paddr = &ent.addr; tei.subtype = AF_INET; tei.masklen = ent.masklen; ipfw_import_table_value_legacy(ent.value, &v); tei.pvalue = &v; memset(&ti, 0, sizeof(ti)); ti.uidx = ent.tbl; ti.type = IPFW_TABLE_CIDR; error = (opt == IP_FW_TABLE_ADD) ? 
add_table_entry(chain, &ti, &tei, 0, 1) : del_table_entry(chain, &ti, &tei, 0, 1); } break; case IP_FW_TABLE_FLUSH: { u_int16_t tbl; struct tid_info ti; error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)); if (error) break; memset(&ti, 0, sizeof(ti)); ti.uidx = tbl; error = flush_table(chain, &ti); } break; case IP_FW_TABLE_GETSIZE: { u_int32_t tbl, cnt; struct tid_info ti; if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)))) break; memset(&ti, 0, sizeof(ti)); ti.uidx = tbl; IPFW_RLOCK(chain); error = ipfw_count_table(chain, &ti, &cnt); IPFW_RUNLOCK(chain); if (error) break; error = sooptcopyout(sopt, &cnt, sizeof(cnt)); } break; case IP_FW_TABLE_LIST: { ipfw_table *tbl; struct tid_info ti; if (sopt->sopt_valsize < sizeof(*tbl)) { error = EINVAL; break; } size = sopt->sopt_valsize; tbl = malloc(size, M_TEMP, M_WAITOK); error = sooptcopyin(sopt, tbl, size, sizeof(*tbl)); if (error) { free(tbl, M_TEMP); break; } tbl->size = (size - sizeof(*tbl)) / sizeof(ipfw_table_entry); memset(&ti, 0, sizeof(ti)); ti.uidx = tbl->tbl; IPFW_RLOCK(chain); error = ipfw_dump_table_legacy(chain, &ti, tbl); IPFW_RUNLOCK(chain); if (error) { free(tbl, M_TEMP); break; } error = sooptcopyout(sopt, tbl, size); free(tbl, M_TEMP); } break; /*--- NAT operations are protected by the IPFW_LOCK ---*/ case IP_FW_NAT_CFG: if (IPFW_NAT_LOADED) error = ipfw_nat_cfg_ptr(sopt); else { printf("IP_FW_NAT_CFG: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_DEL: if (IPFW_NAT_LOADED) error = ipfw_nat_del_ptr(sopt); else { printf("IP_FW_NAT_DEL: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_GET_CONFIG: if (IPFW_NAT_LOADED) error = ipfw_nat_get_cfg_ptr(sopt); else { printf("IP_FW_NAT_GET_CFG: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_GET_LOG: if (IPFW_NAT_LOADED) error = ipfw_nat_get_log_ptr(sopt); else { printf("IP_FW_NAT_GET_LOG: %s\n", "ipfw_nat not present, 
please load it"); error = EINVAL; } break; default: printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name); error = EINVAL; } return (error); #undef RULE_MAXSIZE } #define RULE_MAXSIZE (256*sizeof(u_int32_t)) /* Functions to convert rules 7.2 <==> 8.0 */ static int convert_rule_to_7(struct ip_fw_rule0 *rule) { /* Used to modify original rule */ struct ip_fw7 *rule7 = (struct ip_fw7 *)rule; /* copy of original rule, version 8 */ struct ip_fw_rule0 *tmp; /* Used to copy commands */ ipfw_insn *ccmd, *dst; int ll = 0, ccmdlen = 0; tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO); if (tmp == NULL) { return 1; //XXX error } bcopy(rule, tmp, RULE_MAXSIZE); /* Copy fields */ //rule7->_pad = tmp->_pad; rule7->set = tmp->set; rule7->rulenum = tmp->rulenum; rule7->cmd_len = tmp->cmd_len; rule7->act_ofs = tmp->act_ofs; rule7->next_rule = (struct ip_fw7 *)tmp->next_rule; rule7->cmd_len = tmp->cmd_len; rule7->pcnt = tmp->pcnt; rule7->bcnt = tmp->bcnt; rule7->timestamp = tmp->timestamp; /* Copy commands */ for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule7->cmd ; ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) { ccmdlen = F_LEN(ccmd); bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t)); if (dst->opcode > O_NAT) /* O_REASS doesn't exists in 7.2 version, so * decrement opcode if it is after O_REASS */ dst->opcode--; if (ccmdlen > ll) { printf("ipfw: opcode %d size truncated\n", ccmd->opcode); return EINVAL; } } free(tmp, M_TEMP); return 0; } static int convert_rule_to_8(struct ip_fw_rule0 *rule) { /* Used to modify original rule */ struct ip_fw7 *rule7 = (struct ip_fw7 *) rule; /* Used to copy commands */ ipfw_insn *ccmd, *dst; int ll = 0, ccmdlen = 0; /* Copy of original rule */ struct ip_fw7 *tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO); if (tmp == NULL) { return 1; //XXX error } bcopy(rule7, tmp, RULE_MAXSIZE); for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule->cmd ; ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) { ccmdlen = F_LEN(ccmd); 
bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t)); if (dst->opcode > O_NAT) /* O_REASS doesn't exists in 7.2 version, so * increment opcode if it is after O_REASS */ dst->opcode++; if (ccmdlen > ll) { printf("ipfw: opcode %d size truncated\n", ccmd->opcode); return EINVAL; } } rule->_pad = tmp->_pad; rule->set = tmp->set; rule->rulenum = tmp->rulenum; rule->cmd_len = tmp->cmd_len; rule->act_ofs = tmp->act_ofs; rule->next_rule = (struct ip_fw *)tmp->next_rule; rule->cmd_len = tmp->cmd_len; rule->id = 0; /* XXX see if is ok = 0 */ rule->pcnt = tmp->pcnt; rule->bcnt = tmp->bcnt; rule->timestamp = tmp->timestamp; free (tmp, M_TEMP); return 0; } /* * Named object api * */ void ipfw_init_srv(struct ip_fw_chain *ch) { ch->srvmap = ipfw_objhash_create(IPFW_OBJECTS_DEFAULT); ch->srvstate = malloc(sizeof(void *) * IPFW_OBJECTS_DEFAULT, M_IPFW, M_WAITOK | M_ZERO); } void ipfw_destroy_srv(struct ip_fw_chain *ch) { free(ch->srvstate, M_IPFW); ipfw_objhash_destroy(ch->srvmap); } /* * Allocate new bitmask which can be used to enlarge/shrink * named instance index. */ void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks) { size_t size; int max_blocks; u_long *idx_mask; KASSERT((items % BLOCK_ITEMS) == 0, ("bitmask size needs to power of 2 and greater or equal to %zu", BLOCK_ITEMS)); max_blocks = items / BLOCK_ITEMS; size = items / 8; idx_mask = malloc(size * IPFW_MAX_SETS, M_IPFW, M_WAITOK); /* Mark all as free */ memset(idx_mask, 0xFF, size * IPFW_MAX_SETS); *idx_mask &= ~(u_long)1; /* Skip index 0 */ *idx = idx_mask; *pblocks = max_blocks; } /* * Copy current bitmask index to new one. 
*/ void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, void **idx, int *blocks) { int old_blocks, new_blocks; u_long *old_idx, *new_idx; int i; old_idx = ni->idx_mask; old_blocks = ni->max_blocks; new_idx = *idx; new_blocks = *blocks; for (i = 0; i < IPFW_MAX_SETS; i++) { memcpy(&new_idx[new_blocks * i], &old_idx[old_blocks * i], old_blocks * sizeof(u_long)); } } /* * Swaps current @ni index with new one. */ void ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, void **idx, int *blocks) { int old_blocks; u_long *old_idx; old_idx = ni->idx_mask; old_blocks = ni->max_blocks; ni->idx_mask = *idx; ni->max_blocks = *blocks; /* Save old values */ *idx = old_idx; *blocks = old_blocks; } void ipfw_objhash_bitmap_free(void *idx, int blocks) { free(idx, M_IPFW); } /* * Creates named hash instance. * Must be called without holding any locks. * Return pointer to new instance. */ struct namedobj_instance * ipfw_objhash_create(uint32_t items) { struct namedobj_instance *ni; int i; size_t size; size = sizeof(struct namedobj_instance) + sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE + sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE; ni = malloc(size, M_IPFW, M_WAITOK | M_ZERO); ni->nn_size = NAMEDOBJ_HASH_SIZE; ni->nv_size = NAMEDOBJ_HASH_SIZE; ni->names = (struct namedobjects_head *)(ni +1); ni->values = &ni->names[ni->nn_size]; for (i = 0; i < ni->nn_size; i++) TAILQ_INIT(&ni->names[i]); for (i = 0; i < ni->nv_size; i++) TAILQ_INIT(&ni->values[i]); /* Set default hashing/comparison functions */ ni->hash_f = objhash_hash_name; ni->cmp_f = objhash_cmp_name; /* Allocate bitmask separately due to possible resize */ ipfw_objhash_bitmap_alloc(items, (void*)&ni->idx_mask, &ni->max_blocks); return (ni); } void ipfw_objhash_destroy(struct namedobj_instance *ni) { free(ni->idx_mask, M_IPFW); free(ni, M_IPFW); } void ipfw_objhash_set_funcs(struct namedobj_instance *ni, objhash_hash_f *hash_f, objhash_cmp_f *cmp_f) { ni->hash_f = hash_f; ni->cmp_f = cmp_f; } 
static uint32_t objhash_hash_name(struct namedobj_instance *ni, const void *name, uint32_t set) { return (fnv_32_str((const char *)name, FNV1_32_INIT)); } static int objhash_cmp_name(struct named_object *no, const void *name, uint32_t set) { if ((strcmp(no->name, (const char *)name) == 0) && (no->set == set)) return (0); return (1); } static uint32_t objhash_hash_idx(struct namedobj_instance *ni, uint32_t val) { uint32_t v; v = val % (ni->nv_size - 1); return (v); } struct named_object * ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name) { struct named_object *no; uint32_t hash; hash = ni->hash_f(ni, name, set) % ni->nn_size; TAILQ_FOREACH(no, &ni->names[hash], nn_next) { if (ni->cmp_f(no, name, set) == 0) return (no); } return (NULL); } /* * Find named object by @uid. * Check @tlvs for valid data inside. * * Returns pointer to found TLV or NULL. */ ipfw_obj_ntlv * ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx, uint32_t etlv) { ipfw_obj_ntlv *ntlv; uintptr_t pa, pe; int l; pa = (uintptr_t)tlvs; pe = pa + len; l = 0; for (; pa < pe; pa += l) { ntlv = (ipfw_obj_ntlv *)pa; l = ntlv->head.length; if (l != sizeof(*ntlv)) return (NULL); if (ntlv->idx != uidx) continue; /* * When userland has specified zero TLV type, do * not compare it with eltv. In some cases userland * doesn't know what type should it have. Use only * uidx and name for search named_object. */ if (ntlv->head.type != 0 && ntlv->head.type != (uint16_t)etlv) continue; if (ipfw_check_object_name_generic(ntlv->name) != 0) return (NULL); return (ntlv); } return (NULL); } /* * Finds object config based on either legacy index * or name in ntlv. * Note @ti structure contains unchecked data from userland. 
* * Returns 0 in success and fills in @pno with found config */ int ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti, uint32_t etlv, struct named_object **pno) { char *name; ipfw_obj_ntlv *ntlv; uint32_t set; if (ti->tlvs == NULL) return (EINVAL); ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, etlv); if (ntlv == NULL) return (EINVAL); name = ntlv->name; /* * Use set provided by @ti instead of @ntlv one. * This is needed due to different sets behavior * controlled by V_fw_tables_sets. */ set = ti->set; *pno = ipfw_objhash_lookup_name(ni, set, name); if (*pno == NULL) return (ESRCH); return (0); } /* * Find named object by name, considering also its TLV type. */ struct named_object * ipfw_objhash_lookup_name_type(struct namedobj_instance *ni, uint32_t set, uint32_t type, const char *name) { struct named_object *no; uint32_t hash; hash = ni->hash_f(ni, name, set) % ni->nn_size; TAILQ_FOREACH(no, &ni->names[hash], nn_next) { if (ni->cmp_f(no, name, set) == 0 && no->etlv == (uint16_t)type) return (no); } return (NULL); } struct named_object * ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t kidx) { struct named_object *no; uint32_t hash; hash = objhash_hash_idx(ni, kidx); TAILQ_FOREACH(no, &ni->values[hash], nv_next) { if (no->kidx == kidx) return (no); } return (NULL); } int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a, struct named_object *b) { if ((strcmp(a->name, b->name) == 0) && a->set == b->set) return (1); return (0); } void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no) { uint32_t hash; hash = ni->hash_f(ni, no->name, no->set) % ni->nn_size; TAILQ_INSERT_HEAD(&ni->names[hash], no, nn_next); hash = objhash_hash_idx(ni, no->kidx); TAILQ_INSERT_HEAD(&ni->values[hash], no, nv_next); ni->count++; } void ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no) { uint32_t hash; hash = ni->hash_f(ni, no->name, no->set) % ni->nn_size; 
TAILQ_REMOVE(&ni->names[hash], no, nn_next); hash = objhash_hash_idx(ni, no->kidx); TAILQ_REMOVE(&ni->values[hash], no, nv_next); ni->count--; } uint32_t ipfw_objhash_count(struct namedobj_instance *ni) { return (ni->count); } /* * Runs @func for each found named object. * It is safe to delete objects from callback */ -void +int ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, void *arg) { struct named_object *no, *no_tmp; - int i; + int i, ret; for (i = 0; i < ni->nn_size; i++) { - TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) - f(ni, no, arg); + TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) { + ret = f(ni, no, arg); + if (ret != 0) + return (ret); + } } + return (0); } /* * Removes index from given set. * Returns 0 on success. */ int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx) { u_long *mask; int i, v; i = idx / BLOCK_ITEMS; v = idx % BLOCK_ITEMS; if (i >= ni->max_blocks) return (1); mask = &ni->idx_mask[i]; if ((*mask & ((u_long)1 << v)) != 0) return (1); /* Mark as free */ *mask |= (u_long)1 << v; /* Update free offset */ if (ni->free_off[0] > i) ni->free_off[0] = i; return (0); } /* * Allocate new index in given instance and stores in in @pidx. * Returns 0 on success. 
*/ int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx) { struct namedobj_instance *ni; u_long *mask; int i, off, v; ni = (struct namedobj_instance *)n; off = ni->free_off[0]; mask = &ni->idx_mask[off]; for (i = off; i < ni->max_blocks; i++, mask++) { if ((v = ffsl(*mask)) == 0) continue; /* Mark as busy */ *mask &= ~ ((u_long)1 << (v - 1)); ni->free_off[0] = i; v = BLOCK_ITEMS * i + v - 1; *pidx = v; return (0); } return (1); } /* end of file */ Index: head/sys/netpfil/ipfw/ip_fw_table.c =================================================================== --- head/sys/netpfil/ipfw/ip_fw_table.c (revision 299151) +++ head/sys/netpfil/ipfw/ip_fw_table.c (revision 299152) @@ -1,3385 +1,3389 @@ /*- * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko. * Copyright (c) 2014 Yandex LLC * Copyright (c) 2014 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Lookup table support for ipfw. * * This file contains handlers for all generic tables' operations: * add/del/flush entries, list/dump tables etc.. * * Table data modification is protected by both UH and runtime lock * while reading configuration/data is protected by UH lock. * * Lookup algorithms for all table types are located in ip_fw_table_algo.c */ #include "opt_ipfw.h" #include #include #include #include #include #include #include #include #include #include #include /* ip_fw.h requires IFNAMSIZ */ #include #include /* struct ipfw_rule_ref */ #include #include #include /* * Table has the following `type` concepts: * * `no.type` represents lookup key type (addr, ifp, uid, etc..) * vmask represents bitmask of table values which are present at the moment. * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old * single-value-for-all approach. 
*/

/*
 * Per-table configuration record.
 * Embeds generic named object @no (its refcnt/kidx/subtype fields are
 * used throughout this file) followed by table-specific state.
 */
struct table_config {
	struct named_object	no;
	uint8_t		tflags;		/* type flags */
	uint8_t		locked;		/* 1 if locked from changes */
	uint8_t		linked;		/* 1 if already linked */
	uint8_t		ochanged;	/* used by set swapping */
	uint8_t		vshared;	/* 1 if using shared value array */
	uint8_t		spare[3];
	uint32_t	count;		/* Number of records */
	uint32_t	limit;		/* Max number of records */
	uint32_t	vmask;		/* bitmask with supported values */
	uint32_t	ocount;		/* used by set swapping */
	uint64_t	gencnt;		/* generation count */
	char		tablename[64];	/* table name */
	struct table_algo	*ta;	/* Callbacks for given algo */
	void		*astate;	/* algorithm state */
	struct table_info	ti_copy;	/* data to put to table_info */
	struct namedobj_instance	*vi;
};

static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
    struct table_config **tc);
static struct table_config *find_table(struct namedobj_instance *ni,
    struct tid_info *ti);
static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
    struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
static void free_table_config(struct namedobj_instance *ni,
    struct table_config *tc);
static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
    char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref);
static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
    struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
/* Operation codes passed as @op to the add/delete helpers below */
#define	OP_ADD	1
#define	OP_DEL	0
static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
    struct sockopt_data *sd);
static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
    ipfw_xtable_info *i);
static int dump_table_tentry(void *e, void *arg);
static int dump_table_xentry(void *e, void *arg);

static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
    struct tid_info *b);

static int check_table_name(const char *name);
static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
    struct table_config *tc, struct table_info *ti, uint32_t count);
static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);

static struct table_algo *find_table_algo(struct tables_config *tableconf,
    struct tid_info *ti, char *name);

static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);

/* Name hash instance of the tables config attached to @chain */
#define	CHAIN_TO_NI(chain)	(CHAIN_TO_TCFG(chain)->namehash)
/* Address of the table_info slot with index @k in the chain's tablestate */
#define	KIDX_TO_TI(ch, k)	(&(((struct table_info *)(ch)->tablestate)[k]))

#define	TA_BUF_SZ	128	/* On-stack buffer for add/delete state */

/*
 * Walks the per-chain list of in-flight operation states and invokes
 * each entry's callback with @object, letting every entry decide
 * whether @object concerns it.
 */
void
rollback_toperation_state(struct ip_fw_chain *ch, void *object)
{
	struct tables_config *tcfg;
	struct op_state *os;

	tcfg = CHAIN_TO_TCFG(ch);
	TAILQ_FOREACH(os, &tcfg->state_list, next)
		os->func(object, os);
}

/*
 * Inserts operation state @ts at the head of the per-chain state list.
 */
void
add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
{
	struct tables_config *tcfg;

	tcfg = CHAIN_TO_TCFG(ch);
	TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next);
}

/*
 * Removes operation state @ts from the per-chain state list.
 */
void
del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
{
	struct tables_config *tcfg;

	tcfg = CHAIN_TO_TCFG(ch);
	TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next);
}

/*
 * Increments reference count of table @tc.
 */
void
tc_ref(struct table_config *tc)
{

	tc->no.refcnt++;
}

/*
 * Decrements reference count of table @tc.
 */
void
tc_unref(struct table_config *tc)
{

	tc->no.refcnt--;
}

/*
 * Returns pointer to the value with index @kidx inside the chain-wide
 * value array (ch->valuestate). @tc is not used here.
 */
static struct table_value *
get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
{
	struct table_value *pval;

	pval = (struct table_value *)ch->valuestate;

	return (&pval[kidx]);
}

/*
 * Checks if we're able to insert/update entry @tei into table
 * w.r.t @tc limits.
 * May alter @tei to indicate insertion error / insert
 * options.
* * Returns 0 if operation can be performed/ */ static int check_table_limit(struct table_config *tc, struct tentry_info *tei) { if (tc->limit == 0 || tc->count < tc->limit) return (0); if ((tei->flags & TEI_FLAGS_UPDATE) == 0) { /* Notify userland on error cause */ tei->flags |= TEI_FLAGS_LIMIT; return (EFBIG); } /* * We have UPDATE flag set. * Permit updating record (if found), * but restrict adding new one since we've * already hit the limit. */ tei->flags |= TEI_FLAGS_DONTADD; return (0); } /* * Convert algorithm callback return code into * one of pre-defined states known by userland. */ static void store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num) { int flag; flag = 0; switch (error) { case 0: if (op == OP_ADD && num != 0) flag = TEI_FLAGS_ADDED; if (op == OP_DEL) flag = TEI_FLAGS_DELETED; break; case ENOENT: flag = TEI_FLAGS_NOTFOUND; break; case EEXIST: flag = TEI_FLAGS_EXISTS; break; default: flag = TEI_FLAGS_ERROR; } tei->flags |= flag; } /* * Creates and references table with default parameters. * Saves table config, algo and allocated kidx info @ptc, @pta and * @pkidx if non-zero. * Used for table auto-creation to support old binaries. * * Returns 0 on success. */ static int create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti, uint16_t *pkidx) { ipfw_xtable_info xi; int error; memset(&xi, 0, sizeof(xi)); /* Set default value mask for legacy clients */ xi.vmask = IPFW_VTYPE_LEGACY; error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1); if (error != 0) return (error); return (0); } /* * Find and reference existing table optionally * creating new one. * * Saves found table config into @ptc. * Note function may drop/acquire UH_WLOCK. * Returns 0 if table was found/created and referenced * or non-zero return code. 
*/
static int
find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
    struct tentry_info *tei, uint32_t count, int op,
    struct table_config **ptc)
{
	struct namedobj_instance *ni;
	struct table_config *tc;
	uint16_t kidx;
	int error;

	IPFW_UH_WLOCK_ASSERT(ch);

	ni = CHAIN_TO_NI(ch);
	tc = NULL;
	if ((tc = find_table(ni, ti)) != NULL) {
		/* check table type */
		if (tc->no.subtype != ti->type)
			return (EINVAL);

		/* Locked tables may not be modified */
		if (tc->locked != 0)
			return (EACCES);

		/* Try to exit early on limit hit */
		if (op == OP_ADD && count == 1 &&
		    check_table_limit(tc, tei) != 0)
			return (EFBIG);

		/* Reference and return */
		tc->no.refcnt++;
		*ptc = tc;
		return (0);
	}

	/* Nothing to delete from a table which does not exist */
	if (op == OP_DEL)
		return (ESRCH);

	/* Compatibility mode: create new table for old clients */
	if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
		return (ESRCH);

	/* Table creation is performed with UH lock dropped */
	IPFW_UH_WUNLOCK(ch);
	error = create_table_compat(ch, ti, &kidx);
	IPFW_UH_WLOCK(ch);

	if (error != 0)
		return (error);

	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
	KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx));

	/* OK, now we've got referenced table. */
	*ptc = tc;
	return (0);
}

/*
 * Rolls back already @added to @tc entries using state array @ta_buf_m.
 * Assume the following layout:
 * 1) ADD state (ta_buf_m[0] ... ta_buf_m[added - 1]) for handling update cases
 * 2) DEL state (ta_buf_m[count] ... ta_buf_m[count + added - 1])
 *   for storing deleted state
 */
static void
rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
    struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
    uint32_t count, uint32_t added)
{
	struct table_algo *ta;
	struct tentry_info *ptei;
	caddr_t v, vv;
	size_t ta_buf_sz;
	int error, i;
	uint32_t num;

	IPFW_UH_WLOCK_ASSERT(ch);

	ta = tc->ta;
	ta_buf_sz = ta->ta_buf_size;
	/* @v walks the ADD state, @vv walks the DEL state placed after it */
	v = ta_buf_m;
	vv = v + count * ta_buf_sz;
	for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
		ptei = &tei[i];
		if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {

			/*
			 * We have old value stored by previous
			 * call in @ptei->value. Do add once again
			 * to restore it.
			 */
			error = ta->add(tc->astate, tinfo, ptei, v, &num);
			KASSERT(error == 0, ("rollback UPDATE fail"));
			KASSERT(num == 0, ("rollback UPDATE fail2"));
			continue;
		}

		/* Entry was newly inserted: delete it again */
		error = ta->prepare_del(ch, ptei, vv);
		KASSERT(error == 0, ("pre-rollback INSERT failed"));
		error = ta->del(tc->astate, tinfo, ptei, vv, &num);
		KASSERT(error == 0, ("rollback INSERT failed"));
		tc->count -= num;
	}
}

/*
 * Prepares add/del state for all @count entries in @tei.
 * Uses either stack buffer (@ta_buf) or allocates a new one.
 * Stores pointer to allocated buffer back to @ta_buf.
 *
 * Returns 0 on success.
 */
static int
prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
    struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
{
	caddr_t ta_buf_m, v;
	size_t ta_buf_sz, sz;
	struct tentry_info *ptei;
	int error, i;

	error = 0;
	ta_buf_sz = ta->ta_buf_size;
	if (count == 1) {
		/* Single add/delete, use on-stack buffer */
		memset(*ta_buf, 0, TA_BUF_SZ);
		ta_buf_m = *ta_buf;
	} else {

		/*
		 * Multiple adds/deletes, allocate larger buffer
		 *
		 * Note we need 2xcount buffer for add case:
		 * we have hold both ADD state
		 * and DELETE state (this may be needed
		 * if we need to rollback all changes)
		 */
		sz = count * ta_buf_sz;
		ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
		    M_WAITOK | M_ZERO);
	}

	v = ta_buf_m;
	for (i = 0; i < count; i++, v += ta_buf_sz) {
		ptei = &tei[i];
		error = (op == OP_ADD) ?
		    ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);

		/*
		 * Some syntax error (incorrect mask, or address, or
		 * anything). Return error regardless of atomicity
		 * settings.
		 */
		if (error != 0)
			break;
	}

	/* Buffer is handed back even on error so the caller can flush it */
	*ta_buf = ta_buf_m;
	return (error);
}

/*
 * Flushes allocated state for each @count entries in @tei.
 * Frees @ta_buf_m if differs from stack buffer @ta_buf.
*/ static void flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, struct tentry_info *tei, uint32_t count, int rollback, caddr_t ta_buf_m, caddr_t ta_buf) { caddr_t v; struct tentry_info *ptei; size_t ta_buf_sz; int i; ta_buf_sz = ta->ta_buf_size; /* Run cleaning callback anyway */ v = ta_buf_m; for (i = 0; i < count; i++, v += ta_buf_sz) { ptei = &tei[i]; ta->flush_entry(ch, ptei, v); if (ptei->ptv != NULL) { free(ptei->ptv, M_IPFW); ptei->ptv = NULL; } } /* Clean up "deleted" state in case of rollback */ if (rollback != 0) { v = ta_buf_m + count * ta_buf_sz; for (i = 0; i < count; i++, v += ta_buf_sz) ta->flush_entry(ch, &tei[i], v); } if (ta_buf_m != ta_buf) free(ta_buf_m, M_TEMP); } static void rollback_add_entry(void *object, struct op_state *_state) { struct ip_fw_chain *ch; struct tableop_state *ts; ts = (struct tableop_state *)_state; if (ts->tc != object && ts->ch != object) return; ch = ts->ch; IPFW_UH_WLOCK_ASSERT(ch); /* Call specifid unlockers */ rollback_table_values(ts); /* Indicate we've called */ ts->modified = 1; } /* * Adds/updates one or more entries in table @ti. * * Function may drop/reacquire UH wlock multiple times due to * items alloc, algorithm callbacks (check_space), value linkage * (new values, value storage realloc), etc.. * Other processes like other adds (which may involve storage resize), * table swaps (which changes table data and may change algo type), * table modify (which may change value mask) may be executed * simultaneously so we need to deal with it. * * The following approach was implemented: * we have per-chain linked list, protected with UH lock. * add_table_entry prepares special on-stack structure wthich is passed * to its descendants. Users add this structure to this list before unlock. * After performing needed operations and acquiring UH lock back, each user * checks if structure has changed. If true, it rolls local state back and * returns without error to the caller. 
* add_table_entry() on its own checks if structure has changed and restarts * its operation from the beginning (goto restart). * * Functions which are modifying fields of interest (currently * resize_shared_value_storage() and swap_tables() ) * traverses given list while holding UH lock immediately before * performing their operations calling function provided be list entry * ( currently rollback_add_entry ) which performs rollback for all necessary * state and sets appropriate values in structure indicating rollback * has happened. * * Algo interaction: * Function references @ti first to ensure table won't * disappear or change its type. * After that, prepare_add callback is called for each @tei entry. * Next, we try to add each entry under UH+WHLOCK * using add() callback. * Finally, we free all state by calling flush_entry callback * for each @tei. * * Returns 0 on success. */ int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, struct tentry_info *tei, uint8_t flags, uint32_t count) { struct table_config *tc; struct table_algo *ta; uint16_t kidx; int error, first_error, i, rollback; uint32_t num, numadd; struct tentry_info *ptei; struct tableop_state ts; char ta_buf[TA_BUF_SZ]; caddr_t ta_buf_m, v; memset(&ts, 0, sizeof(ts)); ta = NULL; IPFW_UH_WLOCK(ch); /* * Find and reference existing table. 
*/ restart: if (ts.modified != 0) { IPFW_UH_WUNLOCK(ch); flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf); memset(&ts, 0, sizeof(ts)); ta = NULL; IPFW_UH_WLOCK(ch); } error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc); if (error != 0) { IPFW_UH_WUNLOCK(ch); return (error); } ta = tc->ta; /* Fill in tablestate */ ts.ch = ch; ts.opstate.func = rollback_add_entry; ts.tc = tc; ts.vshared = tc->vshared; ts.vmask = tc->vmask; ts.ta = ta; ts.tei = tei; ts.count = count; rollback = 0; add_toperation_state(ch, &ts); IPFW_UH_WUNLOCK(ch); /* Allocate memory and prepare record(s) */ /* Pass stack buffer by default */ ta_buf_m = ta_buf; error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m); IPFW_UH_WLOCK(ch); del_toperation_state(ch, &ts); /* Drop reference we've used in first search */ tc->no.refcnt--; /* Check prepare_batch_buffer() error */ if (error != 0) goto cleanup; /* * Check if table swap has happened. * (so table algo might be changed). * Restart operation to achieve consistent behavior. */ if (ts.modified != 0) goto restart; /* * Link all values values to shared/per-table value array. * * May release/reacquire UH_WLOCK. */ error = ipfw_link_table_values(ch, &ts); if (error != 0) goto cleanup; if (ts.modified != 0) goto restart; /* * Ensure we are able to add all entries without additional * memory allocations. May release/reacquire UH_WLOCK. */ kidx = tc->no.kidx; error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count); if (error != 0) goto cleanup; if (ts.modified != 0) goto restart; /* We've got valid table in @tc. 
Let's try to add data */ kidx = tc->no.kidx; ta = tc->ta; numadd = 0; first_error = 0; IPFW_WLOCK(ch); v = ta_buf_m; for (i = 0; i < count; i++, v += ta->ta_buf_size) { ptei = &tei[i]; num = 0; /* check limit before adding */ if ((error = check_table_limit(tc, ptei)) == 0) { error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v, &num); /* Set status flag to inform userland */ store_tei_result(ptei, OP_ADD, error, num); } if (error == 0) { /* Update number of records to ease limit checking */ tc->count += num; numadd += num; continue; } if (first_error == 0) first_error = error; /* * Some error have happened. Check our atomicity * settings: continue if atomicity is not required, * rollback changes otherwise. */ if ((flags & IPFW_CTF_ATOMIC) == 0) continue; rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx), tei, ta_buf_m, count, i); rollback = 1; break; } IPFW_WUNLOCK(ch); ipfw_garbage_table_values(ch, tc, tei, count, rollback); /* Permit post-add algorithm grow/rehash. */ if (numadd != 0) check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); /* Return first error to user, if any */ error = first_error; cleanup: IPFW_UH_WUNLOCK(ch); flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf); return (error); } /* * Deletes one or more entries in table @ti. * * Returns 0 on success. */ int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, struct tentry_info *tei, uint8_t flags, uint32_t count) { struct table_config *tc; struct table_algo *ta; struct tentry_info *ptei; uint16_t kidx; int error, first_error, i; uint32_t num, numdel; char ta_buf[TA_BUF_SZ]; caddr_t ta_buf_m, v; /* * Find and reference existing table. 
*/ IPFW_UH_WLOCK(ch); error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc); if (error != 0) { IPFW_UH_WUNLOCK(ch); return (error); } ta = tc->ta; IPFW_UH_WUNLOCK(ch); /* Allocate memory and prepare record(s) */ /* Pass stack buffer by default */ ta_buf_m = ta_buf; error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m); if (error != 0) goto cleanup; IPFW_UH_WLOCK(ch); /* Drop reference we've used in first search */ tc->no.refcnt--; /* * Check if table algo is still the same. * (changed ta may be the result of table swap). */ if (ta != tc->ta) { IPFW_UH_WUNLOCK(ch); error = EINVAL; goto cleanup; } kidx = tc->no.kidx; numdel = 0; first_error = 0; IPFW_WLOCK(ch); v = ta_buf_m; for (i = 0; i < count; i++, v += ta->ta_buf_size) { ptei = &tei[i]; num = 0; error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v, &num); /* Save state for userland */ store_tei_result(ptei, OP_DEL, error, num); if (error != 0 && first_error == 0) first_error = error; tc->count -= num; numdel += num; } IPFW_WUNLOCK(ch); /* Unlink non-used values */ ipfw_garbage_table_values(ch, tc, tei, count, 0); if (numdel != 0) { /* Run post-del hook to permit shrinking */ check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); } IPFW_UH_WUNLOCK(ch); /* Return first error to user, if any */ error = first_error; cleanup: flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf); return (error); } /* * Ensure that table @tc has enough space to add @count entries without * need for reallocation. * * Callbacks order: * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize. * * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags. * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage * 3) modify (UH_WLOCK + WLOCK) - switch pointers * 4) flush_modify (UH_WLOCK) - free state, if needed * * Returns 0 on success. 
*/
static int
check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
    struct table_config *tc, struct table_info *ti, uint32_t count)
{
	struct table_algo *ta;
	uint64_t pflags;
	char ta_buf[TA_BUF_SZ];
	int error;

	/*
	 * Locking contract: UH write lock is held on entry and is
	 * held again on every return path. It is dropped only
	 * temporarily around prepare_mod().
	 */
	IPFW_UH_WLOCK_ASSERT(ch);

	error = 0;
	ta = tc->ta;
	/* Algorithms without need_modify callback never resize */
	if (ta->need_modify == NULL)
		return (0);

	/* Acquire reference not to lose @tc between locks/unlocks */
	tc->no.refcnt++;

	/*
	 * TODO: think about avoiding race between large add/large delete
	 * operation on algorithm which implements shrinking along with
	 * growing.
	 */
	while (true) {
		pflags = 0;
		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
			error = 0;
			break;
		}

		/* We have to shrink/grow table */
		if (ts != NULL)
			add_toperation_state(ch, ts);
		IPFW_UH_WUNLOCK(ch);

		memset(&ta_buf, 0, sizeof(ta_buf));
		error = ta->prepare_mod(ta_buf, &pflags);

		IPFW_UH_WLOCK(ch);
		if (ts != NULL)
			del_toperation_state(ch, ts);

		if (error != 0)
			break;

		if (ts != NULL && ts->modified != 0) {

			/*
			 * Swap operation has happened
			 * so we're currently operating on other
			 * table data. Stop doing this.
			 */
			ta->flush_mod(ta_buf);
			break;
		}

		/* Check if we still need to alter table */
		ti = KIDX_TO_TI(ch, tc->no.kidx);
		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {

			/*
			 * Other thread has already performed resize.
			 * Flush our state and return.
			 *
			 * UH lock must stay held here: the reference
			 * is dropped below under this lock and
			 * callers unlock it themselves afterwards.
			 */
			ta->flush_mod(ta_buf);
			break;
		}

		error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
		if (error == 0) {
			/* Do actual modification */
			IPFW_WLOCK(ch);
			ta->modify(tc->astate, ti, ta_buf, pflags);
			IPFW_WUNLOCK(ch);
		}

		/* Anyway, flush data and retry */
		ta->flush_mod(ta_buf);
	}

	tc->no.refcnt--;
	return (error);
}

/*
 * Adds or deletes record in table.
* Data layout (v0):
 * Request: [ ip_fw3_opheader ipfw_table_xentry ]
 *
 * Returns 0 on success
 */
static int
manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	ipfw_table_xentry *xent;
	struct tentry_info tei;
	struct tid_info ti;
	struct table_value v;
	int hdrlen, consumed;

	hdrlen = offsetof(ipfw_table_xentry, k);

	/* Request has to hold at least opheader and fixed xentry part */
	if (sd->valsize < (sizeof(*op3) + hdrlen))
		return (EINVAL);

	consumed = sizeof(ip_fw3_opheader);

	/* xentry follows opheader; validate its self-declared length */
	xent = (ipfw_table_xentry *)(op3 + 1);
	if (xent->len < hdrlen || xent->len + consumed > sd->valsize)
		return (EINVAL);

	/* Table identification */
	memset(&ti, 0, sizeof(ti));
	ti.uidx = xent->tbl;
	ti.type = xent->type;

	/* Entry description */
	memset(&tei, 0, sizeof(tei));
	tei.paddr = &xent->k;
	tei.masklen = xent->masklen;
	ipfw_import_table_value_legacy(xent->value, &v);
	tei.pvalue = &v;
	/* Old requests compatibility */
	tei.flags = TEI_FLAGS_COMPAT;
	/* For address tables the key size selects the address family */
	if (xent->type == IPFW_TABLE_ADDR)
		tei.subtype = (xent->len - hdrlen == sizeof(in_addr_t)) ?
		    AF_INET : AF_INET6;

	if (op3->opcode == IP_FW_TABLE_XADD)
		return (add_table_entry(ch, &ti, &tei, 0, 1));
	return (del_table_entry(ch, &ti, &tei, 0, 1));
}

/*
 * Adds or deletes record in table.
* Data layout (v1)(current):
 * Request: [ ipfw_obj_header
 *   ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
 * ]
 *
 * All entries of one request have to reference the same table index
 * (ENOTSUP otherwise). Per-entry results are written back into
 * the request buffer.
 *
 * Returns 0 on success
 */
static int
manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	ipfw_obj_tentry *tent, *ptent;
	ipfw_obj_ctlv *ctlv;
	ipfw_obj_header *oh;
	struct tentry_info *ptei, tei, *tei_buf;
	struct tid_info ti;
	int error, i, kidx, read;

	/* Check minimum header size */
	if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
		return (EINVAL);

	/* Check if passed data is too long */
	if (sd->valsize != sd->kavail)
		return (EINVAL);

	oh = (ipfw_obj_header *)sd->kbuf;

	/* Basic length checks for TLVs */
	if (oh->ntlv.head.length != sizeof(oh->ntlv))
		return (EINVAL);

	read = sizeof(*oh);

	/* Container TLV has to cover the rest of the request exactly */
	ctlv = (ipfw_obj_ctlv *)(oh + 1);
	if (ctlv->head.length + read != sd->valsize)
		return (EINVAL);

	read += sizeof(*ctlv);
	/* ... and so do the @count fixed-size entries following it */
	tent = (ipfw_obj_tentry *)(ctlv + 1);
	if (ctlv->count * sizeof(*tent) + read != sd->valsize)
		return (EINVAL);

	/* Empty list: nothing to do */
	if (ctlv->count == 0)
		return (0);

	/*
	 * Mark entire buffer as "read".
	 * This instructs sopt api write it back
	 * after function return.
	 */
	ipfw_get_sopt_header(sd, sd->valsize);

	/* Perform basic checks for each entry */
	ptent = tent;
	kidx = tent->idx;
	for (i = 0; i < ctlv->count; i++, ptent++) {
		if (ptent->head.length != sizeof(*ptent))
			return (EINVAL);
		if (ptent->idx != kidx)
			return (ENOTSUP);
	}

	/* Convert data into kernel request objects */
	objheader_to_ti(oh, &ti);
	ti.type = oh->ntlv.type;
	ti.uidx = kidx;

	/* Use on-stack buffer for single add/del */
	if (ctlv->count == 1) {
		memset(&tei, 0, sizeof(tei));
		tei_buf = &tei;
	} else
		tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP,
		    M_WAITOK | M_ZERO);

	ptei = tei_buf;
	ptent = tent;
	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
		ptei->paddr = &ptent->k;
		ptei->subtype = ptent->subtype;
		ptei->masklen = ptent->masklen;
		if (ptent->head.flags & IPFW_TF_UPDATE)
			ptei->flags |= TEI_FLAGS_UPDATE;

		/* Value is converted in place inside the request buffer */
		ipfw_import_table_value_v1(&ptent->v.value);
		ptei->pvalue = (struct table_value *)&ptent->v.value;
	}

	error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ?
	    add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) :
	    del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count);

	/* Translate result back to userland */
	ptei = tei_buf;
	ptent = tent;
	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
		/* First matching flag wins; result is untouched if none is set */
		if (ptei->flags & TEI_FLAGS_ADDED)
			ptent->result = IPFW_TR_ADDED;
		else if (ptei->flags & TEI_FLAGS_DELETED)
			ptent->result = IPFW_TR_DELETED;
		else if (ptei->flags & TEI_FLAGS_UPDATED)
			ptent->result = IPFW_TR_UPDATED;
		else if (ptei->flags & TEI_FLAGS_LIMIT)
			ptent->result = IPFW_TR_LIMIT;
		else if (ptei->flags & TEI_FLAGS_ERROR)
			ptent->result = IPFW_TR_ERROR;
		else if (ptei->flags & TEI_FLAGS_NOTFOUND)
			ptent->result = IPFW_TR_NOTFOUND;
		else if (ptei->flags & TEI_FLAGS_EXISTS)
			ptent->result = IPFW_TR_EXISTS;
		ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value);
	}

	/* Release batch buffer unless the on-stack one was used */
	if (tei_buf != &tei)
		free(tei_buf, M_TEMP);

	return (error);
}

/*
 * Looks up an entry in given table.
* Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_obj_tentry ] * Reply: [ ipfw_obj_header ipfw_obj_tentry ] * * Returns 0 on success */ static int find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_tentry *tent; ipfw_obj_header *oh; struct tid_info ti; struct table_config *tc; struct table_algo *ta; struct table_info *kti; struct namedobj_instance *ni; int error; size_t sz; /* Check minimum header size */ sz = sizeof(*oh) + sizeof(*tent); if (sd->valsize != sz) return (EINVAL); oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); tent = (ipfw_obj_tentry *)(oh + 1); /* Basic length checks for TLVs */ if (oh->ntlv.head.length != sizeof(oh->ntlv)) return (EINVAL); objheader_to_ti(oh, &ti); ti.type = oh->ntlv.type; ti.uidx = tent->idx; IPFW_UH_RLOCK(ch); ni = CHAIN_TO_NI(ch); /* * Find existing table and check its type . */ ta = NULL; if ((tc = find_table(ni, &ti)) == NULL) { IPFW_UH_RUNLOCK(ch); return (ESRCH); } /* check table type */ if (tc->no.subtype != ti.type) { IPFW_UH_RUNLOCK(ch); return (EINVAL); } kti = KIDX_TO_TI(ch, tc->no.kidx); ta = tc->ta; if (ta->find_tentry == NULL) return (ENOTSUP); error = ta->find_tentry(tc->astate, kti, tent); IPFW_UH_RUNLOCK(ch); return (error); } /* * Flushes all entries or destroys given table. 
* Data layout (v0)(current): * Request: [ ipfw_obj_header ] * * Returns 0 on success */ static int flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { int error; struct _ipfw_obj_header *oh; struct tid_info ti; if (sd->valsize != sizeof(*oh)) return (EINVAL); oh = (struct _ipfw_obj_header *)op3; objheader_to_ti(oh, &ti); if (op3->opcode == IP_FW_TABLE_XDESTROY) error = destroy_table(ch, &ti); else if (op3->opcode == IP_FW_TABLE_XFLUSH) error = flush_table(ch, &ti); else return (ENOTSUP); return (error); } static void restart_flush(void *object, struct op_state *_state) { struct tableop_state *ts; ts = (struct tableop_state *)_state; if (ts->tc != object) return; /* Indicate we've called */ ts->modified = 1; } /* * Flushes given table. * * Function create new table instance with the same * parameters, swaps it with old one and * flushes state without holding runtime WLOCK. * * Returns 0 on success. */ int flush_table(struct ip_fw_chain *ch, struct tid_info *ti) { struct namedobj_instance *ni; struct table_config *tc; struct table_algo *ta; struct table_info ti_old, ti_new, *tablestate; void *astate_old, *astate_new; char algostate[64], *pstate; struct tableop_state ts; int error, need_gc; uint16_t kidx; uint8_t tflags; /* * Stage 1: save table algorithm. * Reference found table to ensure it won't disappear. 
*/ IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc = find_table(ni, ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } need_gc = 0; astate_new = NULL; memset(&ti_new, 0, sizeof(ti_new)); restart: /* Set up swap handler */ memset(&ts, 0, sizeof(ts)); ts.opstate.func = restart_flush; ts.tc = tc; ta = tc->ta; /* Do not flush readonly tables */ if ((ta->flags & TA_FLAG_READONLY) != 0) { IPFW_UH_WUNLOCK(ch); return (EACCES); } /* Save startup algo parameters */ if (ta->print_config != NULL) { ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx), algostate, sizeof(algostate)); pstate = algostate; } else pstate = NULL; tflags = tc->tflags; tc->no.refcnt++; add_toperation_state(ch, &ts); IPFW_UH_WUNLOCK(ch); /* * Stage 1.5: if this is not the first attempt, destroy previous state */ if (need_gc != 0) { ta->destroy(astate_new, &ti_new); need_gc = 0; } /* * Stage 2: allocate new table instance using same algo. */ memset(&ti_new, 0, sizeof(struct table_info)); error = ta->init(ch, &astate_new, &ti_new, pstate, tflags); /* * Stage 3: swap old state pointers with newly-allocated ones. * Decrease refcount. */ IPFW_UH_WLOCK(ch); tc->no.refcnt--; del_toperation_state(ch, &ts); if (error != 0) { IPFW_UH_WUNLOCK(ch); return (error); } /* * Restart operation if table swap has happened: * even if algo may be the same, algo init parameters * may change. Restart operation instead of doing * complex checks. */ if (ts.modified != 0) { /* Delay destroying data since we're holding UH lock */ need_gc = 1; goto restart; } ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; tablestate = (struct table_info *)ch->tablestate; IPFW_WLOCK(ch); ti_old = tablestate[kidx]; tablestate[kidx] = ti_new; IPFW_WUNLOCK(ch); astate_old = tc->astate; tc->astate = astate_new; tc->ti_copy = ti_new; tc->count = 0; /* Notify algo on real @ti address */ if (ta->change_ti != NULL) ta->change_ti(tc->astate, &tablestate[kidx]); /* * Stage 4: unref values. 
*/ ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old); IPFW_UH_WUNLOCK(ch); /* * Stage 5: perform real flush/destroy. */ ta->destroy(astate_old, &ti_old); return (0); } /* * Swaps two tables. * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_obj_ntlv ] * * Returns 0 on success */ static int swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { int error; struct _ipfw_obj_header *oh; struct tid_info ti_a, ti_b; if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv)) return (EINVAL); oh = (struct _ipfw_obj_header *)op3; ntlv_to_ti(&oh->ntlv, &ti_a); ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b); error = swap_tables(ch, &ti_a, &ti_b); return (error); } /* * Swaps two tables of the same type/valtype. * * Checks if tables are compatible and limits * permits swap, than actually perform swap. * * Each table consists of 2 different parts: * config: * @tc (with name, set, kidx) and rule bindings, which is "stable". * number of items * table algo * runtime: * runtime data @ti (ch->tablestate) * runtime cache in @tc * algo-specific data (@tc->astate) * * So we switch: * all runtime data * number of items * table algo * * After that we call @ti change handler for each table. * * Note that referencing @tc won't protect tc->ta from change. * XXX: Do we need to restrict swap between locked tables? * XXX: Do we need to exchange ftype? * * Returns 0 on success. */ static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a, struct tid_info *b) { struct namedobj_instance *ni; struct table_config *tc_a, *tc_b; struct table_algo *ta; struct table_info ti, *tablestate; void *astate; uint32_t count; /* * Stage 1: find both tables and ensure they are of * the same type. 
*/ IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc_a = find_table(ni, a)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } if ((tc_b = find_table(ni, b)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } /* It is very easy to swap between the same table */ if (tc_a == tc_b) { IPFW_UH_WUNLOCK(ch); return (0); } /* Check type and value are the same */ if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) { IPFW_UH_WUNLOCK(ch); return (EINVAL); } /* Check limits before swap */ if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) || (tc_b->limit != 0 && tc_a->count > tc_b->limit)) { IPFW_UH_WUNLOCK(ch); return (EFBIG); } /* Check if one of the tables is readonly */ if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) { IPFW_UH_WUNLOCK(ch); return (EACCES); } /* Notify we're going to swap */ rollback_toperation_state(ch, tc_a); rollback_toperation_state(ch, tc_b); /* Everything is fine, prepare to swap */ tablestate = (struct table_info *)ch->tablestate; ti = tablestate[tc_a->no.kidx]; ta = tc_a->ta; astate = tc_a->astate; count = tc_a->count; IPFW_WLOCK(ch); /* a <- b */ tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx]; tc_a->ta = tc_b->ta; tc_a->astate = tc_b->astate; tc_a->count = tc_b->count; /* b <- a */ tablestate[tc_b->no.kidx] = ti; tc_b->ta = ta; tc_b->astate = astate; tc_b->count = count; IPFW_WUNLOCK(ch); /* Ensure tc.ti copies are in sync */ tc_a->ti_copy = tablestate[tc_a->no.kidx]; tc_b->ti_copy = tablestate[tc_b->no.kidx]; /* Notify both tables on @ti change */ if (tc_a->ta->change_ti != NULL) tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]); if (tc_b->ta->change_ti != NULL) tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]); IPFW_UH_WUNLOCK(ch); return (0); } /* * Destroys table specified by @ti. 
* Data layout (v0)(current): * Request: [ ip_fw3_opheader ] * * Returns 0 on success */ static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti) { struct namedobj_instance *ni; struct table_config *tc; IPFW_UH_WLOCK(ch); ni = CHAIN_TO_NI(ch); if ((tc = find_table(ni, ti)) == NULL) { IPFW_UH_WUNLOCK(ch); return (ESRCH); } /* Do not permit destroying referenced tables */ if (tc->no.refcnt > 0) { IPFW_UH_WUNLOCK(ch); return (EBUSY); } IPFW_WLOCK(ch); unlink_table(ch, tc); IPFW_WUNLOCK(ch); /* Free obj index */ if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0) printf("Error unlinking kidx %d from table %s\n", tc->no.kidx, tc->tablename); /* Unref values used in tables while holding UH lock */ ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy); IPFW_UH_WUNLOCK(ch); free_table_config(ni, tc); return (0); } static uint32_t roundup2p(uint32_t v) { v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v++; return (v); } /* * Grow tables index. * * Returns 0 on success. */ int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables) { unsigned int ntables_old, tbl; struct namedobj_instance *ni; void *new_idx, *old_tablestate, *tablestate; struct table_info *ti; struct table_config *tc; int i, new_blocks; /* Check new value for validity */ if (ntables == 0) return (EINVAL); if (ntables > IPFW_TABLES_MAX) ntables = IPFW_TABLES_MAX; /* Alight to nearest power of 2 */ ntables = (unsigned int)roundup2p(ntables); /* Allocate new pointers */ tablestate = malloc(ntables * sizeof(struct table_info), M_IPFW, M_WAITOK | M_ZERO); ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks); IPFW_UH_WLOCK(ch); tbl = (ntables >= V_fw_tables_max) ? 
V_fw_tables_max : ntables; ni = CHAIN_TO_NI(ch); /* Temporary restrict decreasing max_tables */ if (ntables < V_fw_tables_max) { /* * FIXME: Check if we really can shrink */ IPFW_UH_WUNLOCK(ch); return (EINVAL); } /* Copy table info/indices */ memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl); ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks); IPFW_WLOCK(ch); /* Change pointers */ old_tablestate = ch->tablestate; ch->tablestate = tablestate; ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks); ntables_old = V_fw_tables_max; V_fw_tables_max = ntables; IPFW_WUNLOCK(ch); /* Notify all consumers that their @ti pointer has changed */ ti = (struct table_info *)ch->tablestate; for (i = 0; i < tbl; i++, ti++) { if (ti->lookup == NULL) continue; tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i); if (tc == NULL || tc->ta->change_ti == NULL) continue; tc->ta->change_ti(tc->astate, ti); } IPFW_UH_WUNLOCK(ch); /* Free old pointers */ free(old_tablestate, M_IPFW); ipfw_objhash_bitmap_free(new_idx, new_blocks); return (0); } /* * Switch between "set 0" and "rule's set" table binding, * Check all ruleset bindings and permits changing * IFF each binding has both rule AND table in default set (set 0). * * Returns 0 on success. */ int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets) { struct namedobj_instance *ni; struct named_object *no; struct ip_fw *rule; ipfw_insn *cmd; int cmdlen, i, l; uint16_t kidx; IPFW_UH_WLOCK(ch); if (V_fw_tables_sets == sets) { IPFW_UH_WUNLOCK(ch); return (0); } ni = CHAIN_TO_NI(ch); /* * Scan all rules and examine tables opcodes. 
*/ for (i = 0; i < ch->n_rules; i++) { rule = ch->map[i]; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_opcode_kidx(cmd, &kidx) != 0) continue; no = ipfw_objhash_lookup_kidx(ni, kidx); /* Check if both table object and rule has the set 0 */ if (no->set != 0 || rule->set != 0) { IPFW_UH_WUNLOCK(ch); return (EBUSY); } } } V_fw_tables_sets = sets; IPFW_UH_WUNLOCK(ch); return (0); } /* * Lookup an IP @addr in table @tbl. * Stores found value in @val. * * Returns 1 if @addr was found. */ int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val) { struct table_info *ti; ti = KIDX_TO_TI(ch, tbl); return (ti->lookup(ti, &addr, sizeof(in_addr_t), val)); } /* * Lookup an arbtrary key @paddr of legth @plen in table @tbl. * Stores found value in @val. * * Returns 1 if key was found. */ int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, void *paddr, uint32_t *val) { struct table_info *ti; ti = KIDX_TO_TI(ch, tbl); return (ti->lookup(ti, paddr, plen, val)); } /* * Info/List/dump support for tables. * */ /* * High-level 'get' cmds sysctl handlers */ /* * Lists all tables currently available in kernel. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ] * * Returns 0 on success */ static int list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_lheader *olh; int error; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); if (sd->valsize < olh->size) return (EINVAL); IPFW_UH_RLOCK(ch); error = export_tables(ch, olh, sd); IPFW_UH_RUNLOCK(ch); return (error); } /* * Store table info to buffer provided by @sd. 
* Data layout (v0)(current):
 * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
 * Reply: [ ipfw_obj_header ipfw_xtable_info ]
 *
 * Returns 0 on success.
 */
static int
describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	struct _ipfw_obj_header *hdr;
	struct table_config *cfg;
	struct tid_info tid;
	size_t need;
	int error;

	/* Header is immediately followed by the info structure to fill */
	need = sizeof(*hdr) + sizeof(ipfw_xtable_info);
	hdr = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, need);
	if (hdr == NULL)
		return (EINVAL);

	objheader_to_ti(hdr, &tid);

	IPFW_UH_RLOCK(ch);
	cfg = find_table(CHAIN_TO_NI(ch), &tid);
	if (cfg == NULL)
		error = ESRCH;
	else {
		export_table_info(ch, cfg, (ipfw_xtable_info *)(hdr + 1));
		error = 0;
	}
	IPFW_UH_RUNLOCK(ch);

	return (error);
}

/*
 * Modifies existing table.
 * Only the fields selected by the request's mflags are updated:
 * IPFW_TMFLAGS_LIMIT (limit) and IPFW_TMFLAGS_LOCK (locked state).
 * Data layout (v0)(current):
 * Request: [ ipfw_obj_header ipfw_xtable_info ]
 *
 * Returns 0 on success
 */
static int
modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	struct _ipfw_obj_header *hdr;
	ipfw_xtable_info *info;
	struct namedobj_instance *ni;
	struct table_config *cfg;
	struct tid_info tid;
	int error;

	if (sd->valsize != sizeof(*hdr) + sizeof(ipfw_xtable_info))
		return (EINVAL);

	hdr = (struct _ipfw_obj_header *)sd->kbuf;
	info = (ipfw_xtable_info *)(hdr + 1);

	/*
	 * Verify user-supplied strings.
	 * Check for null-terminated/zero-length strings
	 */
	if (check_table_name(hdr->ntlv.name) != 0)
		return (EINVAL);

	objheader_to_ti(hdr, &tid);
	tid.type = info->type;

	error = 0;

	IPFW_UH_WLOCK(ch);
	ni = CHAIN_TO_NI(ch);
	cfg = find_table(ni, &tid);
	if (cfg == NULL) {
		error = ESRCH;
	} else if ((cfg->ta->flags & TA_FLAG_READONLY) != 0) {
		/* Do not support any modifications for readonly tables */
		error = EACCES;
	} else {
		if ((info->mflags & IPFW_TMFLAGS_LIMIT) != 0)
			cfg->limit = info->limit;
		if ((info->mflags & IPFW_TMFLAGS_LOCK) != 0)
			cfg->locked =
			    ((info->flags & IPFW_TGFLAGS_LOCKED) != 0);
	}
	IPFW_UH_WUNLOCK(ch);

	return (error);
}

/*
 * Creates new table.
* Data layout (v0)(current):
 * Request: [ ipfw_obj_header ipfw_xtable_info ]
 *
 * Returns 0 on success
 */
static int
create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	struct _ipfw_obj_header *hdr;
	ipfw_xtable_info *info;
	struct namedobj_instance *ni;
	struct tid_info tid;
	char *algo;
	int exists;

	if (sd->valsize != sizeof(*hdr) + sizeof(ipfw_xtable_info))
		return (EINVAL);

	hdr = (struct _ipfw_obj_header *)sd->kbuf;
	info = (ipfw_xtable_info *)(hdr + 1);

	/*
	 * Verify user-supplied strings.
	 * Check for null-terminated/zero-length strings
	 */
	if (check_table_name(hdr->ntlv.name) != 0)
		return (EINVAL);

	/* The algorithm name must be terminated inside its buffer */
	algo = info->algoname;
	if (strnlen(algo, sizeof(info->algoname)) == sizeof(info->algoname))
		return (EINVAL);

	/* An empty algorithm name means "use default algorithm" */
	if (algo[0] == '\0')
		algo = NULL;

	objheader_to_ti(hdr, &tid);
	tid.type = info->type;

	ni = CHAIN_TO_NI(ch);

	/*
	 * Early existence check under the read lock only.
	 * create_table_internal() checks again under the write lock.
	 */
	IPFW_UH_RLOCK(ch);
	exists = (find_table(ni, &tid) != NULL);
	IPFW_UH_RUNLOCK(ch);

	if (exists)
		return (EEXIST);

	return (create_table_internal(ch, &tid, algo, info, NULL, 0));
}

/*
 * Creates new table based on @ti and @aname.
 *
 * Assume @aname to be checked and valid.
 * Stores allocated table kidx inside @pkidx (if non-NULL).
 * Reference created table if @compat is non-zero.
 *
 * Returns 0 on success.
*/ static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat) { struct namedobj_instance *ni; struct table_config *tc, *tc_new, *tmp; struct table_algo *ta; uint16_t kidx; ni = CHAIN_TO_NI(ch); ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname); if (ta == NULL) return (ENOTSUP); tc = alloc_table_config(ch, ti, ta, aname, i->tflags); if (tc == NULL) return (ENOMEM); tc->vmask = i->vmask; tc->limit = i->limit; if (ta->flags & TA_FLAG_READONLY) tc->locked = 1; else tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0; IPFW_UH_WLOCK(ch); /* Check if table has been already created */ tc_new = find_table(ni, ti); if (tc_new != NULL) { /* * Compat: do not fail if we're * requesting to create existing table * which has the same type */ if (compat == 0 || tc_new->no.subtype != tc->no.subtype) { IPFW_UH_WUNLOCK(ch); free_table_config(ni, tc); return (EEXIST); } /* Exchange tc and tc_new for proper refcounting & freeing */ tmp = tc; tc = tc_new; tc_new = tmp; } else { /* New table */ if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) { IPFW_UH_WUNLOCK(ch); printf("Unable to allocate table index." " Consider increasing net.inet.ip.fw.tables_max"); free_table_config(ni, tc); return (EBUSY); } tc->no.kidx = kidx; tc->no.etlv = IPFW_TLV_TBL_NAME; IPFW_WLOCK(ch); link_table(ch, tc); IPFW_WUNLOCK(ch); } if (compat != 0) tc->no.refcnt++; if (pkidx != NULL) *pkidx = tc->no.kidx; IPFW_UH_WUNLOCK(ch); if (tc_new != NULL) free_table_config(ni, tc_new); return (0); } static void ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti) { memset(ti, 0, sizeof(struct tid_info)); ti->set = ntlv->set; ti->uidx = ntlv->idx; ti->tlvs = ntlv; ti->tlen = ntlv->head.length; } static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti) { ntlv_to_ti(&oh->ntlv, ti); } struct namedobj_instance * ipfw_get_table_objhash(struct ip_fw_chain *ch) { return (CHAIN_TO_NI(ch)); } /* * Exports basic table info as name TLV. 
* Used inside dump_static_rules() to provide info * about all tables referenced by current ruleset. * * Returns 0 on success. */ int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx, struct sockopt_data *sd) { struct namedobj_instance *ni; struct named_object *no; ipfw_obj_ntlv *ntlv; ni = CHAIN_TO_NI(ch); no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("invalid table kidx passed")); ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); if (ntlv == NULL) return (ENOMEM); ntlv->head.type = IPFW_TLV_TBL_NAME; ntlv->head.length = sizeof(*ntlv); ntlv->idx = no->kidx; strlcpy(ntlv->name, no->name, sizeof(ntlv->name)); return (0); } struct dump_args { struct ip_fw_chain *ch; struct table_info *ti; struct table_config *tc; struct sockopt_data *sd; uint32_t cnt; uint16_t uidx; int error; uint32_t size; ipfw_table_entry *ent; ta_foreach_f *f; void *farg; ipfw_obj_tentry tent; }; static int count_ext_entries(void *e, void *arg) { struct dump_args *da; da = (struct dump_args *)arg; da->cnt++; return (0); } /* * Gets number of items from table either using * internal counter or calling algo callback for * externally-managed tables. * * Returns number of records. */ static uint32_t table_get_count(struct ip_fw_chain *ch, struct table_config *tc) { struct table_info *ti; struct table_algo *ta; struct dump_args da; ti = KIDX_TO_TI(ch, tc->no.kidx); ta = tc->ta; /* Use internal counter for self-managed tables */ if ((ta->flags & TA_FLAG_READONLY) == 0) return (tc->count); /* Use callback to quickly get number of items */ if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0) return (ta->get_count(tc->astate, ti)); /* Count number of iterms ourselves */ memset(&da, 0, sizeof(da)); ta->foreach(tc->astate, ti, count_ext_entries, &da); return (da.cnt); } /* * Exports table @tc info into standard ipfw_xtable_info format. 
*/ static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc, ipfw_xtable_info *i) { struct table_info *ti; struct table_algo *ta; i->type = tc->no.subtype; i->tflags = tc->tflags; i->vmask = tc->vmask; i->set = tc->no.set; i->kidx = tc->no.kidx; i->refcnt = tc->no.refcnt; i->count = table_get_count(ch, tc); i->limit = tc->limit; i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0; i->size = i->count * sizeof(ipfw_obj_tentry); i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); strlcpy(i->tablename, tc->tablename, sizeof(i->tablename)); ti = KIDX_TO_TI(ch, tc->no.kidx); ta = tc->ta; if (ta->print_config != NULL) { /* Use algo function to print table config to string */ ta->print_config(tc->astate, ti, i->algoname, sizeof(i->algoname)); } else strlcpy(i->algoname, ta->name, sizeof(i->algoname)); /* Dump algo-specific data, if possible */ if (ta->dump_tinfo != NULL) { ta->dump_tinfo(tc->astate, ti, &i->ta_info); i->ta_info.flags |= IPFW_TATFLAGS_DATA; } } struct dump_table_args { struct ip_fw_chain *ch; struct sockopt_data *sd; }; -static void +static int export_table_internal(struct namedobj_instance *ni, struct named_object *no, void *arg) { ipfw_xtable_info *i; struct dump_table_args *dta; dta = (struct dump_table_args *)arg; i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i)); KASSERT(i != NULL, ("previously checked buffer is not enough")); export_table_info(dta->ch, (struct table_config *)no, i); + return (0); } /* * Export all tables as ipfw_xtable_info structures to * storage provided by @sd. * * If supplied buffer is too small, fills in required size * and returns ENOMEM. * Returns 0 on success. 
*/
static int
export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
    struct sockopt_data *sd)
{
	struct dump_table_args dta;
	uint32_t avail, count, need;

	count = ipfw_objhash_count(CHAIN_TO_NI(ch));
	need = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);

	/*
	 * Fill in header regardless of buffer size, so the caller
	 * learns the required size even when ENOMEM is returned.
	 */
	avail = olh->size;
	olh->count = count;
	olh->objsize = sizeof(ipfw_xtable_info);
	olh->size = need;

	if (need > avail)
		return (ENOMEM);

	dta.ch = ch;
	dta.sd = sd;

	ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta);

	return (0);
}

/*
 * Dumps all table data
 * Data layout (v1)(current):
 * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
 * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
 *
 * Returns 0 on success
 */
static int
dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	struct _ipfw_obj_header *hdr;
	ipfw_xtable_info *info;
	struct tid_info tid;
	struct table_config *cfg;
	struct dump_args da;
	uint32_t need;
	int error;

	need = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
	hdr = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, need);
	if (hdr == NULL)
		return (EINVAL);

	info = (ipfw_xtable_info *)(hdr + 1);
	objheader_to_ti(hdr, &tid);

	IPFW_UH_RLOCK(ch);

	cfg = find_table(CHAIN_TO_NI(ch), &tid);
	if (cfg == NULL) {
		error = ESRCH;
		goto done;
	}

	export_table_info(ch, cfg, info);

	if (sd->valsize < info->size) {

		/*
		 * Submitted buffer size is not enough.
		 * We've already filled in @info structure with
		 * relevant table info including size, so we
		 * can return. Buffer will be flushed automatically.
		 */
		error = ENOMEM;
		goto done;
	}

	/*
	 * Do the actual dump in eXtended format
	 */
	memset(&da, 0, sizeof(da));
	da.ch = ch;
	da.ti = KIDX_TO_TI(ch, cfg->no.kidx);
	da.tc = cfg;
	da.sd = sd;

	cfg->ta->foreach(cfg->astate, da.ti, dump_table_tentry, &da);

	/* Per-entry callback reports failures via da.error */
	error = da.error;
done:
	IPFW_UH_RUNLOCK(ch);

	return (error);
}

/*
 * Dumps all table data
 * Data layout (version 0)(legacy):
 * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE()
 * Reply: [ ipfw_xtable ipfw_table_xentry x N ]
 *
 * Returns 0 on success
 */
static int
dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	ipfw_xtable *xtbl;
	struct tid_info tid;
	struct table_config *cfg;
	struct dump_args da;
	size_t count, need;
	int error;

	xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable));
	if (xtbl == NULL)
		return (EINVAL);

	memset(&tid, 0, sizeof(tid));
	tid.uidx = xtbl->tbl;

	/* Note: a missing table is reported as success here */
	error = 0;

	IPFW_UH_RLOCK(ch);

	cfg = find_table(CHAIN_TO_NI(ch), &tid);
	if (cfg == NULL)
		goto done;

	count = table_get_count(ch, cfg);
	need = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable);

	xtbl->cnt = count;
	xtbl->size = need;
	xtbl->type = cfg->no.subtype;
	xtbl->tbl = tid.uidx;

	if (sd->valsize < need) {

		/*
		 * Submitted buffer size is not enough.
		 * We've already filled in @xtbl structure with
		 * relevant table info including size, so we
		 * can return. Buffer will be flushed automatically.
		 */
		error = ENOMEM;
		goto done;
	}

	/* Do the actual dump in eXtended format */
	memset(&da, 0, sizeof(da));
	da.ch = ch;
	da.ti = KIDX_TO_TI(ch, cfg->no.kidx);
	da.tc = cfg;
	da.sd = sd;

	cfg->ta->foreach(cfg->astate, da.ti, dump_table_xentry, &da);
done:
	IPFW_UH_RUNLOCK(ch);

	return (error);
}

/*
 * Legacy function to retrieve number of items in table.
*/ static int get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { uint32_t *tbl; struct tid_info ti; size_t sz; int error; sz = sizeof(*op3) + sizeof(uint32_t); op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz); if (op3 == NULL) return (EINVAL); tbl = (uint32_t *)(op3 + 1); memset(&ti, 0, sizeof(ti)); ti.uidx = *tbl; IPFW_UH_RLOCK(ch); error = ipfw_count_xtable(ch, &ti, tbl); IPFW_UH_RUNLOCK(ch); return (error); } /* * Legacy IP_FW_TABLE_GETSIZE handler */ int ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) { struct table_config *tc; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) return (ESRCH); *cnt = table_get_count(ch, tc); return (0); } /* * Legacy IP_FW_TABLE_XGETSIZE handler */ int ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) { struct table_config *tc; uint32_t count; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) { *cnt = 0; return (0); /* 'table all list' requires success */ } count = table_get_count(ch, tc); *cnt = count * sizeof(ipfw_table_xentry); if (count > 0) *cnt += sizeof(ipfw_xtable); return (0); } static int dump_table_entry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; ipfw_table_entry *ent; struct table_value *pval; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; /* Out of memory, returning */ if (da->cnt == da->size) return (1); ent = da->ent++; ent->tbl = da->uidx; da->cnt++; error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); if (error != 0) return (error); ent->addr = da->tent.k.addr.s_addr; ent->masklen = da->tent.masklen; pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); ent->value = ipfw_export_table_value_legacy(pval); return (0); } /* * Dumps table in pre-8.1 legacy format. 
*/ int ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, ipfw_table *tbl) { struct table_config *tc; struct table_algo *ta; struct dump_args da; tbl->cnt = 0; if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) return (0); /* XXX: We should return ESRCH */ ta = tc->ta; /* This dump format supports IPv4 only */ if (tc->no.subtype != IPFW_TABLE_ADDR) return (0); memset(&da, 0, sizeof(da)); da.ch = ch; da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.ent = &tbl->ent[0]; da.size = tbl->size; tbl->cnt = 0; ta->foreach(tc->astate, da.ti, dump_table_entry, &da); tbl->cnt = da.cnt; return (0); } /* * Dumps table entry in eXtended format (v1)(current). */ static int dump_table_tentry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; struct table_value *pval; ipfw_obj_tentry *tent; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent)); /* Out of memory, returning */ if (tent == NULL) { da->error = ENOMEM; return (1); } tent->head.length = sizeof(ipfw_obj_tentry); tent->idx = da->uidx; error = ta->dump_tentry(tc->astate, da->ti, e, tent); if (error != 0) return (error); pval = get_table_value(da->ch, da->tc, tent->v.kidx); ipfw_export_table_value_v1(pval, &tent->v.value); return (0); } /* * Dumps table entry in eXtended format (v0). 
*/ static int dump_table_xentry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; ipfw_table_xentry *xent; ipfw_obj_tentry *tent; struct table_value *pval; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent)); /* Out of memory, returning */ if (xent == NULL) return (1); xent->len = sizeof(ipfw_table_xentry); xent->tbl = da->uidx; memset(&da->tent, 0, sizeof(da->tent)); tent = &da->tent; error = ta->dump_tentry(tc->astate, da->ti, e, tent); if (error != 0) return (error); /* Convert current format to previous one */ xent->masklen = tent->masklen; pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); xent->value = ipfw_export_table_value_legacy(pval); /* Apply some hacks */ if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) { xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr; xent->flags = IPFW_TCF_INET; } else memcpy(&xent->k, &tent->k, sizeof(xent->k)); return (0); } /* * Helper function to export table algo data * to tentry format before calling user function. * * Returns 0 on success. */ static int prepare_table_tentry(void *e, void *arg) { struct dump_args *da; struct table_config *tc; struct table_algo *ta; int error; da = (struct dump_args *)arg; tc = da->tc; ta = tc->ta; error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); if (error != 0) return (error); da->f(&da->tent, da->farg); return (0); } /* * Allow external consumers to read table entries in standard format. 
*/ int ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx, ta_foreach_f *f, void *arg) { struct namedobj_instance *ni; struct table_config *tc; struct table_algo *ta; struct dump_args da; ni = CHAIN_TO_NI(ch); tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); if (tc == NULL) return (ESRCH); ta = tc->ta; memset(&da, 0, sizeof(da)); da.ch = ch; da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.f = f; da.farg = arg; ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da); return (0); } /* * Table algorithms */ /* * Finds algorithm by index, table type or supplied name. * * Returns pointer to algo or NULL. */ static struct table_algo * find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name) { int i, l; struct table_algo *ta; if (ti->type > IPFW_TABLE_MAXTYPE) return (NULL); /* Search by index */ if (ti->atype != 0) { if (ti->atype > tcfg->algo_count) return (NULL); return (tcfg->algo[ti->atype]); } if (name == NULL) { /* Return default algorithm for given type if set */ return (tcfg->def_algo[ti->type]); } /* Search by name */ /* TODO: better search */ for (i = 1; i <= tcfg->algo_count; i++) { ta = tcfg->algo[i]; /* * One can supply additional algorithm * parameters so we compare only the first word * of supplied name: * 'addr:chash hsize=32' * '^^^^^^^^^' * */ l = strlen(ta->name); if (strncmp(name, ta->name, l) != 0) continue; if (name[l] != '\0' && name[l] != ' ') continue; /* Check if we're requesting proper table type */ if (ti->type != 0 && ti->type != ta->type) return (NULL); return (ta); } return (NULL); } /* * Register new table algo @ta. * Stores algo id inside @idx. * * Returns 0 on success. 
*/ int ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, int *idx) { struct tables_config *tcfg; struct table_algo *ta_new; size_t sz; if (size > sizeof(struct table_algo)) return (EINVAL); /* Check for the required on-stack size for add/del */ sz = roundup2(ta->ta_buf_size, sizeof(void *)); if (sz > TA_BUF_SZ) return (EINVAL); KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE")); /* Copy algorithm data to stable storage. */ ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO); memcpy(ta_new, ta, size); tcfg = CHAIN_TO_TCFG(ch); KASSERT(tcfg->algo_count < 255, ("Increase algo array size")); tcfg->algo[++tcfg->algo_count] = ta_new; ta_new->idx = tcfg->algo_count; /* Set algorithm as default one for given type */ if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 && tcfg->def_algo[ta_new->type] == NULL) tcfg->def_algo[ta_new->type] = ta_new; *idx = ta_new->idx; return (0); } /* * Unregisters table algo using @idx as id. * XXX: It is NOT safe to call this function in any place * other than ipfw instance destroy handler. */ void ipfw_del_table_algo(struct ip_fw_chain *ch, int idx) { struct tables_config *tcfg; struct table_algo *ta; tcfg = CHAIN_TO_TCFG(ch); KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d", idx, tcfg->algo_count)); ta = tcfg->algo[idx]; KASSERT(ta != NULL, ("algo idx %d is NULL", idx)); if (tcfg->def_algo[ta->type] == ta) tcfg->def_algo[ta->type] = NULL; free(ta, M_IPFW); } /* * Lists all table algorithms currently available. 
* Data layout (v0)(current): * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ] * * Returns 0 on success */ static int list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_lheader *olh; struct tables_config *tcfg; ipfw_ta_info *i; struct table_algo *ta; uint32_t count, n, size; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); if (sd->valsize < olh->size) return (EINVAL); IPFW_UH_RLOCK(ch); tcfg = CHAIN_TO_TCFG(ch); count = tcfg->algo_count; size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader); /* Fill in header regadless of buffer size */ olh->count = count; olh->objsize = sizeof(ipfw_ta_info); if (size > olh->size) { olh->size = size; IPFW_UH_RUNLOCK(ch); return (ENOMEM); } olh->size = size; for (n = 1; n <= count; n++) { i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i)); KASSERT(i != NULL, ("previously checked buffer is not enough")); ta = tcfg->algo[n]; strlcpy(i->algoname, ta->name, sizeof(i->algoname)); i->type = ta->type; i->refcnt = ta->refcnt; } IPFW_UH_RUNLOCK(ch); return (0); } static int classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { /* Basic IPv4/IPv6 or u32 lookups */ *puidx = cmd->arg1; /* Assume ADDR by default */ *ptype = IPFW_TABLE_ADDR; int v; if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) { /* * generic lookup. The key must be * in 32bit big-endian format. 
*/ v = ((ipfw_insn_u32 *)cmd)->d[1]; switch (v) { case 0: case 1: /* IPv4 src/dst */ break; case 2: case 3: /* src/dst port */ *ptype = IPFW_TABLE_NUMBER; break; case 4: /* uid/gid */ *ptype = IPFW_TABLE_NUMBER; break; case 5: /* jid */ *ptype = IPFW_TABLE_NUMBER; break; case 6: /* dscp */ *ptype = IPFW_TABLE_NUMBER; break; } } return (0); } static int classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { ipfw_insn_if *cmdif; /* Interface table, possibly */ cmdif = (ipfw_insn_if *)cmd; if (cmdif->name[0] != '\1') return (1); *ptype = IPFW_TABLE_INTERFACE; *puidx = cmdif->p.kidx; return (0); } static int classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { *puidx = cmd->arg1; *ptype = IPFW_TABLE_FLOW; return (0); } static void update_arg1(ipfw_insn *cmd, uint16_t idx) { cmd->arg1 = idx; } static void update_via(ipfw_insn *cmd, uint16_t idx) { ipfw_insn_if *cmdif; cmdif = (ipfw_insn_if *)cmd; cmdif->p.kidx = idx; } static int table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, struct named_object **pno) { struct table_config *tc; int error; IPFW_UH_WLOCK_ASSERT(ch); error = find_table_err(CHAIN_TO_NI(ch), ti, &tc); if (error != 0) return (error); *pno = &tc->no; return (0); } /* XXX: sets-sets! 
*/ static struct named_object * table_findbykidx(struct ip_fw_chain *ch, uint16_t idx) { struct namedobj_instance *ni; struct table_config *tc; IPFW_UH_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx); KASSERT(tc != NULL, ("Table with index %d not found", idx)); return (&tc->no); } static struct opcode_obj_rewrite opcodes[] = { { O_IP_SRC_LOOKUP, IPFW_TLV_TBL_NAME, classify_srcdst, update_arg1, table_findbyname, table_findbykidx, create_table_compat }, { O_IP_DST_LOOKUP, IPFW_TLV_TBL_NAME, classify_srcdst, update_arg1, table_findbyname, table_findbykidx, create_table_compat }, { O_IP_FLOW_LOOKUP, IPFW_TLV_TBL_NAME, classify_flow, update_arg1, table_findbyname, table_findbykidx, create_table_compat }, { O_XMIT, IPFW_TLV_TBL_NAME, classify_via, update_via, table_findbyname, table_findbykidx, create_table_compat }, { O_RECV, IPFW_TLV_TBL_NAME, classify_via, update_via, table_findbyname, table_findbykidx, create_table_compat }, { O_VIA, IPFW_TLV_TBL_NAME, classify_via, update_via, table_findbyname, table_findbykidx, create_table_compat }, }; /* * Checks table name for validity. * Enforce basic length checks, the rest * should be done in userland. * * Returns 0 if name is considered valid. */ static int check_table_name(const char *name) { /* * TODO: do some more complicated checks */ return (ipfw_check_object_name_generic(name)); } /* * Finds table config based on either legacy index * or name in ntlv. * Note @ti structure contains unchecked data from userland. * * Returns 0 in success and fills in @tc with found config */ static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti, struct table_config **tc) { char *name, bname[16]; struct named_object *no; ipfw_obj_ntlv *ntlv; uint32_t set; if (ti->tlvs != NULL) { ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, IPFW_TLV_TBL_NAME); if (ntlv == NULL) return (EINVAL); name = ntlv->name; /* * Use set provided by @ti instead of @ntlv one. 
* This is needed due to different sets behavior * controlled by V_fw_tables_sets. */ set = ti->set; } else { snprintf(bname, sizeof(bname), "%d", ti->uidx); name = bname; set = 0; } no = ipfw_objhash_lookup_name(ni, set, name); *tc = (struct table_config *)no; return (0); } /* * Finds table config based on either legacy index * or name in ntlv. * Note @ti structure contains unchecked data from userland. * * Returns pointer to table_config or NULL. */ static struct table_config * find_table(struct namedobj_instance *ni, struct tid_info *ti) { struct table_config *tc; if (find_table_err(ni, ti, &tc) != 0) return (NULL); return (tc); } /* * Allocate new table config structure using * specified @algo and @aname. * * Returns pointer to config or NULL. */ static struct table_config * alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, struct table_algo *ta, char *aname, uint8_t tflags) { char *name, bname[16]; struct table_config *tc; int error; ipfw_obj_ntlv *ntlv; uint32_t set; if (ti->tlvs != NULL) { ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, IPFW_TLV_TBL_NAME); if (ntlv == NULL) return (NULL); name = ntlv->name; set = ntlv->set; } else { /* Compat part: convert number to string representation */ snprintf(bname, sizeof(bname), "%d", ti->uidx); name = bname; set = 0; } tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO); tc->no.name = tc->tablename; tc->no.subtype = ta->type; tc->no.set = set; tc->tflags = tflags; tc->ta = ta; strlcpy(tc->tablename, name, sizeof(tc->tablename)); /* Set "shared" value type by default */ tc->vshared = 1; /* Preallocate data structures for new tables */ error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags); if (error != 0) { free(tc, M_IPFW); return (NULL); } return (tc); } /* * Destroys table state and config. 
*/ static void free_table_config(struct namedobj_instance *ni, struct table_config *tc) { KASSERT(tc->linked == 0, ("free() on linked config")); /* UH lock MUST NOT be held */ /* * We're using ta without any locking/referencing. * TODO: fix this if we're going to use unloadable algos. */ tc->ta->destroy(tc->astate, &tc->ti_copy); free(tc, M_IPFW); } /* * Links @tc to @chain table named instance. * Sets appropriate type/states in @chain table info. */ static void link_table(struct ip_fw_chain *ch, struct table_config *tc) { struct namedobj_instance *ni; struct table_info *ti; uint16_t kidx; IPFW_UH_WLOCK_ASSERT(ch); IPFW_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; ipfw_objhash_add(ni, &tc->no); ti = KIDX_TO_TI(ch, kidx); *ti = tc->ti_copy; /* Notify algo on real @ti address */ if (tc->ta->change_ti != NULL) tc->ta->change_ti(tc->astate, ti); tc->linked = 1; tc->ta->refcnt++; } /* * Unlinks @tc from @chain table named instance. * Zeroes states in @chain and stores them in @tc. */ static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc) { struct namedobj_instance *ni; struct table_info *ti; uint16_t kidx; IPFW_UH_WLOCK_ASSERT(ch); IPFW_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); kidx = tc->no.kidx; /* Clear state. @ti copy is already saved inside @tc */ ipfw_objhash_del(ni, &tc->no); ti = KIDX_TO_TI(ch, kidx); memset(ti, 0, sizeof(struct table_info)); tc->linked = 0; tc->ta->refcnt--; /* Notify algo on real @ti address */ if (tc->ta->change_ti != NULL) tc->ta->change_ti(tc->astate, NULL); } struct swap_table_args { int set; int new_set; int mv; }; /* * Change set for each matching table. * * Ensure we dispatch each table once by setting/checking ochange * fields. 
*/ -static void +static int swap_table_set(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct table_config *tc; struct swap_table_args *sta; tc = (struct table_config *)no; sta = (struct swap_table_args *)arg; if (no->set != sta->set && (no->set != sta->new_set || sta->mv != 0)) - return; + return (0); if (tc->ochanged != 0) - return; + return (0); tc->ochanged = 1; ipfw_objhash_del(ni, no); if (no->set == sta->set) no->set = sta->new_set; else no->set = sta->set; ipfw_objhash_add(ni, no); + return (0); } /* * Cleans up ochange field for all tables. */ -static void +static int clean_table_set_data(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct table_config *tc; struct swap_table_args *sta; tc = (struct table_config *)no; sta = (struct swap_table_args *)arg; tc->ochanged = 0; + return (0); } /* * Swaps tables within two sets. */ void ipfw_swap_tables_sets(struct ip_fw_chain *ch, uint32_t set, uint32_t new_set, int mv) { struct swap_table_args sta; IPFW_UH_WLOCK_ASSERT(ch); sta.set = set; sta.new_set = new_set; sta.mv = mv; ipfw_objhash_foreach(CHAIN_TO_NI(ch), swap_table_set, &sta); ipfw_objhash_foreach(CHAIN_TO_NI(ch), clean_table_set_data, &sta); } /* * Move all tables which are reference by rules in @rr to set @new_set. * Makes sure that all relevant tables are referenced ONLLY by given rules. 
* * Returns 0 on success, */ int ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt, uint32_t new_set) { struct ip_fw *rule; struct table_config *tc; struct named_object *no; struct namedobj_instance *ni; int bad, i, l, cmdlen; uint16_t kidx; ipfw_insn *cmd; IPFW_UH_WLOCK_ASSERT(ch); ni = CHAIN_TO_NI(ch); /* Stage 1: count number of references by given rules */ for (i = 0; i < ch->n_rules - 1; i++) { rule = ch->map[i]; if (ipfw_match_range(rule, rt) == 0) continue; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_opcode_kidx(cmd, &kidx) != 0) continue; no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("objhash lookup failed on index %d", kidx)); tc = (struct table_config *)no; tc->ocount++; } } /* Stage 2: verify "ownership" */ bad = 0; for (i = 0; i < ch->n_rules - 1; i++) { rule = ch->map[i]; if (ipfw_match_range(rule, rt) == 0) continue; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_opcode_kidx(cmd, &kidx) != 0) continue; no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("objhash lookup failed on index %d", kidx)); tc = (struct table_config *)no; if (tc->no.refcnt != tc->ocount) { /* * Number of references differ: * Other rule(s) are holding reference to given * table, so it is not possible to change its set. * * Note that refcnt may account * references to some going-to-be-added rules. * Since we don't know their numbers (and event * if they will be added) it is perfectly OK * to return error here. 
*/ bad = 1; break; } } if (bad != 0) break; } /* Stage 3: change set or cleanup */ for (i = 0; i < ch->n_rules - 1; i++) { rule = ch->map[i]; if (ipfw_match_range(rule, rt) == 0) continue; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (classify_opcode_kidx(cmd, &kidx) != 0) continue; no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("objhash lookup failed on index %d", kidx)); tc = (struct table_config *)no; tc->ocount = 0; if (bad != 0) continue; /* Actually change set. */ ipfw_objhash_del(ni, no); no->set = new_set; ipfw_objhash_add(ni, no); } } return (bad); } static struct ipfw_sopt_handler scodes[] = { { IP_FW_TABLE_XCREATE, 0, HDIR_SET, create_table }, { IP_FW_TABLE_XDESTROY, 0, HDIR_SET, flush_table_v0 }, { IP_FW_TABLE_XFLUSH, 0, HDIR_SET, flush_table_v0 }, { IP_FW_TABLE_XMODIFY, 0, HDIR_BOTH, modify_table }, { IP_FW_TABLE_XINFO, 0, HDIR_GET, describe_table }, { IP_FW_TABLES_XLIST, 0, HDIR_GET, list_tables }, { IP_FW_TABLE_XLIST, 0, HDIR_GET, dump_table_v0 }, { IP_FW_TABLE_XLIST, 1, HDIR_GET, dump_table_v1 }, { IP_FW_TABLE_XADD, 0, HDIR_BOTH, manage_table_ent_v0 }, { IP_FW_TABLE_XADD, 1, HDIR_BOTH, manage_table_ent_v1 }, { IP_FW_TABLE_XDEL, 0, HDIR_BOTH, manage_table_ent_v0 }, { IP_FW_TABLE_XDEL, 1, HDIR_BOTH, manage_table_ent_v1 }, { IP_FW_TABLE_XFIND, 0, HDIR_GET, find_table_entry }, { IP_FW_TABLE_XSWAP, 0, HDIR_SET, swap_table }, { IP_FW_TABLES_ALIST, 0, HDIR_GET, list_table_algo }, { IP_FW_TABLE_XGETSIZE, 0, HDIR_GET, get_table_size }, }; -static void +static int destroy_table_locked(struct namedobj_instance *ni, struct named_object *no, void *arg) { unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no); if (ipfw_objhash_free_idx(ni, no->kidx) != 0) printf("Error unlinking kidx %d from table %s\n", no->kidx, no->name); free_table_config(ni, (struct table_config *)no); + return (0); } /* * Shuts tables module down. 
*/ void ipfw_destroy_tables(struct ip_fw_chain *ch, int last) { IPFW_DEL_SOPT_HANDLER(last, scodes); IPFW_DEL_OBJ_REWRITER(last, opcodes); /* Remove all tables from working set */ IPFW_UH_WLOCK(ch); IPFW_WLOCK(ch); ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch); IPFW_WUNLOCK(ch); IPFW_UH_WUNLOCK(ch); /* Free pointers itself */ free(ch->tablestate, M_IPFW); ipfw_table_value_destroy(ch, last); ipfw_table_algo_destroy(ch); ipfw_objhash_destroy(CHAIN_TO_NI(ch)); free(CHAIN_TO_TCFG(ch), M_IPFW); } /* * Starts tables module. */ int ipfw_init_tables(struct ip_fw_chain *ch, int first) { struct tables_config *tcfg; /* Allocate pointers */ ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info), M_IPFW, M_WAITOK | M_ZERO); tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO); tcfg->namehash = ipfw_objhash_create(V_fw_tables_max); ch->tblcfg = tcfg; ipfw_table_value_init(ch, first); ipfw_table_algo_init(ch); IPFW_ADD_OBJ_REWRITER(first, opcodes); IPFW_ADD_SOPT_HANDLER(first, scodes); return (0); } Index: head/sys/netpfil/ipfw/ip_fw_table_algo.c =================================================================== --- head/sys/netpfil/ipfw/ip_fw_table_algo.c (revision 299151) +++ head/sys/netpfil/ipfw/ip_fw_table_algo.c (revision 299152) @@ -1,4107 +1,4109 @@ /*- * Copyright (c) 2014 Yandex LLC * Copyright (c) 2014 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Lookup table algorithms. * */ #include "opt_ipfw.h" #include "opt_inet.h" #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include /* ip_fw.h requires IFNAMSIZ */ #include #include #include #include #include #include /* struct ipfw_rule_ref */ #include #include #include #include /* * IPFW table lookup algorithms. * * What is needed to add another table algo? * * Algo init: * * struct table_algo has to be filled with: * name: "type:algoname" format, e.g. "addr:radix". Currently * there are the following types: "addr", "iface", "number" and "flow". * type: one of IPFW_TABLE_* types * flags: one or more TA_FLAGS_* * ta_buf_size: size of structure used to store add/del item state. * Needs to be less than TA_BUF_SZ. * callbacks: see below for description. * * ipfw_add_table_algo / ipfw_del_table_algo has to be called * * Callbacks description: * * -init: request to initialize new table instance. * typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state, * struct table_info *ti, char *data, uint8_t tflags); * MANDATORY, unlocked. (M_WAITOK). 
Returns 0 on success. * * Allocate all structures needed for normal operations. * * Caller may want to parse @data for some algo-specific * options provided by userland. * * Caller may want to save configuration state pointer to @ta_state * * Caller needs to save desired runtime structure pointer(s) * inside @ti fields. Note that it is not correct to save * @ti pointer at this moment. Use -change_ti hook for that. * * Caller has to fill in ti->lookup to appropriate function * pointer. * * * * -destroy: request to destroy table instance. * typedef void (ta_destroy)(void *ta_state, struct table_info *ti); * MANDATORY, unlocked. (M_WAITOK). * * Frees all table entries and all tables structures allocated by -init. * * * * -prepare_add: request to allocate state for adding new entry. * typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei, * void *ta_buf); * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success. * * Allocates state and fills it in with all necessary data (EXCEPT value) * from @tei to minimize operations needed to be done under WLOCK. * "value" field has to be copied to new entry in @add callback. * Buffer ta_buf of size ta->ta_buf_sz may be used to store * allocated state. * * * * -prepare_del: request to set state for deleting existing entry. * typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei, * void *ta_buf); * MANDATORY, locked, UH. (M_NOWAIT). Returns 0 on success. * * Buffer ta_buf of size ta->ta_buf_sz may be used to store * allocated state. Caller should use on-stack ta_buf allocation * instead of doing malloc(). * * * * -add: request to insert new entry into runtime/config structures. * typedef int (ta_add)(void *ta_state, struct table_info *ti, * struct tentry_info *tei, void *ta_buf, uint32_t *pnum); * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success. * * Insert new entry using previously-allocated state in @ta_buf. 
* * @tei may have the following flags: * TEI_FLAGS_UPDATE: request to add or update entry. * TEI_FLAGS_DONTADD: request to update (but not add) entry. * * Caller is required to do the following: * copy real entry value from @tei * entry added: return 0, set 1 to @pnum * entry updated: return 0, store 0 to @pnum, store old value in @tei, * add TEI_FLAGS_UPDATED flag to @tei. * entry exists: return EEXIST * entry not found: return ENOENT * other error: return non-zero error code. * * * * -del: request to delete existing entry from runtime/config structures. * typedef int (ta_del)(void *ta_state, struct table_info *ti, * struct tentry_info *tei, void *ta_buf, uint32_t *pnum); * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success. * * Delete entry using previously set up in @ta_buf. * * Caller is required to do the following: * entry deleted: return 0, set 1 to @pnum, store old value in @tei. * entry not found: return ENOENT * other error: return non-zero error code. * * * * -flush_entry: flush entry state created by -prepare_add / -del / others * typedef void (ta_flush_entry)(struct ip_fw_chain *ch, * struct tentry_info *tei, void *ta_buf); * MANDATORY, may be locked. (M_NOWAIT). * * Delete state allocated by: * -prepare_add (-add returned EEXIST|UPDATED) * -prepare_del (if any) * -del * * Caller is required to handle empty @ta_buf correctly. * * * -find_tentry: finds entry specified by key @tei * typedef int ta_find_tentry(void *ta_state, struct table_info *ti, * ipfw_obj_tentry *tent); * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 on success. * * Finds entry specified by given key. * * Caller is required to do the following: * entry found: returns 0, export entry to @tent * entry not found: returns ENOENT * * * -need_modify: checks if @ti has enough space to hold another @count items. * typedef int (ta_need_modify)(void *ta_state, struct table_info *ti, * uint32_t count, uint64_t *pflags); * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 if has. 
* * Checks if given table has enough space to add @count items without * resize. Caller may use @pflags to store desired modification data. * * * * -prepare_mod: allocate structures for table modification. * typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags); * OPTIONAL(need_modify), unlocked. (M_WAITOK). Returns 0 on success. * * Allocate all needed state for table modification. Caller * should use `struct mod_item` to store new state in @ta_buf. * Up to TA_BUF_SZ (128 bytes) can be stored in @ta_buf. * * * * -fill_mod: copy some data to new state/ * typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti, * void *ta_buf, uint64_t *pflags); * OPTIONAL(need_modify), locked (UH). (M_NOWAIT). Returns 0 on success. * * Copy as much data as we can to minimize changes under WLOCK. * For example, array can be merged inside this callback. * * * * -modify: perform final modification. * typedef void (ta_modify)(void *ta_state, struct table_info *ti, * void *ta_buf, uint64_t pflags); * OPTIONAL(need_modify), locked (UH+WLOCK). (M_NOWAIT). * * Performs all changes necessary to switch to new structures. * * Caller should save old pointers to @ta_buf storage. * * * * -flush_mod: flush table modification state. * typedef void (ta_flush_mod)(void *ta_buf); * OPTIONAL(need_modify), unlocked. (M_WAITOK). * * Performs flush for the following: * - prepare_mod (modification was not necessary) * - modify (for the old state) * * * * -change_gi: monitor table info pointer changes * typedef void (ta_change_ti)(void *ta_state, struct table_info *ti); * OPTIONAL, locked (UH). (M_NOWAIT). * * Called on @ti pointer changed. Called immediately after -init * to set initial state. * * * * -foreach: calls @f for each table entry * typedef void ta_foreach(void *ta_state, struct table_info *ti, * ta_foreach_f *f, void *arg); * MANDATORY, locked(UH). (M_NOWAIT). * * Runs callback with specified argument for each table entry, * Typically used for dumping table entries. 
* * * * -dump_tentry: dump table entry in current @tentry format. * typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e, * ipfw_obj_tentry *tent); * MANDATORY, locked(UH). (M_NOWAIT). Returns 0 on success. * * Dumps entry @e to @tent. * * * -print_config: prints custom algorithm options into buffer. * typedef void (ta_print_config)(void *ta_state, struct table_info *ti, * char *buf, size_t bufsize); * OPTIONAL. locked(UH). (M_NOWAIT). * * Prints custom algorithm options in the format suitable to pass * back to -init callback. * * * * -dump_tinfo: dumps algo-specific info. * typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti, * ipfw_ta_tinfo *tinfo); * OPTIONAL. locked(UH). (M_NOWAIT). * * Dumps options like items size/hash size, etc. */ MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); /* * Utility structures/functions common to more than one algo */ struct mod_item { void *main_ptr; size_t size; void *main_ptr6; size_t size6; }; static int badd(const void *key, void *item, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)); static int bdel(const void *key, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)); /* * ADDR implementation using radix * */ /* * The radix code expects addr and mask to be array of bytes, * with the first byte being the length of the array. rn_inithead * is called with the offset in bits of the lookup key within the * array. If we use a sockaddr_in as the underlying type, * sin_len is conveniently located at offset 0, sin_addr is at * offset 4 and normally aligned. 
* But for portability, let's avoid assumption and make the code explicit */ #define KEY_LEN(v) *((uint8_t *)&(v)) /* * Do not require radix to compare more than actual IPv4/IPv6 address */ #define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t)) #define KEY_LEN_INET6 (offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr)) #define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr)) #define OFF_LEN_INET6 (8 * offsetof(struct sa_in6, sin6_addr)) struct radix_addr_entry { struct radix_node rn[2]; struct sockaddr_in addr; uint32_t value; uint8_t masklen; }; struct sa_in6 { uint8_t sin6_len; uint8_t sin6_family; uint8_t pad[2]; struct in6_addr sin6_addr; }; struct radix_addr_xentry { struct radix_node rn[2]; struct sa_in6 addr6; uint32_t value; uint8_t masklen; }; struct radix_cfg { struct radix_node_head *head4; struct radix_node_head *head6; size_t count4; size_t count6; }; struct ta_buf_radix { void *ent_ptr; struct sockaddr *addr_ptr; struct sockaddr *mask_ptr; union { struct { struct sockaddr_in sa; struct sockaddr_in ma; } a4; struct { struct sa_in6 sa; struct sa_in6 ma; } a6; } addr; }; static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static int flush_radix_entry(struct radix_node *rn, void *arg); static void ta_destroy_radix(void *ta_state, struct table_info *ti); static void ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); static int ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); static int ta_find_radix_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); static void tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa, struct sockaddr *ma, int *set_mask); static 
int ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static int ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_del_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static void ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags); static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct radix_node_head *rnh; if (keylen == sizeof(in_addr_t)) { struct radix_addr_entry *ent; struct sockaddr_in sa; KEY_LEN(sa) = KEY_LEN_INET; sa.sin_addr.s_addr = *((in_addr_t *)key); rnh = (struct radix_node_head *)ti->state; ent = (struct radix_addr_entry *)(rnh->rnh_matchaddr(&sa, &rnh->rh)); if (ent != NULL) { *val = ent->value; return (1); } } else { struct radix_addr_xentry *xent; struct sa_in6 sa6; KEY_LEN(sa6) = KEY_LEN_INET6; memcpy(&sa6.sin6_addr, key, sizeof(struct in6_addr)); rnh = (struct radix_node_head *)ti->xstate; xent = (struct radix_addr_xentry *)(rnh->rnh_matchaddr(&sa6, &rnh->rh)); if (xent != NULL) { *val = xent->value; return (1); } } return (0); } /* * New table */ static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { struct radix_cfg *cfg; if (!rn_inithead(&ti->state, OFF_LEN_INET)) return (ENOMEM); if (!rn_inithead(&ti->xstate, OFF_LEN_INET6)) { rn_detachhead(&ti->state); return (ENOMEM); } cfg = malloc(sizeof(struct radix_cfg), M_IPFW, M_WAITOK | M_ZERO); *ta_state = cfg; ti->lookup = ta_lookup_radix; return (0); } static int flush_radix_entry(struct radix_node *rn, void *arg) { struct radix_node_head * const rnh = arg; struct radix_addr_entry *ent; ent = (struct 
radix_addr_entry *) rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, &rnh->rh); if (ent != NULL) free(ent, M_IPFW_TBL); return (0); } static void ta_destroy_radix(void *ta_state, struct table_info *ti) { struct radix_cfg *cfg; struct radix_node_head *rnh; cfg = (struct radix_cfg *)ta_state; rnh = (struct radix_node_head *)(ti->state); rnh->rnh_walktree(&rnh->rh, flush_radix_entry, rnh); rn_detachhead(&ti->state); rnh = (struct radix_node_head *)(ti->xstate); rnh->rnh_walktree(&rnh->rh, flush_radix_entry, rnh); rn_detachhead(&ti->xstate); free(cfg, M_IPFW); } /* * Provide algo-specific table info */ static void ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) { struct radix_cfg *cfg; cfg = (struct radix_cfg *)ta_state; tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM; tinfo->taclass4 = IPFW_TACLASS_RADIX; tinfo->count4 = cfg->count4; tinfo->itemsize4 = sizeof(struct radix_addr_entry); tinfo->taclass6 = IPFW_TACLASS_RADIX; tinfo->count6 = cfg->count6; tinfo->itemsize6 = sizeof(struct radix_addr_xentry); } static int ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct radix_addr_entry *n; #ifdef INET6 struct radix_addr_xentry *xn; #endif n = (struct radix_addr_entry *)e; /* Guess IPv4/IPv6 radix by sockaddr family */ if (n->addr.sin_family == AF_INET) { tent->k.addr.s_addr = n->addr.sin_addr.s_addr; tent->masklen = n->masklen; tent->subtype = AF_INET; tent->v.kidx = n->value; #ifdef INET6 } else { xn = (struct radix_addr_xentry *)e; memcpy(&tent->k, &xn->addr6.sin6_addr, sizeof(struct in6_addr)); tent->masklen = xn->masklen; tent->subtype = AF_INET6; tent->v.kidx = xn->value; #endif } return (0); } static int ta_find_radix_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct radix_node_head *rnh; void *e; e = NULL; if (tent->subtype == AF_INET) { struct sockaddr_in sa; KEY_LEN(sa) = KEY_LEN_INET; sa.sin_addr.s_addr = tent->k.addr.s_addr; rnh = (struct 
radix_node_head *)ti->state; e = rnh->rnh_matchaddr(&sa, &rnh->rh); } else { struct sa_in6 sa6; KEY_LEN(sa6) = KEY_LEN_INET6; memcpy(&sa6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr)); rnh = (struct radix_node_head *)ti->xstate; e = rnh->rnh_matchaddr(&sa6, &rnh->rh); } if (e != NULL) { ta_dump_radix_tentry(ta_state, ti, e, tent); return (0); } return (ENOENT); } static void ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct radix_node_head *rnh; rnh = (struct radix_node_head *)(ti->state); rnh->rnh_walktree(&rnh->rh, (walktree_f_t *)f, arg); rnh = (struct radix_node_head *)(ti->xstate); rnh->rnh_walktree(&rnh->rh, (walktree_f_t *)f, arg); } #ifdef INET6 static inline void ipv6_writemask(struct in6_addr *addr6, uint8_t mask); static inline void ipv6_writemask(struct in6_addr *addr6, uint8_t mask) { uint32_t *cp; for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) *cp++ = 0xFFFFFFFF; *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); } #endif static void tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa, struct sockaddr *ma, int *set_mask) { int mlen; #ifdef INET struct sockaddr_in *addr, *mask; #endif #ifdef INET6 struct sa_in6 *addr6, *mask6; #endif in_addr_t a4; mlen = tei->masklen; if (tei->subtype == AF_INET) { #ifdef INET addr = (struct sockaddr_in *)sa; mask = (struct sockaddr_in *)ma; /* Set 'total' structure length */ KEY_LEN(*addr) = KEY_LEN_INET; KEY_LEN(*mask) = KEY_LEN_INET; addr->sin_family = AF_INET; mask->sin_addr.s_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); a4 = *((in_addr_t *)tei->paddr); addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr; if (mlen != 32) *set_mask = 1; else *set_mask = 0; #endif #ifdef INET6 } else if (tei->subtype == AF_INET6) { /* IPv6 case */ addr6 = (struct sa_in6 *)sa; mask6 = (struct sa_in6 *)ma; /* Set 'total' structure length */ KEY_LEN(*addr6) = KEY_LEN_INET6; KEY_LEN(*mask6) = KEY_LEN_INET6; addr6->sin6_family = AF_INET6; ipv6_writemask(&mask6->sin6_addr, mlen); memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr)); APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr); if (mlen != 128) *set_mask = 1; else *set_mask = 0; #endif } } static int ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_radix *tb; struct radix_addr_entry *ent; #ifdef INET6 struct radix_addr_xentry *xent; #endif struct sockaddr *addr, *mask; int mlen, set_mask; tb = (struct ta_buf_radix *)ta_buf; mlen = tei->masklen; set_mask = 0; if (tei->subtype == AF_INET) { #ifdef INET if (mlen > 32) return (EINVAL); ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); ent->masklen = mlen; addr = (struct sockaddr *)&ent->addr; mask = (struct sockaddr *)&tb->addr.a4.ma; tb->ent_ptr = ent; #endif #ifdef INET6 } else if (tei->subtype == AF_INET6) { /* IPv6 case */ if (mlen > 128) return (EINVAL); xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); xent->masklen = mlen; addr = (struct sockaddr *)&xent->addr6; mask = (struct sockaddr *)&tb->addr.a6.ma; tb->ent_ptr = xent; #endif } else { /* Unknown CIDR type */ return (EINVAL); } tei_to_sockaddr_ent(tei, addr, mask, &set_mask); /* Set pointers */ tb->addr_ptr = addr; if (set_mask != 0) tb->mask_ptr = mask; return (0); } static int ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct radix_cfg *cfg; struct radix_node_head *rnh; struct radix_node *rn; struct ta_buf_radix *tb; uint32_t *old_value, value; cfg = (struct radix_cfg 
*)ta_state; tb = (struct ta_buf_radix *)ta_buf; /* Save current entry value from @tei */ if (tei->subtype == AF_INET) { rnh = ti->state; ((struct radix_addr_entry *)tb->ent_ptr)->value = tei->value; } else { rnh = ti->xstate; ((struct radix_addr_xentry *)tb->ent_ptr)->value = tei->value; } /* Search for an entry first */ rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, &rnh->rh); if (rn != NULL) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Record already exists. Update value if we're asked to */ if (tei->subtype == AF_INET) old_value = &((struct radix_addr_entry *)rn)->value; else old_value = &((struct radix_addr_xentry *)rn)->value; value = *old_value; *old_value = tei->value; tei->value = value; /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; return (0); } if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, &rnh->rh,tb->ent_ptr); if (rn == NULL) { /* Unknown error */ return (EINVAL); } if (tei->subtype == AF_INET) cfg->count4++; else cfg->count6++; tb->ent_ptr = NULL; *pnum = 1; return (0); } static int ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_radix *tb; struct sockaddr *addr, *mask; int mlen, set_mask; tb = (struct ta_buf_radix *)ta_buf; mlen = tei->masklen; set_mask = 0; if (tei->subtype == AF_INET) { if (mlen > 32) return (EINVAL); addr = (struct sockaddr *)&tb->addr.a4.sa; mask = (struct sockaddr *)&tb->addr.a4.ma; #ifdef INET6 } else if (tei->subtype == AF_INET6) { if (mlen > 128) return (EINVAL); addr = (struct sockaddr *)&tb->addr.a6.sa; mask = (struct sockaddr *)&tb->addr.a6.ma; #endif } else return (EINVAL); tei_to_sockaddr_ent(tei, addr, mask, &set_mask); tb->addr_ptr = addr; if (set_mask != 0) tb->mask_ptr = mask; return (0); } static int ta_del_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct radix_cfg 
*cfg; struct radix_node_head *rnh; struct radix_node *rn; struct ta_buf_radix *tb; cfg = (struct radix_cfg *)ta_state; tb = (struct ta_buf_radix *)ta_buf; if (tei->subtype == AF_INET) rnh = ti->state; else rnh = ti->xstate; rn = rnh->rnh_deladdr(tb->addr_ptr, tb->mask_ptr, &rnh->rh); if (rn == NULL) return (ENOENT); /* Save entry value to @tei */ if (tei->subtype == AF_INET) tei->value = ((struct radix_addr_entry *)rn)->value; else tei->value = ((struct radix_addr_xentry *)rn)->value; tb->ent_ptr = rn; if (tei->subtype == AF_INET) cfg->count4--; else cfg->count6--; *pnum = 1; return (0); } static void ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_radix *tb; tb = (struct ta_buf_radix *)ta_buf; if (tb->ent_ptr != NULL) free(tb->ent_ptr, M_IPFW_TBL); } static int ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags) { /* * radix does not require additional memory allocations * other than nodes itself. Adding new masks to the tree do * but we don't have any API to call (and we don't known which * sizes do we need). 
*/ return (0); } struct table_algo addr_radix = { .name = "addr:radix", .type = IPFW_TABLE_ADDR, .flags = TA_FLAG_DEFAULT, .ta_buf_size = sizeof(struct ta_buf_radix), .init = ta_init_radix, .destroy = ta_destroy_radix, .prepare_add = ta_prepare_add_radix, .prepare_del = ta_prepare_del_radix, .add = ta_add_radix, .del = ta_del_radix, .flush_entry = ta_flush_radix_entry, .foreach = ta_foreach_radix, .dump_tentry = ta_dump_radix_tentry, .find_tentry = ta_find_radix_tentry, .dump_tinfo = ta_dump_radix_tinfo, .need_modify = ta_need_modify_radix, }; /* * addr:hash cmds * * * ti->data: * [inv.mask4][inv.mask6][log2hsize4][log2hsize6] * [ 8][ 8[ 8][ 8] * * inv.mask4: 32 - mask * inv.mask6: * 1) _slow lookup: mask * 2) _aligned: (128 - mask) / 8 * 3) _64: 8 * * * pflags: * [v4=1/v6=0][hsize] * [ 32][ 32] */ struct chashentry; SLIST_HEAD(chashbhead, chashentry); struct chash_cfg { struct chashbhead *head4; struct chashbhead *head6; size_t size4; size_t size6; size_t items4; size_t items6; uint8_t mask4; uint8_t mask6; }; struct chashentry { SLIST_ENTRY(chashentry) next; uint32_t value; uint32_t type; union { uint32_t a4; /* Host format */ struct in6_addr a6; /* Network format */ } a; }; struct ta_buf_chash { void *ent_ptr; struct chashentry ent; }; #ifdef INET static __inline uint32_t hash_ip(uint32_t addr, int hsize); #endif #ifdef INET6 static __inline uint32_t hash_ip6(struct in6_addr *addr6, int hsize); static __inline uint16_t hash_ip64(struct in6_addr *addr6, int hsize); static __inline uint32_t hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize); static __inline uint32_t hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize); #endif static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int 
chash_parse_opts(struct chash_cfg *cfg, char *data); static void ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf, size_t bufsize); static int ta_log2(uint32_t v); static int ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_destroy_chash(void *ta_state, struct table_info *ti); static void ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); static int ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); static uint32_t hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size); static int tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent); static int ta_find_chash_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void ta_foreach_chash(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); static int ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static int ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static void ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_need_modify_chash(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags); static int ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags); static int ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags); static void ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags); static void ta_flush_mod_chash(void *ta_buf); #ifdef INET static __inline uint32_t hash_ip(uint32_t addr, int hsize) { return (addr % (hsize - 1)); } #endif #ifdef INET6 static 
__inline uint32_t hash_ip6(struct in6_addr *addr6, int hsize) { uint32_t i; i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1] ^ addr6->s6_addr32[2] ^ addr6->s6_addr32[3]; return (i % (hsize - 1)); } static __inline uint16_t hash_ip64(struct in6_addr *addr6, int hsize) { uint32_t i; i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1]; return (i % (hsize - 1)); } static __inline uint32_t hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize) { struct in6_addr mask6; ipv6_writemask(&mask6, mask); memcpy(addr6, key, sizeof(struct in6_addr)); APPLY_MASK(addr6, &mask6); return (hash_ip6(addr6, hsize)); } static __inline uint32_t hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize) { uint64_t *paddr; paddr = (uint64_t *)addr6; *paddr = 0; *(paddr + 1) = 0; memcpy(addr6, key, mask); return (hash_ip6(addr6, hsize)); } #endif static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct chashbhead *head; struct chashentry *ent; uint16_t hash, hsize; uint8_t imask; if (keylen == sizeof(in_addr_t)) { #ifdef INET head = (struct chashbhead *)ti->state; imask = ti->data >> 24; hsize = 1 << ((ti->data & 0xFFFF) >> 8); uint32_t a; a = ntohl(*((in_addr_t *)key)); a = a >> imask; hash = hash_ip(a, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; return (1); } } #endif } else { #ifdef INET6 /* IPv6: worst scenario: non-round mask */ struct in6_addr addr6; head = (struct chashbhead *)ti->xstate; imask = (ti->data & 0xFF0000) >> 16; hsize = 1 << (ti->data & 0xFF); hash = hash_ip6_slow(&addr6, key, imask, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (memcmp(&ent->a.a6, &addr6, 16) == 0) { *val = ent->value; return (1); } } #endif } return (0); } static int ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct chashbhead *head; struct chashentry *ent; uint16_t hash, hsize; uint8_t imask; if (keylen == sizeof(in_addr_t)) { #ifdef INET 
head = (struct chashbhead *)ti->state; imask = ti->data >> 24; hsize = 1 << ((ti->data & 0xFFFF) >> 8); uint32_t a; a = ntohl(*((in_addr_t *)key)); a = a >> imask; hash = hash_ip(a, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; return (1); } } #endif } else { #ifdef INET6 /* IPv6: aligned to 8bit mask */ struct in6_addr addr6; uint64_t *paddr, *ptmp; head = (struct chashbhead *)ti->xstate; imask = (ti->data & 0xFF0000) >> 16; hsize = 1 << (ti->data & 0xFF); hash = hash_ip6_al(&addr6, key, imask, hsize); paddr = (uint64_t *)&addr6; SLIST_FOREACH(ent, &head[hash], next) { ptmp = (uint64_t *)&ent->a.a6; if (paddr[0] == ptmp[0] && paddr[1] == ptmp[1]) { *val = ent->value; return (1); } } #endif } return (0); } static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct chashbhead *head; struct chashentry *ent; uint16_t hash, hsize; uint8_t imask; if (keylen == sizeof(in_addr_t)) { #ifdef INET head = (struct chashbhead *)ti->state; imask = ti->data >> 24; hsize = 1 << ((ti->data & 0xFFFF) >> 8); uint32_t a; a = ntohl(*((in_addr_t *)key)); a = a >> imask; hash = hash_ip(a, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; return (1); } } #endif } else { #ifdef INET6 /* IPv6: /64 */ uint64_t a6, *paddr; head = (struct chashbhead *)ti->xstate; paddr = (uint64_t *)key; hsize = 1 << (ti->data & 0xFF); a6 = *paddr; hash = hash_ip64((struct in6_addr *)key, hsize); SLIST_FOREACH(ent, &head[hash], next) { paddr = (uint64_t *)&ent->a.a6; if (a6 == *paddr) { *val = ent->value; return (1); } } #endif } return (0); } static int chash_parse_opts(struct chash_cfg *cfg, char *data) { char *pdel, *pend, *s; int mask4, mask6; mask4 = cfg->mask4; mask6 = cfg->mask6; if (data == NULL) return (0); if ((pdel = strchr(data, ' ')) == NULL) return (0); while (*pdel == ' ') pdel++; if (strncmp(pdel, "masks=", 6) != 0) return (EINVAL); if ((s = strchr(pdel, ' ')) != NULL) 
*s++ = '\0'; pdel += 6; /* Need /XX[,/YY] */ if (*pdel++ != '/') return (EINVAL); mask4 = strtol(pdel, &pend, 10); if (*pend == ',') { /* ,/YY */ pdel = pend + 1; if (*pdel++ != '/') return (EINVAL); mask6 = strtol(pdel, &pend, 10); if (*pend != '\0') return (EINVAL); } else if (*pend != '\0') return (EINVAL); if (mask4 < 0 || mask4 > 32 || mask6 < 0 || mask6 > 128) return (EINVAL); cfg->mask4 = mask4; cfg->mask6 = mask6; return (0); } static void ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf, size_t bufsize) { struct chash_cfg *cfg; cfg = (struct chash_cfg *)ta_state; if (cfg->mask4 != 32 || cfg->mask6 != 128) snprintf(buf, bufsize, "%s masks=/%d,/%d", "addr:hash", cfg->mask4, cfg->mask6); else snprintf(buf, bufsize, "%s", "addr:hash"); } static int ta_log2(uint32_t v) { uint32_t r; r = 0; while (v >>= 1) r++; return (r); } /* * New table. * We assume 'data' to be either NULL or the following format: * 'addr:hash [masks=/32[,/128]]' */ static int ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { int error, i; uint32_t hsize; struct chash_cfg *cfg; cfg = malloc(sizeof(struct chash_cfg), M_IPFW, M_WAITOK | M_ZERO); cfg->mask4 = 32; cfg->mask6 = 128; if ((error = chash_parse_opts(cfg, data)) != 0) { free(cfg, M_IPFW); return (error); } cfg->size4 = 128; cfg->size6 = 128; cfg->head4 = malloc(sizeof(struct chashbhead) * cfg->size4, M_IPFW, M_WAITOK | M_ZERO); cfg->head6 = malloc(sizeof(struct chashbhead) * cfg->size6, M_IPFW, M_WAITOK | M_ZERO); for (i = 0; i < cfg->size4; i++) SLIST_INIT(&cfg->head4[i]); for (i = 0; i < cfg->size6; i++) SLIST_INIT(&cfg->head6[i]); *ta_state = cfg; ti->state = cfg->head4; ti->xstate = cfg->head6; /* Store data depending on v6 mask length */ hsize = ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6); if (cfg->mask6 == 64) { ti->data = (32 - cfg->mask4) << 24 | (128 - cfg->mask6) << 16| hsize; ti->lookup = ta_lookup_chash_64; } else if ((cfg->mask6 % 8) == 0) 
{ ti->data = (32 - cfg->mask4) << 24 | cfg->mask6 << 13 | hsize; ti->lookup = ta_lookup_chash_aligned; } else { /* don't do that! */ ti->data = (32 - cfg->mask4) << 24 | cfg->mask6 << 16 | hsize; ti->lookup = ta_lookup_chash_slow; } return (0); } static void ta_destroy_chash(void *ta_state, struct table_info *ti) { struct chash_cfg *cfg; struct chashentry *ent, *ent_next; int i; cfg = (struct chash_cfg *)ta_state; for (i = 0; i < cfg->size4; i++) SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next) free(ent, M_IPFW_TBL); for (i = 0; i < cfg->size6; i++) SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next) free(ent, M_IPFW_TBL); free(cfg->head4, M_IPFW); free(cfg->head6, M_IPFW); free(cfg, M_IPFW); } static void ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) { struct chash_cfg *cfg; cfg = (struct chash_cfg *)ta_state; tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM; tinfo->taclass4 = IPFW_TACLASS_HASH; tinfo->size4 = cfg->size4; tinfo->count4 = cfg->items4; tinfo->itemsize4 = sizeof(struct chashentry); tinfo->taclass6 = IPFW_TACLASS_HASH; tinfo->size6 = cfg->size6; tinfo->count6 = cfg->items6; tinfo->itemsize6 = sizeof(struct chashentry); } static int ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct chash_cfg *cfg; struct chashentry *ent; cfg = (struct chash_cfg *)ta_state; ent = (struct chashentry *)e; if (ent->type == AF_INET) { tent->k.addr.s_addr = htonl(ent->a.a4 << (32 - cfg->mask4)); tent->masklen = cfg->mask4; tent->subtype = AF_INET; tent->v.kidx = ent->value; #ifdef INET6 } else { memcpy(&tent->k, &ent->a.a6, sizeof(struct in6_addr)); tent->masklen = cfg->mask6; tent->subtype = AF_INET6; tent->v.kidx = ent->value; #endif } return (0); } static uint32_t hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size) { uint32_t hash; hash = 0; if (af == AF_INET) { #ifdef INET hash = hash_ip(ent->a.a4, size); #endif } else { #ifdef INET6 if (mlen == 64) 
hash = hash_ip64(&ent->a.a6, size); else hash = hash_ip6(&ent->a.a6, size); #endif } return (hash); } static int tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent) { int mlen; #ifdef INET6 struct in6_addr mask6; #endif mlen = tei->masklen; if (tei->subtype == AF_INET) { #ifdef INET if (mlen > 32) return (EINVAL); ent->type = AF_INET; /* Calculate masked address */ ent->a.a4 = ntohl(*((in_addr_t *)tei->paddr)) >> (32 - mlen); #endif #ifdef INET6 } else if (tei->subtype == AF_INET6) { /* IPv6 case */ if (mlen > 128) return (EINVAL); ent->type = AF_INET6; ipv6_writemask(&mask6, mlen); memcpy(&ent->a.a6, tei->paddr, sizeof(struct in6_addr)); APPLY_MASK(&ent->a.a6, &mask6); #endif } else { /* Unknown CIDR type */ return (EINVAL); } return (0); } static int ta_find_chash_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct chash_cfg *cfg; struct chashbhead *head; struct chashentry ent, *tmp; struct tentry_info tei; int error; uint32_t hash; cfg = (struct chash_cfg *)ta_state; memset(&ent, 0, sizeof(ent)); memset(&tei, 0, sizeof(tei)); if (tent->subtype == AF_INET) { tei.paddr = &tent->k.addr; tei.masklen = cfg->mask4; tei.subtype = AF_INET; if ((error = tei_to_chash_ent(&tei, &ent)) != 0) return (error); head = cfg->head4; hash = hash_ent(&ent, AF_INET, cfg->mask4, cfg->size4); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (tmp->a.a4 != ent.a.a4) continue; ta_dump_chash_tentry(ta_state, ti, tmp, tent); return (0); } } else { tei.paddr = &tent->k.addr6; tei.masklen = cfg->mask6; tei.subtype = AF_INET6; if ((error = tei_to_chash_ent(&tei, &ent)) != 0) return (error); head = cfg->head6; hash = hash_ent(&ent, AF_INET6, cfg->mask6, cfg->size6); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (memcmp(&tmp->a.a6, &ent.a.a6, 16) != 0) continue; ta_dump_chash_tentry(ta_state, ti, tmp, tent); return (0); } } return (ENOENT); } static void ta_foreach_chash(void *ta_state, struct table_info *ti, 
ta_foreach_f *f, void *arg) { struct chash_cfg *cfg; struct chashentry *ent, *ent_next; int i; cfg = (struct chash_cfg *)ta_state; for (i = 0; i < cfg->size4; i++) SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next) f(ent, arg); for (i = 0; i < cfg->size6; i++) SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next) f(ent, arg); } static int ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_chash *tb; struct chashentry *ent; int error; tb = (struct ta_buf_chash *)ta_buf; ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); error = tei_to_chash_ent(tei, ent); if (error != 0) { free(ent, M_IPFW_TBL); return (error); } tb->ent_ptr = ent; return (0); } static int ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct chash_cfg *cfg; struct chashbhead *head; struct chashentry *ent, *tmp; struct ta_buf_chash *tb; int exists; uint32_t hash, value; cfg = (struct chash_cfg *)ta_state; tb = (struct ta_buf_chash *)ta_buf; ent = (struct chashentry *)tb->ent_ptr; hash = 0; exists = 0; /* Read current value from @tei */ ent->value = tei->value; /* Read cuurrent value */ if (tei->subtype == AF_INET) { if (tei->masklen != cfg->mask4) return (EINVAL); head = cfg->head4; hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (tmp->a.a4 == ent->a.a4) { exists = 1; break; } } } else { if (tei->masklen != cfg->mask6) return (EINVAL); head = cfg->head6; hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6); /* Check for existence */ SLIST_FOREACH(tmp, &head[hash], next) { if (memcmp(&tmp->a.a6, &ent->a.a6, 16) == 0) { exists = 1; break; } } } if (exists == 1) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Record already exists. 
Update value if we're asked to */ value = tmp->value; tmp->value = tei->value; tei->value = value; /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; } else { if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); SLIST_INSERT_HEAD(&head[hash], ent, next); tb->ent_ptr = NULL; *pnum = 1; /* Update counters */ if (tei->subtype == AF_INET) cfg->items4++; else cfg->items6++; } return (0); } static int ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_chash *tb; tb = (struct ta_buf_chash *)ta_buf; return (tei_to_chash_ent(tei, &tb->ent)); } static int ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct chash_cfg *cfg; struct chashbhead *head; struct chashentry *tmp, *tmp_next, *ent; struct ta_buf_chash *tb; uint32_t hash; cfg = (struct chash_cfg *)ta_state; tb = (struct ta_buf_chash *)ta_buf; ent = &tb->ent; if (tei->subtype == AF_INET) { if (tei->masklen != cfg->mask4) return (EINVAL); head = cfg->head4; hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4); SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) { if (tmp->a.a4 != ent->a.a4) continue; SLIST_REMOVE(&head[hash], tmp, chashentry, next); cfg->items4--; tb->ent_ptr = tmp; tei->value = tmp->value; *pnum = 1; return (0); } } else { if (tei->masklen != cfg->mask6) return (EINVAL); head = cfg->head6; hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6); SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) { if (memcmp(&tmp->a.a6, &ent->a.a6, 16) != 0) continue; SLIST_REMOVE(&head[hash], tmp, chashentry, next); cfg->items6--; tb->ent_ptr = tmp; tei->value = tmp->value; *pnum = 1; return (0); } } return (ENOENT); } static void ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_chash *tb; tb = (struct ta_buf_chash *)ta_buf; if (tb->ent_ptr != NULL) free(tb->ent_ptr, M_IPFW_TBL); } /* * Hash growing 
callbacks. */ static int ta_need_modify_chash(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags) { struct chash_cfg *cfg; uint64_t data; /* * Since we don't know exact number of IPv4/IPv6 records in @count, * ignore non-zero @count value at all. Check current hash sizes * and return appropriate data. */ cfg = (struct chash_cfg *)ta_state; data = 0; if (cfg->items4 > cfg->size4 && cfg->size4 < 65536) data |= (cfg->size4 * 2) << 16; if (cfg->items6 > cfg->size6 && cfg->size6 < 65536) data |= cfg->size6 * 2; if (data != 0) { *pflags = data; return (1); } return (0); } /* * Allocate new, larger chash. */ static int ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags) { struct mod_item *mi; struct chashbhead *head; int i; mi = (struct mod_item *)ta_buf; memset(mi, 0, sizeof(struct mod_item)); mi->size = (*pflags >> 16) & 0xFFFF; mi->size6 = *pflags & 0xFFFF; if (mi->size > 0) { head = malloc(sizeof(struct chashbhead) * mi->size, M_IPFW, M_WAITOK | M_ZERO); for (i = 0; i < mi->size; i++) SLIST_INIT(&head[i]); mi->main_ptr = head; } if (mi->size6 > 0) { head = malloc(sizeof(struct chashbhead) * mi->size6, M_IPFW, M_WAITOK | M_ZERO); for (i = 0; i < mi->size6; i++) SLIST_INIT(&head[i]); mi->main_ptr6 = head; } return (0); } /* * Copy data from old runtime array to new one. */ static int ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags) { /* In is not possible to do rehash if we're not holidng WLOCK. */ return (0); } /* * Switch old & new arrays. 
*/
/*
 * ta_modify_chash: moves every entry from the current bucket array(s)
 * into the larger array(s) prepared in the struct mod_item at @ta_buf,
 * publishes the new arrays through @ti and the table config, and leaves
 * the old arrays in the mod_item so that ta_flush_mod_chash() frees them.
 *
 * A hash is only switched if a new array was prepared for it and the
 * current size is still smaller than the prepared one.
 */
static void
ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf,
    uint64_t pflags)
{
	struct mod_item *mi;
	struct chash_cfg *cfg;
	struct chashbhead *old_head, *new_head;
	struct chashentry *ent, *ent_next;
	int af, mlen;
	uint32_t nhash;
	size_t i, old_size, new_size;

	mi = (struct mod_item *)ta_buf;
	cfg = (struct chash_cfg *)ta_state;

	/* Check which hash we need to grow and do we still need that */
	if (mi->size > 0 && cfg->size4 < mi->size) {
		new_head = (struct chashbhead *)mi->main_ptr;
		new_size = mi->size;
		old_size = cfg->size4;
		old_head = ti->state;
		mlen = cfg->mask4;
		af = AF_INET;

		/* Bucket index depends on the size: rehash every entry */
		for (i = 0; i < old_size; i++) {
			SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
				nhash = hash_ent(ent, af, mlen, new_size);
				SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
			}
		}

		ti->state = new_head;
		cfg->head4 = new_head;
		cfg->size4 = mi->size;
		/* Hand the old array back for ta_flush_mod_chash() */
		mi->main_ptr = old_head;
	}

	if (mi->size6 > 0 && cfg->size6 < mi->size6) {
		new_head = (struct chashbhead *)mi->main_ptr6;
		new_size = mi->size6;
		old_size = cfg->size6;
		old_head = ti->xstate;
		mlen = cfg->mask6;
		af = AF_INET6;

		for (i = 0; i < old_size; i++) {
			SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
				nhash = hash_ent(ent, af, mlen, new_size);
				SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
			}
		}

		ti->xstate = new_head;
		cfg->head6 = new_head;
		cfg->size6 = mi->size6;
		mi->main_ptr6 = old_head;
	}

	/*
	 * Refresh only the two log2(size) bytes (bits 0..15).  Bits 16..31
	 * carry the IPv6/IPv4 mask fields written by ta_init_chash() and
	 * read by every lookup function, so they must survive the resize.
	 */
	ti->data &= ~0xFFFFULL;
	ti->data |= ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6);
}

/*
 * Free unneeded array.
*/ static void ta_flush_mod_chash(void *ta_buf) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); if (mi->main_ptr6 != NULL) free(mi->main_ptr6, M_IPFW); } struct table_algo addr_hash = { .name = "addr:hash", .type = IPFW_TABLE_ADDR, .ta_buf_size = sizeof(struct ta_buf_chash), .init = ta_init_chash, .destroy = ta_destroy_chash, .prepare_add = ta_prepare_add_chash, .prepare_del = ta_prepare_del_chash, .add = ta_add_chash, .del = ta_del_chash, .flush_entry = ta_flush_chash_entry, .foreach = ta_foreach_chash, .dump_tentry = ta_dump_chash_tentry, .find_tentry = ta_find_chash_tentry, .print_config = ta_print_chash_config, .dump_tinfo = ta_dump_chash_tinfo, .need_modify = ta_need_modify_chash, .prepare_mod = ta_prepare_mod_chash, .fill_mod = ta_fill_mod_chash, .modify = ta_modify_chash, .flush_mod = ta_flush_mod_chash, }; /* * Iface table cmds. * * Implementation: * * Runtime part: * - sorted array of "struct ifidx" pointed by ti->state. * Array is allocated with rounding up to IFIDX_CHUNK. Only existing * interfaces are stored in array, however its allocated size is * sufficient to hold all table records if needed. * - current array size is stored in ti->data * * Table data: * - "struct iftable_cfg" is allocated to store table state (ta_state). * - All table records are stored inside namedobj instance. 
* */ struct ifidx { uint16_t kidx; uint16_t spare; uint32_t value; }; #define DEFAULT_IFIDX_SIZE 64 struct iftable_cfg; struct ifentry { struct named_object no; struct ipfw_ifc ic; struct iftable_cfg *icfg; uint32_t value; int linked; }; struct iftable_cfg { struct namedobj_instance *ii; struct ip_fw_chain *ch; struct table_info *ti; void *main_ptr; size_t size; /* Number of items allocated in array */ size_t count; /* Number of all items */ size_t used; /* Number of items _active_ now */ }; struct ta_buf_ifidx { struct ifentry *ife; uint32_t value; }; int compare_ifidx(const void *k, const void *v); static struct ifidx * ifidx_find(struct table_info *ti, void *key); static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_change_ti_ifidx(void *ta_state, struct table_info *ti); -static void destroy_ifidx_locked(struct namedobj_instance *ii, +static int destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no, void *arg); static void ta_destroy_ifidx(void *ta_state, struct table_info *ti); static void ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); static int ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static int ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static void ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex); static int ta_need_modify_ifidx(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags); 
static int ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags); static int ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags); static void ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags); static void ta_flush_mod_ifidx(void *ta_buf); static int ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); static int ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); -static void foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, +static int foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, void *arg); static void ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); int compare_ifidx(const void *k, const void *v) { const struct ifidx *ifidx; uint16_t key; key = *((const uint16_t *)k); ifidx = (const struct ifidx *)v; if (key < ifidx->kidx) return (-1); else if (key > ifidx->kidx) return (1); return (0); } /* * Adds item @item with key @key into ascending-sorted array @base. * Assumes @base has enough additional storage. * * Returns 1 on success, 0 on duplicate key. */ static int badd(const void *key, void *item, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)) { int min, max, mid, shift, res; caddr_t paddr; if (nmemb == 0) { memcpy(base, item, size); return (1); } /* Binary search */ min = 0; max = nmemb - 1; mid = 0; while (min <= max) { mid = (min + max) / 2; res = compar(key, (const void *)((caddr_t)base + mid * size)); if (res == 0) return (0); if (res > 0) min = mid + 1; else max = mid - 1; } /* Item not found. 
*/ res = compar(key, (const void *)((caddr_t)base + mid * size)); if (res > 0) shift = mid + 1; else shift = mid; paddr = (caddr_t)base + shift * size; if (nmemb > shift) memmove(paddr + size, paddr, (nmemb - shift) * size); memcpy(paddr, item, size); return (1); } /* * Deletes item with key @key from ascending-sorted array @base. * * Returns 1 on success, 0 for non-existent key. */ static int bdel(const void *key, void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)) { caddr_t item; size_t sz; item = (caddr_t)bsearch(key, base, nmemb, size, compar); if (item == NULL) return (0); sz = (caddr_t)base + nmemb * size - item; if (sz > 0) memmove(item, item + size, sz); return (1); } static struct ifidx * ifidx_find(struct table_info *ti, void *key) { struct ifidx *ifi; ifi = bsearch(key, ti->state, ti->data, sizeof(struct ifidx), compare_ifidx); return (ifi); } static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct ifidx *ifi; ifi = ifidx_find(ti, key); if (ifi != NULL) { *val = ifi->value; return (1); } return (0); } static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { struct iftable_cfg *icfg; icfg = malloc(sizeof(struct iftable_cfg), M_IPFW, M_WAITOK | M_ZERO); icfg->ii = ipfw_objhash_create(DEFAULT_IFIDX_SIZE); icfg->size = DEFAULT_IFIDX_SIZE; icfg->main_ptr = malloc(sizeof(struct ifidx) * icfg->size, M_IPFW, M_WAITOK | M_ZERO); icfg->ch = ch; *ta_state = icfg; ti->state = icfg->main_ptr; ti->lookup = ta_lookup_ifidx; return (0); } /* * Handle tableinfo @ti pointer change (on table array resize). 
*/ static void ta_change_ti_ifidx(void *ta_state, struct table_info *ti) { struct iftable_cfg *icfg; icfg = (struct iftable_cfg *)ta_state; icfg->ti = ti; } -static void +static int destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no, void *arg) { struct ifentry *ife; struct ip_fw_chain *ch; ch = (struct ip_fw_chain *)arg; ife = (struct ifentry *)no; ipfw_iface_del_notify(ch, &ife->ic); ipfw_iface_unref(ch, &ife->ic); free(ife, M_IPFW_TBL); + return (0); } /* * Destroys table @ti */ static void ta_destroy_ifidx(void *ta_state, struct table_info *ti) { struct iftable_cfg *icfg; struct ip_fw_chain *ch; icfg = (struct iftable_cfg *)ta_state; ch = icfg->ch; if (icfg->main_ptr != NULL) free(icfg->main_ptr, M_IPFW); IPFW_UH_WLOCK(ch); ipfw_objhash_foreach(icfg->ii, destroy_ifidx_locked, ch); IPFW_UH_WUNLOCK(ch); ipfw_objhash_destroy(icfg->ii); free(icfg, M_IPFW); } /* * Provide algo-specific table info */ static void ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) { struct iftable_cfg *cfg; cfg = (struct iftable_cfg *)ta_state; tinfo->taclass4 = IPFW_TACLASS_ARRAY; tinfo->size4 = cfg->size; tinfo->count4 = cfg->used; tinfo->itemsize4 = sizeof(struct ifidx); } /* * Prepare state to add to the table: * allocate ifentry and reference needed interface. 
*/ static int ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_ifidx *tb; char *ifname; struct ifentry *ife; tb = (struct ta_buf_ifidx *)ta_buf; /* Check if string is terminated */ ifname = (char *)tei->paddr; if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) return (EINVAL); ife = malloc(sizeof(struct ifentry), M_IPFW_TBL, M_WAITOK | M_ZERO); ife->ic.cb = if_notifier; ife->ic.cbdata = ife; if (ipfw_iface_ref(ch, ifname, &ife->ic) != 0) { free(ife, M_IPFW_TBL); return (EINVAL); } /* Use ipfw_iface 'ifname' field as stable storage */ ife->no.name = ife->ic.iface->ifname; tb->ife = ife; return (0); } static int ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct iftable_cfg *icfg; struct ifentry *ife, *tmp; struct ta_buf_ifidx *tb; struct ipfw_iface *iif; struct ifidx *ifi; char *ifname; uint32_t value; tb = (struct ta_buf_ifidx *)ta_buf; ifname = (char *)tei->paddr; icfg = (struct iftable_cfg *)ta_state; ife = tb->ife; ife->icfg = icfg; ife->value = tei->value; tmp = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); if (tmp != NULL) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Exchange values in @tmp and @tei */ value = tmp->value; tmp->value = tei->value; tei->value = value; iif = tmp->ic.iface; if (iif->resolved != 0) { /* We have to update runtime value, too */ ifi = ifidx_find(ti, &iif->ifindex); ifi->value = ife->value; } /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; return (0); } if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); /* Link to internal list */ ipfw_objhash_add(icfg->ii, &ife->no); /* Link notifier (possible running its callback) */ ipfw_iface_add_notify(icfg->ch, &ife->ic); icfg->count++; tb->ife = NULL; *pnum = 1; return (0); } /* * Prepare to delete key from table. * Do basic interface name checks. 
*/ static int ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_ifidx *tb; char *ifname; tb = (struct ta_buf_ifidx *)ta_buf; /* Check if string is terminated */ ifname = (char *)tei->paddr; if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) return (EINVAL); return (0); } /* * Remove key from both configuration list and * runtime array. Removed interface notification. */ static int ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct iftable_cfg *icfg; struct ifentry *ife; struct ta_buf_ifidx *tb; char *ifname; uint16_t ifindex; int res; tb = (struct ta_buf_ifidx *)ta_buf; ifname = (char *)tei->paddr; icfg = (struct iftable_cfg *)ta_state; ife = tb->ife; ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); if (ife == NULL) return (ENOENT); if (ife->linked != 0) { /* We have to remove item from runtime */ ifindex = ife->ic.iface->ifindex; res = bdel(&ifindex, icfg->main_ptr, icfg->used, sizeof(struct ifidx), compare_ifidx); KASSERT(res == 1, ("index %d does not exist", ifindex)); icfg->used--; ti->data = icfg->used; ife->linked = 0; } /* Unlink from local list */ ipfw_objhash_del(icfg->ii, &ife->no); /* Unlink notifier and deref */ ipfw_iface_del_notify(icfg->ch, &ife->ic); ipfw_iface_unref(icfg->ch, &ife->ic); icfg->count--; tei->value = ife->value; tb->ife = ife; *pnum = 1; return (0); } /* * Flush deleted entry. * Drops interface reference and frees entry. */ static void ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_ifidx *tb; tb = (struct ta_buf_ifidx *)ta_buf; if (tb->ife != NULL) free(tb->ife, M_IPFW_TBL); } /* * Handle interface announce/withdrawal for particular table. * Every real runtime array modification happens here. 
*/ static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex) { struct ifentry *ife; struct ifidx ifi; struct iftable_cfg *icfg; struct table_info *ti; int res; ife = (struct ifentry *)cbdata; icfg = ife->icfg; ti = icfg->ti; KASSERT(ti != NULL, ("ti=NULL, check change_ti handler")); if (ife->linked == 0 && ifindex != 0) { /* Interface announce */ ifi.kidx = ifindex; ifi.spare = 0; ifi.value = ife->value; res = badd(&ifindex, &ifi, icfg->main_ptr, icfg->used, sizeof(struct ifidx), compare_ifidx); KASSERT(res == 1, ("index %d already exists", ifindex)); icfg->used++; ti->data = icfg->used; ife->linked = 1; } else if (ife->linked != 0 && ifindex == 0) { /* Interface withdrawal */ ifindex = ife->ic.iface->ifindex; res = bdel(&ifindex, icfg->main_ptr, icfg->used, sizeof(struct ifidx), compare_ifidx); KASSERT(res == 1, ("index %d does not exist", ifindex)); icfg->used--; ti->data = icfg->used; ife->linked = 0; } } /* * Table growing callbacks. */ static int ta_need_modify_ifidx(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags) { struct iftable_cfg *cfg; uint32_t size; cfg = (struct iftable_cfg *)ta_state; size = cfg->size; while (size < cfg->count + count) size *= 2; if (size != cfg->size) { *pflags = size; return (1); } return (0); } /* * Allocate ned, larger runtime ifidx array. */ static int ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; memset(mi, 0, sizeof(struct mod_item)); mi->size = *pflags; mi->main_ptr = malloc(sizeof(struct ifidx) * mi->size, M_IPFW, M_WAITOK | M_ZERO); return (0); } /* * Copy data from old runtime array to new one. 
*/ static int ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags) { struct mod_item *mi; struct iftable_cfg *icfg; mi = (struct mod_item *)ta_buf; icfg = (struct iftable_cfg *)ta_state; /* Check if we still need to grow array */ if (icfg->size >= mi->size) { *pflags = 0; return (0); } memcpy(mi->main_ptr, icfg->main_ptr, icfg->used * sizeof(struct ifidx)); return (0); } /* * Switch old & new arrays. */ static void ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags) { struct mod_item *mi; struct iftable_cfg *icfg; void *old_ptr; mi = (struct mod_item *)ta_buf; icfg = (struct iftable_cfg *)ta_state; old_ptr = icfg->main_ptr; icfg->main_ptr = mi->main_ptr; icfg->size = mi->size; ti->state = icfg->main_ptr; mi->main_ptr = old_ptr; } /* * Free unneded array. */ static void ta_flush_mod_ifidx(void *ta_buf) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); } static int ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct ifentry *ife; ife = (struct ifentry *)e; tent->masklen = 8 * IF_NAMESIZE; memcpy(&tent->k, ife->no.name, IF_NAMESIZE); tent->v.kidx = ife->value; return (0); } static int ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct iftable_cfg *icfg; struct ifentry *ife; char *ifname; icfg = (struct iftable_cfg *)ta_state; ifname = tent->k.iface; if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) return (EINVAL); ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); if (ife != NULL) { ta_dump_ifidx_tentry(ta_state, ti, ife, tent); return (0); } return (ENOENT); } struct wa_ifidx { ta_foreach_f *f; void *arg; }; -static void +static int foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, void *arg) { struct ifentry *ife; struct wa_ifidx *wa; ife = (struct ifentry *)no; wa = (struct wa_ifidx *)arg; wa->f(ife, wa->arg); + 
return (0); } static void ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct iftable_cfg *icfg; struct wa_ifidx wa; icfg = (struct iftable_cfg *)ta_state; wa.f = f; wa.arg = arg; ipfw_objhash_foreach(icfg->ii, foreach_ifidx, &wa); } struct table_algo iface_idx = { .name = "iface:array", .type = IPFW_TABLE_INTERFACE, .flags = TA_FLAG_DEFAULT, .ta_buf_size = sizeof(struct ta_buf_ifidx), .init = ta_init_ifidx, .destroy = ta_destroy_ifidx, .prepare_add = ta_prepare_add_ifidx, .prepare_del = ta_prepare_del_ifidx, .add = ta_add_ifidx, .del = ta_del_ifidx, .flush_entry = ta_flush_ifidx_entry, .foreach = ta_foreach_ifidx, .dump_tentry = ta_dump_ifidx_tentry, .find_tentry = ta_find_ifidx_tentry, .dump_tinfo = ta_dump_ifidx_tinfo, .need_modify = ta_need_modify_ifidx, .prepare_mod = ta_prepare_mod_ifidx, .fill_mod = ta_fill_mod_ifidx, .modify = ta_modify_ifidx, .flush_mod = ta_flush_mod_ifidx, .change_ti = ta_change_ti_ifidx, }; /* * Number array cmds. * * Implementation: * * Runtime part: * - sorted array of "struct numarray" pointed by ti->state. * Array is allocated with rounding up to NUMARRAY_CHUNK. 
* - current array size is stored in ti->data * */ struct numarray { uint32_t number; uint32_t value; }; struct numarray_cfg { void *main_ptr; size_t size; /* Number of items allocated in array */ size_t used; /* Number of items _active_ now */ }; struct ta_buf_numarray { struct numarray na; }; int compare_numarray(const void *k, const void *v); static struct numarray *numarray_find(struct table_info *ti, void *key); static int ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_destroy_numarray(void *ta_state, struct table_info *ti); static void ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); static int ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static int ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static void ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_need_modify_numarray(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags); static int ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags); static int ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags); static void ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags); static void ta_flush_mod_numarray(void *ta_buf); static int ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); static int ta_find_numarray_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); int 
compare_numarray(const void *k, const void *v) { const struct numarray *na; uint32_t key; key = *((const uint32_t *)k); na = (const struct numarray *)v; if (key < na->number) return (-1); else if (key > na->number) return (1); return (0); } static struct numarray * numarray_find(struct table_info *ti, void *key) { struct numarray *ri; ri = bsearch(key, ti->state, ti->data, sizeof(struct numarray), compare_ifidx); return (ri); } static int ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct numarray *ri; ri = numarray_find(ti, key); if (ri != NULL) { *val = ri->value; return (1); } return (0); } static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags) { struct numarray_cfg *cfg; cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO); cfg->size = 16; cfg->main_ptr = malloc(sizeof(struct numarray) * cfg->size, M_IPFW, M_WAITOK | M_ZERO); *ta_state = cfg; ti->state = cfg->main_ptr; ti->lookup = ta_lookup_numarray; return (0); } /* * Destroys table @ti */ static void ta_destroy_numarray(void *ta_state, struct table_info *ti) { struct numarray_cfg *cfg; cfg = (struct numarray_cfg *)ta_state; if (cfg->main_ptr != NULL) free(cfg->main_ptr, M_IPFW); free(cfg, M_IPFW); } /* * Provide algo-specific table info */ static void ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) { struct numarray_cfg *cfg; cfg = (struct numarray_cfg *)ta_state; tinfo->taclass4 = IPFW_TACLASS_ARRAY; tinfo->size4 = cfg->size; tinfo->count4 = cfg->used; tinfo->itemsize4 = sizeof(struct numarray); } /* * Prepare for addition/deletion to an array. 
*/ static int ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf) { struct ta_buf_numarray *tb; tb = (struct ta_buf_numarray *)ta_buf; tb->na.number = *((uint32_t *)tei->paddr); return (0); } static int ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum) { struct numarray_cfg *cfg; struct ta_buf_numarray *tb; struct numarray *ri; int res; uint32_t value; tb = (struct ta_buf_numarray *)ta_buf; cfg = (struct numarray_cfg *)ta_state; /* Read current value from @tei */ tb->na.value = tei->value; ri = numarray_find(ti, &tb->na.number); if (ri != NULL) { if ((tei->flags & TEI_FLAGS_UPDATE) == 0) return (EEXIST); /* Exchange values between ri and @tei */ value = ri->value; ri->value = tei->value; tei->value = value; /* Indicate that update has happened instead of addition */ tei->flags |= TEI_FLAGS_UPDATED; *pnum = 0; return (0); } if ((tei->flags & TEI_FLAGS_DONTADD) != 0) return (EFBIG); res = badd(&tb->na.number, &tb->na, cfg->main_ptr, cfg->used, sizeof(struct numarray), compare_numarray); KASSERT(res == 1, ("number %d already exists", tb->na.number)); cfg->used++; ti->data = cfg->used; *pnum = 1; return (0); } /* * Remove key from both configuration list and * runtime array. Removed interface notification. 
*/
static int
ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei,
    void *ta_buf, uint32_t *pnum)
{
	struct numarray_cfg *cfg;
	struct ta_buf_numarray *tb;
	struct numarray *ri;
	int res;

	tb = (struct ta_buf_numarray *)ta_buf;
	cfg = (struct numarray_cfg *)ta_state;

	ri = numarray_find(ti, &tb->na.number);
	if (ri == NULL)
		return (ENOENT);

	/* Report the value of the item being removed back via @tei. */
	tei->value = ri->value;

	res = bdel(&tb->na.number, cfg->main_ptr, cfg->used,
	    sizeof(struct numarray), compare_numarray);

	KASSERT(res == 1, ("number %u does not exist", tb->na.number));
	cfg->used--;
	/* Keep the runtime item count in sync with the configuration. */
	ti->data = cfg->used;
	*pnum = 1;

	return (0);
}

/*
 * Per-entry cleanup hook; intentionally empty for this algorithm.
 */
static void
ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
    void *ta_buf)
{

	/* We don't have any state, do nothing */
}

/*
 * Table growing callbacks.
 */

/*
 * Checks whether the array has room for @count more items.
 * Returns 1 and stores the required size (current size doubled until
 * it fits) in @pflags if the array has to grow, 0 otherwise.
 */
static int
ta_need_modify_numarray(void *ta_state, struct table_info *ti, uint32_t count,
    uint64_t *pflags)
{
	struct numarray_cfg *cfg;
	size_t size;

	cfg = (struct numarray_cfg *)ta_state;

	size = cfg->size;
	while (size < cfg->used + count)
		size *= 2;

	if (size != cfg->size) {
		*pflags = size;
		return (1);
	}

	return (0);
}

/*
 * Allocate new, larger runtime array.
 *
 * The requested item count is taken from @pflags; the zeroed array is
 * stored in the mod_item kept in @ta_buf.
 */
static int
ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags)
{
	struct mod_item *mi;

	mi = (struct mod_item *)ta_buf;

	memset(mi, 0, sizeof(struct mod_item));
	mi->size = *pflags;
	mi->main_ptr = malloc(sizeof(struct numarray) * mi->size, M_IPFW,
	    M_WAITOK | M_ZERO);

	return (0);
}

/*
 * Copy data from old runtime array to new one.
 *
 * If the live array is already at least as large as the prepared one,
 * @pflags is cleared and nothing is copied.
 */
static int
ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf,
    uint64_t *pflags)
{
	struct mod_item *mi;
	struct numarray_cfg *cfg;

	mi = (struct mod_item *)ta_buf;
	cfg = (struct numarray_cfg *)ta_state;

	/* Check if we still need to grow array */
	if (cfg->size >= mi->size) {
		*pflags = 0;
		return (0);
	}

	memcpy(mi->main_ptr, cfg->main_ptr, cfg->used * sizeof(struct numarray));

	return (0);
}

/*
 * Switch old & new arrays.
*/ static void ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags) { struct mod_item *mi; struct numarray_cfg *cfg; void *old_ptr; mi = (struct mod_item *)ta_buf; cfg = (struct numarray_cfg *)ta_state; old_ptr = cfg->main_ptr; cfg->main_ptr = mi->main_ptr; cfg->size = mi->size; ti->state = cfg->main_ptr; mi->main_ptr = old_ptr; } /* * Free unneded array. */ static void ta_flush_mod_numarray(void *ta_buf) { struct mod_item *mi; mi = (struct mod_item *)ta_buf; if (mi->main_ptr != NULL) free(mi->main_ptr, M_IPFW); } static int ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct numarray *na; na = (struct numarray *)e; tent->k.key = na->number; tent->v.kidx = na->value; return (0); } static int ta_find_numarray_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent) { struct numarray_cfg *cfg; struct numarray *ri; cfg = (struct numarray_cfg *)ta_state; ri = numarray_find(ti, &tent->k.key); if (ri != NULL) { ta_dump_numarray_tentry(ta_state, ti, ri, tent); return (0); } return (ENOENT); } static void ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { struct numarray_cfg *cfg; struct numarray *array; int i; cfg = (struct numarray_cfg *)ta_state; array = cfg->main_ptr; for (i = 0; i < cfg->used; i++) f(&array[i], arg); } struct table_algo number_array = { .name = "number:array", .type = IPFW_TABLE_NUMBER, .ta_buf_size = sizeof(struct ta_buf_numarray), .init = ta_init_numarray, .destroy = ta_destroy_numarray, .prepare_add = ta_prepare_add_numarray, .prepare_del = ta_prepare_add_numarray, .add = ta_add_numarray, .del = ta_del_numarray, .flush_entry = ta_flush_numarray_entry, .foreach = ta_foreach_numarray, .dump_tentry = ta_dump_numarray_tentry, .find_tentry = ta_find_numarray_tentry, .dump_tinfo = ta_dump_numarray_tinfo, .need_modify = ta_need_modify_numarray, .prepare_mod = ta_prepare_mod_numarray, .fill_mod = ta_fill_mod_numarray, 
.modify = ta_modify_numarray, .flush_mod = ta_flush_mod_numarray, }; /* * flow:hash cmds * * * ti->data: * [inv.mask4][inv.mask6][log2hsize4][log2hsize6] * [ 8][ 8[ 8][ 8] * * inv.mask4: 32 - mask * inv.mask6: * 1) _slow lookup: mask * 2) _aligned: (128 - mask) / 8 * 3) _64: 8 * * * pflags: * [hsize4][hsize6] * [ 16][ 16] */ struct fhashentry; SLIST_HEAD(fhashbhead, fhashentry); struct fhashentry { SLIST_ENTRY(fhashentry) next; uint8_t af; uint8_t proto; uint16_t spare0; uint16_t dport; uint16_t sport; uint32_t value; uint32_t spare1; }; struct fhashentry4 { struct fhashentry e; struct in_addr dip; struct in_addr sip; }; struct fhashentry6 { struct fhashentry e; struct in6_addr dip6; struct in6_addr sip6; }; struct fhash_cfg { struct fhashbhead *head; size_t size; size_t items; struct fhashentry4 fe4; struct fhashentry6 fe6; }; struct ta_buf_fhash { void *ent_ptr; struct fhashentry6 fe6; }; static __inline int cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz); static __inline uint32_t hash_flow4(struct fhashentry4 *f, int hsize); static __inline uint32_t hash_flow6(struct fhashentry6 *f, int hsize); static uint32_t hash_flow_ent(struct fhashentry *ent, uint32_t size); static int ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); static int ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_destroy_fhash(void *ta_state, struct table_info *ti); static void ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); static int ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent); static int tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent); static int ta_find_fhash_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); static int ta_prepare_add_fhash(struct 
ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static int ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, void *ta_buf, uint32_t *pnum); static void ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, void *ta_buf); static int ta_need_modify_fhash(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags); static int ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags); static int ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t *pflags); static void ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags); static void ta_flush_mod_fhash(void *ta_buf); static __inline int cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz) { uint64_t *ka, *kb; ka = (uint64_t *)(&a->next + 1); kb = (uint64_t *)(&b->next + 1); if (*ka == *kb && (memcmp(a + 1, b + 1, sz) == 0)) return (1); return (0); } static __inline uint32_t hash_flow4(struct fhashentry4 *f, int hsize) { uint32_t i; i = (f->dip.s_addr) ^ (f->sip.s_addr) ^ (f->e.dport) ^ (f->e.sport); return (i % (hsize - 1)); } static __inline uint32_t hash_flow6(struct fhashentry6 *f, int hsize) { uint32_t i; i = (f->dip6.__u6_addr.__u6_addr32[2]) ^ (f->dip6.__u6_addr.__u6_addr32[3]) ^ (f->sip6.__u6_addr.__u6_addr32[2]) ^ (f->sip6.__u6_addr.__u6_addr32[3]) ^ (f->e.dport) ^ (f->e.sport); return (i % (hsize - 1)); } static uint32_t hash_flow_ent(struct fhashentry *ent, uint32_t size) { uint32_t hash; if (ent->af == AF_INET) { hash = hash_flow4((struct fhashentry4 *)ent, size); } else { hash = hash_flow6((struct fhashentry6 *)ent, size); } return (hash); } static int ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val) { struct fhashbhead *head; 
struct fhashentry *ent; struct fhashentry4 *m4; struct ipfw_flow_id *id; uint16_t hash, hsize; id = (struct ipfw_flow_id *)key; head = (struct fhashbhead *)ti->state; hsize = ti->data; m4 = (struct fhashentry4 *)ti->xstate; if (id->addr_type == 4) { struct fhashentry4 f; /* Copy hash mask */ f = *m4; f.dip.s_addr &= id->dst_ip; f.sip.s_addr &= id->src_ip; f.e.dport &= id->dst_port; f.e.sport &= id->src_port; f.e.proto &= id->proto; hash = hash_flow4(&f, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) { *val = ent->value; return (1); } } } else if (id->addr_type == 6) { struct fhashentry6 f; uint64_t *fp, *idp; /* Copy hash mask */ f = *((struct fhashentry6 *)(m4 + 1)); /* Handle lack of __u6_addr.__u6_addr64 */ fp = (uint64_t *)&f.dip6; idp = (uint64_t *)&id->dst_ip6; /* src IPv6 is stored after dst IPv6 */ *fp++ &= *idp++; *fp++ &= *idp++; *fp++ &= *idp++; *fp &= *idp; f.e.dport &= id->dst_port; f.e.sport &= id->src_port; f.e.proto &= id->proto; hash = hash_flow6(&f, hsize); SLIST_FOREACH(ent, &head[hash], next) { if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) { *val = ent->value; return (1); } } } return (0); } /* * New table. 
*/
static int
ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
    char *data, uint8_t tflags)
{
	int i;
	struct fhash_cfg *cfg;
	struct fhashentry4 *fe4;
	struct fhashentry6 *fe6;

	cfg = malloc(sizeof(struct fhash_cfg), M_IPFW, M_WAITOK | M_ZERO);

	/* Start with 512 empty buckets. */
	cfg->size = 512;

	cfg->head = malloc(sizeof(struct fhashbhead) * cfg->size, M_IPFW,
	    M_WAITOK | M_ZERO);
	for (i = 0; i < cfg->size; i++)
		SLIST_INIT(&cfg->head[i]);

	/* Fill in fe masks based on @tflags */
	fe4 = &cfg->fe4;
	fe6 = &cfg->fe6;
	if (tflags & IPFW_TFFLAG_SRCIP) {
		memset(&fe4->sip, 0xFF, sizeof(fe4->sip));
		memset(&fe6->sip6, 0xFF, sizeof(fe6->sip6));
	}
	if (tflags & IPFW_TFFLAG_DSTIP) {
		memset(&fe4->dip, 0xFF, sizeof(fe4->dip));
		memset(&fe6->dip6, 0xFF, sizeof(fe6->dip6));
	}
	if (tflags & IPFW_TFFLAG_SRCPORT) {
		memset(&fe4->e.sport, 0xFF, sizeof(fe4->e.sport));
		memset(&fe6->e.sport, 0xFF, sizeof(fe6->e.sport));
	}
	if (tflags & IPFW_TFFLAG_DSTPORT) {
		memset(&fe4->e.dport, 0xFF, sizeof(fe4->e.dport));
		memset(&fe6->e.dport, 0xFF, sizeof(fe6->e.dport));
	}
	if (tflags & IPFW_TFFLAG_PROTO) {
		memset(&fe4->e.proto, 0xFF, sizeof(fe4->e.proto));
		memset(&fe6->e.proto, 0xFF, sizeof(fe6->e.proto));
	}

	fe4->e.af = AF_INET;
	fe6->e.af = AF_INET6;

	/* Publish buckets, masks, bucket count and lookup handler. */
	*ta_state = cfg;
	ti->state = cfg->head;
	ti->xstate = &cfg->fe4;
	ti->data = cfg->size;
	ti->lookup = ta_lookup_fhash;

	return (0);
}

/*
 * Destroys the table: frees every entry in every bucket, then the
 * bucket array and the configuration itself.
 */
static void
ta_destroy_fhash(void *ta_state, struct table_info *ti)
{
	struct fhash_cfg *cfg;
	struct fhashentry *ent, *ent_next;
	int i;

	cfg = (struct fhash_cfg *)ta_state;

	for (i = 0; i < cfg->size; i++)
		SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next)
			free(ent, M_IPFW_TBL);

	free(cfg->head, M_IPFW);
	free(cfg, M_IPFW);
}

/*
 * Provide algo-specific table info
 */
static void
ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti,
    ipfw_ta_tinfo *tinfo)
{
	struct fhash_cfg *cfg;

	cfg = (struct fhash_cfg *)ta_state;

	tinfo->flags = IPFW_TATFLAGS_AFITEM;
	tinfo->taclass4 = IPFW_TACLASS_HASH;
	tinfo->size4 = cfg->size;
	tinfo->count4 = cfg->items;
	tinfo->itemsize4 = sizeof(struct fhashentry4);
	tinfo->itemsize6 = sizeof(struct fhashentry6);
}

/*
 * Exports flow entry @e into @tent. Ports and IPv4 addresses are
 * converted with htons()/htonl(); IPv6 addresses are copied as is.
 * Always returns 0.
 */
static int
ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e,
    ipfw_obj_tentry *tent)
{
	struct fhash_cfg *cfg;
	struct fhashentry *ent;
	struct fhashentry4 *fe4;
#ifdef INET6
	struct fhashentry6 *fe6;
#endif
	struct tflow_entry *tfe;

	cfg = (struct fhash_cfg *)ta_state;
	ent = (struct fhashentry *)e;
	tfe = &tent->k.flow;

	tfe->af = ent->af;
	tfe->proto = ent->proto;
	tfe->dport = htons(ent->dport);
	tfe->sport = htons(ent->sport);
	tent->v.kidx = ent->value;
	tent->subtype = ent->af;

	if (ent->af == AF_INET) {
		fe4 = (struct fhashentry4 *)ent;
		tfe->a.a4.sip.s_addr = htonl(fe4->sip.s_addr);
		tfe->a.a4.dip.s_addr = htonl(fe4->dip.s_addr);
		tent->masklen = 32;
#ifdef INET6
	} else {
		fe6 = (struct fhashentry6 *)ent;
		tfe->a.a6.sip6 = fe6->sip6;
		tfe->a.a6.dip6 = fe6->dip6;
		tent->masklen = 128;
#endif
	}

	return (0);
}

/*
 * Fills flow entry @ent from the request in @tei (the inverse of
 * ta_dump_fhash_tentry(): ports and IPv4 addresses go through
 * ntohs()/ntohl()). @ent must be large enough for the address family
 * given in tei->subtype.
 *
 * Returns 0 on success, EINVAL for an unsupported address family.
 */
static int
tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent)
{
#ifdef INET
	struct fhashentry4 *fe4;
#endif
#ifdef INET6
	struct fhashentry6 *fe6;
#endif
	struct tflow_entry *tfe;

	tfe = (struct tflow_entry *)tei->paddr;

	ent->af = tei->subtype;
	ent->proto = tfe->proto;
	ent->dport = ntohs(tfe->dport);
	ent->sport = ntohs(tfe->sport);

	if (tei->subtype == AF_INET) {
#ifdef INET
		fe4 = (struct fhashentry4 *)ent;
		fe4->sip.s_addr = ntohl(tfe->a.a4.sip.s_addr);
		fe4->dip.s_addr = ntohl(tfe->a.a4.dip.s_addr);
#endif
#ifdef INET6
	} else if (tei->subtype == AF_INET6) {
		fe6 = (struct fhashentry6 *)ent;
		fe6->sip6 = tfe->a.a6.sip6;
		fe6->dip6 = tfe->a.a6.dip6;
#endif
	} else {
		/* Unknown CIDR type */
		return (EINVAL);
	}

	return (0);
}

/*
 * Looks up the flow described by @tent and, on match, overwrites @tent
 * with the stored entry.
 *
 * Returns 0 on success, ENOENT if there is no such flow, or the error
 * from tei_to_fhash_ent().
 */
static int
ta_find_fhash_tentry(void *ta_state, struct table_info *ti,
    ipfw_obj_tentry *tent)
{
	struct fhash_cfg *cfg;
	struct fhashbhead *head;
	struct fhashentry *ent, *tmp;
	struct fhashentry6 fe6;
	struct tentry_info tei;
	int error;
	uint32_t hash;
	size_t sz;

	cfg = (struct fhash_cfg *)ta_state;

	/* The IPv6-sized on-stack entry serves both address families. */
	ent = &fe6.e;

	memset(&fe6, 0, sizeof(fe6));
	memset(&tei, 0, sizeof(tei));

	tei.paddr = &tent->k.flow;
	tei.subtype = tent->subtype;

	if ((error = tei_to_fhash_ent(&tei, ent)) != 0)
		return (error);

	head = cfg->head;
	hash = hash_flow_ent(ent, cfg->size);

	if (tei.subtype == AF_INET)
		sz = 2 * sizeof(struct in_addr);
	else
		sz = 2 * sizeof(struct in6_addr);

	/* Check for existence */
	SLIST_FOREACH(tmp, &head[hash], next) {
		if (cmp_flow_ent(tmp, ent, sz) != 0) {
			ta_dump_fhash_tentry(ta_state, ti, tmp, tent);
			return (0);
		}
	}

	return (ENOENT);
}

/*
 * Calls @f with @arg for every entry in every bucket.
 */
static void
ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f,
    void *arg)
{
	struct fhash_cfg *cfg;
	struct fhashentry *ent, *ent_next;
	int i;

	cfg = (struct fhash_cfg *)ta_state;

	for (i = 0; i < cfg->size; i++)
		SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next)
			f(ent, arg);
}

/*
 * Allocates and fills a new flow entry for the request in @tei and
 * stores it in the request buffer (tb->ent_ptr).
 *
 * Returns 0 on success, EINVAL for an unsupported address family, or
 * the error from tei_to_fhash_ent() (the entry is freed in that case).
 */
static int
ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
    void *ta_buf)
{
	struct ta_buf_fhash *tb;
	struct fhashentry *ent;
	size_t sz;
	int error;

	tb = (struct ta_buf_fhash *)ta_buf;

	if (tei->subtype == AF_INET)
		sz = sizeof(struct fhashentry4);
	else if (tei->subtype == AF_INET6)
		sz = sizeof(struct fhashentry6);
	else
		return (EINVAL);

	ent = malloc(sz, M_IPFW_TBL, M_WAITOK | M_ZERO);

	error = tei_to_fhash_ent(tei, ent);
	if (error != 0) {
		free(ent, M_IPFW_TBL);
		return (error);
	}
	tb->ent_ptr = ent;

	return (0);
}

/*
 * Links the entry prepared in @ta_buf into the hash, or updates the
 * value of an already existing flow.
 *
 * - Flow exists and TEI_FLAGS_UPDATE is set: the stored value and
 *   tei->value are swapped, TEI_FLAGS_UPDATED is raised, *pnum = 0.
 *   tb->ent_ptr is left set, so ta_flush_fhash_entry() frees the
 *   unused prepared entry.
 * - Flow exists without TEI_FLAGS_UPDATE: EEXIST.
 * - Flow is new but TEI_FLAGS_DONTADD is set: EFBIG.
 * - Otherwise the entry is inserted, tb->ent_ptr is cleared (the table
 *   now owns the entry), the item counter is bumped and *pnum = 1.
 */
static int
ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
    void *ta_buf, uint32_t *pnum)
{
	struct fhash_cfg *cfg;
	struct fhashbhead *head;
	struct fhashentry *ent, *tmp;
	struct ta_buf_fhash *tb;
	int exists;
	uint32_t hash, value;
	size_t sz;

	cfg = (struct fhash_cfg *)ta_state;
	tb = (struct ta_buf_fhash *)ta_buf;
	ent = (struct fhashentry *)tb->ent_ptr;
	exists = 0;

	/* Read current value from @tei */
	ent->value = tei->value;

	head = cfg->head;
	hash = hash_flow_ent(ent, cfg->size);

	if (tei->subtype == AF_INET)
		sz = 2 * sizeof(struct in_addr);
	else
		sz = 2 * sizeof(struct in6_addr);

	/* Check for existence */
	SLIST_FOREACH(tmp, &head[hash], next) {
		if (cmp_flow_ent(tmp, ent, sz) != 0) {
			exists = 1;
			break;
		}
	}

	if (exists == 1) {
		if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
			return (EEXIST);
		/* Record already exists. Update value if we're asked to */
		/* Exchange values between tmp and @tei */
		value = tmp->value;
		tmp->value = tei->value;
		tei->value = value;
		/* Indicate that update has happened instead of addition */
		tei->flags |= TEI_FLAGS_UPDATED;
		*pnum = 0;
	} else {
		if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
			return (EFBIG);

		SLIST_INSERT_HEAD(&head[hash], ent, next);
		tb->ent_ptr = NULL;
		*pnum = 1;

		/* Update counters and check if we need to grow hash */
		cfg->items++;
	}

	return (0);
}

/*
 * Builds the search key for a deletion inside the request buffer
 * (tb->fe6); nothing is allocated.
 */
static int
ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
    void *ta_buf)
{
	struct ta_buf_fhash *tb;

	tb = (struct ta_buf_fhash *)ta_buf;

	return (tei_to_fhash_ent(tei, &tb->fe6.e));
}

/*
 * Unlinks the flow matching the key prepared in @ta_buf.
 *
 * On success the removed entry's value is reported in tei->value, the
 * entry itself is passed to ta_flush_fhash_entry() via tb->ent_ptr,
 * *pnum = 1 and 0 is returned. Returns ENOENT if no entry matches.
 */
static int
ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
    void *ta_buf, uint32_t *pnum)
{
	struct fhash_cfg *cfg;
	struct fhashbhead *head;
	struct fhashentry *ent, *tmp;
	struct ta_buf_fhash *tb;
	uint32_t hash;
	size_t sz;

	cfg = (struct fhash_cfg *)ta_state;
	tb = (struct ta_buf_fhash *)ta_buf;
	ent = &tb->fe6.e;

	head = cfg->head;
	hash = hash_flow_ent(ent, cfg->size);

	if (tei->subtype == AF_INET)
		sz = 2 * sizeof(struct in_addr);
	else
		sz = 2 * sizeof(struct in6_addr);

	/* Check for existence */
	SLIST_FOREACH(tmp, &head[hash], next) {
		if (cmp_flow_ent(tmp, ent, sz) == 0)
			continue;

		/* Removing inside the walk is fine: we return right away. */
		SLIST_REMOVE(&head[hash], tmp, fhashentry, next);
		tei->value = tmp->value;
		*pnum = 1;
		cfg->items--;
		tb->ent_ptr = tmp;
		return (0);
	}

	return (ENOENT);
}

/*
 * Frees the entry left in the request buffer, if any: either a
 * prepared entry which was not inserted or an entry which was removed.
 */
static void
ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
    void *ta_buf)
{
	struct ta_buf_fhash *tb;

	tb = (struct ta_buf_fhash *)ta_buf;

	if (tb->ent_ptr != NULL)
		free(tb->ent_ptr, M_IPFW_TBL);
}

/*
 * Hash growing callbacks.
*/
static int
ta_need_modify_fhash(void *ta_state, struct table_info *ti, uint32_t count,
    uint64_t *pflags)
{
	struct fhash_cfg *cfg;

	cfg = (struct fhash_cfg *)ta_state;

	/*
	 * Ask for a hash twice as large once there are more items than
	 * buckets; growth stops when the hash has 65536 buckets.
	 */
	if (cfg->items > cfg->size && cfg->size < 65536) {
		*pflags = cfg->size * 2;
		return (1);
	}

	return (0);
}

/*
 * Allocate new, larger fhash.
 *
 * The bucket count is taken from @pflags; the initialized bucket array
 * is stored in the mod_item kept in @ta_buf.
 */
static int
ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags)
{
	struct mod_item *mi;
	struct fhashbhead *head;
	int i;

	mi = (struct mod_item *)ta_buf;

	memset(mi, 0, sizeof(struct mod_item));
	mi->size = *pflags;
	head = malloc(sizeof(struct fhashbhead) * mi->size, M_IPFW,
	    M_WAITOK | M_ZERO);
	for (i = 0; i < mi->size; i++)
		SLIST_INIT(&head[i]);

	mi->main_ptr = head;

	return (0);
}

/*
 * Copy data from old runtime array to new one.
 *
 * Nothing is done at this stage; entries are re-linked into the new
 * buckets in ta_modify_fhash().
 */
static int
ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
    uint64_t *pflags)
{

	/* It is not possible to do rehash if we're not holding WLOCK. */
	return (0);
}

/*
 * Switch old & new arrays.
 *
 * Every entry is moved from the old buckets into the bucket selected
 * by its hash for the new size, then the new bucket array is published
 * in both @ti and the configuration. The old bucket array is handed
 * back through mi->main_ptr so that ta_flush_mod_fhash() frees it.
 */
static void
ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
    uint64_t pflags)
{
	struct mod_item *mi;
	struct fhash_cfg *cfg;
	struct fhashbhead *old_head, *new_head;
	struct fhashentry *ent, *ent_next;
	int i;
	uint32_t nhash;
	size_t old_size;

	mi = (struct mod_item *)ta_buf;
	cfg = (struct fhash_cfg *)ta_state;

	old_size = cfg->size;
	old_head = ti->state;

	new_head = (struct fhashbhead *)mi->main_ptr;
	for (i = 0; i < old_size; i++) {
		/* _SAFE walk: inserting elsewhere rewrites ent's linkage. */
		SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
			nhash = hash_flow_ent(ent, mi->size);
			SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
		}
	}

	ti->state = new_head;
	ti->data = mi->size;
	cfg->head = new_head;
	cfg->size = mi->size;

	mi->main_ptr = old_head;
}

/*
 * Free unneded array.
*/
static void
ta_flush_mod_fhash(void *ta_buf)
{
	struct mod_item *mi;

	mi = (struct mod_item *)ta_buf;
	if (mi->main_ptr != NULL)
		free(mi->main_ptr, M_IPFW);
}

/* Callback table for the "flow:hash" algorithm. */
struct table_algo flow_hash = {
	.name		= "flow:hash",
	.type		= IPFW_TABLE_FLOW,
	.flags		= TA_FLAG_DEFAULT,
	.ta_buf_size	= sizeof(struct ta_buf_fhash),
	.init		= ta_init_fhash,
	.destroy	= ta_destroy_fhash,
	.prepare_add	= ta_prepare_add_fhash,
	.prepare_del	= ta_prepare_del_fhash,
	.add		= ta_add_fhash,
	.del		= ta_del_fhash,
	.flush_entry	= ta_flush_fhash_entry,
	.foreach	= ta_foreach_fhash,
	.dump_tentry	= ta_dump_fhash_tentry,
	.find_tentry	= ta_find_fhash_tentry,
	.dump_tinfo	= ta_dump_fhash_tinfo,
	.need_modify	= ta_need_modify_fhash,
	.prepare_mod	= ta_prepare_mod_fhash,
	.fill_mod	= ta_fill_mod_fhash,
	.modify		= ta_modify_fhash,
	.flush_mod	= ta_flush_mod_fhash,
};

/*
 * Kernel fibs bindings.
 *
 * Implementation:
 *
 * Runtime part:
 * - fully relies on route API
 * - fib number is stored in ti->data
 *
 */

static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen,
    uint32_t *val);
static int kfib_parse_opts(int *pfib, char *data);
static void ta_print_kfib_config(void *ta_state, struct table_info *ti,
    char *buf, size_t bufsize);
static int ta_init_kfib(struct ip_fw_chain *ch, void **ta_state,
    struct table_info *ti, char *data, uint8_t tflags);
static void ta_destroy_kfib(void *ta_state, struct table_info *ti);
static void ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti,
    ipfw_ta_tinfo *tinfo);
static int contigmask(uint8_t *p, int len);
static int ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e,
    ipfw_obj_tentry *tent);
static int ta_dump_kfib_tentry_int(struct sockaddr *paddr,
    struct sockaddr *pmask, ipfw_obj_tentry *tent);
static int ta_find_kfib_tentry(void *ta_state, struct table_info *ti,
    ipfw_obj_tentry *tent);
static void ta_foreach_kfib(void *ta_state, struct table_info *ti,
    ta_foreach_f *f, void *arg);

/*
 * Runtime lookup handler: asks the routing code whether the fib whose
 * number is stored in ti->data has a route for @key. Returns 1 (with
 * *val set to 0) when the route lookup succeeds, 0 otherwise.
 *
 * NOTE(review): the IPv4 branch is selected by keylen == 4, i.e. the
 * address length in bytes, while the IPv6 branch tests keylen == 6 -
 * confirm which keylen callers pass for IPv6 addresses.
 */
static int
ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen,
    uint32_t *val)
{
#ifdef INET
	struct nhop4_basic nh4;
	struct in_addr in;
#endif
#ifdef INET6
	struct nhop6_basic nh6;
#endif
	int error;

	error = ENOENT;
#ifdef INET
	if (keylen == 4) {
		in.s_addr = *(in_addr_t *)key;
		error = fib4_lookup_nh_basic(ti->data,
		    in, 0, 0, &nh4);
	}
#endif
#ifdef INET6
	if (keylen == 6)
		error = fib6_lookup_nh_basic(ti->data,
		    (struct in6_addr *)key, 0, 0, 0, &nh6);
#endif

	if (error != 0)
		return (0);

	*val = 0;

	return (1);
}

/*
 * Parse 'fib=%d'
 *
 * The option is expected after the first space in @data. Returns 0
 * when there are no options (in which case @pfib is left untouched) or
 * when the option was parsed into @pfib, EINVAL on malformed input.
 * The string is modified in place: the space following the option, if
 * any, is replaced with a NUL.
 */
static int
kfib_parse_opts(int *pfib, char *data)
{
	char *pdel, *pend, *s;
	int fibnum;

	if (data == NULL)
		return (0);
	if ((pdel = strchr(data, ' ')) == NULL)
		return (0);
	while (*pdel == ' ')
		pdel++;
	if (strncmp(pdel, "fib=", 4) != 0)
		return (EINVAL);
	if ((s = strchr(pdel, ' ')) != NULL)
		*s++ = '\0';

	pdel += 4;
	/* Need \d+ */
	fibnum = strtol(pdel, &pend, 10);
	if (*pend != '\0')
		return (EINVAL);

	*pfib = fibnum;

	return (0);
}

/*
 * Prints the algorithm name into @buf, followed by " fib=N" when the
 * table is bound to a non-zero fib.
 */
static void
ta_print_kfib_config(void *ta_state, struct table_info *ti, char *buf,
    size_t bufsize)
{

	if (ti->data != 0)
		snprintf(buf, bufsize, "%s fib=%lu", "addr:kfib", ti->data);
	else
		snprintf(buf, bufsize, "%s", "addr:kfib");
}

/*
 * Creates a new table bound to the fib given in @data (fib 0 when no
 * option is present). Returns 0 on success, the parse error, or E2BIG
 * when the fib number is not below rt_numfibs.
 */
static int
ta_init_kfib(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
    char *data, uint8_t tflags)
{
	int error, fibnum;

	fibnum = 0;
	if ((error = kfib_parse_opts(&fibnum, data)) != 0)
		return (error);

	if (fibnum >= rt_numfibs)
		return (E2BIG);

	ti->data = fibnum;
	ti->lookup = ta_lookup_kfib;

	return (0);
}

/*
 * Destroys table @ti
 */
static void
ta_destroy_kfib(void *ta_state, struct table_info *ti)
{

}

/*
 * Provide algo-specific table info
 */
static void
ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
{

	tinfo->flags = IPFW_TATFLAGS_AFDATA;
	tinfo->taclass4 = IPFW_TACLASS_RADIX;
	tinfo->count4 = 0;
	tinfo->itemsize4 = sizeof(struct rtentry);
	tinfo->taclass6 = IPFW_TACLASS_RADIX;
	tinfo->count6 = 0;
	tinfo->itemsize6 = sizeof(struct rtentry);
}

/*
 * Returns the number of leading one bits in the @len-bit mask at @p
 * (most significant bit of each byte first), or -1 if a one bit
 * follows the first zero bit.
 */
static int
contigmask(uint8_t *p, int len)
{
	int i, n;

	for (i = 0; i < len ; i++)
		if ( (p[i/8] & (1 << (7 - (i%8)))) == 0) /* first bit unset */
			break;
	for (n= i + 1; n < len; n++)
		if ( (p[n/8] & (1 << (7 - (n % 8)))) != 0)
			return (-1); /* mask not contiguous */
	return (i);
}

/*
 * Exports route entry @e into @tent using its key and mask.
 */
static int
ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e,
    ipfw_obj_tentry *tent)
{
	struct rtentry *rte;

	rte = (struct rtentry *)e;

	return ta_dump_kfib_tentry_int(rt_key(rte), rt_mask(rte), tent);
}

/*
 * Fills @tent from a prefix/mask sockaddr pair. A NULL mask yields a
 * full-length prefix (32 or 128); a non-contiguous mask is reported
 * with masklen 0. The value index is always 0. Always returns 0.
 */
static int
ta_dump_kfib_tentry_int(struct sockaddr *paddr, struct sockaddr *pmask,
    ipfw_obj_tentry *tent)
{
#ifdef INET
	struct sockaddr_in *addr, *mask;
#endif
#ifdef INET6
	struct sockaddr_in6 *addr6, *mask6;
#endif
	int len;

	len = 0;

	/* Guess IPv4/IPv6 radix by sockaddr family */
#ifdef INET
	if (paddr->sa_family == AF_INET) {
		addr = (struct sockaddr_in *)paddr;
		mask = (struct sockaddr_in *)pmask;
		tent->k.addr.s_addr = addr->sin_addr.s_addr;
		len = 32;
		if (mask != NULL)
			len = contigmask((uint8_t *)&mask->sin_addr, 32);
		if (len == -1)
			len = 0;
		tent->masklen = len;
		tent->subtype = AF_INET;
		tent->v.kidx = 0; /* Do we need to put GW here? */
	}
#endif
#ifdef INET6
	if (paddr->sa_family == AF_INET6) {
		addr6 = (struct sockaddr_in6 *)paddr;
		mask6 = (struct sockaddr_in6 *)pmask;
		memcpy(&tent->k, &addr6->sin6_addr, sizeof(struct in6_addr));
		len = 128;
		if (mask6 != NULL)
			len = contigmask((uint8_t *)&mask6->sin6_addr, 128);
		if (len == -1)
			len = 0;
		tent->masklen = len;
		tent->subtype = AF_INET6;
		tent->v.kidx = 0;
	}
#endif

	return (0);
}

/*
 * Looks up the address in @tent in the fib bound to the table and, on
 * success, overwrites @tent with the matching prefix.
 * Returns 0 on success, ENOENT if the route lookup fails.
 */
static int
ta_find_kfib_tentry(void *ta_state, struct table_info *ti,
    ipfw_obj_tentry *tent)
{
	struct rt_addrinfo info;
	struct sockaddr_in6 key6, dst6, mask6;
	struct sockaddr *dst, *key, *mask;

	/* Prepare sockaddr for prefix/mask and info */
	bzero(&dst6, sizeof(dst6));
	dst6.sin6_len = sizeof(dst6);
	dst = (struct sockaddr *)&dst6;
	bzero(&mask6, sizeof(mask6));
	mask6.sin6_len = sizeof(mask6);
	mask = (struct sockaddr *)&mask6;

	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_NETMASK] = mask;

	/* Prepare the lookup key */
	bzero(&key6, sizeof(key6));
	key6.sin6_family = tent->subtype;
	key = (struct sockaddr *)&key6;

	/* The sockaddr_in6 storage doubles as a sockaddr_in for IPv4. */
	if (tent->subtype == AF_INET) {
		((struct sockaddr_in *)&key6)->sin_addr = tent->k.addr;
		key6.sin6_len = sizeof(struct sockaddr_in);
	} else {
		key6.sin6_addr = tent->k.addr6;
		key6.sin6_len = sizeof(struct sockaddr_in6);
	}

	if (rib_lookup_info(ti->data, key, 0, 0, &info) != 0)
		return (ENOENT);
	/* No netmask reported: dump the entry as a full-length prefix. */
	if ((info.rti_addrs & RTA_NETMASK) == 0)
		mask = NULL;

	ta_dump_kfib_tentry_int(dst, mask, tent);

	return (0);
}

/*
 * Walks the IPv4 and then the IPv6 routing tree of the bound fib under
 * the rib read lock, calling @f with @arg through rnh_walktree(). The
 * walk result is stored in a local and not reported to the caller.
 */
static void
ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f,
    void *arg)
{
	struct rib_head *rh;
	int error;

	rh = rt_tables_get_rnh(ti->data, AF_INET);
	if (rh != NULL) {
		RIB_RLOCK(rh);
		error = rh->rnh_walktree(&rh->head, (walktree_f_t *)f, arg);
		RIB_RUNLOCK(rh);
	}

	rh = rt_tables_get_rnh(ti->data, AF_INET6);
	if (rh != NULL) {
		RIB_RLOCK(rh);
		error = rh->rnh_walktree(&rh->head, (walktree_f_t *)f, arg);
		RIB_RUNLOCK(rh);
	}
}

/*
 * Callback table for the read-only "addr:kfib" algorithm; no add/del
 * or grow handlers are provided.
 */
struct table_algo addr_kfib = {
	.name		= "addr:kfib",
	.type		= IPFW_TABLE_ADDR,
	.flags		= TA_FLAG_READONLY,
	.ta_buf_size	= 0,
	.init		= ta_init_kfib,
	.destroy	= ta_destroy_kfib,
	.foreach	= ta_foreach_kfib,
	.dump_tentry	= ta_dump_kfib_tentry,
	.find_tentry	= ta_find_kfib_tentry,
	.dump_tinfo	= ta_dump_kfib_tinfo,
	.print_config	= ta_print_kfib_config,
};

/*
 * Registers every table algorithm defined in this file with chain @ch;
 * the assigned index is stored in each algorithm's idx field.
 */
void
ipfw_table_algo_init(struct ip_fw_chain *ch)
{
	size_t sz;

	/*
	 * Register all algorithms presented here.
	 */
	sz = sizeof(struct table_algo);
	ipfw_add_table_algo(ch, &addr_radix, sz, &addr_radix.idx);
	ipfw_add_table_algo(ch, &addr_hash, sz, &addr_hash.idx);
	ipfw_add_table_algo(ch, &iface_idx, sz, &iface_idx.idx);
	ipfw_add_table_algo(ch, &number_array, sz, &number_array.idx);
	ipfw_add_table_algo(ch, &flow_hash, sz, &flow_hash.idx);
	ipfw_add_table_algo(ch, &addr_kfib, sz, &addr_kfib.idx);
}

/*
 * Unregisters the algorithms added by ipfw_table_algo_init().
 */
void
ipfw_table_algo_destroy(struct ip_fw_chain *ch)
{

	ipfw_del_table_algo(ch, addr_radix.idx);
	ipfw_del_table_algo(ch, addr_hash.idx);
	ipfw_del_table_algo(ch, iface_idx.idx);
	ipfw_del_table_algo(ch, number_array.idx);
	ipfw_del_table_algo(ch, flow_hash.idx);
	ipfw_del_table_algo(ch, addr_kfib.idx);
}
Index: head/sys/netpfil/ipfw/ip_fw_table_value.c
===================================================================
--- head/sys/netpfil/ipfw/ip_fw_table_value.c (revision 299151)
+++ head/sys/netpfil/ipfw/ip_fw_table_value.c (revision 299152)
@@ -1,806 +1,808 @@
/*-
 * Copyright (c) 2014 Yandex LLC
 * Copyright (c) 2014 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Multi-field value support for ipfw tables. * * This file contains necessary functions to convert * large multi-field values into u32 indices suitable to be fed * to various table algorithms. Other machinery like proper refcounting, * internal structures resizing are also kept here. 
*/ #include "opt_ipfw.h" #include #include #include #include #include #include #include #include #include #include #include #include /* ip_fw.h requires IFNAMSIZ */ #include #include /* struct ipfw_rule_ref */ #include #include #include static uint32_t hash_table_value(struct namedobj_instance *ni, const void *key, uint32_t kopt); static int cmp_table_value(struct named_object *no, const void *key, uint32_t kopt); static int list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); static struct ipfw_sopt_handler scodes[] = { { IP_FW_TABLE_VLIST, 0, HDIR_GET, list_table_values }, }; #define CHAIN_TO_VI(chain) (CHAIN_TO_TCFG(chain)->valhash) struct table_val_link { struct named_object no; struct table_value *pval; /* Pointer to real table value */ }; #define VALDATA_START_SIZE 64 /* Allocate 64-items array by default */ struct vdump_args { struct ip_fw_chain *ch; struct sockopt_data *sd; struct table_value *pval; int error; }; /* * Hash callback for the value index: hashes the first 56 bytes of @key. * NOTE(review): 56 is presumably the size of the user-visible value fields * at the start of struct table_value - confirm against its definition. * The same literal is used by cmp_table_value() and must stay in sync. */ static uint32_t hash_table_value(struct namedobj_instance *ni, const void *key, uint32_t kopt) { return (hash32_buf(key, 56, 0)); } /* * Compare callback for the value index: memcmp() of the first 56 bytes of * the table value linked from @no against @key. Returns 0 when equal. */ static int cmp_table_value(struct named_object *no, const void *key, uint32_t kopt) { return (memcmp(((struct table_val_link *)no)->pval, key, 56)); } /* * Zeroes @dst and then copies from @src only those fields whose * IPFW_VTYPE_* bit is set in @mask. Note that zoneid is copied under * the same bit as nh6 (IPFW_VTYPE_NH6). */ static void mask_table_value(struct table_value *src, struct table_value *dst, uint32_t mask) { #define _MCPY(f, b) if ((mask & (b)) != 0) { dst->f = src->f; } memset(dst, 0, sizeof(*dst)); _MCPY(tag, IPFW_VTYPE_TAG); _MCPY(pipe, IPFW_VTYPE_PIPE); _MCPY(divert, IPFW_VTYPE_DIVERT); _MCPY(skipto, IPFW_VTYPE_SKIPTO); _MCPY(netgraph, IPFW_VTYPE_NETGRAPH); _MCPY(fib, IPFW_VTYPE_FIB); _MCPY(nat, IPFW_VTYPE_NAT); _MCPY(dscp, IPFW_VTYPE_DSCP); _MCPY(nh4, IPFW_VTYPE_NH4); _MCPY(nh6, IPFW_VTYPE_NH6); _MCPY(zoneid, IPFW_VTYPE_NH6); #undef _MCPY } static void get_value_ptrs(struct ip_fw_chain *ch, struct table_config *tc, int vshared, struct table_value **ptv, struct namedobj_instance **pvi) { struct table_value *pval; struct namedobj_instance 
*vi; if (vshared != 0) { pval = (struct table_value *)ch->valuestate; vi = CHAIN_TO_VI(ch); } else { pval = NULL; vi = NULL; //pval = (struct table_value *)&tc->ti.data; } if (ptv != NULL) *ptv = pval; if (pvi != NULL) *pvi = vi; } /* * Update pointers to real values after @pval change. * Callback for ipfw_objhash_foreach(): re-points the link's pval and name * at slot no.kidx of the array passed in via struct vdump_args. * Always returns 0. */ -static void +static int update_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct vdump_args *da; struct table_val_link *ptv; struct table_value *pval; da = (struct vdump_args *)arg; ptv = (struct table_val_link *)no; pval = da->pval; ptv->pval = &pval[ptv->no.kidx]; ptv->no.name = (char *)&pval[ptv->no.kidx]; - + return (0); } /* * Grows value storage shared among all tables (doubles its size). * Drops/reacquires UH locks. * Notifies other running adds on @ch about the shared storage resize. * Note that the function does not guarantee that free space * will be available after invocation, so the caller needs * to retry in a loop itself. * * Returns 0 if there were no errors, ENOSPC if the doubled size * equals 2^30 entries. */ static int resize_shared_value_storage(struct ip_fw_chain *ch) { struct tables_config *tcfg; struct namedobj_instance *vi; struct table_value *pval, *valuestate, *old_valuestate; void *new_idx; struct vdump_args da; int new_blocks; int val_size, val_size_old; IPFW_UH_WLOCK_ASSERT(ch); valuestate = NULL; new_idx = NULL; pval = (struct table_value *)ch->valuestate; vi = CHAIN_TO_VI(ch); tcfg = CHAIN_TO_TCFG(ch); val_size = tcfg->val_size * 2; if (val_size == (1 << 30)) return (ENOSPC); IPFW_UH_WUNLOCK(ch); valuestate = malloc(sizeof(struct table_value) * val_size, M_IPFW, M_WAITOK | M_ZERO); ipfw_objhash_bitmap_alloc(val_size, (void *)&new_idx, &new_blocks); IPFW_UH_WLOCK(ch); /* * Check if we still need to resize */ if (tcfg->val_size >= val_size) goto done; /* Update pointers and notify everyone we're changing @ch */ pval = (struct table_value *)ch->valuestate; rollback_toperation_state(ch, ch); /* Good. 
Let's merge */ memcpy(valuestate, pval, sizeof(struct table_value) * tcfg->val_size); ipfw_objhash_bitmap_merge(CHAIN_TO_VI(ch), &new_idx, &new_blocks); IPFW_WLOCK(ch); /* Change pointers */ old_valuestate = ch->valuestate; ch->valuestate = valuestate; valuestate = old_valuestate; ipfw_objhash_bitmap_swap(CHAIN_TO_VI(ch), &new_idx, &new_blocks); val_size_old = tcfg->val_size; tcfg->val_size = val_size; val_size = val_size_old; IPFW_WUNLOCK(ch); /* Update pointers to reflect resize */ memset(&da, 0, sizeof(da)); da.pval = (struct table_value *)ch->valuestate; ipfw_objhash_foreach(vi, update_tvalue, &da); done: free(valuestate, M_IPFW); ipfw_objhash_bitmap_free(new_idx, new_blocks); return (0); } /* * Drops reference for table value with index @kidx, stored in @pval and * @vi. Frees value if it has no references. */ static void unref_table_value(struct namedobj_instance *vi, struct table_value *pval, uint32_t kidx) { struct table_val_link *ptvl; KASSERT(pval[kidx].refcnt > 0, ("Refcount is 0 on kidx %d", kidx)); if (--pval[kidx].refcnt > 0) return; /* Last reference, delete item */ ptvl = (struct table_val_link *)ipfw_objhash_lookup_kidx(vi, kidx); KASSERT(ptvl != NULL, ("lookup on value kidx %d failed", kidx)); ipfw_objhash_del(vi, &ptvl->no); ipfw_objhash_free_idx(vi, kidx); free(ptvl, M_IPFW); } struct flush_args { struct ip_fw_chain *ch; struct table_algo *ta; struct table_info *ti; void *astate; ipfw_obj_tentry tent; }; static int unref_table_value_cb(void *e, void *arg) { struct flush_args *fa; struct ip_fw_chain *ch; struct table_algo *ta; ipfw_obj_tentry *tent; int error; fa = (struct flush_args *)arg; ta = fa->ta; memset(&fa->tent, 0, sizeof(fa->tent)); tent = &fa->tent; error = ta->dump_tentry(fa->astate, fa->ti, e, tent); if (error != 0) return (error); ch = fa->ch; unref_table_value(CHAIN_TO_VI(ch), (struct table_value *)ch->valuestate, tent->v.kidx); return (0); } /* * Drop references for each value used in @tc. 
*/ void ipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc, struct table_algo *ta, void *astate, struct table_info *ti) { struct flush_args fa; IPFW_UH_WLOCK_ASSERT(ch); memset(&fa, 0, sizeof(fa)); fa.ch = ch; fa.ta = ta; fa.astate = astate; fa.ti = ti; ta->foreach(astate, ti, unref_table_value_cb, &fa); } /* * Table operation state handler. * Called when we are going to change something in @tc which * may lead to inconsistencies in on-going table data addition. * * Here we rollback all already committed state (table values, currently) * and set "modified" field to non-zero value to indicate * that we need to restart original operation. */ void rollback_table_values(struct tableop_state *ts) { struct ip_fw_chain *ch; struct table_value *pval; struct tentry_info *ptei; struct namedobj_instance *vi; int i; ch = ts->ch; IPFW_UH_WLOCK_ASSERT(ch); /* Get current table value pointer */ get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi); for (i = 0; i < ts->count; i++) { ptei = &ts->tei[i]; if (ptei->value == 0) continue; unref_table_value(vi, pval, ptei->value); } } /* * Allocate new value index in either shared or per-table array. * Function may drop/reacquire UH lock. * * Returns 0 on success. */ static int alloc_table_vidx(struct ip_fw_chain *ch, struct tableop_state *ts, struct namedobj_instance *vi, uint16_t *pvidx) { int error, vlimit; uint16_t vidx; IPFW_UH_WLOCK_ASSERT(ch); error = ipfw_objhash_alloc_idx(vi, &vidx); if (error != 0) { /* * We need to resize array. This involves * lock/unlock, so we need to check "modified" * state. */ ts->opstate.func(ts->tc, &ts->opstate); error = resize_shared_value_storage(ch); return (error); /* ts->modified should be set, we will restart */ } vlimit = ts->ta->vlimit; if (vlimit != 0 && vidx >= vlimit) { /* * Algorithm is not able to store given index. * We have to rollback state, start using * per-table value array or return error * if we're already using it. 
* * TODO: do not rollback state if * atomicity is not required. */ if (ts->vshared != 0) { /* shared -> per-table */ return (ENOSPC); /* TODO: proper error */ } /* per-table. Fail for now. */ return (ENOSPC); /* TODO: proper error */ } *pvidx = vidx; return (0); } /* * Drops value reference for unused values (updates, deletes, partially * successful adds or rollbacks). * * For each of the @count entries in @tei with a non-zero value: if * @rollback is 0 and the entry carries TEI_FLAGS_ADDED the reference is * kept, otherwise it is dropped. In both cases ptei->value is reset to 0. */ void ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc, struct tentry_info *tei, uint32_t count, int rollback) { int i; struct tentry_info *ptei; struct table_value *pval; struct namedobj_instance *vi; /* * We have two slightly different ADD cases here: * either (1) we are successful / partially successful, * in that case we need * * to ignore ADDED entries values * * rollback every other value (either UPDATED, since the * old value has been stored there, or some failure like * EXISTS or LIMIT, or simply the "ignored" case). * * (2): atomic rollback of partially successful operation * in that case we simply need to unref all entries. * * DELETE case is simpler: no atomic support there, so * we simply unref all non-zero values. */ /* * Get current table value pointers. * XXX: Properly read vshared */ get_value_ptrs(ch, tc, 1, &pval, &vi); for (i = 0; i < count; i++) { ptei = &tei[i]; if (ptei->value == 0) { /* * We may be deleting non-existing record. * Skip. */ continue; } if ((ptei->flags & TEI_FLAGS_ADDED) != 0 && rollback == 0) { ptei->value = 0; continue; } unref_table_value(vi, pval, ptei->value); ptei->value = 0; } } /* * Main function used to link values of entries going to be added, * to the index. Since we may perform many UH locks drops/acquires, * handle changes by checking tablestate "modified" field. * * Success: return 0. 
*/ int ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts) { int error, i, found; struct namedobj_instance *vi; struct table_config *tc; struct tentry_info *tei, *ptei; uint32_t count, vlimit; uint16_t vidx; struct table_val_link *ptv; struct table_value tval, *pval; /* * Stage 1: reference all existing values and * save their indices. */ IPFW_UH_WLOCK_ASSERT(ch); get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi); error = 0; found = 0; vlimit = ts->ta->vlimit; vidx = 0; tc = ts->tc; tei = ts->tei; count = ts->count; for (i = 0; i < count; i++) { ptei = &tei[i]; ptei->value = 0; /* Ensure value is always 0 in the beginning */ mask_table_value(ptei->pvalue, &tval, ts->vmask); ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0, (char *)&tval); if (ptv == NULL) continue; /* Deal with vlimit later */ if (vlimit > 0 && vlimit <= ptv->no.kidx) continue; /* Value found. Bump refcount */ ptv->pval->refcnt++; ptei->value = ptv->no.kidx; found++; } if (ts->count == found) { /* We've found all values, no need to create new ones */ return (0); } /* * We have added some state here, let's attach operation * state to the list to be able to rollback if necessary. */ add_toperation_state(ch, ts); /* Ensure table won't disappear */ tc_ref(tc); IPFW_UH_WUNLOCK(ch); /* * Stage 2: allocate objects for non-existing values. * Done with the UH lock dropped since malloc() uses M_WAITOK. */ for (i = 0; i < count; i++) { ptei = &tei[i]; if (ptei->value != 0) continue; if (ptei->ptv != NULL) continue; ptei->ptv = malloc(sizeof(struct table_val_link), M_IPFW, M_WAITOK | M_ZERO); } /* * Stage 3: allocate index numbers for new values * and link them to index. */ IPFW_UH_WLOCK(ch); tc_unref(tc); del_toperation_state(ch, ts); if (ts->modified != 0) { /* * In general, we should free all state/indexes here * and return. However, we keep allocated state instead * to ensure we achieve some progress on each restart. 
*/ return (0); } KASSERT(pval == ch->valuestate, ("resize_storage() notify failure")); /* Let's try to link values */ for (i = 0; i < count; i++) { ptei = &tei[i]; /* Check if record has appeared */ mask_table_value(ptei->pvalue, &tval, ts->vmask); ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0, (char *)&tval); if (ptv != NULL) { ptv->pval->refcnt++; ptei->value = ptv->no.kidx; continue; } /* May perform UH unlock/lock */ error = alloc_table_vidx(ch, ts, vi, &vidx); if (error != 0) { ts->opstate.func(ts->tc, &ts->opstate); return (error); } /* value storage resize has happened, return */ if (ts->modified != 0) return (0); /* Finally, we have allocated valid index, let's add entry */ ptei->value = vidx; ptv = (struct table_val_link *)ptei->ptv; ptei->ptv = NULL; ptv->no.kidx = vidx; ptv->no.name = (char *)&pval[vidx]; ptv->pval = &pval[vidx]; memcpy(ptv->pval, &tval, sizeof(struct table_value)); pval[vidx].refcnt = 1; ipfw_objhash_add(vi, &ptv->no); } return (0); } /* * Compatibility function used to import data from old * IP_FW_TABLE_ADD / IP_FW_TABLE_XADD opcodes. */ void ipfw_import_table_value_legacy(uint32_t value, struct table_value *v) { memset(v, 0, sizeof(*v)); v->tag = value; v->pipe = value; v->divert = value; v->skipto = value; v->netgraph = value; v->fib = value; v->nat = value; v->nh4 = value; /* host format */ v->dscp = value; v->limit = value; } /* * Export data to legacy table dumps opcodes. */ uint32_t ipfw_export_table_value_legacy(struct table_value *v) { /* * TODO: provide more compatibility depending on * vmask value. */ return (v->tag); } /* * Imports table value from current userland format. * Saves value in kernel format to the same place. 
*/ void ipfw_import_table_value_v1(ipfw_table_value *iv) { struct table_value v; memset(&v, 0, sizeof(v)); v.tag = iv->tag; v.pipe = iv->pipe; v.divert = iv->divert; v.skipto = iv->skipto; v.netgraph = iv->netgraph; v.fib = iv->fib; v.nat = iv->nat; v.dscp = iv->dscp; v.nh4 = iv->nh4; v.nh6 = iv->nh6; v.limit = iv->limit; v.zoneid = iv->zoneid; memcpy(iv, &v, sizeof(ipfw_table_value)); } /* * Export real table value @v to current userland format. * Note that @v and @piv may point to the same memory. */ void ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *piv) { ipfw_table_value iv; memset(&iv, 0, sizeof(iv)); iv.tag = v->tag; iv.pipe = v->pipe; iv.divert = v->divert; iv.skipto = v->skipto; iv.netgraph = v->netgraph; iv.fib = v->fib; iv.nat = v->nat; iv.dscp = v->dscp; iv.limit = v->limit; iv.nh4 = v->nh4; iv.nh6 = v->nh6; iv.zoneid = v->zoneid; memcpy(piv, &iv, sizeof(iv)); } /* * Exports real value data into ipfw_table_value structure. * Utilizes "spare1" field to store kernel index. 
*/ -static void +static int dump_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct vdump_args *da; struct table_val_link *ptv; struct table_value *v; da = (struct vdump_args *)arg; ptv = (struct table_val_link *)no; v = (struct table_value *)ipfw_get_sopt_space(da->sd, sizeof(*v)); /* Out of memory, returning */ if (v == NULL) { da->error = ENOMEM; - return; + return (ENOMEM); } memcpy(v, ptv->pval, sizeof(*v)); v->spare1 = ptv->no.kidx; + return (0); } /* * Dumps all shared/table value data * Data layout (v1)(current): * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader ipfw_table_value x N ] * * Returns 0 on success, EINVAL if the request header cannot be obtained * or its size exceeds sd->valsize, ENOMEM if the size given in the header * is too small for the reply (the required size is stored in the header). */ static int list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_lheader *olh; struct namedobj_instance *vi; struct vdump_args da; uint32_t count, size; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); if (sd->valsize < olh->size) return (EINVAL); IPFW_UH_RLOCK(ch); vi = CHAIN_TO_VI(ch); count = ipfw_objhash_count(vi); size = count * sizeof(ipfw_table_value) + sizeof(ipfw_obj_lheader); /* Fill in header regardless of buffer size */ olh->count = count; olh->objsize = sizeof(ipfw_table_value); if (size > olh->size) { olh->size = size; IPFW_UH_RUNLOCK(ch); return (ENOMEM); } olh->size = size; /* * Do the actual value dump. * NOTE(review): da.error, which dump_tvalue() sets on failure, * is not inspected after the walk; 0 is returned regardless. */ memset(&da, 0, sizeof(da)); da.ch = ch; da.sd = sd; ipfw_objhash_foreach(vi, dump_tvalue, &da); IPFW_UH_RUNLOCK(ch); return (0); } /* * Sets up value storage for @ch: allocates the initial shared value * array of VALDATA_START_SIZE entries, creates the value index of the * same size, installs hash_table_value()/cmp_table_value() on it and * registers the sockopt handlers through IPFW_ADD_SOPT_HANDLER(@first, ...). */ void ipfw_table_value_init(struct ip_fw_chain *ch, int first) { struct tables_config *tcfg; ch->valuestate = malloc(VALDATA_START_SIZE * sizeof(struct table_value), M_IPFW, M_WAITOK | M_ZERO); tcfg = ch->tblcfg; tcfg->val_size = VALDATA_START_SIZE; tcfg->valhash = ipfw_objhash_create(tcfg->val_size); ipfw_objhash_set_funcs(tcfg->valhash, hash_table_value, cmp_table_value); IPFW_ADD_SOPT_HANDLER(first, scodes); } -static void 
+static int destroy_value(struct namedobj_instance *ni, struct named_object *no, void *arg) { /* ipfw_objhash_foreach() callback: releases the link object itself */ free(no, M_IPFW); + return (0); } /* * Tears down value storage for @ch: unregisters the sockopt handlers * through IPFW_DEL_SOPT_HANDLER(@last, ...), frees the shared value array, * frees every object still present in the value index via destroy_value() * and finally destroys the index. */ void ipfw_table_value_destroy(struct ip_fw_chain *ch, int last) { IPFW_DEL_SOPT_HANDLER(last, scodes); free(ch->valuestate, M_IPFW); ipfw_objhash_foreach(CHAIN_TO_VI(ch), destroy_value, ch); ipfw_objhash_destroy(CHAIN_TO_VI(ch)); }