Index: stable/11/sys/netpfil/ipfw/ip_fw2.c =================================================================== --- stable/11/sys/netpfil/ipfw/ip_fw2.c (revision 326387) +++ stable/11/sys/netpfil/ipfw/ip_fw2.c (revision 326388) @@ -1,3023 +1,3159 @@ /*- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * The FreeBSD IP packet firewall, main file */ #include "opt_ipfw.h" #include "opt_ipdivert.h" #include "opt_inet.h" #ifndef INET #error "IPFIREWALL requires INET" #endif /* INET */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for ETHERTYPE_IP */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #endif #include /* for struct grehdr */ #include #include /* XXX for in_cksum */ #ifdef MAC #include #endif /* * static variables followed by global ones. * All ipfw global variables are here. */ static VNET_DEFINE(int, fw_deny_unknown_exthdrs); #define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs) static VNET_DEFINE(int, fw_permit_single_frag6) = 1; #define V_fw_permit_single_frag6 VNET(fw_permit_single_frag6) #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT static int default_to_accept = 1; #else static int default_to_accept; #endif VNET_DEFINE(int, autoinc_step); VNET_DEFINE(int, fw_one_pass) = 1; VNET_DEFINE(unsigned int, fw_tables_max); VNET_DEFINE(unsigned int, fw_tables_sets) = 0; /* Don't use set-aware tables */ /* Use 128 tables by default */ static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT; #ifndef LINEAR_SKIPTO static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, int tablearg, int jump_backwards); #define JUMP(ch, f, num, targ, back) jump_fast(ch, f, num, targ, back) #else static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num, int tablearg, int jump_backwards); #define JUMP(ch, f, num, targ, back) jump_linear(ch, f, num, targ, back) #endif /* * Each rule belongs to one of 32 different sets (0..31). * The variable set_disable contains one bit per set. * If the bit is set, all rules in the corresponding set * are disabled. Set RESVD_SET(31) is reserved for the default rule * and rules that are not deleted by the flush command, * and CANNOT be disabled. * Rules in set RESVD_SET can only be deleted individually. */ VNET_DEFINE(u_int32_t, set_disable); #define V_set_disable VNET(set_disable) VNET_DEFINE(int, fw_verbose); /* counter for ipfw_log(NULL...) */ VNET_DEFINE(u_int64_t, norule_counter); VNET_DEFINE(int, verbose_limit); /* layer3_chain contains the list of rules for layer 3 */ VNET_DEFINE(struct ip_fw_chain, layer3_chain); /* ipfw_vnet_ready controls when we are open for business */ VNET_DEFINE(int, ipfw_vnet_ready) = 0; VNET_DEFINE(int, ipfw_nat_ready) = 0; ipfw_nat_t *ipfw_nat_ptr = NULL; struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int); ipfw_nat_cfg_t *ipfw_nat_cfg_ptr; ipfw_nat_cfg_t *ipfw_nat_del_ptr; ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; #ifdef SYSCTL_NODE uint32_t dummy_def = IPFW_DEFAULT_RULE; static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS); static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS); SYSBEGIN(f3) SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0, "Only do a single pass through ipfw when using dummynet(4)"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0, "Rule number auto-increment step"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0, "Log matches to ipfw rules"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0, "Set upper limit of matches of ipfw rules logged"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, &dummy_def, 0, "The default/max possible rule number."); SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU", "Maximum number of concurrently used tables"); SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_sets, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_tables_sets, "IU", "Use per-set namespace for tables"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, &default_to_accept, 0, "Make the default rule accept all packets."); TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0, "Number of static rules"); #ifdef INET6 SYSCTL_DECL(_net_inet6_ip6); SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0, "Deny packets with unknown IPv6 Extension Headers"); SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_permit_single_frag6), 0, "Permit single packet IPv6 fragments"); #endif /* INET6 */ SYSEND #endif /* SYSCTL_NODE */ /* * Some macros used in the various matching options. * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T * Other macros just cast void * into the appropriate type */ #define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) #define TCP(p) ((struct tcphdr *)(p)) #define SCTP(p) ((struct sctphdr *)(p)) #define UDP(p) ((struct udphdr *)(p)) #define ICMP(p) ((struct icmphdr *)(p)) #define ICMP6(p) ((struct icmp6_hdr *)(p)) static __inline int icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd) { int type = icmp->icmp_type; return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<icmp_type; return (type <= ICMP_MAXTYPE && (TT & (1<arg1 or cmd->d[0]. * * We scan options and store the bits we find set. We succeed if * * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear * * The code is sometimes optimized not to store additional variables. */ static int flags_match(ipfw_insn *cmd, u_int8_t bits) { u_char want_clear; bits = ~bits; if ( ((cmd->arg1 & 0xff) & bits) != 0) return 0; /* some bits we want set were clear */ want_clear = (cmd->arg1 >> 8) & 0xff; if ( (want_clear & bits) != want_clear) return 0; /* some bits we want clear were set */ return 1; } static int ipopts_match(struct ip *ip, ipfw_insn *cmd) { int optlen, bits = 0; u_char *cp = (u_char *)(ip + 1); int x = (ip->ip_hl << 2) - sizeof (struct ip); for (; x > 0; x -= optlen, cp += optlen) { int opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) optlen = 1; else { optlen = cp[IPOPT_OLEN]; if (optlen <= 0 || optlen > x) return 0; /* invalid or truncated */ } switch (opt) { default: break; case IPOPT_LSRR: bits |= IP_FW_IPOPT_LSRR; break; case IPOPT_SSRR: bits |= IP_FW_IPOPT_SSRR; break; case IPOPT_RR: bits |= IP_FW_IPOPT_RR; break; case IPOPT_TS: bits |= IP_FW_IPOPT_TS; break; } } return (flags_match(cmd, bits)); } static int tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) { int optlen, bits = 0; u_char *cp = (u_char *)(tcp + 1); int x = (tcp->th_off << 2) - sizeof(struct tcphdr); for (; x > 0; x -= optlen, cp += optlen) { int opt = cp[0]; if (opt == TCPOPT_EOL) break; if (opt == TCPOPT_NOP) optlen = 1; else { optlen = cp[1]; if (optlen <= 0) break; } switch (opt) { default: break; case TCPOPT_MAXSEG: bits |= IP_FW_TCPOPT_MSS; break; case TCPOPT_WINDOW: bits |= IP_FW_TCPOPT_WINDOW; break; case TCPOPT_SACK_PERMITTED: case TCPOPT_SACK: bits |= IP_FW_TCPOPT_SACK; break; case TCPOPT_TIMESTAMP: bits |= IP_FW_TCPOPT_TS; break; } } return (flags_match(cmd, bits)); } static int iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uint32_t *tablearg) { if (ifp == NULL) /* no iface with this packet, match fails */ return (0); /* Check by name or by IP address */ if (cmd->name[0] != '\0') { /* match by name */ if (cmd->name[0] == '\1') /* use tablearg to match */ return ipfw_lookup_table(chain, cmd->p.kidx, 0, &ifp->if_index, tablearg); /* Check name */ if (cmd->p.glob) { if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) return(1); } else { if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) return(1); } } else { #if !defined(USERSPACE) && defined(__FreeBSD__) /* and OSX too ? */ struct ifaddr *ia; if_addr_rlock(ifp); TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { if (ia->ifa_addr->sa_family != AF_INET) continue; if (cmd->p.ip.s_addr == ((struct sockaddr_in *) (ia->ifa_addr))->sin_addr.s_addr) { if_addr_runlock(ifp); return(1); /* match */ } } if_addr_runlock(ifp); #endif /* __FreeBSD__ */ } return(0); /* no match, fail ... */ } /* * The verify_path function checks if a route to the src exists and * if it is reachable via ifp (when provided). * * The 'verrevpath' option checks that the interface that an IP packet * arrives on is the same interface that traffic destined for the * packet's source address would be routed out of. * The 'versrcreach' option just checks that the source address is * reachable via any route (except default) in the routing table. * These two are a measure to block forged packets. This is also * commonly known as "anti-spoofing" or Unicast Reverse Path * Forwarding (Unicast RFP) in Cisco-ese. The name of the knobs * is purposely reminiscent of the Cisco IOS command, * * ip verify unicast reverse-path * ip verify unicast source reachable-via any * * which implements the same functionality. But note that the syntax * is misleading, and the check may be performed on all IP packets * whether unicast, multicast, or broadcast. */ static int verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) { #if defined(USERSPACE) || !defined(__FreeBSD__) return 0; #else struct nhop4_basic nh4; if (fib4_lookup_nh_basic(fib, src, NHR_IFAIF, 0, &nh4) != 0) return (0); /* * If ifp is provided, check for equality with rtentry. * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, * in order to pass packets injected back by if_simloop(): * routing entry (via lo0) for our own address * may exist, so we need to handle routing assymetry. */ if (ifp != NULL && ifp != nh4.nh_ifp) return (0); /* if no ifp provided, check if rtentry is not default route */ if (ifp == NULL && (nh4.nh_flags & NHF_DEFAULT) != 0) return (0); /* or if this is a blackhole/reject route */ if (ifp == NULL && (nh4.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0) return (0); /* found valid route */ return 1; #endif /* __FreeBSD__ */ } +/* + * Generate a TCP packet, containing either a RST or a keepalive. + * When flags & TH_RST, we are sending a RST packet, because of a + * "reset" action matched the packet. + * Otherwise we are sending a keepalive, and flags & TH_ + * The 'replyto' mbuf is the mbuf being replied to, if any, and is required + * so that MAC can label the reply appropriately. + */ +struct mbuf * +ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, + u_int32_t ack, int flags) +{ + struct mbuf *m = NULL; /* stupid compiler */ + struct ip *h = NULL; /* stupid compiler */ #ifdef INET6 + struct ip6_hdr *h6 = NULL; +#endif + struct tcphdr *th = NULL; + int len, dir; + + MGETHDR(m, M_NOWAIT, MT_DATA); + if (m == NULL) + return (NULL); + + M_SETFIB(m, id->fib); +#ifdef MAC + if (replyto != NULL) + mac_netinet_firewall_reply(replyto, m); + else + mac_netinet_firewall_send(m); +#else + (void)replyto; /* don't warn about unused arg */ +#endif + + switch (id->addr_type) { + case 4: + len = sizeof(struct ip) + sizeof(struct tcphdr); + break; +#ifdef INET6 + case 6: + len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); + break; +#endif + default: + /* XXX: log me?!? */ + FREE_PKT(m); + return (NULL); + } + dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); + + m->m_data += max_linkhdr; + m->m_flags |= M_SKIP_FIREWALL; + m->m_pkthdr.len = m->m_len = len; + m->m_pkthdr.rcvif = NULL; + bzero(m->m_data, len); + + switch (id->addr_type) { + case 4: + h = mtod(m, struct ip *); + + /* prepare for checksum */ + h->ip_p = IPPROTO_TCP; + h->ip_len = htons(sizeof(struct tcphdr)); + if (dir) { + h->ip_src.s_addr = htonl(id->src_ip); + h->ip_dst.s_addr = htonl(id->dst_ip); + } else { + h->ip_src.s_addr = htonl(id->dst_ip); + h->ip_dst.s_addr = htonl(id->src_ip); + } + + th = (struct tcphdr *)(h + 1); + break; +#ifdef INET6 + case 6: + h6 = mtod(m, struct ip6_hdr *); + + /* prepare for checksum */ + h6->ip6_nxt = IPPROTO_TCP; + h6->ip6_plen = htons(sizeof(struct tcphdr)); + if (dir) { + h6->ip6_src = id->src_ip6; + h6->ip6_dst = id->dst_ip6; + } else { + h6->ip6_src = id->dst_ip6; + h6->ip6_dst = id->src_ip6; + } + + th = (struct tcphdr *)(h6 + 1); + break; +#endif + } + + if (dir) { + th->th_sport = htons(id->src_port); + th->th_dport = htons(id->dst_port); + } else { + th->th_sport = htons(id->dst_port); + th->th_dport = htons(id->src_port); + } + th->th_off = sizeof(struct tcphdr) >> 2; + + if (flags & TH_RST) { + if (flags & TH_ACK) { + th->th_seq = htonl(ack); + th->th_flags = TH_RST; + } else { + if (flags & TH_SYN) + seq++; + th->th_ack = htonl(seq); + th->th_flags = TH_RST | TH_ACK; + } + } else { + /* + * Keepalive - use caller provided sequence numbers + */ + th->th_seq = htonl(seq); + th->th_ack = htonl(ack); + th->th_flags = TH_ACK; + } + + switch (id->addr_type) { + case 4: + th->th_sum = in_cksum(m, len); + + /* finish the ip header */ + h->ip_v = 4; + h->ip_hl = sizeof(*h) >> 2; + h->ip_tos = IPTOS_LOWDELAY; + h->ip_off = htons(0); + h->ip_len = htons(len); + h->ip_ttl = V_ip_defttl; + h->ip_sum = 0; + break; +#ifdef INET6 + case 6: + th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), + sizeof(struct tcphdr)); + + /* finish the ip6 header */ + h6->ip6_vfc |= IPV6_VERSION; + h6->ip6_hlim = IPV6_DEFHLIM; + break; +#endif + } + + return (m); +} + +#ifdef INET6 /* * ipv6 specific rules here... */ static __inline int icmp6type_match (int type, ipfw_insn_u32 *cmd) { return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) ); } static int flow6id_match( int curr_flow, ipfw_insn_u32 *cmd ) { int i; for (i=0; i <= cmd->o.arg1; ++i ) if (curr_flow == cmd->d[i] ) return 1; return 0; } /* support for IP6_*_ME opcodes */ static const struct in6_addr lla_mask = {{{ 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}}; static int ipfw_localip6(struct in6_addr *in6) { struct rm_priotracker in6_ifa_tracker; struct in6_ifaddr *ia; if (IN6_IS_ADDR_MULTICAST(in6)) return (0); if (!IN6_IS_ADDR_LINKLOCAL(in6)) return (in6_localip(in6)); IN6_IFADDR_RLOCK(&in6_ifa_tracker); TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { if (!IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) continue; if (IN6_ARE_MASKED_ADDR_EQUAL(&ia->ia_addr.sin6_addr, in6, &lla_mask)) { IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return (1); } } IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return (0); } static int verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib) { struct nhop6_basic nh6; if (IN6_IS_SCOPE_LINKLOCAL(src)) return (1); if (fib6_lookup_nh_basic(fib, src, 0, NHR_IFAIF, 0, &nh6) != 0) return (0); /* If ifp is provided, check for equality with route table. */ if (ifp != NULL && ifp != nh6.nh_ifp) return (0); /* if no ifp provided, check if rtentry is not default route */ if (ifp == NULL && (nh6.nh_flags & NHF_DEFAULT) != 0) return (0); /* or if this is a blackhole/reject route */ if (ifp == NULL && (nh6.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0) return (0); /* found valid route */ return 1; } static int is_icmp6_query(int icmp6_type) { if ((icmp6_type <= ICMP6_MAXTYPE) && (icmp6_type == ICMP6_ECHO_REQUEST || icmp6_type == ICMP6_MEMBERSHIP_QUERY || icmp6_type == ICMP6_WRUREQUEST || icmp6_type == ICMP6_FQDN_QUERY || icmp6_type == ICMP6_NI_QUERY)) return (1); return (0); } static void send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) { struct mbuf *m; m = args->m; if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) { struct tcphdr *tcp; tcp = (struct tcphdr *)((char *)ip6 + hlen); if ((tcp->th_flags & TH_RST) == 0) { struct mbuf *m0; m0 = ipfw_send_pkt(args->m, &(args->f_id), ntohl(tcp->th_seq), ntohl(tcp->th_ack), tcp->th_flags | TH_RST); if (m0 != NULL) ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); } FREE_PKT(m); } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */ #if 0 /* * Unlike above, the mbufs need to line up with the ip6 hdr, * as the contents are read. We need to m_adj() the * needed amount. * The mbuf will however be thrown away so we can adjust it. * Remember we did an m_pullup on it already so we * can make some assumptions about contiguousness. */ if (args->L3offset) m_adj(m, args->L3offset); #endif icmp6_error(m, ICMP6_DST_UNREACH, code, 0); } else FREE_PKT(m); args->m = NULL; } #endif /* INET6 */ /* * sends a reject message, consuming the mbuf passed as an argument. */ static void send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip) { #if 0 /* XXX When ip is not guaranteed to be at mtod() we will * need to account for this */ * The mbuf will however be thrown away so we can adjust it. * Remember we did an m_pullup on it already so we * can make some assumptions about contiguousness. */ if (args->L3offset) m_adj(m, args->L3offset); #endif if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); } else if (args->f_id.proto == IPPROTO_TCP) { struct tcphdr *const tcp = L3HDR(struct tcphdr, mtod(args->m, struct ip *)); if ( (tcp->th_flags & TH_RST) == 0) { struct mbuf *m; m = ipfw_send_pkt(args->m, &(args->f_id), ntohl(tcp->th_seq), ntohl(tcp->th_ack), tcp->th_flags | TH_RST); if (m != NULL) ip_output(m, NULL, NULL, 0, NULL, NULL); } FREE_PKT(args->m); } else FREE_PKT(args->m); args->m = NULL; } /* * Support for uid/gid/jail lookup. These tests are expensive * (because we may need to look into the list of active sockets) * so we cache the results. ugid_lookupp is 0 if we have not * yet done a lookup, 1 if we succeeded, and -1 if we tried * and failed. The function always returns the match value. * We could actually spare the variable and use *uc, setting * it to '(void *)check_uidgid if we have no info, NULL if * we tried and failed, or any other value if successful. */ static int check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp, struct ucred **uc) { #if defined(USERSPACE) return 0; // not supported in userspace #else #ifndef __FreeBSD__ /* XXX */ return cred_check(insn, proto, oif, dst_ip, dst_port, src_ip, src_port, (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb); #else /* FreeBSD */ struct in_addr src_ip, dst_ip; struct inpcbinfo *pi; struct ipfw_flow_id *id; struct inpcb *pcb, *inp; struct ifnet *oif; int lookupflags; int match; id = &args->f_id; inp = args->inp; oif = args->oif; /* * Check to see if the UDP or TCP stack supplied us with * the PCB. If so, rather then holding a lock and looking * up the PCB, we can use the one that was supplied. */ if (inp && *ugid_lookupp == 0) { INP_LOCK_ASSERT(inp); if (inp->inp_socket != NULL) { *uc = crhold(inp->inp_cred); *ugid_lookupp = 1; } else *ugid_lookupp = -1; } /* * If we have already been here and the packet has no * PCB entry associated with it, then we can safely * assume that this is a no match. */ if (*ugid_lookupp == -1) return (0); if (id->proto == IPPROTO_TCP) { lookupflags = 0; pi = &V_tcbinfo; } else if (id->proto == IPPROTO_UDP) { lookupflags = INPLOOKUP_WILDCARD; pi = &V_udbinfo; } else return 0; lookupflags |= INPLOOKUP_RLOCKPCB; match = 0; if (*ugid_lookupp == 0) { if (id->addr_type == 6) { #ifdef INET6 if (oif == NULL) pcb = in6_pcblookup_mbuf(pi, &id->src_ip6, htons(id->src_port), &id->dst_ip6, htons(id->dst_port), lookupflags, oif, args->m); else pcb = in6_pcblookup_mbuf(pi, &id->dst_ip6, htons(id->dst_port), &id->src_ip6, htons(id->src_port), lookupflags, oif, args->m); #else *ugid_lookupp = -1; return (0); #endif } else { src_ip.s_addr = htonl(id->src_ip); dst_ip.s_addr = htonl(id->dst_ip); if (oif == NULL) pcb = in_pcblookup_mbuf(pi, src_ip, htons(id->src_port), dst_ip, htons(id->dst_port), lookupflags, oif, args->m); else pcb = in_pcblookup_mbuf(pi, dst_ip, htons(id->dst_port), src_ip, htons(id->src_port), lookupflags, oif, args->m); } if (pcb != NULL) { INP_RLOCK_ASSERT(pcb); *uc = crhold(pcb->inp_cred); *ugid_lookupp = 1; INP_RUNLOCK(pcb); } if (*ugid_lookupp == 0) { /* * We tried and failed, set the variable to -1 * so we will not try again on this packet. */ *ugid_lookupp = -1; return (0); } } if (insn->o.opcode == O_UID) match = ((*uc)->cr_uid == (uid_t)insn->d[0]); else if (insn->o.opcode == O_GID) match = groupmember((gid_t)insn->d[0], *uc); else if (insn->o.opcode == O_JAIL) match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]); return (match); #endif /* __FreeBSD__ */ #endif /* not supported in userspace */ } /* * Helper function to set args with info on the rule after the matching * one. slot is precise, whereas we guess rule_id as they are * assigned sequentially. */ static inline void set_match(struct ip_fw_args *args, int slot, struct ip_fw_chain *chain) { args->rule.chain_id = chain->id; args->rule.slot = slot + 1; /* we use 0 as a marker */ args->rule.rule_id = 1 + chain->map[slot]->id; args->rule.rulenum = chain->map[slot]->rulenum; } #ifndef LINEAR_SKIPTO /* * Helper function to enable cached rule lookups using * cached_id and cached_pos fields in ipfw rule. */ static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, int tablearg, int jump_backwards) { int f_pos; /* If possible use cached f_pos (in f->cached_pos), * whose version is written in f->cached_id * (horrible hacks to avoid changing the ABI). */ if (num != IP_FW_TARG && f->cached_id == chain->id) f_pos = f->cached_pos; else { int i = IP_FW_ARG_TABLEARG(chain, num, skipto); /* make sure we do not jump backward */ if (jump_backwards == 0 && i <= f->rulenum) i = f->rulenum + 1; if (chain->idxmap != NULL) f_pos = chain->idxmap[i]; else f_pos = ipfw_find_rule(chain, i, 0); /* update the cache */ if (num != IP_FW_TARG) { f->cached_id = chain->id; f->cached_pos = f_pos; } } return (f_pos); } #else /* * Helper function to enable real fast rule lookups. */ static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num, int tablearg, int jump_backwards) { int f_pos; num = IP_FW_ARG_TABLEARG(chain, num, skipto); /* make sure we do not jump backward */ if (jump_backwards == 0 && num <= f->rulenum) num = f->rulenum + 1; f_pos = chain->idxmap[num]; return (f_pos); } #endif #define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f) /* * The main check routine for the firewall. * * All arguments are in args so we can modify them and return them * back to the caller. * * Parameters: * * args->m (in/out) The packet; we set to NULL when/if we nuke it. * Starts with the IP header. * args->eh (in) Mac header if present, NULL for layer3 packet. * args->L3offset Number of bytes bypassed if we came from L2. * e.g. often sizeof(eh) ** NOTYET ** * args->oif Outgoing interface, NULL if packet is incoming. * The incoming interface is in the mbuf. (in) * args->divert_rule (in/out) * Skip up to the first rule past this rule number; * upon return, non-zero port number for divert or tee. * * args->rule Pointer to the last matching rule (in/out) * args->next_hop Socket we are forwarding to (out). * args->next_hop6 IPv6 next hop we are forwarding to (out). * args->f_id Addresses grabbed from the packet (out) * args->rule.info a cookie depending on rule action * * Return value: * * IP_FW_PASS the packet must be accepted * IP_FW_DENY the packet must be dropped * IP_FW_DIVERT divert packet, port in m_tag * IP_FW_TEE tee packet, port in m_tag * IP_FW_DUMMYNET to dummynet, pipe in args->cookie * IP_FW_NETGRAPH into netgraph, cookie args->cookie * args->rule contains the matching rule, * args->rule.info has additional information. * */ int ipfw_chk(struct ip_fw_args *args) { /* * Local variables holding state while processing a packet: * * IMPORTANT NOTE: to speed up the processing of rules, there * are some assumption on the values of the variables, which * are documented here. Should you change them, please check * the implementation of the various instructions to make sure * that they still work. * * args->eh The MAC header. It is non-null for a layer2 * packet, it is NULL for a layer-3 packet. * **notyet** * args->L3offset Offset in the packet to the L3 (IP or equiv.) header. * * m | args->m Pointer to the mbuf, as received from the caller. * It may change if ipfw_chk() does an m_pullup, or if it * consumes the packet because it calls send_reject(). * XXX This has to change, so that ipfw_chk() never modifies * or consumes the buffer. * ip is the beginning of the ip(4 or 6) header. * Calculated by adding the L3offset to the start of data. * (Until we start using L3offset, the packet is * supposed to start with the ip header). */ struct mbuf *m = args->m; struct ip *ip = mtod(m, struct ip *); /* * For rules which contain uid/gid or jail constraints, cache * a copy of the users credentials after the pcb lookup has been * executed. This will speed up the processing of rules with * these types of constraints, as well as decrease contention * on pcb related locks. */ #ifndef __FreeBSD__ struct bsd_ucred ucred_cache; #else struct ucred *ucred_cache = NULL; #endif int ucred_lookup = 0; /* * oif | args->oif If NULL, ipfw_chk has been called on the * inbound path (ether_input, ip_input). * If non-NULL, ipfw_chk has been called on the outbound path * (ether_output, ip_output). */ struct ifnet *oif = args->oif; int f_pos = 0; /* index of current rule in the array */ int retval = 0; /* * hlen The length of the IP header. */ u_int hlen = 0; /* hlen >0 means we have an IP pkt */ /* * offset The offset of a fragment. offset != 0 means that * we have a fragment at this offset of an IPv4 packet. * offset == 0 means that (if this is an IPv4 packet) * this is the first or only fragment. * For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header * or there is a single packet fragment (fragment header added * without needed). We will treat a single packet fragment as if * there was no fragment header (or log/block depending on the * V_fw_permit_single_frag6 sysctl setting). */ u_short offset = 0; u_short ip6f_mf = 0; /* * Local copies of addresses. They are only valid if we have * an IP packet. * * proto The protocol. Set to 0 for non-ip packets, * or to the protocol read from the packet otherwise. * proto != 0 means that we have an IPv4 packet. * * src_port, dst_port port numbers, in HOST format. Only * valid for TCP and UDP packets. * * src_ip, dst_ip ip addresses, in NETWORK format. * Only valid for IPv4 packets. */ uint8_t proto; uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */ struct in_addr src_ip, dst_ip; /* NOTE: network format */ int iplen = 0; int pktlen; uint16_t etype = 0; /* Host order stored ether type */ /* * dyn_dir = MATCH_UNKNOWN when rules unchecked, * MATCH_NONE when checked and not matched (q = NULL), * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) */ int dyn_dir = MATCH_UNKNOWN; uint16_t dyn_name = 0; - ipfw_dyn_rule *q = NULL; + struct ip_fw *q = NULL; struct ip_fw_chain *chain = &V_layer3_chain; /* * We store in ulp a pointer to the upper layer protocol header. * In the ipv4 case this is easy to determine from the header, * but for ipv6 we might have some additional headers in the middle. * ulp is NULL if not found. */ void *ulp = NULL; /* upper layer protocol pointer. */ /* XXX ipv6 variables */ int is_ipv6 = 0; uint8_t icmp6_type = 0; uint16_t ext_hd = 0; /* bits vector for extension header filtering */ /* end of ipv6 variables */ int is_ipv4 = 0; int done = 0; /* flag to exit the outer loop */ if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) return (IP_FW_PASS); /* accept */ dst_ip.s_addr = 0; /* make sure it is initialized */ src_ip.s_addr = 0; /* make sure it is initialized */ pktlen = m->m_pkthdr.len; args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */ proto = args->f_id.proto = 0; /* mark f_id invalid */ /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */ /* * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, * then it sets p to point at the offset "len" in the mbuf. WARNING: the * pointer might become stale after other pullups (but we never use it * this way). */ #define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T)) #define PULLUP_LEN(_len, p, T) \ do { \ int x = (_len) + T; \ if ((m)->m_len < x) { \ args->m = m = m_pullup(m, x); \ if (m == NULL) \ goto pullup_failed; \ } \ p = (mtod(m, char *) + (_len)); \ } while (0) /* * if we have an ether header, */ if (args->eh) etype = ntohs(args->eh->ether_type); /* Identify IP packets and fill up variables. */ if (pktlen >= sizeof(struct ip6_hdr) && (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; is_ipv6 = 1; args->f_id.addr_type = 6; hlen = sizeof(struct ip6_hdr); proto = ip6->ip6_nxt; /* Search extension headers to find upper layer protocols */ while (ulp == NULL && offset == 0) { switch (proto) { case IPPROTO_ICMPV6: PULLUP_TO(hlen, ulp, struct icmp6_hdr); icmp6_type = ICMP6(ulp)->icmp6_type; break; case IPPROTO_TCP: PULLUP_TO(hlen, ulp, struct tcphdr); dst_port = TCP(ulp)->th_dport; src_port = TCP(ulp)->th_sport; /* save flags for dynamic rules */ args->f_id._flags = TCP(ulp)->th_flags; break; case IPPROTO_SCTP: PULLUP_TO(hlen, ulp, struct sctphdr); src_port = SCTP(ulp)->src_port; dst_port = SCTP(ulp)->dest_port; break; case IPPROTO_UDP: PULLUP_TO(hlen, ulp, struct udphdr); dst_port = UDP(ulp)->uh_dport; src_port = UDP(ulp)->uh_sport; break; case IPPROTO_HOPOPTS: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_hbh); ext_hd |= EXT_HOPOPTS; hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; ulp = NULL; break; case IPPROTO_ROUTING: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_rthdr); switch (((struct ip6_rthdr *)ulp)->ip6r_type) { case 0: ext_hd |= EXT_RTHDR0; break; case 2: ext_hd |= EXT_RTHDR2; break; default: if (V_fw_verbose) printf("IPFW2: IPV6 - Unknown " "Routing Header type(%d)\n", ((struct ip6_rthdr *) ulp)->ip6r_type); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); break; } ext_hd |= EXT_ROUTING; hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3; proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt; ulp = NULL; break; case IPPROTO_FRAGMENT: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_frag); ext_hd |= EXT_FRAGMENT; hlen += sizeof (struct ip6_frag); proto = ((struct ip6_frag *)ulp)->ip6f_nxt; offset = ((struct ip6_frag *)ulp)->ip6f_offlg & IP6F_OFF_MASK; ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg & IP6F_MORE_FRAG; if (V_fw_permit_single_frag6 == 0 && offset == 0 && ip6f_mf == 0) { if (V_fw_verbose) printf("IPFW2: IPV6 - Invalid " "Fragment Header\n"); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); break; } args->f_id.extra = ntohl(((struct ip6_frag *)ulp)->ip6f_ident); ulp = NULL; break; case IPPROTO_DSTOPTS: /* RFC 2460 */ PULLUP_TO(hlen, ulp, struct ip6_hbh); ext_hd |= EXT_DSTOPTS; hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; ulp = NULL; break; case IPPROTO_AH: /* RFC 2402 */ PULLUP_TO(hlen, ulp, struct ip6_ext); ext_hd |= EXT_AH; hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2; proto = ((struct ip6_ext *)ulp)->ip6e_nxt; ulp = NULL; break; case IPPROTO_ESP: /* RFC 2406 */ PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */ /* Anything past Seq# is variable length and * data past this ext. header is encrypted. */ ext_hd |= EXT_ESP; break; case IPPROTO_NONE: /* RFC 2460 */ /* * Packet ends here, and IPv6 header has * already been pulled up. If ip6e_len!=0 * then octets must be ignored. */ ulp = ip; /* non-NULL to get out of loop. */ break; case IPPROTO_OSPFIGP: /* XXX OSPF header check? */ PULLUP_TO(hlen, ulp, struct ip6_ext); break; case IPPROTO_PIM: /* XXX PIM header check? */ PULLUP_TO(hlen, ulp, struct pim); break; case IPPROTO_GRE: /* RFC 1701 */ /* XXX GRE header check? */ PULLUP_TO(hlen, ulp, struct grehdr); break; case IPPROTO_CARP: PULLUP_TO(hlen, ulp, struct carp_header); if (((struct carp_header *)ulp)->carp_version != CARP_VERSION) return (IP_FW_DENY); if (((struct carp_header *)ulp)->carp_type != CARP_ADVERTISEMENT) return (IP_FW_DENY); break; case IPPROTO_IPV6: /* RFC 2893 */ PULLUP_TO(hlen, ulp, struct ip6_hdr); break; case IPPROTO_IPV4: /* RFC 2893 */ PULLUP_TO(hlen, ulp, struct ip); break; default: if (V_fw_verbose) printf("IPFW2: IPV6 - Unknown " "Extension Header(%d), ext_hd=%x\n", proto, ext_hd); if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); PULLUP_TO(hlen, ulp, struct ip6_ext); break; } /*switch */ } ip = mtod(m, struct ip *); ip6 = (struct ip6_hdr *)ip; args->f_id.src_ip6 = ip6->ip6_src; args->f_id.dst_ip6 = ip6->ip6_dst; args->f_id.src_ip = 0; args->f_id.dst_ip = 0; args->f_id.flow_id6 = ntohl(ip6->ip6_flow); iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6); } else if (pktlen >= sizeof(struct ip) && (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) { is_ipv4 = 1; hlen = ip->ip_hl << 2; args->f_id.addr_type = 4; /* * Collect parameters into local variables for faster matching. */ proto = ip->ip_p; src_ip = ip->ip_src; dst_ip = ip->ip_dst; offset = ntohs(ip->ip_off) & IP_OFFMASK; iplen = ntohs(ip->ip_len); if (offset == 0) { switch (proto) { case IPPROTO_TCP: PULLUP_TO(hlen, ulp, struct tcphdr); dst_port = TCP(ulp)->th_dport; src_port = TCP(ulp)->th_sport; /* save flags for dynamic rules */ args->f_id._flags = TCP(ulp)->th_flags; break; case IPPROTO_SCTP: PULLUP_TO(hlen, ulp, struct sctphdr); src_port = SCTP(ulp)->src_port; dst_port = SCTP(ulp)->dest_port; break; case IPPROTO_UDP: PULLUP_TO(hlen, ulp, struct udphdr); dst_port = UDP(ulp)->uh_dport; src_port = UDP(ulp)->uh_sport; break; case IPPROTO_ICMP: PULLUP_TO(hlen, ulp, struct icmphdr); //args->f_id.flags = ICMP(ulp)->icmp_type; break; default: break; } } ip = mtod(m, struct ip *); args->f_id.src_ip = ntohl(src_ip.s_addr); args->f_id.dst_ip = ntohl(dst_ip.s_addr); } #undef PULLUP_TO pktlen = iplen < pktlen ? iplen: pktlen; if (proto) { /* we may have port numbers, store them */ args->f_id.proto = proto; args->f_id.src_port = src_port = ntohs(src_port); args->f_id.dst_port = dst_port = ntohs(dst_port); } IPFW_PF_RLOCK(chain); if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ IPFW_PF_RUNLOCK(chain); return (IP_FW_PASS); /* accept */ } if (args->rule.slot) { /* * Packet has already been tagged as a result of a previous * match on rule args->rule aka args->rule_id (PIPE, QUEUE, * REASS, NETGRAPH, DIVERT/TEE...) * Validate the slot and continue from the next one * if still present, otherwise do a lookup. */ f_pos = (args->rule.chain_id == chain->id) ? args->rule.slot : ipfw_find_rule(chain, args->rule.rulenum, args->rule.rule_id); } else { f_pos = 0; } /* * Now scan the rules, and parse microinstructions for each rule. * We have two nested loops and an inner switch. Sometimes we * need to break out of one or both loops, or re-enter one of * the loops with updated variables. Loop variables are: * * f_pos (outer loop) points to the current rule. * On output it points to the matching rule. * done (outer loop) is used as a flag to break the loop. * l (inner loop) residual length of current rule. * cmd points to the current microinstruction. * * We break the inner loop by setting l=0 and possibly * cmdlen=0 if we don't want to advance cmd. * We break the outer loop by setting done=1 * We can restart the inner loop by setting l>0 and f_pos, f, cmd * as needed. */ for (; f_pos < chain->n_rules; f_pos++) { ipfw_insn *cmd; uint32_t tablearg = 0; int l, cmdlen, skip_or; /* skip rest of OR block */ struct ip_fw *f; f = chain->map[f_pos]; if (V_set_disable & (1 << f->set) ) continue; skip_or = 0; for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; l -= cmdlen, cmd += cmdlen) { int match; /* * check_body is a jump target used when we find a * CHECK_STATE, and need to jump to the body of * the target rule. */ /* check_body: */ cmdlen = F_LEN(cmd); /* * An OR block (insn_1 || .. || insn_n) has the * F_OR bit set in all but the last instruction. * The first match will set "skip_or", and cause * the following instructions to be skipped until * past the one with the F_OR bit clear. */ if (skip_or) { /* skip this instruction */ if ((cmd->len & F_OR) == 0) skip_or = 0; /* next one is good */ continue; } match = 0; /* set to 1 if we succeed */ switch (cmd->opcode) { /* * The first set of opcodes compares the packet's * fields with some pattern, setting 'match' if a * match is found. At the end of the loop there is * logic to deal with F_NOT and F_OR flags associated * with the opcode. */ case O_NOP: match = 1; break; case O_FORWARD_MAC: printf("ipfw: opcode %d unimplemented\n", cmd->opcode); break; case O_GID: case O_UID: case O_JAIL: /* * We only check offset == 0 && proto != 0, * as this ensures that we have a * packet with the ports info. */ if (offset != 0) break; if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) match = check_uidgid( (ipfw_insn_u32 *)cmd, args, &ucred_lookup, #ifdef __FreeBSD__ &ucred_cache); #else (void *)&ucred_cache); #endif break; case O_RECV: match = iface_match(m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd, chain, &tablearg); break; case O_XMIT: match = iface_match(oif, (ipfw_insn_if *)cmd, chain, &tablearg); break; case O_VIA: match = iface_match(oif ? oif : m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd, chain, &tablearg); break; case O_MACADDR2: if (args->eh != NULL) { /* have MAC header */ u_int32_t *want = (u_int32_t *) ((ipfw_insn_mac *)cmd)->addr; u_int32_t *mask = (u_int32_t *) ((ipfw_insn_mac *)cmd)->mask; u_int32_t *hdr = (u_int32_t *)args->eh; match = ( want[0] == (hdr[0] & mask[0]) && want[1] == (hdr[1] & mask[1]) && want[2] == (hdr[2] & mask[2]) ); } break; case O_MAC_TYPE: if (args->eh != NULL) { u_int16_t *p = ((ipfw_insn_u16 *)cmd)->ports; int i; for (i = cmdlen - 1; !match && i>0; i--, p += 2) match = (etype >= p[0] && etype <= p[1]); } break; case O_FRAG: match = (offset != 0); break; case O_IN: /* "out" is "not in" */ match = (oif == NULL); break; case O_LAYER2: match = (args->eh != NULL); break; case O_DIVERTED: { /* For diverted packets, args->rule.info * contains the divert port (in host format) * reason and direction. */ uint32_t i = args->rule.info; match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT && cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2); } break; case O_PROTO: /* * We do not allow an arg of 0 so the * check of "proto" only suffices. */ match = (proto == cmd->arg1); break; case O_IP_SRC: match = is_ipv4 && (((ipfw_insn_ip *)cmd)->addr.s_addr == src_ip.s_addr); break; case O_IP_DST_LOOKUP: { void *pkey; uint32_t vidx, key; uint16_t keylen; if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { /* Determine lookup key type */ vidx = ((ipfw_insn_u32 *)cmd)->d[1]; if (vidx != 4 /* uid */ && vidx != 5 /* jail */ && is_ipv6 == 0 && is_ipv4 == 0) break; /* Determine key length */ if (vidx == 0 /* dst-ip */ || vidx == 1 /* src-ip */) keylen = is_ipv6 ? sizeof(struct in6_addr): sizeof(in_addr_t); else { keylen = sizeof(key); pkey = &key; } if (vidx == 0 /* dst-ip */) pkey = is_ipv4 ? (void *)&dst_ip: (void *)&args->f_id.dst_ip6; else if (vidx == 1 /* src-ip */) pkey = is_ipv4 ? (void *)&src_ip: (void *)&args->f_id.src_ip6; else if (vidx == 6 /* dscp */) { if (is_ipv4) key = ip->ip_tos >> 2; else { key = args->f_id.flow_id6; key = (key & 0x0f) << 2 | (key & 0xf000) >> 14; } key &= 0x3f; } else if (vidx == 2 /* dst-port */ || vidx == 3 /* src-port */) { /* Skip fragments */ if (offset != 0) break; /* Skip proto without ports */ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP && proto != IPPROTO_SCTP) break; if (vidx == 2 /* dst-port */) key = dst_port; else key = src_port; } #ifndef USERSPACE else if (vidx == 4 /* uid */ || vidx == 5 /* jail */) { check_uidgid( (ipfw_insn_u32 *)cmd, args, &ucred_lookup, #ifdef __FreeBSD__ &ucred_cache); if (vidx == 4 /* uid */) key = ucred_cache->cr_uid; else if (vidx == 5 /* jail */) key = ucred_cache->cr_prison->pr_id; #else /* !__FreeBSD__ */ (void *)&ucred_cache); if (vidx == 4 /* uid */) key = ucred_cache.uid; else if (vidx == 5 /* jail */) key = ucred_cache.xid; #endif /* !__FreeBSD__ */ } #endif /* !USERSPACE */ else break; match = ipfw_lookup_table(chain, cmd->arg1, keylen, pkey, &vidx); if (!match) break; tablearg = vidx; break; } /* cmdlen =< F_INSN_SIZE(ipfw_insn_u32) */ /* FALLTHROUGH */ } case O_IP_SRC_LOOKUP: { void *pkey; uint32_t vidx; uint16_t keylen; if (is_ipv4) { keylen = sizeof(in_addr_t); if (cmd->opcode == O_IP_DST_LOOKUP) pkey = &dst_ip; else pkey = &src_ip; } else if (is_ipv6) { keylen = sizeof(struct in6_addr); if (cmd->opcode == O_IP_DST_LOOKUP) pkey = &args->f_id.dst_ip6; else pkey = &args->f_id.src_ip6; } else break; match = ipfw_lookup_table(chain, cmd->arg1, keylen, pkey, &vidx); if (!match) break; if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) { match = ((ipfw_insn_u32 *)cmd)->d[0] == TARG_VAL(chain, vidx, tag); if (!match) break; } tablearg = vidx; break; } case O_IP_FLOW_LOOKUP: { uint32_t v = 0; match = ipfw_lookup_table(chain, cmd->arg1, 0, &args->f_id, &v); if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) match = ((ipfw_insn_u32 *)cmd)->d[0] == TARG_VAL(chain, v, tag); if (match) tablearg = v; } break; case O_IP_SRC_MASK: case O_IP_DST_MASK: if (is_ipv4) { uint32_t a = (cmd->opcode == O_IP_DST_MASK) ? dst_ip.s_addr : src_ip.s_addr; uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; int i = cmdlen-1; for (; !match && i>0; i-= 2, p+= 2) match = (p[0] == (a & p[1])); } break; case O_IP_SRC_ME: if (is_ipv4) { match = in_localip(src_ip); break; } #ifdef INET6 /* FALLTHROUGH */ case O_IP6_SRC_ME: match= is_ipv6 && ipfw_localip6(&args->f_id.src_ip6); #endif break; case O_IP_DST_SET: case O_IP_SRC_SET: if (is_ipv4) { u_int32_t *d = (u_int32_t *)(cmd+1); u_int32_t addr = cmd->opcode == O_IP_DST_SET ? args->f_id.dst_ip : args->f_id.src_ip; if (addr < d[0]) break; addr -= d[0]; /* subtract base */ match = (addr < cmd->arg1) && ( d[ 1 + (addr>>5)] & (1<<(addr & 0x1f)) ); } break; case O_IP_DST: match = is_ipv4 && (((ipfw_insn_ip *)cmd)->addr.s_addr == dst_ip.s_addr); break; case O_IP_DST_ME: if (is_ipv4) { match = in_localip(dst_ip); break; } #ifdef INET6 /* FALLTHROUGH */ case O_IP6_DST_ME: match= is_ipv6 && ipfw_localip6(&args->f_id.dst_ip6); #endif break; case O_IP_SRCPORT: case O_IP_DSTPORT: /* * offset == 0 && proto != 0 is enough * to guarantee that we have a * packet with port info. */ if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) && offset == 0) { u_int16_t x = (cmd->opcode == O_IP_SRCPORT) ? src_port : dst_port ; u_int16_t *p = ((ipfw_insn_u16 *)cmd)->ports; int i; for (i = cmdlen - 1; !match && i>0; i--, p += 2) match = (x>=p[0] && x<=p[1]); } break; case O_ICMPTYPE: match = (offset == 0 && proto==IPPROTO_ICMP && icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) ); break; #ifdef INET6 case O_ICMP6TYPE: match = is_ipv6 && offset == 0 && proto==IPPROTO_ICMPV6 && icmp6type_match( ICMP6(ulp)->icmp6_type, (ipfw_insn_u32 *)cmd); break; #endif /* INET6 */ case O_IPOPT: match = (is_ipv4 && ipopts_match(ip, cmd) ); break; case O_IPVER: match = (is_ipv4 && cmd->arg1 == ip->ip_v); break; case O_IPID: case O_IPLEN: case O_IPTTL: if (is_ipv4) { /* only for IP packets */ uint16_t x; uint16_t *p; int i; if (cmd->opcode == O_IPLEN) x = iplen; else if (cmd->opcode == O_IPTTL) x = ip->ip_ttl; else /* must be IPID */ x = ntohs(ip->ip_id); if (cmdlen == 1) { match = (cmd->arg1 == x); break; } /* otherwise we have ranges */ p = ((ipfw_insn_u16 *)cmd)->ports; i = cmdlen - 1; for (; !match && i>0; i--, p += 2) match = (x >= p[0] && x <= p[1]); } break; case O_IPPRECEDENCE: match = (is_ipv4 && (cmd->arg1 == (ip->ip_tos & 0xe0)) ); break; case O_IPTOS: match = (is_ipv4 && flags_match(cmd, ip->ip_tos)); break; case O_DSCP: { uint32_t *p; uint16_t x; p = ((ipfw_insn_u32 *)cmd)->d; if (is_ipv4) x = ip->ip_tos >> 2; else if (is_ipv6) { uint8_t *v; v = &((struct ip6_hdr *)ip)->ip6_vfc; x = (*v & 0x0F) << 2; v++; x |= *v >> 6; } else break; /* DSCP bitmask is stored as low_u32 high_u32 */ if (x >= 32) match = *(p + 1) & (1 << (x - 32)); else match = *p & (1 << x); } break; case O_TCPDATALEN: if (proto == IPPROTO_TCP && offset == 0) { struct tcphdr *tcp; uint16_t x; uint16_t *p; int i; #ifdef INET6 if (is_ipv6) { struct ip6_hdr *ip6; ip6 = (struct ip6_hdr *)ip; if (ip6->ip6_plen == 0) { /* * Jumbo payload is not * supported by this * opcode. */ break; } x = iplen - hlen; } else #endif /* INET6 */ x = iplen - (ip->ip_hl << 2); tcp = TCP(ulp); x -= tcp->th_off << 2; if (cmdlen == 1) { match = (cmd->arg1 == x); break; } /* otherwise we have ranges */ p = ((ipfw_insn_u16 *)cmd)->ports; i = cmdlen - 1; for (; !match && i>0; i--, p += 2) match = (x >= p[0] && x <= p[1]); } break; case O_TCPFLAGS: match = (proto == IPPROTO_TCP && offset == 0 && flags_match(cmd, TCP(ulp)->th_flags)); break; case O_TCPOPTS: if (proto == IPPROTO_TCP && offset == 0 && ulp){ PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2)); match = tcpopts_match(TCP(ulp), cmd); } break; case O_TCPSEQ: match = (proto == IPPROTO_TCP && offset == 0 && ((ipfw_insn_u32 *)cmd)->d[0] == TCP(ulp)->th_seq); break; case O_TCPACK: match = (proto == IPPROTO_TCP && offset == 0 && ((ipfw_insn_u32 *)cmd)->d[0] == TCP(ulp)->th_ack); break; case O_TCPWIN: if (proto == IPPROTO_TCP && offset == 0) { uint16_t x; uint16_t *p; int i; x = ntohs(TCP(ulp)->th_win); if (cmdlen == 1) { match = (cmd->arg1 == x); break; } /* Otherwise we have ranges. */ p = ((ipfw_insn_u16 *)cmd)->ports; i = cmdlen - 1; for (; !match && i > 0; i--, p += 2) match = (x >= p[0] && x <= p[1]); } break; case O_ESTAB: /* reject packets which have SYN only */ /* XXX should i also check for TH_ACK ? */ match = (proto == IPPROTO_TCP && offset == 0 && (TCP(ulp)->th_flags & (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); break; case O_ALTQ: { struct pf_mtag *at; struct m_tag *mtag; ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; /* * ALTQ uses mbuf tags from another * packet filtering system - pf(4). * We allocate a tag in its format * and fill it in, pretending to be pf(4). */ match = 1; at = pf_find_mtag(m); if (at != NULL && at->qid != 0) break; mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag), M_NOWAIT | M_ZERO); if (mtag == NULL) { /* * Let the packet fall back to the * default ALTQ. */ break; } m_tag_prepend(m, mtag); at = (struct pf_mtag *)(mtag + 1); at->qid = altq->qid; at->hdr = ip; break; } case O_LOG: ipfw_log(chain, f, hlen, args, m, oif, offset | ip6f_mf, tablearg, ip); match = 1; break; case O_PROB: match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); break; case O_VERREVPATH: /* Outgoing packets automatically pass/match */ match = ((oif != NULL) || (m->m_pkthdr.rcvif == NULL) || ( #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), m->m_pkthdr.rcvif, args->f_id.fib) : #endif verify_path(src_ip, m->m_pkthdr.rcvif, args->f_id.fib))); break; case O_VERSRCREACH: /* Outgoing packets automatically pass/match */ match = (hlen > 0 && ((oif != NULL) || #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), NULL, args->f_id.fib) : #endif verify_path(src_ip, NULL, args->f_id.fib))); break; case O_ANTISPOOF: /* Outgoing packets automatically pass/match */ if (oif == NULL && hlen > 0 && ( (is_ipv4 && in_localaddr(src_ip)) #ifdef INET6 || (is_ipv6 && in6_localaddr(&(args->f_id.src_ip6))) #endif )) match = #ifdef INET6 is_ipv6 ? verify_path6( &(args->f_id.src_ip6), m->m_pkthdr.rcvif, args->f_id.fib) : #endif verify_path(src_ip, m->m_pkthdr.rcvif, args->f_id.fib); else match = 1; break; case O_IPSEC: match = (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); /* otherwise no match */ break; #ifdef INET6 case O_IP6_SRC: match = is_ipv6 && IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6, &((ipfw_insn_ip6 *)cmd)->addr6); break; case O_IP6_DST: match = is_ipv6 && IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6, &((ipfw_insn_ip6 *)cmd)->addr6); break; case O_IP6_SRC_MASK: case O_IP6_DST_MASK: if (is_ipv6) { int i = cmdlen - 1; struct in6_addr p; struct in6_addr *d = &((ipfw_insn_ip6 *)cmd)->addr6; for (; !match && i > 0; d += 2, i -= F_INSN_SIZE(struct in6_addr) * 2) { p = (cmd->opcode == O_IP6_SRC_MASK) ? args->f_id.src_ip6: args->f_id.dst_ip6; APPLY_MASK(&p, &d[1]); match = IN6_ARE_ADDR_EQUAL(&d[0], &p); } } break; case O_FLOW6ID: match = is_ipv6 && flow6id_match(args->f_id.flow_id6, (ipfw_insn_u32 *) cmd); break; case O_EXT_HDR: match = is_ipv6 && (ext_hd & ((ipfw_insn *) cmd)->arg1); break; case O_IP6: match = is_ipv6; break; #endif case O_IP4: match = is_ipv4; break; case O_TAG: { struct m_tag *mtag; uint32_t tag = TARG(cmd->arg1, tag); /* Packet is already tagged with this tag? */ mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL); /* We have `untag' action when F_NOT flag is * present. And we must remove this mtag from * mbuf and reset `match' to zero (`match' will * be inversed later). * Otherwise we should allocate new mtag and * push it into mbuf. */ if (cmd->len & F_NOT) { /* `untag' action */ if (mtag != NULL) m_tag_delete(m, mtag); match = 0; } else { if (mtag == NULL) { mtag = m_tag_alloc( MTAG_IPFW, tag, 0, M_NOWAIT); if (mtag != NULL) m_tag_prepend(m, mtag); } match = 1; } break; } case O_FIB: /* try match the specified fib */ if (args->f_id.fib == cmd->arg1) match = 1; break; case O_SOCKARG: { #ifndef USERSPACE /* not supported in userspace */ struct inpcb *inp = args->inp; struct inpcbinfo *pi; if (is_ipv6) /* XXX can we remove this ? */ break; if (proto == IPPROTO_TCP) pi = &V_tcbinfo; else if (proto == IPPROTO_UDP) pi = &V_udbinfo; else break; /* * XXXRW: so_user_cookie should almost * certainly be inp_user_cookie? */ /* For incoming packet, lookup up the inpcb using the src/dest ip/port tuple */ if (inp == NULL) { inp = in_pcblookup(pi, src_ip, htons(src_port), dst_ip, htons(dst_port), INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { tablearg = inp->inp_socket->so_user_cookie; if (tablearg) match = 1; INP_RUNLOCK(inp); } } else { if (inp->inp_socket) { tablearg = inp->inp_socket->so_user_cookie; if (tablearg) match = 1; } } #endif /* !USERSPACE */ break; } case O_TAGGED: { struct m_tag *mtag; uint32_t tag = TARG(cmd->arg1, tag); if (cmdlen == 1) { match = m_tag_locate(m, MTAG_IPFW, tag, NULL) != NULL; break; } /* we have ranges */ for (mtag = m_tag_first(m); mtag != NULL && !match; mtag = m_tag_next(m, mtag)) { uint16_t *p; int i; if (mtag->m_tag_cookie != MTAG_IPFW) continue; p = ((ipfw_insn_u16 *)cmd)->ports; i = cmdlen - 1; for(; !match && i > 0; i--, p += 2) match = mtag->m_tag_id >= p[0] && mtag->m_tag_id <= p[1]; } break; } /* * The second set of opcodes represents 'actions', * i.e. the terminal part of a rule once the packet * matches all previous patterns. * Typically there is only one action for each rule, * and the opcode is stored at the end of the rule * (but there are exceptions -- see below). * * In general, here we set retval and terminate the * outer loop (would be a 'break 3' in some language, * but we need to set l=0, done=1) * * Exceptions: * O_COUNT and O_SKIPTO actions: * instead of terminating, we jump to the next rule * (setting l=0), or to the SKIPTO target (setting * f/f_len, cmd and l as needed), respectively. * * O_TAG, O_LOG and O_ALTQ action parameters: * perform some action and set match = 1; * * O_LIMIT and O_KEEP_STATE: these opcodes are * not real 'actions', and are stored right * before the 'action' part of the rule. * These opcodes try to install an entry in the * state tables; if successful, we continue with * the next opcode (match=1; break;), otherwise * the packet must be dropped (set retval, * break loops with l=0, done=1) * * O_PROBE_STATE and O_CHECK_STATE: these opcodes * cause a lookup of the state table, and a jump * to the 'action' part of the parent rule * if an entry is found, or * (CHECK_STATE only) a jump to the next rule if * the entry is not found. * The result of the lookup is cached so that * further instances of these opcodes become NOPs. * The jump to the next rule is done by setting * l=0, cmdlen=0. */ case O_LIMIT: case O_KEEP_STATE: - if (ipfw_install_state(chain, f, + if (ipfw_dyn_install_state(chain, f, (ipfw_insn_limit *)cmd, args, tablearg)) { /* error or limit violation */ retval = IP_FW_DENY; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ } match = 1; break; case O_PROBE_STATE: case O_CHECK_STATE: /* * dynamic rules are checked at the first * keep-state or check-state occurrence, * with the result being stored in dyn_dir * and dyn_name. * The compiler introduces a PROBE_STATE * instruction for us when we have a * KEEP_STATE (because PROBE_STATE needs * to be run first). * * (dyn_dir == MATCH_UNKNOWN) means this is * first lookup for such f_id. Do lookup. * * (dyn_dir != MATCH_UNKNOWN && * dyn_name != 0 && dyn_name != cmd->arg1) * means previous lookup didn't find dynamic * rule for specific state name and current * lookup will search rule with another state * name. Redo lookup. * * (dyn_dir != MATCH_UNKNOWN && dyn_name == 0) * means previous lookup was for `any' name * and it didn't find rule. No need to do * lookup again. */ if ((dyn_dir == MATCH_UNKNOWN || (dyn_name != 0 && dyn_name != cmd->arg1)) && - (q = ipfw_lookup_dyn_rule(&args->f_id, - &dyn_dir, proto == IPPROTO_TCP ? - TCP(ulp): NULL, + (q = ipfw_dyn_lookup_state(&args->f_id, + ulp, pktlen, &dyn_dir, (dyn_name = cmd->arg1))) != NULL) { /* - * Found dynamic entry, update stats - * and jump to the 'action' part of - * the parent rule by setting - * f, cmd, l and clearing cmdlen. + * Found dynamic entry, jump to the + * 'action' part of the parent rule + * by setting f, cmd, l and clearing + * cmdlen. */ - IPFW_INC_DYN_COUNTER(q, pktlen); + f = q; /* XXX we would like to have f_pos * readily accessible in the dynamic * rule, instead of having to * lookup q->rule. */ - f = q->rule; f_pos = ipfw_find_rule(chain, - f->rulenum, f->id); + f->rulenum, f->id); cmd = ACTION_PTR(f); l = f->cmd_len - f->act_ofs; - ipfw_dyn_unlock(q); cmdlen = 0; match = 1; break; } /* * Dynamic entry not found. If CHECK_STATE, * skip to next rule, if PROBE_STATE just * ignore and continue with next opcode. */ if (cmd->opcode == O_CHECK_STATE) l = 0; /* exit inner loop */ match = 1; break; case O_ACCEPT: retval = 0; /* accept */ l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_PIPE: case O_QUEUE: set_match(args, f_pos, chain); args->rule.info = TARG(cmd->arg1, pipe); if (cmd->opcode == O_PIPE) args->rule.info |= IPFW_IS_PIPE; if (V_fw_one_pass) args->rule.info |= IPFW_ONEPASS; retval = IP_FW_DUMMYNET; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_DIVERT: case O_TEE: if (args->eh) /* not on layer 2 */ break; /* otherwise this is terminal */ l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ retval = (cmd->opcode == O_DIVERT) ? IP_FW_DIVERT : IP_FW_TEE; set_match(args, f_pos, chain); args->rule.info = TARG(cmd->arg1, divert); break; case O_COUNT: IPFW_INC_RULE_COUNTER(f, pktlen); l = 0; /* exit inner loop */ break; case O_SKIPTO: IPFW_INC_RULE_COUNTER(f, pktlen); f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0); /* * Skip disabled rules, and re-enter * the inner loop with the correct * f_pos, f, l and cmd. * Also clear cmdlen and skip_or */ for (; f_pos < chain->n_rules - 1 && (V_set_disable & (1 << chain->map[f_pos]->set)); f_pos++) ; /* Re-enter the inner loop at the skipto rule. */ f = chain->map[f_pos]; l = f->cmd_len; cmd = f->cmd; match = 1; cmdlen = 0; skip_or = 0; continue; break; /* not reached */ case O_CALLRETURN: { /* * Implementation of `subroutine' call/return, * in the stack carried in an mbuf tag. This * is different from `skipto' in that any call * address is possible (`skipto' must prevent * backward jumps to avoid endless loops). * We have `return' action when F_NOT flag is * present. The `m_tag_id' field is used as * stack pointer. */ struct m_tag *mtag; uint16_t jmpto, *stack; #define IS_CALL ((cmd->len & F_NOT) == 0) #define IS_RETURN ((cmd->len & F_NOT) != 0) /* * Hand-rolled version of m_tag_locate() with * wildcard `type'. * If not already tagged, allocate new tag. */ mtag = m_tag_first(m); while (mtag != NULL) { if (mtag->m_tag_cookie == MTAG_IPFW_CALL) break; mtag = m_tag_next(m, mtag); } if (mtag == NULL && IS_CALL) { mtag = m_tag_alloc(MTAG_IPFW_CALL, 0, IPFW_CALLSTACK_SIZE * sizeof(uint16_t), M_NOWAIT); if (mtag != NULL) m_tag_prepend(m, mtag); } /* * On error both `call' and `return' just * continue with next rule. */ if (IS_RETURN && (mtag == NULL || mtag->m_tag_id == 0)) { l = 0; /* exit inner loop */ break; } if (IS_CALL && (mtag == NULL || mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) { printf("ipfw: call stack error, " "go to next rule\n"); l = 0; /* exit inner loop */ break; } IPFW_INC_RULE_COUNTER(f, pktlen); stack = (uint16_t *)(mtag + 1); /* * The `call' action may use cached f_pos * (in f->next_rule), whose version is written * in f->next_rule. * The `return' action, however, doesn't have * fixed jump address in cmd->arg1 and can't use * cache. */ if (IS_CALL) { stack[mtag->m_tag_id] = f->rulenum; mtag->m_tag_id++; f_pos = JUMP(chain, f, cmd->arg1, tablearg, 1); } else { /* `return' action */ mtag->m_tag_id--; jmpto = stack[mtag->m_tag_id] + 1; f_pos = ipfw_find_rule(chain, jmpto, 0); } /* * Skip disabled rules, and re-enter * the inner loop with the correct * f_pos, f, l and cmd. * Also clear cmdlen and skip_or */ for (; f_pos < chain->n_rules - 1 && (V_set_disable & (1 << chain->map[f_pos]->set)); f_pos++) ; /* Re-enter the inner loop at the dest rule. */ f = chain->map[f_pos]; l = f->cmd_len; cmd = f->cmd; cmdlen = 0; skip_or = 0; continue; break; /* NOTREACHED */ } #undef IS_CALL #undef IS_RETURN case O_REJECT: /* * Drop the packet and send a reject notice * if the packet is not ICMP (or is an ICMP * query), and it is not multicast/broadcast. */ if (hlen > 0 && is_ipv4 && offset == 0 && (proto != IPPROTO_ICMP || is_icmp_query(ICMP(ulp))) && !(m->m_flags & (M_BCAST|M_MCAST)) && !IN_MULTICAST(ntohl(dst_ip.s_addr))) { send_reject(args, cmd->arg1, iplen, ip); m = args->m; } /* FALLTHROUGH */ #ifdef INET6 case O_UNREACH6: if (hlen > 0 && is_ipv6 && ((offset & IP6F_OFF_MASK) == 0) && (proto != IPPROTO_ICMPV6 || (is_icmp6_query(icmp6_type) == 1)) && !(m->m_flags & (M_BCAST|M_MCAST)) && !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) { send_reject6( args, cmd->arg1, hlen, (struct ip6_hdr *)ip); m = args->m; } /* FALLTHROUGH */ #endif case O_DENY: retval = IP_FW_DENY; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_FORWARD_IP: if (args->eh) /* not valid on layer2 pkts */ break; - if (q == NULL || q->rule != f || - dyn_dir == MATCH_FORWARD) { + if (q != f || dyn_dir == MATCH_FORWARD) { struct sockaddr_in *sa; sa = &(((ipfw_insn_sa *)cmd)->sa); if (sa->sin_addr.s_addr == INADDR_ANY) { #ifdef INET6 /* * We use O_FORWARD_IP opcode for * fwd rule with tablearg, but tables * now support IPv6 addresses. And * when we are inspecting IPv6 packet, * we can use nh6 field from * table_value as next_hop6 address. */ if (is_ipv6) { struct sockaddr_in6 *sa6; sa6 = args->next_hop6 = &args->hopstore6; sa6->sin6_family = AF_INET6; sa6->sin6_len = sizeof(*sa6); sa6->sin6_addr = TARG_VAL( chain, tablearg, nh6); sa6->sin6_port = sa->sin_port; /* * Set sin6_scope_id only for * link-local unicast addresses. */ if (IN6_IS_ADDR_LINKLOCAL( &sa6->sin6_addr)) sa6->sin6_scope_id = TARG_VAL(chain, tablearg, zoneid); } else #endif { args->hopstore.sin_port = sa->sin_port; sa = args->next_hop = &args->hopstore; sa->sin_family = AF_INET; sa->sin_len = sizeof(*sa); sa->sin_addr.s_addr = htonl( TARG_VAL(chain, tablearg, nh4)); } } else { args->next_hop = sa; } } retval = IP_FW_PASS; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; #ifdef INET6 case O_FORWARD_IP6: if (args->eh) /* not valid on layer2 pkts */ break; - if (q == NULL || q->rule != f || - dyn_dir == MATCH_FORWARD) { + if (q != f || dyn_dir == MATCH_FORWARD) { struct sockaddr_in6 *sin6; sin6 = &(((ipfw_insn_sa6 *)cmd)->sa); args->next_hop6 = sin6; } retval = IP_FW_PASS; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; #endif case O_NETGRAPH: case O_NGTEE: set_match(args, f_pos, chain); args->rule.info = TARG(cmd->arg1, netgraph); if (V_fw_one_pass) args->rule.info |= IPFW_ONEPASS; retval = (cmd->opcode == O_NETGRAPH) ? IP_FW_NETGRAPH : IP_FW_NGTEE; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ break; case O_SETFIB: { uint32_t fib; IPFW_INC_RULE_COUNTER(f, pktlen); fib = TARG(cmd->arg1, fib) & 0x7FFF; if (fib >= rt_numfibs) fib = 0; M_SETFIB(m, fib); args->f_id.fib = fib; l = 0; /* exit inner loop */ break; } case O_SETDSCP: { uint16_t code; code = TARG(cmd->arg1, dscp) & 0x3F; l = 0; /* exit inner loop */ if (is_ipv4) { uint16_t old; old = *(uint16_t *)ip; ip->ip_tos = (code << 2) | (ip->ip_tos & 0x03); ip->ip_sum = cksum_adjust(ip->ip_sum, old, *(uint16_t *)ip); } else if (is_ipv6) { uint8_t *v; v = &((struct ip6_hdr *)ip)->ip6_vfc; *v = (*v & 0xF0) | (code >> 2); v++; *v = (*v & 0x3F) | ((code & 0x03) << 6); } else break; IPFW_INC_RULE_COUNTER(f, pktlen); break; } case O_NAT: l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ /* * Ensure that we do not invoke NAT handler for * non IPv4 packets. Libalias expects only IPv4. */ if (!is_ipv4 || !IPFW_NAT_LOADED) { retval = IP_FW_DENY; break; } struct cfg_nat *t; int nat_id; set_match(args, f_pos, chain); /* Check if this is 'global' nat rule */ if (cmd->arg1 == IP_FW_NAT44_GLOBAL) { retval = ipfw_nat_ptr(args, NULL, m); break; } t = ((ipfw_insn_nat *)cmd)->nat; if (t == NULL) { nat_id = TARG(cmd->arg1, nat); t = (*lookup_nat_ptr)(&chain->nat, nat_id); if (t == NULL) { retval = IP_FW_DENY; break; } if (cmd->arg1 != IP_FW_TARG) ((ipfw_insn_nat *)cmd)->nat = t; } retval = ipfw_nat_ptr(args, t, m); break; case O_REASS: { int ip_off; IPFW_INC_RULE_COUNTER(f, pktlen); l = 0; /* in any case exit inner loop */ ip_off = ntohs(ip->ip_off); /* if not fragmented, go to next rule */ if ((ip_off & (IP_MF | IP_OFFMASK)) == 0) break; args->m = m = ip_reass(m); /* * do IP header checksum fixup. */ if (m == NULL) { /* fragment got swallowed */ retval = IP_FW_DENY; } else { /* good, packet complete */ int hlen; ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(m, hlen); retval = IP_FW_REASS; set_match(args, f_pos, chain); } done = 1; /* exit outer loop */ break; } case O_EXTERNAL_ACTION: l = 0; /* in any case exit inner loop */ retval = ipfw_run_eaction(chain, args, cmd, &done); /* * If both @retval and @done are zero, * consider this as rule matching and * update counters. */ if (retval == 0 && done == 0) { IPFW_INC_RULE_COUNTER(f, pktlen); /* * Reset the result of the last * dynamic state lookup. * External action can change * @args content, and it may be * used for new state lookup later. */ dyn_dir = MATCH_UNKNOWN; } break; default: panic("-- unknown opcode %d\n", cmd->opcode); } /* end of switch() on opcodes */ /* * if we get here with l=0, then match is irrelevant. */ if (cmd->len & F_NOT) match = !match; if (match) { if (cmd->len & F_OR) skip_or = 1; } else { if (!(cmd->len & F_OR)) /* not an OR block, */ break; /* try next rule */ } } /* end of inner loop, scan opcodes */ #undef PULLUP_LEN if (done) break; /* next_rule:; */ /* try next rule */ } /* end of outer for, scan rules */ if (done) { struct ip_fw *rule = chain->map[f_pos]; /* Update statistics */ IPFW_INC_RULE_COUNTER(rule, pktlen); } else { retval = IP_FW_DENY; printf("ipfw: ouch!, skip past end of rules, denying packet\n"); } IPFW_PF_RUNLOCK(chain); #ifdef __FreeBSD__ if (ucred_cache != NULL) crfree(ucred_cache); #endif return (retval); pullup_failed: if (V_fw_verbose) printf("ipfw: pullup failed\n"); return (IP_FW_DENY); } /* * Set maximum number of tables that can be used in given VNET ipfw instance. */ #ifdef SYSCTL_NODE static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS) { int error; unsigned int ntables; ntables = V_fw_tables_max; error = sysctl_handle_int(oidp, &ntables, 0, req); /* Read operation or some error */ if ((error != 0) || (req->newptr == NULL)) return (error); return (ipfw_resize_tables(&V_layer3_chain, ntables)); } /* * Switches table namespace between global and per-set. */ static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS) { int error; unsigned int sets; sets = V_fw_tables_sets; error = sysctl_handle_int(oidp, &sets, 0, req); /* Read operation or some error */ if ((error != 0) || (req->newptr == NULL)) return (error); return (ipfw_switch_tables_namespace(&V_layer3_chain, sets)); } #endif /* * Module and VNET glue */ /* * Stuff that must be initialised only on boot or module load */ static int ipfw_init(void) { int error = 0; /* * Only print out this stuff the first time around, * when called from the sysinit code. */ printf("ipfw2 " #ifdef INET6 "(+ipv6) " #endif "initialized, divert %s, nat %s, " "default to %s, logging ", #ifdef IPDIVERT "enabled", #else "loadable", #endif #ifdef IPFIREWALL_NAT "enabled", #else "loadable", #endif default_to_accept ? "accept" : "deny"); /* * Note: V_xxx variables can be accessed here but the vnet specific * initializer may not have been called yet for the VIMAGE case. * Tuneables will have been processed. We will print out values for * the default vnet. * XXX This should all be rationalized AFTER 8.0 */ if (V_fw_verbose == 0) printf("disabled\n"); else if (V_verbose_limit == 0) printf("unlimited\n"); else printf("limited to %d packets/entry by default\n", V_verbose_limit); /* Check user-supplied table count for validness */ if (default_fw_tables > IPFW_TABLES_MAX) default_fw_tables = IPFW_TABLES_MAX; ipfw_init_sopt_handler(); ipfw_init_obj_rewriter(); ipfw_iface_init(); return (error); } /* * Called for the removal of the last instance only on module unload. */ static void ipfw_destroy(void) { ipfw_iface_destroy(); ipfw_destroy_sopt_handler(); ipfw_destroy_obj_rewriter(); printf("IP firewall unloaded\n"); } /* * Stuff that must be initialized for every instance * (including the first of course). */ static int vnet_ipfw_init(const void *unused) { int error, first; struct ip_fw *rule = NULL; struct ip_fw_chain *chain; chain = &V_layer3_chain; first = IS_DEFAULT_VNET(curvnet) ? 1 : 0; /* First set up some values that are compile time options */ V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ V_fw_deny_unknown_exthdrs = 1; #ifdef IPFIREWALL_VERBOSE V_fw_verbose = 1; #endif #ifdef IPFIREWALL_VERBOSE_LIMIT V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; #endif #ifdef IPFIREWALL_NAT LIST_INIT(&chain->nat); #endif /* Init shared services hash table */ ipfw_init_srv(chain); ipfw_init_counters(); - /* insert the default rule and create the initial map */ - chain->n_rules = 1; - chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO); - rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw)); - /* Set initial number of tables */ V_fw_tables_max = default_fw_tables; error = ipfw_init_tables(chain, first); if (error) { printf("ipfw2: setting up tables failed\n"); free(chain->map, M_IPFW); free(rule, M_IPFW); return (ENOSPC); } + IPFW_LOCK_INIT(chain); + /* fill and insert the default rule */ - rule->act_ofs = 0; - rule->rulenum = IPFW_DEFAULT_RULE; + rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw)); rule->cmd_len = 1; - rule->set = RESVD_SET; rule->cmd[0].len = 1; rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY; - chain->default_rule = chain->map[0] = rule; - chain->id = rule->id = 1; - /* Pre-calculate rules length for legacy dump format */ - chain->static_len = sizeof(struct ip_fw_rule0); + chain->default_rule = rule; + ipfw_add_protected_rule(chain, rule, 0); - IPFW_LOCK_INIT(chain); ipfw_dyn_init(chain); ipfw_eaction_init(chain, first); #ifdef LINEAR_SKIPTO ipfw_init_skipto_cache(chain); #endif ipfw_bpf_init(first); /* First set up some values that are compile time options */ V_ipfw_vnet_ready = 1; /* Open for business */ /* * Hook the sockopt handler and pfil hooks for ipv4 and ipv6. * Even if the latter two fail we still keep the module alive * because the sockopt and layer2 paths are still useful. * ipfw[6]_hook return 0 on success, ENOENT on failure, * so we can ignore the exact return value and just set a flag. * * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so * changes in the underlying (per-vnet) variables trigger * immediate hook()/unhook() calls. * In layer2 we have the same behaviour, except that V_ether_ipfw * is checked on each packet because there are no pfil hooks. */ V_ip_fw_ctl_ptr = ipfw_ctl3; error = ipfw_attach_hooks(1); return (error); } /* * Called for the removal of each instance. */ static int vnet_ipfw_uninit(const void *unused) { struct ip_fw *reap; struct ip_fw_chain *chain = &V_layer3_chain; int i, last; V_ipfw_vnet_ready = 0; /* tell new callers to go away */ /* * disconnect from ipv4, ipv6, layer2 and sockopt. * Then grab, release and grab again the WLOCK so we make * sure the update is propagated and nobody will be in. */ (void)ipfw_attach_hooks(0 /* detach */); V_ip_fw_ctl_ptr = NULL; last = IS_DEFAULT_VNET(curvnet) ? 1 : 0; IPFW_UH_WLOCK(chain); IPFW_UH_WUNLOCK(chain); ipfw_dyn_uninit(0); /* run the callout_drain */ IPFW_UH_WLOCK(chain); reap = NULL; IPFW_WLOCK(chain); for (i = 0; i < chain->n_rules; i++) ipfw_reap_add(chain, &reap, chain->map[i]); free(chain->map, M_IPFW); #ifdef LINEAR_SKIPTO ipfw_destroy_skipto_cache(chain); #endif IPFW_WUNLOCK(chain); IPFW_UH_WUNLOCK(chain); ipfw_destroy_tables(chain, last); ipfw_eaction_uninit(chain, last); if (reap != NULL) ipfw_reap_rules(reap); vnet_ipfw_iface_destroy(chain); ipfw_destroy_srv(chain); IPFW_LOCK_DESTROY(chain); ipfw_dyn_uninit(1); /* free the remaining parts */ ipfw_destroy_counters(); ipfw_bpf_uninit(last); return (0); } /* * Module event handler. * In general we have the choice of handling most of these events by the * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to * use the SYSINIT handlers as they are more capable of expressing the * flow of control during module and vnet operations, so this is just * a skeleton. Note there is no SYSINIT equivalent of the module * SHUTDOWN handler, but we don't have anything to do in that case anyhow. */ static int ipfw_modevent(module_t mod, int type, void *unused) { int err = 0; switch (type) { case MOD_LOAD: /* Called once at module load or * system boot if compiled in. */ break; case MOD_QUIESCE: /* Called before unload. May veto unloading. */ break; case MOD_UNLOAD: /* Called during unload. */ break; case MOD_SHUTDOWN: /* Called during system shutdown. */ break; default: err = EOPNOTSUPP; break; } return err; } static moduledata_t ipfwmod = { "ipfw", ipfw_modevent, 0 }; /* Define startup order. */ #define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_FIREWALL #define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */ #define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */ #define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */ DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER); FEATURE(ipfw_ctl3, "ipfw new sockopt calls"); MODULE_VERSION(ipfw, 3); /* should declare some dependencies here */ /* * Starting up. Done in order after ipfwmod() has been called. * VNET_SYSINIT is also called for each existing vnet and each new vnet. */ SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER, ipfw_init, NULL); VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER, vnet_ipfw_init, NULL); /* * Closing up shop. These are done in REVERSE ORDER, but still * after ipfwmod() has been called. Not called on reboot. * VNET_SYSUNINIT is also called for each exiting vnet as it exits. * or when the module is unloaded. */ SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER, ipfw_destroy, NULL); VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER, vnet_ipfw_uninit, NULL); /* end of file */ Index: stable/11/sys/netpfil/ipfw/ip_fw_dynamic.c =================================================================== --- stable/11/sys/netpfil/ipfw/ip_fw_dynamic.c (revision 326387) +++ stable/11/sys/netpfil/ipfw/ip_fw_dynamic.c (revision 326388) @@ -1,1818 +1,1668 @@ /*- * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define DEB(x) #define DDB(x) x /* * Dynamic rule support for ipfw */ #include "opt_ipfw.h" #include "opt_inet.h" #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include /* for ETHERTYPE_IP */ #include #include #include #include #include #include #include /* ip_defttl */ #include #include #include #include /* IN6_ARE_ADDR_EQUAL */ #ifdef INET6 #include #include #endif #include #include /* XXX for in_cksum */ #ifdef MAC #include #endif /* * Description of dynamic rules. * * Dynamic rules are stored in lists accessed through a hash table * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can * be modified through the sysctl variable dyn_buckets which is * updated when the table becomes empty. * * XXX currently there is only one list, ipfw_dyn. * * When a packet is received, its address fields are first masked * with the mask defined for the rule, then hashed, then matched * against the entries in the corresponding list. * Dynamic rules can be used for different purposes: * + stateful rules; * + enforcing limits on the number of sessions; * + in-kernel NAT (not implemented yet) * * The lifetime of dynamic rules is regulated by dyn_*_lifetime, * measured in seconds and depending on the flags. * * The total number of dynamic rules is equal to UMA zone items count. * The max number of dynamic rules is dyn_max. When we reach * the maximum number of rules we do not create anymore. This is * done to avoid consuming too much memory, but also too much * time when searching on each packet (ideally, we should try instead * to put a limit on the length of the list on each bucket...). * * Each dynamic rule holds a pointer to the parent ipfw rule so * we know what action to perform. Dynamic rules are removed when * the parent rule is deleted. This can be changed by dyn_keep_states * sysctl. * * There are some limitations with dynamic rules -- we do not * obey the 'randomized match', and we do not do multiple * passes through the firewall. XXX check the latter!!! */ struct ipfw_dyn_bucket { struct mtx mtx; /* Bucket protecting lock */ ipfw_dyn_rule *head; /* Pointer to first rule */ }; /* * Static variables followed by global ones */ static VNET_DEFINE(struct ipfw_dyn_bucket *, ipfw_dyn_v); static VNET_DEFINE(u_int32_t, dyn_buckets_max); static VNET_DEFINE(u_int32_t, curr_dyn_buckets); static VNET_DEFINE(struct callout, ipfw_timeout); #define V_ipfw_dyn_v VNET(ipfw_dyn_v) #define V_dyn_buckets_max VNET(dyn_buckets_max) #define V_curr_dyn_buckets VNET(curr_dyn_buckets) #define V_ipfw_timeout VNET(ipfw_timeout) static VNET_DEFINE(uma_zone_t, ipfw_dyn_rule_zone); #define V_ipfw_dyn_rule_zone VNET(ipfw_dyn_rule_zone) #define IPFW_BUCK_LOCK_INIT(b) \ mtx_init(&(b)->mtx, "IPFW dynamic bucket", NULL, MTX_DEF) #define IPFW_BUCK_LOCK_DESTROY(b) \ mtx_destroy(&(b)->mtx) #define IPFW_BUCK_LOCK(i) mtx_lock(&V_ipfw_dyn_v[(i)].mtx) #define IPFW_BUCK_UNLOCK(i) mtx_unlock(&V_ipfw_dyn_v[(i)].mtx) #define IPFW_BUCK_ASSERT(i) mtx_assert(&V_ipfw_dyn_v[(i)].mtx, MA_OWNED) static VNET_DEFINE(int, dyn_keep_states); #define V_dyn_keep_states VNET(dyn_keep_states) /* * Timeouts for various events in handing dynamic rules. */ static VNET_DEFINE(u_int32_t, dyn_ack_lifetime); static VNET_DEFINE(u_int32_t, dyn_syn_lifetime); static VNET_DEFINE(u_int32_t, dyn_fin_lifetime); static VNET_DEFINE(u_int32_t, dyn_rst_lifetime); static VNET_DEFINE(u_int32_t, dyn_udp_lifetime); static VNET_DEFINE(u_int32_t, dyn_short_lifetime); #define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) #define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) #define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) #define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) #define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) #define V_dyn_short_lifetime VNET(dyn_short_lifetime) /* * Keepalives are sent if dyn_keepalive is set. They are sent every * dyn_keepalive_period seconds, in the last dyn_keepalive_interval * seconds of lifetime of a rule. * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower * than dyn_keepalive_period. */ static VNET_DEFINE(u_int32_t, dyn_keepalive_interval); static VNET_DEFINE(u_int32_t, dyn_keepalive_period); static VNET_DEFINE(u_int32_t, dyn_keepalive); static VNET_DEFINE(time_t, dyn_keepalive_last); #define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) #define V_dyn_keepalive_period VNET(dyn_keepalive_period) #define V_dyn_keepalive VNET(dyn_keepalive) #define V_dyn_keepalive_last VNET(dyn_keepalive_last) static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ #define DYN_COUNT uma_zone_get_cur(V_ipfw_dyn_rule_zone) #define V_dyn_max VNET(dyn_max) /* for userspace, we emulate the uma_zone_counter with ipfw_dyn_count */ static int ipfw_dyn_count; /* number of objects */ #ifdef USERSPACE /* emulation of UMA object counters for userspace */ #define uma_zone_get_cur(x) ipfw_dyn_count #endif /* USERSPACE */ static int last_log; /* Log ratelimiting */ static void ipfw_dyn_tick(void *vnetx); static void check_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *, int, int); #ifdef SYSCTL_NODE static int sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS); static int sysctl_ipfw_dyn_max(SYSCTL_HANDLER_ARGS); SYSBEGIN(f2) SYSCTL_DECL(_net_inet_ip_fw); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_buckets_max), 0, "Max number of dyn. buckets"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, "Current Number of dyn. buckets"); SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RD, 0, 0, sysctl_ipfw_dyn_count, "IU", "Number of dyn. rules"); SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_dyn_max, "IU", "Max number of dyn. rules"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, "Lifetime of dyn. rules for acks"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, "Lifetime of dyn. rules for syn"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, "Lifetime of dyn. rules for fin"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, "Lifetime of dyn. rules for rst"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, "Lifetime of dyn. rules for UDP"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, "Lifetime of dyn. rules for other situations"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, "Enable keepalives for dyn. rules"); SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keep_states, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keep_states), 0, "Do not flush dynamic states on rule deletion"); SYSEND #endif /* SYSCTL_NODE */ #ifdef INET6 static __inline int -hash_packet6(struct ipfw_flow_id *id) +hash_packet6(const struct ipfw_flow_id *id) { u_int32_t i; i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ (id->src_ip6.__u6_addr.__u6_addr32[3]); return ntohl(i); } #endif /* * IMPORTANT: the hash function for dynamic rules must be commutative * in source and destination (ip,port), because rules are bidirectional * and we want to find both in the same bucket. */ static __inline int -hash_packet(struct ipfw_flow_id *id, int buckets) +hash_packet(const struct ipfw_flow_id *id, int buckets) { u_int32_t i; #ifdef INET6 if (IS_IP6_FLOW_ID(id)) i = hash_packet6(id); else #endif /* INET6 */ i = (id->dst_ip) ^ (id->src_ip); i ^= (id->dst_port) ^ (id->src_port); return (i & (buckets - 1)); } #if 0 #define DYN_DEBUG(fmt, ...) do { \ printf("%s: " fmt "\n", __func__, __VA_ARGS__); \ } while (0) #else #define DYN_DEBUG(fmt, ...) #endif static char *default_state_name = "default"; struct dyn_state_obj { struct named_object no; char name[64]; }; #define DYN_STATE_OBJ(ch, cmd) \ ((struct dyn_state_obj *)SRV_OBJECT(ch, (cmd)->arg1)) /* * Classifier callback. * Return 0 if opcode contains object that should be referenced * or rewritten. */ static int dyn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); /* Don't rewrite "check-state any" */ if (cmd->arg1 == 0 && cmd->opcode == O_CHECK_STATE) return (1); *puidx = cmd->arg1; *ptype = 0; return (0); } static void dyn_update(ipfw_insn *cmd, uint16_t idx) { cmd->arg1 = idx; DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); } static int dyn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, struct named_object **pno) { ipfw_obj_ntlv *ntlv; const char *name; DYN_DEBUG("uidx %d", ti->uidx); if (ti->uidx != 0) { if (ti->tlvs == NULL) return (EINVAL); /* Search ntlv in the buffer provided by user */ ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, IPFW_TLV_STATE_NAME); if (ntlv == NULL) return (EINVAL); name = ntlv->name; } else name = default_state_name; /* * Search named object with corresponding name. * Since states objects are global - ignore the set value * and use zero instead. */ *pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0, IPFW_TLV_STATE_NAME, name); /* * We always return success here. * The caller will check *pno and mark object as unresolved, * then it will automatically create "default" object. */ return (0); } static struct named_object * dyn_findbykidx(struct ip_fw_chain *ch, uint16_t idx) { DYN_DEBUG("kidx %d", idx); return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx)); } static int dyn_create(struct ip_fw_chain *ch, struct tid_info *ti, uint16_t *pkidx) { struct namedobj_instance *ni; struct dyn_state_obj *obj; struct named_object *no; ipfw_obj_ntlv *ntlv; char *name; DYN_DEBUG("uidx %d", ti->uidx); if (ti->uidx != 0) { if (ti->tlvs == NULL) return (EINVAL); ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, IPFW_TLV_STATE_NAME); if (ntlv == NULL) return (EINVAL); name = ntlv->name; } else name = default_state_name; ni = CHAIN_TO_SRV(ch); obj = malloc(sizeof(*obj), M_IPFW, M_WAITOK | M_ZERO); obj->no.name = obj->name; obj->no.etlv = IPFW_TLV_STATE_NAME; strlcpy(obj->name, name, sizeof(obj->name)); IPFW_UH_WLOCK(ch); no = ipfw_objhash_lookup_name_type(ni, 0, IPFW_TLV_STATE_NAME, name); if (no != NULL) { /* * Object is already created. * Just return its kidx and bump refcount. */ *pkidx = no->kidx; no->refcnt++; IPFW_UH_WUNLOCK(ch); free(obj, M_IPFW); DYN_DEBUG("\tfound kidx %d", *pkidx); return (0); } if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) { DYN_DEBUG("\talloc_idx failed for %s", name); IPFW_UH_WUNLOCK(ch); free(obj, M_IPFW); return (ENOSPC); } ipfw_objhash_add(ni, &obj->no); SRV_OBJECT(ch, obj->no.kidx) = obj; obj->no.refcnt++; *pkidx = obj->no.kidx; IPFW_UH_WUNLOCK(ch); DYN_DEBUG("\tcreated kidx %d", *pkidx); return (0); } static void dyn_destroy(struct ip_fw_chain *ch, struct named_object *no) { struct dyn_state_obj *obj; IPFW_UH_WLOCK_ASSERT(ch); KASSERT(no->refcnt == 1, ("Destroying object '%s' (type %u, idx %u) with refcnt %u", no->name, no->etlv, no->kidx, no->refcnt)); DYN_DEBUG("kidx %d", no->kidx); obj = SRV_OBJECT(ch, no->kidx); SRV_OBJECT(ch, no->kidx) = NULL; ipfw_objhash_del(CHAIN_TO_SRV(ch), no); ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), no->kidx); free(obj, M_IPFW); } static struct opcode_obj_rewrite dyn_opcodes[] = { { O_KEEP_STATE, IPFW_TLV_STATE_NAME, dyn_classify, dyn_update, dyn_findbyname, dyn_findbykidx, dyn_create, dyn_destroy }, { O_CHECK_STATE, IPFW_TLV_STATE_NAME, dyn_classify, dyn_update, dyn_findbyname, dyn_findbykidx, dyn_create, dyn_destroy }, { O_PROBE_STATE, IPFW_TLV_STATE_NAME, dyn_classify, dyn_update, dyn_findbyname, dyn_findbykidx, dyn_create, dyn_destroy }, { O_LIMIT, IPFW_TLV_STATE_NAME, dyn_classify, dyn_update, dyn_findbyname, dyn_findbykidx, dyn_create, dyn_destroy }, }; /** * Print customizable flow id description via log(9) facility. */ static void -print_dyn_rule_flags(struct ipfw_flow_id *id, int dyn_type, int log_flags, - char *prefix, char *postfix) +print_dyn_rule_flags(const struct ipfw_flow_id *id, int dyn_type, + int log_flags, char *prefix, char *postfix) { struct in_addr da; #ifdef INET6 char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; #else char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; #endif #ifdef INET6 if (IS_IP6_FLOW_ID(id)) { ip6_sprintf(src, &id->src_ip6); ip6_sprintf(dst, &id->dst_ip6); } else #endif { da.s_addr = htonl(id->src_ip); inet_ntop(AF_INET, &da, src, sizeof(src)); da.s_addr = htonl(id->dst_ip); inet_ntop(AF_INET, &da, dst, sizeof(dst)); } log(log_flags, "ipfw: %s type %d %s %d -> %s %d, %d %s\n", prefix, dyn_type, src, id->src_port, dst, id->dst_port, DYN_COUNT, postfix); } #define print_dyn_rule(id, dtype, prefix, postfix) \ print_dyn_rule_flags(id, dtype, LOG_DEBUG, prefix, postfix) #define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) #define TIME_LE(a,b) ((int)((a)-(b)) < 0) static void dyn_update_proto_state(ipfw_dyn_rule *q, const struct ipfw_flow_id *id, - const struct tcphdr *tcp, int dir) + const void *ulp, int dir) { + const struct tcphdr *tcp; uint32_t ack; u_char flags; if (id->proto == IPPROTO_TCP) { + tcp = (const struct tcphdr *)ulp; flags = id->_flags & (TH_FIN | TH_SYN | TH_RST); #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) #define TCP_FLAGS (TH_FLAGS | (TH_FLAGS << 8)) #define ACK_FWD 0x10000 /* fwd ack seen */ #define ACK_REV 0x20000 /* rev ack seen */ q->state |= (dir == MATCH_FORWARD) ? flags : (flags << 8); switch (q->state & TCP_FLAGS) { case TH_SYN: /* opening */ q->expire = time_uptime + V_dyn_syn_lifetime; break; case BOTH_SYN: /* move to established */ case BOTH_SYN | TH_FIN: /* one side tries to close */ case BOTH_SYN | (TH_FIN << 8): #define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) if (tcp == NULL) break; ack = ntohl(tcp->th_ack); if (dir == MATCH_FORWARD) { if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) { q->ack_fwd = ack; q->state |= ACK_FWD; } } else { if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) { q->ack_rev = ack; q->state |= ACK_REV; } } if ((q->state & (ACK_FWD | ACK_REV)) == (ACK_FWD | ACK_REV)) { q->expire = time_uptime + V_dyn_ack_lifetime; q->state &= ~(ACK_FWD | ACK_REV); } break; case BOTH_SYN | BOTH_FIN: /* both sides closed */ if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) V_dyn_fin_lifetime = V_dyn_keepalive_period - 1; q->expire = time_uptime + V_dyn_fin_lifetime; break; default: #if 0 /* * reset or some invalid combination, but can also * occur if we use keep-state the wrong way. */ if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) printf("invalid state: 0x%x\n", q->state); #endif if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) V_dyn_rst_lifetime = V_dyn_keepalive_period - 1; q->expire = time_uptime + V_dyn_rst_lifetime; break; } } else if (id->proto == IPPROTO_UDP) { q->expire = time_uptime + V_dyn_udp_lifetime; } else { /* other protocols */ q->expire = time_uptime + V_dyn_short_lifetime; } } /* * Lookup a dynamic rule, locked version. */ static ipfw_dyn_rule * -lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int i, int *match_direction, - struct tcphdr *tcp, uint16_t kidx) +lookup_dyn_rule_locked(const struct ipfw_flow_id *pkt, const void *ulp, + int i, int *match_direction, uint16_t kidx) { /* * Stateful ipfw extensions. * Lookup into dynamic session queue. */ ipfw_dyn_rule *prev, *q = NULL; int dir; IPFW_BUCK_ASSERT(i); dir = MATCH_NONE; for (prev = NULL, q = V_ipfw_dyn_v[i].head; q; prev = q, q = q->next) { if (q->dyn_type == O_LIMIT_PARENT) continue; + if (pkt->addr_type != q->id.addr_type) + continue; + if (pkt->proto != q->id.proto) continue; if (kidx != 0 && kidx != q->kidx) continue; if (IS_IP6_FLOW_ID(pkt)) { if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.src_ip6) && IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.dst_ip6) && pkt->src_port == q->id.src_port && pkt->dst_port == q->id.dst_port) { dir = MATCH_FORWARD; break; } if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.dst_ip6) && IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.src_ip6) && pkt->src_port == q->id.dst_port && pkt->dst_port == q->id.src_port) { dir = MATCH_REVERSE; break; } } else { if (pkt->src_ip == q->id.src_ip && pkt->dst_ip == q->id.dst_ip && pkt->src_port == q->id.src_port && pkt->dst_port == q->id.dst_port) { dir = MATCH_FORWARD; break; } if (pkt->src_ip == q->id.dst_ip && pkt->dst_ip == q->id.src_ip && pkt->src_port == q->id.dst_port && pkt->dst_port == q->id.src_port) { dir = MATCH_REVERSE; break; } } } if (q == NULL) goto done; /* q = NULL, not found */ if (prev != NULL) { /* found and not in front */ prev->next = q->next; q->next = V_ipfw_dyn_v[i].head; V_ipfw_dyn_v[i].head = q; } /* update state according to flags */ - dyn_update_proto_state(q, pkt, tcp, dir); + dyn_update_proto_state(q, pkt, ulp, dir); done: if (match_direction != NULL) *match_direction = dir; return (q); } -ipfw_dyn_rule * -ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, - struct tcphdr *tcp, uint16_t kidx) +struct ip_fw * +ipfw_dyn_lookup_state(const struct ipfw_flow_id *pkt, const void *ulp, + int pktlen, int *match_direction, uint16_t kidx) { + struct ip_fw *rule; ipfw_dyn_rule *q; int i; i = hash_packet(pkt, V_curr_dyn_buckets); IPFW_BUCK_LOCK(i); - q = lookup_dyn_rule_locked(pkt, i, match_direction, tcp, kidx); + q = lookup_dyn_rule_locked(pkt, ulp, i, match_direction, kidx); if (q == NULL) - IPFW_BUCK_UNLOCK(i); - /* NB: return table locked when q is not NULL */ - return q; + rule = NULL; + else { + rule = q->rule; + IPFW_INC_DYN_COUNTER(q, pktlen); + } + IPFW_BUCK_UNLOCK(i); + return (rule); } -/* - * Unlock bucket mtx - * @p - pointer to dynamic rule - */ -void -ipfw_dyn_unlock(ipfw_dyn_rule *q) -{ - - IPFW_BUCK_UNLOCK(q->bucket); -} - static int resize_dynamic_table(struct ip_fw_chain *chain, int nbuckets) { int i, k, nbuckets_old; ipfw_dyn_rule *q; struct ipfw_dyn_bucket *dyn_v, *dyn_v_old; /* Check if given number is power of 2 and less than 64k */ if ((nbuckets > 65536) || (!powerof2(nbuckets))) return 1; CTR3(KTR_NET, "%s: resize dynamic hash: %d -> %d", __func__, V_curr_dyn_buckets, nbuckets); /* Allocate and initialize new hash */ dyn_v = malloc(nbuckets * sizeof(*dyn_v), M_IPFW, M_WAITOK | M_ZERO); for (i = 0 ; i < nbuckets; i++) IPFW_BUCK_LOCK_INIT(&dyn_v[i]); /* * Call upper half lock, as get_map() do to ease * read-only access to dynamic rules hash from sysctl */ IPFW_UH_WLOCK(chain); /* * Acquire chain write lock to permit hash access * for main traffic path without additional locks */ IPFW_WLOCK(chain); /* Save old values */ nbuckets_old = V_curr_dyn_buckets; dyn_v_old = V_ipfw_dyn_v; /* Skip relinking if array is not set up */ if (V_ipfw_dyn_v == NULL) V_curr_dyn_buckets = 0; /* Re-link all dynamic states */ for (i = 0 ; i < V_curr_dyn_buckets ; i++) { while (V_ipfw_dyn_v[i].head != NULL) { /* Remove from current chain */ q = V_ipfw_dyn_v[i].head; V_ipfw_dyn_v[i].head = q->next; /* Get new hash value */ k = hash_packet(&q->id, nbuckets); q->bucket = k; /* Add to the new head */ q->next = dyn_v[k].head; dyn_v[k].head = q; } } /* Update current pointers/buckets values */ V_curr_dyn_buckets = nbuckets; V_ipfw_dyn_v = dyn_v; IPFW_WUNLOCK(chain); IPFW_UH_WUNLOCK(chain); /* Start periodic callout on initial creation */ if (dyn_v_old == NULL) { callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, curvnet, 0); return (0); } /* Destroy all mutexes */ for (i = 0 ; i < nbuckets_old ; i++) IPFW_BUCK_LOCK_DESTROY(&dyn_v_old[i]); /* Free old hash */ free(dyn_v_old, M_IPFW); return 0; } /** * Install state of type 'type' for a dynamic session. * The hash table contains two type of rules: * - regular rules (O_KEEP_STATE) * - rules for sessions with limited number of sess per user * (O_LIMIT). When they are created, the parent is * increased by 1, and decreased on delete. In this case, * the third parameter is the parent rule and not the chain. * - "parent" rules for the above (O_LIMIT_PARENT). */ static ipfw_dyn_rule * -add_dyn_rule(struct ipfw_flow_id *id, int i, uint8_t dyn_type, +add_dyn_rule(const struct ipfw_flow_id *id, int i, uint8_t dyn_type, struct ip_fw *rule, uint16_t kidx) { ipfw_dyn_rule *r; IPFW_BUCK_ASSERT(i); r = uma_zalloc(V_ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO); if (r == NULL) { if (last_log != time_uptime) { last_log = time_uptime; log(LOG_DEBUG, "ipfw: Cannot allocate dynamic state, " "consider increasing net.inet.ip.fw.dyn_max\n"); } return NULL; } ipfw_dyn_count++; /* * refcount on parent is already incremented, so * it is safe to use parent unlocked. */ if (dyn_type == O_LIMIT) { ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; if ( parent->dyn_type != O_LIMIT_PARENT) panic("invalid parent"); r->parent = parent; rule = parent->rule; } r->id = *id; r->expire = time_uptime + V_dyn_syn_lifetime; r->rule = rule; r->dyn_type = dyn_type; IPFW_ZERO_DYN_COUNTER(r); r->count = 0; r->kidx = kidx; r->bucket = i; r->next = V_ipfw_dyn_v[i].head; V_ipfw_dyn_v[i].head = r; DEB(print_dyn_rule(id, dyn_type, "add dyn entry", "total");) return r; } /** * lookup dynamic parent rule using pkt and rule as search keys. * If the lookup fails, then install one. */ static ipfw_dyn_rule * -lookup_dyn_parent(struct ipfw_flow_id *pkt, int *pindex, struct ip_fw *rule, - uint16_t kidx) +lookup_dyn_parent(const struct ipfw_flow_id *pkt, int *pindex, + struct ip_fw *rule, uint16_t kidx) { ipfw_dyn_rule *q; int i, is_v6; is_v6 = IS_IP6_FLOW_ID(pkt); i = hash_packet( pkt, V_curr_dyn_buckets ); *pindex = i; IPFW_BUCK_LOCK(i); for (q = V_ipfw_dyn_v[i].head ; q != NULL ; q=q->next) if (q->dyn_type == O_LIMIT_PARENT && kidx == q->kidx && rule == q->rule && pkt->proto == q->id.proto && pkt->src_port == q->id.src_port && pkt->dst_port == q->id.dst_port && ( (is_v6 && IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), &(q->id.src_ip6)) && IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), &(q->id.dst_ip6))) || (!is_v6 && pkt->src_ip == q->id.src_ip && pkt->dst_ip == q->id.dst_ip) ) ) { q->expire = time_uptime + V_dyn_short_lifetime; DEB(print_dyn_rule(pkt, q->dyn_type, "lookup_dyn_parent found", "");) return q; } /* Add virtual limiting rule */ return add_dyn_rule(pkt, i, O_LIMIT_PARENT, rule, kidx); } /** * Install dynamic state for rule type cmd->o.opcode * * Returns 1 (failure) if state is not installed because of errors or because * session limitations are enforced. */ int -ipfw_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, +ipfw_dyn_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, ipfw_insn_limit *cmd, struct ip_fw_args *args, uint32_t tablearg) { ipfw_dyn_rule *q; int i; DEB(print_dyn_rule(&args->f_id, cmd->o.opcode, "install_state", (cmd->o.arg1 == 0 ? "": DYN_STATE_OBJ(chain, &cmd->o)->name));) i = hash_packet(&args->f_id, V_curr_dyn_buckets); IPFW_BUCK_LOCK(i); - q = lookup_dyn_rule_locked(&args->f_id, i, NULL, NULL, cmd->o.arg1); + q = lookup_dyn_rule_locked(&args->f_id, NULL, i, NULL, cmd->o.arg1); if (q != NULL) { /* should never occur */ DEB( if (last_log != time_uptime) { last_log = time_uptime; printf("ipfw: %s: entry already present, done\n", __func__); }) IPFW_BUCK_UNLOCK(i); return (0); } /* * State limiting is done via uma(9) zone limiting. * Save pointer to newly-installed rule and reject * packet if add_dyn_rule() returned NULL. * Note q is currently set to NULL. */ switch (cmd->o.opcode) { case O_KEEP_STATE: /* bidir rule */ q = add_dyn_rule(&args->f_id, i, O_KEEP_STATE, rule, cmd->o.arg1); break; case O_LIMIT: { /* limit number of sessions */ struct ipfw_flow_id id; ipfw_dyn_rule *parent; uint32_t conn_limit; uint16_t limit_mask = cmd->limit_mask; int pindex; conn_limit = IP_FW_ARG_TABLEARG(chain, cmd->conn_limit, limit); DEB( if (cmd->conn_limit == IP_FW_TARG) printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " "(tablearg)\n", __func__, conn_limit); else printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n", __func__, conn_limit); ) id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; id.proto = args->f_id.proto; id.addr_type = args->f_id.addr_type; id.fib = M_GETFIB(args->m); if (IS_IP6_FLOW_ID (&(args->f_id))) { bzero(&id.src_ip6, sizeof(id.src_ip6)); bzero(&id.dst_ip6, sizeof(id.dst_ip6)); if (limit_mask & DYN_SRC_ADDR) id.src_ip6 = args->f_id.src_ip6; if (limit_mask & DYN_DST_ADDR) id.dst_ip6 = args->f_id.dst_ip6; } else { if (limit_mask & DYN_SRC_ADDR) id.src_ip = args->f_id.src_ip; if (limit_mask & DYN_DST_ADDR) id.dst_ip = args->f_id.dst_ip; } if (limit_mask & DYN_SRC_PORT) id.src_port = args->f_id.src_port; if (limit_mask & DYN_DST_PORT) id.dst_port = args->f_id.dst_port; /* * We have to release lock for previous bucket to * avoid possible deadlock */ IPFW_BUCK_UNLOCK(i); parent = lookup_dyn_parent(&id, &pindex, rule, cmd->o.arg1); if (parent == NULL) { printf("ipfw: %s: add parent failed\n", __func__); IPFW_BUCK_UNLOCK(pindex); return (1); } if (parent->count >= conn_limit) { if (V_fw_verbose && last_log != time_uptime) { last_log = time_uptime; char sbuf[24]; last_log = time_uptime; snprintf(sbuf, sizeof(sbuf), "%d drop session", parent->rule->rulenum); print_dyn_rule_flags(&args->f_id, cmd->o.opcode, LOG_SECURITY | LOG_DEBUG, sbuf, "too many entries"); } IPFW_BUCK_UNLOCK(pindex); return (1); } /* Increment counter on parent */ parent->count++; IPFW_BUCK_UNLOCK(pindex); IPFW_BUCK_LOCK(i); q = add_dyn_rule(&args->f_id, i, O_LIMIT, (struct ip_fw *)parent, cmd->o.arg1); if (q == NULL) { /* Decrement index and notify caller */ IPFW_BUCK_UNLOCK(i); IPFW_BUCK_LOCK(pindex); parent->count--; IPFW_BUCK_UNLOCK(pindex); return (1); } break; } default: printf("ipfw: %s: unknown dynamic rule type %u\n", __func__, cmd->o.opcode); } if (q == NULL) { IPFW_BUCK_UNLOCK(i); return (1); /* Notify caller about failure */ } dyn_update_proto_state(q, &args->f_id, NULL, MATCH_FORWARD); IPFW_BUCK_UNLOCK(i); return (0); -} - -/* - * Generate a TCP packet, containing either a RST or a keepalive. - * When flags & TH_RST, we are sending a RST packet, because of a - * "reset" action matched the packet. - * Otherwise we are sending a keepalive, and flags & TH_ - * The 'replyto' mbuf is the mbuf being replied to, if any, and is required - * so that MAC can label the reply appropriately. - */ -struct mbuf * -ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, - u_int32_t ack, int flags) -{ - struct mbuf *m = NULL; /* stupid compiler */ - int len, dir; - struct ip *h = NULL; /* stupid compiler */ -#ifdef INET6 - struct ip6_hdr *h6 = NULL; -#endif - struct tcphdr *th = NULL; - - MGETHDR(m, M_NOWAIT, MT_DATA); - if (m == NULL) - return (NULL); - - M_SETFIB(m, id->fib); -#ifdef MAC - if (replyto != NULL) - mac_netinet_firewall_reply(replyto, m); - else - mac_netinet_firewall_send(m); -#else - (void)replyto; /* don't warn about unused arg */ -#endif - - switch (id->addr_type) { - case 4: - len = sizeof(struct ip) + sizeof(struct tcphdr); - break; -#ifdef INET6 - case 6: - len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); - break; -#endif - default: - /* XXX: log me?!? */ - FREE_PKT(m); - return (NULL); - } - dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); - - m->m_data += max_linkhdr; - m->m_flags |= M_SKIP_FIREWALL; - m->m_pkthdr.len = m->m_len = len; - m->m_pkthdr.rcvif = NULL; - bzero(m->m_data, len); - - switch (id->addr_type) { - case 4: - h = mtod(m, struct ip *); - - /* prepare for checksum */ - h->ip_p = IPPROTO_TCP; - h->ip_len = htons(sizeof(struct tcphdr)); - if (dir) { - h->ip_src.s_addr = htonl(id->src_ip); - h->ip_dst.s_addr = htonl(id->dst_ip); - } else { - h->ip_src.s_addr = htonl(id->dst_ip); - h->ip_dst.s_addr = htonl(id->src_ip); - } - - th = (struct tcphdr *)(h + 1); - break; -#ifdef INET6 - case 6: - h6 = mtod(m, struct ip6_hdr *); - - /* prepare for checksum */ - h6->ip6_nxt = IPPROTO_TCP; - h6->ip6_plen = htons(sizeof(struct tcphdr)); - if (dir) { - h6->ip6_src = id->src_ip6; - h6->ip6_dst = id->dst_ip6; - } else { - h6->ip6_src = id->dst_ip6; - h6->ip6_dst = id->src_ip6; - } - - th = (struct tcphdr *)(h6 + 1); - break; -#endif - } - - if (dir) { - th->th_sport = htons(id->src_port); - th->th_dport = htons(id->dst_port); - } else { - th->th_sport = htons(id->dst_port); - th->th_dport = htons(id->src_port); - } - th->th_off = sizeof(struct tcphdr) >> 2; - - if (flags & TH_RST) { - if (flags & TH_ACK) { - th->th_seq = htonl(ack); - th->th_flags = TH_RST; - } else { - if (flags & TH_SYN) - seq++; - th->th_ack = htonl(seq); - th->th_flags = TH_RST | TH_ACK; - } - } else { - /* - * Keepalive - use caller provided sequence numbers - */ - th->th_seq = htonl(seq); - th->th_ack = htonl(ack); - th->th_flags = TH_ACK; - } - - switch (id->addr_type) { - case 4: - th->th_sum = in_cksum(m, len); - - /* finish the ip header */ - h->ip_v = 4; - h->ip_hl = sizeof(*h) >> 2; - h->ip_tos = IPTOS_LOWDELAY; - h->ip_off = htons(0); - h->ip_len = htons(len); - h->ip_ttl = V_ip_defttl; - h->ip_sum = 0; - break; -#ifdef INET6 - case 6: - th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), - sizeof(struct tcphdr)); - - /* finish the ip6 header */ - h6->ip6_vfc |= IPV6_VERSION; - h6->ip6_hlim = IPV6_DEFHLIM; - break; -#endif - } - - return (m); } /* * Queue keepalive packets for given dynamic rule */ static struct mbuf ** ipfw_dyn_send_ka(struct mbuf **mtailp, ipfw_dyn_rule *q) { struct mbuf *m_rev, *m_fwd; m_rev = (q->state & ACK_REV) ? NULL : ipfw_send_pkt(NULL, &(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN); m_fwd = (q->state & ACK_FWD) ? NULL : ipfw_send_pkt(NULL, &(q->id), q->ack_fwd - 1, q->ack_rev, 0); if (m_rev != NULL) { *mtailp = m_rev; mtailp = &(*mtailp)->m_nextpkt; } if (m_fwd != NULL) { *mtailp = m_fwd; mtailp = &(*mtailp)->m_nextpkt; } return (mtailp); } /* * This procedure is used to perform various maintenance * on dynamic hash list. Currently it is called every second. */ static void ipfw_dyn_tick(void * vnetx) { struct ip_fw_chain *chain; int check_ka = 0; #ifdef VIMAGE struct vnet *vp = vnetx; #endif CURVNET_SET(vp); chain = &V_layer3_chain; /* Run keepalive checks every keepalive_period iff ka is enabled */ if ((V_dyn_keepalive_last + V_dyn_keepalive_period <= time_uptime) && (V_dyn_keepalive != 0)) { V_dyn_keepalive_last = time_uptime; check_ka = 1; } check_dyn_rules(chain, NULL, check_ka, 1); callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, vnetx, 0); CURVNET_RESTORE(); } /* * Walk through all dynamic states doing generic maintenance: * 1) free expired states * 2) free all states based on deleted rule / set * 3) send keepalives for states if needed * * @chain - pointer to current ipfw rules chain * @rule - delete all states originated by given rule if != NULL * @set - delete all states originated by any rule in set @set if != RESVD_SET * @check_ka - perform checking/sending keepalives * @timer - indicate call from timer routine. * * Timer routine must call this function unlocked to permit * sending keepalives/resizing table. * * Others has to call function with IPFW_UH_WLOCK held. * Additionally, function assume that dynamic rule/set is * ALREADY deleted so no new states can be generated by * 'deleted' rules. * * Write lock is needed to ensure that unused parent rules * are not freed by other instance (see stage 2, 3) */ static void check_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int check_ka, int timer) { struct mbuf *m0, *m, *mnext, **mtailp; struct ip *h; int i, dyn_count, new_buckets = 0, max_buckets; int expired = 0, expired_limits = 0, parents = 0, total = 0; ipfw_dyn_rule *q, *q_prev, *q_next; ipfw_dyn_rule *exp_head, **exptailp; ipfw_dyn_rule *exp_lhead, **expltailp; KASSERT(V_ipfw_dyn_v != NULL, ("%s: dynamic table not allocated", __func__)); /* Avoid possible LOR */ KASSERT(!check_ka || timer, ("%s: keepalive check with lock held", __func__)); /* * Do not perform any checks if we currently have no dynamic states */ if (DYN_COUNT == 0) return; /* Expired states */ exp_head = NULL; exptailp = &exp_head; /* Expired limit states */ exp_lhead = NULL; expltailp = &exp_lhead; /* * We make a chain of packets to go out here -- not deferring * until after we drop the IPFW dynamic rule lock would result * in a lock order reversal with the normal packet input -> ipfw * call stack. */ m0 = NULL; mtailp = &m0; /* Protect from hash resizing */ if (timer != 0) IPFW_UH_WLOCK(chain); else IPFW_UH_WLOCK_ASSERT(chain); #define NEXT_RULE() { q_prev = q; q = q->next ; continue; } /* Stage 1: perform requested deletion */ for (i = 0 ; i < V_curr_dyn_buckets ; i++) { IPFW_BUCK_LOCK(i); for (q = V_ipfw_dyn_v[i].head, q_prev = q; q ; ) { /* account every rule */ total++; /* Skip parent rules at all */ if (q->dyn_type == O_LIMIT_PARENT) { parents++; NEXT_RULE(); } /* * Remove rules which are: * 1) expired * 2) matches deletion range */ if ((TIME_LEQ(q->expire, time_uptime)) || (rt != NULL && ipfw_match_range(q->rule, rt))) { if (TIME_LE(time_uptime, q->expire) && q->dyn_type == O_KEEP_STATE && V_dyn_keep_states != 0) { /* * Do not delete state if * it is not expired and * dyn_keep_states is ON. * However we need to re-link it * to any other stable rule */ q->rule = chain->default_rule; NEXT_RULE(); } /* Unlink q from current list */ q_next = q->next; if (q == V_ipfw_dyn_v[i].head) V_ipfw_dyn_v[i].head = q_next; else q_prev->next = q_next; q->next = NULL; /* queue q to expire list */ if (q->dyn_type != O_LIMIT) { *exptailp = q; exptailp = &(*exptailp)->next; DEB(print_dyn_rule(&q->id, q->dyn_type, "unlink entry", "left"); ) } else { /* Separate list for limit rules */ *expltailp = q; expltailp = &(*expltailp)->next; expired_limits++; DEB(print_dyn_rule(&q->id, q->dyn_type, "unlink limit entry", "left"); ) } q = q_next; expired++; continue; } /* * Check if we need to send keepalive: * we need to ensure if is time to do KA, * this is established TCP session, and * expire time is within keepalive interval */ if ((check_ka != 0) && (q->id.proto == IPPROTO_TCP) && ((q->state & BOTH_SYN) == BOTH_SYN) && (TIME_LEQ(q->expire, time_uptime + V_dyn_keepalive_interval))) mtailp = ipfw_dyn_send_ka(mtailp, q); NEXT_RULE(); } IPFW_BUCK_UNLOCK(i); } /* Stage 2: decrement counters from O_LIMIT parents */ if (expired_limits != 0) { /* * XXX: Note that deleting set with more than one * heavily-used LIMIT rules can result in overwhelming * locking due to lack of per-hash value sorting * * We should probably think about: * 1) pre-allocating hash of size, say, * MAX(16, V_curr_dyn_buckets / 1024) * 2) checking if expired_limits is large enough * 3) If yes, init hash (or its part), re-link * current list and start decrementing procedure in * each bucket separately */ /* * Small optimization: do not unlock bucket until * we see the next item resides in different bucket */ if (exp_lhead != NULL) { i = exp_lhead->parent->bucket; IPFW_BUCK_LOCK(i); } for (q = exp_lhead; q != NULL; q = q->next) { if (i != q->parent->bucket) { IPFW_BUCK_UNLOCK(i); i = q->parent->bucket; IPFW_BUCK_LOCK(i); } /* Decrease parent refcount */ q->parent->count--; } if (exp_lhead != NULL) IPFW_BUCK_UNLOCK(i); } /* * We protectet ourselves from unused parent deletion * (from the timer function) by holding UH write lock. */ /* Stage 3: remove unused parent rules */ if ((parents != 0) && (expired != 0)) { for (i = 0 ; i < V_curr_dyn_buckets ; i++) { IPFW_BUCK_LOCK(i); for (q = V_ipfw_dyn_v[i].head, q_prev = q ; q ; ) { if (q->dyn_type != O_LIMIT_PARENT) NEXT_RULE(); if (q->count != 0) NEXT_RULE(); /* Parent rule without consumers */ /* Unlink q from current list */ q_next = q->next; if (q == V_ipfw_dyn_v[i].head) V_ipfw_dyn_v[i].head = q_next; else q_prev->next = q_next; q->next = NULL; /* Add to expired list */ *exptailp = q; exptailp = &(*exptailp)->next; DEB(print_dyn_rule(&q->id, q->dyn_type, "unlink parent entry", "left"); ) expired++; q = q_next; } IPFW_BUCK_UNLOCK(i); } } #undef NEXT_RULE if (timer != 0) { /* * Check if we need to resize hash: * if current number of states exceeds number of buckes in hash, * grow hash size to the minimum power of 2 which is bigger than * current states count. Limit hash size by 64k. */ max_buckets = (V_dyn_buckets_max > 65536) ? 65536 : V_dyn_buckets_max; dyn_count = DYN_COUNT; if ((dyn_count > V_curr_dyn_buckets * 2) && (dyn_count < max_buckets)) { new_buckets = V_curr_dyn_buckets; while (new_buckets < dyn_count) { new_buckets *= 2; if (new_buckets >= max_buckets) break; } } IPFW_UH_WUNLOCK(chain); } /* Finally delete old states ad limits if any */ for (q = exp_head; q != NULL; q = q_next) { q_next = q->next; uma_zfree(V_ipfw_dyn_rule_zone, q); ipfw_dyn_count--; } for (q = exp_lhead; q != NULL; q = q_next) { q_next = q->next; uma_zfree(V_ipfw_dyn_rule_zone, q); ipfw_dyn_count--; } /* * The rest code MUST be called from timer routine only * without holding any locks */ if (timer == 0) return; /* Send keepalive packets if any */ for (m = m0; m != NULL; m = mnext) { mnext = m->m_nextpkt; m->m_nextpkt = NULL; h = mtod(m, struct ip *); if (h->ip_v == 4) ip_output(m, NULL, NULL, 0, NULL, NULL); #ifdef INET6 else ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); #endif } /* Run table resize without holding any locks */ if (new_buckets != 0) resize_dynamic_table(chain, new_buckets); } /* * Deletes all dynamic rules originated by given rule or all rules in * given set. Specify RESVD_SET to indicate set should not be used. * @chain - pointer to current ipfw rules chain * @rr - delete all states originated by rules in matched range. * * Function has to be called with IPFW_UH_WLOCK held. * Additionally, function assume that dynamic rule/set is * ALREADY deleted so no new states can be generated by * 'deleted' rules. */ void ipfw_expire_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt) { check_dyn_rules(chain, rt, 0, 0); } /* * Check if rule contains at least one dynamic opcode. * * Returns 1 if such opcode is found, 0 otherwise. */ int ipfw_is_dyn_rule(struct ip_fw *rule) { int cmdlen, l; ipfw_insn *cmd; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); switch (cmd->opcode) { case O_LIMIT: case O_KEEP_STATE: case O_PROBE_STATE: case O_CHECK_STATE: return (1); } } return (0); } void ipfw_dyn_init(struct ip_fw_chain *chain) { V_ipfw_dyn_v = NULL; V_dyn_buckets_max = 256; /* must be power of 2 */ V_curr_dyn_buckets = 256; /* must be power of 2 */ V_dyn_ack_lifetime = 300; V_dyn_syn_lifetime = 20; V_dyn_fin_lifetime = 1; V_dyn_rst_lifetime = 1; V_dyn_udp_lifetime = 10; V_dyn_short_lifetime = 5; V_dyn_keepalive_interval = 20; V_dyn_keepalive_period = 5; V_dyn_keepalive = 1; /* do send keepalives */ V_dyn_keepalive_last = time_uptime; V_dyn_max = 16384; /* max # of dynamic rules */ V_ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* Enforce limit on dynamic rules */ uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max); callout_init(&V_ipfw_timeout, 1); /* * This can potentially be done on first dynamic rule * being added to chain. */ resize_dynamic_table(chain, V_curr_dyn_buckets); IPFW_ADD_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); } void ipfw_dyn_uninit(int pass) { int i; if (pass == 0) { callout_drain(&V_ipfw_timeout); return; } IPFW_DEL_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); if (V_ipfw_dyn_v != NULL) { /* * Skip deleting all dynamic states - * uma_zdestroy() does this more efficiently; */ /* Destroy all mutexes */ for (i = 0 ; i < V_curr_dyn_buckets ; i++) IPFW_BUCK_LOCK_DESTROY(&V_ipfw_dyn_v[i]); free(V_ipfw_dyn_v, M_IPFW); V_ipfw_dyn_v = NULL; } uma_zdestroy(V_ipfw_dyn_rule_zone); } #ifdef SYSCTL_NODE /* * Get/set maximum number of dynamic states in given VNET instance. */ static int sysctl_ipfw_dyn_max(SYSCTL_HANDLER_ARGS) { int error; unsigned int nstates; nstates = V_dyn_max; error = sysctl_handle_int(oidp, &nstates, 0, req); /* Read operation or some error */ if ((error != 0) || (req->newptr == NULL)) return (error); V_dyn_max = nstates; uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max); return (0); } /* * Get current number of dynamic states in given VNET instance. */ static int sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS) { int error; unsigned int nstates; nstates = DYN_COUNT; error = sysctl_handle_int(oidp, &nstates, 0, req); return (error); } #endif /* * Returns size of dynamic states in legacy format */ int ipfw_dyn_len(void) { return (V_ipfw_dyn_v == NULL) ? 0 : (DYN_COUNT * sizeof(ipfw_dyn_rule)); } /* * Returns number of dynamic states. * Used by dump format v1 (current). */ int ipfw_dyn_get_count(void) { return (V_ipfw_dyn_v == NULL) ? 0 : DYN_COUNT; } static void export_dyn_rule(ipfw_dyn_rule *src, ipfw_dyn_rule *dst) { uint16_t rulenum; rulenum = (uint16_t)src->rule->rulenum; memcpy(dst, src, sizeof(*src)); memcpy(&dst->rule, &rulenum, sizeof(rulenum)); /* * store set number into high word of * dst->rule pointer. */ memcpy((char *)&dst->rule + sizeof(rulenum), &src->rule->set, sizeof(src->rule->set)); /* * store a non-null value in "next". * The userland code will interpret a * NULL here as a marker * for the last dynamic rule. */ memcpy(&dst->next, &dst, sizeof(dst)); dst->expire = TIME_LEQ(dst->expire, time_uptime) ? 0: dst->expire - time_uptime; } /* * Fills int buffer given by @sd with dynamic states. * Used by dump format v1 (current). * * Returns 0 on success. */ int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd) { ipfw_dyn_rule *p; ipfw_obj_dyntlv *dst, *last; ipfw_obj_ctlv *ctlv; int i; size_t sz; if (V_ipfw_dyn_v == NULL) return (0); IPFW_UH_RLOCK_ASSERT(chain); ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); if (ctlv == NULL) return (ENOMEM); sz = sizeof(ipfw_obj_dyntlv); ctlv->head.type = IPFW_TLV_DYNSTATE_LIST; ctlv->objsize = sz; last = NULL; for (i = 0 ; i < V_curr_dyn_buckets; i++) { IPFW_BUCK_LOCK(i); for (p = V_ipfw_dyn_v[i].head ; p != NULL; p = p->next) { dst = (ipfw_obj_dyntlv *)ipfw_get_sopt_space(sd, sz); if (dst == NULL) { IPFW_BUCK_UNLOCK(i); return (ENOMEM); } export_dyn_rule(p, &dst->state); dst->head.length = sz; dst->head.type = IPFW_TLV_DYN_ENT; last = dst; } IPFW_BUCK_UNLOCK(i); } if (last != NULL) /* mark last dynamic rule */ last->head.flags = IPFW_DF_LAST; return (0); } /* * Fill given buffer with dynamic states (legacy format). * IPFW_UH_RLOCK has to be held while calling. */ void ipfw_get_dynamic(struct ip_fw_chain *chain, char **pbp, const char *ep) { ipfw_dyn_rule *p, *last = NULL; char *bp; int i; if (V_ipfw_dyn_v == NULL) return; bp = *pbp; IPFW_UH_RLOCK_ASSERT(chain); for (i = 0 ; i < V_curr_dyn_buckets; i++) { IPFW_BUCK_LOCK(i); for (p = V_ipfw_dyn_v[i].head ; p != NULL; p = p->next) { if (bp + sizeof *p <= ep) { ipfw_dyn_rule *dst = (ipfw_dyn_rule *)bp; export_dyn_rule(p, dst); last = dst; bp += sizeof(ipfw_dyn_rule); } } IPFW_BUCK_UNLOCK(i); } if (last != NULL) /* mark last dynamic rule */ bzero(&last->next, sizeof(last)); *pbp = bp; } /* end of file */ Index: stable/11/sys/netpfil/ipfw/ip_fw_private.h =================================================================== --- stable/11/sys/netpfil/ipfw/ip_fw_private.h (revision 326387) +++ stable/11/sys/netpfil/ipfw/ip_fw_private.h (revision 326388) @@ -1,786 +1,787 @@ /*- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _IPFW2_PRIVATE_H #define _IPFW2_PRIVATE_H /* * Internal constants and data structures used by ipfw components * and not meant to be exported outside the kernel. */ #ifdef _KERNEL /* * For platforms that do not have SYSCTL support, we wrap the * SYSCTL_* into a function (one per file) to collect the values * into an array at module initialization. The wrapping macros, * SYSBEGIN() and SYSEND, are empty in the default case. */ #ifndef SYSBEGIN #define SYSBEGIN(x) #endif #ifndef SYSEND #define SYSEND #endif /* Return values from ipfw_chk() */ enum { IP_FW_PASS = 0, IP_FW_DENY, IP_FW_DIVERT, IP_FW_TEE, IP_FW_DUMMYNET, IP_FW_NETGRAPH, IP_FW_NGTEE, IP_FW_NAT, IP_FW_REASS, }; /* * Structure for collecting parameters to dummynet for ip6_output forwarding */ struct _ip6dn_args { struct ip6_pktopts *opt_or; int flags_or; struct ip6_moptions *im6o_or; struct ifnet *origifp_or; struct ifnet *ifp_or; struct sockaddr_in6 dst_or; u_long mtu_or; }; /* * Arguments for calling ipfw_chk() and dummynet_io(). We put them * all into a structure because this way it is easier and more * efficient to pass variables around and extend the interface. */ struct ip_fw_args { struct mbuf *m; /* the mbuf chain */ struct ifnet *oif; /* output interface */ struct sockaddr_in *next_hop; /* forward address */ struct sockaddr_in6 *next_hop6; /* ipv6 forward address */ /* * On return, it points to the matching rule. * On entry, rule.slot > 0 means the info is valid and * contains the starting rule for an ipfw search. * If chain_id == chain->id && slot >0 then jump to that slot. * Otherwise, we locate the first rule >= rulenum:rule_id */ struct ipfw_rule_ref rule; /* match/restart info */ struct ether_header *eh; /* for bridged packets */ struct ipfw_flow_id f_id; /* grabbed from IP header */ //uint32_t cookie; /* a cookie depending on rule action */ struct inpcb *inp; struct _ip6dn_args dummypar; /* dummynet->ip6_output */ union { /* store here if cannot use a pointer */ struct sockaddr_in hopstore; struct sockaddr_in6 hopstore6; }; }; MALLOC_DECLARE(M_IPFW); /* * Hooks sometime need to know the direction of the packet * (divert, dummynet, netgraph, ...) * We use a generic definition here, with bit0-1 indicating the * direction, bit 2 indicating layer2 or 3, bit 3-4 indicating the * specific protocol * indicating the protocol (if necessary) */ enum { DIR_MASK = 0x3, DIR_OUT = 0, DIR_IN = 1, DIR_FWD = 2, DIR_DROP = 3, PROTO_LAYER2 = 0x4, /* set for layer 2 */ /* PROTO_DEFAULT = 0, */ PROTO_IPV4 = 0x08, PROTO_IPV6 = 0x10, PROTO_IFB = 0x0c, /* layer2 + ifbridge */ /* PROTO_OLDBDG = 0x14, unused, old bridge */ }; /* wrapper for freeing a packet, in case we need to do more work */ #ifndef FREE_PKT #if defined(__linux__) || defined(_WIN32) #define FREE_PKT(m) netisr_dispatch(-1, m) #else #define FREE_PKT(m) m_freem(m) #endif #endif /* !FREE_PKT */ /* * Function definitions. */ /* attach (arg = 1) or detach (arg = 0) hooks */ int ipfw_attach_hooks(int); #ifdef NOTYET void ipfw_nat_destroy(void); #endif /* In ip_fw_log.c */ struct ip; struct ip_fw_chain; void ipfw_bpf_init(int); void ipfw_bpf_uninit(int); void ipfw_bpf_mtap2(void *, u_int, struct mbuf *); void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg, struct ip *ip); VNET_DECLARE(u_int64_t, norule_counter); #define V_norule_counter VNET(norule_counter) VNET_DECLARE(int, verbose_limit); #define V_verbose_limit VNET(verbose_limit) /* In ip_fw_dynamic.c */ enum { /* result for matching dynamic rules */ MATCH_REVERSE = 0, MATCH_FORWARD, MATCH_NONE, MATCH_UNKNOWN, }; /* * The lock for dynamic rules is only used once outside the file, * and only to release the result of lookup_dyn_rule(). * Eventually we may implement it with a callback on the function. */ struct ip_fw_chain; struct sockopt_data; int ipfw_is_dyn_rule(struct ip_fw *rule); void ipfw_expire_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *); -void ipfw_dyn_unlock(ipfw_dyn_rule *q); struct tcphdr; struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *, u_int32_t, u_int32_t, int); -int ipfw_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, +int ipfw_dyn_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, ipfw_insn_limit *cmd, struct ip_fw_args *args, uint32_t tablearg); -ipfw_dyn_rule *ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, - int *match_direction, struct tcphdr *tcp, uint16_t kidx); +struct ip_fw *ipfw_dyn_lookup_state(const struct ipfw_flow_id *pkt, + const void *ulp, int pktlen, int *match_direction, uint16_t kidx); void ipfw_remove_dyn_children(struct ip_fw *rule); void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep); int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd); void ipfw_dyn_init(struct ip_fw_chain *); /* per-vnet initialization */ void ipfw_dyn_uninit(int); /* per-vnet deinitialization */ int ipfw_dyn_len(void); int ipfw_dyn_get_count(void); /* common variables */ VNET_DECLARE(int, fw_one_pass); #define V_fw_one_pass VNET(fw_one_pass) VNET_DECLARE(int, fw_verbose); #define V_fw_verbose VNET(fw_verbose) VNET_DECLARE(struct ip_fw_chain, layer3_chain); #define V_layer3_chain VNET(layer3_chain) VNET_DECLARE(int, ipfw_vnet_ready); #define V_ipfw_vnet_ready VNET(ipfw_vnet_ready) VNET_DECLARE(u_int32_t, set_disable); #define V_set_disable VNET(set_disable) VNET_DECLARE(int, autoinc_step); #define V_autoinc_step VNET(autoinc_step) VNET_DECLARE(unsigned int, fw_tables_max); #define V_fw_tables_max VNET(fw_tables_max) VNET_DECLARE(unsigned int, fw_tables_sets); #define V_fw_tables_sets VNET(fw_tables_sets) struct tables_config; #ifdef _KERNEL /* * Here we have the structure representing an ipfw rule. * * It starts with a general area * followed by an array of one or more instructions, which the code * accesses as an array of 32-bit values. * * Given a rule pointer r: * * r->cmd is the start of the first instruction. * ACTION_PTR(r) is the start of the first action (things to do * once a rule matched). */ struct ip_fw { uint16_t act_ofs; /* offset of action in 32-bit units */ uint16_t cmd_len; /* # of 32-bit words in cmd */ uint16_t rulenum; /* rule number */ uint8_t set; /* rule set (0..31) */ uint8_t flags; /* currently unused */ counter_u64_t cntr; /* Pointer to rule counters */ uint32_t timestamp; /* tv_sec of last match */ uint32_t id; /* rule id */ uint32_t cached_id; /* used by jump_fast */ uint32_t cached_pos; /* used by jump_fast */ ipfw_insn cmd[1]; /* storage for commands */ }; #define IPFW_RULE_CNTR_SIZE (2 * sizeof(uint64_t)) #endif struct ip_fw_chain { struct ip_fw **map; /* array of rule ptrs to ease lookup */ uint32_t id; /* ruleset id */ int n_rules; /* number of static rules */ void *tablestate; /* runtime table info */ void *valuestate; /* runtime table value info */ int *idxmap; /* skipto array of rules */ void **srvstate; /* runtime service mappings */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t rwmtx; #endif int static_len; /* total len of static rules (v0) */ uint32_t gencnt; /* NAT generation count */ LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */ struct ip_fw *default_rule; struct tables_config *tblcfg; /* tables module data */ void *ifcfg; /* interface module data */ int *idxmap_back; /* standby skipto array of rules */ struct namedobj_instance *srvmap; /* cfg name->number mappings */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t uh_lock; #else struct rwlock uh_lock; /* lock for upper half */ #endif }; /* 64-byte structure representing multi-field table value */ struct table_value { uint32_t tag; /* O_TAG/O_TAGGED */ uint32_t pipe; /* O_PIPE/O_QUEUE */ uint16_t divert; /* O_DIVERT/O_TEE */ uint16_t skipto; /* skipto, CALLRET */ uint32_t netgraph; /* O_NETGRAPH/O_NGTEE */ uint32_t fib; /* O_SETFIB */ uint32_t nat; /* O_NAT */ uint32_t nh4; uint8_t dscp; uint8_t spare0; uint16_t spare1; /* -- 32 bytes -- */ struct in6_addr nh6; uint32_t limit; /* O_LIMIT */ uint32_t zoneid; /* scope zone id for nh6 */ uint64_t refcnt; /* Number of references */ }; struct named_object { TAILQ_ENTRY(named_object) nn_next; /* namehash */ TAILQ_ENTRY(named_object) nv_next; /* valuehash */ char *name; /* object name */ uint16_t etlv; /* Export TLV id */ uint8_t subtype;/* object subtype within class */ uint8_t set; /* set object belongs to */ uint16_t kidx; /* object kernel index */ uint16_t spare; uint32_t ocnt; /* object counter for internal use */ uint32_t refcnt; /* number of references */ }; TAILQ_HEAD(namedobjects_head, named_object); struct sockopt; /* used by tcp_var.h */ struct sockopt_data { caddr_t kbuf; /* allocated buffer */ size_t ksize; /* given buffer size */ size_t koff; /* data already used */ size_t kavail; /* number of bytes available */ size_t ktotal; /* total bytes pushed */ struct sockopt *sopt; /* socket data */ caddr_t sopt_val; /* sopt user buffer */ size_t valsize; /* original data size */ }; struct ipfw_ifc; typedef void (ipfw_ifc_cb)(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex); struct ipfw_iface { struct named_object no; char ifname[64]; int resolved; uint16_t ifindex; uint16_t spare; uint64_t gencnt; TAILQ_HEAD(, ipfw_ifc) consumers; }; struct ipfw_ifc { TAILQ_ENTRY(ipfw_ifc) next; struct ipfw_iface *iface; ipfw_ifc_cb *cb; void *cbdata; }; /* Macro for working with various counters */ #define IPFW_INC_RULE_COUNTER(_cntr, _bytes) do { \ counter_u64_add((_cntr)->cntr, 1); \ counter_u64_add((_cntr)->cntr + 1, _bytes); \ if ((_cntr)->timestamp != time_uptime) \ (_cntr)->timestamp = time_uptime; \ } while (0) #define IPFW_INC_DYN_COUNTER(_cntr, _bytes) do { \ (_cntr)->pcnt++; \ (_cntr)->bcnt += _bytes; \ } while (0) #define IPFW_ZERO_RULE_COUNTER(_cntr) do { \ counter_u64_zero((_cntr)->cntr); \ counter_u64_zero((_cntr)->cntr + 1); \ (_cntr)->timestamp = 0; \ } while (0) #define IPFW_ZERO_DYN_COUNTER(_cntr) do { \ (_cntr)->pcnt = 0; \ (_cntr)->bcnt = 0; \ } while (0) #define TARG_VAL(ch, k, f) ((struct table_value *)((ch)->valuestate))[k].f #define IP_FW_ARG_TABLEARG(ch, a, f) \ (((a) == IP_FW_TARG) ? TARG_VAL(ch, tablearg, f) : (a)) /* * The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c * so the variable and the macros must be here. */ #if defined( __linux__ ) || defined( _WIN32 ) #define IPFW_LOCK_INIT(_chain) do { \ rw_init(&(_chain)->rwmtx, "IPFW static rules"); \ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ } while (0) #define IPFW_LOCK_DESTROY(_chain) do { \ rw_destroy(&(_chain)->rwmtx); \ rw_destroy(&(_chain)->uh_lock); \ } while (0) #define IPFW_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_RLOCKED) #define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED) #define IPFW_RLOCK_TRACKER #define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx) #define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx) #define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx) #define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx) #define IPFW_PF_RLOCK(p) IPFW_RLOCK(p) #define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p) #else /* FreeBSD */ #define IPFW_LOCK_INIT(_chain) do { \ rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ } while (0) #define IPFW_LOCK_DESTROY(_chain) do { \ rw_destroy(&(_chain)->uh_lock); \ } while (0) #define IPFW_RLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_RLOCKED) #define IPFW_WLOCK_ASSERT(_chain) rm_assert(&V_pfil_lock, RA_WLOCKED) #define IPFW_RLOCK_TRACKER struct rm_priotracker _tracker #define IPFW_RLOCK(p) rm_rlock(&V_pfil_lock, &_tracker) #define IPFW_RUNLOCK(p) rm_runlock(&V_pfil_lock, &_tracker) #define IPFW_WLOCK(p) rm_wlock(&V_pfil_lock) #define IPFW_WUNLOCK(p) rm_wunlock(&V_pfil_lock) #define IPFW_PF_RLOCK(p) #define IPFW_PF_RUNLOCK(p) #endif #define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED) #define IPFW_UH_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_WLOCKED) #define IPFW_UH_UNLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_UNLOCKED) #define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock) #define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock) #define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock) #define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock) struct obj_idx { uint16_t uidx; /* internal index supplied by userland */ uint16_t kidx; /* kernel object index */ uint16_t off; /* tlv offset from rule end in 4-byte words */ uint8_t spare; uint8_t type; /* object type within its category */ }; struct rule_check_info { uint16_t flags; /* rule-specific check flags */ uint16_t object_opcodes; /* num of opcodes referencing objects */ uint16_t urule_numoff; /* offset of rulenum in bytes */ uint8_t version; /* rule version */ uint8_t spare; ipfw_obj_ctlv *ctlv; /* name TLV containter */ struct ip_fw *krule; /* resulting rule pointer */ caddr_t urule; /* original rule pointer */ struct obj_idx obuf[8]; /* table references storage */ }; /* Legacy interface support */ /* * FreeBSD 8 export rule format */ struct ip_fw_rule0 { struct ip_fw *x_next; /* linked list of rules */ struct ip_fw *next_rule; /* ptr to next [skipto] rule */ /* 'next_rule' is used to pass up 'set_disable' status */ uint16_t act_ofs; /* offset of action in 32-bit units */ uint16_t cmd_len; /* # of 32-bit words in cmd */ uint16_t rulenum; /* rule number */ uint8_t set; /* rule set (0..31) */ uint8_t _pad; /* padding */ uint32_t id; /* rule id */ /* These fields are present in all rules. */ uint64_t pcnt; /* Packet counter */ uint64_t bcnt; /* Byte counter */ uint32_t timestamp; /* tv_sec of last match */ ipfw_insn cmd[1]; /* storage for commands */ }; struct ip_fw_bcounter0 { uint64_t pcnt; /* Packet counter */ uint64_t bcnt; /* Byte counter */ uint32_t timestamp; /* tv_sec of last match */ }; /* Kernel rule length */ /* * RULE _K_ SIZE _V_ -> * get kernel size from userland rool version _V_. * RULE _U_ SIZE _V_ -> * get user size version _V_ from kernel rule * RULESIZE _V_ -> * get user size rule length */ /* FreeBSD8 <> current kernel format */ #define RULEUSIZE0(r) (sizeof(struct ip_fw_rule0) + (r)->cmd_len * 4 - 4) #define RULEKSIZE0(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8) /* FreeBSD11 <> current kernel format */ #define RULEUSIZE1(r) (roundup2(sizeof(struct ip_fw_rule) + \ (r)->cmd_len * 4 - 4, 8)) #define RULEKSIZE1(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8) /* * Tables/Objects index rewriting code */ /* Default and maximum number of ipfw tables/objects. */ #define IPFW_TABLES_MAX 65536 #define IPFW_TABLES_DEFAULT 128 #define IPFW_OBJECTS_MAX 65536 #define IPFW_OBJECTS_DEFAULT 1024 #define CHAIN_TO_SRV(ch) ((ch)->srvmap) #define SRV_OBJECT(ch, idx) ((ch)->srvstate[(idx)]) struct tid_info { uint32_t set; /* table set */ uint16_t uidx; /* table index */ uint8_t type; /* table type */ uint8_t atype; uint8_t spare; int tlen; /* Total TLV size block */ void *tlvs; /* Pointer to first TLV */ }; /* * Classifier callback. Checks if @cmd opcode contains kernel object reference. * If true, returns its index and type. * Returns 0 if match is found, 1 overwise. */ typedef int (ipfw_obj_rw_cl)(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype); /* * Updater callback. Sets kernel object reference index to @puidx */ typedef void (ipfw_obj_rw_upd)(ipfw_insn *cmd, uint16_t puidx); /* * Finder callback. Tries to find named object by name (specified via @ti). * Stores found named object pointer in @pno. * If object was not found, NULL is stored. * * Return 0 if input data was valid. */ typedef int (ipfw_obj_fname_cb)(struct ip_fw_chain *ch, struct tid_info *ti, struct named_object **pno); /* * Another finder callback. Tries to findex named object by kernel index. * * Returns pointer to named object or NULL. */ typedef struct named_object *(ipfw_obj_fidx_cb)(struct ip_fw_chain *ch, uint16_t kidx); /* * Object creator callback. Tries to create object specified by @ti. * Stores newly-allocated object index in @pkidx. * * Returns 0 on success. */ typedef int (ipfw_obj_create_cb)(struct ip_fw_chain *ch, struct tid_info *ti, uint16_t *pkidx); /* * Object destroy callback. Intended to free resources allocated by * create_object callback. */ typedef void (ipfw_obj_destroy_cb)(struct ip_fw_chain *ch, struct named_object *no); /* * Sets handler callback. Handles moving and swaping set of named object. * SWAP_ALL moves all named objects from set `set' to `new_set' and vise versa; * TEST_ALL checks that there aren't any named object with conflicting names; * MOVE_ALL moves all named objects from set `set' to `new_set'; * COUNT_ONE used to count number of references used by object with kidx `set'; * TEST_ONE checks that named object with kidx `set' can be moved to `new_set`; * MOVE_ONE moves named object with kidx `set' to set `new_set'. */ enum ipfw_sets_cmd { SWAP_ALL = 0, TEST_ALL, MOVE_ALL, COUNT_ONE, TEST_ONE, MOVE_ONE }; typedef int (ipfw_obj_sets_cb)(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd); struct opcode_obj_rewrite { uint32_t opcode; /* Opcode to act upon */ uint32_t etlv; /* Relevant export TLV id */ ipfw_obj_rw_cl *classifier; /* Check if rewrite is needed */ ipfw_obj_rw_upd *update; /* update cmd with new value */ ipfw_obj_fname_cb *find_byname; /* Find named object by name */ ipfw_obj_fidx_cb *find_bykidx; /* Find named object by kidx */ ipfw_obj_create_cb *create_object; /* Create named object */ ipfw_obj_destroy_cb *destroy_object;/* Destroy named object */ ipfw_obj_sets_cb *manage_sets; /* Swap or move sets */ }; #define IPFW_ADD_OBJ_REWRITER(f, c) do { \ if ((f) != 0) \ ipfw_add_obj_rewriter(c, \ sizeof(c) / sizeof(c[0])); \ } while(0) #define IPFW_DEL_OBJ_REWRITER(l, c) do { \ if ((l) != 0) \ ipfw_del_obj_rewriter(c, \ sizeof(c) / sizeof(c[0])); \ } while(0) /* In ip_fw_iface.c */ int ipfw_iface_init(void); void ipfw_iface_destroy(void); void vnet_ipfw_iface_destroy(struct ip_fw_chain *ch); int ipfw_iface_ref(struct ip_fw_chain *ch, char *name, struct ipfw_ifc *ic); void ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic); void ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); void ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); /* In ip_fw_sockopt.c */ void ipfw_init_skipto_cache(struct ip_fw_chain *chain); void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain); int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id); int ipfw_ctl3(struct sockopt *sopt); int ipfw_chk(struct ip_fw_args *args); +int ipfw_add_protected_rule(struct ip_fw_chain *chain, struct ip_fw *rule, + int locked); void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head, struct ip_fw *rule); void ipfw_reap_rules(struct ip_fw *head); void ipfw_init_counters(void); void ipfw_destroy_counters(void); struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize); int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt); typedef int (sopt_handler_f)(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct sockopt_data *sd); struct ipfw_sopt_handler { uint16_t opcode; uint8_t version; uint8_t dir; sopt_handler_f *handler; uint64_t refcnt; }; #define HDIR_SET 0x01 /* Handler is used to set some data */ #define HDIR_GET 0x02 /* Handler is used to retrieve data */ #define HDIR_BOTH HDIR_GET|HDIR_SET void ipfw_init_sopt_handler(void); void ipfw_destroy_sopt_handler(void); void ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count); int ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count); caddr_t ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed); caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed); #define IPFW_ADD_SOPT_HANDLER(f, c) do { \ if ((f) != 0) \ ipfw_add_sopt_handler(c, \ sizeof(c) / sizeof(c[0])); \ } while(0) #define IPFW_DEL_SOPT_HANDLER(l, c) do { \ if ((l) != 0) \ ipfw_del_sopt_handler(c, \ sizeof(c) / sizeof(c[0])); \ } while(0) struct namedobj_instance; typedef int (objhash_cb_t)(struct namedobj_instance *ni, struct named_object *, void *arg); typedef uint32_t (objhash_hash_f)(struct namedobj_instance *ni, const void *key, uint32_t kopt); typedef int (objhash_cmp_f)(struct named_object *no, const void *key, uint32_t kopt); struct namedobj_instance *ipfw_objhash_create(uint32_t items); void ipfw_objhash_destroy(struct namedobj_instance *); void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks); void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, void **idx, int *blocks); void ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, void **idx, int *blocks); void ipfw_objhash_bitmap_free(void *idx, int blocks); void ipfw_objhash_set_hashf(struct namedobj_instance *ni, objhash_hash_f *f); struct named_object *ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name); struct named_object *ipfw_objhash_lookup_name_type(struct namedobj_instance *ni, uint32_t set, uint32_t type, const char *name); struct named_object *ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t idx); int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a, struct named_object *b); void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no); void ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no); uint32_t ipfw_objhash_count(struct namedobj_instance *ni); uint32_t ipfw_objhash_count_type(struct namedobj_instance *ni, uint16_t type); int ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, void *arg); int ipfw_objhash_foreach_type(struct namedobj_instance *ni, objhash_cb_t *f, void *arg, uint16_t type); int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx); int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx); void ipfw_objhash_set_funcs(struct namedobj_instance *ni, objhash_hash_f *hash_f, objhash_cmp_f *cmp_f); int ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti, uint32_t etlv, struct named_object **pno); void ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv); ipfw_obj_ntlv *ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx, uint32_t etlv); void ipfw_init_obj_rewriter(void); void ipfw_destroy_obj_rewriter(void); void ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count); int ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count); int create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti); void update_opcode_kidx(ipfw_insn *cmd, uint16_t idx); int classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx); void ipfw_init_srv(struct ip_fw_chain *ch); void ipfw_destroy_srv(struct ip_fw_chain *ch); int ipfw_check_object_name_generic(const char *name); int ipfw_obj_manage_sets(struct namedobj_instance *ni, uint16_t type, uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd); /* In ip_fw_eaction.c */ typedef int (ipfw_eaction_t)(struct ip_fw_chain *ch, struct ip_fw_args *args, ipfw_insn *cmd, int *done); int ipfw_eaction_init(struct ip_fw_chain *ch, int first); void ipfw_eaction_uninit(struct ip_fw_chain *ch, int last); uint16_t ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler, const char *name); int ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id); int ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args, ipfw_insn *cmd, int *done); /* In ip_fw_table.c */ struct table_info; typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen, uint32_t *val); int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, void *paddr, uint32_t *val); struct named_object *ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx); int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx); void ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx); int ipfw_init_tables(struct ip_fw_chain *ch, int first); int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables); int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int nsets); void ipfw_destroy_tables(struct ip_fw_chain *ch, int last); /* In ip_fw_nat.c -- XXX to be moved to ip_var.h */ extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int); typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *); typedef int ipfw_nat_cfg_t(struct sockopt *); VNET_DECLARE(int, ipfw_nat_ready); #define V_ipfw_nat_ready VNET(ipfw_nat_ready) #define IPFW_NAT_LOADED (V_ipfw_nat_ready) extern ipfw_nat_t *ipfw_nat_ptr; extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr; extern ipfw_nat_cfg_t *ipfw_nat_del_ptr; extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; /* Helper functions for IP checksum adjustment */ static __inline uint16_t cksum_add(uint16_t sum, uint16_t a) { uint16_t res; res = sum + a; return (res + (res < a)); } static __inline uint16_t cksum_adjust(uint16_t oldsum, uint16_t old, uint16_t new) { return (~cksum_add(cksum_add(~oldsum, ~old), new)); } #endif /* _KERNEL */ #endif /* _IPFW2_PRIVATE_H */ Index: stable/11/sys/netpfil/ipfw/ip_fw_sockopt.c =================================================================== --- stable/11/sys/netpfil/ipfw/ip_fw_sockopt.c (revision 326387) +++ stable/11/sys/netpfil/ipfw/ip_fw_sockopt.c (revision 326388) @@ -1,4628 +1,4651 @@ /*- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa * Copyright (c) 2014 Yandex LLC * Copyright (c) 2014 Alexander V. Chernikov * * Supported by: Valeria Paoli * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Control socket and rule management routines for ipfw. * Control is currently implemented via IP_FW3 setsockopt() code. */ #include "opt_ipfw.h" #include "opt_inet.h" #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ #include "opt_inet6.h" #include #include #include #include /* struct m_tag used by nested headers */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* hooks */ #include #include #include #ifdef MAC #include #endif static int ipfw_ctl(struct sockopt *sopt); static int check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci); static int check_ipfw_rule1(struct ip_fw_rule *rule, int size, struct rule_check_info *ci); static int check_ipfw_rule0(struct ip_fw_rule0 *rule, int size, struct rule_check_info *ci); static int rewrite_rule_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci); #define NAMEDOBJ_HASH_SIZE 32 struct namedobj_instance { struct namedobjects_head *names; struct namedobjects_head *values; uint32_t nn_size; /* names hash size */ uint32_t nv_size; /* number hash size */ u_long *idx_mask; /* used items bitmask */ uint32_t max_blocks; /* number of "long" blocks in bitmask */ uint32_t count; /* number of items */ uint16_t free_off[IPFW_MAX_SETS]; /* first possible free offset */ objhash_hash_f *hash_f; objhash_cmp_f *cmp_f; }; #define BLOCK_ITEMS (8 * sizeof(u_long)) /* Number of items for ffsl() */ static uint32_t objhash_hash_name(struct namedobj_instance *ni, const void *key, uint32_t kopt); static uint32_t objhash_hash_idx(struct namedobj_instance *ni, uint32_t val); static int objhash_cmp_name(struct named_object *no, const void *name, uint32_t set); MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); static int dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); static int dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd); /* ctl3 handler data */ struct mtx ctl3_lock; #define CTL3_LOCK_INIT() mtx_init(&ctl3_lock, "ctl3_lock", NULL, MTX_DEF) #define CTL3_LOCK_DESTROY() mtx_destroy(&ctl3_lock) #define CTL3_LOCK() mtx_lock(&ctl3_lock) #define CTL3_UNLOCK() mtx_unlock(&ctl3_lock) static struct ipfw_sopt_handler *ctl3_handlers; static size_t ctl3_hsize; static uint64_t ctl3_refct, ctl3_gencnt; #define CTL3_SMALLBUF 4096 /* small page-size write buffer */ #define CTL3_LARGEBUF 16 * 1024 * 1024 /* handle large rulesets */ static int ipfw_flush_sopt_data(struct sockopt_data *sd); static struct ipfw_sopt_handler scodes[] = { { IP_FW_XGET, 0, HDIR_GET, dump_config }, { IP_FW_XADD, 0, HDIR_BOTH, add_rules }, { IP_FW_XDEL, 0, HDIR_BOTH, del_rules }, { IP_FW_XZERO, 0, HDIR_SET, clear_rules }, { IP_FW_XRESETLOG, 0, HDIR_SET, clear_rules }, { IP_FW_XMOVE, 0, HDIR_SET, move_rules }, { IP_FW_SET_SWAP, 0, HDIR_SET, manage_sets }, { IP_FW_SET_MOVE, 0, HDIR_SET, manage_sets }, { IP_FW_SET_ENABLE, 0, HDIR_SET, manage_sets }, { IP_FW_DUMP_SOPTCODES, 0, HDIR_GET, dump_soptcodes }, { IP_FW_DUMP_SRVOBJECTS,0, HDIR_GET, dump_srvobjects }, }; static int set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule); static struct opcode_obj_rewrite *find_op_rw(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype); static int mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule, uint32_t *bmask); static int ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule, struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti); static int ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, struct tid_info *ti, struct obj_idx *pidx, int *unresolved); static void unref_rule_objects(struct ip_fw_chain *chain, struct ip_fw *rule); static void unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib, struct obj_idx *end); static int export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx, struct sockopt_data *sd); /* * Opcode object rewriter variables */ struct opcode_obj_rewrite *ctl3_rewriters; static size_t ctl3_rsize; /* * static variables followed by global ones */ static VNET_DEFINE(uma_zone_t, ipfw_cntr_zone); #define V_ipfw_cntr_zone VNET(ipfw_cntr_zone) void ipfw_init_counters() { V_ipfw_cntr_zone = uma_zcreate("IPFW counters", IPFW_RULE_CNTR_SIZE, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU); } void ipfw_destroy_counters() { uma_zdestroy(V_ipfw_cntr_zone); } struct ip_fw * ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize) { struct ip_fw *rule; rule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO); rule->cntr = uma_zalloc(V_ipfw_cntr_zone, M_WAITOK | M_ZERO); return (rule); } static void free_rule(struct ip_fw *rule) { uma_zfree(V_ipfw_cntr_zone, rule->cntr); free(rule, M_IPFW); } /* * Find the smallest rule >= key, id. * We could use bsearch but it is so simple that we code it directly */ int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id) { int i, lo, hi; struct ip_fw *r; for (lo = 0, hi = chain->n_rules - 1; lo < hi;) { i = (lo + hi) / 2; r = chain->map[i]; if (r->rulenum < key) lo = i + 1; /* continue from the next one */ else if (r->rulenum > key) hi = i; /* this might be good */ else if (r->id < id) lo = i + 1; /* continue from the next one */ else /* r->id >= id */ hi = i; /* this might be good */ } return hi; } /* * Builds skipto cache on rule set @map. */ static void update_skipto_cache(struct ip_fw_chain *chain, struct ip_fw **map) { int *smap, rulenum; int i, mi; IPFW_UH_WLOCK_ASSERT(chain); mi = 0; rulenum = map[mi]->rulenum; smap = chain->idxmap_back; if (smap == NULL) return; for (i = 0; i < 65536; i++) { smap[i] = mi; /* Use the same rule index until i < rulenum */ if (i != rulenum || i == 65535) continue; /* Find next rule with num > i */ rulenum = map[++mi]->rulenum; while (rulenum == i) rulenum = map[++mi]->rulenum; } } /* * Swaps prepared (backup) index with current one. */ static void swap_skipto_cache(struct ip_fw_chain *chain) { int *map; IPFW_UH_WLOCK_ASSERT(chain); IPFW_WLOCK_ASSERT(chain); map = chain->idxmap; chain->idxmap = chain->idxmap_back; chain->idxmap_back = map; } /* * Allocate and initialize skipto cache. */ void ipfw_init_skipto_cache(struct ip_fw_chain *chain) { int *idxmap, *idxmap_back; idxmap = malloc(65536 * sizeof(uint32_t *), M_IPFW, M_WAITOK | M_ZERO); idxmap_back = malloc(65536 * sizeof(uint32_t *), M_IPFW, M_WAITOK | M_ZERO); /* * Note we may be called at any time after initialization, * for example, on first skipto rule, so we need to * provide valid chain->idxmap on return */ IPFW_UH_WLOCK(chain); if (chain->idxmap != NULL) { IPFW_UH_WUNLOCK(chain); free(idxmap, M_IPFW); free(idxmap_back, M_IPFW); return; } /* Set backup pointer first to permit building cache */ chain->idxmap_back = idxmap_back; update_skipto_cache(chain, chain->map); IPFW_WLOCK(chain); /* It is now safe to set chain->idxmap ptr */ chain->idxmap = idxmap; swap_skipto_cache(chain); IPFW_WUNLOCK(chain); IPFW_UH_WUNLOCK(chain); } /* * Destroys skipto cache. */ void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain) { if (chain->idxmap != NULL) free(chain->idxmap, M_IPFW); if (chain->idxmap != NULL) free(chain->idxmap_back, M_IPFW); } /* * allocate a new map, returns the chain locked. extra is the number * of entries to add or delete. */ static struct ip_fw ** get_map(struct ip_fw_chain *chain, int extra, int locked) { for (;;) { struct ip_fw **map; int i, mflags; mflags = M_ZERO | ((locked != 0) ? M_NOWAIT : M_WAITOK); i = chain->n_rules + extra; map = malloc(i * sizeof(struct ip_fw *), M_IPFW, mflags); if (map == NULL) { printf("%s: cannot allocate map\n", __FUNCTION__); return NULL; } if (!locked) IPFW_UH_WLOCK(chain); if (i >= chain->n_rules + extra) /* good */ return map; /* otherwise we lost the race, free and retry */ if (!locked) IPFW_UH_WUNLOCK(chain); free(map, M_IPFW); } } /* * swap the maps. It is supposed to be called with IPFW_UH_WLOCK */ static struct ip_fw ** swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len) { struct ip_fw **old_map; IPFW_WLOCK(chain); chain->id++; chain->n_rules = new_len; old_map = chain->map; chain->map = new_map; swap_skipto_cache(chain); IPFW_WUNLOCK(chain); return old_map; } static void export_cntr1_base(struct ip_fw *krule, struct ip_fw_bcounter *cntr) { struct timeval boottime; cntr->size = sizeof(*cntr); if (krule->cntr != NULL) { cntr->pcnt = counter_u64_fetch(krule->cntr); cntr->bcnt = counter_u64_fetch(krule->cntr + 1); cntr->timestamp = krule->timestamp; } if (cntr->timestamp > 0) { getboottime(&boottime); cntr->timestamp += boottime.tv_sec; } } static void export_cntr0_base(struct ip_fw *krule, struct ip_fw_bcounter0 *cntr) { struct timeval boottime; if (krule->cntr != NULL) { cntr->pcnt = counter_u64_fetch(krule->cntr); cntr->bcnt = counter_u64_fetch(krule->cntr + 1); cntr->timestamp = krule->timestamp; } if (cntr->timestamp > 0) { getboottime(&boottime); cntr->timestamp += boottime.tv_sec; } } /* * Copies rule @urule from v1 userland format (current). * to kernel @krule. * Assume @krule is zeroed. */ static void import_rule1(struct rule_check_info *ci) { struct ip_fw_rule *urule; struct ip_fw *krule; urule = (struct ip_fw_rule *)ci->urule; krule = (struct ip_fw *)ci->krule; /* copy header */ krule->act_ofs = urule->act_ofs; krule->cmd_len = urule->cmd_len; krule->rulenum = urule->rulenum; krule->set = urule->set; krule->flags = urule->flags; /* Save rulenum offset */ ci->urule_numoff = offsetof(struct ip_fw_rule, rulenum); /* Copy opcodes */ memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t)); } /* * Export rule into v1 format (Current). * Layout: * [ ipfw_obj_tlv(IPFW_TLV_RULE_ENT) * [ ip_fw_rule ] OR * [ ip_fw_bcounter ip_fw_rule] (depends on rcntrs). * ] * Assume @data is zeroed. */ static void export_rule1(struct ip_fw *krule, caddr_t data, int len, int rcntrs) { struct ip_fw_bcounter *cntr; struct ip_fw_rule *urule; ipfw_obj_tlv *tlv; /* Fill in TLV header */ tlv = (ipfw_obj_tlv *)data; tlv->type = IPFW_TLV_RULE_ENT; tlv->length = len; if (rcntrs != 0) { /* Copy counters */ cntr = (struct ip_fw_bcounter *)(tlv + 1); urule = (struct ip_fw_rule *)(cntr + 1); export_cntr1_base(krule, cntr); } else urule = (struct ip_fw_rule *)(tlv + 1); /* copy header */ urule->act_ofs = krule->act_ofs; urule->cmd_len = krule->cmd_len; urule->rulenum = krule->rulenum; urule->set = krule->set; urule->flags = krule->flags; urule->id = krule->id; /* Copy opcodes */ memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t)); } /* * Copies rule @urule from FreeBSD8 userland format (v0) * to kernel @krule. * Assume @krule is zeroed. */ static void import_rule0(struct rule_check_info *ci) { struct ip_fw_rule0 *urule; struct ip_fw *krule; int cmdlen, l; ipfw_insn *cmd; ipfw_insn_limit *lcmd; ipfw_insn_if *cmdif; urule = (struct ip_fw_rule0 *)ci->urule; krule = (struct ip_fw *)ci->krule; /* copy header */ krule->act_ofs = urule->act_ofs; krule->cmd_len = urule->cmd_len; krule->rulenum = urule->rulenum; krule->set = urule->set; if ((urule->_pad & 1) != 0) krule->flags |= IPFW_RULE_NOOPT; /* Save rulenum offset */ ci->urule_numoff = offsetof(struct ip_fw_rule0, rulenum); /* Copy opcodes */ memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t)); /* * Alter opcodes: * 1) convert tablearg value from 65535 to 0 * 2) Add high bit to O_SETFIB/O_SETDSCP values (to make room * for targ). * 3) convert table number in iface opcodes to u16 * 4) convert old `nat global` into new 65535 */ l = krule->cmd_len; cmd = krule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); switch (cmd->opcode) { /* Opcodes supporting tablearg */ case O_TAG: case O_TAGGED: case O_PIPE: case O_QUEUE: case O_DIVERT: case O_TEE: case O_SKIPTO: case O_CALLRETURN: case O_NETGRAPH: case O_NGTEE: case O_NAT: if (cmd->arg1 == IP_FW_TABLEARG) cmd->arg1 = IP_FW_TARG; else if (cmd->arg1 == 0) cmd->arg1 = IP_FW_NAT44_GLOBAL; break; case O_SETFIB: case O_SETDSCP: if (cmd->arg1 == IP_FW_TABLEARG) cmd->arg1 = IP_FW_TARG; else cmd->arg1 |= 0x8000; break; case O_LIMIT: lcmd = (ipfw_insn_limit *)cmd; if (lcmd->conn_limit == IP_FW_TABLEARG) lcmd->conn_limit = IP_FW_TARG; break; /* Interface tables */ case O_XMIT: case O_RECV: case O_VIA: /* Interface table, possibly */ cmdif = (ipfw_insn_if *)cmd; if (cmdif->name[0] != '\1') break; cmdif->p.kidx = (uint16_t)cmdif->p.glob; break; } } } /* * Copies rule @krule from kernel to FreeBSD8 userland format (v0) */ static void export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len) { int cmdlen, l; ipfw_insn *cmd; ipfw_insn_limit *lcmd; ipfw_insn_if *cmdif; /* copy header */ memset(urule, 0, len); urule->act_ofs = krule->act_ofs; urule->cmd_len = krule->cmd_len; urule->rulenum = krule->rulenum; urule->set = krule->set; if ((krule->flags & IPFW_RULE_NOOPT) != 0) urule->_pad |= 1; /* Copy opcodes */ memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t)); /* Export counters */ export_cntr0_base(krule, (struct ip_fw_bcounter0 *)&urule->pcnt); /* * Alter opcodes: * 1) convert tablearg value from 0 to 65535 * 2) Remove highest bit from O_SETFIB/O_SETDSCP values. * 3) convert table number in iface opcodes to int */ l = urule->cmd_len; cmd = urule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); switch (cmd->opcode) { /* Opcodes supporting tablearg */ case O_TAG: case O_TAGGED: case O_PIPE: case O_QUEUE: case O_DIVERT: case O_TEE: case O_SKIPTO: case O_CALLRETURN: case O_NETGRAPH: case O_NGTEE: case O_NAT: if (cmd->arg1 == IP_FW_TARG) cmd->arg1 = IP_FW_TABLEARG; else if (cmd->arg1 == IP_FW_NAT44_GLOBAL) cmd->arg1 = 0; break; case O_SETFIB: case O_SETDSCP: if (cmd->arg1 == IP_FW_TARG) cmd->arg1 = IP_FW_TABLEARG; else cmd->arg1 &= ~0x8000; break; case O_LIMIT: lcmd = (ipfw_insn_limit *)cmd; if (lcmd->conn_limit == IP_FW_TARG) lcmd->conn_limit = IP_FW_TABLEARG; break; /* Interface tables */ case O_XMIT: case O_RECV: case O_VIA: /* Interface table, possibly */ cmdif = (ipfw_insn_if *)cmd; if (cmdif->name[0] != '\1') break; cmdif->p.glob = cmdif->p.kidx; break; } } } /* * Add new rule(s) to the list possibly creating rule number for each. * Update the rule_number in the input struct so the caller knows it as well. * Must be called without IPFW_UH held */ static int commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci, int count) { int error, i, insert_before, tcount; uint16_t rulenum, *pnum; struct rule_check_info *ci; struct ip_fw *krule; struct ip_fw **map; /* the new array of pointers */ /* Check if we need to do table/obj index remap */ tcount = 0; for (ci = rci, i = 0; i < count; ci++, i++) { if (ci->object_opcodes == 0) continue; /* * Rule has some object opcodes. * We need to find (and create non-existing) * kernel objects, and reference existing ones. */ error = rewrite_rule_uidx(chain, ci); if (error != 0) { /* * rewrite failed, state for current rule * has been reverted. Check if we need to * revert more. */ if (tcount > 0) { /* * We have some more table rules * we need to rollback. */ IPFW_UH_WLOCK(chain); while (ci != rci) { ci--; if (ci->object_opcodes == 0) continue; unref_rule_objects(chain,ci->krule); } IPFW_UH_WUNLOCK(chain); } return (error); } tcount++; } /* get_map returns with IPFW_UH_WLOCK if successful */ map = get_map(chain, count, 0 /* not locked */); if (map == NULL) { if (tcount > 0) { /* Unbind tables */ IPFW_UH_WLOCK(chain); for (ci = rci, i = 0; i < count; ci++, i++) { if (ci->object_opcodes == 0) continue; unref_rule_objects(chain, ci->krule); } IPFW_UH_WUNLOCK(chain); } return (ENOSPC); } if (V_autoinc_step < 1) V_autoinc_step = 1; else if (V_autoinc_step > 1000) V_autoinc_step = 1000; /* FIXME: Handle count > 1 */ ci = rci; krule = ci->krule; rulenum = krule->rulenum; /* find the insertion point, we will insert before */ insert_before = rulenum ? rulenum + 1 : IPFW_DEFAULT_RULE; i = ipfw_find_rule(chain, insert_before, 0); /* duplicate first part */ if (i > 0) bcopy(chain->map, map, i * sizeof(struct ip_fw *)); map[i] = krule; /* duplicate remaining part, we always have the default rule */ bcopy(chain->map + i, map + i + 1, sizeof(struct ip_fw *) *(chain->n_rules - i)); if (rulenum == 0) { /* Compute rule number and write it back */ rulenum = i > 0 ? map[i-1]->rulenum : 0; if (rulenum < IPFW_DEFAULT_RULE - V_autoinc_step) rulenum += V_autoinc_step; krule->rulenum = rulenum; /* Save number to userland rule */ pnum = (uint16_t *)((caddr_t)ci->urule + ci->urule_numoff); *pnum = rulenum; } krule->id = chain->id + 1; update_skipto_cache(chain, map); map = swap_map(chain, map, chain->n_rules + 1); chain->static_len += RULEUSIZE0(krule); IPFW_UH_WUNLOCK(chain); if (map) free(map, M_IPFW); return (0); } +int +ipfw_add_protected_rule(struct ip_fw_chain *chain, struct ip_fw *rule, + int locked) +{ + struct ip_fw **map; + + map = get_map(chain, 1, locked); + if (map == NULL) + return (ENOMEM); + if (chain->n_rules > 0) + bcopy(chain->map, map, + chain->n_rules * sizeof(struct ip_fw *)); + map[chain->n_rules] = rule; + rule->rulenum = IPFW_DEFAULT_RULE; + rule->set = RESVD_SET; + rule->id = chain->id + 1; + /* We add rule in the end of chain, no need to update skipto cache */ + map = swap_map(chain, map, chain->n_rules + 1); + chain->static_len += RULEUSIZE0(rule); + IPFW_UH_WUNLOCK(chain); + free(map, M_IPFW); + return (0); +} + /* * Adds @rule to the list of rules to reap */ void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head, struct ip_fw *rule) { IPFW_UH_WLOCK_ASSERT(chain); /* Unlink rule from everywhere */ unref_rule_objects(chain, rule); *((struct ip_fw **)rule) = *head; *head = rule; } /* * Reclaim storage associated with a list of rules. This is * typically the list created using remove_rule. * A NULL pointer on input is handled correctly. */ void ipfw_reap_rules(struct ip_fw *head) { struct ip_fw *rule; while ((rule = head) != NULL) { head = *((struct ip_fw **)head); free_rule(rule); } } /* * Rules to keep are * (default || reserved || !match_set || !match_number) * where * default ::= (rule->rulenum == IPFW_DEFAULT_RULE) * // the default rule is always protected * * reserved ::= (cmd == 0 && n == 0 && rule->set == RESVD_SET) * // RESVD_SET is protected only if cmd == 0 and n == 0 ("ipfw flush") * * match_set ::= (cmd == 0 || rule->set == set) * // set number is ignored for cmd == 0 * * match_number ::= (cmd == 1 || n == 0 || n == rule->rulenum) * // number is ignored for cmd == 1 or n == 0 * */ int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt) { /* Don't match default rule for modification queries */ if (rule->rulenum == IPFW_DEFAULT_RULE && (rt->flags & IPFW_RCFLAG_DEFAULT) == 0) return (0); /* Don't match rules in reserved set for flush requests */ if ((rt->flags & IPFW_RCFLAG_ALL) != 0 && rule->set == RESVD_SET) return (0); /* If we're filtering by set, don't match other sets */ if ((rt->flags & IPFW_RCFLAG_SET) != 0 && rule->set != rt->set) return (0); if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 && (rule->rulenum < rt->start_rule || rule->rulenum > rt->end_rule)) return (0); return (1); } struct manage_sets_args { uint16_t set; uint8_t new_set; }; static int swap_sets_cb(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct manage_sets_args *args; args = (struct manage_sets_args *)arg; if (no->set == (uint8_t)args->set) no->set = args->new_set; else if (no->set == args->new_set) no->set = (uint8_t)args->set; return (0); } static int move_sets_cb(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct manage_sets_args *args; args = (struct manage_sets_args *)arg; if (no->set == (uint8_t)args->set) no->set = args->new_set; return (0); } static int test_sets_cb(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct manage_sets_args *args; args = (struct manage_sets_args *)arg; if (no->set != (uint8_t)args->set) return (0); if (ipfw_objhash_lookup_name_type(ni, args->new_set, no->etlv, no->name) != NULL) return (EEXIST); return (0); } /* * Generic function to handler moving and swapping sets. */ int ipfw_obj_manage_sets(struct namedobj_instance *ni, uint16_t type, uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd) { struct manage_sets_args args; struct named_object *no; args.set = set; args.new_set = new_set; switch (cmd) { case SWAP_ALL: return (ipfw_objhash_foreach_type(ni, swap_sets_cb, &args, type)); case TEST_ALL: return (ipfw_objhash_foreach_type(ni, test_sets_cb, &args, type)); case MOVE_ALL: return (ipfw_objhash_foreach_type(ni, move_sets_cb, &args, type)); case COUNT_ONE: /* * @set used to pass kidx. * When @new_set is zero - reset object counter, * otherwise increment it. */ no = ipfw_objhash_lookup_kidx(ni, set); if (new_set != 0) no->ocnt++; else no->ocnt = 0; return (0); case TEST_ONE: /* @set used to pass kidx */ no = ipfw_objhash_lookup_kidx(ni, set); /* * First check number of references: * when it differs, this mean other rules are holding * reference to given object, so it is not possible to * change its set. Note that refcnt may account references * to some going-to-be-added rules. Since we don't know * their numbers (and even if they will be added) it is * perfectly OK to return error here. */ if (no->ocnt != no->refcnt) return (EBUSY); if (ipfw_objhash_lookup_name_type(ni, new_set, type, no->name) != NULL) return (EEXIST); return (0); case MOVE_ONE: /* @set used to pass kidx */ no = ipfw_objhash_lookup_kidx(ni, set); no->set = new_set; return (0); } return (EINVAL); } /* * Delete rules matching range @rt. * Saves number of deleted rules in @ndel. * * Returns 0 on success. */ static int delete_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int *ndel) { struct ip_fw *reap, *rule, **map; int end, start; int i, n, ndyn, ofs; reap = NULL; IPFW_UH_WLOCK(chain); /* arbitrate writers */ /* * Stage 1: Determine range to inspect. * Range is half-inclusive, e.g [start, end). */ start = 0; end = chain->n_rules - 1; if ((rt->flags & IPFW_RCFLAG_RANGE) != 0) { start = ipfw_find_rule(chain, rt->start_rule, 0); - end = ipfw_find_rule(chain, rt->end_rule, 0); - if (rt->end_rule != IPFW_DEFAULT_RULE) - while (chain->map[end]->rulenum == rt->end_rule) - end++; + if (rt->end_rule >= IPFW_DEFAULT_RULE) + rt->end_rule = IPFW_DEFAULT_RULE - 1; + end = ipfw_find_rule(chain, rt->end_rule, UINT32_MAX); } /* Allocate new map of the same size */ map = get_map(chain, 0, 1 /* locked */); if (map == NULL) { IPFW_UH_WUNLOCK(chain); return (ENOMEM); } n = 0; ndyn = 0; ofs = start; /* 1. bcopy the initial part of the map */ if (start > 0) bcopy(chain->map, map, start * sizeof(struct ip_fw *)); /* 2. copy active rules between start and end */ for (i = start; i < end; i++) { rule = chain->map[i]; if (ipfw_match_range(rule, rt) == 0) { map[ofs++] = rule; continue; } n++; if (ipfw_is_dyn_rule(rule) != 0) ndyn++; } /* 3. copy the final part of the map */ bcopy(chain->map + end, map + ofs, (chain->n_rules - end) * sizeof(struct ip_fw *)); /* 4. recalculate skipto cache */ update_skipto_cache(chain, map); /* 5. swap the maps (under UH_WLOCK + WHLOCK) */ map = swap_map(chain, map, chain->n_rules - n); /* 6. Remove all dynamic states originated by deleted rules */ if (ndyn > 0) ipfw_expire_dyn_rules(chain, rt); /* 7. now remove the rules deleted from the old map */ for (i = start; i < end; i++) { rule = map[i]; if (ipfw_match_range(rule, rt) == 0) continue; chain->static_len -= RULEUSIZE0(rule); ipfw_reap_add(chain, &reap, rule); } IPFW_UH_WUNLOCK(chain); ipfw_reap_rules(reap); if (map != NULL) free(map, M_IPFW); *ndel = n; return (0); } static int move_objects(struct ip_fw_chain *ch, ipfw_range_tlv *rt) { struct opcode_obj_rewrite *rw; struct ip_fw *rule; ipfw_insn *cmd; int cmdlen, i, l, c; uint16_t kidx; IPFW_UH_WLOCK_ASSERT(ch); /* Stage 1: count number of references by given rules */ for (c = 0, i = 0; i < ch->n_rules - 1; i++) { rule = ch->map[i]; if (ipfw_match_range(rule, rt) == 0) continue; if (rule->set == rt->new_set) /* nothing to do */ continue; /* Search opcodes with named objects */ for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd; l > 0; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); rw = find_op_rw(cmd, &kidx, NULL); if (rw == NULL || rw->manage_sets == NULL) continue; /* * When manage_sets() returns non-zero value to * COUNT_ONE command, consider this as an object * doesn't support sets (e.g. disabled with sysctl). * So, skip checks for this object. */ if (rw->manage_sets(ch, kidx, 1, COUNT_ONE) != 0) continue; c++; } } if (c == 0) /* No objects found */ return (0); /* Stage 2: verify "ownership" */ for (c = 0, i = 0; (i < ch->n_rules - 1) && c == 0; i++) { rule = ch->map[i]; if (ipfw_match_range(rule, rt) == 0) continue; if (rule->set == rt->new_set) /* nothing to do */ continue; /* Search opcodes with named objects */ for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd; l > 0 && c == 0; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); rw = find_op_rw(cmd, &kidx, NULL); if (rw == NULL || rw->manage_sets == NULL) continue; /* Test for ownership and conflicting names */ c = rw->manage_sets(ch, kidx, (uint8_t)rt->new_set, TEST_ONE); } } /* Stage 3: change set and cleanup */ for (i = 0; i < ch->n_rules - 1; i++) { rule = ch->map[i]; if (ipfw_match_range(rule, rt) == 0) continue; if (rule->set == rt->new_set) /* nothing to do */ continue; /* Search opcodes with named objects */ for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd; l > 0; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); rw = find_op_rw(cmd, &kidx, NULL); if (rw == NULL || rw->manage_sets == NULL) continue; /* cleanup object counter */ rw->manage_sets(ch, kidx, 0 /* reset counter */, COUNT_ONE); if (c != 0) continue; /* change set */ rw->manage_sets(ch, kidx, (uint8_t)rt->new_set, MOVE_ONE); } } return (c); }/* * Changes set of given rule rannge @rt * with each other. * * Returns 0 on success. */ static int move_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt) { struct ip_fw *rule; int i; IPFW_UH_WLOCK(chain); /* * Move rules with matching paramenerts to a new set. * This one is much more complex. We have to ensure * that all referenced tables (if any) are referenced * by given rule subset only. Otherwise, we can't move * them to new set and have to return error. */ if ((i = move_objects(chain, rt)) != 0) { IPFW_UH_WUNLOCK(chain); return (i); } /* XXX: We have to do swap holding WLOCK */ for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; if (ipfw_match_range(rule, rt) == 0) continue; rule->set = rt->new_set; } IPFW_UH_WUNLOCK(chain); return (0); } /* * Clear counters for a specific rule. * Normally run under IPFW_UH_RLOCK, but these are idempotent ops * so we only care that rules do not disappear. */ static void clear_counters(struct ip_fw *rule, int log_only) { ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); if (log_only == 0) IPFW_ZERO_RULE_COUNTER(rule); if (l->o.opcode == O_LOG) l->log_left = l->max_log; } /* * Flushes rules counters and/or log values on matching range. * * Returns number of items cleared. */ static int clear_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int log_only) { struct ip_fw *rule; int num; int i; num = 0; rt->flags |= IPFW_RCFLAG_DEFAULT; IPFW_UH_WLOCK(chain); /* arbitrate writers */ for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; if (ipfw_match_range(rule, rt) == 0) continue; clear_counters(rule, log_only); num++; } IPFW_UH_WUNLOCK(chain); return (num); } static int check_range_tlv(ipfw_range_tlv *rt) { if (rt->head.length != sizeof(*rt)) return (1); if (rt->start_rule > rt->end_rule) return (1); if (rt->set >= IPFW_MAX_SETS || rt->new_set >= IPFW_MAX_SETS) return (1); if ((rt->flags & IPFW_RCFLAG_USER) != rt->flags) return (1); return (0); } /* * Delete rules matching specified parameters * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_range_tlv ] * Reply: [ ipfw_obj_header ipfw_range_tlv ] * * Saves number of deleted rules in ipfw_range_tlv->new_set. * * Returns 0 on success. */ static int del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_range_header *rh; int error, ndel; if (sd->valsize != sizeof(*rh)) return (EINVAL); rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); if (check_range_tlv(&rh->range) != 0) return (EINVAL); ndel = 0; if ((error = delete_range(chain, &rh->range, &ndel)) != 0) return (error); /* Save number of rules deleted */ rh->range.new_set = ndel; return (0); } /* * Move rules/sets matching specified parameters * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_range_tlv ] * * Returns 0 on success. */ static int move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_range_header *rh; if (sd->valsize != sizeof(*rh)) return (EINVAL); rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); if (check_range_tlv(&rh->range) != 0) return (EINVAL); return (move_range(chain, &rh->range)); } /* * Clear rule accounting data matching specified parameters * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_range_tlv ] * Reply: [ ipfw_obj_header ipfw_range_tlv ] * * Saves number of cleared rules in ipfw_range_tlv->new_set. * * Returns 0 on success. */ static int clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_range_header *rh; int log_only, num; char *msg; if (sd->valsize != sizeof(*rh)) return (EINVAL); rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); if (check_range_tlv(&rh->range) != 0) return (EINVAL); log_only = (op3->opcode == IP_FW_XRESETLOG); num = clear_range(chain, &rh->range, log_only); if (rh->range.flags & IPFW_RCFLAG_ALL) msg = log_only ? "All logging counts reset" : "Accounting cleared"; else msg = log_only ? "logging count reset" : "cleared"; if (V_fw_verbose) { int lev = LOG_SECURITY | LOG_NOTICE; log(lev, "ipfw: %s.\n", msg); } /* Save number of rules cleared */ rh->range.new_set = num; return (0); } static void enable_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt) { uint32_t v_set; IPFW_UH_WLOCK_ASSERT(chain); /* Change enabled/disabled sets mask */ v_set = (V_set_disable | rt->set) & ~rt->new_set; v_set &= ~(1 << RESVD_SET); /* set RESVD_SET always enabled */ IPFW_WLOCK(chain); V_set_disable = v_set; IPFW_WUNLOCK(chain); } static int swap_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int mv) { struct opcode_obj_rewrite *rw; struct ip_fw *rule; int i; IPFW_UH_WLOCK_ASSERT(chain); if (rt->set == rt->new_set) /* nothing to do */ return (0); if (mv != 0) { /* * Berfore moving the rules we need to check that * there aren't any conflicting named objects. */ for (rw = ctl3_rewriters; rw < ctl3_rewriters + ctl3_rsize; rw++) { if (rw->manage_sets == NULL) continue; i = rw->manage_sets(chain, (uint8_t)rt->set, (uint8_t)rt->new_set, TEST_ALL); if (i != 0) return (EEXIST); } } /* Swap or move two sets */ for (i = 0; i < chain->n_rules - 1; i++) { rule = chain->map[i]; if (rule->set == (uint8_t)rt->set) rule->set = (uint8_t)rt->new_set; else if (rule->set == (uint8_t)rt->new_set && mv == 0) rule->set = (uint8_t)rt->set; } for (rw = ctl3_rewriters; rw < ctl3_rewriters + ctl3_rsize; rw++) { if (rw->manage_sets == NULL) continue; rw->manage_sets(chain, (uint8_t)rt->set, (uint8_t)rt->new_set, mv != 0 ? MOVE_ALL: SWAP_ALL); } return (0); } /* * Swaps or moves set * Data layout (v0)(current): * Request: [ ipfw_obj_header ipfw_range_tlv ] * * Returns 0 on success. */ static int manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_range_header *rh; int ret; if (sd->valsize != sizeof(*rh)) return (EINVAL); rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); if (rh->range.head.length != sizeof(ipfw_range_tlv)) return (1); /* enable_sets() expects bitmasks. */ if (op3->opcode != IP_FW_SET_ENABLE && (rh->range.set >= IPFW_MAX_SETS || rh->range.new_set >= IPFW_MAX_SETS)) return (EINVAL); ret = 0; IPFW_UH_WLOCK(chain); switch (op3->opcode) { case IP_FW_SET_SWAP: case IP_FW_SET_MOVE: ret = swap_sets(chain, &rh->range, op3->opcode == IP_FW_SET_MOVE); break; case IP_FW_SET_ENABLE: enable_sets(chain, &rh->range); break; } IPFW_UH_WUNLOCK(chain); return (ret); } /** * Remove all rules with given number, or do set manipulation. * Assumes chain != NULL && *chain != NULL. * * The argument is an uint32_t. The low 16 bit are the rule or set number; * the next 8 bits are the new set; the top 8 bits indicate the command: * * 0 delete rules numbered "rulenum" * 1 delete rules in set "rulenum" * 2 move rules "rulenum" to set "new_set" * 3 move rules from set "rulenum" to set "new_set" * 4 swap sets "rulenum" and "new_set" * 5 delete rules "rulenum" and set "new_set" */ static int del_entry(struct ip_fw_chain *chain, uint32_t arg) { uint32_t num; /* rule number or old_set */ uint8_t cmd, new_set; int do_del, ndel; int error = 0; ipfw_range_tlv rt; num = arg & 0xffff; cmd = (arg >> 24) & 0xff; new_set = (arg >> 16) & 0xff; if (cmd > 5 || new_set > RESVD_SET) return EINVAL; if (cmd == 0 || cmd == 2 || cmd == 5) { if (num >= IPFW_DEFAULT_RULE) return EINVAL; } else { if (num > RESVD_SET) /* old_set */ return EINVAL; } /* Convert old requests into new representation */ memset(&rt, 0, sizeof(rt)); rt.start_rule = num; rt.end_rule = num; rt.set = num; rt.new_set = new_set; do_del = 0; switch (cmd) { case 0: /* delete rules numbered "rulenum" */ if (num == 0) rt.flags |= IPFW_RCFLAG_ALL; else rt.flags |= IPFW_RCFLAG_RANGE; do_del = 1; break; case 1: /* delete rules in set "rulenum" */ rt.flags |= IPFW_RCFLAG_SET; do_del = 1; break; case 5: /* delete rules "rulenum" and set "new_set" */ rt.flags |= IPFW_RCFLAG_RANGE | IPFW_RCFLAG_SET; rt.set = new_set; rt.new_set = 0; do_del = 1; break; case 2: /* move rules "rulenum" to set "new_set" */ rt.flags |= IPFW_RCFLAG_RANGE; break; case 3: /* move rules from set "rulenum" to set "new_set" */ IPFW_UH_WLOCK(chain); error = swap_sets(chain, &rt, 1); IPFW_UH_WUNLOCK(chain); return (error); case 4: /* swap sets "rulenum" and "new_set" */ IPFW_UH_WLOCK(chain); error = swap_sets(chain, &rt, 0); IPFW_UH_WUNLOCK(chain); return (error); default: return (ENOTSUP); } if (do_del != 0) { if ((error = delete_range(chain, &rt, &ndel)) != 0) return (error); if (ndel == 0 && (cmd != 1 && num != 0)) return (EINVAL); return (0); } return (move_range(chain, &rt)); } /** * Reset some or all counters on firewall rules. * The argument `arg' is an u_int32_t. The low 16 bit are the rule number, * the next 8 bits are the set number, the top 8 bits are the command: * 0 work with rules from all set's; * 1 work with rules only from specified set. * Specified rule number is zero if we want to clear all entries. * log_only is 1 if we only want to reset logs, zero otherwise. */ static int zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) { struct ip_fw *rule; char *msg; int i; uint16_t rulenum = arg & 0xffff; uint8_t set = (arg >> 16) & 0xff; uint8_t cmd = (arg >> 24) & 0xff; if (cmd > 1) return (EINVAL); if (cmd == 1 && set > RESVD_SET) return (EINVAL); IPFW_UH_RLOCK(chain); if (rulenum == 0) { V_norule_counter = 0; for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; /* Skip rules not in our set. */ if (cmd == 1 && rule->set != set) continue; clear_counters(rule, log_only); } msg = log_only ? "All logging counts reset" : "Accounting cleared"; } else { int cleared = 0; for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; if (rule->rulenum == rulenum) { if (cmd == 0 || rule->set == set) clear_counters(rule, log_only); cleared = 1; } if (rule->rulenum > rulenum) break; } if (!cleared) { /* we did not find any matching rules */ IPFW_UH_RUNLOCK(chain); return (EINVAL); } msg = log_only ? "logging count reset" : "cleared"; } IPFW_UH_RUNLOCK(chain); if (V_fw_verbose) { int lev = LOG_SECURITY | LOG_NOTICE; if (rulenum) log(lev, "ipfw: Entry %d %s.\n", rulenum, msg); else log(lev, "ipfw: %s.\n", msg); } return (0); } /* * Check rule head in FreeBSD11 format * */ static int check_ipfw_rule1(struct ip_fw_rule *rule, int size, struct rule_check_info *ci) { int l; if (size < sizeof(*rule)) { printf("ipfw: rule too short\n"); return (EINVAL); } /* Check for valid cmd_len */ l = roundup2(RULESIZE(rule), sizeof(uint64_t)); if (l != size) { printf("ipfw: size mismatch (have %d want %d)\n", size, l); return (EINVAL); } if (rule->act_ofs >= rule->cmd_len) { printf("ipfw: bogus action offset (%u > %u)\n", rule->act_ofs, rule->cmd_len - 1); return (EINVAL); } if (rule->rulenum > IPFW_DEFAULT_RULE - 1) return (EINVAL); return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci)); } /* * Check rule head in FreeBSD8 format * */ static int check_ipfw_rule0(struct ip_fw_rule0 *rule, int size, struct rule_check_info *ci) { int l; if (size < sizeof(*rule)) { printf("ipfw: rule too short\n"); return (EINVAL); } /* Check for valid cmd_len */ l = sizeof(*rule) + rule->cmd_len * 4 - 4; if (l != size) { printf("ipfw: size mismatch (have %d want %d)\n", size, l); return (EINVAL); } if (rule->act_ofs >= rule->cmd_len) { printf("ipfw: bogus action offset (%u > %u)\n", rule->act_ofs, rule->cmd_len - 1); return (EINVAL); } if (rule->rulenum > IPFW_DEFAULT_RULE - 1) return (EINVAL); return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci)); } static int check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci) { int cmdlen, l; int have_action; have_action = 0; /* * Now go for the individual checks. Very simple ones, basically only * instruction sizes. */ for (l = cmd_len; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (cmdlen > l) { printf("ipfw: opcode %d size truncated\n", cmd->opcode); return EINVAL; } switch (cmd->opcode) { case O_PROBE_STATE: case O_KEEP_STATE: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; ci->object_opcodes++; break; case O_PROTO: case O_IP_SRC_ME: case O_IP_DST_ME: case O_LAYER2: case O_IN: case O_FRAG: case O_DIVERTED: case O_IPOPT: case O_IPTOS: case O_IPPRECEDENCE: case O_IPVER: case O_SOCKARG: case O_TCPFLAGS: case O_TCPOPTS: case O_ESTAB: case O_VERREVPATH: case O_VERSRCREACH: case O_ANTISPOOF: case O_IPSEC: #ifdef INET6 case O_IP6_SRC_ME: case O_IP6_DST_ME: case O_EXT_HDR: case O_IP6: #endif case O_IP4: case O_TAG: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; break; case O_EXTERNAL_ACTION: if (cmd->arg1 == 0 || cmdlen != F_INSN_SIZE(ipfw_insn)) { printf("ipfw: invalid external " "action opcode\n"); return (EINVAL); } ci->object_opcodes++; /* * Do we have O_EXTERNAL_INSTANCE or O_EXTERNAL_DATA * opcode? */ if (l != cmdlen) { l -= cmdlen; cmd += cmdlen; cmdlen = F_LEN(cmd); if (cmd->opcode == O_EXTERNAL_DATA) goto check_action; if (cmd->opcode != O_EXTERNAL_INSTANCE) { printf("ipfw: invalid opcode " "next to external action %u\n", cmd->opcode); return (EINVAL); } if (cmd->arg1 == 0 || cmdlen != F_INSN_SIZE(ipfw_insn)) { printf("ipfw: invalid external " "action instance opcode\n"); return (EINVAL); } ci->object_opcodes++; } goto check_action; case O_FIB: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; if (cmd->arg1 >= rt_numfibs) { printf("ipfw: invalid fib number %d\n", cmd->arg1); return EINVAL; } break; case O_SETFIB: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; if ((cmd->arg1 != IP_FW_TARG) && ((cmd->arg1 & 0x7FFF) >= rt_numfibs)) { printf("ipfw: invalid fib number %d\n", cmd->arg1 & 0x7FFF); return EINVAL; } goto check_action; case O_UID: case O_GID: case O_JAIL: case O_IP_SRC: case O_IP_DST: case O_TCPSEQ: case O_TCPACK: case O_PROB: case O_ICMPTYPE: if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; break; case O_LIMIT: if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) goto bad_size; ci->object_opcodes++; break; case O_LOG: if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) goto bad_size; ((ipfw_insn_log *)cmd)->log_left = ((ipfw_insn_log *)cmd)->max_log; break; case O_IP_SRC_MASK: case O_IP_DST_MASK: /* only odd command lengths */ if ((cmdlen & 1) == 0) goto bad_size; break; case O_IP_SRC_SET: case O_IP_DST_SET: if (cmd->arg1 == 0 || cmd->arg1 > 256) { printf("ipfw: invalid set size %d\n", cmd->arg1); return EINVAL; } if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + (cmd->arg1+31)/32 ) goto bad_size; break; case O_IP_SRC_LOOKUP: if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; case O_IP_DST_LOOKUP: if (cmd->arg1 >= V_fw_tables_max) { printf("ipfw: invalid table number %d\n", cmd->arg1); return (EINVAL); } if (cmdlen != F_INSN_SIZE(ipfw_insn) && cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 && cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; ci->object_opcodes++; break; case O_IP_FLOW_LOOKUP: if (cmd->arg1 >= V_fw_tables_max) { printf("ipfw: invalid table number %d\n", cmd->arg1); return (EINVAL); } if (cmdlen != F_INSN_SIZE(ipfw_insn) && cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; ci->object_opcodes++; break; case O_MACADDR2: if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) goto bad_size; break; case O_NOP: case O_IPID: case O_IPTTL: case O_IPLEN: case O_TCPDATALEN: case O_TCPWIN: case O_TAGGED: if (cmdlen < 1 || cmdlen > 31) goto bad_size; break; case O_DSCP: if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1) goto bad_size; break; case O_MAC_TYPE: case O_IP_SRCPORT: case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ if (cmdlen < 2 || cmdlen > 31) goto bad_size; break; case O_RECV: case O_XMIT: case O_VIA: if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) goto bad_size; ci->object_opcodes++; break; case O_ALTQ: if (cmdlen != F_INSN_SIZE(ipfw_insn_altq)) goto bad_size; break; case O_PIPE: case O_QUEUE: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; goto check_action; case O_FORWARD_IP: if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) goto bad_size; goto check_action; #ifdef INET6 case O_FORWARD_IP6: if (cmdlen != F_INSN_SIZE(ipfw_insn_sa6)) goto bad_size; goto check_action; #endif /* INET6 */ case O_DIVERT: case O_TEE: if (ip_divert_ptr == NULL) return EINVAL; else goto check_size; case O_NETGRAPH: case O_NGTEE: if (ng_ipfw_input_p == NULL) return EINVAL; else goto check_size; case O_NAT: if (!IPFW_NAT_LOADED) return EINVAL; if (cmdlen != F_INSN_SIZE(ipfw_insn_nat)) goto bad_size; goto check_action; case O_CHECK_STATE: ci->object_opcodes++; /* FALLTHROUGH */ case O_FORWARD_MAC: /* XXX not implemented yet */ case O_COUNT: case O_ACCEPT: case O_DENY: case O_REJECT: case O_SETDSCP: #ifdef INET6 case O_UNREACH6: #endif case O_SKIPTO: case O_REASS: case O_CALLRETURN: check_size: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; check_action: if (have_action) { printf("ipfw: opcode %d, multiple actions" " not allowed\n", cmd->opcode); return (EINVAL); } have_action = 1; if (l != cmdlen) { printf("ipfw: opcode %d, action must be" " last opcode\n", cmd->opcode); return (EINVAL); } break; #ifdef INET6 case O_IP6_SRC: case O_IP6_DST: if (cmdlen != F_INSN_SIZE(struct in6_addr) + F_INSN_SIZE(ipfw_insn)) goto bad_size; break; case O_FLOW6ID: if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + ((ipfw_insn_u32 *)cmd)->o.arg1) goto bad_size; break; case O_IP6_SRC_MASK: case O_IP6_DST_MASK: if ( !(cmdlen & 1) || cmdlen > 127) goto bad_size; break; case O_ICMP6TYPE: if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) ) goto bad_size; break; #endif default: switch (cmd->opcode) { #ifndef INET6 case O_IP6_SRC_ME: case O_IP6_DST_ME: case O_EXT_HDR: case O_IP6: case O_UNREACH6: case O_IP6_SRC: case O_IP6_DST: case O_FLOW6ID: case O_IP6_SRC_MASK: case O_IP6_DST_MASK: case O_ICMP6TYPE: printf("ipfw: no IPv6 support in kernel\n"); return (EPROTONOSUPPORT); #endif default: printf("ipfw: opcode %d, unknown opcode\n", cmd->opcode); return (EINVAL); } } } if (have_action == 0) { printf("ipfw: missing action\n"); return (EINVAL); } return 0; bad_size: printf("ipfw: opcode %d size %d wrong\n", cmd->opcode, cmdlen); return (EINVAL); } /* * Translation of requests for compatibility with FreeBSD 7.2/8. * a static variable tells us if we have an old client from userland, * and if necessary we translate requests and responses between the * two formats. */ static int is7 = 0; struct ip_fw7 { struct ip_fw7 *next; /* linked list of rules */ struct ip_fw7 *next_rule; /* ptr to next [skipto] rule */ /* 'next_rule' is used to pass up 'set_disable' status */ uint16_t act_ofs; /* offset of action in 32-bit units */ uint16_t cmd_len; /* # of 32-bit words in cmd */ uint16_t rulenum; /* rule number */ uint8_t set; /* rule set (0..31) */ // #define RESVD_SET 31 /* set for default and persistent rules */ uint8_t _pad; /* padding */ // uint32_t id; /* rule id, only in v.8 */ /* These fields are present in all rules. */ uint64_t pcnt; /* Packet counter */ uint64_t bcnt; /* Byte counter */ uint32_t timestamp; /* tv_sec of last match */ ipfw_insn cmd[1]; /* storage for commands */ }; static int convert_rule_to_7(struct ip_fw_rule0 *rule); static int convert_rule_to_8(struct ip_fw_rule0 *rule); #ifndef RULESIZE7 #define RULESIZE7(rule) (sizeof(struct ip_fw7) + \ ((struct ip_fw7 *)(rule))->cmd_len * 4 - 4) #endif /* * Copy the static and dynamic rules to the supplied buffer * and return the amount of space actually used. * Must be run under IPFW_UH_RLOCK */ static size_t ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) { char *bp = buf; char *ep = bp + space; struct ip_fw *rule; struct ip_fw_rule0 *dst; struct timeval boottime; int error, i, l, warnflag; time_t boot_seconds; warnflag = 0; getboottime(&boottime); boot_seconds = boottime.tv_sec; for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; if (is7) { /* Convert rule to FreeBSd 7.2 format */ l = RULESIZE7(rule); if (bp + l + sizeof(uint32_t) <= ep) { bcopy(rule, bp, l + sizeof(uint32_t)); error = set_legacy_obj_kidx(chain, (struct ip_fw_rule0 *)bp); if (error != 0) return (0); error = convert_rule_to_7((struct ip_fw_rule0 *) bp); if (error) return 0; /*XXX correct? */ /* * XXX HACK. Store the disable mask in the "next" * pointer in a wild attempt to keep the ABI the same. * Why do we do this on EVERY rule? */ bcopy(&V_set_disable, &(((struct ip_fw7 *)bp)->next_rule), sizeof(V_set_disable)); if (((struct ip_fw7 *)bp)->timestamp) ((struct ip_fw7 *)bp)->timestamp += boot_seconds; bp += l; } continue; /* go to next rule */ } l = RULEUSIZE0(rule); if (bp + l > ep) { /* should not happen */ printf("overflow dumping static rules\n"); break; } dst = (struct ip_fw_rule0 *)bp; export_rule0(rule, dst, l); error = set_legacy_obj_kidx(chain, dst); /* * XXX HACK. Store the disable mask in the "next" * pointer in a wild attempt to keep the ABI the same. * Why do we do this on EVERY rule? * * XXX: "ipfw set show" (ab)uses IP_FW_GET to read disabled mask * so we need to fail _after_ saving at least one mask. */ bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable)); if (dst->timestamp) dst->timestamp += boot_seconds; bp += l; if (error != 0) { if (error == 2) { /* Non-fatal table rewrite error. */ warnflag = 1; continue; } printf("Stop on rule %d. Fail to convert table\n", rule->rulenum); break; } } if (warnflag != 0) printf("ipfw: process %s is using legacy interfaces," " consider rebuilding\n", ""); ipfw_get_dynamic(chain, &bp, ep); /* protected by the dynamic lock */ return (bp - (char *)buf); } struct dump_args { uint32_t b; /* start rule */ uint32_t e; /* end rule */ uint32_t rcount; /* number of rules */ uint32_t rsize; /* rules size */ uint32_t tcount; /* number of tables */ int rcounters; /* counters */ }; void ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv) { ntlv->head.type = no->etlv; ntlv->head.length = sizeof(*ntlv); ntlv->idx = no->kidx; strlcpy(ntlv->name, no->name, sizeof(ntlv->name)); } /* * Export named object info in instance @ni, identified by @kidx * to ipfw_obj_ntlv. TLV is allocated from @sd space. * * Returns 0 on success. */ static int export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx, struct sockopt_data *sd) { struct named_object *no; ipfw_obj_ntlv *ntlv; no = ipfw_objhash_lookup_kidx(ni, kidx); KASSERT(no != NULL, ("invalid object kernel index passed")); ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); if (ntlv == NULL) return (ENOMEM); ipfw_export_obj_ntlv(no, ntlv); return (0); } /* * Dumps static rules with table TLVs in buffer @sd. * * Returns 0 on success. */ static int dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da, uint32_t *bmask, struct sockopt_data *sd) { int error; int i, l; uint32_t tcount; ipfw_obj_ctlv *ctlv; struct ip_fw *krule; struct namedobj_instance *ni; caddr_t dst; /* Dump table names first (if any) */ if (da->tcount > 0) { /* Header first */ ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); if (ctlv == NULL) return (ENOMEM); ctlv->head.type = IPFW_TLV_TBLNAME_LIST; ctlv->head.length = da->tcount * sizeof(ipfw_obj_ntlv) + sizeof(*ctlv); ctlv->count = da->tcount; ctlv->objsize = sizeof(ipfw_obj_ntlv); } i = 0; tcount = da->tcount; ni = ipfw_get_table_objhash(chain); while (tcount > 0) { if ((bmask[i / 32] & (1 << (i % 32))) == 0) { i++; continue; } /* Jump to shared named object bitmask */ if (i >= IPFW_TABLES_MAX) { ni = CHAIN_TO_SRV(chain); i -= IPFW_TABLES_MAX; bmask += IPFW_TABLES_MAX / 32; } if ((error = export_objhash_ntlv(ni, i, sd)) != 0) return (error); i++; tcount--; } /* Dump rules */ ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); if (ctlv == NULL) return (ENOMEM); ctlv->head.type = IPFW_TLV_RULE_LIST; ctlv->head.length = da->rsize + sizeof(*ctlv); ctlv->count = da->rcount; for (i = da->b; i < da->e; i++) { krule = chain->map[i]; l = RULEUSIZE1(krule) + sizeof(ipfw_obj_tlv); if (da->rcounters != 0) l += sizeof(struct ip_fw_bcounter); dst = (caddr_t)ipfw_get_sopt_space(sd, l); if (dst == NULL) return (ENOMEM); export_rule1(krule, dst, l, da->rcounters); } return (0); } /* * Marks every object index used in @rule with bit in @bmask. * Used to generate bitmask of referenced tables/objects for given ruleset * or its part. * * Returns number of newly-referenced objects. */ static int mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule, uint32_t *bmask) { struct opcode_obj_rewrite *rw; ipfw_insn *cmd; int bidx, cmdlen, l, count; uint16_t kidx; uint8_t subtype; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; count = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); rw = find_op_rw(cmd, &kidx, &subtype); if (rw == NULL) continue; bidx = kidx / 32; /* * Maintain separate bitmasks for table and * non-table objects. */ if (rw->etlv != IPFW_TLV_TBL_NAME) bidx += IPFW_TABLES_MAX / 32; if ((bmask[bidx] & (1 << (kidx % 32))) == 0) count++; bmask[bidx] |= 1 << (kidx % 32); } return (count); } /* * Dumps requested objects data * Data layout (version 0)(current): * Request: [ ipfw_cfg_lheader ] + IPFW_CFG_GET_* flags * size = ipfw_cfg_lheader.size * Reply: [ ipfw_cfg_lheader * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) * ipfw_obj_tlv(IPFW_TLV_RULE_ENT) [ ip_fw_bcounter (optional) ip_fw_rule ] * ] (optional) * [ ipfw_obj_ctlv(IPFW_TLV_STATE_LIST) ipfw_obj_dyntlv x N ] (optional) * ] * * NOTE IPFW_TLV_STATE_LIST has the single valid field: objsize. * The rest (size, count) are set to zero and needs to be ignored. * * Returns 0 on success. */ static int dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_cfg_lheader *hdr; struct ip_fw *rule; size_t sz, rnum; uint32_t hdr_flags; int error, i; struct dump_args da; uint32_t *bmask; hdr = (ipfw_cfg_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr)); if (hdr == NULL) return (EINVAL); error = 0; bmask = NULL; /* Allocate needed state. Note we allocate 2xspace mask, for table&srv */ if (hdr->flags & IPFW_CFG_GET_STATIC) bmask = malloc(IPFW_TABLES_MAX / 4, M_TEMP, M_WAITOK | M_ZERO); IPFW_UH_RLOCK(chain); /* * STAGE 1: Determine size/count for objects in range. * Prepare used tables bitmask. */ sz = sizeof(ipfw_cfg_lheader); memset(&da, 0, sizeof(da)); da.b = 0; da.e = chain->n_rules; if (hdr->end_rule != 0) { /* Handle custom range */ if ((rnum = hdr->start_rule) > IPFW_DEFAULT_RULE) rnum = IPFW_DEFAULT_RULE; da.b = ipfw_find_rule(chain, rnum, 0); - rnum = hdr->end_rule; - rnum = (rnum < IPFW_DEFAULT_RULE) ? rnum+1 : IPFW_DEFAULT_RULE; - da.e = ipfw_find_rule(chain, rnum, 0) + 1; + rnum = (hdr->end_rule < IPFW_DEFAULT_RULE) ? + hdr->end_rule + 1: IPFW_DEFAULT_RULE; + da.e = ipfw_find_rule(chain, rnum, UINT32_MAX) + 1; } if (hdr->flags & IPFW_CFG_GET_STATIC) { for (i = da.b; i < da.e; i++) { rule = chain->map[i]; da.rsize += RULEUSIZE1(rule) + sizeof(ipfw_obj_tlv); da.rcount++; /* Update bitmask of used objects for given range */ da.tcount += mark_object_kidx(chain, rule, bmask); } /* Add counters if requested */ if (hdr->flags & IPFW_CFG_GET_COUNTERS) { da.rsize += sizeof(struct ip_fw_bcounter) * da.rcount; da.rcounters = 1; } if (da.tcount > 0) sz += da.tcount * sizeof(ipfw_obj_ntlv) + sizeof(ipfw_obj_ctlv); sz += da.rsize + sizeof(ipfw_obj_ctlv); } if (hdr->flags & IPFW_CFG_GET_STATES) sz += ipfw_dyn_get_count() * sizeof(ipfw_obj_dyntlv) + sizeof(ipfw_obj_ctlv); /* * Fill header anyway. * Note we have to save header fields to stable storage * buffer inside @sd can be flushed after dumping rules */ hdr->size = sz; hdr->set_mask = ~V_set_disable; hdr_flags = hdr->flags; hdr = NULL; if (sd->valsize < sz) { error = ENOMEM; goto cleanup; } /* STAGE2: Store actual data */ if (hdr_flags & IPFW_CFG_GET_STATIC) { error = dump_static_rules(chain, &da, bmask, sd); if (error != 0) goto cleanup; } if (hdr_flags & IPFW_CFG_GET_STATES) error = ipfw_dump_states(chain, sd); cleanup: IPFW_UH_RUNLOCK(chain); if (bmask != NULL) free(bmask, M_TEMP); return (error); } int ipfw_check_object_name_generic(const char *name) { int nsize; nsize = sizeof(((ipfw_obj_ntlv *)0)->name); if (strnlen(name, nsize) == nsize) return (EINVAL); if (name[0] == '\0') return (EINVAL); return (0); } /* * Creates non-existent objects referenced by rule. * * Return 0 on success. */ int create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti) { struct opcode_obj_rewrite *rw; struct obj_idx *p; uint16_t kidx; int error; /* * Compatibility stuff: do actual creation for non-existing, * but referenced objects. */ for (p = oib; p < pidx; p++) { if (p->kidx != 0) continue; ti->uidx = p->uidx; ti->type = p->type; ti->atype = 0; rw = find_op_rw(cmd + p->off, NULL, NULL); KASSERT(rw != NULL, ("Unable to find handler for op %d", (cmd + p->off)->opcode)); if (rw->create_object == NULL) error = EOPNOTSUPP; else error = rw->create_object(ch, ti, &kidx); if (error == 0) { p->kidx = kidx; continue; } /* * Error happened. We have to rollback everything. * Drop all already acquired references. */ IPFW_UH_WLOCK(ch); unref_oib_objects(ch, cmd, oib, pidx); IPFW_UH_WUNLOCK(ch); return (error); } return (0); } /* * Compatibility function for old ipfw(8) binaries. * Rewrites table/nat kernel indices with userland ones. * Convert tables matching '/^\d+$/' to their atoi() value. * Use number 65535 for other tables. * * Returns 0 on success. */ static int set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule) { struct opcode_obj_rewrite *rw; struct named_object *no; ipfw_insn *cmd; char *end; long val; int cmdlen, error, l; uint16_t kidx, uidx; uint8_t subtype; error = 0; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); /* Check if is index in given opcode */ rw = find_op_rw(cmd, &kidx, &subtype); if (rw == NULL) continue; /* Try to find referenced kernel object */ no = rw->find_bykidx(ch, kidx); if (no == NULL) continue; val = strtol(no->name, &end, 10); if (*end == '\0' && val < 65535) { uidx = val; } else { /* * We are called via legacy opcode. * Save error and show table as fake number * not to make ipfw(8) hang. */ uidx = 65535; error = 2; } rw->update(cmd, uidx); } return (error); } /* * Unreferences all already-referenced objects in given @cmd rule, * using information in @oib. * * Used to rollback partially converted rule on error. */ static void unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib, struct obj_idx *end) { struct opcode_obj_rewrite *rw; struct named_object *no; struct obj_idx *p; IPFW_UH_WLOCK_ASSERT(ch); for (p = oib; p < end; p++) { if (p->kidx == 0) continue; rw = find_op_rw(cmd + p->off, NULL, NULL); KASSERT(rw != NULL, ("Unable to find handler for op %d", (cmd + p->off)->opcode)); /* Find & unref by existing idx */ no = rw->find_bykidx(ch, p->kidx); KASSERT(no != NULL, ("Ref'd object %d disappeared", p->kidx)); no->refcnt--; } } /* * Remove references from every object used in @rule. * Used at rule removal code. */ static void unref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule) { struct opcode_obj_rewrite *rw; struct named_object *no; ipfw_insn *cmd; int cmdlen, l; uint16_t kidx; uint8_t subtype; IPFW_UH_WLOCK_ASSERT(ch); l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); rw = find_op_rw(cmd, &kidx, &subtype); if (rw == NULL) continue; no = rw->find_bykidx(ch, kidx); KASSERT(no != NULL, ("object id %d not found", kidx)); KASSERT(no->subtype == subtype, ("wrong type %d (%d) for object id %d", no->subtype, subtype, kidx)); KASSERT(no->refcnt > 0, ("refcount for object %d is %d", kidx, no->refcnt)); if (no->refcnt == 1 && rw->destroy_object != NULL) rw->destroy_object(ch, no); else no->refcnt--; } } /* * Find and reference object (if any) stored in instruction @cmd. * * Saves object info in @pidx, sets * - @unresolved to 1 if object should exists but not found * * Returns non-zero value in case of error. */ static int ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, struct tid_info *ti, struct obj_idx *pidx, int *unresolved) { struct named_object *no; struct opcode_obj_rewrite *rw; int error; /* Check if this opcode is candidate for rewrite */ rw = find_op_rw(cmd, &ti->uidx, &ti->type); if (rw == NULL) return (0); /* Need to rewrite. Save necessary fields */ pidx->uidx = ti->uidx; pidx->type = ti->type; /* Try to find referenced kernel object */ error = rw->find_byname(ch, ti, &no); if (error != 0) return (error); if (no == NULL) { /* * Report about unresolved object for automaic * creation. */ *unresolved = 1; return (0); } /* * Object is already exist. * Its subtype should match with expected value. */ if (ti->type != no->subtype) return (EINVAL); /* Bump refcount and update kidx. */ no->refcnt++; rw->update(cmd, no->kidx); return (0); } /* * Finds and bumps refcount for objects referenced by given @rule. * Auto-creates non-existing tables. * Fills in @oib array with userland/kernel indexes. * * Returns 0 on success. */ static int ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule, struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti) { struct obj_idx *pidx; ipfw_insn *cmd; int cmdlen, error, l, unresolved; pidx = oib; l = rule->cmd_len; cmd = rule->cmd; cmdlen = 0; error = 0; IPFW_UH_WLOCK(ch); /* Increase refcount on each existing referenced table. */ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); unresolved = 0; error = ref_opcode_object(ch, cmd, ti, pidx, &unresolved); if (error != 0) break; /* * Compatibility stuff for old clients: * prepare to automaitcally create non-existing objects. */ if (unresolved != 0) { pidx->off = rule->cmd_len - l; pidx++; } } if (error != 0) { /* Unref everything we have already done */ unref_oib_objects(ch, rule->cmd, oib, pidx); IPFW_UH_WUNLOCK(ch); return (error); } IPFW_UH_WUNLOCK(ch); /* Perform auto-creation for non-existing objects */ if (pidx != oib) error = create_objects_compat(ch, rule->cmd, oib, pidx, ti); /* Calculate real number of dynamic objects */ ci->object_opcodes = (uint16_t)(pidx - oib); return (error); } /* * Checks is opcode is referencing table of appropriate type. * Adds reference count for found table if true. * Rewrites user-supplied opcode values with kernel ones. * * Returns 0 on success and appropriate error code otherwise. */ static int rewrite_rule_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci) { int error; ipfw_insn *cmd; uint8_t type; struct obj_idx *p, *pidx_first, *pidx_last; struct tid_info ti; /* * Prepare an array for storing opcode indices. * Use stack allocation by default. */ if (ci->object_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) { /* Stack */ pidx_first = ci->obuf; } else pidx_first = malloc( ci->object_opcodes * sizeof(struct obj_idx), M_IPFW, M_WAITOK | M_ZERO); error = 0; type = 0; memset(&ti, 0, sizeof(ti)); /* Use set rule is assigned to. */ ti.set = ci->krule->set; if (ci->ctlv != NULL) { ti.tlvs = (void *)(ci->ctlv + 1); ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv); } /* Reference all used tables and other objects */ error = ref_rule_objects(chain, ci->krule, ci, pidx_first, &ti); if (error != 0) goto free; /* * Note that ref_rule_objects() might have updated ci->object_opcodes * to reflect actual number of object opcodes. */ /* Perform rewrite of remaining opcodes */ p = pidx_first; pidx_last = pidx_first + ci->object_opcodes; for (p = pidx_first; p < pidx_last; p++) { cmd = ci->krule->cmd + p->off; update_opcode_kidx(cmd, p->kidx); } free: if (pidx_first != ci->obuf) free(pidx_first, M_IPFW); return (error); } /* * Adds one or more rules to ipfw @chain. * Data layout (version 0)(current): * Request: * [ * ip_fw3_opheader * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional *1) * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] (*2) (*3) * ] * Reply: * [ * ip_fw3_opheader * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] * ] * * Rules in reply are modified to store their actual ruleset number. * * (*1) TLVs inside IPFW_TLV_TBL_LIST needs to be sorted ascending * according to their idx field and there has to be no duplicates. * (*2) Numbered rules inside IPFW_TLV_RULE_LIST needs to be sorted ascending. * (*3) Each ip_fw structure needs to be aligned to u64 boundary. * * Returns 0 on success. */ static int add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_ctlv *ctlv, *rtlv, *tstate; ipfw_obj_ntlv *ntlv; int clen, error, idx; uint32_t count, read; struct ip_fw_rule *r; struct rule_check_info rci, *ci, *cbuf; int i, rsize; op3 = (ip_fw3_opheader *)ipfw_get_sopt_space(sd, sd->valsize); ctlv = (ipfw_obj_ctlv *)(op3 + 1); read = sizeof(ip_fw3_opheader); rtlv = NULL; tstate = NULL; cbuf = NULL; memset(&rci, 0, sizeof(struct rule_check_info)); if (read + sizeof(*ctlv) > sd->valsize) return (EINVAL); if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) { clen = ctlv->head.length; /* Check size and alignment */ if (clen > sd->valsize || clen < sizeof(*ctlv)) return (EINVAL); if ((clen % sizeof(uint64_t)) != 0) return (EINVAL); /* * Some table names or other named objects. * Check for validness. */ count = (ctlv->head.length - sizeof(*ctlv)) / sizeof(*ntlv); if (ctlv->count != count || ctlv->objsize != sizeof(*ntlv)) return (EINVAL); /* * Check each TLV. * Ensure TLVs are sorted ascending and * there are no duplicates. */ idx = -1; ntlv = (ipfw_obj_ntlv *)(ctlv + 1); while (count > 0) { if (ntlv->head.length != sizeof(ipfw_obj_ntlv)) return (EINVAL); error = ipfw_check_object_name_generic(ntlv->name); if (error != 0) return (error); if (ntlv->idx <= idx) return (EINVAL); idx = ntlv->idx; count--; ntlv++; } tstate = ctlv; read += ctlv->head.length; ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); } if (read + sizeof(*ctlv) > sd->valsize) return (EINVAL); if (ctlv->head.type == IPFW_TLV_RULE_LIST) { clen = ctlv->head.length; if (clen + read > sd->valsize || clen < sizeof(*ctlv)) return (EINVAL); if ((clen % sizeof(uint64_t)) != 0) return (EINVAL); /* * TODO: Permit adding multiple rules at once */ if (ctlv->count != 1) return (ENOTSUP); clen -= sizeof(*ctlv); if (ctlv->count > clen / sizeof(struct ip_fw_rule)) return (EINVAL); /* Allocate state for each rule or use stack */ if (ctlv->count == 1) { memset(&rci, 0, sizeof(struct rule_check_info)); cbuf = &rci; } else cbuf = malloc(ctlv->count * sizeof(*ci), M_TEMP, M_WAITOK | M_ZERO); ci = cbuf; /* * Check each rule for validness. * Ensure numbered rules are sorted ascending * and properly aligned */ idx = 0; r = (struct ip_fw_rule *)(ctlv + 1); count = 0; error = 0; while (clen > 0) { rsize = roundup2(RULESIZE(r), sizeof(uint64_t)); if (rsize > clen || ctlv->count <= count) { error = EINVAL; break; } ci->ctlv = tstate; error = check_ipfw_rule1(r, rsize, ci); if (error != 0) break; /* Check sorting */ if (r->rulenum != 0 && r->rulenum < idx) { printf("rulenum %d idx %d\n", r->rulenum, idx); error = EINVAL; break; } idx = r->rulenum; ci->urule = (caddr_t)r; rsize = roundup2(rsize, sizeof(uint64_t)); clen -= rsize; r = (struct ip_fw_rule *)((caddr_t)r + rsize); count++; ci++; } if (ctlv->count != count || error != 0) { if (cbuf != &rci) free(cbuf, M_TEMP); return (EINVAL); } rtlv = ctlv; read += ctlv->head.length; ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); } if (read != sd->valsize || rtlv == NULL || rtlv->count == 0) { if (cbuf != NULL && cbuf != &rci) free(cbuf, M_TEMP); return (EINVAL); } /* * Passed rules seems to be valid. * Allocate storage and try to add them to chain. */ for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) { clen = RULEKSIZE1((struct ip_fw_rule *)ci->urule); ci->krule = ipfw_alloc_rule(chain, clen); import_rule1(ci); } if ((error = commit_rules(chain, cbuf, rtlv->count)) != 0) { /* Free allocate krules */ for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) free_rule(ci->krule); } if (cbuf != NULL && cbuf != &rci) free(cbuf, M_TEMP); return (error); } /* * Lists all sopts currently registered. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader ipfw_sopt_info x N ] * * Returns 0 on success */ static int dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { struct _ipfw_obj_lheader *olh; ipfw_sopt_info *i; struct ipfw_sopt_handler *sh; uint32_t count, n, size; olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); if (olh == NULL) return (EINVAL); if (sd->valsize < olh->size) return (EINVAL); CTL3_LOCK(); count = ctl3_hsize; size = count * sizeof(ipfw_sopt_info) + sizeof(ipfw_obj_lheader); /* Fill in header regadless of buffer size */ olh->count = count; olh->objsize = sizeof(ipfw_sopt_info); if (size > olh->size) { olh->size = size; CTL3_UNLOCK(); return (ENOMEM); } olh->size = size; for (n = 1; n <= count; n++) { i = (ipfw_sopt_info *)ipfw_get_sopt_space(sd, sizeof(*i)); KASSERT(i != NULL, ("previously checked buffer is not enough")); sh = &ctl3_handlers[n]; i->opcode = sh->opcode; i->version = sh->version; i->refcnt = sh->refcnt; } CTL3_UNLOCK(); return (0); } /* * Compares two opcodes. * Used both in qsort() and bsearch(). * * Returns 0 if match is found. */ static int compare_opcodes(const void *_a, const void *_b) { const struct opcode_obj_rewrite *a, *b; a = (const struct opcode_obj_rewrite *)_a; b = (const struct opcode_obj_rewrite *)_b; if (a->opcode < b->opcode) return (-1); else if (a->opcode > b->opcode) return (1); return (0); } /* * XXX: Rewrite bsearch() */ static int find_op_rw_range(uint16_t op, struct opcode_obj_rewrite **plo, struct opcode_obj_rewrite **phi) { struct opcode_obj_rewrite *ctl3_max, *lo, *hi, h, *rw; memset(&h, 0, sizeof(h)); h.opcode = op; rw = (struct opcode_obj_rewrite *)bsearch(&h, ctl3_rewriters, ctl3_rsize, sizeof(h), compare_opcodes); if (rw == NULL) return (1); /* Find the first element matching the same opcode */ lo = rw; for ( ; lo > ctl3_rewriters && (lo - 1)->opcode == op; lo--) ; /* Find the last element matching the same opcode */ hi = rw; ctl3_max = ctl3_rewriters + ctl3_rsize; for ( ; (hi + 1) < ctl3_max && (hi + 1)->opcode == op; hi++) ; *plo = lo; *phi = hi; return (0); } /* * Finds opcode object rewriter based on @code. * * Returns pointer to handler or NULL. */ static struct opcode_obj_rewrite * find_op_rw(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { struct opcode_obj_rewrite *rw, *lo, *hi; uint16_t uidx; uint8_t subtype; if (find_op_rw_range(cmd->opcode, &lo, &hi) != 0) return (NULL); for (rw = lo; rw <= hi; rw++) { if (rw->classifier(cmd, &uidx, &subtype) == 0) { if (puidx != NULL) *puidx = uidx; if (ptype != NULL) *ptype = subtype; return (rw); } } return (NULL); } int classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx) { if (find_op_rw(cmd, puidx, NULL) == NULL) return (1); return (0); } void update_opcode_kidx(ipfw_insn *cmd, uint16_t idx) { struct opcode_obj_rewrite *rw; rw = find_op_rw(cmd, NULL, NULL); KASSERT(rw != NULL, ("No handler to update opcode %d", cmd->opcode)); rw->update(cmd, idx); } void ipfw_init_obj_rewriter() { ctl3_rewriters = NULL; ctl3_rsize = 0; } void ipfw_destroy_obj_rewriter() { if (ctl3_rewriters != NULL) free(ctl3_rewriters, M_IPFW); ctl3_rewriters = NULL; ctl3_rsize = 0; } /* * Adds one or more opcode object rewrite handlers to the global array. * Function may sleep. */ void ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count) { size_t sz; struct opcode_obj_rewrite *tmp; CTL3_LOCK(); for (;;) { sz = ctl3_rsize + count; CTL3_UNLOCK(); tmp = malloc(sizeof(*rw) * sz, M_IPFW, M_WAITOK | M_ZERO); CTL3_LOCK(); if (ctl3_rsize + count <= sz) break; /* Retry */ free(tmp, M_IPFW); } /* Merge old & new arrays */ sz = ctl3_rsize + count; memcpy(tmp, ctl3_rewriters, ctl3_rsize * sizeof(*rw)); memcpy(&tmp[ctl3_rsize], rw, count * sizeof(*rw)); qsort(tmp, sz, sizeof(*rw), compare_opcodes); /* Switch new and free old */ if (ctl3_rewriters != NULL) free(ctl3_rewriters, M_IPFW); ctl3_rewriters = tmp; ctl3_rsize = sz; CTL3_UNLOCK(); } /* * Removes one or more object rewrite handlers from the global array. */ int ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count) { size_t sz; struct opcode_obj_rewrite *ctl3_max, *ktmp, *lo, *hi; int i; CTL3_LOCK(); for (i = 0; i < count; i++) { if (find_op_rw_range(rw[i].opcode, &lo, &hi) != 0) continue; for (ktmp = lo; ktmp <= hi; ktmp++) { if (ktmp->classifier != rw[i].classifier) continue; ctl3_max = ctl3_rewriters + ctl3_rsize; sz = (ctl3_max - (ktmp + 1)) * sizeof(*ktmp); memmove(ktmp, ktmp + 1, sz); ctl3_rsize--; break; } } if (ctl3_rsize == 0) { if (ctl3_rewriters != NULL) free(ctl3_rewriters, M_IPFW); ctl3_rewriters = NULL; } CTL3_UNLOCK(); return (0); } static int export_objhash_ntlv_internal(struct namedobj_instance *ni, struct named_object *no, void *arg) { struct sockopt_data *sd; ipfw_obj_ntlv *ntlv; sd = (struct sockopt_data *)arg; ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); if (ntlv == NULL) return (ENOMEM); ipfw_export_obj_ntlv(no, ntlv); return (0); } /* * Lists all service objects. * Data layout (v0)(current): * Request: [ ipfw_obj_lheader ] size = ipfw_obj_lheader.size * Reply: [ ipfw_obj_lheader [ ipfw_obj_ntlv x N ] (optional) ] * Returns 0 on success */ static int dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3, struct sockopt_data *sd) { ipfw_obj_lheader *hdr; int count; hdr = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr)); if (hdr == NULL) return (EINVAL); IPFW_UH_RLOCK(chain); count = ipfw_objhash_count(CHAIN_TO_SRV(chain)); hdr->size = sizeof(ipfw_obj_lheader) + count * sizeof(ipfw_obj_ntlv); if (sd->valsize < hdr->size) { IPFW_UH_RUNLOCK(chain); return (ENOMEM); } hdr->count = count; hdr->objsize = sizeof(ipfw_obj_ntlv); if (count > 0) ipfw_objhash_foreach(CHAIN_TO_SRV(chain), export_objhash_ntlv_internal, sd); IPFW_UH_RUNLOCK(chain); return (0); } /* * Compares two sopt handlers (code, version and handler ptr). * Used both as qsort() and bsearch(). * Does not compare handler for latter case. * * Returns 0 if match is found. */ static int compare_sh(const void *_a, const void *_b) { const struct ipfw_sopt_handler *a, *b; a = (const struct ipfw_sopt_handler *)_a; b = (const struct ipfw_sopt_handler *)_b; if (a->opcode < b->opcode) return (-1); else if (a->opcode > b->opcode) return (1); if (a->version < b->version) return (-1); else if (a->version > b->version) return (1); /* bsearch helper */ if (a->handler == NULL) return (0); if ((uintptr_t)a->handler < (uintptr_t)b->handler) return (-1); else if ((uintptr_t)a->handler > (uintptr_t)b->handler) return (1); return (0); } /* * Finds sopt handler based on @code and @version. * * Returns pointer to handler or NULL. */ static struct ipfw_sopt_handler * find_sh(uint16_t code, uint8_t version, sopt_handler_f *handler) { struct ipfw_sopt_handler *sh, h; memset(&h, 0, sizeof(h)); h.opcode = code; h.version = version; h.handler = handler; sh = (struct ipfw_sopt_handler *)bsearch(&h, ctl3_handlers, ctl3_hsize, sizeof(h), compare_sh); return (sh); } static int find_ref_sh(uint16_t opcode, uint8_t version, struct ipfw_sopt_handler *psh) { struct ipfw_sopt_handler *sh; CTL3_LOCK(); if ((sh = find_sh(opcode, version, NULL)) == NULL) { CTL3_UNLOCK(); printf("ipfw: ipfw_ctl3 invalid option %d""v""%d\n", opcode, version); return (EINVAL); } sh->refcnt++; ctl3_refct++; /* Copy handler data to requested buffer */ *psh = *sh; CTL3_UNLOCK(); return (0); } static void find_unref_sh(struct ipfw_sopt_handler *psh) { struct ipfw_sopt_handler *sh; CTL3_LOCK(); sh = find_sh(psh->opcode, psh->version, NULL); KASSERT(sh != NULL, ("ctl3 handler disappeared")); sh->refcnt--; ctl3_refct--; CTL3_UNLOCK(); } void ipfw_init_sopt_handler() { CTL3_LOCK_INIT(); IPFW_ADD_SOPT_HANDLER(1, scodes); } void ipfw_destroy_sopt_handler() { IPFW_DEL_SOPT_HANDLER(1, scodes); CTL3_LOCK_DESTROY(); } /* * Adds one or more sockopt handlers to the global array. * Function may sleep. */ void ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count) { size_t sz; struct ipfw_sopt_handler *tmp; CTL3_LOCK(); for (;;) { sz = ctl3_hsize + count; CTL3_UNLOCK(); tmp = malloc(sizeof(*sh) * sz, M_IPFW, M_WAITOK | M_ZERO); CTL3_LOCK(); if (ctl3_hsize + count <= sz) break; /* Retry */ free(tmp, M_IPFW); } /* Merge old & new arrays */ sz = ctl3_hsize + count; memcpy(tmp, ctl3_handlers, ctl3_hsize * sizeof(*sh)); memcpy(&tmp[ctl3_hsize], sh, count * sizeof(*sh)); qsort(tmp, sz, sizeof(*sh), compare_sh); /* Switch new and free old */ if (ctl3_handlers != NULL) free(ctl3_handlers, M_IPFW); ctl3_handlers = tmp; ctl3_hsize = sz; ctl3_gencnt++; CTL3_UNLOCK(); } /* * Removes one or more sockopt handlers from the global array. */ int ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count) { size_t sz; struct ipfw_sopt_handler *tmp, *h; int i; CTL3_LOCK(); for (i = 0; i < count; i++) { tmp = &sh[i]; h = find_sh(tmp->opcode, tmp->version, tmp->handler); if (h == NULL) continue; sz = (ctl3_handlers + ctl3_hsize - (h + 1)) * sizeof(*h); memmove(h, h + 1, sz); ctl3_hsize--; } if (ctl3_hsize == 0) { if (ctl3_handlers != NULL) free(ctl3_handlers, M_IPFW); ctl3_handlers = NULL; } ctl3_gencnt++; CTL3_UNLOCK(); return (0); } /* * Writes data accumulated in @sd to sockopt buffer. * Zeroes internal @sd buffer. */ static int ipfw_flush_sopt_data(struct sockopt_data *sd) { struct sockopt *sopt; int error; size_t sz; sz = sd->koff; if (sz == 0) return (0); sopt = sd->sopt; if (sopt->sopt_dir == SOPT_GET) { error = copyout(sd->kbuf, sopt->sopt_val, sz); if (error != 0) return (error); } memset(sd->kbuf, 0, sd->ksize); sd->ktotal += sz; sd->koff = 0; if (sd->ktotal + sd->ksize < sd->valsize) sd->kavail = sd->ksize; else sd->kavail = sd->valsize - sd->ktotal; /* Update sopt buffer data */ sopt->sopt_valsize = sd->ktotal; sopt->sopt_val = sd->sopt_val + sd->ktotal; return (0); } /* * Ensures that @sd buffer has contiguous @neeeded number of * bytes. * * Returns pointer to requested space or NULL. */ caddr_t ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed) { int error; caddr_t addr; if (sd->kavail < needed) { /* * Flush data and try another time. */ error = ipfw_flush_sopt_data(sd); if (sd->kavail < needed || error != 0) return (NULL); } addr = sd->kbuf + sd->koff; sd->koff += needed; sd->kavail -= needed; return (addr); } /* * Requests @needed contiguous bytes from @sd buffer. * Function is used to notify subsystem that we are * interesed in first @needed bytes (request header) * and the rest buffer can be safely zeroed. * * Returns pointer to requested space or NULL. */ caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed) { caddr_t addr; if ((addr = ipfw_get_sopt_space(sd, needed)) == NULL) return (NULL); if (sd->kavail > 0) memset(sd->kbuf + sd->koff, 0, sd->kavail); return (addr); } /* * New sockopt handler. */ int ipfw_ctl3(struct sockopt *sopt) { int error, locked; size_t size, valsize; struct ip_fw_chain *chain; char xbuf[256]; struct sockopt_data sdata; struct ipfw_sopt_handler h; ip_fw3_opheader *op3 = NULL; error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); if (error != 0) return (error); if (sopt->sopt_name != IP_FW3) return (ipfw_ctl(sopt)); chain = &V_layer3_chain; error = 0; /* Save original valsize before it is altered via sooptcopyin() */ valsize = sopt->sopt_valsize; memset(&sdata, 0, sizeof(sdata)); /* Read op3 header first to determine actual operation */ op3 = (ip_fw3_opheader *)xbuf; error = sooptcopyin(sopt, op3, sizeof(*op3), sizeof(*op3)); if (error != 0) return (error); sopt->sopt_valsize = valsize; /* * Find and reference command. */ error = find_ref_sh(op3->opcode, op3->version, &h); if (error != 0) return (error); /* * Disallow modifications in really-really secure mode, but still allow * the logging counters to be reset. */ if ((h.dir & HDIR_SET) != 0 && h.opcode != IP_FW_XRESETLOG) { error = securelevel_ge(sopt->sopt_td->td_ucred, 3); if (error != 0) { find_unref_sh(&h); return (error); } } /* * Fill in sockopt_data structure that may be useful for * IP_FW3 get requests. */ locked = 0; if (valsize <= sizeof(xbuf)) { /* use on-stack buffer */ sdata.kbuf = xbuf; sdata.ksize = sizeof(xbuf); sdata.kavail = valsize; } else { /* * Determine opcode type/buffer size: * allocate sliding-window buf for data export or * contiguous buffer for special ops. */ if ((h.dir & HDIR_SET) != 0) { /* Set request. Allocate contigous buffer. */ if (valsize > CTL3_LARGEBUF) { find_unref_sh(&h); return (EFBIG); } size = valsize; } else { /* Get request. Allocate sliding window buffer */ size = (valsizesopt_val, valsize); if (error != 0) return (error); locked = 1; } } sdata.kbuf = malloc(size, M_TEMP, M_WAITOK | M_ZERO); sdata.ksize = size; sdata.kavail = size; } sdata.sopt = sopt; sdata.sopt_val = sopt->sopt_val; sdata.valsize = valsize; /* * Copy either all request (if valsize < bsize_max) * or first bsize_max bytes to guarantee most consumers * that all necessary data has been copied). * Anyway, copy not less than sizeof(ip_fw3_opheader). */ if ((error = sooptcopyin(sopt, sdata.kbuf, sdata.ksize, sizeof(ip_fw3_opheader))) != 0) return (error); op3 = (ip_fw3_opheader *)sdata.kbuf; /* Finally, run handler */ error = h.handler(chain, op3, &sdata); find_unref_sh(&h); /* Flush state and free buffers */ if (error == 0) error = ipfw_flush_sopt_data(&sdata); else ipfw_flush_sopt_data(&sdata); if (locked != 0) vsunlock(sdata.sopt_val, valsize); /* Restore original pointer and set number of bytes written */ sopt->sopt_val = sdata.sopt_val; sopt->sopt_valsize = sdata.ktotal; if (sdata.kbuf != xbuf) free(sdata.kbuf, M_TEMP); return (error); } /** * {set|get}sockopt parser. */ int ipfw_ctl(struct sockopt *sopt) { #define RULE_MAXSIZE (512*sizeof(u_int32_t)) int error; size_t size, valsize; struct ip_fw *buf; struct ip_fw_rule0 *rule; struct ip_fw_chain *chain; u_int32_t rulenum[2]; uint32_t opt; struct rule_check_info ci; IPFW_RLOCK_TRACKER; chain = &V_layer3_chain; error = 0; /* Save original valsize before it is altered via sooptcopyin() */ valsize = sopt->sopt_valsize; opt = sopt->sopt_name; /* * Disallow modifications in really-really secure mode, but still allow * the logging counters to be reset. */ if (opt == IP_FW_ADD || (sopt->sopt_dir == SOPT_SET && opt != IP_FW_RESETLOG)) { error = securelevel_ge(sopt->sopt_td->td_ucred, 3); if (error != 0) return (error); } switch (opt) { case IP_FW_GET: /* * pass up a copy of the current rules. Static rules * come first (the last of which has number IPFW_DEFAULT_RULE), * followed by a possibly empty list of dynamic rule. * The last dynamic rule has NULL in the "next" field. * * Note that the calculated size is used to bound the * amount of data returned to the user. The rule set may * change between calculating the size and returning the * data in which case we'll just return what fits. */ for (;;) { int len = 0, want; size = chain->static_len; size += ipfw_dyn_len(); if (size >= sopt->sopt_valsize) break; buf = malloc(size, M_TEMP, M_WAITOK | M_ZERO); IPFW_UH_RLOCK(chain); /* check again how much space we need */ want = chain->static_len + ipfw_dyn_len(); if (size >= want) len = ipfw_getrules(chain, buf, size); IPFW_UH_RUNLOCK(chain); if (size >= want) error = sooptcopyout(sopt, buf, len); free(buf, M_TEMP); if (size >= want) break; } break; case IP_FW_FLUSH: /* locking is done within del_entry() */ error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */ break; case IP_FW_ADD: rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK); error = sooptcopyin(sopt, rule, RULE_MAXSIZE, sizeof(struct ip_fw7) ); memset(&ci, 0, sizeof(struct rule_check_info)); /* * If the size of commands equals RULESIZE7 then we assume * a FreeBSD7.2 binary is talking to us (set is7=1). * is7 is persistent so the next 'ipfw list' command * will use this format. * NOTE: If wrong version is guessed (this can happen if * the first ipfw command is 'ipfw [pipe] list') * the ipfw binary may crash or loop infinitly... */ size = sopt->sopt_valsize; if (size == RULESIZE7(rule)) { is7 = 1; error = convert_rule_to_8(rule); if (error) { free(rule, M_TEMP); return error; } size = RULESIZE(rule); } else is7 = 0; if (error == 0) error = check_ipfw_rule0(rule, size, &ci); if (error == 0) { /* locking is done within add_rule() */ struct ip_fw *krule; krule = ipfw_alloc_rule(chain, RULEKSIZE0(rule)); ci.urule = (caddr_t)rule; ci.krule = krule; import_rule0(&ci); error = commit_rules(chain, &ci, 1); if (error != 0) free_rule(ci.krule); else if (sopt->sopt_dir == SOPT_GET) { if (is7) { error = convert_rule_to_7(rule); size = RULESIZE7(rule); if (error) { free(rule, M_TEMP); return error; } } error = sooptcopyout(sopt, rule, size); } } free(rule, M_TEMP); break; case IP_FW_DEL: /* * IP_FW_DEL is used for deleting single rules or sets, * and (ab)used to atomically manipulate sets. Argument size * is used to distinguish between the two: * sizeof(u_int32_t) * delete single rule or set of rules, * or reassign rules (or sets) to a different set. * 2*sizeof(u_int32_t) * atomic disable/enable sets. * first u_int32_t contains sets to be disabled, * second u_int32_t contains sets to be enabled. */ error = sooptcopyin(sopt, rulenum, 2*sizeof(u_int32_t), sizeof(u_int32_t)); if (error) break; size = sopt->sopt_valsize; if (size == sizeof(u_int32_t) && rulenum[0] != 0) { /* delete or reassign, locking done in del_entry() */ error = del_entry(chain, rulenum[0]); } else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */ IPFW_UH_WLOCK(chain); V_set_disable = (V_set_disable | rulenum[0]) & ~rulenum[1] & ~(1<sopt_val != 0) { error = sooptcopyin(sopt, rulenum, sizeof(u_int32_t), sizeof(u_int32_t)); if (error) break; } error = zero_entry(chain, rulenum[0], sopt->sopt_name == IP_FW_RESETLOG); break; /*--- TABLE opcodes ---*/ case IP_FW_TABLE_ADD: case IP_FW_TABLE_DEL: { ipfw_table_entry ent; struct tentry_info tei; struct tid_info ti; struct table_value v; error = sooptcopyin(sopt, &ent, sizeof(ent), sizeof(ent)); if (error) break; memset(&tei, 0, sizeof(tei)); tei.paddr = &ent.addr; tei.subtype = AF_INET; tei.masklen = ent.masklen; ipfw_import_table_value_legacy(ent.value, &v); tei.pvalue = &v; memset(&ti, 0, sizeof(ti)); ti.uidx = ent.tbl; ti.type = IPFW_TABLE_CIDR; error = (opt == IP_FW_TABLE_ADD) ? add_table_entry(chain, &ti, &tei, 0, 1) : del_table_entry(chain, &ti, &tei, 0, 1); } break; case IP_FW_TABLE_FLUSH: { u_int16_t tbl; struct tid_info ti; error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)); if (error) break; memset(&ti, 0, sizeof(ti)); ti.uidx = tbl; error = flush_table(chain, &ti); } break; case IP_FW_TABLE_GETSIZE: { u_int32_t tbl, cnt; struct tid_info ti; if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)))) break; memset(&ti, 0, sizeof(ti)); ti.uidx = tbl; IPFW_RLOCK(chain); error = ipfw_count_table(chain, &ti, &cnt); IPFW_RUNLOCK(chain); if (error) break; error = sooptcopyout(sopt, &cnt, sizeof(cnt)); } break; case IP_FW_TABLE_LIST: { ipfw_table *tbl; struct tid_info ti; if (sopt->sopt_valsize < sizeof(*tbl)) { error = EINVAL; break; } size = sopt->sopt_valsize; tbl = malloc(size, M_TEMP, M_WAITOK); error = sooptcopyin(sopt, tbl, size, sizeof(*tbl)); if (error) { free(tbl, M_TEMP); break; } tbl->size = (size - sizeof(*tbl)) / sizeof(ipfw_table_entry); memset(&ti, 0, sizeof(ti)); ti.uidx = tbl->tbl; IPFW_RLOCK(chain); error = ipfw_dump_table_legacy(chain, &ti, tbl); IPFW_RUNLOCK(chain); if (error) { free(tbl, M_TEMP); break; } error = sooptcopyout(sopt, tbl, size); free(tbl, M_TEMP); } break; /*--- NAT operations are protected by the IPFW_LOCK ---*/ case IP_FW_NAT_CFG: if (IPFW_NAT_LOADED) error = ipfw_nat_cfg_ptr(sopt); else { printf("IP_FW_NAT_CFG: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_DEL: if (IPFW_NAT_LOADED) error = ipfw_nat_del_ptr(sopt); else { printf("IP_FW_NAT_DEL: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_GET_CONFIG: if (IPFW_NAT_LOADED) error = ipfw_nat_get_cfg_ptr(sopt); else { printf("IP_FW_NAT_GET_CFG: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; case IP_FW_NAT_GET_LOG: if (IPFW_NAT_LOADED) error = ipfw_nat_get_log_ptr(sopt); else { printf("IP_FW_NAT_GET_LOG: %s\n", "ipfw_nat not present, please load it"); error = EINVAL; } break; default: printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name); error = EINVAL; } return (error); #undef RULE_MAXSIZE } #define RULE_MAXSIZE (256*sizeof(u_int32_t)) /* Functions to convert rules 7.2 <==> 8.0 */ static int convert_rule_to_7(struct ip_fw_rule0 *rule) { /* Used to modify original rule */ struct ip_fw7 *rule7 = (struct ip_fw7 *)rule; /* copy of original rule, version 8 */ struct ip_fw_rule0 *tmp; /* Used to copy commands */ ipfw_insn *ccmd, *dst; int ll = 0, ccmdlen = 0; tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO); if (tmp == NULL) { return 1; //XXX error } bcopy(rule, tmp, RULE_MAXSIZE); /* Copy fields */ //rule7->_pad = tmp->_pad; rule7->set = tmp->set; rule7->rulenum = tmp->rulenum; rule7->cmd_len = tmp->cmd_len; rule7->act_ofs = tmp->act_ofs; rule7->next_rule = (struct ip_fw7 *)tmp->next_rule; rule7->cmd_len = tmp->cmd_len; rule7->pcnt = tmp->pcnt; rule7->bcnt = tmp->bcnt; rule7->timestamp = tmp->timestamp; /* Copy commands */ for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule7->cmd ; ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) { ccmdlen = F_LEN(ccmd); bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t)); if (dst->opcode > O_NAT) /* O_REASS doesn't exists in 7.2 version, so * decrement opcode if it is after O_REASS */ dst->opcode--; if (ccmdlen > ll) { printf("ipfw: opcode %d size truncated\n", ccmd->opcode); return EINVAL; } } free(tmp, M_TEMP); return 0; } static int convert_rule_to_8(struct ip_fw_rule0 *rule) { /* Used to modify original rule */ struct ip_fw7 *rule7 = (struct ip_fw7 *) rule; /* Used to copy commands */ ipfw_insn *ccmd, *dst; int ll = 0, ccmdlen = 0; /* Copy of original rule */ struct ip_fw7 *tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO); if (tmp == NULL) { return 1; //XXX error } bcopy(rule7, tmp, RULE_MAXSIZE); for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule->cmd ; ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) { ccmdlen = F_LEN(ccmd); bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t)); if (dst->opcode > O_NAT) /* O_REASS doesn't exists in 7.2 version, so * increment opcode if it is after O_REASS */ dst->opcode++; if (ccmdlen > ll) { printf("ipfw: opcode %d size truncated\n", ccmd->opcode); return EINVAL; } } rule->_pad = tmp->_pad; rule->set = tmp->set; rule->rulenum = tmp->rulenum; rule->cmd_len = tmp->cmd_len; rule->act_ofs = tmp->act_ofs; rule->next_rule = (struct ip_fw *)tmp->next_rule; rule->cmd_len = tmp->cmd_len; rule->id = 0; /* XXX see if is ok = 0 */ rule->pcnt = tmp->pcnt; rule->bcnt = tmp->bcnt; rule->timestamp = tmp->timestamp; free (tmp, M_TEMP); return 0; } /* * Named object api * */ void ipfw_init_srv(struct ip_fw_chain *ch) { ch->srvmap = ipfw_objhash_create(IPFW_OBJECTS_DEFAULT); ch->srvstate = malloc(sizeof(void *) * IPFW_OBJECTS_DEFAULT, M_IPFW, M_WAITOK | M_ZERO); } void ipfw_destroy_srv(struct ip_fw_chain *ch) { free(ch->srvstate, M_IPFW); ipfw_objhash_destroy(ch->srvmap); } /* * Allocate new bitmask which can be used to enlarge/shrink * named instance index. */ void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks) { size_t size; int max_blocks; u_long *idx_mask; KASSERT((items % BLOCK_ITEMS) == 0, ("bitmask size needs to power of 2 and greater or equal to %zu", BLOCK_ITEMS)); max_blocks = items / BLOCK_ITEMS; size = items / 8; idx_mask = malloc(size * IPFW_MAX_SETS, M_IPFW, M_WAITOK); /* Mark all as free */ memset(idx_mask, 0xFF, size * IPFW_MAX_SETS); *idx_mask &= ~(u_long)1; /* Skip index 0 */ *idx = idx_mask; *pblocks = max_blocks; } /* * Copy current bitmask index to new one. */ void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, void **idx, int *blocks) { int old_blocks, new_blocks; u_long *old_idx, *new_idx; int i; old_idx = ni->idx_mask; old_blocks = ni->max_blocks; new_idx = *idx; new_blocks = *blocks; for (i = 0; i < IPFW_MAX_SETS; i++) { memcpy(&new_idx[new_blocks * i], &old_idx[old_blocks * i], old_blocks * sizeof(u_long)); } } /* * Swaps current @ni index with new one. */ void ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, void **idx, int *blocks) { int old_blocks; u_long *old_idx; old_idx = ni->idx_mask; old_blocks = ni->max_blocks; ni->idx_mask = *idx; ni->max_blocks = *blocks; /* Save old values */ *idx = old_idx; *blocks = old_blocks; } void ipfw_objhash_bitmap_free(void *idx, int blocks) { free(idx, M_IPFW); } /* * Creates named hash instance. * Must be called without holding any locks. * Return pointer to new instance. */ struct namedobj_instance * ipfw_objhash_create(uint32_t items) { struct namedobj_instance *ni; int i; size_t size; size = sizeof(struct namedobj_instance) + sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE + sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE; ni = malloc(size, M_IPFW, M_WAITOK | M_ZERO); ni->nn_size = NAMEDOBJ_HASH_SIZE; ni->nv_size = NAMEDOBJ_HASH_SIZE; ni->names = (struct namedobjects_head *)(ni +1); ni->values = &ni->names[ni->nn_size]; for (i = 0; i < ni->nn_size; i++) TAILQ_INIT(&ni->names[i]); for (i = 0; i < ni->nv_size; i++) TAILQ_INIT(&ni->values[i]); /* Set default hashing/comparison functions */ ni->hash_f = objhash_hash_name; ni->cmp_f = objhash_cmp_name; /* Allocate bitmask separately due to possible resize */ ipfw_objhash_bitmap_alloc(items, (void*)&ni->idx_mask, &ni->max_blocks); return (ni); } void ipfw_objhash_destroy(struct namedobj_instance *ni) { free(ni->idx_mask, M_IPFW); free(ni, M_IPFW); } void ipfw_objhash_set_funcs(struct namedobj_instance *ni, objhash_hash_f *hash_f, objhash_cmp_f *cmp_f) { ni->hash_f = hash_f; ni->cmp_f = cmp_f; } static uint32_t objhash_hash_name(struct namedobj_instance *ni, const void *name, uint32_t set) { return (fnv_32_str((const char *)name, FNV1_32_INIT)); } static int objhash_cmp_name(struct named_object *no, const void *name, uint32_t set) { if ((strcmp(no->name, (const char *)name) == 0) && (no->set == set)) return (0); return (1); } static uint32_t objhash_hash_idx(struct namedobj_instance *ni, uint32_t val) { uint32_t v; v = val % (ni->nv_size - 1); return (v); } struct named_object * ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name) { struct named_object *no; uint32_t hash; hash = ni->hash_f(ni, name, set) % ni->nn_size; TAILQ_FOREACH(no, &ni->names[hash], nn_next) { if (ni->cmp_f(no, name, set) == 0) return (no); } return (NULL); } /* * Find named object by @uid. * Check @tlvs for valid data inside. * * Returns pointer to found TLV or NULL. */ ipfw_obj_ntlv * ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx, uint32_t etlv) { ipfw_obj_ntlv *ntlv; uintptr_t pa, pe; int l; pa = (uintptr_t)tlvs; pe = pa + len; l = 0; for (; pa < pe; pa += l) { ntlv = (ipfw_obj_ntlv *)pa; l = ntlv->head.length; if (l != sizeof(*ntlv)) return (NULL); if (ntlv->idx != uidx) continue; /* * When userland has specified zero TLV type, do * not compare it with eltv. In some cases userland * doesn't know what type should it have. Use only * uidx and name for search named_object. */ if (ntlv->head.type != 0 && ntlv->head.type != (uint16_t)etlv) continue; if (ipfw_check_object_name_generic(ntlv->name) != 0) return (NULL); return (ntlv); } return (NULL); } /* * Finds object config based on either legacy index * or name in ntlv. * Note @ti structure contains unchecked data from userland. * * Returns 0 in success and fills in @pno with found config */ int ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti, uint32_t etlv, struct named_object **pno) { char *name; ipfw_obj_ntlv *ntlv; uint32_t set; if (ti->tlvs == NULL) return (EINVAL); ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, etlv); if (ntlv == NULL) return (EINVAL); name = ntlv->name; /* * Use set provided by @ti instead of @ntlv one. * This is needed due to different sets behavior * controlled by V_fw_tables_sets. */ set = ti->set; *pno = ipfw_objhash_lookup_name(ni, set, name); if (*pno == NULL) return (ESRCH); return (0); } /* * Find named object by name, considering also its TLV type. */ struct named_object * ipfw_objhash_lookup_name_type(struct namedobj_instance *ni, uint32_t set, uint32_t type, const char *name) { struct named_object *no; uint32_t hash; hash = ni->hash_f(ni, name, set) % ni->nn_size; TAILQ_FOREACH(no, &ni->names[hash], nn_next) { if (ni->cmp_f(no, name, set) == 0 && no->etlv == (uint16_t)type) return (no); } return (NULL); } struct named_object * ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t kidx) { struct named_object *no; uint32_t hash; hash = objhash_hash_idx(ni, kidx); TAILQ_FOREACH(no, &ni->values[hash], nv_next) { if (no->kidx == kidx) return (no); } return (NULL); } int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a, struct named_object *b) { if ((strcmp(a->name, b->name) == 0) && a->set == b->set) return (1); return (0); } void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no) { uint32_t hash; hash = ni->hash_f(ni, no->name, no->set) % ni->nn_size; TAILQ_INSERT_HEAD(&ni->names[hash], no, nn_next); hash = objhash_hash_idx(ni, no->kidx); TAILQ_INSERT_HEAD(&ni->values[hash], no, nv_next); ni->count++; } void ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no) { uint32_t hash; hash = ni->hash_f(ni, no->name, no->set) % ni->nn_size; TAILQ_REMOVE(&ni->names[hash], no, nn_next); hash = objhash_hash_idx(ni, no->kidx); TAILQ_REMOVE(&ni->values[hash], no, nv_next); ni->count--; } uint32_t ipfw_objhash_count(struct namedobj_instance *ni) { return (ni->count); } uint32_t ipfw_objhash_count_type(struct namedobj_instance *ni, uint16_t type) { struct named_object *no; uint32_t count; int i; count = 0; for (i = 0; i < ni->nn_size; i++) { TAILQ_FOREACH(no, &ni->names[i], nn_next) { if (no->etlv == type) count++; } } return (count); } /* * Runs @func for each found named object. * It is safe to delete objects from callback */ int ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, void *arg) { struct named_object *no, *no_tmp; int i, ret; for (i = 0; i < ni->nn_size; i++) { TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) { ret = f(ni, no, arg); if (ret != 0) return (ret); } } return (0); } /* * Runs @f for each found named object with type @type. * It is safe to delete objects from callback */ int ipfw_objhash_foreach_type(struct namedobj_instance *ni, objhash_cb_t *f, void *arg, uint16_t type) { struct named_object *no, *no_tmp; int i, ret; for (i = 0; i < ni->nn_size; i++) { TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) { if (no->etlv != type) continue; ret = f(ni, no, arg); if (ret != 0) return (ret); } } return (0); } /* * Removes index from given set. * Returns 0 on success. */ int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx) { u_long *mask; int i, v; i = idx / BLOCK_ITEMS; v = idx % BLOCK_ITEMS; if (i >= ni->max_blocks) return (1); mask = &ni->idx_mask[i]; if ((*mask & ((u_long)1 << v)) != 0) return (1); /* Mark as free */ *mask |= (u_long)1 << v; /* Update free offset */ if (ni->free_off[0] > i) ni->free_off[0] = i; return (0); } /* * Allocate new index in given instance and stores in in @pidx. * Returns 0 on success. */ int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx) { struct namedobj_instance *ni; u_long *mask; int i, off, v; ni = (struct namedobj_instance *)n; off = ni->free_off[0]; mask = &ni->idx_mask[off]; for (i = off; i < ni->max_blocks; i++, mask++) { if ((v = ffsl(*mask)) == 0) continue; /* Mark as busy */ *mask &= ~ ((u_long)1 << (v - 1)); ni->free_off[0] = i; v = BLOCK_ITEMS * i + v - 1; *pidx = v; return (0); } return (1); } /* end of file */ Index: stable/11 =================================================================== --- stable/11 (revision 326387) +++ stable/11 (revision 326388) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r326086,326115-326118