Index: head/sys/netgraph/ng_ipfw.c =================================================================== --- head/sys/netgraph/ng_ipfw.c (revision 345162) +++ head/sys/netgraph/ng_ipfw.c (revision 345163) @@ -1,360 +1,360 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 2005, Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ng_ipfw_mod_event(module_t mod, int event, void *data); static ng_constructor_t ng_ipfw_constructor; static ng_shutdown_t ng_ipfw_shutdown; static ng_newhook_t ng_ipfw_newhook; static ng_connect_t ng_ipfw_connect; static ng_findhook_t ng_ipfw_findhook; static ng_rcvdata_t ng_ipfw_rcvdata; static ng_disconnect_t ng_ipfw_disconnect; static hook_p ng_ipfw_findhook1(node_p, u_int16_t ); -static int ng_ipfw_input(struct mbuf **, int, struct ip_fw_args *, - int); +static int ng_ipfw_input(struct mbuf **, struct ip_fw_args *, bool); /* We have only one node */ static node_p fw_node; /* Netgraph node type descriptor */ static struct ng_type ng_ipfw_typestruct = { .version = NG_ABI_VERSION, .name = NG_IPFW_NODE_TYPE, .mod_event = ng_ipfw_mod_event, .constructor = ng_ipfw_constructor, .shutdown = ng_ipfw_shutdown, .newhook = ng_ipfw_newhook, .connect = ng_ipfw_connect, .findhook = ng_ipfw_findhook, .rcvdata = ng_ipfw_rcvdata, .disconnect = ng_ipfw_disconnect, }; NETGRAPH_INIT(ipfw, &ng_ipfw_typestruct); MODULE_DEPEND(ng_ipfw, ipfw, 3, 3, 3); /* Information we store for each hook */ struct ng_ipfw_hook_priv { hook_p hook; u_int16_t rulenum; }; typedef struct ng_ipfw_hook_priv *hpriv_p; static int ng_ipfw_mod_event(module_t mod, int event, void *data) { int error = 0; switch (event) { case MOD_LOAD: if (ng_ipfw_input_p != NULL) { error = EEXIST; break; } /* Setup node without any private data */ if ((error = ng_make_node_common(&ng_ipfw_typestruct, &fw_node)) != 0) { log(LOG_ERR, "%s: can't create ng_ipfw node", __func__); break; } /* Try to name node */ if (ng_name_node(fw_node, "ipfw") != 0) log(LOG_WARNING, "%s: failed to name node \"ipfw\"", __func__); /* Register hook */ ng_ipfw_input_p = ng_ipfw_input; break; case MOD_UNLOAD: /* * This won't happen if a node exists. * ng_ipfw_input_p is already cleared. */ break; default: error = EOPNOTSUPP; break; } return (error); } static int ng_ipfw_constructor(node_p node) { return (EINVAL); /* Only one node */ } static int ng_ipfw_newhook(node_p node, hook_p hook, const char *name) { hpriv_p hpriv; u_int16_t rulenum; const char *cp; char *endptr; /* Protect from leading zero */ if (name[0] == '0' && name[1] != '\0') return (EINVAL); /* Check that name contains only digits */ for (cp = name; *cp != '\0'; cp++) if (!isdigit(*cp)) return (EINVAL); /* Convert it to integer */ rulenum = (u_int16_t)strtol(name, &endptr, 10); if (*endptr != '\0') return (EINVAL); /* Allocate memory for this hook's private data */ hpriv = malloc(sizeof(*hpriv), M_NETGRAPH, M_NOWAIT | M_ZERO); if (hpriv== NULL) return (ENOMEM); hpriv->hook = hook; hpriv->rulenum = rulenum; NG_HOOK_SET_PRIVATE(hook, hpriv); return(0); } /* * Set hooks into queueing mode, to avoid recursion between * netgraph layer and ip_{input,output}. */ static int ng_ipfw_connect(hook_p hook) { NG_HOOK_FORCE_QUEUE(hook); return (0); } /* Look up hook by name */ static hook_p ng_ipfw_findhook(node_p node, const char *name) { u_int16_t n; /* numeric representation of hook */ char *endptr; n = (u_int16_t)strtol(name, &endptr, 10); if (*endptr != '\0') return NULL; return ng_ipfw_findhook1(node, n); } /* Look up hook by rule number */ static hook_p ng_ipfw_findhook1(node_p node, u_int16_t rulenum) { hook_p hook; hpriv_p hpriv; LIST_FOREACH(hook, &node->nd_hooks, hk_hooks) { hpriv = NG_HOOK_PRIVATE(hook); if (NG_HOOK_IS_VALID(hook) && (hpriv->rulenum == rulenum)) return (hook); } return (NULL); } static int ng_ipfw_rcvdata(hook_p hook, item_p item) { struct m_tag *tag; struct ipfw_rule_ref *r; struct mbuf *m; struct ip *ip; NGI_GET_M(item, m); NG_FREE_ITEM(item); tag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); if (tag == NULL) { NG_FREE_M(m); return (EINVAL); /* XXX: find smth better */ } if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == NULL) return (ENOBUFS); ip = mtod(m, struct ip *); r = (struct ipfw_rule_ref *)(tag + 1); if (r->info & IPFW_INFO_IN) { switch (ip->ip_v) { #ifdef INET case IPVERSION: ip_input(m); return (0); #endif #ifdef INET6 case IPV6_VERSION >> 4: ip6_input(m); return (0); #endif } } else { switch (ip->ip_v) { #ifdef INET case IPVERSION: return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL)); #endif #ifdef INET6 case IPV6_VERSION >> 4: return (ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL)); #endif } } /* unknown IP protocol version */ NG_FREE_M(m); return (EPROTONOSUPPORT); } static int -ng_ipfw_input(struct mbuf **m0, int dir, struct ip_fw_args *fwa, int tee) +ng_ipfw_input(struct mbuf **m0, struct ip_fw_args *fwa, bool tee) { struct mbuf *m; hook_p hook; int error = 0; /* * Node must be loaded and corresponding hook must be present. */ if (fw_node == NULL || (hook = ng_ipfw_findhook1(fw_node, fwa->rule.info)) == NULL) return (ESRCH); /* no hook associated with this rule */ /* * We have two modes: in normal mode we add a tag to packet, which is * important to return packet back to IP stack. In tee mode we make * a copy of a packet and forward it into netgraph without a tag. */ - if (tee == 0) { + if (tee == false) { struct m_tag *tag; struct ipfw_rule_ref *r; m = *m0; *m0 = NULL; /* it belongs now to netgraph */ tag = m_tag_alloc(MTAG_IPFW_RULE, 0, sizeof(*r), M_NOWAIT|M_ZERO); if (tag == NULL) { m_freem(m); return (ENOMEM); } r = (struct ipfw_rule_ref *)(tag + 1); *r = fwa->rule; r->info &= IPFW_ONEPASS; /* keep this info */ - r->info |= dir ? IPFW_INFO_IN : IPFW_INFO_OUT; + r->info |= (fwa->flags & IPFW_ARGS_IN) ? + IPFW_INFO_IN : IPFW_INFO_OUT; m_tag_prepend(m, tag); } else if ((m = m_dup(*m0, M_NOWAIT)) == NULL) return (ENOMEM); /* which is ignored */ if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == NULL) return (EINVAL); NG_SEND_DATA_ONLY(error, hook, m); return (error); } static int ng_ipfw_shutdown(node_p node) { /* * After our single node has been removed, * the only thing that can be done is * 'kldunload ng_ipfw.ko' */ ng_ipfw_input_p = NULL; NG_NODE_UNREF(node); return (0); } static int ng_ipfw_disconnect(hook_p hook) { const hpriv_p hpriv = NG_HOOK_PRIVATE(hook); free(hpriv, M_NETGRAPH); NG_HOOK_SET_PRIVATE(hook, NULL); return (0); } Index: head/sys/netinet/ip_var.h =================================================================== --- head/sys/netinet/ip_var.h (revision 345162) +++ head/sys/netinet/ip_var.h (revision 345163) @@ -1,304 +1,302 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_var.h 8.2 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _NETINET_IP_VAR_H_ #define _NETINET_IP_VAR_H_ #include #include /* * Overlay for ip header used by other protocols (tcp, udp). */ struct ipovly { u_char ih_x1[9]; /* (unused) */ u_char ih_pr; /* protocol */ u_short ih_len; /* protocol length */ struct in_addr ih_src; /* source internet address */ struct in_addr ih_dst; /* destination internet address */ }; #ifdef _KERNEL /* * Ip reassembly queue structure. Each fragment * being reassembled is attached to one of these structures. * They are timed out after ipq_ttl drops to 0, and may also * be reclaimed if memory becomes tight. */ struct ipq { TAILQ_ENTRY(ipq) ipq_list; /* to other reass headers */ u_char ipq_ttl; /* time for reass q to live */ u_char ipq_p; /* protocol of this fragment */ u_short ipq_id; /* sequence id for reassembly */ int ipq_maxoff; /* total length of packet */ struct mbuf *ipq_frags; /* to ip headers of fragments */ struct in_addr ipq_src,ipq_dst; u_char ipq_nfrags; /* # frags in this packet */ struct label *ipq_label; /* MAC label */ }; #endif /* _KERNEL */ /* * Structure stored in mbuf in inpcb.ip_options * and passed to ip_output when ip options are in use. * The actual length of the options (including ipopt_dst) * is in m_len. */ #define MAX_IPOPTLEN 40 struct ipoption { struct in_addr ipopt_dst; /* first-hop dst if source routed */ char ipopt_list[MAX_IPOPTLEN]; /* options proper */ }; /* * Structure attached to inpcb.ip_moptions and * passed to ip_output when IP multicast options are in use. * This structure is lazy-allocated. */ struct ip_moptions { struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */ struct in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */ u_long imo_multicast_vif; /* vif num outgoing multicasts */ u_char imo_multicast_ttl; /* TTL for outgoing multicasts */ u_char imo_multicast_loop; /* 1 => hear sends if a member */ u_short imo_num_memberships; /* no. memberships this socket */ u_short imo_max_memberships; /* max memberships this socket */ struct in_multi **imo_membership; /* group memberships */ struct in_mfilter *imo_mfilters; /* source filters */ struct epoch_context imo_epoch_ctx; }; struct ipstat { uint64_t ips_total; /* total packets received */ uint64_t ips_badsum; /* checksum bad */ uint64_t ips_tooshort; /* packet too short */ uint64_t ips_toosmall; /* not enough data */ uint64_t ips_badhlen; /* ip header length < data size */ uint64_t ips_badlen; /* ip length < ip header length */ uint64_t ips_fragments; /* fragments received */ uint64_t ips_fragdropped; /* frags dropped (dups, out of space) */ uint64_t ips_fragtimeout; /* fragments timed out */ uint64_t ips_forward; /* packets forwarded */ uint64_t ips_fastforward; /* packets fast forwarded */ uint64_t ips_cantforward; /* packets rcvd for unreachable dest */ uint64_t ips_redirectsent; /* packets forwarded on same net */ uint64_t ips_noproto; /* unknown or unsupported protocol */ uint64_t ips_delivered; /* datagrams delivered to upper level*/ uint64_t ips_localout; /* total ip packets generated here */ uint64_t ips_odropped; /* lost packets due to nobufs, etc. */ uint64_t ips_reassembled; /* total packets reassembled ok */ uint64_t ips_fragmented; /* datagrams successfully fragmented */ uint64_t ips_ofragments; /* output fragments created */ uint64_t ips_cantfrag; /* don't fragment flag was set, etc. */ uint64_t ips_badoptions; /* error in option processing */ uint64_t ips_noroute; /* packets discarded due to no route */ uint64_t ips_badvers; /* ip version != 4 */ uint64_t ips_rawout; /* total raw ip packets generated */ uint64_t ips_toolong; /* ip length > max ip packet size */ uint64_t ips_notmember; /* multicasts for unregistered grps */ uint64_t ips_nogif; /* no match gif found */ uint64_t ips_badaddr; /* invalid address on header */ }; #ifdef _KERNEL #include #include VNET_PCPUSTAT_DECLARE(struct ipstat, ipstat); /* * In-kernel consumers can use these accessor macros directly to update * stats. */ #define IPSTAT_ADD(name, val) \ VNET_PCPUSTAT_ADD(struct ipstat, ipstat, name, (val)) #define IPSTAT_SUB(name, val) IPSTAT_ADD(name, -(val)) #define IPSTAT_INC(name) IPSTAT_ADD(name, 1) #define IPSTAT_DEC(name) IPSTAT_SUB(name, 1) /* * Kernel module consumers must use this accessor macro. */ void kmod_ipstat_inc(int statnum); #define KMOD_IPSTAT_INC(name) \ kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(uint64_t)) void kmod_ipstat_dec(int statnum); #define KMOD_IPSTAT_DEC(name) \ kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(uint64_t)) /* flags passed to ip_output as last parameter */ #define IP_FORWARDING 0x1 /* most of ip header exists */ #define IP_RAWOUTPUT 0x2 /* raw ip header exists */ #define IP_SENDONES 0x4 /* send all-ones broadcast */ #define IP_SENDTOIF 0x8 /* send on specific ifnet */ #define IP_ROUTETOIF SO_DONTROUTE /* 0x10 bypass routing tables */ #define IP_ALLOWBROADCAST SO_BROADCAST /* 0x20 can send broadcast packets */ #define IP_NODEFAULTFLOWID 0x40 /* Don't set the flowid from inp */ #ifdef __NO_STRICT_ALIGNMENT #define IP_HDR_ALIGNED_P(ip) 1 #else #define IP_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0) #endif struct ip; struct inpcb; struct route; struct sockopt; struct inpcbinfo; VNET_DECLARE(int, ip_defttl); /* default IP ttl */ VNET_DECLARE(int, ipforwarding); /* ip forwarding */ #ifdef IPSTEALTH VNET_DECLARE(int, ipstealth); /* stealth forwarding */ #endif extern u_char ip_protox[]; VNET_DECLARE(struct socket *, ip_rsvpd); /* reservation protocol daemon*/ VNET_DECLARE(struct socket *, ip_mrouter); /* multicast routing daemon */ extern int (*legal_vif_num)(int); extern u_long (*ip_mcast_src)(int); VNET_DECLARE(int, rsvp_on); VNET_DECLARE(int, drop_redirect); extern struct pr_usrreqs rip_usrreqs; #define V_ip_id VNET(ip_id) #define V_ip_defttl VNET(ip_defttl) #define V_ipforwarding VNET(ipforwarding) #ifdef IPSTEALTH #define V_ipstealth VNET(ipstealth) #endif #define V_ip_rsvpd VNET(ip_rsvpd) #define V_ip_mrouter VNET(ip_mrouter) #define V_rsvp_on VNET(rsvp_on) #define V_drop_redirect VNET(drop_redirect) void inp_freemoptions(struct ip_moptions *); int inp_getmoptions(struct inpcb *, struct sockopt *); int inp_setmoptions(struct inpcb *, struct sockopt *); int ip_ctloutput(struct socket *, struct sockopt *sopt); void ip_drain(void); int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, u_long if_hwassist_flags); void ip_forward(struct mbuf *m, int srcrt); void ip_init(void); extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); int ip_output(struct mbuf *, struct mbuf *, struct route *, int, struct ip_moptions *, struct inpcb *); int ipproto_register(short); int ipproto_unregister(short); struct mbuf * ip_reass(struct mbuf *); void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); void ip_slowtimo(void); void ip_fillid(struct ip *); int rip_ctloutput(struct socket *, struct sockopt *); void rip_ctlinput(int, struct sockaddr *, void *); void rip_init(void); int rip_input(struct mbuf **, int *, int); int rip_output(struct mbuf *, struct socket *, ...); int ipip_input(struct mbuf **, int *, int); int rsvp_input(struct mbuf **, int *, int); int ip_rsvp_init(struct socket *); int ip_rsvp_done(void); extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *); extern void (*ip_rsvp_force_done)(struct socket *); extern int (*rsvp_input_p)(struct mbuf **, int *, int); VNET_DECLARE(struct pfil_head *, inet_pfil_head); #define V_inet_pfil_head VNET(inet_pfil_head) #define PFIL_INET_NAME "inet" void in_delayed_cksum(struct mbuf *m); /* Hooks for ipfw, dummynet, divert etc. Most are declared in raw_ip.c */ /* * Reference to an ipfw or packet filter rule that can be carried * outside critical sections. * A rule is identified by rulenum:rule_id which is ordered. * In version chain_id the rule can be found in slot 'slot', so * we don't need a lookup if chain_id == chain->id. * * On exit from the firewall this structure refers to the rule after * the matching one (slot points to the new rule; rulenum:rule_id-1 * is the matching rule), and additional info (e.g. info often contains * the insn argument or tablearg in the low 16 bits, in host format). * On entry, the structure is valid if slot>0, and refers to the starting * rules. 'info' contains the reason for reinject, e.g. divert port, * divert direction, and so on. */ struct ipfw_rule_ref { uint32_t slot; /* slot for matching rule */ uint32_t rulenum; /* matching rule number */ uint32_t rule_id; /* matching rule id */ uint32_t chain_id; /* ruleset id */ uint32_t info; /* see below */ }; enum { IPFW_INFO_MASK = 0x0000ffff, IPFW_INFO_OUT = 0x00000000, /* outgoing, just for convenience */ IPFW_INFO_IN = 0x80000000, /* incoming, overloads dir */ IPFW_ONEPASS = 0x40000000, /* One-pass, do not reinject */ IPFW_IS_MASK = 0x30000000, /* which source ? */ IPFW_IS_DIVERT = 0x20000000, IPFW_IS_DUMMYNET =0x10000000, IPFW_IS_PIPE = 0x08000000, /* pipe=1, queue = 0 */ }; #define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */ #define MTAG_IPFW_RULE 1262273568 /* rule reference */ #define MTAG_IPFW_CALL 1308397630 /* call stack */ struct ip_fw_args; typedef int (*ip_fw_chk_ptr_t)(struct ip_fw_args *args); typedef int (*ip_fw_ctl_ptr_t)(struct sockopt *); VNET_DECLARE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr); #define V_ip_fw_ctl_ptr VNET(ip_fw_ctl_ptr) /* Divert hooks. */ extern void (*ip_divert_ptr)(struct mbuf *m, bool incoming); /* ng_ipfw hooks -- XXX make it the same as divert and dummynet */ -extern int (*ng_ipfw_input_p)(struct mbuf **, int, - struct ip_fw_args *, int); - +extern int (*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool); extern int (*ip_dn_ctl_ptr)(struct sockopt *); extern int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *); #endif /* _KERNEL */ #endif /* !_NETINET_IP_VAR_H_ */ Index: head/sys/netinet/raw_ip.c =================================================================== --- head/sys/netinet/raw_ip.c (revision 345162) +++ head/sys/netinet/raw_ip.c (revision 345163) @@ -1,1179 +1,1178 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include VNET_DEFINE(int, ip_defttl) = IPDEFTTL; SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_defttl), 0, "Maximum TTL on IP packets"); VNET_DEFINE(struct inpcbhead, ripcb); VNET_DEFINE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) /* * Control and data hooks for ipfw, dummynet, divert and so on. * The data hooks are not used here but it is convenient * to keep them all in one place. */ VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL; VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL; int (*ip_dn_ctl_ptr)(struct sockopt *); int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *); void (*ip_divert_ptr)(struct mbuf *, bool); -int (*ng_ipfw_input_p)(struct mbuf **, int, - struct ip_fw_args *, int); +int (*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool); #ifdef INET /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip_mrouter); /* * The various mrouter and rsvp functions. */ int (*ip_mrouter_set)(struct socket *, struct sockopt *); int (*ip_mrouter_get)(struct socket *, struct sockopt *); int (*ip_mrouter_done)(void); int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); int (*mrt_ioctl)(u_long, caddr_t, int); int (*legal_vif_num)(int); u_long (*ip_mcast_src)(int); int (*rsvp_input_p)(struct mbuf **, int *, int); int (*ip_rsvp_vif)(struct socket *, struct sockopt *); void (*ip_rsvp_force_done)(struct socket *); #endif /* INET */ extern struct protosw inetsw[]; u_long rip_sendspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); u_long rip_recvspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); /* * Hash functions */ #define INP_PCBHASH_RAW_SIZE 256 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \ (((proto) + (laddr) + (faddr)) % (mask) + 1) #ifdef INET static void rip_inshash(struct inpcb *inp) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *pcbhash; int hash; INP_INFO_WLOCK_ASSERT(pcbinfo); INP_WLOCK_ASSERT(inp); if (inp->inp_ip_p != 0 && inp->inp_laddr.s_addr != INADDR_ANY && inp->inp_faddr.s_addr != INADDR_ANY) { hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr, inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask); } else hash = 0; pcbhash = &pcbinfo->ipi_hashbase[hash]; CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash); } static void rip_delhash(struct inpcb *inp) { INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); CK_LIST_REMOVE(inp, inp_hash); } #endif /* INET */ /* * Raw interface to IP protocol. */ /* * Initialize raw connection block q. */ static void rip_zone_change(void *tag) { uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets); } static int rip_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "rawinp"); return (0); } void rip_init(void) { in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE, 1, "ripcb", rip_inpcb_init, IPI_HASHFIELDS_NONE); EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, EVENTHANDLER_PRI_ANY); } #ifdef VIMAGE static void rip_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_ripcbinfo); } VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL); #endif #ifdef INET static int rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n, struct sockaddr_in *ripsrc) { int policyfail = 0; INP_LOCK_ASSERT(last); #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* check AH/ESP integrity. */ if (IPSEC_ENABLED(ipv4)) { if (IPSEC_CHECK_POLICY(ipv4, n, last) != 0) policyfail = 1; } #endif /* IPSEC */ #ifdef MAC if (!policyfail && mac_inpcb_check_deliver(last, n) != 0) policyfail = 1; #endif /* Check the minimum TTL for socket. */ if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl) policyfail = 1; if (!policyfail) { struct mbuf *opts = NULL; struct socket *so; so = last->inp_socket; if ((last->inp_flags & INP_CONTROLOPTS) || (so->so_options & (SO_TIMESTAMP | SO_BINTIME))) ip_savecontrol(last, &opts, ip, n); SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)ripsrc, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&so->so_rcv); } else sorwakeup_locked(so); } else m_freem(n); return (policyfail); } /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. */ int rip_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct inpcb *inp, *last; struct sockaddr_in ripsrc; struct epoch_tracker et; int hash; *mp = NULL; bzero(&ripsrc, sizeof(ripsrc)); ripsrc.sin_len = sizeof(ripsrc); ripsrc.sin_family = AF_INET; ripsrc.sin_addr = ip->ip_src; last = NULL; ifp = m->m_pkthdr.rcvif; hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr, ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask); INP_INFO_RLOCK_ET(&V_ripcbinfo, et); CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { if (inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; if (inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (last != NULL) { struct mbuf *n; n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); last = NULL; } INP_RLOCK(inp); if (__predict_false(inp->inp_flags2 & INP_FREED)) goto skip_1; if (jailed_without_vnet(inp->inp_cred)) { /* * XXX: If faddr was bound to multicast group, * jailed raw socket will drop datagram. */ if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) goto skip_1; } last = inp; continue; skip_1: INP_RUNLOCK(inp); } CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) { if (inp->inp_ip_p && inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (!in_nullhost(inp->inp_laddr) && !in_hosteq(inp->inp_laddr, ip->ip_dst)) continue; if (!in_nullhost(inp->inp_faddr) && !in_hosteq(inp->inp_faddr, ip->ip_src)) continue; if (last != NULL) { struct mbuf *n; n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); last = NULL; } INP_RLOCK(inp); if (__predict_false(inp->inp_flags2 & INP_FREED)) goto skip_2; if (jailed_without_vnet(inp->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) goto skip_2; } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (inp->inp_moptions != NULL && IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { /* * If the incoming datagram is for IGMP, allow it * through unconditionally to the raw socket. * * In the case of IGMPv2, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. imo_multi_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * filter the control traffic appropriately. */ int blocked; blocked = MCAST_PASS; if (proto != IPPROTO_IGMP) { struct sockaddr_in group; bzero(&group, sizeof(struct sockaddr_in)); group.sin_len = sizeof(struct sockaddr_in); group.sin_family = AF_INET; group.sin_addr = ip->ip_dst; blocked = imo_multi_filter(inp->inp_moptions, ifp, (struct sockaddr *)&group, (struct sockaddr *)&ripsrc); } if (blocked != MCAST_PASS) { IPSTAT_INC(ips_notmember); goto skip_2; } } last = inp; continue; skip_2: INP_RUNLOCK(inp); } INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); if (last != NULL) { if (rip_append(last, ip, m, &ripsrc) != 0) IPSTAT_INC(ips_delivered); INP_RUNLOCK(last); } else { if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { IPSTAT_INC(ips_noproto); IPSTAT_DEC(ips_delivered); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); } else { m_freem(m); } } return (IPPROTO_DONE); } /* * Generate IP header and pass packet to ip_output. Tack on options user may * have setup with control call. */ int rip_output(struct mbuf *m, struct socket *so, ...) { struct ip *ip; int error; struct inpcb *inp = sotoinpcb(so); va_list ap; u_long dst; int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST; int cnt; u_char opttype, optlen, *cp; va_start(ap, so); dst = va_arg(ap, u_long); va_end(ap); /* * If the user handed us a complete IP packet, use it. Otherwise, * allocate an mbuf for a header and fill it in. */ if ((inp->inp_flags & INP_HDRINCL) == 0) { if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } M_PREPEND(m, sizeof(struct ip), M_NOWAIT); if (m == NULL) return(ENOBUFS); INP_RLOCK(inp); ip = mtod(m, struct ip *); ip->ip_tos = inp->inp_ip_tos; if (inp->inp_flags & INP_DONTFRAG) ip->ip_off = htons(IP_DF); else ip->ip_off = htons(0); ip->ip_p = inp->inp_ip_p; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; if (jailed(inp->inp_cred)) { /* * prison_local_ip4() would be good enough but would * let a source of INADDR_ANY pass, which we do not * want to see from jails. */ if (ip->ip_src.s_addr == INADDR_ANY) { error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src, inp->inp_cred); } else { error = prison_local_ip4(inp->inp_cred, &ip->ip_src); } if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } } ip->ip_ttl = inp->inp_ip_ttl; } else { if (m->m_pkthdr.len > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } INP_RLOCK(inp); ip = mtod(m, struct ip *); error = prison_check_ip4(inp->inp_cred, &ip->ip_src); if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } /* * Don't allow both user specified and setsockopt options, * and don't allow packet length sizes that will crash. */ if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) || (ntohs(ip->ip_len) != m->m_pkthdr.len) || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } /* * Don't allow IP options which do not have the required * structure as specified in section 3.1 of RFC 791 on * pages 15-23. */ cp = (u_char *)(ip + 1); cnt = (ip->ip_hl << 2) - sizeof (struct ip); for (; cnt > 0; cnt -= optlen, cp += optlen) { opttype = cp[IPOPT_OPTVAL]; if (opttype == IPOPT_EOL) break; if (opttype == IPOPT_NOP) { optlen = 1; continue; } if (cnt < IPOPT_OLEN + sizeof(u_char)) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } optlen = cp[IPOPT_OLEN]; if (optlen < IPOPT_OLEN + sizeof(u_char) || optlen > cnt) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } } /* * This doesn't allow application to specify ID of zero, * but we got this limitation from the beginning of history. */ if (ip->ip_id == 0) ip_fillid(ip); /* * XXX prevent ip_output from overwriting header fields. */ flags |= IP_RAWOUTPUT; IPSTAT_INC(ips_rawout); } if (inp->inp_flags & INP_ONESBCAST) flags |= IP_SENDONES; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif error = ip_output(m, inp->inp_options, NULL, flags, inp->inp_moptions, inp); INP_RUNLOCK(inp); return (error); } /* * Raw IP socket option processing. * * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could * only be created by a privileged process, and as such, socket option * operations to manage system properties on any raw socket were allowed to * take place without explicit additional access control checks. However, * raw sockets can now also be created in jail(), and therefore explicit * checks are now required. Likewise, raw sockets can be used by a process * after it gives up privilege, so some caution is required. For options * passed down to the IP layer via ip_ctloutput(), checks are assumed to be * performed in ip_ctloutput() and therefore no check occurs here. * Unilaterally checking priv_check() here breaks normal IP socket option * operations on raw sockets. * * When adding new socket options here, make sure to add access control * checks here as necessary. * * XXX-BZ inp locking? */ int rip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error, optval; if (sopt->sopt_level != IPPROTO_IP) { if ((sopt->sopt_level == SOL_SOCKET) && (sopt->sopt_name == SO_SETFIB)) { inp->inp_inc.inc_fibnum = so->so_fibnum; return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case IP_HDRINCL: optval = inp->inp_flags & INP_HDRINCL; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: /* ADD actually returns the body... */ case IP_FW_GET: case IP_FW_TABLE_GETSIZE: case IP_FW_TABLE_LIST: case IP_FW_NAT_GET_CONFIG: case IP_FW_NAT_GET_LOG: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_GET: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT; break ; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_get ? ip_mrouter_get(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case IP_HDRINCL: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: case IP_FW_DEL: case IP_FW_FLUSH: case IP_FW_ZERO: case IP_FW_RESETLOG: case IP_FW_TABLE_ADD: case IP_FW_TABLE_DEL: case IP_FW_TABLE_FLUSH: case IP_FW_NAT_CFG: case IP_FW_NAT_DEL: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_CONFIGURE: case IP_DUMMYNET_DEL: case IP_DUMMYNET_FLUSH: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT ; break ; case IP_RSVP_ON: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_init(so); break; case IP_RSVP_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_done(); break; case IP_RSVP_VIF_ON: case IP_RSVP_VIF_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_vif ? ip_rsvp_vif(so, sopt) : EINVAL; break; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; } return (error); } /* * This function exists solely to receive the PRC_IFDOWN messages which are * sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, and calls * in_ifadown() to remove all routes corresponding to that address. It also * receives the PRC_IFUP messages from if_up() and reinstalls the interface * routes. */ void rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; struct ifnet *ifp; int err; int flags; switch (cmd) { case PRC_IFDOWN: IN_IFADDR_RLOCK(&in_ifa_tracker); CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa && (ia->ia_flags & IFA_ROUTE)) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * in_scrubprefix() kills the interface route. */ in_scrubprefix(ia, 0); /* * in_ifadown gets rid of all the rest of the * routes. This is not quite the right thing * to do, but at least if we are running a * routing process they will come back. */ in_ifadown(&ia->ia_ifa, 0); ifa_free(&ia->ia_ifa); break; } } if (ia == NULL) /* If ia matched, already unlocked. */ IN_IFADDR_RUNLOCK(&in_ifa_tracker); break; case PRC_IFUP: IN_IFADDR_RLOCK(&in_ifa_tracker); CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa) break; } if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return; } ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); flags = RTF_UP; ifp = ia->ia_ifa.ifa_ifp; if ((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_flags & IFF_POINTOPOINT)) flags |= RTF_HOST; err = ifa_del_loopback_route((struct ifaddr *)ia, sa); err = rtinit(&ia->ia_ifa, RTM_ADD, flags); if (err == 0) ia->ia_flags |= IFA_ROUTE; err = ifa_add_loopback_route((struct ifaddr *)ia, sa); ifa_free(&ia->ia_ifa); break; } } static int rip_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); if (proto >= IPPROTO_MAX || proto < 0) return EPROTONOSUPPORT; error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV4; inp->inp_ip_p = proto; inp->inp_ip_ttl = V_ip_defttl; rip_inshash(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); INP_WUNLOCK(inp); return (0); } static void rip_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_detach: inp == NULL")); KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, ("rip_detach: not closed")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); if (so == V_ip_mrouter && ip_mrouter_done) ip_mrouter_done(); if (ip_rsvp_force_done) ip_rsvp_force_done(so); if (so == V_ip_rsvpd) ip_rsvp_done(); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } static void rip_dodisconnect(struct socket *so, struct inpcb *inp) { struct inpcbinfo *pcbinfo; pcbinfo = inp->inp_pcbinfo; INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr.s_addr = INADDR_ANY; rip_inshash(inp); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; SOCK_UNLOCK(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); } static void rip_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_abort: inp == NULL")); rip_dodisconnect(so, inp); } static void rip_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_close: inp == NULL")); rip_dodisconnect(so, inp); } static int rip_disconnect(struct socket *so) { struct inpcb *inp; if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_disconnect: inp == NULL")); rip_dodisconnect(so, inp); return (0); } static int rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; int error; if (nam->sa_len != sizeof(*addr)) return (EINVAL); error = prison_check_ip4(td->td_ucred, &addr->sin_addr); if (error != 0) return (error); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_bind: inp == NULL")); if (CK_STAILQ_EMPTY(&V_ifnet) || (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || (addr->sin_addr.s_addr && (inp->inp_flags & INP_BINDANY) == 0 && ifa_ifwithaddr_check((struct sockaddr *)addr) == 0)) return (EADDRNOTAVAIL); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_laddr = addr->sin_addr; rip_inshash(inp); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (CK_STAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) return (EAFNOSUPPORT); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_connect: inp == NULL")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr = addr->sin_addr; rip_inshash(inp); soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; u_long dst; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_send: inp == NULL")); /* * Note: 'dst' reads below are unlocked. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return (EISCONN); } dst = inp->inp_faddr.s_addr; /* Unlocked read. */ } else { if (nam == NULL) { m_freem(m); return (ENOTCONN); } dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; } return (rip_output(m, so, dst)); } #endif /* INET */ static int rip_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; struct epoch_tracker et; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { n = V_ripcbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); } if (req->newptr != 0) return (EPERM); /* * OK, now we're committed to doing something. */ INP_INFO_WLOCK(&V_ripcbinfo); gencnt = V_ripcbinfo.ipi_gencnt; n = V_ripcbinfo.ipi_count; INP_INFO_WUNLOCK(&V_ripcbinfo); bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); INP_INFO_RLOCK_ET(&V_ripcbinfo, et); for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; inp = CK_LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { in_pcbref(inp); inp_list[i++] = inp; } INP_WUNLOCK(inp); } INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xinpcb xi; in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else INP_RUNLOCK(inp); } INP_INFO_WLOCK(&V_ripcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_WUNLOCK(&V_ripcbinfo); if (!error) { struct epoch_tracker et; /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. */ INP_INFO_RLOCK_ET(&V_ripcbinfo, et); xig.xig_gen = V_ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_ripcbinfo.ipi_count; INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); #ifdef INET struct pr_usrreqs rip_usrreqs = { .pru_abort = rip_abort, .pru_attach = rip_attach, .pru_bind = rip_bind, .pru_connect = rip_connect, .pru_control = in_control, .pru_detach = rip_detach, .pru_disconnect = rip_disconnect, .pru_peeraddr = in_getpeeraddr, .pru_send = rip_send, .pru_shutdown = rip_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = rip_close, }; #endif /* INET */ Index: head/sys/netpfil/ipfw/ip_fw_pfil.c =================================================================== --- head/sys/netpfil/ipfw/ip_fw_pfil.c (revision 345162) +++ head/sys/netpfil/ipfw/ip_fw_pfil.c (revision 345163) @@ -1,667 +1,665 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ipfw.h" #include "opt_inet.h" #include "opt_inet6.h" #ifndef INET #error IPFIREWALL requires INET. #endif /* INET */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #include #include #include VNET_DEFINE_STATIC(int, fw_enable) = 1; #define V_fw_enable VNET(fw_enable) #ifdef INET6 VNET_DEFINE_STATIC(int, fw6_enable) = 1; #define V_fw6_enable VNET(fw6_enable) #endif VNET_DEFINE_STATIC(int, fwlink_enable) = 0; #define V_fwlink_enable VNET(fwlink_enable) int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); /* Forward declarations. */ static int ipfw_divert(struct mbuf **, bool, struct ipfw_rule_ref *, int); #ifdef SYSCTL_NODE SYSBEGIN(f1) SYSCTL_DECL(_net_inet_ip_fw); SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_enable), 0, ipfw_chg_hook, "I", "Enable ipfw"); #ifdef INET6 SYSCTL_DECL(_net_inet6_ip6_fw); SYSCTL_PROC(_net_inet6_ip6_fw, OID_AUTO, enable, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw6_enable), 0, ipfw_chg_hook, "I", "Enable ipfw+6"); #endif /* INET6 */ SYSCTL_DECL(_net_link_ether); SYSCTL_PROC(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fwlink_enable), 0, ipfw_chg_hook, "I", "Pass ether pkts through firewall"); SYSEND #endif /* SYSCTL_NODE */ /* * The pfilter hook to pass packets to ipfw_chk and then to * dummynet, divert, netgraph or other modules. * The packet may be consumed. */ static pfil_return_t ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { struct ip_fw_args args; struct m_tag *tag; pfil_return_t ret; int ipfw, dir; args.flags = (flags & PFIL_IN) ? IPFW_ARGS_IN : IPFW_ARGS_OUT; dir = (flags & PFIL_IN) ? DIR_IN : DIR_OUT; again: /* * extract and remove the tag if present. If we are left * with onepass, optimize the outgoing path. */ tag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL); if (tag != NULL) { args.rule = *((struct ipfw_rule_ref *)(tag+1)); m_tag_delete(*m0, tag); if (args.rule.info & IPFW_ONEPASS) return (0); args.flags |= IPFW_ARGS_REF; } args.m = *m0; args.ifp = ifp; args.inp = inp; ipfw = ipfw_chk(&args); *m0 = args.m; KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", __func__)); ret = PFIL_PASS; switch (ipfw) { case IP_FW_PASS: /* next_hop may be set by ipfw_chk */ if ((args.flags & (IPFW_ARGS_NH4 | IPFW_ARGS_NH4PTR | IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) == 0) break; #if (!defined(INET6) && !defined(INET)) ret = PFIL_DROPPED; #else { void *psa; size_t len; #ifdef INET if (args.flags & (IPFW_ARGS_NH4 | IPFW_ARGS_NH4PTR)) { MPASS((args.flags & (IPFW_ARGS_NH4 | IPFW_ARGS_NH4PTR)) != (IPFW_ARGS_NH4 | IPFW_ARGS_NH4PTR)); MPASS((args.flags & (IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) == 0); len = sizeof(struct sockaddr_in); psa = (args.flags & IPFW_ARGS_NH4) ? &args.hopstore : args.next_hop; if (in_localip(satosin(psa)->sin_addr)) (*m0)->m_flags |= M_FASTFWD_OURS; (*m0)->m_flags |= M_IP_NEXTHOP; } #endif /* INET */ #ifdef INET6 if (args.flags & (IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) { MPASS((args.flags & (IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) != (IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)); MPASS((args.flags & (IPFW_ARGS_NH4 | IPFW_ARGS_NH4PTR)) == 0); len = sizeof(struct sockaddr_in6); psa = args.next_hop6; (*m0)->m_flags |= M_IP6_NEXTHOP; } #endif /* INET6 */ /* * Incoming packets should not be tagged so we do not * m_tag_find. Outgoing packets may be tagged, so we * reuse the tag if present. */ tag = (flags & PFIL_IN) ? NULL : m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL); if (tag != NULL) { m_tag_unlink(*m0, tag); } else { tag = m_tag_get(PACKET_TAG_IPFORWARD, len, M_NOWAIT); if (tag == NULL) { ret = PFIL_DROPPED; break; } } if ((args.flags & IPFW_ARGS_NH6) == 0) bcopy(psa, tag + 1, len); m_tag_prepend(*m0, tag); ret = 0; #ifdef INET6 /* IPv6 next hop needs additional handling */ if (args.flags & (IPFW_ARGS_NH6 | IPFW_ARGS_NH6PTR)) { struct sockaddr_in6 *sa6; sa6 = satosin6(tag + 1); if (args.flags & IPFW_ARGS_NH6) { sa6->sin6_family = AF_INET6; sa6->sin6_len = sizeof(*sa6); sa6->sin6_addr = args.hopstore6.sin6_addr; sa6->sin6_port = args.hopstore6.sin6_port; sa6->sin6_scope_id = args.hopstore6.sin6_scope_id; } /* * If nh6 address is link-local we should convert * it to kernel internal form before doing any * comparisons. */ if (sa6_embedscope(sa6, V_ip6_use_defzone) != 0) { ret = PFIL_DROPPED; break; } if (in6_localip(&sa6->sin6_addr)) (*m0)->m_flags |= M_FASTFWD_OURS; } #endif /* INET6 */ } #endif /* INET || INET6 */ break; case IP_FW_DENY: ret = PFIL_DROPPED; break; case IP_FW_DUMMYNET: if (ip_dn_io_ptr == NULL) { ret = PFIL_DROPPED; break; } MPASS(args.flags & IPFW_ARGS_REF); if (mtod(*m0, struct ip *)->ip_v == 4) (void )ip_dn_io_ptr(m0, dir, &args); else if (mtod(*m0, struct ip *)->ip_v == 6) (void )ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args); else { ret = PFIL_DROPPED; break; } /* * XXX should read the return value. * dummynet normally eats the packet and sets *m0=NULL * unless the packet can be sent immediately. In this * case args is updated and we should re-run the * check without clearing args. */ if (*m0 != NULL) goto again; ret = PFIL_CONSUMED; break; case IP_FW_TEE: case IP_FW_DIVERT: if (ip_divert_ptr == NULL) { ret = PFIL_DROPPED; break; } MPASS(args.flags & IPFW_ARGS_REF); (void )ipfw_divert(m0, dir == DIR_IN, &args.rule, (ipfw == IP_FW_TEE) ? 1 : 0); /* continue processing for the original packet (tee). */ if (*m0) goto again; ret = PFIL_CONSUMED; break; case IP_FW_NGTEE: case IP_FW_NETGRAPH: if (ng_ipfw_input_p == NULL) { ret = PFIL_DROPPED; break; } MPASS(args.flags & IPFW_ARGS_REF); - (void )ng_ipfw_input_p(m0, dir, &args, - (ipfw == IP_FW_NGTEE) ? 1 : 0); + (void )ng_ipfw_input_p(m0, &args, ipfw == IP_FW_NGTEE); if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */ goto again; /* continue with packet */ ret = PFIL_CONSUMED; break; case IP_FW_NAT: /* honor one-pass in case of successful nat */ if (V_fw_one_pass) break; goto again; case IP_FW_REASS: goto again; /* continue with packet */ case IP_FW_NAT64: ret = PFIL_CONSUMED; break; default: KASSERT(0, ("%s: unknown retval", __func__)); } if (ret != PFIL_PASS) { if (*m0) FREE_PKT(*m0); *m0 = NULL; } return (ret); } /* * ipfw processing for ethernet packets (in and out). */ static pfil_return_t ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int dir, void *ruleset __unused, struct inpcb *inp) { struct ip_fw_args args; struct ether_header save_eh; struct ether_header *eh; struct m_tag *mtag; struct mbuf *m; pfil_return_t ret; int i; args.flags = IPFW_ARGS_ETHER; args.flags |= (dir & PFIL_IN) ? IPFW_ARGS_IN : IPFW_ARGS_OUT; again: /* fetch start point from rule, if any. remove the tag if present. */ mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL); if (mtag != NULL) { args.rule = *((struct ipfw_rule_ref *)(mtag+1)); m_tag_delete(*m0, mtag); if (args.rule.info & IPFW_ONEPASS) return (0); args.flags |= IPFW_ARGS_REF; } /* I need some amt of data to be contiguous */ m = *m0; i = min(m->m_pkthdr.len, max_protohdr); if (m->m_len < i) { m = m_pullup(m, i); if (m == NULL) { *m0 = m; return (0); } } eh = mtod(m, struct ether_header *); save_eh = *eh; /* save copy for restore below */ m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */ args.m = m; /* the packet we are looking at */ args.ifp = ifp; args.eh = &save_eh; /* MAC header for bridged/MAC packets */ args.inp = inp; /* used by ipfw uid/gid/jail rules */ i = ipfw_chk(&args); m = args.m; if (m != NULL) { /* * Restore Ethernet header, as needed, in case the * mbuf chain was replaced by ipfw. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); if (m == NULL) { *m0 = NULL; return (0); } if (eh != mtod(m, struct ether_header *)) bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); } *m0 = m; ret = PFIL_PASS; /* Check result of ipfw_chk() */ switch (i) { case IP_FW_PASS: break; case IP_FW_DENY: ret = PFIL_DROPPED; break; case IP_FW_DUMMYNET: if (ip_dn_io_ptr == NULL) { ret = PFIL_DROPPED; break; } *m0 = NULL; dir = (dir & PFIL_IN) ? DIR_IN : DIR_OUT; MPASS(args.flags & IPFW_ARGS_REF); ip_dn_io_ptr(&m, dir | PROTO_LAYER2, &args); return (PFIL_CONSUMED); case IP_FW_NGTEE: case IP_FW_NETGRAPH: if (ng_ipfw_input_p == NULL) { ret = PFIL_DROPPED; break; } MPASS(args.flags & IPFW_ARGS_REF); - (void )ng_ipfw_input_p(m0, (dir & PFIL_IN) ? DIR_IN : DIR_OUT, - &args, (i == IP_FW_NGTEE) ? 1 : 0); + (void )ng_ipfw_input_p(m0, &args, i == IP_FW_NGTEE); if (i == IP_FW_NGTEE) /* ignore errors for NGTEE */ goto again; /* continue with packet */ ret = PFIL_CONSUMED; break; default: KASSERT(0, ("%s: unknown retval", __func__)); } if (ret != PFIL_PASS) { if (*m0) FREE_PKT(*m0); *m0 = NULL; } return (ret); } /* do the divert, return 1 on error 0 on success */ static int ipfw_divert(struct mbuf **m0, bool incoming, struct ipfw_rule_ref *rule, int tee) { /* * ipfw_chk() has already tagged the packet with the divert tag. * If tee is set, copy packet and return original. * If not tee, consume packet and send it to divert socket. */ struct mbuf *clone; struct ip *ip = mtod(*m0, struct ip *); struct m_tag *tag; /* Cloning needed for tee? */ if (tee == 0) { clone = *m0; /* use the original mbuf */ *m0 = NULL; } else { clone = m_dup(*m0, M_NOWAIT); /* If we cannot duplicate the mbuf, we sacrifice the divert * chain and continue with the tee-ed packet. */ if (clone == NULL) return 1; } /* * Divert listeners can normally handle non-fragmented packets, * but we can only reass in the non-tee case. * This means that listeners on a tee rule may get fragments, * and have to live with that. * Note that we now have the 'reass' ipfw option so if we care * we can do it before a 'tee'. */ if (!tee) switch (ip->ip_v) { case IPVERSION: if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) { int hlen; struct mbuf *reass; reass = ip_reass(clone); /* Reassemble packet. */ if (reass == NULL) return 0; /* not an error */ /* if reass = NULL then it was consumed by ip_reass */ /* * IP header checksum fixup after reassembly and leave header * in network byte order. */ ip = mtod(reass, struct ip *); hlen = ip->ip_hl << 2; ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(reass, hlen); clone = reass; } break; #ifdef INET6 case IPV6_VERSION >> 4: { struct ip6_hdr *const ip6 = mtod(clone, struct ip6_hdr *); if (ip6->ip6_nxt == IPPROTO_FRAGMENT) { int nxt, off; off = sizeof(struct ip6_hdr); nxt = frag6_input(&clone, &off, 0); if (nxt == IPPROTO_DONE) return (0); } break; } #endif } /* attach a tag to the packet with the reinject info */ tag = m_tag_alloc(MTAG_IPFW_RULE, 0, sizeof(struct ipfw_rule_ref), M_NOWAIT); if (tag == NULL) { FREE_PKT(clone); return 1; } *((struct ipfw_rule_ref *)(tag+1)) = *rule; m_tag_prepend(clone, tag); /* Do the dirty job... */ ip_divert_ptr(clone, incoming); return 0; } /* * attach or detach hooks for a given protocol family */ VNET_DEFINE_STATIC(pfil_hook_t, ipfw_inet_hook); #define V_ipfw_inet_hook VNET(ipfw_inet_hook) #ifdef INET6 VNET_DEFINE_STATIC(pfil_hook_t, ipfw_inet6_hook); #define V_ipfw_inet6_hook VNET(ipfw_inet6_hook) #endif VNET_DEFINE_STATIC(pfil_hook_t, ipfw_link_hook); #define V_ipfw_link_hook VNET(ipfw_link_hook) static int ipfw_hook(int onoff, int pf) { struct pfil_hook_args pha; struct pfil_link_args pla; pfil_hook_t *h; pha.pa_version = PFIL_VERSION; pha.pa_flags = PFIL_IN | PFIL_OUT; pha.pa_modname = "ipfw"; pha.pa_ruleset = NULL; pla.pa_version = PFIL_VERSION; pla.pa_flags = PFIL_IN | PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; switch (pf) { case AF_INET: pha.pa_func = ipfw_check_packet; pha.pa_type = PFIL_TYPE_IP4; pha.pa_rulname = "default"; h = &V_ipfw_inet_hook; pla.pa_head = V_inet_pfil_head; break; #ifdef INET6 case AF_INET6: pha.pa_func = ipfw_check_packet; pha.pa_type = PFIL_TYPE_IP6; pha.pa_rulname = "default6"; h = &V_ipfw_inet6_hook; pla.pa_head = V_inet6_pfil_head; break; #endif case AF_LINK: pha.pa_func = ipfw_check_frame; pha.pa_type = PFIL_TYPE_ETHERNET; pha.pa_rulname = "default-link"; h = &V_ipfw_link_hook; pla.pa_head = V_link_pfil_head; break; } if (onoff) { *h = pfil_add_hook(&pha); pla.pa_hook = *h; (void)pfil_link(&pla); } else if (*h != NULL) pfil_remove_hook(*h); return 0; } int ipfw_attach_hooks(int arg) { int error = 0; if (arg == 0) /* detach */ ipfw_hook(0, AF_INET); else if (V_fw_enable && ipfw_hook(1, AF_INET) != 0) { error = ENOENT; /* see ip_fw_pfil.c::ipfw_hook() */ printf("ipfw_hook() error\n"); } #ifdef INET6 if (arg == 0) /* detach */ ipfw_hook(0, AF_INET6); else if (V_fw6_enable && ipfw_hook(1, AF_INET6) != 0) { error = ENOENT; printf("ipfw6_hook() error\n"); } #endif if (arg == 0) /* detach */ ipfw_hook(0, AF_LINK); else if (V_fwlink_enable && ipfw_hook(1, AF_LINK) != 0) { error = ENOENT; printf("ipfw_link_hook() error\n"); } return error; } int ipfw_chg_hook(SYSCTL_HANDLER_ARGS) { int newval; int error; int af; if (arg1 == &V_fw_enable) af = AF_INET; #ifdef INET6 else if (arg1 == &V_fw6_enable) af = AF_INET6; #endif else if (arg1 == &V_fwlink_enable) af = AF_LINK; else return (EINVAL); newval = *(int *)arg1; /* Handle sysctl change */ error = sysctl_handle_int(oidp, &newval, 0, req); if (error) return (error); /* Formalize new value */ newval = (newval) ? 1 : 0; if (*(int *)arg1 == newval) return (0); error = ipfw_hook(newval, af); if (error) return (error); *(int *)arg1 = newval; return (0); } /* end of file */