Index: sys/dev/virtio/network/if_vtnet.c =================================================================== --- sys/dev/virtio/network/if_vtnet.c +++ sys/dev/virtio/network/if_vtnet.c @@ -52,6 +52,7 @@ #include #include +#include #include #include #include @@ -927,6 +928,7 @@ vtnet_setup_interface(struct vtnet_softc *sc) { device_t dev; + struct pfil_head_args pa; struct ifnet *ifp; dev = sc->vtnet_dev; @@ -1030,6 +1032,12 @@ NETDUMP_SET(ifp, vtnet); + pa.pa_version = PFIL_VERSION; + pa.pa_flags = PFIL_IN; + pa.pa_type = PFIL_TYPE_ETHERNET; + pa.pa_headname = ifp->if_xname; + sc->vtnet_pfil = pfil_head_register(&pa); + return (0); } @@ -1773,9 +1781,11 @@ struct vtnet_softc *sc; struct ifnet *ifp; struct virtqueue *vq; - struct mbuf *m; + struct mbuf *m, *mr; struct virtio_net_hdr_mrg_rxbuf *mhdr; int len, deq, nbufs, adjsz, count; + pfil_return_t pfil; + bool pfil_done; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; @@ -1812,6 +1822,32 @@ adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); } + /* + * If we have enough data in the first mbuf, run it through + * pfil before we allocate a new one.
+ */ + if (PFIL_HOOKED_IN(sc->vtnet_pfil) && + len - adjsz >= ETHER_HDR_LEN + max_protohdr) { + pfil = pfil_run_hooks(sc->vtnet_pfil, + m->m_data + adjsz, ifp, + (len - adjsz) | PFIL_MEMPTR | PFIL_IN, NULL); + switch (pfil) { + case PFIL_REALLOCED: + mr = pfil_mem2mbuf(m->m_data + adjsz); + vtnet_rxq_input(rxq, mr, hdr); + /* FALLTHROUGH: mr was passed up, but m itself still needs vtnet_rxq_discard_buf(). */ + case PFIL_DROPPED: + case PFIL_CONSUMED: + vtnet_rxq_discard_buf(rxq, m); + continue; + default: + KASSERT(pfil == PFIL_PASS, + ("Filter returned %d!\n", pfil)); + } + pfil_done = true; + } else + pfil_done = false; + if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); @@ -1842,6 +1878,19 @@ memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr)); m_adj(m, adjsz); + if (PFIL_HOOKED_IN(sc->vtnet_pfil) && pfil_done == false) { + pfil = pfil_run_hooks(sc->vtnet_pfil, &m, ifp, PFIL_IN, + NULL); + switch (pfil) { + case PFIL_DROPPED: + case PFIL_CONSUMED: + continue; + default: + KASSERT(pfil == PFIL_PASS, + ("Filter returned %d!\n", pfil)); + } + } + vtnet_rxq_input(rxq, m, hdr); /* Must recheck after dropping the Rx lock.
*/ Index: sys/dev/virtio/network/if_vtnetvar.h =================================================================== --- sys/dev/virtio/network/if_vtnetvar.h +++ sys/dev/virtio/network/if_vtnetvar.h @@ -136,6 +136,7 @@ struct ifnet *vtnet_ifp; struct vtnet_rxq *vtnet_rxqs; struct vtnet_txq *vtnet_txqs; + pfil_head_t vtnet_pfil; uint32_t vtnet_flags; #define VTNET_FLAG_SUSPENDED 0x0001 Index: sys/netgraph/ng_ipfw.c =================================================================== --- sys/netgraph/ng_ipfw.c +++ sys/netgraph/ng_ipfw.c @@ -72,8 +72,7 @@ static ng_disconnect_t ng_ipfw_disconnect; static hook_p ng_ipfw_findhook1(node_p, u_int16_t ); -static int ng_ipfw_input(struct mbuf **, int, struct ip_fw_args *, - int); +static int ng_ipfw_input(struct mbuf **, struct ip_fw_args *, bool); /* We have only one node */ static node_p fw_node; @@ -285,7 +284,7 @@ } static int -ng_ipfw_input(struct mbuf **m0, int dir, struct ip_fw_args *fwa, int tee) +ng_ipfw_input(struct mbuf **m0, struct ip_fw_args *fwa, bool tee) { struct mbuf *m; hook_p hook; @@ -303,7 +302,7 @@ * important to return packet back to IP stack. In tee mode we make * a copy of a packet and forward it into netgraph without a tag. */ - if (tee == 0) { + if (tee == false) { struct m_tag *tag; struct ipfw_rule_ref *r; m = *m0; @@ -318,7 +317,8 @@ r = (struct ipfw_rule_ref *)(tag + 1); *r = fwa->rule; r->info &= IPFW_ONEPASS; /* keep this info */ - r->info |= dir ? IPFW_INFO_IN : IPFW_INFO_OUT; + r->info |= (fwa->flags & IPFW_ARGS_IN) ? + IPFW_INFO_IN : IPFW_INFO_OUT; m_tag_prepend(m, tag); } else Index: sys/netinet/ip_divert.c =================================================================== --- sys/netinet/ip_divert.c +++ sys/netinet/ip_divert.c @@ -184,7 +184,7 @@ * then pass them along with mbuf chain. 
*/ static void -divert_packet(struct mbuf *m, int incoming) +divert_packet(struct mbuf *m, bool incoming) { struct ip *ip; struct inpcb *inp; Index: sys/netinet/ip_var.h =================================================================== --- sys/netinet/ip_var.h +++ sys/netinet/ip_var.h @@ -292,13 +292,11 @@ #define V_ip_fw_ctl_ptr VNET(ip_fw_ctl_ptr) /* Divert hooks. */ -extern void (*ip_divert_ptr)(struct mbuf *m, int incoming); +extern void (*ip_divert_ptr)(struct mbuf *m, bool incoming); /* ng_ipfw hooks -- XXX make it the same as divert and dummynet */ -extern int (*ng_ipfw_input_p)(struct mbuf **, int, - struct ip_fw_args *, int); - +extern int (*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool); extern int (*ip_dn_ctl_ptr)(struct sockopt *); -extern int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *); +extern int (*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *); #endif /* _KERNEL */ #endif /* !_NETINET_IP_VAR_H_ */ Index: sys/netinet/raw_ip.c =================================================================== --- sys/netinet/raw_ip.c +++ sys/netinet/raw_ip.c @@ -100,10 +100,9 @@ VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL; int (*ip_dn_ctl_ptr)(struct sockopt *); -int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *); -void (*ip_divert_ptr)(struct mbuf *, int); -int (*ng_ipfw_input_p)(struct mbuf **, int, - struct ip_fw_args *, int); +int (*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *); +void (*ip_divert_ptr)(struct mbuf *, bool); +int (*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool); #ifdef INET /* Index: sys/netpfil/ipfw/ip_dn_io.c =================================================================== --- sys/netpfil/ipfw/ip_dn_io.c +++ sys/netpfil/ipfw/ip_dn_io.c @@ -841,7 +841,7 @@ dt->rule = fwa->rule; dt->rule.info &= IPFW_ONEPASS; /* only keep this info */ dt->dn_dir = dir; - dt->ifp = fwa->oif; + dt->ifp = fwa->flags & IPFW_ARGS_OUT ? 
fwa->ifp : NULL; /* dt->output tame is updated as we move through */ dt->output_time = dn_cfg.curr_time; dt->iphdr_off = (dir & PROTO_LAYER2) ? ETHER_HDR_LEN : 0; @@ -854,22 +854,27 @@ * We use the argument to locate the flowset fs and the sched_set sch * associated to it. The we apply flow_mask and sched_mask to * determine the queue and scheduler instances. - * - * dir where shall we send the packet after dummynet. - * *m0 the mbuf with the packet - * ifp the 'ifp' parameter from the caller. - * NULL in ip_input, destination interface in ip_output, */ int -dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) +dummynet_io(struct mbuf **m0, struct ip_fw_args *fwa) { struct mbuf *m = *m0; struct dn_fsk *fs = NULL; struct dn_sch_inst *si; struct dn_queue *q = NULL; /* default */ + int fs_id, dir; - int fs_id = (fwa->rule.info & IPFW_INFO_MASK) + + fs_id = (fwa->rule.info & IPFW_INFO_MASK) + ((fwa->rule.info & IPFW_IS_PIPE) ? 2*DN_MAX_ID : 0); + /* XXXGL: convert args to dir */ + if (fwa->flags & IPFW_ARGS_IN) + dir = DIR_IN; + else + dir = DIR_OUT; + if (fwa->flags & IPFW_ARGS_ETHER) + dir |= PROTO_LAYER2; + else if (fwa->flags & IPFW_ARGS_IP6) + dir |= PROTO_IPV6; DN_BH_WLOCK(); io_pkt++; /* we could actually tag outside the lock, but who cares... */ Index: sys/netpfil/ipfw/ip_dn_private.h =================================================================== --- sys/netpfil/ipfw/ip_dn_private.h +++ sys/netpfil/ipfw/ip_dn_private.h @@ -387,11 +387,26 @@ uint16_t iphdr_off; /* IP header offset for mtodo() */ }; +/* + * Possible values for dn_dir. XXXGL: this needs to be reviewed + * and converted to same values ip_fw_args.flags use. 
+ */ +enum { + DIR_OUT = 0, + DIR_IN = 1, + DIR_FWD = 2, + DIR_DROP = 3, + PROTO_LAYER2 = 0x4, /* set for layer 2 */ + PROTO_IPV4 = 0x08, + PROTO_IPV6 = 0x10, + PROTO_IFB = 0x0c, /* layer2 + ifbridge */ +}; + extern struct dn_parms dn_cfg; //VNET_DECLARE(struct dn_parms, _base_dn_cfg); //#define dn_cfg VNET(_base_dn_cfg) -int dummynet_io(struct mbuf **, int , struct ip_fw_args *); +int dummynet_io(struct mbuf **, struct ip_fw_args *); void dummynet_task(void *context, int pending); void dn_reschedule(void); struct dn_pkt_tag * dn_tag_get(struct mbuf *m); Index: sys/netpfil/ipfw/ip_fw2.c =================================================================== --- sys/netpfil/ipfw/ip_fw2.c +++ sys/netpfil/ipfw/ip_fw2.c @@ -1080,13 +1080,11 @@ struct inpcbinfo *pi; struct ipfw_flow_id *id; struct inpcb *pcb, *inp; - struct ifnet *oif; int lookupflags; int match; id = &args->f_id; inp = args->inp; - oif = args->oif; /* * Check to see if the UDP or TCP stack supplied us with @@ -1124,16 +1122,16 @@ if (*ugid_lookupp == 0) { if (id->addr_type == 6) { #ifdef INET6 - if (oif == NULL) + if (args->flags & IPFW_ARGS_IN) pcb = in6_pcblookup_mbuf(pi, &id->src_ip6, htons(id->src_port), &id->dst_ip6, htons(id->dst_port), - lookupflags, oif, args->m); + lookupflags, NULL, args->m); else pcb = in6_pcblookup_mbuf(pi, &id->dst_ip6, htons(id->dst_port), &id->src_ip6, htons(id->src_port), - lookupflags, oif, args->m); + lookupflags, args->ifp, args->m); #else *ugid_lookupp = -1; return (0); @@ -1141,16 +1139,16 @@ } else { src_ip.s_addr = htonl(id->src_ip); dst_ip.s_addr = htonl(id->dst_ip); - if (oif == NULL) + if (args->flags & IPFW_ARGS_IN) pcb = in_pcblookup_mbuf(pi, src_ip, htons(id->src_port), dst_ip, htons(id->dst_port), - lookupflags, oif, args->m); + lookupflags, NULL, args->m); else pcb = in_pcblookup_mbuf(pi, dst_ip, htons(id->dst_port), src_ip, htons(id->src_port), - lookupflags, oif, args->m); + lookupflags, args->ifp, args->m); } if (pcb != NULL) { INP_RLOCK_ASSERT(pcb); @@ 
-1260,11 +1258,9 @@ * * args->m (in/out) The packet; we set to NULL when/if we nuke it. * Starts with the IP header. - * args->eh (in) Mac header if present, NULL for layer3 packet. * args->L3offset Number of bytes bypassed if we came from L2. * e.g. often sizeof(eh) ** NOTYET ** - * args->oif Outgoing interface, NULL if packet is incoming. - * The incoming interface is in the mbuf. (in) + * args->ifp Incoming or outgoing interface. * args->divert_rule (in/out) * Skip up to the first rule past this rule number; * upon return, non-zero port number for divert or tee. @@ -1300,23 +1296,19 @@ * the implementation of the various instructions to make sure * that they still work. * - * args->eh The MAC header. It is non-null for a layer2 - * packet, it is NULL for a layer-3 packet. - * **notyet** - * args->L3offset Offset in the packet to the L3 (IP or equiv.) header. - * * m | args->m Pointer to the mbuf, as received from the caller. * It may change if ipfw_chk() does an m_pullup, or if it * consumes the packet because it calls send_reject(). * XXX This has to change, so that ipfw_chk() never modifies * or consumes the buffer. - * ip is the beginning of the ip(4 or 6) header. - * Calculated by adding the L3offset to the start of data. - * (Until we start using L3offset, the packet is - * supposed to start with the ip header). + * OR + * args->mem Pointer to contiguous memory chunk. + * ip Is the beginning of the ip(4 or 6) header. + * eh Ethernet header if the input is Layer 2. */ - struct mbuf *m = args->m; - struct ip *ip = mtod(m, struct ip *); + struct mbuf *m; + struct ip *ip; + struct ether_header *eh; /* * For rules which contain uid/gid or jail constraints, cache @@ -1331,17 +1323,9 @@ struct ucred *ucred_cache = NULL; #endif int ucred_lookup = 0; - - /* - * oif | args->oif If NULL, ipfw_chk has been called on the - * inbound path (ether_input, ip_input). - * If non-NULL, ipfw_chk has been called on the outbound path - * (ether_output, ip_output).
- */ - struct ifnet *oif = args->oif; - int f_pos = 0; /* index of current rule in the array */ int retval = 0; + struct ifnet *oif, *iif; /* * hlen The length of the IP header. @@ -1381,7 +1365,6 @@ struct in_addr src_ip, dst_ip; /* NOTE: network format */ int iplen = 0; int pktlen; - uint16_t etype; /* Host order stored ether type */ struct ipfw_dyn_info dyn_info; struct ip_fw *q = NULL; @@ -1405,14 +1388,47 @@ int done = 0; /* flag to exit the outer loop */ IPFW_RLOCK_TRACKER; - - if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) - return (IP_FW_PASS); /* accept */ + bool mem; + + if ((mem = (args->flags & IPFW_ARGS_LENMASK))) { + if (args->flags & IPFW_ARGS_ETHER) { + eh = (struct ether_header *)args->mem; + if (eh->ether_type == htons(ETHERTYPE_VLAN)) { + struct ether_vlan_header *evh; + + evh = (struct ether_vlan_header *)eh; + ip = (struct ip *)(evh + 1); + } else + ip = (struct ip *)(eh + 1); + } else { + eh = NULL; + ip = (struct ip *)args->mem; + } + pktlen = IPFW_ARGS_LENGTH(args->flags); + args->f_id.fib = args->ifp->if_fib; /* best guess */ + } else { + m = args->m; + if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) + return (IP_FW_PASS); /* accept */ + if (args->flags & IPFW_ARGS_ETHER) { + /* We need some amount of data to be contiguous. */ + if (m->m_len < min(m->m_pkthdr.len, max_protohdr) && + (args->m = m = m_pullup(m, min(m->m_pkthdr.len, + max_protohdr))) == NULL) + goto pullup_failed; + eh = mtod(m, struct ether_header *); + ip = (struct ip *)(eh + 1); + } else { + eh = NULL; + ip = mtod(m, struct ip *); + } + pktlen = m->m_pkthdr.len; + args->f_id.fib = M_GETFIB(m); /* mbuf not altered */ + } dst_ip.s_addr = 0; /* make sure it is initialized */ src_ip.s_addr = 0; /* make sure it is initialized */ src_port = dst_port = 0; - pktlen = m->m_pkthdr.len; DYN_INFO_INIT(&dyn_info); /* @@ -1422,31 +1438,45 @@ * this way). */ #define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T)) +#define EHLEN (eh != NULL ?
ETHER_HDR_LEN : 0) #define PULLUP_LEN(_len, p, T) \ do { \ - int x = (_len) + T; \ - if ((m)->m_len < x) { \ - args->m = m = m_pullup(m, x); \ - if (m == NULL) \ - goto pullup_failed; \ + int x = (_len) + T + EHLEN; \ + if (mem) { \ + MPASS(pktlen >= x); \ + p = (char *)args->mem + (_len) + EHLEN; \ + } else { \ + if (__predict_false((m)->m_len < x)) { \ + args->m = m = m_pullup(m, x); \ + if (m == NULL) \ + goto pullup_failed; \ + } \ + p = mtod(m, char *) + (_len) + EHLEN; \ + } \ +} while (0) +/* + * In case pointers got stale after pullups, update them. + */ +#define UPDATE_POINTERS() \ +do { \ + if (!mem) { \ + if (eh != NULL) { \ + eh = mtod(m, struct ether_header *); \ + ip = (struct ip *)(eh + 1); \ + } else \ + ip = mtod(m, struct ip *); \ + args->m = m; \ } \ - p = (mtod(m, char *) + (_len)); \ } while (0) - - /* - * if we have an ether header, - */ - if (args->flags & IPFW_ARGS_ETHER) - etype = ntohs(args->eh->ether_type); - else - etype = 0; /* Identify IP packets and fill up variables. 
*/ if (pktlen >= sizeof(struct ip6_hdr) && - (etype == 0 || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) { + (eh == NULL || eh->ether_type == htons(ETHERTYPE_IPV6)) && + ip->ip_v == 6) { struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; is_ipv6 = 1; + args->flags |= IPFW_ARGS_IP6; hlen = sizeof(struct ip6_hdr); proto = ip6->ip6_nxt; /* Search extension headers to find upper layer protocols */ @@ -1619,7 +1649,7 @@ break; } /*switch */ } - ip = mtod(m, struct ip *); + UPDATE_POINTERS(); ip6 = (struct ip6_hdr *)ip; args->f_id.addr_type = 6; args->f_id.src_ip6 = ip6->ip6_src; @@ -1627,8 +1657,10 @@ args->f_id.flow_id6 = ntohl(ip6->ip6_flow); iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6); } else if (pktlen >= sizeof(struct ip) && - (etype == 0 || etype == ETHERTYPE_IP) && ip->ip_v == 4) { + (eh == NULL || eh->ether_type == htons(ETHERTYPE_IP)) && + ip->ip_v == 4) { is_ipv4 = 1; + args->flags |= IPFW_ARGS_IP4; hlen = ip->ip_hl << 2; /* * Collect parameters into local variables for faster @@ -1684,7 +1716,7 @@ } } - ip = mtod(m, struct ip *); + UPDATE_POINTERS(); args->f_id.addr_type = 4; args->f_id.src_ip = ntohl(src_ip.s_addr); args->f_id.dst_ip = ntohl(dst_ip.s_addr); @@ -1701,7 +1733,6 @@ args->f_id.proto = proto; args->f_id.src_port = src_port = ntohs(src_port); args->f_id.dst_port = dst_port = ntohs(dst_port); - args->f_id.fib = M_GETFIB(m); IPFW_PF_RLOCK(chain); if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ @@ -1724,6 +1755,15 @@ f_pos = 0; } + if (args->flags & IPFW_ARGS_IN) { + iif = args->ifp; + oif = NULL; + } else { + MPASS(args->flags & IPFW_ARGS_OUT); + iif = mem ? NULL : m->m_pkthdr.rcvif; + oif = args->ifp; + } + /* * Now scan the rules, and parse microinstructions for each rule. * We have two nested loops and an inner switch. 
Sometimes we @@ -1820,8 +1860,8 @@ break; case O_RECV: - match = iface_match(m->m_pkthdr.rcvif, - (ipfw_insn_if *)cmd, chain, &tablearg); + match = iface_match(iif, (ipfw_insn_if *)cmd, + chain, &tablearg); break; case O_XMIT: @@ -1830,9 +1870,8 @@ break; case O_VIA: - match = iface_match(oif ? oif : - m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd, - chain, &tablearg); + match = iface_match(args->ifp, + (ipfw_insn_if *)cmd, chain, &tablearg); break; case O_MACADDR2: @@ -1841,7 +1880,7 @@ ((ipfw_insn_mac *)cmd)->addr; u_int32_t *mask = (u_int32_t *) ((ipfw_insn_mac *)cmd)->mask; - u_int32_t *hdr = (u_int32_t *)args->eh; + u_int32_t *hdr = (u_int32_t *)eh; match = ( want[0] == (hdr[0] & mask[0]) && @@ -1858,8 +1897,11 @@ for (i = cmdlen - 1; !match && i>0; i--, p += 2) - match = (etype >= p[0] && - etype <= p[1]); + match = + (ntohs(eh->ether_type) >= + p[0] && + ntohs(eh->ether_type) <= + p[1]); } break; @@ -2333,8 +2375,8 @@ } case O_LOG: - ipfw_log(chain, f, hlen, args, m, - oif, offset | ip6f_mf, tablearg, ip); + ipfw_log(chain, f, hlen, args, + offset | ip6f_mf, tablearg, ip); match = 1; break; @@ -2344,16 +2386,14 @@ case O_VERREVPATH: /* Outgoing packets automatically pass/match */ - match = ((oif != NULL) || - (m->m_pkthdr.rcvif == NULL) || + match = (args->flags & IPFW_ARGS_OUT || ( #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), - m->m_pkthdr.rcvif, args->f_id.fib) : + iif, args->f_id.fib) : #endif - verify_path(src_ip, m->m_pkthdr.rcvif, - args->f_id.fib))); + verify_path(src_ip, iif, args->f_id.fib))); break; case O_VERSRCREACH: @@ -2379,12 +2419,10 @@ match = #ifdef INET6 is_ipv6 ? 
verify_path6( - &(args->f_id.src_ip6), - m->m_pkthdr.rcvif, + &(args->f_id.src_ip6), iif, args->f_id.fib) : #endif - verify_path(src_ip, - m->m_pkthdr.rcvif, + verify_path(src_ip, iif, args->f_id.fib); else match = 1; Index: sys/netpfil/ipfw/ip_fw_bpf.c =================================================================== --- sys/netpfil/ipfw/ip_fw_bpf.c +++ sys/netpfil/ipfw/ip_fw_bpf.c @@ -160,25 +160,52 @@ return (0); } +void +ipfw_bpf_tap(u_char *pkt, u_int pktlen) +{ + LOGIF_RLOCK_TRACKER; + + LOGIF_RLOCK(); + if (V_log_if != NULL) + BPF_TAP(V_log_if, pkt, pktlen); + LOGIF_RUNLOCK(); +} + +void +ipfw_bpf_mtap(struct mbuf *m) +{ + LOGIF_RLOCK_TRACKER; + + LOGIF_RLOCK(); + if (V_log_if != NULL) + BPF_MTAP(V_log_if, m); + LOGIF_RUNLOCK(); +} + void ipfw_bpf_mtap2(void *data, u_int dlen, struct mbuf *m) { + struct ifnet *logif; LOGIF_RLOCK_TRACKER; LOGIF_RLOCK(); - if (dlen == ETHER_HDR_LEN) { - if (V_log_if == NULL) { - LOGIF_RUNLOCK(); - return; - } - BPF_MTAP2(V_log_if, data, dlen, m); - } else if (dlen == PFLOG_HDRLEN) { - if (V_pflog_if == NULL) { - LOGIF_RUNLOCK(); - return; - } - BPF_MTAP2(V_pflog_if, data, dlen, m); + switch (dlen) { + case (ETHER_HDR_LEN): + logif = V_log_if; + break; + case (PFLOG_HDRLEN): + logif = V_pflog_if; + break; + default: +#ifdef INVARIANTS + panic("%s: unsupported len %d", __func__, dlen); +#endif + logif = NULL; } + + if (logif != NULL) + BPF_MTAP2(logif, data, dlen, m); + LOGIF_RUNLOCK(); } Index: sys/netpfil/ipfw/ip_fw_dynamic.c =================================================================== --- sys/netpfil/ipfw/ip_fw_dynamic.c +++ sys/netpfil/ipfw/ip_fw_dynamic.c @@ -1173,12 +1173,9 @@ * determine the scope zone id to resolve address scope ambiguity. */ if (IN6_IS_ADDR_LINKLOCAL(&args->f_id.src_ip6) || - IN6_IS_ADDR_LINKLOCAL(&args->f_id.dst_ip6)) { - MPASS(args->oif != NULL || - args->m->m_pkthdr.rcvif != NULL); - return (in6_getscopezone(args->oif != NULL ? 
args->oif: - args->m->m_pkthdr.rcvif, IPV6_ADDR_SCOPE_LINKLOCAL)); - } + IN6_IS_ADDR_LINKLOCAL(&args->f_id.dst_ip6)) + return (in6_getscopezone(args->ifp, IPV6_ADDR_SCOPE_LINKLOCAL)); + return (0); } Index: sys/netpfil/ipfw/ip_fw_log.c =================================================================== --- sys/netpfil/ipfw/ip_fw_log.c +++ sys/netpfil/ipfw/ip_fw_log.c @@ -99,30 +99,32 @@ */ void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, - struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif, - u_short offset, uint32_t tablearg, struct ip *ip) + struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip) { char *action; int limit_reached = 0; char action2[92], proto[128], fragment[32]; if (V_fw_verbose == 0) { - if (args->flags & IPFW_ARGS_ETHER) /* layer2, use orig hdr */ - ipfw_bpf_mtap2(args->eh, ETHER_HDR_LEN, m); + if (args->flags & IPFW_ARGS_LENMASK) + ipfw_bpf_tap(args->mem, IPFW_ARGS_LENGTH(args->flags)); + else if (args->flags & IPFW_ARGS_ETHER) + /* layer2, use orig hdr */ + ipfw_bpf_mtap(args->m); else { /* Add fake header. Later we will store * more info in the header. */ if (ip->ip_v == 4) ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00", - ETHER_HDR_LEN, m); + ETHER_HDR_LEN, args->m); else if (ip->ip_v == 6) ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd", - ETHER_HDR_LEN, m); + ETHER_HDR_LEN, args->m); else /* Obviously bogus EtherType. */ ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff", - ETHER_HDR_LEN, m); + ETHER_HDR_LEN, args->m); } return; } @@ -405,19 +407,14 @@ } } #ifdef __FreeBSD__ - if (oif || m->m_pkthdr.rcvif) - log(LOG_SECURITY | LOG_INFO, - "ipfw: %d %s %s %s via %s%s\n", - f ? f->rulenum : -1, - action, proto, oif ? "out" : "in", - oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname, - fragment); - else + log(LOG_SECURITY | LOG_INFO, "ipfw: %d %s %s %s via %s%s\n", + f ? f->rulenum : -1, action, proto, + args->flags & IPFW_ARGS_OUT ? 
"out" : "in", args->ifp->if_xname, + fragment); +#else + log(LOG_SECURITY | LOG_INFO, "ipfw: %d %s %s [no if info]%s\n", + f ? f->rulenum : -1, action, proto, fragment); #endif - log(LOG_SECURITY | LOG_INFO, - "ipfw: %d %s %s [no if info]%s\n", - f ? f->rulenum : -1, - action, proto, fragment); if (limit_reached) log(LOG_SECURITY | LOG_NOTICE, "ipfw: limit %d reached on entry %d\n", Index: sys/netpfil/ipfw/ip_fw_nat.c =================================================================== --- sys/netpfil/ipfw/ip_fw_nat.c +++ sys/netpfil/ipfw/ip_fw_nat.c @@ -347,7 +347,7 @@ /* Check if this is 'global' instance */ if (t == NULL) { - if (args->oif == NULL) { + if (args->flags & IPFW_ARGS_IN) { /* Wrong direction, skip processing */ args->m = mcl; return (IP_FW_NAT); @@ -374,7 +374,7 @@ return (IP_FW_NAT); } } else { - if (args->oif == NULL) + if (args->flags & IPFW_ARGS_IN) retval = LibAliasIn(t->lib, c, mcl->m_len + M_TRAILINGSPACE(mcl)); else @@ -391,7 +391,8 @@ * PKT_ALIAS_DENY_INCOMING flag is set. */ if (retval == PKT_ALIAS_ERROR || - (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT || + ((args->flags & IPFW_ARGS_IN) && + (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT || (retval == PKT_ALIAS_IGNORED && (t->mode & PKT_ALIAS_DENY_INCOMING) != 0)))) { /* XXX - should i add some logging? */ Index: sys/netpfil/ipfw/ip_fw_pfil.c =================================================================== --- sys/netpfil/ipfw/ip_fw_pfil.c +++ sys/netpfil/ipfw/ip_fw_pfil.c @@ -85,7 +85,7 @@ int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); /* Forward declarations. */ -static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int); +static int ipfw_divert(struct mbuf **, struct ip_fw_args *, bool); #ifdef SYSCTL_NODE @@ -118,7 +118,7 @@ * The packet may be consumed. 
*/ static pfil_return_t -ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int dir, +ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { struct ip_fw_args args; @@ -126,9 +126,7 @@ pfil_return_t ret; int ipfw; - /* convert dir to IPFW values */ - dir = (dir & PFIL_IN) ? DIR_IN : DIR_OUT; - args.flags = 0; + args.flags = (flags & PFIL_IN) ? IPFW_ARGS_IN : IPFW_ARGS_OUT; again: /* * extract and remove the tag if present. If we are left @@ -144,7 +142,7 @@ } args.m = *m0; - args.oif = dir == DIR_OUT ? ifp : NULL; + args.ifp = ifp; args.inp = inp; ipfw = ipfw_chk(&args); @@ -198,7 +196,7 @@ * m_tag_find. Outgoing packets may be tagged, so we * reuse the tag if present. */ - tag = (dir == DIR_IN) ? NULL : + tag = (flags & PFIL_IN) ? NULL : m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL); if (tag != NULL) { m_tag_unlink(*m0, tag); @@ -255,10 +253,8 @@ break; } MPASS(args.flags & IPFW_ARGS_REF); - if (mtod(*m0, struct ip *)->ip_v == 4) - (void )ip_dn_io_ptr(m0, dir, &args); - else if (mtod(*m0, struct ip *)->ip_v == 6) - (void )ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args); + if (args.flags & (IPFW_ARGS_IP4 | IPFW_ARGS_IP6)) + (void )ip_dn_io_ptr(m0, &args); else { ret = PFIL_DROPPED; break; @@ -282,8 +278,7 @@ break; } MPASS(args.flags & IPFW_ARGS_REF); - (void )ipfw_divert(m0, dir, &args.rule, - (ipfw == IP_FW_TEE) ? 1 : 0); + (void )ipfw_divert(m0, &args, ipfw == IP_FW_TEE); /* continue processing for the original packet (tee). */ if (*m0) goto again; @@ -297,8 +292,7 @@ break; } MPASS(args.flags & IPFW_ARGS_REF); - (void )ng_ipfw_input_p(m0, dir, &args, - (ipfw == IP_FW_NGTEE) ? 1 : 0); + (void )ng_ipfw_input_p(m0, &args, ipfw == IP_FW_NGTEE); if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */ goto again; /* continue with packet */ ret = PFIL_CONSUMED; @@ -334,68 +328,50 @@ * ipfw processing for ethernet packets (in and out). 
*/ static pfil_return_t -ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int dir, +ipfw_check_frame(pfil_packet_t p, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { struct ip_fw_args args; - struct ether_header save_eh; - struct ether_header *eh; - struct m_tag *mtag; - struct mbuf *m; pfil_return_t ret; - int i; + bool mem, realloc; + int ipfw; - args.flags = IPFW_ARGS_ETHER; -again: - /* fetch start point from rule, if any. remove the tag if present. */ - mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL); - if (mtag != NULL) { - args.rule = *((struct ipfw_rule_ref *)(mtag+1)); - m_tag_delete(*m0, mtag); - if (args.rule.info & IPFW_ONEPASS) - return (0); - args.flags |= IPFW_ARGS_REF; + if (flags & PFIL_MEMPTR) { + mem = true; + realloc = false; + args.flags = PFIL_LENGTH(flags) | IPFW_ARGS_ETHER; + args.mem = p.mem; + } else { + mem = realloc = false; + args.flags = IPFW_ARGS_ETHER; } + args.flags |= (flags & PFIL_IN) ? IPFW_ARGS_IN : IPFW_ARGS_OUT; + args.ifp = ifp; + args.inp = inp; - /* I need some amt of data to be contiguous */ - m = *m0; - i = min(m->m_pkthdr.len, max_protohdr); - if (m->m_len < i) { - m = m_pullup(m, i); - if (m == NULL) { - *m0 = m; - return (0); - } - } - eh = mtod(m, struct ether_header *); - save_eh = *eh; /* save copy for restore below */ - m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */ - - args.m = m; /* the packet we are looking at */ - args.oif = dir & PFIL_OUT ? ifp: NULL; /* destination, if any */ - args.eh = &save_eh; /* MAC header for bridged/MAC packets */ - args.inp = inp; /* used by ipfw uid/gid/jail rules */ - i = ipfw_chk(&args); - m = args.m; - if (m != NULL) { +again: + if (!mem) { /* - * Restore Ethernet header, as needed, in case the - * mbuf chain was replaced by ipfw. + * Fetch start point from rule, if any. + * Remove the tag if present. 
*/ - M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); - if (m == NULL) { - *m0 = NULL; - return (0); + struct m_tag *mtag; + + mtag = m_tag_locate(*p.m, MTAG_IPFW_RULE, 0, NULL); + if (mtag != NULL) { + args.rule = *((struct ipfw_rule_ref *)(mtag+1)); + m_tag_delete(*p.m, mtag); + if (args.rule.info & IPFW_ONEPASS) + return (PFIL_PASS); + args.flags |= IPFW_ARGS_REF; } - if (eh != mtod(m, struct ether_header *)) - bcopy(&save_eh, mtod(m, struct ether_header *), - ETHER_HDR_LEN); + args.m = *p.m; } - *m0 = m; + + ipfw = ipfw_chk(&args); ret = PFIL_PASS; - /* Check result of ipfw_chk() */ - switch (i) { + switch (ipfw) { case IP_FW_PASS: break; @@ -408,10 +384,16 @@ ret = PFIL_DROPPED; break; } - *m0 = NULL; - dir = (dir & PFIL_IN) ? DIR_IN : DIR_OUT; + if (mem) { + if (pfil_realloc(&p, flags, ifp) != 0) { + ret = PFIL_DROPPED; + break; + } + mem = false; + realloc = true; + } MPASS(args.flags & IPFW_ARGS_REF); - ip_dn_io_ptr(&m, dir | PROTO_LAYER2, &args); + ip_dn_io_ptr(p.m, &args); return (PFIL_CONSUMED); case IP_FW_NGTEE: @@ -420,10 +402,17 @@ ret = PFIL_DROPPED; break; } + if (mem) { + if (pfil_realloc(&p, flags, ifp) != 0) { + ret = PFIL_DROPPED; + break; + } + mem = false; + realloc = true; + } MPASS(args.flags & IPFW_ARGS_REF); - (void )ng_ipfw_input_p(m0, (dir & PFIL_IN) ? DIR_IN : DIR_OUT, - &args, (i == IP_FW_NGTEE) ? 
1 : 0); - if (i == IP_FW_NGTEE) /* ignore errors for NGTEE */ + (void )ng_ipfw_input_p(p.m, &args, ipfw == IP_FW_NGTEE); + if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */ goto again; /* continue with packet */ ret = PFIL_CONSUMED; break; @@ -432,19 +421,21 @@ KASSERT(0, ("%s: unknown retval", __func__)); } - if (ret != PFIL_PASS) { - if (*m0) - FREE_PKT(*m0); - *m0 = NULL; + if (!mem && ret != PFIL_PASS) { + if (*p.m) + FREE_PKT(*p.m); + *p.m = NULL; } + if (realloc && ret == PFIL_PASS) + ret = PFIL_REALLOCED; + return (ret); } /* do the divert, return 1 on error 0 on success */ static int -ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule, - int tee) +ipfw_divert(struct mbuf **m0, struct ip_fw_args *args, bool tee) { /* * ipfw_chk() has already tagged the packet with the divert tag. @@ -456,7 +447,7 @@ struct m_tag *tag; /* Cloning needed for tee? */ - if (tee == 0) { + if (tee == false) { clone = *m0; /* use the original mbuf */ *m0 = NULL; } else { @@ -476,7 +467,7 @@ * Note that we now have the 'reass' ipfw option so if we care * we can do it before a 'tee'. */ - if (!tee) switch (ip->ip_v) { + if (tee == false) switch (ip->ip_v) { case IPVERSION: if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) { int hlen; @@ -525,11 +516,11 @@ FREE_PKT(clone); return 1; } - *((struct ipfw_rule_ref *)(tag+1)) = *rule; + *((struct ipfw_rule_ref *)(tag+1)) = args->rule; m_tag_prepend(clone, tag); /* Do the dirty job... 
*/ - ip_divert_ptr(clone, incoming); + ip_divert_ptr(clone, args->flags & IPFW_ARGS_IN); return 0; } @@ -553,7 +544,7 @@ pfil_hook_t *h; pha.pa_version = PFIL_VERSION; - pha.pa_flags = PFIL_IN | PFIL_OUT; + pha.pa_flags = PFIL_IN | PFIL_OUT | PFIL_MEMPTR; pha.pa_modname = "ipfw"; pha.pa_ruleset = NULL; Index: sys/netpfil/ipfw/ip_fw_private.h =================================================================== --- sys/netpfil/ipfw/ip_fw_private.h +++ sys/netpfil/ipfw/ip_fw_private.h @@ -85,12 +85,19 @@ */ struct ip_fw_args { uint32_t flags; -#define IPFW_ARGS_ETHER 0x0001 /* has valid ethernet header */ -#define IPFW_ARGS_NH4 0x0002 /* has IPv4 next hop in hopstore */ -#define IPFW_ARGS_NH6 0x0004 /* has IPv6 next hop in hopstore */ -#define IPFW_ARGS_NH4PTR 0x0008 /* has IPv4 next hop in next_hop */ -#define IPFW_ARGS_NH6PTR 0x0010 /* has IPv6 next hop in next_hop6 */ -#define IPFW_ARGS_REF 0x0020 /* has valid ipfw_rule_ref */ +#define IPFW_ARGS_ETHER 0x00010000 /* valid ethernet header */ +#define IPFW_ARGS_NH4 0x00020000 /* IPv4 next hop in hopstore */ +#define IPFW_ARGS_NH6 0x00040000 /* IPv6 next hop in hopstore */ +#define IPFW_ARGS_NH4PTR 0x00080000 /* IPv4 next hop in next_hop */ +#define IPFW_ARGS_NH6PTR 0x00100000 /* IPv6 next hop in next_hop6 */ +#define IPFW_ARGS_REF 0x00200000 /* valid ipfw_rule_ref */ +#define IPFW_ARGS_IN 0x00400000 /* called on input */ +#define IPFW_ARGS_OUT 0x00800000 /* called on output */ +#define IPFW_ARGS_IP4 0x01000000 /* belongs to v4 ISR */ +#define IPFW_ARGS_IP6 0x02000000 /* belongs to v6 ISR */ +#define IPFW_ARGS_DROP 0x04000000 /* drop it (dummynet) */ +#define IPFW_ARGS_LENMASK 0x0000ffff /* length of data in *mem */ +#define IPFW_ARGS_LENGTH(f) ((f) & IPFW_ARGS_LENMASK) /* * On return, it points to the matching rule. 
* On entry, rule.slot > 0 means the info is valid and @@ -100,18 +107,15 @@ */ struct ipfw_rule_ref rule; /* match/restart info */ - struct ifnet *oif; /* output interface */ + struct ifnet *ifp; /* input/output interface */ struct inpcb *inp; union { /* - * We don't support forwarding on layer2, thus we can - * keep eh pointer in this union. * next_hop[6] pointers can be used to point to next hop * stored in rule's opcode to avoid copying into hopstore. * Also, it is expected that all 0x1-0x10 flags are mutually * exclusive. */ - struct ether_header *eh; /* for bridged packets */ struct sockaddr_in *next_hop; struct sockaddr_in6 *next_hop6; /* ipfw next hop storage */ @@ -122,35 +126,15 @@ uint16_t sin6_port; } hopstore6; }; - - struct mbuf *m; /* the mbuf chain */ + union { + struct mbuf *m; /* the mbuf chain */ + void *mem; /* or memory pointer */ + }; struct ipfw_flow_id f_id; /* grabbed from IP header */ }; MALLOC_DECLARE(M_IPFW); -/* - * Hooks sometime need to know the direction of the packet - * (divert, dummynet, netgraph, ...) 
- * We use a generic definition here, with bit0-1 indicating the - * direction, bit 2 indicating layer2 or 3, bit 3-4 indicating the - * specific protocol - * indicating the protocol (if necessary) - */ -enum { - DIR_MASK = 0x3, - DIR_OUT = 0, - DIR_IN = 1, - DIR_FWD = 2, - DIR_DROP = 3, - PROTO_LAYER2 = 0x4, /* set for layer 2 */ - /* PROTO_DEFAULT = 0, */ - PROTO_IPV4 = 0x08, - PROTO_IPV6 = 0x10, - PROTO_IFB = 0x0c, /* layer2 + ifbridge */ - /* PROTO_OLDBDG = 0x14, unused, old bridge */ -}; - /* wrapper for freeing a packet, in case we need to do more work */ #ifndef FREE_PKT #if defined(__linux__) || defined(_WIN32) @@ -179,10 +163,11 @@ void ipfw_bpf_init(int); void ipfw_bpf_uninit(int); +void ipfw_bpf_tap(u_char *, u_int); +void ipfw_bpf_mtap(struct mbuf *); void ipfw_bpf_mtap2(void *, u_int, struct mbuf *); void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, - struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif, - u_short offset, uint32_t tablearg, struct ip *ip); + struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip); VNET_DECLARE(u_int64_t, norule_counter); #define V_norule_counter VNET(norule_counter) VNET_DECLARE(int, verbose_limit); Index: sys/netpfil/pf/pf.c =================================================================== --- sys/netpfil/pf/pf.c +++ sys/netpfil/pf/pf.c @@ -91,8 +91,6 @@ #include #include -#include /* XXX: only for DIR_IN/DIR_OUT */ - #ifdef INET6 #include #include @@ -6184,7 +6182,7 @@ m->m_flags &= ~M_FASTFWD_OURS; } } - ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); + ip_divert_ptr(*m0, dir == PF_IN); *m0 = NULL; return (action);