Index: sys/net/iflib.c =================================================================== --- sys/net/iflib.c +++ sys/net/iflib.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -434,6 +435,7 @@ if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; + struct pfil_head *pfil; uint16_t ifr_id; uint8_t ifr_lro_enabled; uint8_t ifr_nfl; @@ -453,7 +455,6 @@ typedef struct if_rxsd { caddr_t *ifsd_cl; - struct mbuf **ifsd_m; iflib_fl_t ifsd_fl; qidx_t ifsd_cidx; } *if_rxsd_t; @@ -2036,11 +2037,12 @@ bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx], BUS_DMASYNC_PREREAD); - MPASS(sd_m[frag_idx] == NULL); - if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { - break; + if (sd_m[frag_idx] == NULL) { + if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { + break; + } + sd_m[frag_idx] = m; } - sd_m[frag_idx] = m; bit_set(fl->ifl_rx_bitmap, frag_idx); #if MEMORY_LOGGING fl->ifl_m_enqueued++; @@ -2496,13 +2498,16 @@ prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); } -static void -rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) +static struct mbuf * +rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, bool unload, if_rxsd_t sd, + int *pf_rv, if_rxd_info_t ri) { int flid, cidx; bus_dmamap_t map; iflib_fl_t fl; - int next; + caddr_t payload; + struct mbuf *m; + int next, len; map = NULL; flid = irf->irf_flid; @@ -2510,7 +2515,7 @@ fl = &rxq->ifr_fl[flid]; sd->ifsd_fl = fl; sd->ifsd_cidx = cidx; - sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx]; + m = fl->ifl_sds.ifsd_m[cidx]; sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; fl->ifl_credits--; #if MEMORY_LOGGING @@ -2526,39 +2531,88 @@ /* not valid assert if bxe really does SGE from non-contiguous elements */ MPASS(fl->ifl_cidx == cidx); bus_dmamap_sync(fl->ifl_buf_tag, map, BUS_DMASYNC_POSTREAD); + + if (rxq->pfil != NULL && PFIL_HOOKED_IN(rxq->pfil) && pf_rv != NULL) { + payload = *sd->ifsd_cl; + payload += ri->iri_pad; + len = ri->iri_len - ri->iri_pad; + *pf_rv = pfil_run_hooks(rxq->pfil, payload, ri->iri_ifp, + len | PFIL_MEMPTR | PFIL_IN, NULL); + switch (*pf_rv) { + case PFIL_DROPPED: + case PFIL_CONSUMED: + /* + * The filter ate it. Everything is recycled. + */ + m = NULL; + unload = 0; + break; + case PFIL_REALLOCED: + /* + * The filter copied it. Everything is recycled. + */ + m = pfil_mem2mbuf(payload); + unload = 0; + break; + default: + /* + * Filter said it was OK, so receive like + * normal + */ + MPASS(*pf_rv == PFIL_PASS); + fl->ifl_sds.ifsd_m[cidx] = NULL; + } + } else { + fl->ifl_sds.ifsd_m[cidx] = NULL; + *pf_rv = PFIL_PASS; + } + if (unload) bus_dmamap_unload(fl->ifl_buf_tag, map); fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); if (__predict_false(fl->ifl_cidx == 0)) fl->ifl_gen = 0; bit_clear(fl->ifl_rx_bitmap, cidx); + return (m); } static struct mbuf * -assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd) +assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd, int *pf_rv) { int i, padlen , flags; struct mbuf *m, *mh, *mt; caddr_t cl; + int *pf_rv_ptr; + bool consumed; i = 0; mh = NULL; + consumed = false; + *pf_rv = PFIL_PASS; + pf_rv_ptr = pf_rv; do { - rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd); + m = rxd_frag_to_sd(rxq, &ri->iri_frags[i], !consumed, sd, + pf_rv_ptr, ri); MPASS(*sd->ifsd_cl != NULL); - MPASS(*sd->ifsd_m != NULL); - /* Don't include zero-length frags */ - if (ri->iri_frags[i].irf_len == 0) { + /* + * Exclude zero-length frags & frags from + * packets the filter has consumed or dropped + */ + if (ri->iri_frags[i].irf_len == 0 || consumed || + *pf_rv == PFIL_CONSUMED || *pf_rv == PFIL_DROPPED) { + if (mh == NULL) { + /* everything saved here */ + consumed = true; + pf_rv_ptr = NULL; + continue; + } /* XXX we can save the cluster here, but not the mbuf */ - m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0); - m_free(*sd->ifsd_m); - *sd->ifsd_m = NULL; + m_init(m, M_NOWAIT, MT_DATA, 0); + m_free(m); continue; } - m = *sd->ifsd_m; - *sd->ifsd_m = NULL; if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; @@ -2570,6 +2624,10 @@ /* assuming padding is only on the first fragment */ padlen = 0; } + if (*pf_rv == PFIL_CONSUMED) { + /* mbuf is already in a good state */ + continue; + } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; @@ -2595,22 +2653,28 @@ { struct if_rxsd sd; struct mbuf *m; + int pf_rv; /* should I merge this back in now that the two paths are basically duplicated? */ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { - rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd); - m = *sd.ifsd_m; - *sd.ifsd_m = NULL; - m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); + m = rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd, + &pf_rv, ri); + if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) + return (m); + if (pf_rv == PFIL_PASS) { + m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT - if (!IP_ALIGNED(m)) - m->m_data += 2; + if (!IP_ALIGNED(m)) + m->m_data += 2; #endif - memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); - m->m_len = ri->iri_frags[0].irf_len; - } else { - m = assemble_segments(rxq, ri, &sd); + memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); + m->m_len = ri->iri_frags[0].irf_len; + } + } else { + m = assemble_segments(rxq, ri, &sd, &pf_rv); + if (pf_rv != PFIL_PASS && pf_rv != PFIL_REALLOCED) + return (m); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; @@ -2707,6 +2771,8 @@ return (false); } + /* pfil needs the vnet to be set */ + CURVNET_SET_QUIET(ifp->if_vnet); for (budget_left = budget; budget_left > 0 && avail > 0;) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); @@ -2724,6 +2790,8 @@ if (err) goto err; + rx_pkts += 1; + rx_bytes += ri.iri_len; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ @@ -2747,6 +2815,7 @@ avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) { + /* XXX no longer makes sense */ DBG_COUNTER_INC(rx_mbuf_null); continue; } @@ -2758,6 +2827,7 @@ mt = m; } } + CURVNET_RESTORE(); /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); @@ -4374,6 +4444,42 @@ } } +static void +iflib_add_pfil(if_ctx_t ctx) +{ + struct pfil_head *pfil; + struct pfil_head_args pa; + iflib_rxq_t rxq; + int i; + + pa.pa_version = PFIL_VERSION; + pa.pa_flags = PFIL_IN; + pa.pa_type = PFIL_TYPE_ETHERNET; + pa.pa_headname = ctx->ifc_ifp->if_xname; + pfil = pfil_head_register(&pa); + + for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { + rxq->pfil = pfil; + } +} + +static void +iflib_rem_pfil(if_ctx_t ctx) +{ + struct pfil_head *pfil; + iflib_rxq_t rxq; + int i; + + + rxq = ctx->ifc_rxqs; + pfil = rxq->pfil; + for (i = 0; i < NRXQSETS(ctx); i++, rxq++) { + rxq->pfil = NULL; + } + pfil_head_unregister(pfil); + +} + int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { @@ -4577,6 +4683,7 @@ if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); + iflib_add_pfil(ctx); ctx->ifc_flags |= IFC_INIT_DONE; CTX_UNLOCK(ctx); return (0); @@ -4908,6 +5015,7 @@ iflib_netmap_detach(ifp); ether_ifdetach(ifp); + iflib_rem_pfil(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */