Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -1861,37 +1861,37 @@ dev/ixgb/if_ixgb.c optional ixgb dev/ixgb/ixgb_ee.c optional ixgb dev/ixgb/ixgb_hw.c optional ixgb -dev/ixgbe/if_ix.c optional ix inet \ +dev/ixgbe/if_ix.c optional ix inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP" dev/ixgbe/if_ixv.c optional ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP" -dev/ixgbe/ix_txrx.c optional ix inet | ixv inet \ +dev/ixgbe/ix_txrx.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_osdep.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_osdep.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_phy.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_phy.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_api.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_api.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_common.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_common.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_mbx.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_mbx.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_vf.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_vf.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_82598.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_82598.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_82599.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_82599.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_x540.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_x540.c 
optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_x550.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_x550.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_dcb.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_dcb.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_dcb_82598.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_dcb_82598.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_dcb_82599.c optional ix inet | ixv inet \ +dev/ixgbe/ixgbe_dcb_82599.c optional ix inet iflib | ixv inet iflib \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/jme/if_jme.c optional jme pci dev/joy/joy.c optional joy @@ -3505,8 +3505,8 @@ net/if_tap.c optional tap net/if_vlan.c optional vlan net/if_vxlan.c optional vxlan inet | vxlan inet6 -net/ifdi_if.m optional ether -net/iflib.c optional ether +net/ifdi_if.m optional ether pci +net/iflib.c optional ether pci net/mp_ring.c optional ether net/mppcc.c optional netgraph_mppc_compression net/mppcd.c optional netgraph_mppc_compression Index: sys/conf/files.amd64 =================================================================== --- sys/conf/files.amd64 +++ sys/conf/files.amd64 @@ -216,25 +216,25 @@ dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux32 -dev/ixl/if_ixl.c optional ixl pci \ +dev/ixl/if_ixl.c optional ixl iflib \ compile-with "${NORMAL_C} -I$S/dev/ixl" -dev/ixl/if_ixlv.c optional ixlv pci \ +dev/ixl/if_ixlv.c optional ixlv iflib \ compile-with "${NORMAL_C} -I$S/dev/ixl" -dev/ixl/ixlvc.c optional ixlv pci \ +dev/ixl/ixlvc.c optional ixlv iflib \ compile-with "${NORMAL_C} -I$S/dev/ixl" -dev/ixl/ixl_txrx.c optional ixl pci | ixlv pci \ +dev/ixl/ixl_txrx.c optional ixl iflib | ixlv iflib \ compile-with "${NORMAL_C} -I$S/dev/ixl" -dev/ixl/i40e_osdep.c 
optional ixl pci | ixlv pci \ +dev/ixl/i40e_osdep.c optional ixl iflib | ixlv iflib \ compile-with "${NORMAL_C} -I$S/dev/ixl" -dev/ixl/i40e_lan_hmc.c optional ixl pci | ixlv pci \ +dev/ixl/i40e_lan_hmc.c optional ixl iflib | ixlv iflib \ compile-with "${NORMAL_C} -I$S/dev/ixl" -dev/ixl/i40e_hmc.c optional ixl pci | ixlv pci \ +dev/ixl/i40e_hmc.c optional ixl iflib | ixlv iflib \ compile-with "${NORMAL_C} -I$S/dev/ixl" -dev/ixl/i40e_common.c optional ixl pci | ixlv pci \ +dev/ixl/i40e_common.c optional ixl iflib | ixlv iflib \ compile-with "${NORMAL_C} -I$S/dev/ixl" -dev/ixl/i40e_nvm.c optional ixl pci | ixlv pci \ +dev/ixl/i40e_nvm.c optional ixl iflib | ixlv iflib \ compile-with "${NORMAL_C} -I$S/dev/ixl" -dev/ixl/i40e_adminq.c optional ixl pci | ixlv pci \ +dev/ixl/i40e_adminq.c optional ixl iflib | ixlv iflib \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc Index: sys/net/if.c =================================================================== --- sys/net/if.c +++ sys/net/if.c @@ -3874,6 +3874,19 @@ return (count); } +int +if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg) +{ + struct ifmultiaddr *ifma; + int cnt = 0; + + if_maddr_rlock(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) + cnt += filter(arg, ifma, cnt); + if_maddr_runlock(ifp); + return (cnt); +} + struct mbuf * if_dequeue(if_t ifp) { Index: sys/net/if_var.h =================================================================== --- sys/net/if_var.h +++ sys/net/if_var.h @@ -628,6 +628,7 @@ int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max); int if_multiaddr_count(if_t ifp, int max); +int if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg); int if_getamcount(if_t ifp); struct ifaddr * if_getifaddr(if_t ifp); Index: sys/net/iflib.h =================================================================== --- sys/net/iflib.h +++ sys/net/iflib.h @@ 
-69,16 +69,31 @@ } *if_rxd_info_t; #define IPI_TX_INTR 0x1 /* send an interrupt when this packet is sent */ +#define IPI_TX_IPV4 0x2 /* ethertype IPv4 */ +#define IPI_TX_IPV6 0x4 /* ethertype IPv6 */ typedef struct if_pkt_info { - struct mbuf *ipi_m; /* tx packet */ + uint32_t ipi_len; /* packet length */ bus_dma_segment_t *ipi_segs; /* physical addresses */ uint16_t ipi_qsidx; /* queue set index */ uint16_t ipi_nsegs; /* number of segments */ uint16_t ipi_ndescs; /* number of descriptors used by encap */ - uint16_t ipi_flags; /* per-packet flags */ + uint16_t ipi_flags; /* iflib per-packet flags */ uint32_t ipi_pidx; /* start pidx for encap */ uint32_t ipi_new_pidx; /* next available pidx post-encap */ + /* offload handling */ + uint64_t ipi_csum_flags; /* packet checksum flags */ + uint16_t ipi_tso_segsz; /* tso segment size */ + uint16_t ipi_mflags; /* packet mbuf flags */ + uint16_t ipi_vtag; /* VLAN tag */ + uint16_t ipi_etype; /* ether header type */ + uint8_t ipi_ehdrlen; /* ether header length */ + uint8_t ipi_ip_hlen; /* ip header length */ + uint8_t ipi_tcp_hlen; /* tcp header length */ + uint8_t ipi_tcp_hflags; /* tcp header flags */ + uint8_t ipi_ipproto; /* ip protocol */ + + /* implied padding */ } *if_pkt_info_t; typedef struct if_irq { Index: sys/net/iflib.c =================================================================== --- sys/net/iflib.c +++ sys/net/iflib.c @@ -54,8 +54,15 @@ #include #include #include +#include +#include +#include +#include +#include + #include +#include #include #include @@ -63,9 +70,11 @@ #include #include #include +#include #include + #include "ifdi_if.h" /* @@ -122,6 +131,7 @@ iflib_qset_t ifc_qsets; uint32_t ifc_if_flags; uint32_t ifc_flags; + uint32_t ifc_max_fl_buf_size; int ifc_in_detach; int ifc_link_state; @@ -253,6 +263,12 @@ #define IFC_LEGACY 0x1 #define IFC_QFLUSH 0x2 +#define IFC_MULTISEG 0x4 +#define IFC_DMAR 0x8 + +#define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ + 
CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ + CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) struct iflib_txq { if_ctx_t ift_ctx; @@ -312,7 +328,7 @@ iflib_rxq_t ifl_rxq; uint8_t ifl_id; iflib_dma_info_t ifl_ifdi; - uint64_t ifl_phys_addrs[256]; + uint64_t ifl_bus_addrs[256]; caddr_t ifl_vm_addrs[256]; }; @@ -375,6 +391,12 @@ #define mtx_held(m) (((m)->mtx_lock & ~MTX_FLAGMASK) != (uintptr_t)0) +/* + * Only allow a single packet to take up most 1/nth of the tx ring + */ +#define MAX_SINGLE_PACKET_FRACTION 12 +#define IF_BAD_DMA (uint64_t)-1 + #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) @@ -537,30 +559,6 @@ static uint32_t iflib_txq_can_drain(struct mp_ring *); static int iflib_register(if_ctx_t); - -#if IFLIB_DEBUG -static void * -if_dbg_malloc(unsigned long size, struct malloc_type *type, int flags) -{ - caddr_t p, ptmp; - char buf[4] = {0, 0, 0, 0}; - int i; - - ptmp = p = malloc(size, type, flags); - - if ((flags & M_ZERO) == 0) - return (p); - - for (i = 0; i < size; i += 4, ptmp += 4) { - if (bcmp(buf, ptmp, 4) != 0) - panic("received non-zero memory from malloc"); - } - return (p); -} - -#define malloc if_dbg_malloc -#endif - #ifdef DEV_NETMAP #include #include @@ -661,7 +659,6 @@ if_ctx_t ctx = ifp->if_softc; iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; - pi.ipi_m = NULL; pi.ipi_segs = txq->ift_segs; pi.ipi_qsidx = kring->ring_id; pi.ipi_ndescs = 0; @@ -1041,26 +1038,25 @@ device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); - goto fail_2; + goto fail_1; } - dma->idi_paddr = 0; + dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); - if (err || dma->idi_paddr == 0) { + if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: %d\n", __func__, err); - goto fail_3; + goto fail_2; } dma->idi_size = size; return (0); -fail_3: - 
bus_dmamap_unload(dma->idi_tag, dma->idi_map); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); +fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; @@ -1073,11 +1069,11 @@ { if (dma->idi_tag == NULL) return; - if (dma->idi_paddr != 0) { + if (dma->idi_paddr != IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); - dma->idi_paddr = 0; + dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); @@ -1154,8 +1150,8 @@ iflib_sd_t txsd; int err, i, nsegments; - if (ctx->ifc_softc_ctx.isc_tx_nsegments > sctx->isc_ntxd / 12) - ctx->ifc_softc_ctx.isc_tx_nsegments = max(1, sctx->isc_ntxd / 12); + if (ctx->ifc_softc_ctx.isc_tx_nsegments > sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION) + ctx->ifc_softc_ctx.isc_tx_nsegments = max(1, sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION); nsegments = ctx->ifc_softc_ctx.isc_tx_nsegments; MPASS(sctx->isc_ntxd > 0); @@ -1353,6 +1349,28 @@ return (err); } + +/* + * Internal service routines + */ + +struct rxq_refill_cb_arg { + int error; + bus_dma_segment_t seg; + int nseg; +}; + +static void +_rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) +{ + struct rxq_refill_cb_arg *cb_arg = arg; + + cb_arg->error = error; + cb_arg->seg = segs[0]; + cb_arg->nseg = nseg; +} + + /** * rxq_refill - refill an rxq free-buffer list * @ctx: the iflib context @@ -1370,20 +1388,21 @@ iflib_sd_t rxsd = &fl->ifl_sds[pidx]; caddr_t cl; int n, i = 0; - uint64_t phys_addr; + uint64_t bus_addr; + int err; n = count; MPASS(n > 0); MPASS(fl->ifl_credits >= 0); MPASS(fl->ifl_credits + n <= fl->ifl_size); -#ifdef INVARIANTS + if (pidx < fl->ifl_cidx) MPASS(pidx + n <= fl->ifl_cidx); if (pidx == fl->ifl_cidx && (fl->ifl_credits < fl->ifl_size)) MPASS(fl->ifl_gen == 0); if (pidx > fl->ifl_cidx) MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); -#endif + 
DBG_COUNTER_INC(fl_refills); if (n > 8) DBG_COUNTER_INC(fl_refills_large); @@ -1415,34 +1434,38 @@ rxsd->ifsd_flags |= RX_SW_DESC_MAP_CREATED; } #endif -#if !defined(__i386__) && !defined(__amd64__) +#if defined(__i386__) || defined(__amd64__) + if (__predict_true(!(ctx->ifc_flags & IFC_DMAR))) { + bus_addr = pmap_kextract((vm_offset_t)cl); + } else +#endif { - struct refill_rxq_cb_arg cb_arg; + struct rxq_refill_cb_arg cb_arg; + iflib_rxq_t q; + cb_arg.error = 0; - err = bus_dmamap_load(q->ifr_desc_tag, sd->ifsd_map, - cl, q->ifr_buf_size, refill_rxq_cb, &cb_arg, 0); + q = fl->ifl_rxq; + err = bus_dmamap_load(q->ifr_desc_tag, rxsd->ifsd_map, + cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0); if (err != 0 || cb_arg.error) { /* * !zone_pack ? */ - if (q->zone == zone_pack) - uma_zfree(q->ifr_zone, cl); + if (fl->ifl_zone == zone_pack) + uma_zfree(fl->ifl_zone, cl); m_free(m); n = 0; goto done; } - phys_addr = cb_arg.seg.ds_addr; + bus_addr = cb_arg.seg.ds_addr; } -#else - phys_addr = pmap_kextract((vm_offset_t)cl); -#endif rxsd->ifsd_flags |= RX_SW_DESC_INUSE; MPASS(rxsd->ifsd_m == NULL); rxsd->ifsd_cl = cl; rxsd->ifsd_m = m; - fl->ifl_phys_addrs[i] = phys_addr; + fl->ifl_bus_addrs[i] = bus_addr; fl->ifl_vm_addrs[i] = cl; rxsd++; fl->ifl_credits++; @@ -1455,14 +1478,12 @@ } if (n == 0 || i == 256) { ctx->isc_rxd_refill(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx, - fl->ifl_phys_addrs, fl->ifl_vm_addrs, i); + fl->ifl_bus_addrs, fl->ifl_vm_addrs, i); i = 0; pidx = fl->ifl_pidx; } } -#if !defined(__i386__) && !defined(__amd64__) done: -#endif DBG_COUNTER_INC(rxd_flush); if (fl->ifl_pidx == 0) pidx = fl->ifl_size - 1; @@ -1476,12 +1497,11 @@ { /* we avoid allowing pidx to catch up with cidx as it confuses ixl */ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; -#ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); 
-#endif + if (reclaimable > 0) _iflib_fl_refill(ctx, fl, min(max, reclaimable)); } @@ -1506,13 +1526,10 @@ } if (d->ifsd_cl != NULL) uma_zfree(fl->ifl_zone, d->ifsd_cl); - } -#ifdef INVARIANTS - else { + } else { MPASS(d->ifsd_cl == NULL); MPASS(d->ifsd_m == NULL); } -#endif d->ifsd_cl = NULL; d->ifsd_m = NULL; if (++cidx == fl->ifl_size) @@ -1539,7 +1556,7 @@ iflib_rxq_t rxq = fl->ifl_rxq; if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; - int err = 0; + /* * XXX don't set the max_frame_size to larger * than the hardware can handle @@ -1552,6 +1569,8 @@ fl->ifl_buf_size = MJUM9BYTES; else fl->ifl_buf_size = MJUM16BYTES; + if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) + ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; fl->ifl_cltype = m_gettype(fl->ifl_buf_size); fl->ifl_zone = m_getzone(fl->ifl_buf_size); @@ -1562,17 +1581,13 @@ /* Now replenish the mbufs */ MPASS(fl->ifl_credits == 0); -#if 0 - _iflib_fl_refill(ctx, fl, fl->ifl_size); - MPASS(fl->ifl_pidx == 0); - MPASS(fl->ifl_size == fl->ifl_credits); - MPASS(fl->ifl_gen == 1); -#endif /* avoid pre-allocating zillions of clusters to an idle card * potentially speeding up attach */ _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size)); MPASS(min(128, fl->ifl_size) == fl->ifl_credits); + if (min(128, fl->ifl_size) != fl->ifl_credits) + return (ENOBUFS); /* * handle failure */ @@ -1580,7 +1595,7 @@ MPASS(rxq->ifr_ifdi != NULL); bus_dmamap_sync(rxq->ifr_ifdi->idi_tag, rxq->ifr_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - return (err); + return (0); } /********************************************************************* @@ -1755,68 +1770,12 @@ IFDI_STOP(ctx); } -/* - * Internal service routines - */ - -#if !defined(__i386__) && !defined(__amd64__) -struct rxq_refill_cb_arg { - int error; - bus_dma_segment_t seg; - int nseg; -}; - -static void -_rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) -{ - struct rxq_refill_cb_arg *cb_arg = arg; - - cb_arg->error = 
error; - cb_arg->seg = segs[0]; - cb_arg->nseg = nseg; -} -#endif - -/* - * Process one software descriptor - */ static struct mbuf * -iflib_rxd_pkt_get(iflib_fl_t fl, if_rxd_info_t ri) +assemble_segments(iflib_fl_t fl, if_rxd_info_t ri, iflib_sd_t sd, struct mbuf *m, int len) { - iflib_sd_t sd_next, sd = &fl->ifl_sds[fl->ifl_cidx]; - uint32_t flags = 0; - caddr_t cl; - struct mbuf *m; - int cidx_next, len = ri->iri_len; - - MPASS(sd->ifsd_cl != NULL); - MPASS(sd->ifsd_m != NULL); + iflib_sd_t sd_next; + int cidx_next; - fl->ifl_credits--; - m = sd->ifsd_m; - sd->ifsd_m = NULL; - if (sd->ifsd_mh == NULL) - flags |= M_PKTHDR; - - /* SYNC ? */ - if (ri->iri_len <= IFLIB_RX_COPY_THRESH) { - m_init(m, fl->ifl_zone, fl->ifl_buf_size, M_NOWAIT, MT_DATA, flags); - memcpy(m->m_data, sd->ifsd_cl, ri->iri_len); - } else { - bus_dmamap_unload(fl->ifl_rxq->ifr_desc_tag, sd->ifsd_map); - cl = sd->ifsd_cl; - sd->ifsd_cl = NULL; - - flags |= M_EXT; - m_init(m, fl->ifl_zone, fl->ifl_buf_size, M_NOWAIT, MT_DATA, flags); - m_cljset(m, cl, fl->ifl_cltype); - } - - if (ri->iri_pad) { - m->m_data += ri->iri_pad; - len -= ri->iri_pad; - } - m->m_len = len; if (sd->ifsd_mh == NULL) m->m_pkthdr.len = len; else @@ -1856,8 +1815,54 @@ m = sd->ifsd_mh; sd->ifsd_mh = sd->ifsd_mt = NULL; } - if (m == NULL) + return (m); +} + + +/* + * Process one software descriptor + */ +static struct mbuf * +iflib_rxd_pkt_get(iflib_fl_t fl, if_rxd_info_t ri) +{ + iflib_sd_t sd = &fl->ifl_sds[fl->ifl_cidx]; + uint32_t flags = 0; + caddr_t cl; + struct mbuf *m; + int len = ri->iri_len; + + MPASS(sd->ifsd_cl != NULL); + MPASS(sd->ifsd_m != NULL); + + fl->ifl_credits--; + m = sd->ifsd_m; + sd->ifsd_m = NULL; + if (__predict_true(sd->ifsd_mh == NULL)) + flags |= M_PKTHDR; + + /* SYNC ? 
*/ + if (ri->iri_len <= IFLIB_RX_COPY_THRESH) { + m_init(m, fl->ifl_zone, fl->ifl_buf_size, M_NOWAIT, MT_DATA, flags); + memcpy(m->m_data, sd->ifsd_cl, ri->iri_len); + } else { + bus_dmamap_unload(fl->ifl_rxq->ifr_desc_tag, sd->ifsd_map); + cl = sd->ifsd_cl; + sd->ifsd_cl = NULL; + + flags |= M_EXT; + m_init(m, fl->ifl_zone, fl->ifl_buf_size, M_NOWAIT, MT_DATA, flags); + m_cljset(m, cl, fl->ifl_cltype); + } + + m->m_data += ri->iri_pad; + len -= ri->iri_pad; + m->m_len = len; + + if ((fl->ifl_rxq->ifr_ctx->ifc_flags & IFC_MULTISEG) && + (m = assemble_segments(fl, ri, sd, m, len)) == NULL) return (NULL); + else + m->m_pkthdr.len = len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; @@ -2026,10 +2031,10 @@ SLIST_REMOVE_HEAD(&rxq->ifr_lc.lro_active, next); tcp_lro_flush(&rxq->ifr_lc, queued); } -#ifdef INVARIANTS + if ((sctx->isc_flags & IFLIB_HAS_CQ) == 0) MPASS(cidx == *cidxp); -#endif + if (sctx->isc_flags & IFLIB_HAS_CQ) *cidxp = cidx; *genp = gen; @@ -2057,7 +2062,6 @@ if (__predict_false(txsd->ifsd_m != NULL)) { struct if_pkt_info pi; - pi.ipi_m = NULL; pi.ipi_qsidx = txq->ift_id; pi.ipi_pidx = txq->ift_pidx; ctx->isc_txd_encap(ctx->ifc_softc, &pi); @@ -2089,6 +2093,104 @@ txq->ift_db_pending = txq->ift_npending = 0; } + +#define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) +#define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) + +static int +iflib_parse_header(if_pkt_info_t pi, struct mbuf *m) +{ + struct ether_vlan_header *eh; + + pi->ipi_mflags = (m->m_flags & M_VLANTAG); + pi->ipi_csum_flags = m->m_pkthdr.csum_flags; + pi->ipi_vtag = (m->m_flags & M_VLANTAG) ? m->m_pkthdr.ether_vtag : 0; + pi->ipi_flags = 0; + + /* + * Determine where frame payload starts. + * Jump over vlan headers if already present, + * helpful for QinQ too. 
+ */ + eh = mtod(m, struct ether_vlan_header *); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + pi->ipi_etype = ntohs(eh->evl_proto); + pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + } else { + pi->ipi_etype = ntohs(eh->evl_encap_proto); + pi->ipi_ehdrlen = ETHER_HDR_LEN; + } + + switch (pi->ipi_etype) { +#ifdef INET + case ETHERTYPE_IP: + { + struct ip *ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); + struct tcphdr *th; + + MPASS(m->m_len >= pi->ipi_ehdrlen + sizeof(struct ip)); + pi->ipi_ip_hlen = ip->ip_hl << 2; /* set first: th below is located from it */ + th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); + pi->ipi_ipproto = ip->ip_p; + pi->ipi_flags |= IPI_TX_IPV4; +#ifdef ATR + if (ip->ip_p == IPPROTO_TCP) + pi->ipi_tcp_hflags = th->th_flags; +#endif + if (IS_TSO4(pi)) { + + if (__predict_false(ip->ip_p != IPPROTO_TCP)) + return (ENXIO); + + MPASS(m->m_len >= pi->ipi_ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr)); + ip->ip_sum = 0; + th->th_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htons(IPPROTO_TCP)); + pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; + pi->ipi_tcp_hlen = th->th_off << 2; + } + break; + } +#endif +#ifdef INET6 + case ETHERTYPE_IPV6: + { + struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); + struct tcphdr *th; + pi->ipi_ip_hlen = sizeof(struct ip6_hdr); + + MPASS(m->m_len >= pi->ipi_ehdrlen + sizeof(struct ip6_hdr)); + th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); + + /* XXX-BZ this will go badly in case of ext hdrs. 
*/ + pi->ipi_ipproto = ip6->ip6_nxt; + pi->ipi_flags |= IPI_TX_IPV6; +#ifdef ATR + if (ip6->ip6_nxt == IPPROTO_TCP) /* IPv6 arm: no 'ip' in scope here */ + pi->ipi_tcp_hflags = th->th_flags; +#endif + if (IS_TSO6(pi)) { + + + if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) + return (ENXIO); + + MPASS(m->m_len >= pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr)); + th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); + pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; + pi->ipi_tcp_hlen = th->th_off << 2; + } + break; + } +#endif + default: + pi->ipi_csum_flags &= ~CSUM_OFFLOAD; + pi->ipi_ip_hlen = 0; + break; + } + return (0); +} + static int iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) { @@ -2158,7 +2260,10 @@ return (ENOBUFS); } m_head = *m_headp; - pi.ipi_m = m_head; + + if ((err = iflib_parse_header(&pi, m_head)) != 0) + return (err); + pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; pi.ipi_pidx = pidx; @@ -2171,16 +2276,15 @@ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); - MPASS(pi.ipi_m != NULL); MPASS(txsd->ifsd_m == NULL); #ifdef INVARIANTS { int i; for (i = 0; i < sctx->isc_ntxd; i++) - MPASS(txq->ift_sds[i].ifsd_m != pi.ipi_m); + MPASS(txq->ift_sds[i].ifsd_m != m_head); } #endif - txsd->ifsd_m = pi.ipi_m; + txsd->ifsd_m = m_head; MPASS(pi.ipi_new_pidx >= 0 && pi.ipi_new_pidx < sctx->isc_ntxd); if (pi.ipi_new_pidx >= pi.ipi_pidx) { ndesc = pi.ipi_new_pidx - pi.ipi_pidx; @@ -2199,11 +2303,11 @@ return (err); } -#define BRBITS 8 +/* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 + #define NQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nqsets) -#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid >> BRBITS) % NQSETS(ctx)) + FIRST_QSET(ctx)) -#define BRIDX(txq, m) ((m)->m_pkthdr.flowid % txq->ift_nbr) +#define QIDX(ctx, m) (((m)->m_pkthdr.flowid % NQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) 
#define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_nsegments) @@ -2303,24 +2407,6 @@ return (reclaim); } -#if 0 -static void -iflib_tx_timeout(void *arg) -{ - - /* XXX */ -} - -static void -iflib_txq_deferred(struct buf_ring_sc *br __unused, void *sc) -{ - iflib_txq_t txq = sc; - - GROUPTASK_ENQUEUE(&txq->ift_task); -} -#endif - - static void _ring_peek(struct mp_ring *r, struct mbuf **m, int cidx, int count) { @@ -2498,36 +2584,6 @@ CTX_UNLOCK(ctx); } - -#if 0 -void -iflib_intr_rx(void *arg) -{ - iflib_rxq_t rxq = arg; - - ++rxq->ifr_rx_irq; - _task_fn_rx(arg, 0); -} - -void -iflib_intr_tx(void *arg) -{ - iflib_txq_t txq= arg; - - ++txq->ift_tx_irq; - _task_fn_tx(arg, 0); -} - -void -iflib_intr_link(void *arg) -{ - if_ctx_t ctx = arg; - - ++ctx->ifc_link_irq; - _task_fn_link(arg, 0); -} -#endif - static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { @@ -2692,6 +2748,10 @@ /* detaching ?*/ if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { iflib_init_locked(ctx); + if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) + ctx->ifc_flags |= IFC_MULTISEG; + else + ctx->ifc_flags &= ~IFC_MULTISEG; err = if_setmtu(ifp, ifr->ifr_mtu); } CTX_UNLOCK(ctx); @@ -2963,6 +3023,8 @@ return (err); } + if (bus_get_dma_tag(dev) != pci_get_dma_tag(dev, device_get_parent(dev))) + ctx->ifc_flags |= IFC_DMAR; scctx = &ctx->ifc_softc_ctx; msix_bar = scctx->isc_msix_bar; @@ -3862,19 +3924,8 @@ static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx) { - int avail; - avail = ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx); -#if 0 - rxq->ifr_pidx += avail; - if (rxq->ifr_pidx >= rxq->ifr_size) { - rxq->ifr_pidx -= rxq->ifr_size; - rxq->ifr_gen = 1; - } - - return (get_inuse(rxq->ifr_size, rxq->ifr_cidx, rxq->ifr_pidx, rxq->ifr_gen)); -#endif - return (avail); + return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx)); } void @@ -3958,7 +4009,7 @@ #else queuemsgs = msgs - admincnt; #endif - if (bus_get_cpus(dev, INTR_CPUS, &ctx->ifc_cpus) == 0) { + if 
(bus_get_cpus(dev, INTR_CPUS, &ctx->ifc_cpus, sizeof(ctx->ifc_cpus)) == 0) { #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else Index: sys/net/mp_ring.h =================================================================== --- sys/net/mp_ring.h +++ sys/net/mp_ring.h @@ -28,8 +28,8 @@ * */ -#ifndef __CXGBE_MP_RING_H -#define __CXGBE_MP_RING_H +#ifndef __NET_MP_RING_H +#define __NET_MP_RING_H #ifndef _KERNEL #error "no user-serviceable parts inside"