Index: head/sys/dev/cxgbe/adapter.h =================================================================== --- head/sys/dev/cxgbe/adapter.h (revision 367916) +++ head/sys/dev/cxgbe/adapter.h (revision 367917) @@ -1,1394 +1,1397 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_ADAPTER_H__ #define __T4_ADAPTER_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "offload.h" #include "t4_ioctl.h" #include "common/t4_msg.h" #include "firmware/t4fw_interface.h" #define KTR_CXGBE KTR_SPARE3 MALLOC_DECLARE(M_CXGBE); #define CXGBE_UNIMPLEMENTED(s) \ panic("%s (%s, line %d) not implemented yet.", s, __FILE__, __LINE__) #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } #else #define prefetch(x) __builtin_prefetch(x) #endif #ifndef SYSCTL_ADD_UQUAD #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD #define sysctl_handle_64 sysctl_handle_quad #define CTLTYPE_U64 CTLTYPE_QUAD #endif SYSCTL_DECL(_hw_cxgbe); struct adapter; typedef struct adapter adapter_t; enum { /* * All ingress queues use this entry size. Note that the firmware event * queue and any iq expecting CPL_RX_PKT in the descriptor needs this to * be at least 64. */ IQ_ESIZE = 64, /* Default queue sizes for all kinds of ingress queues */ FW_IQ_QSIZE = 256, RX_IQ_QSIZE = 1024, /* All egress queues use this entry size */ EQ_ESIZE = 64, /* Default queue sizes for all kinds of egress queues */ CTRL_EQ_QSIZE = 1024, TX_EQ_QSIZE = 1024, #if MJUMPAGESIZE != MCLBYTES SW_ZONE_SIZES = 4, /* cluster, jumbop, jumbo9k, jumbo16k */ #else SW_ZONE_SIZES = 3, /* cluster, jumbo9k, jumbo16k */ #endif CL_METADATA_SIZE = CACHE_LINE_SIZE, SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */ TX_SGL_SEGS = 39, TX_SGL_SEGS_TSO = 38, TX_SGL_SEGS_VM = 38, TX_SGL_SEGS_VM_TSO = 37, TX_SGL_SEGS_EO_TSO = 30, /* XXX: lower for IPv6. */ TX_SGL_SEGS_VXLAN_TSO = 37, TX_WR_FLITS = SGE_MAX_WR_LEN / 8 }; enum { /* adapter intr_type */ INTR_INTX = (1 << 0), INTR_MSI = (1 << 1), INTR_MSIX = (1 << 2) }; enum { XGMAC_MTU = (1 << 0), XGMAC_PROMISC = (1 << 1), XGMAC_ALLMULTI = (1 << 2), XGMAC_VLANEX = (1 << 3), XGMAC_UCADDR = (1 << 4), XGMAC_MCADDRS = (1 << 5), XGMAC_ALL = 0xffff }; enum { /* flags understood by begin_synchronized_op */ HOLD_LOCK = (1 << 0), SLEEP_OK = (1 << 1), INTR_OK = (1 << 2), /* flags understood by end_synchronized_op */ LOCK_HELD = HOLD_LOCK, }; enum { /* adapter flags */ FULL_INIT_DONE = (1 << 0), FW_OK = (1 << 1), CHK_MBOX_ACCESS = (1 << 2), MASTER_PF = (1 << 3), ADAP_SYSCTL_CTX = (1 << 4), ADAP_ERR = (1 << 5), BUF_PACKING_OK = (1 << 6), IS_VF = (1 << 7), KERN_TLS_OK = (1 << 8), CXGBE_BUSY = (1 << 9), /* port flags */ HAS_TRACEQ = (1 << 3), FIXED_IFMEDIA = (1 << 4), /* ifmedia list doesn't change. */ /* VI flags */ DOOMED = (1 << 0), VI_INIT_DONE = (1 << 1), VI_SYSCTL_CTX = (1 << 2), TX_USES_VM_WR = (1 << 3), /* adapter debug_flags */ DF_DUMP_MBOX = (1 << 0), /* Log all mbox cmd/rpl. */ DF_LOAD_FW_ANYTIME = (1 << 1), /* Allow LOAD_FW after init */ DF_DISABLE_TCB_CACHE = (1 << 2), /* Disable TCB cache (T6+) */ DF_DISABLE_CFG_RETRY = (1 << 3), /* Disable fallback config */ DF_VERBOSE_SLOWINTR = (1 << 4), /* Chatty slow intr handler */ }; #define IS_DOOMED(vi) ((vi)->flags & DOOMED) #define SET_DOOMED(vi) do {(vi)->flags |= DOOMED;} while (0) #define IS_BUSY(sc) ((sc)->flags & CXGBE_BUSY) #define SET_BUSY(sc) do {(sc)->flags |= CXGBE_BUSY;} while (0) #define CLR_BUSY(sc) do {(sc)->flags &= ~CXGBE_BUSY;} while (0) struct vi_info { device_t dev; struct port_info *pi; struct adapter *adapter; struct ifnet *ifp; struct pfil_head *pfil; unsigned long flags; int if_flags; uint16_t *rss, *nm_rss; uint16_t viid; /* opaque VI identifier */ uint16_t smt_idx; uint16_t vin; uint8_t vfvld; int16_t xact_addr_filt;/* index of exact MAC address filter */ uint16_t rss_size; /* size of VI's RSS table slice */ uint16_t rss_base; /* start of VI's RSS table slice */ int hashen; int nintr; int first_intr; /* These need to be int as they are used in sysctl */ int ntxq; /* # of tx queues */ int first_txq; /* index of first tx queue */ int rsrv_noflowq; /* Reserve queue 0 for non-flowid packets */ int nrxq; /* # of rx queues */ int first_rxq; /* index of first rx queue */ int nofldtxq; /* # of offload tx queues */ int first_ofld_txq; /* index of first offload tx queue */ int nofldrxq; /* # of offload rx queues */ int first_ofld_rxq; /* index of first offload rx queue */ int nnmtxq; int first_nm_txq; int nnmrxq; int first_nm_rxq; int tmr_idx; int ofld_tmr_idx; int pktc_idx; int ofld_pktc_idx; int qsize_rxq; int qsize_txq; struct timeval last_refreshed; struct fw_vi_stats_vf stats; struct callout tick; struct sysctl_ctx_list ctx; /* from ifconfig up to driver detach */ uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */ }; struct tx_ch_rl_params { enum fw_sched_params_rate ratemode; /* %port (REL) or kbps (ABS) */ uint32_t maxrate; }; enum { CLRL_USER = (1 << 0), /* allocated manually. */ CLRL_SYNC = (1 << 1), /* sync hw update in progress. */ CLRL_ASYNC = (1 << 2), /* async hw update requested. */ CLRL_ERR = (1 << 3), /* last hw setup ended in error. */ }; struct tx_cl_rl_params { int refcount; uint8_t flags; enum fw_sched_params_rate ratemode; /* %port REL or ABS value */ enum fw_sched_params_unit rateunit; /* kbps or pps (when ABS) */ enum fw_sched_params_mode mode; /* aggr or per-flow */ uint32_t maxrate; uint16_t pktsize; uint16_t burstsize; }; /* Tx scheduler parameters for a channel/port */ struct tx_sched_params { /* Channel Rate Limiter */ struct tx_ch_rl_params ch_rl; /* Class WRR */ /* XXX */ /* Class Rate Limiter (including the default pktsize and burstsize). */ int pktsize; int burstsize; struct tx_cl_rl_params cl_rl[]; }; struct port_info { device_t dev; struct adapter *adapter; struct vi_info *vi; int nvi; int up_vis; int uld_vis; bool vxlan_tcam_entry; struct tx_sched_params *sched_params; struct mtx pi_lock; char lockname[16]; unsigned long flags; uint8_t lport; /* associated offload logical port */ int8_t mdio_addr; uint8_t port_type; uint8_t mod_type; uint8_t port_id; uint8_t tx_chan; uint8_t mps_bg_map; /* rx MPS buffer group bitmap */ uint8_t rx_e_chan_map; /* rx TP e-channel bitmap */ struct link_config link_cfg; struct ifmedia media; struct timeval last_refreshed; struct port_stats stats; u_int tnl_cong_drops; u_int tx_parse_error; int fcs_reg; uint64_t fcs_base; u_long tx_toe_tls_records; u_long tx_toe_tls_octets; u_long rx_toe_tls_records; u_long rx_toe_tls_octets; struct callout tick; }; #define IS_MAIN_VI(vi) ((vi) == &((vi)->pi->vi[0])) struct cluster_metadata { uma_zone_t zone; caddr_t cl; u_int refcount; }; struct fl_sdesc { caddr_t cl; uint16_t nmbuf; /* # of driver originated mbufs with ref on cluster */ int16_t moff; /* offset of metadata from cl */ uint8_t zidx; }; struct tx_desc { __be64 flit[8]; }; struct tx_sdesc { struct mbuf *m; /* m_nextpkt linked chain of frames */ uint8_t desc_used; /* # of hardware descriptors used by the WR */ }; #define IQ_PAD (IQ_ESIZE - sizeof(struct rsp_ctrl) - sizeof(struct rss_header)) struct iq_desc { struct rss_header rss; uint8_t cpl[IQ_PAD]; struct rsp_ctrl rsp; }; #undef IQ_PAD CTASSERT(sizeof(struct iq_desc) == IQ_ESIZE); enum { /* iq flags */ IQ_ALLOCATED = (1 << 0), /* firmware resources allocated */ IQ_HAS_FL = (1 << 1), /* iq associated with a freelist */ IQ_RX_TIMESTAMP = (1 << 2), /* provide the SGE rx timestamp */ IQ_LRO_ENABLED = (1 << 3), /* iq is an eth rxq with LRO enabled */ IQ_ADJ_CREDIT = (1 << 4), /* hw is off by 1 credit for this iq */ /* iq state */ IQS_DISABLED = 0, IQS_BUSY = 1, IQS_IDLE = 2, /* netmap related flags */ NM_OFF = 0, NM_ON = 1, NM_BUSY = 2, }; enum { CPL_COOKIE_RESERVED = 0, CPL_COOKIE_FILTER, CPL_COOKIE_DDP0, CPL_COOKIE_DDP1, CPL_COOKIE_TOM, CPL_COOKIE_HASHFILTER, CPL_COOKIE_ETHOFLD, CPL_COOKIE_KERN_TLS, NUM_CPL_COOKIES = 8 /* Limited by M_COOKIE. Do not increase. */ }; struct sge_iq; struct rss_header; typedef int (*cpl_handler_t)(struct sge_iq *, const struct rss_header *, struct mbuf *); typedef int (*an_handler_t)(struct sge_iq *, const struct rsp_ctrl *); typedef int (*fw_msg_handler_t)(struct adapter *, const __be64 *); /* * Ingress Queue: T4 is producer, driver is consumer. */ struct sge_iq { uint32_t flags; volatile int state; struct adapter *adapter; struct iq_desc *desc; /* KVA of descriptor ring */ int8_t intr_pktc_idx; /* packet count threshold index */ uint8_t gen; /* generation bit */ uint8_t intr_params; /* interrupt holdoff parameters */ uint8_t intr_next; /* XXX: holdoff for next interrupt */ uint16_t qsize; /* size (# of entries) of the queue */ uint16_t sidx; /* index of the entry with the status page */ uint16_t cidx; /* consumer index */ uint16_t cntxt_id; /* SGE context id for the iq */ uint16_t abs_id; /* absolute SGE id for the iq */ STAILQ_ENTRY(sge_iq) link; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; /* bus address of descriptor ring */ }; enum { EQ_CTRL = 1, EQ_ETH = 2, EQ_OFLD = 3, /* eq flags */ EQ_TYPEMASK = 0x3, /* 2 lsbits hold the type (see above) */ EQ_ALLOCATED = (1 << 2), /* firmware resources allocated */ EQ_ENABLED = (1 << 3), /* open for business */ EQ_QFLUSH = (1 << 4), /* if_qflush in progress */ }; /* Listed in order of preference. Update t4_sysctls too if you change these */ enum {DOORBELL_UDB, DOORBELL_WCWR, DOORBELL_UDBWC, DOORBELL_KDB}; /* * Egress Queue: driver is producer, T4 is consumer. * * Note: A free list is an egress queue (driver produces the buffers and T4 * consumes them) but it's special enough to have its own struct (see sge_fl). */ struct sge_eq { unsigned int flags; /* MUST be first */ unsigned int cntxt_id; /* SGE context id for the eq */ unsigned int abs_id; /* absolute SGE id for the eq */ struct mtx eq_lock; struct tx_desc *desc; /* KVA of descriptor ring */ uint8_t doorbells; volatile uint32_t *udb; /* KVA of doorbell (lies within BAR2) */ u_int udb_qid; /* relative qid within the doorbell page */ uint16_t sidx; /* index of the entry with the status page */ uint16_t cidx; /* consumer idx (desc idx) */ uint16_t pidx; /* producer idx (desc idx) */ uint16_t equeqidx; /* EQUEQ last requested at this pidx */ uint16_t dbidx; /* pidx of the most recent doorbell */ uint16_t iqid; /* iq that gets egr_update for the eq */ uint8_t tx_chan; /* tx channel used by the eq */ volatile u_int equiq; /* EQUIQ outstanding */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; /* bus address of descriptor ring */ char lockname[16]; }; struct rx_buf_info { uma_zone_t zone; /* zone that this cluster comes from */ uint16_t size1; /* same as size of cluster: 2K/4K/9K/16K. * hwsize[hwidx1] = size1. No spare. */ uint16_t size2; /* hwsize[hwidx2] = size2. * spare in cluster = size1 - size2. */ int8_t hwidx1; /* SGE bufsize idx for size1 */ int8_t hwidx2; /* SGE bufsize idx for size2 */ uint8_t type; /* EXT_xxx type of the cluster */ }; enum { NUM_MEMWIN = 3, MEMWIN0_APERTURE = 2048, MEMWIN0_BASE = 0x1b800, MEMWIN1_APERTURE = 32768, MEMWIN1_BASE = 0x28000, MEMWIN2_APERTURE_T4 = 65536, MEMWIN2_BASE_T4 = 0x30000, MEMWIN2_APERTURE_T5 = 128 * 1024, MEMWIN2_BASE_T5 = 0x60000, }; struct memwin { struct rwlock mw_lock __aligned(CACHE_LINE_SIZE); uint32_t mw_base; /* constant after setup_memwin */ uint32_t mw_aperture; /* ditto */ uint32_t mw_curpos; /* protected by mw_lock */ }; enum { FL_STARVING = (1 << 0), /* on the adapter's list of starving fl's */ FL_DOOMED = (1 << 1), /* about to be destroyed */ FL_BUF_PACKING = (1 << 2), /* buffer packing enabled */ FL_BUF_RESUME = (1 << 3), /* resume from the middle of the frame */ }; #define FL_RUNNING_LOW(fl) \ (IDXDIFF(fl->dbidx * 8, fl->cidx, fl->sidx * 8) <= fl->lowat) #define FL_NOT_RUNNING_LOW(fl) \ (IDXDIFF(fl->dbidx * 8, fl->cidx, fl->sidx * 8) >= 2 * fl->lowat) struct sge_fl { struct mtx fl_lock; __be64 *desc; /* KVA of descriptor ring, ptr to addresses */ struct fl_sdesc *sdesc; /* KVA of software descriptor ring */ uint16_t zidx; /* refill zone idx */ uint16_t safe_zidx; uint16_t lowat; /* # of buffers <= this means fl needs help */ int flags; uint16_t buf_boundary; /* The 16b idx all deal with hw descriptors */ uint16_t dbidx; /* hw pidx after last doorbell */ uint16_t sidx; /* index of status page */ volatile uint16_t hw_cidx; /* The 32b idx are all buffer idx, not hardware descriptor idx */ uint32_t cidx; /* consumer index */ uint32_t pidx; /* producer index */ uint32_t dbval; u_int rx_offset; /* offset in fl buf (when buffer packing) */ volatile uint32_t *udb; uint64_t cl_allocated; /* # of clusters allocated */ uint64_t cl_recycled; /* # of clusters recycled */ uint64_t cl_fast_recycled; /* # of clusters recycled (fast) */ /* These 3 are valid when FL_BUF_RESUME is set, stale otherwise. */ struct mbuf *m0; struct mbuf **pnext; u_int remaining; uint16_t qsize; /* # of hw descriptors (status page included) */ uint16_t cntxt_id; /* SGE context id for the freelist */ TAILQ_ENTRY(sge_fl) link; /* All starving freelists */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; char lockname[16]; bus_addr_t ba; /* bus address of descriptor ring */ }; struct mp_ring; struct txpkts { uint8_t wr_type; /* type 0 or type 1 */ uint8_t npkt; /* # of packets in this work request */ uint8_t len16; /* # of 16B pieces used by this work request */ uint8_t score; /* 1-10. coalescing attempted if score > 3 */ uint8_t max_npkt; /* maximum number of packets allowed */ uint16_t plen; /* total payload (sum of all packets) */ /* straight from fw_eth_tx_pkts_vm_wr. */ __u8 ethmacdst[6]; __u8 ethmacsrc[6]; __be16 ethtype; __be16 vlantci; struct mbuf *mb[15]; }; /* txq: SGE egress queue + what's needed for Ethernet NIC */ struct sge_txq { struct sge_eq eq; /* MUST be first */ struct ifnet *ifp; /* the interface this txq belongs to */ struct mp_ring *r; /* tx software ring */ struct tx_sdesc *sdesc; /* KVA of software descriptor ring */ struct sglist *gl; __be32 cpl_ctrl0; /* for convenience */ int tc_idx; /* traffic class */ struct txpkts txp; struct task tx_reclaim_task; /* stats for common events first */ uint64_t txcsum; /* # of times hardware assisted with checksum */ uint64_t tso_wrs; /* # of TSO work requests */ uint64_t vlan_insertion;/* # of times VLAN tag was inserted */ uint64_t imm_wrs; /* # of work requests with immediate data */ uint64_t sgl_wrs; /* # of work requests with direct SGL */ uint64_t txpkt_wrs; /* # of txpkt work requests (not coalesced) */ uint64_t txpkts0_wrs; /* # of type0 coalesced tx work requests */ uint64_t txpkts1_wrs; /* # of type1 coalesced tx work requests */ uint64_t txpkts0_pkts; /* # of frames in type0 coalesced tx WRs */ uint64_t txpkts1_pkts; /* # of frames in type1 coalesced tx WRs */ uint64_t raw_wrs; /* # of raw work requests (alloc_wr_mbuf) */ uint64_t vxlan_tso_wrs; /* # of VXLAN TSO work requests */ uint64_t vxlan_txcsum; uint64_t kern_tls_records; uint64_t kern_tls_short; uint64_t kern_tls_partial; uint64_t kern_tls_full; uint64_t kern_tls_octets; uint64_t kern_tls_waste; uint64_t kern_tls_options; uint64_t kern_tls_header; uint64_t kern_tls_fin; uint64_t kern_tls_fin_short; uint64_t kern_tls_cbc; uint64_t kern_tls_gcm; /* stats for not-that-common events */ /* Optional scratch space for constructing work requests. */ uint8_t ss[SGE_MAX_WR_LEN] __aligned(16); } __aligned(CACHE_LINE_SIZE); /* rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_rxq { struct sge_iq iq; /* MUST be first */ struct sge_fl fl; /* MUST follow iq */ struct ifnet *ifp; /* the interface this rxq belongs to */ struct lro_ctrl lro; /* LRO state */ /* stats for common events first */ uint64_t rxcsum; /* # of times hardware assisted with checksum */ uint64_t vlan_extraction;/* # of times VLAN tag was extracted */ uint64_t vxlan_rxcsum; /* stats for not-that-common events */ } __aligned(CACHE_LINE_SIZE); static inline struct sge_rxq * iq_to_rxq(struct sge_iq *iq) { return (__containerof(iq, struct sge_rxq, iq)); } /* ofld_rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_ofld_rxq { struct sge_iq iq; /* MUST be first */ struct sge_fl fl; /* MUST follow iq */ } __aligned(CACHE_LINE_SIZE); static inline struct sge_ofld_rxq * iq_to_ofld_rxq(struct sge_iq *iq) { return (__containerof(iq, struct sge_ofld_rxq, iq)); } struct wrqe { STAILQ_ENTRY(wrqe) link; struct sge_wrq *wrq; int wr_len; char wr[] __aligned(16); }; struct wrq_cookie { TAILQ_ENTRY(wrq_cookie) link; int ndesc; int pidx; }; /* * wrq: SGE egress queue that is given prebuilt work requests. Both the control * and offload tx queues are of this type. */ struct sge_wrq { struct sge_eq eq; /* MUST be first */ struct adapter *adapter; struct task wrq_tx_task; /* Tx desc reserved but WR not "committed" yet. */ TAILQ_HEAD(wrq_incomplete_wrs , wrq_cookie) incomplete_wrs; /* List of WRs ready to go out as soon as descriptors are available. */ STAILQ_HEAD(, wrqe) wr_list; u_int nwr_pending; u_int ndesc_needed; /* stats for common events first */ uint64_t tx_wrs_direct; /* # of WRs written directly to desc ring. */ uint64_t tx_wrs_ss; /* # of WRs copied from scratch space. */ uint64_t tx_wrs_copied; /* # of WRs queued and copied to desc ring. */ /* stats for not-that-common events */ /* * Scratch space for work requests that wrap around after reaching the * status page, and some information about the last WR that used it. */ uint16_t ss_pidx; uint16_t ss_len; uint8_t ss[SGE_MAX_WR_LEN]; } __aligned(CACHE_LINE_SIZE); #define INVALID_NM_RXQ_CNTXT_ID ((uint16_t)(-1)) struct sge_nm_rxq { /* Items used by the driver rx ithread are in this cacheline. */ volatile int nm_state __aligned(CACHE_LINE_SIZE); /* NM_OFF, NM_ON, or NM_BUSY */ u_int nid; /* netmap ring # for this queue */ struct vi_info *vi; struct iq_desc *iq_desc; uint16_t iq_abs_id; uint16_t iq_cntxt_id; uint16_t iq_cidx; uint16_t iq_sidx; uint8_t iq_gen; uint32_t fl_sidx; /* Items used by netmap rxsync are in this cacheline. */ __be64 *fl_desc __aligned(CACHE_LINE_SIZE); uint16_t fl_cntxt_id; uint32_t fl_pidx; uint32_t fl_sidx2; /* copy of fl_sidx */ uint32_t fl_db_val; u_int fl_db_saved; u_int fl_db_threshold; /* in descriptors */ u_int fl_hwidx:4; /* * fl_cidx is used by both the ithread and rxsync, the rest are not used * in the rx fast path. */ uint32_t fl_cidx __aligned(CACHE_LINE_SIZE); bus_dma_tag_t iq_desc_tag; bus_dmamap_t iq_desc_map; bus_addr_t iq_ba; int intr_idx; bus_dma_tag_t fl_desc_tag; bus_dmamap_t fl_desc_map; bus_addr_t fl_ba; + + void *bb; /* bit bucket for packets with nowhere to go. */ + uma_zone_t bb_zone; }; #define INVALID_NM_TXQ_CNTXT_ID ((u_int)(-1)) struct sge_nm_txq { struct tx_desc *desc; uint16_t cidx; uint16_t pidx; uint16_t sidx; uint16_t equiqidx; /* EQUIQ last requested at this pidx */ uint16_t equeqidx; /* EQUEQ last requested at this pidx */ uint16_t dbidx; /* pidx of the most recent doorbell */ uint8_t doorbells; volatile uint32_t *udb; u_int udb_qid; u_int cntxt_id; __be32 cpl_ctrl0; /* for convenience */ __be32 op_pkd; /* ditto */ u_int nid; /* netmap ring # for this queue */ /* infrequently used items after this */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; int iqidx; } __aligned(CACHE_LINE_SIZE); struct sge { int nrxq; /* total # of Ethernet rx queues */ int ntxq; /* total # of Ethernet tx queues */ int nofldrxq; /* total # of TOE rx queues */ int nofldtxq; /* total # of TOE tx queues */ int nnmrxq; /* total # of netmap rx queues */ int nnmtxq; /* total # of netmap tx queues */ int niq; /* total # of ingress queues */ int neq; /* total # of egress queues */ struct sge_iq fwq; /* Firmware event queue */ struct sge_wrq *ctrlq; /* Control queues */ struct sge_txq *txq; /* NIC tx queues */ struct sge_rxq *rxq; /* NIC rx queues */ struct sge_wrq *ofld_txq; /* TOE tx queues */ struct sge_ofld_rxq *ofld_rxq; /* TOE rx queues */ struct sge_nm_txq *nm_txq; /* netmap tx queues */ struct sge_nm_rxq *nm_rxq; /* netmap rx queues */ uint16_t iq_start; /* first cntxt_id */ uint16_t iq_base; /* first abs_id */ int eq_start; /* first cntxt_id */ int eq_base; /* first abs_id */ int iqmap_sz; int eqmap_sz; struct sge_iq **iqmap; /* iq->cntxt_id to iq mapping */ struct sge_eq **eqmap; /* eq->cntxt_id to eq mapping */ int8_t safe_zidx; struct rx_buf_info rx_buf_info[SW_ZONE_SIZES]; }; struct devnames { const char *nexus_name; const char *ifnet_name; const char *vi_ifnet_name; const char *pf03_drv_name; const char *vf_nexus_name; const char *vf_ifnet_name; }; struct clip_entry; struct adapter { SLIST_ENTRY(adapter) link; device_t dev; struct cdev *cdev; const struct devnames *names; /* PCIe register resources */ int regs_rid; struct resource *regs_res; int msix_rid; struct resource *msix_res; bus_space_handle_t bh; bus_space_tag_t bt; bus_size_t mmio_len; int udbs_rid; struct resource *udbs_res; volatile uint8_t *udbs_base; unsigned int pf; unsigned int mbox; unsigned int vpd_busy; unsigned int vpd_flag; /* Interrupt information */ int intr_type; int intr_count; struct irq { struct resource *res; int rid; void *tag; struct sge_rxq *rxq; struct sge_nm_rxq *nm_rxq; } __aligned(CACHE_LINE_SIZE) *irq; int sge_gts_reg; int sge_kdoorbell_reg; bus_dma_tag_t dmat; /* Parent DMA tag */ struct sge sge; int lro_timeout; int sc_do_rxcopy; int vxlan_port; u_int vxlan_refcount; int rawf_base; int nrawf; struct taskqueue *tq[MAX_NCHAN]; /* General purpose taskqueues */ struct task async_event_task; struct port_info *port[MAX_NPORTS]; uint8_t chan_map[MAX_NCHAN]; /* channel -> port */ struct mtx clip_table_lock; TAILQ_HEAD(, clip_entry) clip_table; int clip_gen; void *tom_softc; /* (struct tom_data *) */ struct tom_tunables tt; struct t4_offload_policy *policy; struct rwlock policy_lock; void *iwarp_softc; /* (struct c4iw_dev *) */ struct iw_tunables iwt; void *iscsi_ulp_softc; /* (struct cxgbei_data *) */ void *ccr_softc; /* (struct ccr_softc *) */ struct l2t_data *l2t; /* L2 table */ struct smt_data *smt; /* Source MAC Table */ struct tid_info tids; vmem_t *key_map; struct tls_tunables tlst; uint8_t doorbells; int offload_map; /* ports with IFCAP_TOE enabled */ int active_ulds; /* ULDs activated on this adapter */ int flags; int debug_flags; char ifp_lockname[16]; struct mtx ifp_lock; struct ifnet *ifp; /* tracer ifp */ struct ifmedia media; int traceq; /* iq used by all tracers, -1 if none */ int tracer_valid; /* bitmap of valid tracers */ int tracer_enabled; /* bitmap of enabled tracers */ char fw_version[16]; char tp_version[16]; char er_version[16]; char bs_version[16]; char cfg_file[32]; u_int cfcsum; struct adapter_params params; const struct chip_params *chip_params; struct t4_virt_res vres; uint16_t nbmcaps; uint16_t linkcaps; uint16_t switchcaps; uint16_t niccaps; uint16_t toecaps; uint16_t rdmacaps; uint16_t cryptocaps; uint16_t iscsicaps; uint16_t fcoecaps; struct sysctl_ctx_list ctx; /* from adapter_full_init to full_uninit */ struct mtx sc_lock; char lockname[16]; /* Starving free lists */ struct mtx sfl_lock; /* same cache-line as sc_lock? but that's ok */ TAILQ_HEAD(, sge_fl) sfl; struct callout sfl_callout; struct mtx reg_lock; /* for indirect register access */ struct memwin memwin[NUM_MEMWIN]; /* memory windows */ struct mtx tc_lock; struct task tc_task; const char *last_op; const void *last_op_thr; int last_op_flags; int swintr; int sensor_resets; struct callout ktls_tick; }; #define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock) #define ADAPTER_UNLOCK(sc) mtx_unlock(&(sc)->sc_lock) #define ADAPTER_LOCK_ASSERT_OWNED(sc) mtx_assert(&(sc)->sc_lock, MA_OWNED) #define ADAPTER_LOCK_ASSERT_NOTOWNED(sc) mtx_assert(&(sc)->sc_lock, MA_NOTOWNED) #define ASSERT_SYNCHRONIZED_OP(sc) \ KASSERT(IS_BUSY(sc) && \ (mtx_owned(&(sc)->sc_lock) || sc->last_op_thr == curthread), \ ("%s: operation not synchronized.", __func__)) #define PORT_LOCK(pi) mtx_lock(&(pi)->pi_lock) #define PORT_UNLOCK(pi) mtx_unlock(&(pi)->pi_lock) #define PORT_LOCK_ASSERT_OWNED(pi) mtx_assert(&(pi)->pi_lock, MA_OWNED) #define PORT_LOCK_ASSERT_NOTOWNED(pi) mtx_assert(&(pi)->pi_lock, MA_NOTOWNED) #define FL_LOCK(fl) mtx_lock(&(fl)->fl_lock) #define FL_TRYLOCK(fl) mtx_trylock(&(fl)->fl_lock) #define FL_UNLOCK(fl) mtx_unlock(&(fl)->fl_lock) #define FL_LOCK_ASSERT_OWNED(fl) mtx_assert(&(fl)->fl_lock, MA_OWNED) #define FL_LOCK_ASSERT_NOTOWNED(fl) mtx_assert(&(fl)->fl_lock, MA_NOTOWNED) #define RXQ_FL_LOCK(rxq) FL_LOCK(&(rxq)->fl) #define RXQ_FL_UNLOCK(rxq) FL_UNLOCK(&(rxq)->fl) #define RXQ_FL_LOCK_ASSERT_OWNED(rxq) FL_LOCK_ASSERT_OWNED(&(rxq)->fl) #define RXQ_FL_LOCK_ASSERT_NOTOWNED(rxq) FL_LOCK_ASSERT_NOTOWNED(&(rxq)->fl) #define EQ_LOCK(eq) mtx_lock(&(eq)->eq_lock) #define EQ_TRYLOCK(eq) mtx_trylock(&(eq)->eq_lock) #define EQ_UNLOCK(eq) mtx_unlock(&(eq)->eq_lock) #define EQ_LOCK_ASSERT_OWNED(eq) mtx_assert(&(eq)->eq_lock, MA_OWNED) #define EQ_LOCK_ASSERT_NOTOWNED(eq) mtx_assert(&(eq)->eq_lock, MA_NOTOWNED) #define TXQ_LOCK(txq) EQ_LOCK(&(txq)->eq) #define TXQ_TRYLOCK(txq) EQ_TRYLOCK(&(txq)->eq) #define TXQ_UNLOCK(txq) EQ_UNLOCK(&(txq)->eq) #define TXQ_LOCK_ASSERT_OWNED(txq) EQ_LOCK_ASSERT_OWNED(&(txq)->eq) #define TXQ_LOCK_ASSERT_NOTOWNED(txq) EQ_LOCK_ASSERT_NOTOWNED(&(txq)->eq) #define for_each_txq(vi, iter, q) \ for (q = &vi->adapter->sge.txq[vi->first_txq], iter = 0; \ iter < vi->ntxq; ++iter, ++q) #define for_each_rxq(vi, iter, q) \ for (q = &vi->adapter->sge.rxq[vi->first_rxq], iter = 0; \ iter < vi->nrxq; ++iter, ++q) #define for_each_ofld_txq(vi, iter, q) \ for (q = &vi->adapter->sge.ofld_txq[vi->first_ofld_txq], iter = 0; \ iter < vi->nofldtxq; ++iter, ++q) #define for_each_ofld_rxq(vi, iter, q) \ for (q = &vi->adapter->sge.ofld_rxq[vi->first_ofld_rxq], iter = 0; \ iter < vi->nofldrxq; ++iter, ++q) #define for_each_nm_txq(vi, iter, q) \ for (q = &vi->adapter->sge.nm_txq[vi->first_nm_txq], iter = 0; \ iter < vi->nnmtxq; ++iter, ++q) #define for_each_nm_rxq(vi, iter, q) \ for (q = &vi->adapter->sge.nm_rxq[vi->first_nm_rxq], iter = 0; \ iter < vi->nnmrxq; ++iter, ++q) #define for_each_vi(_pi, _iter, _vi) \ for ((_vi) = (_pi)->vi, (_iter) = 0; (_iter) < (_pi)->nvi; \ ++(_iter), ++(_vi)) #define IDXINCR(idx, incr, wrap) do { \ idx = wrap - idx > incr ? idx + incr : incr - (wrap - idx); \ } while (0) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) /* One for errors, one for firmware events */ #define T4_EXTRA_INTR 2 /* One for firmware events */ #define T4VF_EXTRA_INTR 1 static inline int forwarding_intr_to_fwq(struct adapter *sc) { return (sc->intr_count == 1); } static inline uint32_t t4_read_reg(struct adapter *sc, uint32_t reg) { return bus_space_read_4(sc->bt, sc->bh, reg); } static inline void t4_write_reg(struct adapter *sc, uint32_t reg, uint32_t val) { bus_space_write_4(sc->bt, sc->bh, reg, val); } static inline uint64_t t4_read_reg64(struct adapter *sc, uint32_t reg) { #ifdef __LP64__ return bus_space_read_8(sc->bt, sc->bh, reg); #else return (uint64_t)bus_space_read_4(sc->bt, sc->bh, reg) + ((uint64_t)bus_space_read_4(sc->bt, sc->bh, reg + 4) << 32); #endif } static inline void t4_write_reg64(struct adapter *sc, uint32_t reg, uint64_t val) { #ifdef __LP64__ bus_space_write_8(sc->bt, sc->bh, reg, val); #else bus_space_write_4(sc->bt, sc->bh, reg, val); bus_space_write_4(sc->bt, sc->bh, reg + 4, val>> 32); #endif } static inline void t4_os_pci_read_cfg1(struct adapter *sc, int reg, uint8_t *val) { *val = pci_read_config(sc->dev, reg, 1); } static inline void t4_os_pci_write_cfg1(struct adapter *sc, int reg, uint8_t val) { pci_write_config(sc->dev, reg, val, 1); } static inline void t4_os_pci_read_cfg2(struct adapter *sc, int reg, uint16_t *val) { *val = pci_read_config(sc->dev, reg, 2); } static inline void t4_os_pci_write_cfg2(struct adapter *sc, int reg, uint16_t val) { pci_write_config(sc->dev, reg, val, 2); } static inline void t4_os_pci_read_cfg4(struct adapter *sc, int reg, uint32_t *val) { *val = pci_read_config(sc->dev, reg, 4); } static inline void t4_os_pci_write_cfg4(struct adapter *sc, int reg, uint32_t val) { pci_write_config(sc->dev, reg, val, 4); } static inline struct port_info * adap2pinfo(struct adapter *sc, int idx) { return (sc->port[idx]); } static inline void t4_os_set_hw_addr(struct port_info *pi, uint8_t hw_addr[]) { bcopy(hw_addr, pi->vi[0].hw_addr, ETHER_ADDR_LEN); } static inline int tx_resume_threshold(struct sge_eq *eq) { /* not quite the same as qsize / 4, but this will do. */ return (eq->sidx / 4); } static inline int t4_use_ldst(struct adapter *sc) { #ifdef notyet return (sc->flags & FW_OK || !sc->use_bd); #else return (0); #endif } static inline void CH_DUMP_MBOX(struct adapter *sc, int mbox, const int reg, const char *msg, const __be64 *const p, const bool err) { if (!(sc->debug_flags & DF_DUMP_MBOX) && !err) return; if (p != NULL) { log(err ? LOG_ERR : LOG_DEBUG, "%s: mbox %u %s %016llx %016llx %016llx %016llx " "%016llx %016llx %016llx %016llx\n", device_get_nameunit(sc->dev), mbox, msg, (long long)be64_to_cpu(p[0]), (long long)be64_to_cpu(p[1]), (long long)be64_to_cpu(p[2]), (long long)be64_to_cpu(p[3]), (long long)be64_to_cpu(p[4]), (long long)be64_to_cpu(p[5]), (long long)be64_to_cpu(p[6]), (long long)be64_to_cpu(p[7])); } else { log(err ? LOG_ERR : LOG_DEBUG, "%s: mbox %u %s %016llx %016llx %016llx %016llx " "%016llx %016llx %016llx %016llx\n", device_get_nameunit(sc->dev), mbox, msg, (long long)t4_read_reg64(sc, reg), (long long)t4_read_reg64(sc, reg + 8), (long long)t4_read_reg64(sc, reg + 16), (long long)t4_read_reg64(sc, reg + 24), (long long)t4_read_reg64(sc, reg + 32), (long long)t4_read_reg64(sc, reg + 40), (long long)t4_read_reg64(sc, reg + 48), (long long)t4_read_reg64(sc, reg + 56)); } } /* t4_main.c */ extern int t4_ntxq; extern int t4_nrxq; extern int t4_intr_types; extern int t4_tmr_idx; extern int t4_pktc_idx; extern unsigned int t4_qsize_rxq; extern unsigned int t4_qsize_txq; extern device_method_t cxgbe_methods[]; int t4_os_find_pci_capability(struct adapter *, int); int t4_os_pci_save_state(struct adapter *); int t4_os_pci_restore_state(struct adapter *); void t4_os_portmod_changed(struct port_info *); void t4_os_link_changed(struct port_info *); void t4_iterate(void (*)(struct adapter *, void *), void *); void t4_init_devnames(struct adapter *); void t4_add_adapter(struct adapter *); int t4_detach_common(device_t); int t4_map_bars_0_and_4(struct adapter *); int t4_map_bar_2(struct adapter *); int t4_setup_intr_handlers(struct adapter *); void t4_sysctls(struct adapter *); int begin_synchronized_op(struct adapter *, struct vi_info *, int, char *); void doom_vi(struct adapter *, struct vi_info *); void end_synchronized_op(struct adapter *, int); int update_mac_settings(struct ifnet *, int); int adapter_full_init(struct adapter *); int adapter_full_uninit(struct adapter *); uint64_t cxgbe_get_counter(struct ifnet *, ift_counter); int vi_full_init(struct vi_info *); int vi_full_uninit(struct vi_info *); void vi_sysctls(struct vi_info *); void vi_tick(void *); int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int); int alloc_atid(struct adapter *, void *); void *lookup_atid(struct adapter *, int); void free_atid(struct adapter *, int); void release_tid(struct adapter *, int, struct sge_wrq *); int cxgbe_media_change(struct ifnet *); void cxgbe_media_status(struct ifnet *, struct ifmediareq *); bool t4_os_dump_cimla(struct adapter *, int, bool); void t4_os_dump_devlog(struct adapter *); #ifdef KERN_TLS /* t4_kern_tls.c */ int cxgbe_tls_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); void cxgbe_tls_tag_free(struct m_snd_tag *); void t6_ktls_modload(void); void t6_ktls_modunload(void); int t6_ktls_try(struct ifnet *, struct socket *, struct ktls_session *); int t6_ktls_parse_pkt(struct mbuf *, int *, int *); int t6_ktls_write_wr(struct sge_txq *, void *, struct mbuf *, u_int, u_int); #endif /* t4_keyctx.c */ struct auth_hash; union authctx; void t4_aes_getdeckey(void *, const void *, unsigned int); void t4_copy_partial_hash(int, union authctx *, void *); void t4_init_gmac_hash(const char *, int, char *); void t4_init_hmac_digest(struct auth_hash *, u_int, const char *, int, char *); #ifdef DEV_NETMAP /* t4_netmap.c */ struct sge_nm_rxq; void cxgbe_nm_attach(struct vi_info *); void cxgbe_nm_detach(struct vi_info *); void service_nm_rxq(struct sge_nm_rxq *); #endif /* t4_sge.c */ void t4_sge_modload(void); void t4_sge_modunload(void); uint64_t t4_sge_extfree_refs(void); void t4_tweak_chip_settings(struct adapter *); int t4_read_chip_settings(struct adapter *); int t4_create_dma_tag(struct adapter *); void t4_sge_sysctls(struct adapter *, struct sysctl_ctx_list *, struct sysctl_oid_list *); int t4_destroy_dma_tag(struct adapter *); int t4_setup_adapter_queues(struct adapter *); int t4_teardown_adapter_queues(struct adapter *); int t4_setup_vi_queues(struct vi_info *); int t4_teardown_vi_queues(struct vi_info *); void t4_intr_all(void *); void t4_intr(void *); #ifdef DEV_NETMAP void t4_nm_intr(void *); void t4_vi_intr(void *); #endif void t4_intr_err(void *); void t4_intr_evt(void *); void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *); void t4_update_fl_bufsize(struct ifnet *); struct mbuf *alloc_wr_mbuf(int, int); int parse_pkt(struct mbuf **, bool); void *start_wrq_wr(struct sge_wrq *, int, struct wrq_cookie *); void commit_wrq_wr(struct sge_wrq *, void *, struct wrq_cookie *); int tnl_cong(struct port_info *, int); void t4_register_an_handler(an_handler_t); void t4_register_fw_msg_handler(int, fw_msg_handler_t); void t4_register_cpl_handler(int, cpl_handler_t); void t4_register_shared_cpl_handler(int, cpl_handler_t, int); #ifdef RATELIMIT int ethofld_transmit(struct ifnet *, struct mbuf *); void send_etid_flush_wr(struct cxgbe_rate_tag *); #endif /* t4_tracer.c */ struct t4_tracer; void t4_tracer_modload(void); void t4_tracer_modunload(void); void t4_tracer_port_detach(struct adapter *); int t4_get_tracer(struct adapter *, struct t4_tracer *); int t4_set_tracer(struct adapter *, struct t4_tracer *); int t4_trace_pkt(struct sge_iq *, const struct rss_header *, struct mbuf *); int t5_trace_pkt(struct sge_iq *, const struct rss_header *, struct mbuf *); /* t4_sched.c */ int t4_set_sched_class(struct adapter *, struct t4_sched_params *); int t4_set_sched_queue(struct adapter *, struct t4_sched_queue *); int t4_init_tx_sched(struct adapter *); int t4_free_tx_sched(struct adapter *); void t4_update_tx_sched(struct adapter *); int t4_reserve_cl_rl_kbps(struct adapter *, int, u_int, int *); void t4_release_cl_rl(struct adapter *, int, int); int sysctl_tc(SYSCTL_HANDLER_ARGS); int sysctl_tc_params(SYSCTL_HANDLER_ARGS); #ifdef RATELIMIT void t4_init_etid_table(struct adapter *); void t4_free_etid_table(struct adapter *); struct cxgbe_rate_tag *lookup_etid(struct adapter *, int); int cxgbe_rate_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); int cxgbe_rate_tag_modify(struct m_snd_tag *, union if_snd_tag_modify_params *); int cxgbe_rate_tag_query(struct m_snd_tag *, union if_snd_tag_query_params *); void cxgbe_rate_tag_free(struct m_snd_tag *); void cxgbe_rate_tag_free_locked(struct cxgbe_rate_tag *); void cxgbe_ratelimit_query(struct ifnet *, struct if_ratelimit_query_results *); #endif /* t4_filter.c */ int get_filter_mode(struct adapter *, uint32_t *); int set_filter_mode(struct adapter *, uint32_t); int get_filter(struct adapter *, struct t4_filter *); int set_filter(struct adapter *, struct t4_filter *); int del_filter(struct adapter *, struct t4_filter *); int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); int t4_hashfilter_ao_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); int t4_hashfilter_tcb_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); int t4_del_hashfilter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); void free_hftid_hash(struct tid_info *); static inline struct wrqe * alloc_wrqe(int wr_len, struct sge_wrq *wrq) { int len = offsetof(struct wrqe, wr) + wr_len; struct wrqe *wr; wr = malloc(len, M_CXGBE, M_NOWAIT); if (__predict_false(wr == NULL)) return (NULL); wr->wr_len = wr_len; wr->wrq = wrq; return (wr); } static inline void * wrtod(struct wrqe *wr) { return (&wr->wr[0]); } static inline void free_wrqe(struct wrqe *wr) { free(wr, M_CXGBE); } static inline void t4_wrq_tx(struct adapter *sc, struct wrqe *wr) { struct sge_wrq *wrq = wr->wrq; TXQ_LOCK(wrq); t4_wrq_tx_locked(sc, wrq, wr); TXQ_UNLOCK(wrq); } static inline int read_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, val, len, 0)); } static inline int write_via_memwin(struct adapter *sc, int idx, uint32_t addr, const uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, (void *)(uintptr_t)val, len, 1)); } /* Number of len16 -> number of descriptors */ static inline int tx_len16_to_desc(int len16) { return (howmany(len16, EQ_ESIZE / 16)); } #endif Index: head/sys/dev/cxgbe/t4_netmap.c =================================================================== --- head/sys/dev/cxgbe/t4_netmap.c (revision 367916) +++ head/sys/dev/cxgbe/t4_netmap.c (revision 367917) @@ -1,1295 +1,1343 @@ /*- * Copyright (c) 2014 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #ifdef DEV_NETMAP #include #include #include #include #include #include #include #include #include #include +#include +#include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" extern int fl_pad; /* XXXNM */ /* * 0 = normal netmap rx * 1 = black hole * 2 = supermassive black hole (buffer packing enabled) */ int black_hole = 0; SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_black_hole, CTLFLAG_RWTUN, &black_hole, 0, "Sink incoming packets."); int rx_ndesc = 256; SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_ndesc, CTLFLAG_RWTUN, &rx_ndesc, 0, "# of rx descriptors after which the hw cidx is updated."); int rx_nframes = 64; SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_nframes, CTLFLAG_RWTUN, &rx_nframes, 0, "max # of frames received before waking up netmap rx."); int holdoff_tmr_idx = 2; SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_holdoff_tmr_idx, CTLFLAG_RWTUN, &holdoff_tmr_idx, 0, "Holdoff timer index for netmap rx queues."); /* * Congestion drops. * -1: no congestion feedback (not recommended). * 0: backpressure the channel instead of dropping packets right away. * 1: no backpressure, drop packets for the congested queue immediately. */ static int nm_cong_drop = 1; SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_cong_drop, CTLFLAG_RWTUN, &nm_cong_drop, 0, "Congestion control for netmap rx queues (0 = backpressure, 1 = drop"); int starve_fl = 0; SYSCTL_INT(_hw_cxgbe, OID_AUTO, starve_fl, CTLFLAG_RWTUN, &starve_fl, 0, "Don't ring fl db for netmap rx queues."); /* * Try to process tx credits in bulk. This may cause a delay in the return of * tx credits and is suitable for bursty or non-stop tx only. */ int lazy_tx_credit_flush = 1; SYSCTL_INT(_hw_cxgbe, OID_AUTO, lazy_tx_credit_flush, CTLFLAG_RWTUN, &lazy_tx_credit_flush, 0, "lazy credit flush for netmap tx queues."); /* * Split the netmap rx queues into two groups that populate separate halves of * the RSS indirection table. This allows filters with hashmask to steer to a * particular group of queues. */ static int nm_split_rss = 0; SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_split_rss, CTLFLAG_RWTUN, &nm_split_rss, 0, "Split the netmap rx queues into two groups."); /* * netmap(4) says "netmap does not use features such as checksum offloading, TCP * segmentation offloading, encryption, VLAN encapsulation/decapsulation, etc." * but this knob can be used to get the hardware to checksum all tx traffic * anyway. */ static int nm_txcsum = 0; SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_txcsum, CTLFLAG_RWTUN, &nm_txcsum, 0, "Enable transmit checksum offloading."); static int alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong) { int rc, cntxt_id, i; __be32 v; struct adapter *sc = vi->adapter; struct sge_params *sp = &sc->params.sge; struct netmap_adapter *na = NA(vi->ifp); struct fw_iq_cmd c; MPASS(na != NULL); MPASS(nm_rxq->iq_desc != NULL); MPASS(nm_rxq->fl_desc != NULL); bzero(nm_rxq->iq_desc, vi->qsize_rxq * IQ_ESIZE); bzero(nm_rxq->fl_desc, na->num_rx_desc * EQ_ESIZE + sp->spg_len); bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | V_FW_IQ_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | FW_LEN16(c)); MPASS(!forwarding_intr_to_fwq(sc)); KASSERT(nm_rxq->intr_idx < sc->intr_count, ("%s: invalid direct intr_idx %d", __func__, nm_rxq->intr_idx)); v = V_FW_IQ_CMD_IQANDSTINDEX(nm_rxq->intr_idx); c.type_to_iqandstindex = htobe32(v | V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | V_FW_IQ_CMD_VIID(vi->viid) | V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(vi->pi->tx_chan) | F_FW_IQ_CMD_IQGTSMODE | V_FW_IQ_CMD_IQINTCNTTHRESH(0) | V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); c.iqsize = htobe16(vi->qsize_rxq); c.iqaddr = htobe64(nm_rxq->iq_ba); if (cong >= 0) { c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN | V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF | F_FW_IQ_CMD_FL0CONGEN); } c.iqns_to_fl0congen |= htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | (black_hole == 2 ? F_FW_IQ_CMD_FL0PACKEN : 0)); c.fl0dcaen_to_fl0cidxfthresh = htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B_T6) | V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); c.fl0size = htobe16(na->num_rx_desc / 8 + sp->spg_len / EQ_ESIZE); c.fl0addr = htobe64(nm_rxq->fl_ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(sc->dev, "failed to create netmap ingress queue: %d\n", rc); return (rc); } nm_rxq->iq_cidx = 0; MPASS(nm_rxq->iq_sidx == vi->qsize_rxq - sp->spg_len / IQ_ESIZE); nm_rxq->iq_gen = F_RSPD_GEN; nm_rxq->iq_cntxt_id = be16toh(c.iqid); nm_rxq->iq_abs_id = be16toh(c.physiqid); cntxt_id = nm_rxq->iq_cntxt_id - sc->sge.iq_start; if (cntxt_id >= sc->sge.iqmap_sz) { panic ("%s: nm_rxq->iq_cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.iqmap_sz - 1); } sc->sge.iqmap[cntxt_id] = (void *)nm_rxq; nm_rxq->fl_cntxt_id = be16toh(c.fl0id); nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; nm_rxq->fl_db_saved = 0; /* matches the X_FETCHBURSTMAX_512B or X_FETCHBURSTMAX_256B above. */ nm_rxq->fl_db_threshold = chip_id(sc) <= CHELSIO_T5 ? 8 : 4; MPASS(nm_rxq->fl_sidx == na->num_rx_desc); cntxt_id = nm_rxq->fl_cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.eqmap_sz) { panic("%s: nm_rxq->fl_cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.eqmap_sz - 1); } sc->sge.eqmap[cntxt_id] = (void *)nm_rxq; nm_rxq->fl_db_val = V_QID(nm_rxq->fl_cntxt_id) | sc->chip_params->sge_fl_db; if (chip_id(sc) >= CHELSIO_T5 && cong >= 0) { uint32_t param, val; param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | V_FW_PARAMS_PARAM_YZ(nm_rxq->iq_cntxt_id); if (cong == 0) val = 1 << 19; else { val = 2 << 19; for (i = 0; i < 4; i++) { if (cong & (1 << i)) val |= 1 << (i << 2); } } rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc != 0) { /* report error but carry on */ device_printf(sc->dev, "failed to set congestion manager context for " "ingress queue %d: %d\n", nm_rxq->iq_cntxt_id, rc); } } t4_write_reg(sc, sc->sge_gts_reg, V_INGRESSQID(nm_rxq->iq_cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx))); return (rc); } static int free_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) { struct adapter *sc = vi->adapter; int rc; rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, 0xffff); if (rc != 0) device_printf(sc->dev, "%s: failed for iq %d, fl %d: %d\n", __func__, nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, rc); nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; return (rc); } static int alloc_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq) { int rc, cntxt_id; size_t len; struct adapter *sc = vi->adapter; struct netmap_adapter *na = NA(vi->ifp); struct fw_eq_eth_cmd c; MPASS(na != NULL); MPASS(nm_txq->desc != NULL); len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; bzero(nm_txq->desc, len); bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | V_FW_EQ_ETH_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); c.fetchszm_to_iqid = htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | V_FW_EQ_ETH_CMD_PCIECHN(vi->pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | V_FW_EQ_ETH_CMD_IQID(sc->sge.nm_rxq[nm_txq->iqidx].iq_cntxt_id)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | V_FW_EQ_ETH_CMD_EQSIZE(len / EQ_ESIZE)); c.eqaddr = htobe64(nm_txq->ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(vi->dev, "failed to create netmap egress queue: %d\n", rc); return (rc); } nm_txq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); cntxt_id = nm_txq->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.eqmap_sz) panic("%s: nm_txq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.eqmap_sz - 1); sc->sge.eqmap[cntxt_id] = (void *)nm_txq; nm_txq->pidx = nm_txq->cidx = 0; MPASS(nm_txq->sidx == na->num_tx_desc); nm_txq->equiqidx = nm_txq->equeqidx = nm_txq->dbidx = 0; nm_txq->doorbells = sc->doorbells; if (isset(&nm_txq->doorbells, DOORBELL_UDB) || isset(&nm_txq->doorbells, DOORBELL_UDBWC) || isset(&nm_txq->doorbells, DOORBELL_WCWR)) { uint32_t s_qpp = sc->params.sge.eq_s_qpp; uint32_t mask = (1 << s_qpp) - 1; volatile uint8_t *udb; udb = sc->udbs_base + UDBS_DB_OFFSET; udb += (nm_txq->cntxt_id >> s_qpp) << PAGE_SHIFT; nm_txq->udb_qid = nm_txq->cntxt_id & mask; if (nm_txq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) clrbit(&nm_txq->doorbells, DOORBELL_WCWR); else { udb += nm_txq->udb_qid << UDBS_SEG_SHIFT; nm_txq->udb_qid = 0; } nm_txq->udb = (volatile void *)udb; } if (sc->params.fw_vers < FW_VERSION32(1, 25, 1, 0)) { uint32_t param, val; param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) | V_FW_PARAMS_PARAM_YZ(nm_txq->cntxt_id); val = 0xff; rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc != 0) { device_printf(vi->dev, "failed to bind netmap txq %d to class 0xff: %d\n", nm_txq->cntxt_id, rc); rc = 0; } } return (rc); } static int free_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq) { struct adapter *sc = vi->adapter; int rc; rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, nm_txq->cntxt_id); if (rc != 0) device_printf(sc->dev, "%s: failed for eq %d: %d\n", __func__, nm_txq->cntxt_id, rc); nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID; return (rc); } static int cxgbe_netmap_simple_rss(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, struct netmap_adapter *na) { struct netmap_kring *kring; struct sge_nm_rxq *nm_rxq; int rc, i, j, nm_state, defq; uint16_t *rss; /* * Check if there's at least one active (or about to go active) netmap * rx queue. */ defq = -1; for_each_nm_rxq(vi, j, nm_rxq) { nm_state = atomic_load_int(&nm_rxq->nm_state); kring = na->rx_rings[nm_rxq->nid]; if ((nm_state != NM_OFF && !nm_kring_pending_off(kring)) || (nm_state == NM_OFF && nm_kring_pending_on(kring))) { MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); if (defq == -1) { defq = nm_rxq->iq_abs_id; break; } } } if (defq == -1) { /* No active netmap queues. Switch back to NIC queues. */ rss = vi->rss; defq = vi->rss[0]; } else { for (i = 0; i < vi->rss_size;) { for_each_nm_rxq(vi, j, nm_rxq) { nm_state = atomic_load_int(&nm_rxq->nm_state); kring = na->rx_rings[nm_rxq->nid]; if ((nm_state != NM_OFF && !nm_kring_pending_off(kring)) || (nm_state == NM_OFF && nm_kring_pending_on(kring))) { MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); vi->nm_rss[i++] = nm_rxq->iq_abs_id; if (i == vi->rss_size) break; } } } rss = vi->nm_rss; } rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, vi->rss_size); if (rc != 0) if_printf(ifp, "netmap rss_config failed: %d\n", rc); rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, defq, 0, 0); if (rc != 0) { if_printf(ifp, "netmap defaultq config failed: %d\n", rc); } return (rc); } /* * Odd number of rx queues work best for split RSS mode as the first queue can * be dedicated for non-RSS traffic and the rest divided into two equal halves. */ static int cxgbe_netmap_split_rss(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, struct netmap_adapter *na) { struct netmap_kring *kring; struct sge_nm_rxq *nm_rxq; int rc, i, j, nm_state, defq; int nactive[2] = {0, 0}; int dq[2] = {-1, -1}; bool dq_norss; /* default queue should not be in RSS table. */ MPASS(nm_split_rss != 0); MPASS(vi->nnmrxq > 1); for_each_nm_rxq(vi, i, nm_rxq) { j = i / ((vi->nnmrxq + 1) / 2); nm_state = atomic_load_int(&nm_rxq->nm_state); kring = na->rx_rings[nm_rxq->nid]; if ((nm_state != NM_OFF && !nm_kring_pending_off(kring)) || (nm_state == NM_OFF && nm_kring_pending_on(kring))) { MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); nactive[j]++; if (dq[j] == -1) { dq[j] = nm_rxq->iq_abs_id; break; } } } if (nactive[0] == 0 || nactive[1] == 0) return (cxgbe_netmap_simple_rss(sc, vi, ifp, na)); MPASS(dq[0] != -1 && dq[1] != -1); if (nactive[0] > nactive[1]) { defq = dq[0]; dq_norss = true; } else if (nactive[0] < nactive[1]) { defq = dq[1]; dq_norss = true; } else { defq = dq[0]; dq_norss = false; } i = 0; nm_rxq = &sc->sge.nm_rxq[vi->first_nm_rxq]; while (i < vi->rss_size / 2) { for (j = 0; j < (vi->nnmrxq + 1) / 2; j++) { nm_state = atomic_load_int(&nm_rxq[j].nm_state); kring = na->rx_rings[nm_rxq[j].nid]; if ((nm_state == NM_OFF && !nm_kring_pending_on(kring)) || (nm_state == NM_ON && nm_kring_pending_off(kring))) { continue; } MPASS(nm_rxq[j].iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); if (dq_norss && defq == nm_rxq[j].iq_abs_id) continue; vi->nm_rss[i++] = nm_rxq[j].iq_abs_id; if (i == vi->rss_size / 2) break; } } while (i < vi->rss_size) { for (j = (vi->nnmrxq + 1) / 2; j < vi->nnmrxq; j++) { nm_state = atomic_load_int(&nm_rxq[j].nm_state); kring = na->rx_rings[nm_rxq[j].nid]; if ((nm_state == NM_OFF && !nm_kring_pending_on(kring)) || (nm_state == NM_ON && nm_kring_pending_off(kring))) { continue; } MPASS(nm_rxq[j].iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); if (dq_norss && defq == nm_rxq[j].iq_abs_id) continue; vi->nm_rss[i++] = nm_rxq[j].iq_abs_id; if (i == vi->rss_size) break; } } rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, vi->nm_rss, vi->rss_size); if (rc != 0) if_printf(ifp, "netmap split_rss_config failed: %d\n", rc); rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, defq, 0, 0); if (rc != 0) if_printf(ifp, "netmap defaultq config failed: %d\n", rc); return (rc); } static inline int cxgbe_netmap_rss(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, struct netmap_adapter *na) { if (nm_split_rss == 0 || vi->nnmrxq == 1) return (cxgbe_netmap_simple_rss(sc, vi, ifp, na)); else return (cxgbe_netmap_split_rss(sc, vi, ifp, na)); } static int cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, struct netmap_adapter *na) { struct netmap_slot *slot; struct netmap_kring *kring; struct sge_nm_rxq *nm_rxq; struct sge_nm_txq *nm_txq; int i, j, hwidx; struct rx_buf_info *rxb; ASSERT_SYNCHRONIZED_OP(sc); MPASS(vi->nnmrxq > 0); MPASS(vi->nnmtxq > 0); if ((vi->flags & VI_INIT_DONE) == 0 || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { if_printf(ifp, "cannot enable netmap operation because " "interface is not UP.\n"); return (EAGAIN); } rxb = &sc->sge.rx_buf_info[0]; for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { if (rxb->size1 == NETMAP_BUF_SIZE(na)) { hwidx = rxb->hwidx1; break; } if (rxb->size2 == NETMAP_BUF_SIZE(na)) { hwidx = rxb->hwidx2; break; } } if (i >= SW_ZONE_SIZES) { if_printf(ifp, "no hwidx for netmap buffer size %d.\n", NETMAP_BUF_SIZE(na)); return (ENXIO); } /* Must set caps before calling netmap_reset */ nm_set_native_flags(na); for_each_nm_rxq(vi, i, nm_rxq) { kring = na->rx_rings[nm_rxq->nid]; if (!nm_kring_pending_on(kring) || nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID) continue; alloc_nm_rxq_hwq(vi, nm_rxq, tnl_cong(vi->pi, nm_cong_drop)); nm_rxq->fl_hwidx = hwidx; + nm_rxq->bb_zone = rxb->zone; + nm_rxq->bb = uma_zalloc(nm_rxq->bb_zone, M_WAITOK); slot = netmap_reset(na, NR_RX, i, 0); MPASS(slot != NULL); /* XXXNM: error check, not assert */ /* We deal with 8 bufs at a time */ MPASS((na->num_rx_desc & 7) == 0); MPASS(na->num_rx_desc == nm_rxq->fl_sidx); for (j = 0; j < nm_rxq->fl_sidx; j++) { uint64_t ba; PNMB(na, &slot[j], &ba); MPASS(ba != 0); nm_rxq->fl_desc[j] = htobe64(ba | hwidx); } j = nm_rxq->fl_pidx = nm_rxq->fl_sidx - 8; MPASS((j & 7) == 0); j /= 8; /* driver pidx to hardware pidx */ wmb(); t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(j)); (void) atomic_cmpset_int(&nm_rxq->nm_state, NM_OFF, NM_ON); } for_each_nm_txq(vi, i, nm_txq) { kring = na->tx_rings[nm_txq->nid]; if (!nm_kring_pending_on(kring) || nm_txq->cntxt_id != INVALID_NM_TXQ_CNTXT_ID) continue; alloc_nm_txq_hwq(vi, nm_txq); slot = netmap_reset(na, NR_TX, i, 0); MPASS(slot != NULL); /* XXXNM: error check, not assert */ } if (vi->nm_rss == NULL) { vi->nm_rss = malloc(vi->rss_size * sizeof(uint16_t), M_CXGBE, M_ZERO | M_WAITOK); } return (cxgbe_netmap_rss(sc, vi, ifp, na)); } +static void +flush_nm_rxq(struct adapter *sc, struct vi_info *vi, struct sge_nm_rxq *nm_rxq) +{ + int i, n; + u_int fl_pidx, fl_pidx_target, hw_cidx_desc; + const uint64_t ba = pmap_kextract((vm_offset_t)nm_rxq->bb); + + hw_cidx_desc = nm_rxq->fl_cidx / 8; + if (hw_cidx_desc == 0) + fl_pidx_target = nm_rxq->fl_sidx2 - 8; + else + fl_pidx_target = (hw_cidx_desc - 1) * 8; + MPASS((fl_pidx_target & 7) == 0); + + fl_pidx = nm_rxq->fl_pidx; + MPASS((fl_pidx & 7) == 0); + for (n = 0; fl_pidx != fl_pidx_target; n++) { + for (i = 0; i < 8; i++, fl_pidx++) + nm_rxq->fl_desc[fl_pidx] = htobe64(ba | nm_rxq->fl_hwidx); + if (__predict_false(fl_pidx == nm_rxq->fl_sidx2)) + fl_pidx = 0; + } + t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(n)); +} + static int cxgbe_netmap_off(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, struct netmap_adapter *na) { struct netmap_kring *kring; int rc, i, nm_state, nactive; struct sge_nm_txq *nm_txq; struct sge_nm_rxq *nm_rxq; ASSERT_SYNCHRONIZED_OP(sc); MPASS(vi->nnmrxq > 0); MPASS(vi->nnmtxq > 0); if (!nm_netmap_on(na)) return (0); if ((vi->flags & VI_INIT_DONE) == 0) return (0); /* First remove the queues that are stopping from the RSS table. */ rc = cxgbe_netmap_rss(sc, vi, ifp, na); if (rc != 0) return (rc); /* error message logged already. */ + /* + * First pass over the rx queues to make sure they're all caught up. + * + * The freelists could be out of buffers and we may need to arrange + * things so that any packets still in flight (after TP's cong_drop + * logic but not yet DMA'd) have somewhere to go and do not block the + * pipeline. Do this before trying to free any queue. + */ + for_each_nm_rxq(vi, i, nm_rxq) { + nm_state = atomic_load_int(&nm_rxq->nm_state); + kring = na->rx_rings[nm_rxq->nid]; + if (nm_state == NM_OFF || !nm_kring_pending_off(kring)) + continue; + MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); + flush_nm_rxq(sc, vi, nm_rxq); + } + for_each_nm_txq(vi, i, nm_txq) { struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx]; kring = na->tx_rings[nm_txq->nid]; if (!nm_kring_pending_off(kring) || nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID) continue; /* Wait for hw pidx to catch up ... */ while (be16toh(nm_txq->pidx) != spg->pidx) pause("nmpidx", 1); /* ... and then for the cidx. */ while (spg->pidx != spg->cidx) pause("nmcidx", 1); free_nm_txq_hwq(vi, nm_txq); /* XXX: netmap, not the driver, should do this. */ kring->rhead = kring->rcur = kring->nr_hwcur = 0; kring->rtail = kring->nr_hwtail = kring->nkr_num_slots - 1; } nactive = 0; for_each_nm_rxq(vi, i, nm_rxq) { nm_state = atomic_load_int(&nm_rxq->nm_state); kring = na->rx_rings[nm_rxq->nid]; if (nm_state != NM_OFF && !nm_kring_pending_off(kring)) nactive++; if (nm_state == NM_OFF || !nm_kring_pending_off(kring)) continue; MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID); while (!atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_OFF)) pause("nmst", 1); free_nm_rxq_hwq(vi, nm_rxq); + uma_zfree(nm_rxq->bb_zone, nm_rxq->bb); + nm_rxq->bb = NULL; /* XXX: netmap, not the driver, should do this. */ kring->rhead = kring->rcur = kring->nr_hwcur = 0; kring->rtail = kring->nr_hwtail = 0; } netmap_krings_mode_commit(na, 0); if (nactive == 0) nm_clear_native_flags(na); return (rc); } static int cxgbe_netmap_reg(struct netmap_adapter *na, int on) { struct ifnet *ifp = na->ifp; struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->adapter; int rc; rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4nmreg"); if (rc != 0) return (rc); if (on) rc = cxgbe_netmap_on(sc, vi, ifp, na); else rc = cxgbe_netmap_off(sc, vi, ifp, na); end_synchronized_op(sc, 0); return (rc); } /* How many packets can a single type1 WR carry in n descriptors */ static inline int ndesc_to_npkt(const int n) { MPASS(n > 0 && n <= SGE_MAX_WR_NDESC); return (n * 2 - 1); } #define MAX_NPKT_IN_TYPE1_WR (ndesc_to_npkt(SGE_MAX_WR_NDESC)) /* * Space (in descriptors) needed for a type1 WR (TX_PKTS or TX_PKTS2) that * carries n packets */ static inline int npkt_to_ndesc(const int n) { MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR); return ((n + 2) / 2); } /* * Space (in 16B units) needed for a type1 WR (TX_PKTS or TX_PKTS2) that * carries n packets */ static inline int npkt_to_len16(const int n) { MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR); return (n * 2 + 1); } #define NMIDXDIFF(q, idx) IDXDIFF((q)->pidx, (q)->idx, (q)->sidx) static void ring_nm_txq_db(struct adapter *sc, struct sge_nm_txq *nm_txq) { int n; u_int db = nm_txq->doorbells; MPASS(nm_txq->pidx != nm_txq->dbidx); n = NMIDXDIFF(nm_txq, dbidx); if (n > 1) clrbit(&db, DOORBELL_WCWR); wmb(); switch (ffs(db) - 1) { case DOORBELL_UDB: *nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n)); break; case DOORBELL_WCWR: { volatile uint64_t *dst, *src; /* * Queues whose 128B doorbell segment fits in the page do not * use relative qid (udb_qid is always 0). Only queues with * doorbell segments can do WCWR. */ KASSERT(nm_txq->udb_qid == 0 && n == 1, ("%s: inappropriate doorbell (0x%x, %d, %d) for nm_txq %p", __func__, nm_txq->doorbells, n, nm_txq->pidx, nm_txq)); dst = (volatile void *)((uintptr_t)nm_txq->udb + UDBS_WR_OFFSET - UDBS_DB_OFFSET); src = (void *)&nm_txq->desc[nm_txq->dbidx]; while (src != (void *)&nm_txq->desc[nm_txq->dbidx + 1]) *dst++ = *src++; wmb(); break; } case DOORBELL_UDBWC: *nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n)); wmb(); break; case DOORBELL_KDB: t4_write_reg(sc, sc->sge_kdoorbell_reg, V_QID(nm_txq->cntxt_id) | V_PIDX(n)); break; } nm_txq->dbidx = nm_txq->pidx; } /* * Write work requests to send 'npkt' frames and ring the doorbell to send them * on their way. No need to check for wraparound. */ static void cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq, struct netmap_kring *kring, int npkt, int npkt_remaining) { struct netmap_ring *ring = kring->ring; struct netmap_slot *slot; const u_int lim = kring->nkr_num_slots - 1; struct fw_eth_tx_pkts_wr *wr = (void *)&nm_txq->desc[nm_txq->pidx]; uint16_t len; uint64_t ba; struct cpl_tx_pkt_core *cpl; struct ulptx_sgl *usgl; int i, n; while (npkt) { n = min(npkt, MAX_NPKT_IN_TYPE1_WR); len = 0; wr = (void *)&nm_txq->desc[nm_txq->pidx]; wr->op_pkd = nm_txq->op_pkd; wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(npkt_to_len16(n))); wr->npkt = n; wr->r3 = 0; wr->type = 1; cpl = (void *)(wr + 1); for (i = 0; i < n; i++) { slot = &ring->slot[kring->nr_hwcur]; PNMB(kring->na, slot, &ba); MPASS(ba != 0); cpl->ctrl0 = nm_txq->cpl_ctrl0; cpl->pack = 0; cpl->len = htobe16(slot->len); cpl->ctrl1 = nm_txcsum ? 0 : htobe64(F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS); usgl = (void *)(cpl + 1); usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(1)); usgl->len0 = htobe32(slot->len); usgl->addr0 = htobe64(ba); slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); cpl = (void *)(usgl + 1); MPASS(slot->len + len <= UINT16_MAX); len += slot->len; kring->nr_hwcur = nm_next(kring->nr_hwcur, lim); } wr->plen = htobe16(len); npkt -= n; nm_txq->pidx += npkt_to_ndesc(n); MPASS(nm_txq->pidx <= nm_txq->sidx); if (__predict_false(nm_txq->pidx == nm_txq->sidx)) { /* * This routine doesn't know how to write WRs that wrap * around. Make sure it wasn't asked to. */ MPASS(npkt == 0); nm_txq->pidx = 0; } if (npkt == 0 && npkt_remaining == 0) { /* All done. */ if (lazy_tx_credit_flush == 0) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); nm_txq->equeqidx = nm_txq->pidx; nm_txq->equiqidx = nm_txq->pidx; } ring_nm_txq_db(sc, nm_txq); return; } if (NMIDXDIFF(nm_txq, equiqidx) >= nm_txq->sidx / 2) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); nm_txq->equeqidx = nm_txq->pidx; nm_txq->equiqidx = nm_txq->pidx; } else if (NMIDXDIFF(nm_txq, equeqidx) >= 64) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); nm_txq->equeqidx = nm_txq->pidx; } if (NMIDXDIFF(nm_txq, dbidx) >= 2 * SGE_MAX_WR_NDESC) ring_nm_txq_db(sc, nm_txq); } /* Will get called again. */ MPASS(npkt_remaining); } /* How many contiguous free descriptors starting at pidx */ static inline int contiguous_ndesc_available(struct sge_nm_txq *nm_txq) { if (nm_txq->cidx > nm_txq->pidx) return (nm_txq->cidx - nm_txq->pidx - 1); else if (nm_txq->cidx > 0) return (nm_txq->sidx - nm_txq->pidx); else return (nm_txq->sidx - nm_txq->pidx - 1); } static int reclaim_nm_tx_desc(struct sge_nm_txq *nm_txq) { struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx]; uint16_t hw_cidx = spg->cidx; /* snapshot */ struct fw_eth_tx_pkts_wr *wr; int n = 0; hw_cidx = be16toh(hw_cidx); while (nm_txq->cidx != hw_cidx) { wr = (void *)&nm_txq->desc[nm_txq->cidx]; MPASS(wr->op_pkd == htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)) || wr->op_pkd == htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR))); MPASS(wr->type == 1); MPASS(wr->npkt > 0 && wr->npkt <= MAX_NPKT_IN_TYPE1_WR); n += wr->npkt; nm_txq->cidx += npkt_to_ndesc(wr->npkt); /* * We never sent a WR that wrapped around so the credits coming * back, WR by WR, should never cause the cidx to wrap around * either. */ MPASS(nm_txq->cidx <= nm_txq->sidx); if (__predict_false(nm_txq->cidx == nm_txq->sidx)) nm_txq->cidx = 0; } return (n); } static int cxgbe_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->adapter; struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[vi->first_nm_txq + kring->ring_id]; const u_int head = kring->rhead; u_int reclaimed = 0; int n, d, npkt_remaining, ndesc_remaining; /* * Tx was at kring->nr_hwcur last time around and now we need to advance * to kring->rhead. Note that the driver's pidx moves independent of * netmap's kring->nr_hwcur (pidx counts descriptors and the relation * between descriptors and frames isn't 1:1). */ npkt_remaining = head >= kring->nr_hwcur ? head - kring->nr_hwcur : kring->nkr_num_slots - kring->nr_hwcur + head; while (npkt_remaining) { reclaimed += reclaim_nm_tx_desc(nm_txq); ndesc_remaining = contiguous_ndesc_available(nm_txq); /* Can't run out of descriptors with packets still remaining */ MPASS(ndesc_remaining > 0); /* # of desc needed to tx all remaining packets */ d = (npkt_remaining / MAX_NPKT_IN_TYPE1_WR) * SGE_MAX_WR_NDESC; if (npkt_remaining % MAX_NPKT_IN_TYPE1_WR) d += npkt_to_ndesc(npkt_remaining % MAX_NPKT_IN_TYPE1_WR); if (d <= ndesc_remaining) n = npkt_remaining; else { /* Can't send all, calculate how many can be sent */ n = (ndesc_remaining / SGE_MAX_WR_NDESC) * MAX_NPKT_IN_TYPE1_WR; if (ndesc_remaining % SGE_MAX_WR_NDESC) n += ndesc_to_npkt(ndesc_remaining % SGE_MAX_WR_NDESC); } /* Send n packets and update nm_txq->pidx and kring->nr_hwcur */ npkt_remaining -= n; cxgbe_nm_tx(sc, nm_txq, kring, n, npkt_remaining); } MPASS(npkt_remaining == 0); MPASS(kring->nr_hwcur == head); MPASS(nm_txq->dbidx == nm_txq->pidx); /* * Second part: reclaim buffers for completed transmissions. */ if (reclaimed || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { reclaimed += reclaim_nm_tx_desc(nm_txq); kring->nr_hwtail += reclaimed; if (kring->nr_hwtail >= kring->nkr_num_slots) kring->nr_hwtail -= kring->nkr_num_slots; } return (0); } static int cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; struct ifnet *ifp = na->ifp; struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->adapter; struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[vi->first_nm_rxq + kring->ring_id]; u_int const head = kring->rhead; u_int n; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; if (black_hole) return (0); /* No updates ever. */ if (netmap_no_pendintr || force_update) { kring->nr_hwtail = atomic_load_acq_32(&nm_rxq->fl_cidx); kring->nr_kflags &= ~NKR_PENDINTR; } if (nm_rxq->fl_db_saved > 0 && starve_fl == 0) { wmb(); t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(nm_rxq->fl_db_saved)); nm_rxq->fl_db_saved = 0; } /* Userspace done with buffers from kring->nr_hwcur to head */ n = head >= kring->nr_hwcur ? head - kring->nr_hwcur : kring->nkr_num_slots - kring->nr_hwcur + head; n &= ~7U; if (n > 0) { u_int fl_pidx = nm_rxq->fl_pidx; struct netmap_slot *slot = &ring->slot[fl_pidx]; uint64_t ba; int i, dbinc = 0, hwidx = nm_rxq->fl_hwidx; /* * We always deal with 8 buffers at a time. We must have * stopped at an 8B boundary (fl_pidx) last time around and we * must have a multiple of 8B buffers to give to the freelist. */ MPASS((fl_pidx & 7) == 0); MPASS((n & 7) == 0); IDXINCR(kring->nr_hwcur, n, kring->nkr_num_slots); IDXINCR(nm_rxq->fl_pidx, n, nm_rxq->fl_sidx2); while (n > 0) { for (i = 0; i < 8; i++, fl_pidx++, slot++) { PNMB(na, slot, &ba); MPASS(ba != 0); nm_rxq->fl_desc[fl_pidx] = htobe64(ba | hwidx); slot->flags &= ~NS_BUF_CHANGED; MPASS(fl_pidx <= nm_rxq->fl_sidx2); } n -= 8; if (fl_pidx == nm_rxq->fl_sidx2) { fl_pidx = 0; slot = &ring->slot[0]; } if (++dbinc == nm_rxq->fl_db_threshold) { wmb(); if (starve_fl) nm_rxq->fl_db_saved += dbinc; else { t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(dbinc)); } dbinc = 0; } } MPASS(nm_rxq->fl_pidx == fl_pidx); if (dbinc > 0) { wmb(); if (starve_fl) nm_rxq->fl_db_saved += dbinc; else { t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(dbinc)); } } } return (0); } void cxgbe_nm_attach(struct vi_info *vi) { struct port_info *pi; struct adapter *sc; struct netmap_adapter na; MPASS(vi->nnmrxq > 0); MPASS(vi->ifp != NULL); pi = vi->pi; sc = pi->adapter; bzero(&na, sizeof(na)); na.ifp = vi->ifp; na.na_flags = NAF_BDG_MAYSLEEP; /* Netmap doesn't know about the space reserved for the status page. */ na.num_tx_desc = vi->qsize_txq - sc->params.sge.spg_len / EQ_ESIZE; /* * The freelist's cidx/pidx drives netmap's rx cidx/pidx. So * num_rx_desc is based on the number of buffers that can be held in the * freelist, and not the number of entries in the iq. (These two are * not exactly the same due to the space taken up by the status page). */ na.num_rx_desc = rounddown(vi->qsize_rxq, 8); na.nm_txsync = cxgbe_netmap_txsync; na.nm_rxsync = cxgbe_netmap_rxsync; na.nm_register = cxgbe_netmap_reg; na.num_tx_rings = vi->nnmtxq; na.num_rx_rings = vi->nnmrxq; na.rx_buf_maxsize = MAX_MTU; netmap_attach(&na); /* This adds IFCAP_NETMAP to if_capabilities */ } void cxgbe_nm_detach(struct vi_info *vi) { MPASS(vi->nnmrxq > 0); MPASS(vi->ifp != NULL); netmap_detach(vi->ifp); } static inline const void * unwrap_nm_fw6_msg(const struct cpl_fw6_msg *cpl) { MPASS(cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL); /* data[0] is RSS header */ return (&cpl->data[1]); } static void handle_nm_sge_egr_update(struct adapter *sc, struct ifnet *ifp, const struct cpl_sge_egr_update *egr) { uint32_t oq; struct sge_nm_txq *nm_txq; oq = be32toh(egr->opcode_qid); MPASS(G_CPL_OPCODE(oq) == CPL_SGE_EGR_UPDATE); nm_txq = (void *)sc->sge.eqmap[G_EGR_QID(oq) - sc->sge.eq_start]; netmap_tx_irq(ifp, nm_txq->nid); } void service_nm_rxq(struct sge_nm_rxq *nm_rxq) { struct vi_info *vi = nm_rxq->vi; struct adapter *sc = vi->adapter; struct ifnet *ifp = vi->ifp; struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = na->rx_rings[nm_rxq->nid]; struct netmap_ring *ring = kring->ring; struct iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx]; const void *cpl; uint32_t lq; u_int work = 0; uint8_t opcode; uint32_t fl_cidx = atomic_load_acq_32(&nm_rxq->fl_cidx); u_int fl_credits = fl_cidx & 7; u_int ndesc = 0; /* desc processed since last cidx update */ u_int nframes = 0; /* frames processed since last netmap wakeup */ while ((d->rsp.u.type_gen & F_RSPD_GEN) == nm_rxq->iq_gen) { rmb(); lq = be32toh(d->rsp.pldbuflen_qid); opcode = d->rss.opcode; cpl = &d->cpl[0]; switch (G_RSPD_TYPE(d->rsp.u.type_gen)) { case X_RSPD_TYPE_FLBUF: /* fall through */ case X_RSPD_TYPE_CPL: MPASS(opcode < NUM_CPL_CMDS); switch (opcode) { case CPL_FW4_MSG: case CPL_FW6_MSG: cpl = unwrap_nm_fw6_msg(cpl); /* fall through */ case CPL_SGE_EGR_UPDATE: handle_nm_sge_egr_update(sc, ifp, cpl); break; case CPL_RX_PKT: ring->slot[fl_cidx].len = G_RSPD_LEN(lq) - sc->params.sge.fl_pktshift; ring->slot[fl_cidx].flags = 0; nframes++; if (!(lq & F_RSPD_NEWBUF)) { MPASS(black_hole == 2); break; } fl_credits++; if (__predict_false(++fl_cidx == nm_rxq->fl_sidx)) fl_cidx = 0; break; default: panic("%s: unexpected opcode 0x%x on nm_rxq %p", __func__, opcode, nm_rxq); } break; case X_RSPD_TYPE_INTR: /* Not equipped to handle forwarded interrupts. */ panic("%s: netmap queue received interrupt for iq %u\n", __func__, lq); default: panic("%s: illegal response type %d on nm_rxq %p", __func__, G_RSPD_TYPE(d->rsp.u.type_gen), nm_rxq); } d++; if (__predict_false(++nm_rxq->iq_cidx == nm_rxq->iq_sidx)) { nm_rxq->iq_cidx = 0; d = &nm_rxq->iq_desc[0]; nm_rxq->iq_gen ^= F_RSPD_GEN; } if (__predict_false(++nframes == rx_nframes) && !black_hole) { atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); netmap_rx_irq(ifp, nm_rxq->nid, &work); nframes = 0; } if (__predict_false(++ndesc == rx_ndesc)) { if (black_hole && fl_credits >= 8) { fl_credits /= 8; IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, nm_rxq->fl_sidx); t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(fl_credits)); fl_credits = fl_cidx & 7; } t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndesc) | V_INGRESSQID(nm_rxq->iq_cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); ndesc = 0; } } atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); if (black_hole) { fl_credits /= 8; IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, nm_rxq->fl_sidx); t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(fl_credits)); } else if (nframes > 0) netmap_rx_irq(ifp, nm_rxq->nid, &work); t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndesc) | V_INGRESSQID((u32)nm_rxq->iq_cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx))); } #endif